Skip to content

vllm_omni.diffusion.models.hidream_image.pipeline_hidream_image

logger module-attribute

logger = get_logger(__name__)

HiDreamImagePipeline

Bases: Module, CFGParallelMixin, DiffusionPipelineProfilerMixin, ProgressBarMixin

attention_kwargs property

attention_kwargs

default_sample_size instance-attribute

default_sample_size = 128

device instance-attribute

device = get_local_device()

do_classifier_free_guidance property

do_classifier_free_guidance

guidance_scale property

guidance_scale

interrupt property

interrupt

num_timesteps property

num_timesteps

od_config instance-attribute

od_config = od_config

scheduler instance-attribute

scheduler = from_pretrained(
    model,
    subfolder="scheduler",
    local_files_only=local_files_only,
)

stage instance-attribute

stage = None

text_encoder instance-attribute

text_encoder = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder",
    prefetch_list=hidream_subfolders,
    local_files_only=local_files_only,
)

text_encoder_2 instance-attribute

text_encoder_2 = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder_2",
    prefetch_list=hidream_subfolders,
    local_files_only=local_files_only,
)

text_encoder_3 instance-attribute

text_encoder_3 = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder_3",
    prefetch_list=hidream_subfolders,
    local_files_only=local_files_only,
)

text_encoder_4 instance-attribute

text_encoder_4 = to(device)

tokenizer instance-attribute

tokenizer = from_pretrained(
    model,
    subfolder="tokenizer",
    local_files_only=local_files_only,
)

tokenizer_2 instance-attribute

tokenizer_2 = from_pretrained(
    model,
    subfolder="tokenizer_2",
    local_files_only=local_files_only,
)

tokenizer_3 instance-attribute

tokenizer_3 = from_pretrained(
    model,
    subfolder="tokenizer_3",
    local_files_only=local_files_only,
)

tokenizer_4 instance-attribute

tokenizer_4 = from_pretrained(llama_path, use_fast=False)

transformer instance-attribute

transformer = HiDreamImageTransformer2DModel(
    od_config=od_config,
    quant_config=quantization_config,
    **transformer_kwargs,
)

vae instance-attribute

vae = to(device)

vae_scale_factor instance-attribute

vae_scale_factor = (
    2 ** (len(block_out_channels) - 1)
    if getattr(self, "vae", None)
    else 8
)

weights_sources instance-attribute

weights_sources = [
    ComponentSource(
        model_or_path=model,
        subfolder="transformer",
        revision=None,
        prefix="transformer.",
        fall_back_to_pt=True,
    )
]

check_cfg_parallel_validity

check_cfg_parallel_validity(
    true_cfg_scale: float, has_neg_prompt: bool
)

check_inputs

check_inputs(
    prompt,
    prompt_2,
    prompt_3,
    prompt_4,
    negative_prompt=None,
    negative_prompt_2=None,
    negative_prompt_3=None,
    negative_prompt_4=None,
    prompt_embeds_t5=None,
    prompt_embeds_llama3=None,
    negative_prompt_embeds_t5=None,
    negative_prompt_embeds_llama3=None,
    pooled_prompt_embeds=None,
    negative_pooled_prompt_embeds=None,
    callback_on_step_end_tensor_inputs=None,
)

diffuse

diffuse(
    prompt_embeds_t5: Tensor,
    prompt_embeds_llama3: Tensor,
    pooled_prompt_embeds: Tensor,
    latents: Tensor,
    timesteps: Tensor,
    do_true_cfg: bool,
) -> Tensor

disable_vae_slicing

disable_vae_slicing()

Disable sliced VAE decoding. If enable_vae_slicing was previously called, this method reverts to computing the decoding in one step.

disable_vae_tiling

disable_vae_tiling()

Disable tiled VAE decoding. If enable_vae_tiling was previously called, this method reverts to computing the decoding in one step.

enable_vae_slicing

enable_vae_slicing()

Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.

enable_vae_tiling

enable_vae_tiling()

Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in several steps. This is useful for saving a large amount of memory and for processing larger images.

encode_prompt

encode_prompt(
    prompt: str | list[str] | None = None,
    prompt_2: str | list[str] | None = None,
    prompt_3: str | list[str] | None = None,
    prompt_4: str | list[str] | None = None,
    dtype: dtype | None = None,
    num_images_per_prompt: int = 1,
    do_classifier_free_guidance: bool = True,
    negative_prompt: str | list[str] | None = None,
    negative_prompt_2: str | list[str] | None = None,
    negative_prompt_3: str | list[str] | None = None,
    negative_prompt_4: str | list[str] | None = None,
    prompt_embeds_t5: list[FloatTensor] | None = None,
    prompt_embeds_llama3: list[FloatTensor] | None = None,
    negative_prompt_embeds_t5: list[FloatTensor]
    | None = None,
    negative_prompt_embeds_llama3: list[FloatTensor]
    | None = None,
    pooled_prompt_embeds: FloatTensor | None = None,
    negative_pooled_prompt_embeds: FloatTensor
    | None = None,
    max_sequence_length: int = 128,
    lora_scale: float | None = None,
)

forward

forward(
    req: OmniDiffusionRequest,
    prompt: str | list[str] = None,
    prompt_2: str | list[str] | None = None,
    prompt_3: str | list[str] | None = None,
    prompt_4: str | list[str] | None = None,
    height: int | None = None,
    width: int | None = None,
    num_inference_steps: int = 50,
    sigmas: list[float] | None = None,
    guidance_scale: float = 5.0,
    negative_prompt: str | list[str] | None = None,
    negative_prompt_2: str | list[str] | None = None,
    negative_prompt_3: str | list[str] | None = None,
    negative_prompt_4: str | list[str] | None = None,
    num_images_per_prompt: int | None = 1,
    generator: Generator | list[Generator] | None = None,
    latents: FloatTensor | None = None,
    prompt_embeds_t5: FloatTensor | None = None,
    prompt_embeds_llama3: FloatTensor | None = None,
    negative_prompt_embeds_t5: FloatTensor | None = None,
    negative_prompt_embeds_llama3: FloatTensor
    | None = None,
    pooled_prompt_embeds: FloatTensor | None = None,
    negative_pooled_prompt_embeds: FloatTensor
    | None = None,
    output_type: str | None = "pil",
    return_dict: bool = True,
    attention_kwargs: dict[str, Any] | None = None,
    callback_on_step_end: Callable[[int, int], None]
    | None = None,
    callback_on_step_end_tensor_inputs: list[str] = [
        "latents"
    ],
    max_sequence_length: int = 128,
    **kwargs,
)

load_weights

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

prepare_latents

prepare_latents(
    batch_size,
    num_channels_latents,
    height,
    width,
    dtype,
    generator,
    latents=None,
)

prepare_timesteps

prepare_timesteps(
    num_inference_steps, sigmas, image_seq_len
)

calculate_shift

calculate_shift(
    image_seq_len,
    base_seq_len: int = 256,
    max_seq_len: int = 4096,
    base_shift: float = 0.5,
    max_shift: float = 1.15,
)

get_hidream_image_post_process_func

get_hidream_image_post_process_func(
    od_config: OmniDiffusionConfig,
)

retrieve_timesteps

retrieve_timesteps(
    scheduler,
    num_inference_steps: int | None = None,
    device: str | device | None = None,
    timesteps: list[int] | None = None,
    sigmas: list[float] | None = None,
    **kwargs,
)

Calls the scheduler's set_timesteps method and retrieves timesteps from the scheduler after the call. Handles custom timesteps. Any kwargs will be supplied to scheduler.set_timesteps.

Parameters:

Name Type Description Default
scheduler `SchedulerMixin`

The scheduler to get timesteps from.

required
num_inference_steps `int`

The number of diffusion steps used when generating samples with a pre-trained model. If used, timesteps and sigmas must be None.

None
device `str` or `torch.device`, *optional*

The device to which the timesteps should be moved. If None, the timesteps are not moved.

None
timesteps `list[int]`, *optional*

Custom timesteps used to override the timestep spacing strategy of the scheduler. If timesteps is passed, num_inference_steps and sigmas must be None.

None
sigmas `list[float]`, *optional*

Custom sigmas used to override the timestep spacing strategy of the scheduler. If sigmas is passed, num_inference_steps and timesteps must be None.

None

Returns:

Name Type Description
timesteps `torch.Tensor`

The timestep schedule from the scheduler.

num_inference_steps `int`

The number of inference steps.