Skip to content

vllm_omni.diffusion.models.hidream_image

HiDream Image diffusion model components.

Modules:

Name Description
hidream_image_transformer
pipeline_hidream_i1_image
pipeline_hidream_image

HiDreamImagePipeline

Bases: Module, CFGParallelMixin, DiffusionPipelineProfilerMixin, ProgressBarMixin

attention_kwargs property

attention_kwargs

default_sample_size instance-attribute

default_sample_size = 128

device instance-attribute

device = get_local_device()

do_classifier_free_guidance property

do_classifier_free_guidance

guidance_scale property

guidance_scale

interrupt property

interrupt

num_timesteps property

num_timesteps

od_config instance-attribute

od_config = od_config

scheduler instance-attribute

scheduler = from_pretrained(
    model,
    subfolder="scheduler",
    local_files_only=local_files_only,
)

stage instance-attribute

stage = None

text_encoder instance-attribute

text_encoder = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder",
    prefetch_list=hidream_subfolders,
    local_files_only=local_files_only,
)

text_encoder_2 instance-attribute

text_encoder_2 = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder_2",
    prefetch_list=hidream_subfolders,
    local_files_only=local_files_only,
)

text_encoder_3 instance-attribute

text_encoder_3 = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder_3",
    prefetch_list=hidream_subfolders,
    local_files_only=local_files_only,
)

text_encoder_4 instance-attribute

text_encoder_4 = to(device)

tokenizer instance-attribute

tokenizer = from_pretrained(
    model,
    subfolder="tokenizer",
    local_files_only=local_files_only,
)

tokenizer_2 instance-attribute

tokenizer_2 = from_pretrained(
    model,
    subfolder="tokenizer_2",
    local_files_only=local_files_only,
)

tokenizer_3 instance-attribute

tokenizer_3 = from_pretrained(
    model,
    subfolder="tokenizer_3",
    local_files_only=local_files_only,
)

tokenizer_4 instance-attribute

tokenizer_4 = from_pretrained(llama_path, use_fast=False)

transformer instance-attribute

transformer = HiDreamImageTransformer2DModel(
    od_config=od_config,
    quant_config=quantization_config,
    **transformer_kwargs,
)

vae instance-attribute

vae = to(device)

vae_scale_factor instance-attribute

vae_scale_factor = (
    2 ** (len(block_out_channels) - 1)
    if getattr(self, "vae", None)
    else 8
)

weights_sources instance-attribute

weights_sources = [
    ComponentSource(
        model_or_path=model,
        subfolder="transformer",
        revision=None,
        prefix="transformer.",
        fall_back_to_pt=True,
    )
]

check_cfg_parallel_validity

check_cfg_parallel_validity(
    true_cfg_scale: float, has_neg_prompt: bool
)

check_inputs

check_inputs(
    prompt,
    prompt_2,
    prompt_3,
    prompt_4,
    negative_prompt=None,
    negative_prompt_2=None,
    negative_prompt_3=None,
    negative_prompt_4=None,
    prompt_embeds_t5=None,
    prompt_embeds_llama3=None,
    negative_prompt_embeds_t5=None,
    negative_prompt_embeds_llama3=None,
    pooled_prompt_embeds=None,
    negative_pooled_prompt_embeds=None,
    callback_on_step_end_tensor_inputs=None,
)

diffuse

diffuse(
    prompt_embeds_t5: Tensor,
    prompt_embeds_llama3: Tensor,
    pooled_prompt_embeds: Tensor,
    latents: Tensor,
    timesteps: Tensor,
    do_true_cfg: bool,
) -> Tensor

disable_vae_slicing

disable_vae_slicing()

Disable sliced VAE decoding. If sliced decoding was previously turned on with enable_vae_slicing, this method reverts to computing the decoding in a single step.

disable_vae_tiling

disable_vae_tiling()

Disable tiled VAE decoding. If tiled decoding was previously turned on with enable_vae_tiling, this method reverts to computing the decoding in a single step.

enable_vae_slicing

enable_vae_slicing()

Enable sliced VAE decoding. When this option is enabled, the VAE splits the input tensor into slices and computes the decoding in several steps. This saves memory and allows larger batch sizes.

enable_vae_tiling

enable_vae_tiling()

Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow processing larger images.

encode_prompt

encode_prompt(
    prompt: str | list[str] | None = None,
    prompt_2: str | list[str] | None = None,
    prompt_3: str | list[str] | None = None,
    prompt_4: str | list[str] | None = None,
    dtype: dtype | None = None,
    num_images_per_prompt: int = 1,
    do_classifier_free_guidance: bool = True,
    negative_prompt: str | list[str] | None = None,
    negative_prompt_2: str | list[str] | None = None,
    negative_prompt_3: str | list[str] | None = None,
    negative_prompt_4: str | list[str] | None = None,
    prompt_embeds_t5: list[FloatTensor] | None = None,
    prompt_embeds_llama3: list[FloatTensor] | None = None,
    negative_prompt_embeds_t5: list[FloatTensor]
    | None = None,
    negative_prompt_embeds_llama3: list[FloatTensor]
    | None = None,
    pooled_prompt_embeds: FloatTensor | None = None,
    negative_pooled_prompt_embeds: FloatTensor
    | None = None,
    max_sequence_length: int = 128,
    lora_scale: float | None = None,
)

forward

forward(
    req: OmniDiffusionRequest,
    prompt: str | list[str] = None,
    prompt_2: str | list[str] | None = None,
    prompt_3: str | list[str] | None = None,
    prompt_4: str | list[str] | None = None,
    height: int | None = None,
    width: int | None = None,
    num_inference_steps: int = 50,
    sigmas: list[float] | None = None,
    guidance_scale: float = 5.0,
    negative_prompt: str | list[str] | None = None,
    negative_prompt_2: str | list[str] | None = None,
    negative_prompt_3: str | list[str] | None = None,
    negative_prompt_4: str | list[str] | None = None,
    num_images_per_prompt: int | None = 1,
    generator: Generator | list[Generator] | None = None,
    latents: FloatTensor | None = None,
    prompt_embeds_t5: FloatTensor | None = None,
    prompt_embeds_llama3: FloatTensor | None = None,
    negative_prompt_embeds_t5: FloatTensor | None = None,
    negative_prompt_embeds_llama3: FloatTensor
    | None = None,
    pooled_prompt_embeds: FloatTensor | None = None,
    negative_pooled_prompt_embeds: FloatTensor
    | None = None,
    output_type: str | None = "pil",
    return_dict: bool = True,
    attention_kwargs: dict[str, Any] | None = None,
    callback_on_step_end: Callable[[int, int], None]
    | None = None,
    callback_on_step_end_tensor_inputs: list[str] = [
        "latents"
    ],
    max_sequence_length: int = 128,
    **kwargs,
)

load_weights

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

prepare_latents

prepare_latents(
    batch_size,
    num_channels_latents,
    height,
    width,
    dtype,
    generator,
    latents=None,
)

prepare_timesteps

prepare_timesteps(
    num_inference_steps, sigmas, image_seq_len
)

HiDreamImageTransformer2DModel

Bases: Module

caption_projection instance-attribute

caption_projection = ModuleList(caption_projection)

double_stream_blocks instance-attribute

double_stream_blocks = ModuleList(
    [
        (
            HiDreamBlock(
                HiDreamImageTransformerBlock(
                    dim=inner_dim,
                    num_attention_heads=num_attention_heads,
                    attention_head_dim=attention_head_dim,
                    num_routed_experts=num_routed_experts,
                    num_activated_experts=num_activated_experts,
                    _force_inference_output=force_inference_output,
                )
            )
        )
        for _ in (range(num_layers))
    ]
)

final_layer instance-attribute

final_layer = HiDreamImageOutEmbed(
    inner_dim, patch_size, out_channels
)

force_inference_output instance-attribute

force_inference_output = force_inference_output

in_channels instance-attribute

in_channels = in_channels

inner_dim instance-attribute

inner_dim = num_attention_heads * attention_head_dim

llama_layers instance-attribute

llama_layers = llama_layers

max_seq instance-attribute

max_seq = (
    max_resolution[0]
    * max_resolution[1]
    // (patch_size * patch_size)
)

out_channels instance-attribute

out_channels = out_channels or in_channels

p_embedder instance-attribute

p_embedder = HiDreamImagePooledEmbed(
    text_emb_dim, inner_dim
)

parallel_config instance-attribute

parallel_config = parallel_config

patch_size instance-attribute

patch_size = patch_size

pe_embedder instance-attribute

pe_embedder = HiDreamImageEmbedND(
    theta=10000, axes_dim=axes_dims_rope
)

single_stream_blocks instance-attribute

single_stream_blocks = ModuleList(
    [
        (
            HiDreamBlock(
                HiDreamImageSingleTransformerBlock(
                    dim=inner_dim,
                    num_attention_heads=num_attention_heads,
                    attention_head_dim=attention_head_dim,
                    num_routed_experts=num_routed_experts,
                    num_activated_experts=num_activated_experts,
                    _force_inference_output=force_inference_output,
                )
            )
        )
        for _ in (range(num_single_layers))
    ]
)

t_embedder instance-attribute

t_embedder = HiDreamImageTimestepEmbed(inner_dim)

x_embedder instance-attribute

x_embedder = HiDreamImagePatchEmbed(
    patch_size=patch_size,
    in_channels=in_channels,
    out_channels=inner_dim,
)

forward

forward(
    hidden_states: Tensor,
    timesteps: LongTensor = None,
    encoder_hidden_states_t5: Tensor = None,
    encoder_hidden_states_llama3: Tensor = None,
    pooled_embeds: Tensor = None,
    img_ids: Tensor | None = None,
    img_sizes: list[tuple[int, int]] | None = None,
    hidden_states_masks: Tensor | None = None,
    return_dict: bool = True,
    **kwargs,
) -> tuple[Tensor] | Transformer2DModelOutput

load_weights

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

patchify

patchify(hidden_states)

unpatchify

unpatchify(
    x: Tensor,
    img_sizes: list[tuple[int, int]],
    is_training: bool,
) -> list[Tensor]

get_hidream_image_post_process_func

get_hidream_image_post_process_func(
    od_config: OmniDiffusionConfig,
)