Skip to content

vllm_omni.diffusion.models.ltx2.pipeline_ltx2_image2video

logger module-attribute

logger = init_logger(__name__)

LTX2I2VDMD2Pipeline

Bases: DMD2PipelineMixin, LTX2ImageToVideoPipeline

LTX-2 image-to-video (I2V) pipeline for FastGen DMD2-distilled models.

LTX2ImageToVideoPipeline

Bases: LTX2Pipeline

support_image_input class-attribute instance-attribute

support_image_input = True

video_processor instance-attribute

video_processor = VideoProcessor(
    vae_scale_factor=vae_spatial_compression_ratio,
    resample="bilinear",
)

check_inputs

check_inputs(
    image,
    height,
    width,
    prompt,
    latents=None,
    prompt_embeds=None,
    negative_prompt_embeds=None,
    prompt_attention_mask=None,
    negative_prompt_attention_mask=None,
)

forward

forward(
    req: OmniDiffusionRequest,
    image: Image | Tensor | None = None,
    prompt: str | list[str] | None = None,
    negative_prompt: str | list[str] | None = None,
    height: int | None = None,
    width: int | None = None,
    num_frames: int | None = None,
    frame_rate: float | None = None,
    num_inference_steps: int | None = None,
    sigmas: list[float] | None = None,
    timesteps: list[int] | None = None,
    guidance_scale: float = 4.0,
    guidance_rescale: float = 0.0,
    noise_scale: float = 0.0,
    num_videos_per_prompt: int | None = 1,
    generator: Generator | list[Generator] | None = None,
    latents: Tensor | None = None,
    audio_latents: Tensor | None = None,
    prompt_embeds: Tensor | None = None,
    negative_prompt_embeds: Tensor | None = None,
    prompt_attention_mask: Tensor | None = None,
    negative_prompt_attention_mask: Tensor | None = None,
    decode_timestep: float | list[float] = 0.0,
    decode_noise_scale: float | list[float] | None = None,
    output_type: str = "np",
    return_dict: bool = True,
    attention_kwargs: dict[str, Any] | None = None,
    max_sequence_length: int | None = None,
) -> DiffusionOutput

prepare_latents

prepare_latents(
    image: Tensor | None = None,
    batch_size: int = 1,
    num_channels_latents: int = 128,
    height: int = 512,
    width: int = 768,
    num_frames: int = 121,
    noise_scale: float = 0.0,
    dtype: dtype | None = None,
    device: device | None = None,
    generator: Generator | list[Generator] | None = None,
    latents: Tensor | None = None,
) -> tuple[Tensor, Tensor]

LTX2ImageToVideoTwoStagesPipeline

Bases: Module, SupportsComponentDiscovery

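LTX2ImageToVideoTwoStagesPipeline performs two-stage image-to-video generation. It holds an LTX2ImageToVideoPipeline (`pipe`) and an LTX2LatentUpsamplePipeline (`upsample_pipe`) as instance attributes.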

device instance-attribute

device = get_local_device()

distilled instance-attribute

distilled = False

dtype instance-attribute

dtype = getattr(od_config, 'dtype', bfloat16)

dummy_run_num_frames class-attribute instance-attribute

dummy_run_num_frames = 2

lora_manager instance-attribute

lora_manager = DiffusionLoRAManager(
    pipeline=pipe,
    device=device,
    dtype=dtype,
    max_cached_adapters=max_cpu_loras,
)

model_path instance-attribute

model_path = model

pipe instance-attribute

pipe = LTX2ImageToVideoPipeline(
    od_config=od_config, prefix=prefix
)

support_image_input class-attribute instance-attribute

support_image_input = True

upsample_pipe instance-attribute

upsample_pipe = LTX2LatentUpsamplePipeline(
    vae=vae, od_config=od_config
)

weights_sources instance-attribute

weights_sources = [
    ComponentSource(
        model_or_path=model,
        subfolder="transformer",
        revision=None,
        prefix="pipe.transformer.",
        fall_back_to_pt=True,
    )
]

forward

forward(
    req: OmniDiffusionRequest,
    image: Image | Tensor | None = None,
    prompt: str | list[str] | None = None,
    negative_prompt: str | list[str] | None = None,
    height: int | None = None,
    width: int | None = None,
    num_frames: int | None = None,
    frame_rate: float | None = None,
    num_inference_steps: int | None = None,
    sigmas: list[float] | None = None,
    timesteps: list[int] | None = None,
    guidance_scale: float = 4.0,
    guidance_rescale: float = 0.0,
    noise_scale: float = 0.0,
    num_videos_per_prompt: int | None = 1,
    generator: Generator | list[Generator] | None = None,
    latents: Tensor | None = None,
    audio_latents: Tensor | None = None,
    prompt_embeds: Tensor | None = None,
    negative_prompt_embeds: Tensor | None = None,
    prompt_attention_mask: Tensor | None = None,
    negative_prompt_attention_mask: Tensor | None = None,
    decode_timestep: float | list[float] = 0.0,
    decode_noise_scale: float | list[float] | None = None,
    output_type: str = "np",
    return_dict: bool = True,
    attention_kwargs: dict[str, Any] | None = None,
    max_sequence_length: int | None = None,
)

load_weights

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

get_ltx2_post_process_func

get_ltx2_post_process_func(od_config: OmniDiffusionConfig)