vllm_omni.diffusion.models.flux2.pipeline_flux2 ¶

logger `module-attribute` ¶

logger = logging.getLogger(__name__)

Flux2ImageProcessor ¶

Bases: VaeImageProcessor

Image processor to preprocess the reference image for Flux2.

check_image_input `staticmethod` ¶

check_image_input(
    image: Image,
    max_aspect_ratio: int = 8,
    min_side_length: int = 64,
    max_area: int = 1024 * 1024,
) -> Image

concatenate_images `staticmethod` ¶

concatenate_images(images: list[Image]) -> Image

Flux2Pipeline ¶

Bases: Module, CFGParallelMixin, SupportImageInput, ProgressBarMixin, DiffusionPipelineProfilerMixin, SupportsComponentDiscovery

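Flux2 pipeline for text-to-image generation; it also accepts optional reference images as input (see `support_image_input` and the `image` parameter of `forward`).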

attention_kwargs `property` ¶

attention_kwargs

current_timestep `property` ¶

current_timestep

default_sample_size `instance-attribute` ¶

default_sample_size = 128

guidance_scale `property` ¶

guidance_scale

image_processor `instance-attribute` ¶

image_processor = Flux2ImageProcessor(
    vae_scale_factor=self.vae_scale_factor * 2
)

interrupt `property` ¶

interrupt

num_timesteps `property` ¶

num_timesteps

od_config `instance-attribute` ¶

od_config = od_config

scheduler `instance-attribute` ¶

scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
    model,
    subfolder="scheduler",
    local_files_only=local_files_only,
)

support_image_input `class-attribute` `instance-attribute` ¶

support_image_input = True

system_message `instance-attribute` ¶

system_message = SYSTEM_MESSAGE

system_message_upsampling_i2i `instance-attribute` ¶

system_message_upsampling_i2i = (
    SYSTEM_MESSAGE_UPSAMPLING_I2I
)

system_message_upsampling_t2i `instance-attribute` ¶

system_message_upsampling_t2i = (
    SYSTEM_MESSAGE_UPSAMPLING_T2I
)

text_encoder `instance-attribute` ¶

text_encoder = MistralEncoderModel(
    text_encoder_config, prefix="text_encoder"
).to(self._execution_device)

tokenizer `instance-attribute` ¶

tokenizer = PixtralProcessor.from_pretrained(
    model,
    subfolder="tokenizer",
    local_files_only=local_files_only,
)

tokenizer_max_length `instance-attribute` ¶

tokenizer_max_length = 512

transformer `instance-attribute` ¶

transformer = Flux2Transformer2DModel(
    quant_config=od_config.quantization_config,
    od_config=od_config,
    **transformer_kwargs,
)

upsampling_max_image_size `instance-attribute` ¶

upsampling_max_image_size = UPSAMPLING_MAX_IMAGE_SIZE

vae `instance-attribute` ¶

vae = AutoencoderKLFlux2.from_pretrained(
    model,
    subfolder="vae",
    local_files_only=local_files_only,
).to(self._execution_device)

vae_scale_factor `instance-attribute` ¶

vae_scale_factor = (
    2 ** (len(self.vae.config.block_out_channels) - 1)
    if getattr(self, "vae", None)
    else 8
)

weights_sources `instance-attribute` ¶

weights_sources = [
    DiffusersPipelineLoader.ComponentSource(
        model_or_path=od_config.model,
        subfolder="transformer",
        revision=None,
        prefix="transformer.",
        fall_back_to_pt=True,
    )
]

check_cfg_parallel_validity ¶

check_cfg_parallel_validity(
    true_cfg_scale: float, has_neg_prompt: bool
)

check_inputs ¶

check_inputs(
    prompt,
    height,
    width,
    prompt_embeds=None,
    callback_on_step_end_tensor_inputs=None,
)

encode_prompt ¶

encode_prompt(
    prompt: str | list[str],
    device: device | None = None,
    num_images_per_prompt: int = 1,
    prompt_embeds: Tensor | None = None,
    max_sequence_length: int = 512,
    text_encoder_out_layers: tuple[int, ...] = (10, 20, 30),
)

forward ¶

forward(
    req: DiffusionRequestBatch,
    image: Image | list[Image] | None = None,
    prompt: str | list[str] | None = None,
    height: int | None = None,
    width: int | None = None,
    num_inference_steps: int = 50,
    sigmas: list[float] | None = None,
    guidance_scale: float | None = 4.0,
    num_images_per_prompt: int = 1,
    generator: Generator | list[Generator] | None = None,
    latents: Tensor | None = None,
    prompt_embeds: Tensor | None = None,
    negative_prompt_embeds: Tensor | None = None,
    output_type: str | None = "pil",
    return_dict: bool = True,
    attention_kwargs: dict[str, Any] | None = None,
    callback_on_step_end: Callable[[int, int, dict], None]
    | None = None,
    callback_on_step_end_tensor_inputs: list[str] = [
        "latents"
    ],
    max_sequence_length: int = 512,
    text_encoder_out_layers: tuple[int, ...] = (10, 20, 30),
    caption_upsample_temperature: float = None,
) -> DiffusionOutput

load_weights ¶

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

prepare_image_latents ¶

prepare_image_latents(
    images: list[Tensor],
    batch_size,
    generator: Generator,
    device,
    dtype,
)

prepare_latents ¶

prepare_latents(
    batch_size,
    num_latents_channels,
    height,
    width,
    dtype,
    device,
    generator: Generator,
    latents: Tensor | None = None,
)

upsample_prompt ¶

upsample_prompt(
    prompt: str | list[str],
    images: list[Image] | list[list[Image]] = None,
    temperature: float = 0.15,
    device: device = None,
) -> list[str]

compute_empirical_mu ¶

compute_empirical_mu(
    image_seq_len: int, num_steps: int
) -> float

format_input ¶

format_input(
    prompts: list[str],
    system_message: str = SYSTEM_MESSAGE,
    images: list[Image] | list[list[Image]] = None,
) -> list[list[dict[str, Any]]]

Format a batch of text prompts into the conversation format expected by apply_chat_template. Optionally, add images to the input.

Parameters:

Name	Type	Description	Default
`prompts`	`list[str]`	List of text prompts	required
`system_message`	`str`	System message to use (default: `SYSTEM_MESSAGE`).	`SYSTEM_MESSAGE`
`images`	`list[Image] | list[list[Image]]`, optional	List of images to add to the input.	`None`

Returns:

Type	Description
`list[list[dict[str, Any]]]`	List of conversations, where each conversation is a list of message dicts.

get_flux2_post_process_func ¶

get_flux2_post_process_func(od_config: OmniDiffusionConfig)

retrieve_latents ¶

retrieve_latents(
    encoder_output: Tensor,
    generator: Generator = None,
    sample_mode: str = "sample",
)

retrieve_timesteps ¶

retrieve_timesteps(
    scheduler,
    num_inference_steps: int | None = None,
    device: str | device | None = None,
    timesteps: list[int] | None = None,
    sigmas: list[float] | None = None,
    **kwargs,
) -> tuple[Tensor, int]

Calls the scheduler's set_timesteps method and retrieves timesteps from the scheduler after the call. Handles custom timesteps. Any kwargs will be supplied to scheduler.set_timesteps.

Parameters:

Name	Type	Description	Default
`scheduler`	`SchedulerMixin`	The scheduler to get timesteps from.	required
`num_inference_steps`	`int`	The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps` must be `None`.	`None`
`device`	`str` or `torch.device`, optional	The device to which the timesteps should be moved. If `None`, the timesteps are not moved.	`None`
`timesteps`	`List[int]`, optional	Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed, `num_inference_steps` and `sigmas` must be `None`.	`None`
`sigmas`	`List[float]`, optional	Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed, `num_inference_steps` and `timesteps` must be `None`.	`None`

Returns:

Type	Description
`tuple[Tensor, int]`	A tuple where the first element is the timestep schedule from the scheduler and the second element is the number of inference steps.

vllm_omni.diffusion.models.flux2.pipeline_flux2 ¶

logger module-attribute ¶

Flux2ImageProcessor ¶

check_image_input staticmethod ¶

concatenate_images staticmethod ¶

Flux2Pipeline ¶

attention_kwargs property ¶

current_timestep property ¶

default_sample_size instance-attribute ¶

guidance_scale property ¶

image_processor instance-attribute ¶

interrupt property ¶

num_timesteps property ¶

od_config instance-attribute ¶

scheduler instance-attribute ¶

support_image_input class-attribute instance-attribute ¶

system_message instance-attribute ¶

system_message_upsampling_i2i instance-attribute ¶

system_message_upsampling_t2i instance-attribute ¶

text_encoder instance-attribute ¶

tokenizer instance-attribute ¶

tokenizer_max_length instance-attribute ¶

transformer instance-attribute ¶

upsampling_max_image_size instance-attribute ¶

vae instance-attribute ¶

vae_scale_factor instance-attribute ¶

weights_sources instance-attribute ¶