Skip to content

vllm_omni.diffusion.models.sd3.pipeline_sd3

logger module-attribute

logger = getLogger(__name__)

StableDiffusion3Pipeline

Bases: Module, CFGParallelMixin, DiffusionPipelineProfilerMixin

current_timestep property

current_timestep

default_sample_size instance-attribute

default_sample_size = 128

device instance-attribute

device = get_local_device()

guidance_scale property

guidance_scale

image_processor instance-attribute

image_processor = VaeImageProcessor(
    vae_scale_factor=vae_scale_factor
)

interrupt property

interrupt

num_timesteps property

num_timesteps

od_config instance-attribute

od_config = od_config

output_type instance-attribute

output_type = output_type

patch_size instance-attribute

patch_size = 2

scheduler instance-attribute

scheduler = from_pretrained(
    model,
    subfolder="scheduler",
    local_files_only=local_files_only,
)

text_encoder instance-attribute

text_encoder = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder",
    prefetch_list=sd3_subfolders,
    local_files_only=local_files_only,
    torch_dtype=dtype,
)

text_encoder_2 instance-attribute

text_encoder_2 = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder_2",
    prefetch_list=sd3_subfolders,
    local_files_only=local_files_only,
    torch_dtype=dtype,
)

text_encoder_3 instance-attribute

text_encoder_3 = from_pretrained_with_prefetch(
    from_pretrained,
    model,
    subfolder="text_encoder_3",
    prefetch_list=sd3_subfolders,
    local_files_only=local_files_only,
    torch_dtype=dtype,
)

tokenizer instance-attribute

tokenizer = from_pretrained(
    model,
    subfolder="tokenizer",
    local_files_only=local_files_only,
)

tokenizer_2 instance-attribute

tokenizer_2 = from_pretrained(
    model,
    subfolder="tokenizer_2",
    local_files_only=local_files_only,
)

tokenizer_3 instance-attribute

tokenizer_3 = from_pretrained(
    model,
    subfolder="tokenizer_3",
    local_files_only=local_files_only,
)

tokenizer_max_length instance-attribute

tokenizer_max_length = (
    model_max_length
    if hasattr(self, "tokenizer") and tokenizer is not None
    else 77
)

transformer instance-attribute

transformer = SD3Transformer2DModel(od_config=od_config)

vae instance-attribute

vae = to(device)

vae_scale_factor instance-attribute

vae_scale_factor = (
    2 ** (len(block_out_channels) - 1)
    if getattr(self, "vae", None)
    else 8
)

weights_sources instance-attribute

weights_sources = [
    ComponentSource(
        model_or_path=model,
        subfolder="transformer",
        revision=None,
        prefix="transformer.",
        fall_back_to_pt=True,
    )
]

check_inputs

check_inputs(
    prompt,
    prompt_2,
    prompt_3,
    height,
    width,
    negative_prompt=None,
    negative_prompt_2=None,
    negative_prompt_3=None,
    prompt_embeds=None,
    negative_prompt_embeds=None,
    max_sequence_length=None,
)

diffuse

diffuse(
    latents: Tensor,
    timesteps: Tensor,
    prompt_embeds: Tensor,
    pooled_prompt_embeds: Tensor | None,
    negative_prompt_embeds: Tensor | None,
    negative_pooled_prompt_embeds: Tensor | None,
    do_true_cfg: bool,
    guidance_scale: float,
    cfg_normalize: bool = False,
) -> Tensor

Diffusion loop with optional classifier-free guidance.

Parameters:

Name Type Description Default
latents Tensor

Noise latents to denoise

required
timesteps Tensor

Diffusion timesteps

required
prompt_embeds Tensor

Positive prompt embeddings

required
pooled_prompt_embeds Tensor | None

Pooled positive prompt embeddings

required
negative_prompt_embeds Tensor | None

Negative prompt embeddings

required
negative_pooled_prompt_embeds Tensor | None

Pooled negative prompt embeddings

required
do_true_cfg bool

Whether to apply CFG

required
guidance_scale float

CFG scale factor

required
cfg_normalize bool

Whether to normalize CFG output (default: False)

False

Returns:

Type Description
Tensor

Denoised latents

encode_prompt

encode_prompt(
    prompt: str | list[str],
    prompt_2: str | list[str],
    prompt_3: str | list[str],
    prompt_embeds: Tensor | None = None,
    max_sequence_length: int = 256,
    num_images_per_prompt: int = 1,
)

Parameters:

Name Type Description Default
prompt `str` or `List[str]`, *optional*

The prompt or prompts to be encoded.

required
prompt_2 `str` or `List[str]`, *optional*

The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is used in all text encoders.

required
prompt_3 `str` or `List[str]`, *optional*

The prompt or prompts to be sent to `tokenizer_3` and `text_encoder_3`. If not defined, `prompt` is used in all text encoders.

required
num_images_per_prompt `int`

The number of images that should be generated per prompt.

1
prompt_embeds `torch.FloatTensor`, *optional*

Pre-generated text embeddings. Can be used to easily tweak text inputs, e.g. prompt weighting. If not provided, text embeddings will be generated from prompt input argument.

None

forward

forward(
    req: OmniDiffusionRequest,
    prompt: str | list[str] = "",
    prompt_2: str | list[str] = "",
    prompt_3: str | list[str] = "",
    negative_prompt: str | list[str] = "",
    negative_prompt_2: str | list[str] = "",
    negative_prompt_3: str | list[str] = "",
    height: int | None = None,
    width: int | None = None,
    num_inference_steps: int = 28,
    sigmas: list[float] | None = None,
    num_images_per_prompt: int = 1,
    generator: Generator | list[Generator] | None = None,
    latents: Tensor | None = None,
    prompt_embeds: Tensor | None = None,
    negative_prompt_embeds: Tensor | None = None,
    pooled_prompt_embeds: Tensor | None = None,
    negative_pooled_prompt_embeds: Tensor | None = None,
    max_sequence_length: int = 256,
) -> DiffusionOutput

load_weights

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

prepare_latents

prepare_latents(
    batch_size,
    num_channels_latents,
    height,
    width,
    generator,
    latents=None,
) -> Tensor

prepare_timesteps

prepare_timesteps(
    num_inference_steps, sigmas, image_seq_len
)

calculate_shift

calculate_shift(
    image_seq_len,
    base_seq_len: int = 256,
    max_seq_len: int = 4096,
    base_shift: float = 0.5,
    max_shift: float = 1.15,
)

get_sd3_image_post_process_func

get_sd3_image_post_process_func(
    od_config: OmniDiffusionConfig,
)

retrieve_timesteps

retrieve_timesteps(
    scheduler,
    num_inference_steps: int | None = None,
    device: str | device | None = None,
    timesteps: list[int] | None = None,
    sigmas: list[float] | None = None,
    **kwargs,
) -> tuple[Tensor, int]

Calls the scheduler's set_timesteps method and retrieves timesteps from the scheduler after the call. Handles custom timesteps. Any kwargs will be supplied to scheduler.set_timesteps.

Parameters:

Name Type Description Default
scheduler `SchedulerMixin`

The scheduler to get timesteps from.

required
num_inference_steps `int`

The number of diffusion steps used when generating samples with a pre-trained model. If used, timesteps must be None.

None
device `str` or `torch.device`, *optional*

The device to which the timesteps should be moved. If None, the timesteps are not moved.

None
timesteps `List[int]`, *optional*

Custom timesteps used to override the timestep spacing strategy of the scheduler. If timesteps is passed, num_inference_steps and sigmas must be None.

None
sigmas `List[float]`, *optional*

Custom sigmas used to override the timestep spacing strategy of the scheduler. If sigmas is passed, num_inference_steps and timesteps must be None.

None

Returns:

Type Description
tuple[Tensor, int]

A tuple where the first element is the timestep schedule from the scheduler and the second element is the number of inference steps.