Skip to content

vllm_omni.diffusion.models.bagel.pipeline_bagel

BagelPipeline implementation for vLLM-Omni.

logger module-attribute

logger = init_logger(__name__)

BagelGenParams dataclass

cfg_img_scale class-attribute instance-attribute

cfg_img_scale: float = 1.5

cfg_interval class-attribute instance-attribute

cfg_interval: tuple = (0.4, 1.0)

cfg_renorm_min class-attribute instance-attribute

cfg_renorm_min: float = 0.0

cfg_renorm_type class-attribute instance-attribute

cfg_renorm_type: str = 'global'

cfg_text_scale class-attribute instance-attribute

cfg_text_scale: float = 4.0

num_timesteps class-attribute instance-attribute

num_timesteps: int = 50

timestep_shift class-attribute instance-attribute

timestep_shift: float = 3.0

BagelPipeline

Bases: Module, SupportsComponentDiscovery, DiffusionPipelineProfilerMixin

Bagel generation pipeline (MoT), packaged for the vLLM-Omni diffusion engine.

This pipeline is self-contained and uses the ported Bagel core files.

bagel instance-attribute

bagel = Bagel(
    language_model=language_model,
    vit_model=vit_model,
    parallel_config=parallel_config,
    quant_config=quant_config,
    prefix="bagel",
    config=BagelConfig(
        llm_config=llm_config,
        vae_config=vae_cfg,
        vit_config=vit_cfg,
        vit_max_num_patch_per_side=int(
            get("vit_max_num_patch_per_side", 70)
        ),
        connector_act=str(
            get("connector_act", "gelu_pytorch_tanh")
        ),
        interpolate_pos=bool(get("interpolate_pos", False)),
        latent_patch_size=int(get("latent_patch_size", 2)),
        max_latent_size=int(get("max_latent_size", 32)),
        timestep_shift=float(get("timestep_shift", 1.0)),
    ),
)

device instance-attribute

device = get_local_device()

image_processor instance-attribute

image_processor = from_pretrained(
    model_path, local_files_only=True
)

language_model instance-attribute

language_model = Qwen2MoTForCausalLM(
    llm_config,
    parallel_config=parallel_config,
    quant_config=quant_config,
    prefix="bagel.language_model",
)

od_config instance-attribute

od_config = od_config

scheduler instance-attribute

scheduler: object | None = None

scheduler_kwargs instance-attribute

scheduler_kwargs: dict = {}

tokenizer instance-attribute

tokenizer = from_pretrained(
    model_path,
    local_files_only=True,
    trust_remote_code=True,
)

transformer instance-attribute

transformer = model

vae instance-attribute

vae = DistributedAutoEncoder(ae_params)

vit_model instance-attribute

vit_model = SiglipVisionModel(vit_conf)

weights_sources instance-attribute

weights_sources = [
    ComponentSource(
        model_or_path=model,
        subfolder=None,
        revision=revision,
        prefix="",
        fall_back_to_pt=False,
    )
]

forward

load_weights

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

SiglipNaViTWrapper

Bases: Module

vision_model instance-attribute

vision_model = vision_model

forward

forward(
    packed_pixel_values,
    packed_flattened_position_ids,
    cu_seqlens,
    max_seqlen,
)

add_special_tokens

add_special_tokens(tokenizer)

default_ae_params

default_ae_params() -> AutoEncoderParams

get_bagel_post_process_func

get_bagel_post_process_func(od_config: OmniDiffusionConfig)