Skip to content

vllm_omni.diffusion.models.internvla_a1.adapter_qwen3_vl

Qwen3VLForConditionalGeneration

Bases: Qwen3VLForConditionalGeneration

accepts_loss_kwargs class-attribute instance-attribute

accepts_loss_kwargs = False

config instance-attribute

config: Qwen3VLConfig

lm_head instance-attribute

lm_head = Linear(hidden_size, vocab_size, bias=False)

model instance-attribute

model = Qwen3VLModel(config)

Qwen3VLModel

Bases: Qwen3VLModel

accepts_loss_kwargs class-attribute instance-attribute

accepts_loss_kwargs = False

base_model_prefix class-attribute instance-attribute

base_model_prefix = ''

config instance-attribute

config: Qwen3VLConfig

language_model instance-attribute

language_model = _from_config(text_config)

rope_deltas instance-attribute

rope_deltas = None

visual instance-attribute

visual = _from_config(vision_config)

Qwen3VLTextAttention

Bases: Qwen3VLTextAttention

k_norm instance-attribute

k_norm = Qwen3VLTextRMSNorm(head_dim, eps=rms_norm_eps)

q_norm instance-attribute

q_norm = Qwen3VLTextRMSNorm(head_dim, eps=rms_norm_eps)

forward

forward(
    hidden_states: Tensor,
    position_embeddings: tuple[Tensor, Tensor],
    attention_mask: Tensor | None,
    past_key_values: Cache | None = None,
    cache_position: LongTensor | None = None,
    use_cache: bool = False,
    **kwargs: Unpack[FlashAttentionKwargs],
) -> tuple[Tensor, Tensor | None]

Qwen3VLTextDecoderLayer

Bases: Qwen3VLTextDecoderLayer

input_layernorm instance-attribute

input_layernorm = Qwen3VLTextRMSNorm(
    hidden_size, eps=rms_norm_eps
)

post_attention_layernorm instance-attribute

post_attention_layernorm = Qwen3VLTextRMSNorm(
    hidden_size, eps=rms_norm_eps
)

self_attn instance-attribute

self_attn = Qwen3VLTextAttention(
    config=config, layer_idx=layer_idx
)

Qwen3VLTextModel

Bases: Qwen3VLTextModel

config instance-attribute

config: Qwen3VLTextConfig

embed_tokens instance-attribute

embed_tokens = Embedding(
    vocab_size, hidden_size, padding_idx
)

gradient_checkpointing instance-attribute

gradient_checkpointing = False

layers instance-attribute

layers = ModuleList(
    [
        Qwen3VLTextDecoderLayer(config, layer_idx)
        for layer_idx in range(num_hidden_layers)
    ]
)

norm instance-attribute

norm = Qwen3VLTextRMSNorm(hidden_size, eps=rms_norm_eps)

padding_idx instance-attribute

padding_idx = pad_token_id

rotary_emb instance-attribute

rotary_emb = Qwen3VLTextRotaryEmbedding(config=config)

vocab_size instance-attribute

vocab_size = vocab_size

forward

forward(
    input_ids: LongTensor | None = None,
    attention_mask: Tensor | None = None,
    position_ids: LongTensor | None = None,
    past_key_values: Cache | None = None,
    inputs_embeds: FloatTensor | None = None,
    use_cache: bool | None = None,
    cache_position: LongTensor | None = None,
    visual_pos_masks: Tensor | None = None,
    deepstack_visual_embeds: list[Tensor] | None = None,
    **kwargs: Unpack[FlashAttentionKwargs],
) -> tuple | BaseModelOutputWithPast

Qwen3VLTextRMSNorm

Bases: Qwen3VLTextRMSNorm

forward

forward(hidden_states: Tensor) -> Tensor