vllm_omni.diffusion.models.internvla_a1.adapter_qwen3_vl ¶
Qwen3VLForConditionalGeneration ¶
Bases: Qwen3VLForConditionalGeneration
Qwen3VLModel ¶
Bases: Qwen3VLModel
Qwen3VLTextAttention ¶
Bases: Qwen3VLTextAttention
Qwen3VLTextDecoderLayer ¶
Bases: Qwen3VLTextDecoderLayer
input_layernorm instance-attribute ¶
input_layernorm = Qwen3VLTextRMSNorm(
hidden_size, eps=rms_norm_eps
)
post_attention_layernorm instance-attribute ¶
post_attention_layernorm = Qwen3VLTextRMSNorm(
hidden_size, eps=rms_norm_eps
)
self_attn instance-attribute ¶
self_attn = Qwen3VLTextAttention(
config=config, layer_idx=layer_idx
)
Qwen3VLTextModel ¶
Bases: Qwen3VLTextModel
layers instance-attribute ¶
layers = ModuleList(
[
Qwen3VLTextDecoderLayer(config, layer_idx)
for layer_idx in range(num_hidden_layers)
]
)
forward ¶
forward(
input_ids: LongTensor | None = None,
attention_mask: Tensor | None = None,
position_ids: LongTensor | None = None,
past_key_values: Cache | None = None,
inputs_embeds: FloatTensor | None = None,
use_cache: bool | None = None,
cache_position: LongTensor | None = None,
visual_pos_masks: Tensor | None = None,
deepstack_visual_embeds: list[Tensor] | None = None,
**kwargs: Unpack[FlashAttentionKwargs],
) -> tuple | BaseModelOutputWithPast