vllm_omni.diffusion.models.internvla_a1.model_internvla_a1 ¶
InternVLAA1 ¶
Bases: Module
cosmos instance-attribute ¶
cosmos = ImageTokenizer(
checkpoint_enc=str(cosmos_encoder_path),
checkpoint_dec=str(cosmos_decoder_path),
device=device,
)
cosmos_in_proj instance-attribute ¶
downsample_conv instance-attribute ¶
qwen3_vl_with_expert instance-attribute ¶
qwen3_vl_with_expert = Qwen3VLWithExpertModel(
vlm_config, action_expert_config, precision=dtype
)
upsample_conv instance-attribute ¶
denoise_step ¶
denoise_step(
state: Tensor,
prefix_pad_masks: Tensor,
past_key_values: Any,
max_prefix_position_ids: Tensor,
x_t: Tensor,
timestep: Tensor,
) -> Tensor
denoise_step_optimized ¶
denoise_step_optimized(
suffix_static: SuffixStaticContext,
past_key_values: Any,
x_t: Tensor,
timestep: Tensor,
) -> Tensor
embed_prefix ¶
embed_prefix(
pixel_values: Tensor,
image_grid_thw: Tensor,
lang_tokens: Tensor,
lang_masks: Tensor,
) -> tuple[Tensor, Tensor, Tensor]
embed_suffix ¶
embed_suffix(
state: Tensor, noisy_actions: Tensor, timestep: Tensor
) -> tuple[Tensor, Tensor, Tensor]
get_position_ids ¶
get_position_ids(
lang_tokens: Tensor,
image_grid_thw: Tensor | None,
pad_masks: Tensor,
) -> tuple[Tensor, Any]
prepare_suffix_static_context ¶
prepare_suffix_static_context(
state: Tensor,
prefix_pad_masks: Tensor,
max_prefix_position_ids: Tensor,
) -> SuffixStaticContext
InternVLAA1Policy ¶
Bases: Module
input_builder instance-attribute ¶
input_builder = Qwen3VLInputBuilder(
processor_model_name=processor_model_name,
max_length=tokenizer_max_length,
)
model instance-attribute ¶
model = InternVLAA1(
config,
cosmos_encoder_path=cosmos_encoder_path,
cosmos_decoder_path=cosmos_decoder_path,
)
forward ¶
forward(
batch: dict[str, Any],
*,
noise: Tensor | None = None,
decode_image: bool = False,
) -> tuple[Tensor, Tensor | None]
from_pretrained classmethod ¶
from_pretrained(
checkpoint_dir: str | Path,
*,
config: InternVLAA1Config | None = None,
processor_model_name: str = DEFAULT_QWEN3_VL_MODEL,
strict: bool = False,
) -> InternVLAA1Policy
Qwen3VLInputBuilder ¶
Qwen3VLWithExpertModel ¶
Bases: Module
QwenConfig ¶
SuffixStaticContext dataclass ¶
compute_layer_complete ¶
compute_layer_complete(
layer_idx: int,
inputs_embeds: list[Tensor],
attention_mask: Tensor,
position_ids: Tensor,
und_expert: Module,
gen_expert: Module,
act_expert: Module,
) -> list[Tensor]