Skip to content

vllm_omni.diffusion.models.internvla_a1.config

ACTION module-attribute

ACTION = 'action'

DEFAULT_COSMOS_DIR module-attribute

DEFAULT_COSMOS_DIR = Path(
    getenv(
        "INTERNVLA_A1_COSMOS_DIR",
        str(
            Path(HF_HOME) / "hub" / "Cosmos-Tokenizer-CI8x8"
        ),
    )
)

DEFAULT_COSMOS_REPO module-attribute

DEFAULT_COSMOS_REPO = 'nvidia/Cosmos-Tokenizer-CI8x8'

DEFAULT_QWEN3_VL_MODEL module-attribute

DEFAULT_QWEN3_VL_MODEL = getenv(
    "INTERNVLA_A1_PROCESSOR_DIR",
    "Qwen/Qwen3-VL-2B-Instruct",
)

OBS_IMAGE module-attribute

OBS_IMAGE = OBS_STR + '.image'

OBS_IMAGES module-attribute

OBS_IMAGES = OBS_IMAGE + 's'

OBS_PREFIX module-attribute

OBS_PREFIX = OBS_STR + '.'

OBS_STATE module-attribute

OBS_STATE = OBS_STR + '.state'

OBS_STR module-attribute

OBS_STR = 'observation'

OBS_TASK module-attribute

OBS_TASK = OBS_STR + '.task'

OPENPI_ATTENTION_MASK_VALUE module-attribute

OPENPI_ATTENTION_MASK_VALUE = -2.3819763e+38

InternVLAA1Config dataclass

Standalone-compatible InternVLA-A1 config; a few of its defaults are intended for fake/smoke-test runs.

action_expert_variant class-attribute instance-attribute

action_expert_variant: str = 'qwen3_28l'

attn_implementation class-attribute instance-attribute

attn_implementation: str = 'eager'

chunk_size class-attribute instance-attribute

chunk_size: int = 50

compile_mode class-attribute instance-attribute

compile_mode: str = 'max-autotune'

compile_model class-attribute instance-attribute

compile_model: bool = False

device class-attribute instance-attribute

device: str = 'cuda'

dtype class-attribute instance-attribute

dtype: str = 'bfloat16'

empty_cameras class-attribute instance-attribute

empty_cameras: int = 0

enable_regional_compile class-attribute instance-attribute

enable_regional_compile: bool = False

enable_suffix_static_context_optimization class-attribute instance-attribute

enable_suffix_static_context_optimization: bool = False

freeze_vision_encoder class-attribute instance-attribute

freeze_vision_encoder: bool = False

gradient_checkpointing class-attribute instance-attribute

gradient_checkpointing: bool = False

hidden_size class-attribute instance-attribute

hidden_size: int = 128

image_history class-attribute instance-attribute

image_history: int = 2

image_resolution class-attribute instance-attribute

image_resolution: tuple[int, int] = (224, 224)

input_features class-attribute instance-attribute

input_features: dict[str, Any] = field(default_factory=dict)

intermediate_size class-attribute instance-attribute

intermediate_size: int = 256

lambda_gen class-attribute instance-attribute

lambda_gen: float = 0.01

max_action_dim class-attribute instance-attribute

max_action_dim: int = 32

max_period class-attribute instance-attribute

max_period: float = 4.0

max_state_dim class-attribute instance-attribute

max_state_dim: int = 32

min_period class-attribute instance-attribute

min_period: float = 0.004

n_action_steps class-attribute instance-attribute

n_action_steps: int = 50

num_attention_heads class-attribute instance-attribute

num_attention_heads: int = 4

num_cameras class-attribute instance-attribute

num_cameras: int = 3

num_hidden_layers class-attribute instance-attribute

num_hidden_layers: int = 2

num_inference_steps class-attribute instance-attribute

num_inference_steps: int = 10

output_features class-attribute instance-attribute

output_features: dict[str, Any] = field(
    default_factory=dict
)

pixel_feature_dim class-attribute instance-attribute

pixel_feature_dim: int = 48

qwen3_vl_variant class-attribute instance-attribute

qwen3_vl_variant: str = 'qwen3_vl_28l'

regional_compile_dynamic class-attribute instance-attribute

regional_compile_dynamic: bool = True

scale_factor class-attribute instance-attribute

scale_factor: int = 8

time_sampling_beta_alpha class-attribute instance-attribute

time_sampling_beta_alpha: float = 1.5

time_sampling_beta_beta class-attribute instance-attribute

time_sampling_beta_beta: float = 1.0

time_sampling_offset class-attribute instance-attribute

time_sampling_offset: float = 0.001

time_sampling_scale class-attribute instance-attribute

time_sampling_scale: float = 0.999

tokenizer_max_length class-attribute instance-attribute

tokenizer_max_length: int = 48

train_expert_only class-attribute instance-attribute

train_expert_only: bool = False

train_vlm_only class-attribute instance-attribute

train_vlm_only: bool = False

type class-attribute instance-attribute

type: str = 'internvla_a1'

vocab_size class-attribute instance-attribute

vocab_size: int = 256

from_model_config classmethod

from_model_config(
    model_config: dict[str, Any] | None,
) -> InternVLAA1Config

from_pretrained classmethod

from_pretrained(
    checkpoint_dir: str | Path,
) -> InternVLAA1Config

InternVLAA1TrainMetadata dataclass

action_mode class-attribute instance-attribute

action_mode: str = 'delta'

processor_model_name class-attribute instance-attribute

processor_model_name: str = DEFAULT_QWEN3_VL_MODEL

from_pretrained classmethod

from_pretrained(
    checkpoint_dir: str | Path,
) -> InternVLAA1TrainMetadata