Skip to content

vllm_omni.diffusion.models.gr00t.modeling

Modules:

| Name | Description |
| --- | --- |
| `gr00t_n1d7` | |
| `modules` | |
| `processing_gr00t_n1d7` | |

Gr00tN1d7

Bases: PreTrainedModel

Gr00tN1d7: a vision-language-action (VLA) model with a Cosmos-Reason2-2B (Qwen3-VL) backbone.

action_head instance-attribute

action_head = Gr00tN1d7ActionHead(config)

all_tied_weights_keys property

all_tied_weights_keys: dict[str, Any]

backbone instance-attribute

backbone = backbone_cls(
    model_name=config.model_name,
    select_layer=config.select_layer,
    backbone_embedding_dim=config.backbone_embedding_dim,
    load_bf16=config.load_bf16,
    transformers_loading_kwargs=transformers_loading_kwargs,
)

collator instance-attribute

collator = Gr00tN1d7DataCollator(
    model_name=config.model_name,
    model_type=config.backbone_model_type,
    transformers_loading_kwargs=transformers_loading_kwargs,
)

config instance-attribute

config = config

config_class class-attribute instance-attribute

config_class = Gr00tN1d7Config

device property

device

dtype property

dtype

supports_gradient_checkpointing class-attribute instance-attribute

supports_gradient_checkpointing = True

forward

forward(inputs: dict) -> BatchFeature

Forward pass through the complete model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `inputs` | `dict` | Dictionary containing: action inputs (state, action, embodiment_id, etc.) | required |

Returns:

| Type | Description |
| --- | --- |
| `BatchFeature` | BatchFeature containing loss and other outputs |

get_action

get_action(
    inputs: dict, options: dict[str, Any] | None = None
) -> BatchFeature

Generate actions using the complete model.

prepare_input

prepare_input(
    inputs: dict,
) -> tuple[BatchFeature, BatchFeature]

Prepare inputs for backbone and action head.

Gr00tN1d7Processor

Bases: ProcessorMixin

apply_sincos_state_encoding instance-attribute

apply_sincos_state_encoding = apply_sincos_state_encoding

clip_outliers instance-attribute

clip_outliers = clip_outliers

collator property

collator

color_jitter_params instance-attribute

color_jitter_params = color_jitter_params

crop_fraction instance-attribute

crop_fraction = crop_fraction

data_collator_class class-attribute instance-attribute

data_collator_class = Gr00tN1d7DataCollator

embodiment_id_mapping instance-attribute

embodiment_id_mapping = (
    embodiment_id_mapping
    or EMBODIMENT_TAG_TO_PROJECTOR_INDEX
)

eval_image_transform instance-attribute

eval_image_transform = _build_eval_image_transform(
    image_target_size, image_crop_size
)

exclude_state instance-attribute

exclude_state = exclude_state

formalize_language instance-attribute

formalize_language = formalize_language

image_crop_size instance-attribute

image_crop_size = image_crop_size

image_target_size instance-attribute

image_target_size = image_target_size

max_action_dim instance-attribute

max_action_dim = max_action_dim

max_action_horizon instance-attribute

max_action_horizon = max_action_horizon

max_state_dim instance-attribute

max_state_dim = max_state_dim

modality_configs instance-attribute

modality_configs = parse_modality_configs(modality_configs)

model_name instance-attribute

model_name = model_name

model_type instance-attribute

model_type = model_type

processor instance-attribute

processor = build_processor(
    model_name, transformers_loading_kwargs
)

random_rotation_angle instance-attribute

random_rotation_angle = random_rotation_angle

shortest_image_edge instance-attribute

shortest_image_edge = shortest_image_edge

state_action_processor instance-attribute

state_action_processor = StateActionProcessor(
    modality_configs=modality_configs,
    statistics=statistics,
    use_percentiles=use_percentiles,
    clip_outliers=clip_outliers,
    apply_sincos_state_encoding=apply_sincos_state_encoding,
    use_relative_action=use_relative_action,
)

statistics instance-attribute

statistics: dict[
    str, dict[str, dict[str, dict[str, list[float]]]]
] = {}

use_mean_std instance-attribute

use_mean_std = use_mean_std

use_percentiles instance-attribute

use_percentiles = use_percentiles

use_relative_action instance-attribute

use_relative_action = use_relative_action

decode_action

decode_action(
    action: ndarray,
    embodiment_tag: EmbodimentTag,
    state: dict[str, ndarray] | None = None,
)

Undo action normalization and convert relative actions to absolute.

from_pretrained classmethod

from_pretrained(
    pretrained_model_name_or_path: str | Path, **kwargs
)

save_pretrained

save_pretrained(save_directory: str | Path) -> list[Path]

set_statistics

set_statistics(
    statistics: dict[
        str, dict[str, dict[str, dict[str, list[float]]]]
    ],
    override: bool = False,
) -> None

Set dataset statistics for normalization.