InternVLA-A1
Source https://github.com/vllm-project/vllm-omni/tree/main/examples/offline_inference/internvla_a1.
Full usage and result-reporting guidance lives in docs/user_guide/examples/offline_inference/internvla_a1.md.
Quick start:
export INTERNVLA_A1_MODEL_DIR=/path/to/InternVLA-A1-3B-ft-pen
export INTERNVLA_A1_DATASET_DIR=/path/to/Genie1-Place_Markpen
export INTERNVLA_A1_PROCESSOR_DIR=/path/to/Qwen3-VL-2B-Instruct
# Hugging Face repo: tenstep/Cosmos-Tokenizer-CI8x8-SafeTensors
export INTERNVLA_A1_COSMOS_DIR=/path/to/Cosmos-Tokenizer-CI8x8-SafeTensor
bash run.sh --num-samples 1 --num-episodes 0
bash run.sh --num-episodes 1
bash collect_results.sh
Expected files under INTERNVLA_A1_COSMOS_DIR:
encoder.safetensors
decoder.safetensors
Reference Hugging Face repo: tenstep/Cosmos-Tokenizer-CI8x8-SafeTensors
Key entrypoints:
run.sh: wrapper for offline inference and GT evaluation
collect_results.sh: collect sample output, latency, metrics, plots, and logs
end2end.py: underlying Python entrypoint
Example materials
collect_results.sh
#!/usr/bin/env bash
# Collects InternVLA-A1 run artifacts (logs, timings, GPU usage, evaluation
# outputs) into a single result directory.
# Required env vars: INTERNVLA_A1_MODEL_DIR, INTERNVLA_A1_DATASET_DIR,
#                    INTERNVLA_A1_PROCESSOR_DIR, INTERNVLA_A1_COSMOS_DIR
# Optional env vars: INTERNVLA_A1_RESULT_DIR, GPU_MONITOR_INTERVAL
set -euo pipefail

# Directory holding this script, and the directory three levels above it.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$ROOT_DIR/../../.." && pwd)"

# Delay passed to `sleep` between GPU usage samples (defaults to 1).
GPU_MONITOR_INTERVAL="${GPU_MONITOR_INTERVAL:-1}"

# Results land in INTERNVLA_A1_RESULT_DIR when it is set; otherwise in a
# timestamped directory below the repository's outputs/ tree.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
RESULT_ROOT="${INTERNVLA_A1_RESULT_DIR:-$REPO_ROOT/outputs/internvla_a1/collected_results/$TIMESTAMP}"
EVAL_OUTPUT_DIR="$RESULT_ROOT/eval_outputs"

# Abort with a hint when a required path is unset or empty. This happens
# before the result directory is created.
: "${INTERNVLA_A1_MODEL_DIR:?Please export INTERNVLA_A1_MODEL_DIR=/path/to/InternVLA-A1-3B-ft-pen}"
: "${INTERNVLA_A1_DATASET_DIR:?Please export INTERNVLA_A1_DATASET_DIR=/path/to/Genie1-Place_Markpen}"
: "${INTERNVLA_A1_PROCESSOR_DIR:?Please export INTERNVLA_A1_PROCESSOR_DIR=/path/to/Qwen3-VL-2B-Instruct}"
: "${INTERNVLA_A1_COSMOS_DIR:?Please export INTERNVLA_A1_COSMOS_DIR=/path/to/Cosmos-Tokenizer-CI8x8-SafeTensor}"

mkdir -p "$RESULT_ROOT"

# Make the validated paths visible to every child process started below.
export INTERNVLA_A1_MODEL_DIR INTERNVLA_A1_DATASET_DIR
export INTERNVLA_A1_PROCESSOR_DIR INTERNVLA_A1_COSMOS_DIR
#######################################
# Write a key=value summary of the run environment.
# Globals:   RESULT_ROOT, TIMESTAMP, REPO_ROOT, INTERNVLA_A1_MODEL_DIR,
#            INTERNVLA_A1_DATASET_DIR, INTERNVLA_A1_PROCESSOR_DIR,
#            INTERNVLA_A1_COSMOS_DIR (all read)
# Outputs:   overwrites $RESULT_ROOT/env_summary.txt
#######################################
write_env_summary() {
  {
    printf 'timestamp=%s\n' "$TIMESTAMP"
    printf 'repo_root=%s\n' "$REPO_ROOT"
    printf 'result_root=%s\n' "$RESULT_ROOT"
    # Interpreter lookups are best-effort: a missing python must not abort.
    printf 'python=%s\n' "$(command -v python || true)"
    printf 'python_version=%s\n' "$(python --version 2>&1 || true)"
    printf 'model_dir=%s\n' "$INTERNVLA_A1_MODEL_DIR"
    printf 'dataset_dir=%s\n' "$INTERNVLA_A1_DATASET_DIR"
    printf 'processor_dir=%s\n' "$INTERNVLA_A1_PROCESSOR_DIR"
    printf 'cosmos_dir=%s\n' "$INTERNVLA_A1_COSMOS_DIR"
    printf 'pwd=%s\n' "$(pwd)"
  } >"$RESULT_ROOT/env_summary.txt"
}
#######################################
# Save a one-shot nvidia-smi CSV report, or a placeholder line when
# nvidia-smi cannot be found.
# Arguments: $1 - file to overwrite with the snapshot
#######################################
capture_gpu_snapshot() {
  local snapshot_path="$1"
  local -a smi_args=(
    --query-gpu=index,name,memory.total,memory.used,utilization.gpu
    --format=csv
  )
  if ! command -v nvidia-smi >/dev/null 2>&1; then
    echo "nvidia-smi not found" >"$snapshot_path"
  else
    # Best-effort: a failing query leaves its error text in the file
    # instead of failing the caller.
    nvidia-smi "${smi_args[@]}" >"$snapshot_path" 2>&1 || true
  fi
}
#######################################
# Launch a background loop that appends timestamped nvidia-smi samples to a
# CSV file.
# Globals:   GPU_MONITOR_INTERVAL (read) - argument given to `sleep`
# Arguments: $1 - CSV file to overwrite
# Outputs:   PID of the background loop on stdout
# Returns:   1 (after writing a placeholder line) when nvidia-smi is missing
#######################################
start_gpu_monitor() {
  local csv_path="$1"
  local gpu_fields="index,name,memory.total,memory.used,utilization.gpu"
  local csv_header="timestamp,index,name,memory.total [MiB],memory.used [MiB],utilization.gpu [%]"

  if ! command -v nvidia-smi >/dev/null 2>&1; then
    echo "nvidia-smi not found" >"$csv_path"
    return 1
  fi

  # Both output streams of the loop are redirected, so a caller capturing
  # this function's stdout only receives the PID echoed below.
  {
    echo "$csv_header"
    while :; do
      local stamp
      stamp="$(date '+%Y-%m-%d %H:%M:%S')"
      # Prefix each GPU row with the sample time after trimming the row's
      # outer whitespace.
      nvidia-smi --query-gpu="$gpu_fields" --format=csv,noheader,nounits \
        | awk -F',' -v ts="$stamp" '{gsub(/^[ \t]+|[ \t]+$/, "", $0); print ts "," $0}'
      sleep "$GPU_MONITOR_INTERVAL"
    done
  } >"$csv_path" 2>/dev/null &
  echo $!
}
#######################################
# Terminate a GPU monitor process if it is still running.
# Arguments: $1 - PID to stop (optional; empty or missing is a no-op)
# Returns:   always 0; kill/wait failures are deliberately ignored
#######################################
stop_gpu_monitor() {
  local pid="${1:-}"

  # Nothing to do without a PID or when the process is already gone.
  [[ -n "$pid" ]] || return 0
  kill -0 "$pid" 2>/dev/null || return 0

  kill "$pid" 2>/dev/null || true
  # `wait` can only reap children of the current shell; for any other PID it
  # fails, which is ignored here.
  wait "$pid" 2>/dev/null || true
}
#######################################
# Copy the contents of one directory into another, unless both names
# resolve to the same path.
# Arguments: $1 - source directory (may be absent)
#            $2 - destination directory (created when missing)
#######################################
copy_eval_outputs() {
  local src="$1"
  local dst="$2"
  local src_resolved dst_resolved

  # Fall back to the literal argument when realpath is unavailable or fails.
  src_resolved="$(realpath "$src" 2>/dev/null || echo "$src")"
  dst_resolved="$(realpath "$dst" 2>/dev/null || echo "$dst")"

  # Copying a directory onto itself is skipped.
  [[ "$src_resolved" != "$dst_resolved" ]] || return 0

  mkdir -p "$dst"
  if [[ -d "$src" ]]; then
    # The trailing "/." copies the directory's contents, dotfiles included.
    cp -r "$src"/. "$dst"/
  fi
}
#######################################
# Run a command while recording its log, timing, GPU usage and exit status.
# Globals:   RESULT_ROOT (read)
# Arguments: $1   - artifact name, used as the file-name prefix
#            $2.. - command (and its arguments) to execute
# Outputs:   under RESULT_ROOT: <name>.log (stdout+stderr of the command),
#            <name>_time.txt, <name>_status.txt, and <name>_gpu.csv (the
#            path handed to start_gpu_monitor); a failure notice on stderr
# Returns:   the exit status of the wrapped command
#######################################
run_with_artifacts() {
  local name="$1"
  shift
  local log_file="$RESULT_ROOT/${name}.log"
  local time_file="$RESULT_ROOT/${name}_time.txt"
  local gpu_file="$RESULT_ROOT/${name}_gpu.csv"
  local status_file="$RESULT_ROOT/${name}_status.txt"
  local monitor_pid=""
  # Monitoring is optional: an unavailable monitor yields an empty PID.
  monitor_pid="$(start_gpu_monitor "$gpu_file" || true)"

  # Remember whether errexit was active so it is restored afterwards rather
  # than forced on for callers that never enabled it.
  local had_errexit=0
  if [[ $- == *e* ]]; then
    had_errexit=1
  fi
  set +e

  # `-v` and `-o` are GNU time options. Other /usr/bin/time implementations
  # reject them without running the command, so verify support first.
  local use_gnu_time=0
  if [[ -x /usr/bin/time ]] \
    && /usr/bin/time -v -o /dev/null true >/dev/null 2>&1; then
    use_gnu_time=1
  fi

  local exit_code=0
  local start_ts=0
  local end_ts=0
  # Non-empty when the timing file must be produced by this function.
  local timing_mode=""
  start_ts="$(date +%s)"
  if [[ "$use_gnu_time" -eq 1 ]]; then
    /usr/bin/time -v -o "$time_file" "$@" >"$log_file" 2>&1
    exit_code=$?
    if [[ ! -s "$time_file" ]]; then
      timing_mode="usr_bin_time_empty_fallback"
    fi
  else
    "$@" >"$log_file" 2>&1
    exit_code=$?
    timing_mode="shell_date"
  fi

  if [[ -n "$timing_mode" ]]; then
    end_ts="$(date +%s)"
    {
      echo "timing_mode=$timing_mode"
      echo "start_ts=$start_ts"
      echo "end_ts=$end_ts"
      echo "elapsed_seconds=$((end_ts - start_ts))"
    } >"$time_file"
  fi

  if [[ "$had_errexit" -eq 1 ]]; then
    set -e
  fi

  stop_gpu_monitor "$monitor_pid"
  echo "exit_code=$exit_code" >"$status_file"
  if [[ $exit_code -ne 0 ]]; then
    echo "[error] command failed for $name, see $log_file" >&2
    return "$exit_code"
  fi
}
#######################################
# Write a README that lists the artifacts expected in the result directory.
# Globals:   RESULT_ROOT (read)
# Outputs:   overwrites $RESULT_ROOT/README.txt
#######################################
write_manifest() {
  # One argument per output line; the text is static, so nothing is expanded.
  printf '%s\n' \
    'InternVLA-A1 collected results' \
    'Key files:' \
    '- env_summary.txt: environment and path summary' \
    '- sample_run.log / sample_run_time.txt: one-sample functional run' \
    '- forward_benchmark.log / forward_benchmark_time.txt: pure pipeline.forward latency benchmark' \
    '- eval_run.log / eval_run_time.txt: GT evaluation run' \
    '- pytest_e2e.log / pytest_e2e_time.txt: offline e2e pytest result' \
    '- gpu_info_before.csv / gpu_info_after.csv: point-in-time GPU snapshots' \
    '- *_gpu.csv: sampled GPU usage during each run' \
    '- eval_outputs/: copied output directory from the GT evaluation run' \
    'Important outputs:' \
    '- forward_benchmark/forward_latency.json' \
    '- eval_outputs/summary.json' \
    '- eval_outputs/registry/log.json' \
    '- eval_outputs/registry/plots/' \
    >"$RESULT_ROOT/README.txt"
}
#######################################
# Emit placeholder artifacts for a step that was skipped.
# Globals:   RESULT_ROOT (read)
# Arguments: $1 - artifact name, used as the file-name prefix
#            $2 - human-readable reason for skipping
# Outputs:   <name>.log, <name>_time.txt, <name>_status.txt, and
#            <name>_gpu.csv (only when it does not exist yet)
#######################################
write_skip_artifact() {
  local artifact="$1"
  local why="$2"
  local prefix="$RESULT_ROOT/${artifact}"

  echo "$why" >"${prefix}.log"
  echo "timing_mode=skipped" >"${prefix}_time.txt"
  {
    # A skipped step is reported as exit code 0.
    echo "exit_code=0"
    echo "skipped_reason=$why"
  } >"${prefix}_status.txt"

  # Leave an existing GPU file untouched.
  [[ -f "${prefix}_gpu.csv" ]] || echo "skipped,$why" >"${prefix}_gpu.csv"
}
# ---------------------------------------------------------------------------
# Collection pipeline. The script runs under `set -e` and run_with_artifacts
# returns the wrapped command's status, so the first failing step stops the
# script.
# ---------------------------------------------------------------------------
write_env_summary
write_manifest
capture_gpu_snapshot "$RESULT_ROOT/gpu_info_before.csv"

# 1) One-sample functional run through the run.sh wrapper.
run_with_artifacts \
  "sample_run" \
  bash "$ROOT_DIR/run.sh" \
  --output-dir "$RESULT_ROOT/sample_outputs" \
  --num-samples 1 \
  --num-episodes 0

# 2) Forward benchmark, invoking end2end.py directly.
run_with_artifacts \
  "forward_benchmark" \
  python "$ROOT_DIR/end2end.py" \
  --model-dir "$INTERNVLA_A1_MODEL_DIR" \
  --dataset-dir "$INTERNVLA_A1_DATASET_DIR" \
  --benchmark-forward \
  --dtype bfloat16 \
  --attn-implementation eager \
  --warmup-iters 3 \
  --benchmark-iters 10 \
  --output-dir "$RESULT_ROOT/forward_benchmark"

# 3) Evaluation run over one episode.
run_with_artifacts \
  "eval_run" \
  bash "$ROOT_DIR/run.sh" \
  --output-dir "$EVAL_OUTPUT_DIR" \
  --num-episodes 1

# EVAL_OUTPUT_DIR is defined as $RESULT_ROOT/eval_outputs, so both arguments
# name the same directory and copy_eval_outputs skips the copy; the call only
# matters if EVAL_OUTPUT_DIR is ever pointed elsewhere.
copy_eval_outputs "$EVAL_OUTPUT_DIR" "$RESULT_ROOT/eval_outputs"

# 4) Offline e2e pytest, only when pytest is importable by `python`.
if python - <<'PY' >/dev/null 2>&1
import importlib.util
raise SystemExit(0 if importlib.util.find_spec("pytest") else 1)
PY
then
  # The test file is addressed through REPO_ROOT so the step works from any
  # working directory, including the example directory itself.
  run_with_artifacts \
    "pytest_e2e" \
    python -m pytest -sv "$REPO_ROOT/tests/e2e/offline_inference/test_internvla_a1.py" -m advanced_model
else
  write_skip_artifact "pytest_e2e" "pytest is not installed in the current python environment"
fi

capture_gpu_snapshot "$RESULT_ROOT/gpu_info_after.csv"
echo "Results written to: $RESULT_ROOT"
end2end.py
Large file omitted from the rendered docs. View it on GitHub: https://github.com/vllm-project/vllm-omni/blob/main/examples/offline_inference/internvla_a1/end2end.py.
internvla_a1_common.py
Large file omitted from the rendered docs. View it on GitHub: https://github.com/vllm-project/vllm-omni/blob/main/examples/offline_inference/internvla_a1/internvla_a1_common.py.
run.sh
#!/usr/bin/env bash
# Wrapper around end2end.py: validates the required INTERNVLA_A1_* paths,
# fills in default arguments and forwards any extra command-line arguments.
# Required env vars: INTERNVLA_A1_MODEL_DIR, INTERNVLA_A1_DATASET_DIR,
#                    INTERNVLA_A1_PROCESSOR_DIR, INTERNVLA_A1_COSMOS_DIR
# Optional env vars: INTERNVLA_A1_OUTPUT_DIR, INTERNVLA_A1_NUM_EPISODES
set -euo pipefail

# Directory holding this script, and the directory three levels above it.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$ROOT_DIR/../../.." && pwd)"

# Abort with a hint when a required path is unset or empty.
: "${INTERNVLA_A1_MODEL_DIR:?Please export INTERNVLA_A1_MODEL_DIR=/path/to/InternVLA-A1-3B-ft-pen}"
: "${INTERNVLA_A1_DATASET_DIR:?Please export INTERNVLA_A1_DATASET_DIR=/path/to/Genie1-Place_Markpen}"
: "${INTERNVLA_A1_PROCESSOR_DIR:?Please export INTERNVLA_A1_PROCESSOR_DIR=/path/to/Qwen3-VL-2B-Instruct}"
: "${INTERNVLA_A1_COSMOS_DIR:?Please export INTERNVLA_A1_COSMOS_DIR=/path/to/Cosmos-Tokenizer-CI8x8-SafeTensor}"

INTERNVLA_A1_OUTPUT_DIR="${INTERNVLA_A1_OUTPUT_DIR:-$REPO_ROOT/outputs/internvla_a1/vllm_infer}"

# The processor and Cosmos paths are not passed as flags below; they reach
# the Python process only through the exported environment.
export INTERNVLA_A1_MODEL_DIR INTERNVLA_A1_DATASET_DIR
export INTERNVLA_A1_PROCESSOR_DIR INTERNVLA_A1_COSMOS_DIR

# Default arguments. Caller-supplied arguments are appended after these.
# NOTE(review): presumably a repeated flag from the caller overrides the
# default here; confirm against end2end.py's argument parsing.
default_args=(
  --model-dir "$INTERNVLA_A1_MODEL_DIR"
  --dataset-dir "$INTERNVLA_A1_DATASET_DIR"
  --output-dir "$INTERNVLA_A1_OUTPUT_DIR"
  --num-episodes "${INTERNVLA_A1_NUM_EPISODES:-1}"
  --attn-implementation eager
)

python "$ROOT_DIR/end2end.py" "${default_args[@]}" "$@"