Skip to content

vllm_omni.metrics.definitions

Single source of truth for vLLM-Omni Prometheus + bench CLI metric naming.

Consumed by:

- vllm_omni.metrics.prometheus (server-side /metrics pipeline families)
- vllm_omni.metrics.modality (audio families)
- vllm_omni.metrics.transfer (cross-stage transfer families)
- vllm_omni.benchmarks.metrics.metrics (bench CLI MultiModalsBenchmarkMetrics)

Naming conventions for the vllm:omni_* families exposed here (names built from METRIC_PREFIX): time-bearing metrics use the _s suffix (values in seconds), counters use the _total suffix (appended automatically by the Prometheus client), and sizes use the _bytes suffix.

AUDIO_CONTINUITY_DEFAULT_THRESHOLD_S module-attribute

AUDIO_CONTINUITY_DEFAULT_THRESHOLD_S = 0.1

AUDIO_CONTINUITY_LABELS module-attribute

AUDIO_CONTINUITY_LABELS = (
    "model_name",
    "stage",
    "replica",
    "threshold_ms",
)

AUDIO_CONTINUITY_OK module-attribute

AUDIO_CONTINUITY_OK = 'audio_continuity_ok'

AUDIO_CONTINUITY_OK_METRIC module-attribute

AUDIO_CONTINUITY_OK_METRIC = (
    METRIC_PREFIX + AUDIO_CONTINUITY_OK
)

AUDIO_CONTINUITY_OK_RATE module-attribute

AUDIO_CONTINUITY_OK_RATE = f'{AUDIO_CONTINUITY_OK}_rate'

AUDIO_DURATION module-attribute

AUDIO_DURATION = 'audio_duration'

AUDIO_DURATIONS module-attribute

AUDIO_DURATIONS = 'audio_durations'

AUDIO_DURATION_S module-attribute

AUDIO_DURATION_S = METRIC_PREFIX + AUDIO_DURATION + '_s'

AUDIO_FRAMES module-attribute

AUDIO_FRAMES = 'audio_frames'

AUDIO_FRAMES_METRIC module-attribute

AUDIO_FRAMES_METRIC = METRIC_PREFIX + AUDIO_FRAMES

AUDIO_RTF module-attribute

AUDIO_RTF = 'audio_rtf'

AUDIO_RTFS module-attribute

AUDIO_RTFS = 'audio_rtfs'

AUDIO_RTF_METRIC module-attribute

AUDIO_RTF_METRIC = METRIC_PREFIX + AUDIO_RTF

AUDIO_SAMPLE_RATE module-attribute

AUDIO_SAMPLE_RATE = 'audio_sample_rate'

AUDIO_SKIPPED_LABELS module-attribute

AUDIO_SKIPPED_LABELS = (
    "model_name",
    "stage",
    "replica",
    "reason",
)

AUDIO_SKIPPED_REQUESTS module-attribute

AUDIO_SKIPPED_REQUESTS = 'audio_skipped_requests'

AUDIO_SKIPPED_REQUESTS_METRIC module-attribute

AUDIO_SKIPPED_REQUESTS_METRIC = (
    METRIC_PREFIX + AUDIO_SKIPPED_REQUESTS
)

AUDIO_THROUGHPUT module-attribute

AUDIO_THROUGHPUT = 'audio_throughput'

AUDIO_TTFP module-attribute

AUDIO_TTFP = 'audio_ttfp'

AUDIO_TTFPS module-attribute

AUDIO_TTFPS = 'audio_ttfps'

AUDIO_TTFP_S module-attribute

AUDIO_TTFP_S = METRIC_PREFIX + AUDIO_TTFP + '_s'

AUDIO_UNDERRUN module-attribute

AUDIO_UNDERRUN = 'audio_underrun'

AUDIO_UNDERRUN_S module-attribute

AUDIO_UNDERRUN_S = METRIC_PREFIX + AUDIO_UNDERRUN + '_s'

AVERAGE_PIXELS_PER_IMAGE module-attribute

AVERAGE_PIXELS_PER_IMAGE = 'average_pixels_per_image'

BYTES_BUCKETS module-attribute

BYTES_BUCKETS = (
    1024,
    4096,
    16384,
    65536,
    262144,
    1048576,
    4194304,
    16777216,
    67108864,
    268435456,
)

DEFAULT_AUDIO_SAMPLE_RATE module-attribute

DEFAULT_AUDIO_SAMPLE_RATE = 24000

DENOISE_STEP_LATENCY module-attribute

DENOISE_STEP_LATENCY = 'denoise_step_latency'

DENOISE_STEP_LATENCY_MS module-attribute

DENOISE_STEP_LATENCY_MS = f'{DENOISE_STEP_LATENCY}_ms'

E2E_REQUEST_LATENCY_S module-attribute

E2E_REQUEST_LATENCY_S = (
    METRIC_PREFIX + "e2e_request_latency_s"
)

IMAGE_COUNT module-attribute

IMAGE_COUNT = 'image_count'

IMAGE_GENERATION module-attribute

IMAGE_GENERATION = 'image_generation'

IMAGE_GENERATION_TIME_MS module-attribute

IMAGE_GENERATION_TIME_MS = f'{IMAGE_GENERATION}_time_ms'

IMAGE_PIXELS module-attribute

IMAGE_PIXELS = 'image_pixels'

IMAGE_THROUGHPUT module-attribute

IMAGE_THROUGHPUT = 'image_throughput'

INTER_OUTPUT_LATENCIES_MS module-attribute

INTER_OUTPUT_LATENCIES_MS = 'inter_output_latencies_ms'

INTER_OUTPUT_LATENCY_MS module-attribute

INTER_OUTPUT_LATENCY_MS = 'inter_output_latency_ms'

ITLS module-attribute

ITLS = 'itls'

MEAN_AUDIO_DURATION_S module-attribute

MEAN_AUDIO_DURATION_S = f'mean_{AUDIO_DURATION}_s'

MEAN_AUDIO_RTF module-attribute

MEAN_AUDIO_RTF = f'mean_{AUDIO_RTF}'

MEAN_AUDIO_TTFP_MS module-attribute

MEAN_AUDIO_TTFP_MS = f'mean_{AUDIO_TTFP}_ms'

MEAN_AUDIO_UNDERRUN_S module-attribute

MEAN_AUDIO_UNDERRUN_S = f'mean_{AUDIO_UNDERRUN}_s'

MEAN_DENOISE_STEP_LATENCY_MS module-attribute

MEAN_DENOISE_STEP_LATENCY_MS = (
    f"mean_{DENOISE_STEP_LATENCY}_ms"
)

MEAN_IMAGE_GENERATION_MS module-attribute

MEAN_IMAGE_GENERATION_MS = f'mean_{IMAGE_GENERATION}_ms'

MEDIAN_AUDIO_DURATION_S module-attribute

MEDIAN_AUDIO_DURATION_S = f'median_{AUDIO_DURATION}_s'

MEDIAN_AUDIO_RTF module-attribute

MEDIAN_AUDIO_RTF = f'median_{AUDIO_RTF}'

MEDIAN_AUDIO_TTFP_MS module-attribute

MEDIAN_AUDIO_TTFP_MS = f'median_{AUDIO_TTFP}_ms'

MEDIAN_AUDIO_UNDERRUN_S module-attribute

MEDIAN_AUDIO_UNDERRUN_S = f'median_{AUDIO_UNDERRUN}_s'

MEDIAN_IMAGE_GENERATION_MS module-attribute

MEDIAN_IMAGE_GENERATION_MS = f"median_{IMAGE_GENERATION}_ms"

METRIC_PREFIX module-attribute

METRIC_PREFIX = 'vllm:omni_'

MISSING_AUDIO_DURATION_COUNT module-attribute

MISSING_AUDIO_DURATION_COUNT = (
    "missing_audio_duration_count"
)

MISSING_AUDIO_RTF_COUNT module-attribute

MISSING_AUDIO_RTF_COUNT = 'missing_audio_rtf_count'

NUM_REQUESTS_RUNNING module-attribute

NUM_REQUESTS_RUNNING = (
    METRIC_PREFIX + "num_requests_running"
)

NUM_REQUESTS_WAITING module-attribute

NUM_REQUESTS_WAITING = (
    METRIC_PREFIX + "num_requests_waiting"
)

NUM_TOKENS_IN module-attribute

NUM_TOKENS_IN = 'num_tokens_in'

NUM_TOKENS_OUT module-attribute

NUM_TOKENS_OUT = 'num_tokens_out'

OUTPUT_UNIT_COUNT module-attribute

OUTPUT_UNIT_COUNT = 'output_unit_count'

PERCENTILES_AUDIO_DURATION_S module-attribute

PERCENTILES_AUDIO_DURATION_S = (
    f"percentiles_{AUDIO_DURATION}_s"
)

PERCENTILES_AUDIO_RTF module-attribute

PERCENTILES_AUDIO_RTF = f'percentiles_{AUDIO_RTF}'

PERCENTILES_AUDIO_TTFP_MS module-attribute

PERCENTILES_AUDIO_TTFP_MS = f'percentiles_{AUDIO_TTFP}_ms'

PERCENTILES_AUDIO_UNDERRUN_S module-attribute

PERCENTILES_AUDIO_UNDERRUN_S = (
    f"percentiles_{AUDIO_UNDERRUN}_s"
)

PERCENTILES_IMAGE_GENERATION_MS module-attribute

PERCENTILES_IMAGE_GENERATION_MS = (
    f"percentiles_{IMAGE_GENERATION}_ms"
)

PIPELINE_LABELS module-attribute

PIPELINE_LABELS = ('model_name',)

POSTPROCESS_TIME module-attribute

POSTPROCESS_TIME = 'postprocess_time'

POSTPROCESS_TIMES_MS module-attribute

POSTPROCESS_TIMES_MS = f'{POSTPROCESS_TIME}s_ms'

POSTPROCESS_TIME_MS module-attribute

POSTPROCESS_TIME_MS = f'{POSTPROCESS_TIME}_ms'

REQUESTS_SUCCESS module-attribute

REQUESTS_SUCCESS = METRIC_PREFIX + 'requests_success'

RTF_BUCKETS module-attribute

RTF_BUCKETS = (
    0.1,
    0.25,
    0.5,
    0.75,
    0.9,
    1.0,
    1.25,
    1.5,
    2.0,
    5.0,
    10.0,
)

SECONDS_BUCKETS module-attribute

SECONDS_BUCKETS = (
    0.05,
    0.1,
    0.25,
    0.5,
    1.0,
    2.0,
    5.0,
    10.0,
    20.0,
    30.0,
    60.0,
    120.0,
    300.0,
)

SECONDS_FAST_BUCKETS module-attribute

SECONDS_FAST_BUCKETS = (
    0.001,
    0.005,
    0.01,
    0.025,
    0.05,
    0.1,
    0.25,
    0.5,
    1.0,
    2.5,
    5.0,
    10.0,
    60.0,
)

SERVING_TIME_TO_FIRST_OUTPUTS_MS module-attribute

SERVING_TIME_TO_FIRST_OUTPUTS_MS = (
    "serving_time_to_first_outputs_ms"
)

SERVING_TIME_TO_FIRST_OUTPUT_MS module-attribute

SERVING_TIME_TO_FIRST_OUTPUT_MS = (
    "serving_time_to_first_output_ms"
)

STAGE_GEN_TIME module-attribute

STAGE_GEN_TIME = 'stage_gen_time'

STAGE_GEN_TIMES_MS module-attribute

STAGE_GEN_TIMES_MS = f'{STAGE_GEN_TIME}s_ms'

STAGE_GEN_TIME_MS module-attribute

STAGE_GEN_TIME_MS = f'{STAGE_GEN_TIME}_ms'

STAGE_LABELS module-attribute

STAGE_LABELS = ('model_name', 'stage', 'replica')

STD_AUDIO_DURATION_S module-attribute

STD_AUDIO_DURATION_S = f'std_{AUDIO_DURATION}_s'

STD_AUDIO_RTF module-attribute

STD_AUDIO_RTF = f'std_{AUDIO_RTF}'

STD_AUDIO_TTFP_MS module-attribute

STD_AUDIO_TTFP_MS = f'std_{AUDIO_TTFP}_ms'

STD_AUDIO_UNDERRUN_S module-attribute

STD_AUDIO_UNDERRUN_S = f'std_{AUDIO_UNDERRUN}_s'

STD_IMAGE_GENERATION_MS module-attribute

STD_IMAGE_GENERATION_MS = f'std_{IMAGE_GENERATION}_ms'

SUCCESS_LABELS module-attribute

SUCCESS_LABELS = ('model_name', 'finished_reason')

TIME_PER_OUTPUT_UNITS_MS module-attribute

TIME_PER_OUTPUT_UNITS_MS = 'time_per_output_units_ms'

TIME_PER_OUTPUT_UNIT_MS module-attribute

TIME_PER_OUTPUT_UNIT_MS = 'time_per_output_unit_ms'

TOTAL_AUDIO_DURATION_S module-attribute

TOTAL_AUDIO_DURATION_S = f'total_{AUDIO_DURATION}_s'

TOTAL_AUDIO_FRAMES module-attribute

TOTAL_AUDIO_FRAMES = f'total_{AUDIO_FRAMES}'

TOTAL_IMAGES module-attribute

TOTAL_IMAGES = 'total_images'

TOTAL_OUTPUT module-attribute

TOTAL_OUTPUT = 'total_output'

TPOTS module-attribute

TPOTS = 'tpots'

TRANSFER_IN_FLIGHT_S module-attribute

TRANSFER_IN_FLIGHT_S = (
    METRIC_PREFIX + "transfer_in_flight_s"
)

TRANSFER_LABELS module-attribute

TRANSFER_LABELS = (
    "model_name",
    "from_stage",
    "from_replica",
    "to_stage",
    "to_replica",
)

TRANSFER_RX_S module-attribute

TRANSFER_RX_S = METRIC_PREFIX + 'transfer_rx_s'

TRANSFER_SIZE_BYTES module-attribute

TRANSFER_SIZE_BYTES = METRIC_PREFIX + 'transfer_size_bytes'

TRANSFER_TX_S module-attribute

TRANSFER_TX_S = METRIC_PREFIX + 'transfer_tx_s'

TTFTS module-attribute

TTFTS = 'ttfts'

VLLM_ITLS module-attribute

VLLM_ITLS = 'vllm_itls'

VLLM_ITLS_MS module-attribute

VLLM_ITLS_MS = 'vllm_itls_ms'

VLLM_ITL_MS module-attribute

VLLM_ITL_MS = 'vllm_itl_ms'

VLLM_TPOTS module-attribute

VLLM_TPOTS = 'vllm_tpots'

VLLM_TPOT_MS module-attribute

VLLM_TPOT_MS = 'vllm_tpot_ms'

VLLM_TTFTS module-attribute

VLLM_TTFTS = 'vllm_ttfts'

VLLM_TTFT_MS module-attribute

VLLM_TTFT_MS = 'vllm_ttft_ms'

compute_audio_rtf

compute_audio_rtf(
    stage_gen_time_s: float, audio_duration_s: float
) -> float

Real-time factor: RTF = stage_gen_time_s / audio_duration_s.

SLO red line: RTF < 1, i.e. audio must be generated faster than it plays back in order to be streamed. Returns 0.0 when audio_duration_s is non-positive, so that the function never divides by zero or emits a negative sample; the caller decides whether to observe that value.

compute_denoise_step_latency

compute_denoise_step_latency(
    stage_gen_time: float, num_inference_steps: int
) -> float

Mean denoise step latency = image stage generation time / step count.

The returned value uses the same time unit as stage_gen_time.

resolve_audio_sample_rate

resolve_audio_sample_rate(
    source: dict[str, Any] | Any | None,
) -> int

Extract audio sample_rate from a dict or config object, with fallbacks.

Tries the same key chain as the audio response path in serving_chat.py, so that the /metrics value audio_duration_s (computed as audio_frames / sample_rate) stays consistent with what the OpenAI streaming endpoint reports back to clients. Also accepts config objects that expose the same values as attributes. Returns DEFAULT_AUDIO_SAMPLE_RATE when no usable value is present.