vllm_omni.model_executor.models.qwen3_omni.pipeline
Qwen3-Omni-MoE pipeline topology (frozen).
Stage 0: Thinker — multimodal understanding + text generation
Stage 1: Talker — text embeddings → RVQ codec codes
Stage 2: Code2Wav — RVQ codes → audio waveform
QWEN3_OMNI_PIPELINE module-attribute
# Declarative three-stage pipeline for Qwen3-Omni-MoE. Stages are chained
# through ``input_sources``: stage 0 has none, stage 1 reads stage 0, and
# stage 2 reads stage 1. Stages 0 and 2 are flagged as final outputs
# (text and audio respectively).
QWEN3_OMNI_PIPELINE = PipelineConfig(
    model_arch="Qwen3OmniMoeForConditionalGeneration",
    model_type="qwen3_omni_moe",
    stages=(
        # --- Stage 0: thinker -------------------------------------------
        # Entry stage (empty ``input_sources``); emits the final text output
        # and hands a "latent" engine output on to the next stage.
        StagePipelineConfig(
            # identity
            stage_id=0,
            model_stage="thinker",
            hf_config_name="thinker_config",
            execution_type=LLM_AR,
            # wiring
            input_sources=(),
            owns_tokenizer=True,
            requires_multimodal_data=True,
            # outputs
            engine_output_type="latent",
            final_output=True,
            final_output_type="text",
            # hand-off hooks, referenced by dotted path under ``_PROC``
            custom_process_next_stage_input_func=f"{_PROC}.thinker2talker_full_payload",
            async_chunk_process_next_stage_input_func=f"{_PROC}.thinker2talker_async_chunk",
            # sampling
            sampling_constraints={"detokenize": True},
        ),
        # --- Stage 1: talker --------------------------------------------
        # Consumes stage 0. Not flagged as a final output here; its
        # "latent" engine output feeds stage 2.
        StagePipelineConfig(
            # identity
            stage_id=1,
            model_stage="talker",
            hf_config_name="talker_config",
            execution_type=LLM_AR,
            # wiring
            input_sources=(0,),
            # outputs
            engine_output_type="latent",
            # input hooks (from the thinker)
            custom_process_input_func=f"{_PROC}.thinker2talker",
            sync_process_input_func=f"{_PROC}.thinker2talker_token_only",
            # hand-off hooks (to code2wav)
            custom_process_next_stage_input_func=f"{_PROC}.talker2code2wav_full_payload",
            async_chunk_process_next_stage_input_func=f"{_PROC}.talker2code2wav_async_chunk",
            # sampling: detokenization is disabled for this stage.
            # NOTE(review): 2150 is presumably the codec end-of-sequence
            # token id — confirm against the talker config.
            sampling_constraints={"detokenize": False, "stop_token_ids": [2150]},
        ),
        # --- Stage 2: code2wav ------------------------------------------
        # Consumes stage 1 and emits the final audio output.
        StagePipelineConfig(
            # identity
            stage_id=2,
            model_stage="code2wav",
            # NOTE(review): this stage points at "thinker_config" rather than
            # a code2wav-specific config — confirm this is intentional.
            hf_config_name="thinker_config",
            execution_type=LLM_GENERATION,
            # wiring
            input_sources=(1,),
            # outputs
            engine_output_type="audio",
            final_output=True,
            final_output_type="audio",
            # input hook (from the talker)
            custom_process_input_func=f"{_PROC}.talker2code2wav",
            # sampling
            sampling_constraints={"detokenize": True},
        ),
    ),
)