vllm_omni.model_executor.models.ming_tts.pipeline ¶
Ming TTS pipeline: Stage-0 LLM+flow -> Stage-1 audio VAE.
MING_TTS_PIPELINE module-attribute ¶
# Stage 0: the "llm" stage (described as "LLM+flow" in the module summary).
# It declares no upstream inputs, owns the tokenizer, and emits "latent"
# engine output. The async-chunk hook named here is registered for preparing
# the next stage's input.
_MING_TTS_LLM_STAGE = StagePipelineConfig(
    stage_id=0,
    model_stage="llm",
    execution_type=LLM_AR,
    input_sources=(),
    owns_tokenizer=True,
    hf_config_name="llm_config",
    engine_output_type="latent",
    async_chunk_process_next_stage_input_func=f"{_PROC}.llm2audio_vae_async_chunk",
    sampling_constraints={
        "temperature": 0.0,
        "top_p": 1.0,
        "top_k": -1,
        "max_tokens": 512,
        "detokenize": True,
    },
)

# Stage 1: the "audio_vae" stage. It reads from stage 0 and is marked as the
# pipeline's final output, with output type "audio".
# NOTE(review): hf_config_name is "llm_config" here as well, the same value
# stage 0 uses -- confirm this is intended for the audio VAE stage.
_MING_TTS_AUDIO_VAE_STAGE = StagePipelineConfig(
    stage_id=1,
    model_stage="audio_vae",
    execution_type=LLM_GENERATION,
    input_sources=(0,),
    final_output=True,
    final_output_type="audio",
    hf_config_name="llm_config",
    engine_output_type="audio",
    sync_process_input_func=f"{_PROC}.llm2audio_vae",
    sampling_constraints={
        "temperature": 0.0,
        "top_p": 1.0,
        "top_k": -1,
        "max_tokens": 1,
        "detokenize": False,
    },
)

# Two-stage Ming TTS pipeline: stage 0 (llm) feeds stage 1 (audio_vae).
# Stages are listed in stage_id order.
MING_TTS_PIPELINE = PipelineConfig(
    model_type="ming_tts",
    model_arch="MingTTSForConditionalGeneration",
    hf_architectures=("MingTTSForConditionalGeneration",),
    stages=(
        _MING_TTS_LLM_STAGE,
        _MING_TTS_AUDIO_VAE_STAGE,
    ),
)