vllm_omni.model_executor.models.moss_tts.pipeline ¶
Pipeline topology for all MOSS-TTS variants (2-stage: talker → codec).
MOSS_TTS_PIPELINE module-attribute ¶
# Two-stage pipeline registered under model_type "moss_tts": stage 0 has no
# upstream inputs, and stage 1 consumes stage 0 and is marked as the final
# (audio) output.
MOSS_TTS_PIPELINE = PipelineConfig(
    model_type="moss_tts",
    model_arch="MossTTSDelayModel",
    stages=(
        # Stage 0 -- entry stage ("talker", per the module summary).
        StagePipelineConfig(
            # Identity and execution mode.
            stage_id=0,
            model_stage="moss_tts",
            execution_type=LLM_AR,
            # Topology: an empty tuple means nothing feeds this stage.
            input_sources=(),
            owns_tokenizer=True,
            # Output: "latent" engine output with detokenization disabled.
            engine_output_type="latent",
            sampling_constraints={"detokenize": False},
            # Dotted path to the hook that prepares the next stage's input
            # in async-chunk mode; resolution happens outside this module.
            async_chunk_process_next_stage_input_func=f"{_PROC}.talker2codec_async_chunk",
        ),
        # Stage 1 -- codec stage; declares its own model_arch rather than
        # relying on the pipeline-level one.
        StagePipelineConfig(
            # Identity and execution mode.
            stage_id=1,
            model_stage="moss_tts_codec",
            model_arch="MossTTSCodecDecoder",
            execution_type=LLM_GENERATION,
            # Topology: fed by stage 0.
            input_sources=(0,),
            # Dotted path to the hook that builds this stage's input in
            # sync mode; resolution happens outside this module.
            sync_process_input_func=f"{_PROC}.talker2codec",
            # Output: this stage yields the pipeline's final audio result.
            engine_output_type="audio",
            final_output=True,
            final_output_type="audio",
            sampling_constraints={"detokenize": True},
        ),
    ),
)
MOSS_TTS_REALTIME_PIPELINE module-attribute ¶
# Two-stage pipeline registered under model_type "moss_tts_realtime". The
# stage definitions carry the same values as those of MOSS_TTS_PIPELINE; only
# the pipeline-level model_type and model_arch differ.
MOSS_TTS_REALTIME_PIPELINE = PipelineConfig(
    model_type="moss_tts_realtime",
    model_arch="MossTTSRealtime",
    stages=(
        # Stage 0 -- entry stage ("talker", per the module summary).
        StagePipelineConfig(
            # Identity and execution mode.
            stage_id=0,
            model_stage="moss_tts",
            execution_type=LLM_AR,
            # Topology: an empty tuple means nothing feeds this stage.
            input_sources=(),
            owns_tokenizer=True,
            # Output: "latent" engine output with detokenization disabled.
            engine_output_type="latent",
            sampling_constraints={"detokenize": False},
            # Dotted path to the hook that prepares the next stage's input
            # in async-chunk mode; resolution happens outside this module.
            async_chunk_process_next_stage_input_func=f"{_PROC}.talker2codec_async_chunk",
        ),
        # Stage 1 -- codec stage; declares its own model_arch rather than
        # relying on the pipeline-level one.
        StagePipelineConfig(
            # Identity and execution mode.
            stage_id=1,
            model_stage="moss_tts_codec",
            model_arch="MossTTSCodecDecoder",
            execution_type=LLM_GENERATION,
            # Topology: fed by stage 0.
            input_sources=(0,),
            # Dotted path to the hook that builds this stage's input in
            # sync mode; resolution happens outside this module.
            sync_process_input_func=f"{_PROC}.talker2codec",
            # Output: this stage yields the pipeline's final audio result.
            engine_output_type="audio",
            final_output=True,
            final_output_type="audio",
            sampling_constraints={"detokenize": True},
        ),
    ),
)