vllm_omni.model_executor.models.covo_audio.token2wav
AttentionConfig module-attribute
AttentionConfig = namedtuple(
    "AttentionConfig",
    ["enable_flash", "enable_math", "enable_mem_efficient"],
)
AMPBlock1
Bases: Module
activations instance-attribute
activations = ModuleList(
    [
        (
            AliasFreeActivation1d(
                activation=SnakeBeta(
                    channels, alpha_logscale=snake_logscale
                )
            )
        )
        for _ in (range(num_layers))
    ]
)
convs1 instance-attribute
convs1 = ModuleList(
    [
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[0],
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[1],
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[2],
                causal=causal,
            )
        ),
    ]
)
convs2 instance-attribute
convs2 = ModuleList(
    [
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
    ]
)
BigVGANFlowVAE
Bases: Module
activation_post instance-attribute
activation_post = AliasFreeActivation1d(
    activation=activation_post
)
ConvPositionEmbed
ConvTranspose1d
Dropout
EmbeddingTable
Linear
Mlp
MultiHeadAttention
Bases: Module
TimestepEmbedder
Token2WavDecoder
Bases: Module
Token2latentFlowMatching
Bases: Module
conv_embed instance-attribute
conv_embed = ConvPositionEmbed(
    hidden_size=model_dim, kernel_size=31, groups=16
)
token_proj instance-attribute
token_proj = Sequential(
    Linear(token_input_dim, model_dim, bias=True),
    ConvTranspose1d(
        model_dim,
        model_dim,
        stride=upsample_factor,
        kernel_size=upsample_factor * 2,
    ),
)
transformer_input_proj instance-attribute
transformer_input_proj = Linear(
    model_dim + target_dim * 2, model_dim
)
Token2latentFlowMatchingWithEmbed
Bases: Token2latentFlowMatching
token_embedding instance-attribute
token_embedding = EmbeddingTable(
    num_embeddings=vocab_size,
    embedding_dim=token_input_dim,
    pad_id=token_pad_id,
)
inference
inference(
    *,
    token,
    prefix_target=None,
    spkr_embed=None,
    s_steps=10,
    cfg_alpha=2,
    rescale_logits=False,
    **kwargs,
)