vllm_omni.model_executor.models.covo_audio.token2wav ¶
AttentionConfig module-attribute ¶
AttentionConfig = namedtuple(
    "AttentionConfig",
    ["enable_flash", "enable_math", "enable_mem_efficient"],
)
AMPBlock1 ¶
Bases: Module
activations instance-attribute ¶
activations = nn.ModuleList(
    [
        (
            AliasFreeActivation1d(
                activation=SnakeBeta(
                    channels,
                    alpha_logscale=h.snake_logscale,
                )
            )
        )
        for _ in (range(self.num_layers))
    ]
)
convs1 instance-attribute ¶
convs1 = nn.ModuleList(
    [
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[0],
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[1],
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[2],
                causal=causal,
            )
        ),
    ]
)
convs2 instance-attribute ¶
convs2 = nn.ModuleList(
    [
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
    ]
)
BigVGANFlowVAE ¶
Bases: Module
activation_post instance-attribute ¶
activation_post = AliasFreeActivation1d(
    activation=activation_post
)
ConvPositionEmbed ¶
ConvTranspose1d ¶
Dropout ¶
EmbeddingTable ¶
Linear ¶
Mlp ¶
MultiHeadAttention ¶
Bases: Module
TimestepEmbedder ¶
Token2WavDecoder ¶
Bases: Module
token2latent instance-attribute ¶
token2latent = Token2latentFlowMatchingWithEmbed(
    config.token2latent
)
upsample_factor instance-attribute ¶
Token2latentFlowMatching ¶
Bases: Module
cond_proj instance-attribute ¶
cond_proj = Linear(
    self.model_dim + self.spkr_embed_dim, self.model_dim
)
conv_embed instance-attribute ¶
conv_embed = ConvPositionEmbed(
    hidden_size=self.model_dim, kernel_size=31, groups=16
)
output_layer instance-attribute ¶
output_layer = FinalLayer(
    config.transformer.hidden_size, self.target_dim
)
token_input_dim instance-attribute ¶
token_proj instance-attribute ¶
token_proj = nn.Sequential(
    Linear(self.token_input_dim, self.model_dim, bias=True),
    ConvTranspose1d(
        self.model_dim,
        self.model_dim,
        stride=config.upsample_factor,
        kernel_size=config.upsample_factor * 2,
    ),
)
transformer_input_proj instance-attribute ¶
transformer_input_proj = Linear(
    self.model_dim + self.target_dim * 2, self.model_dim
)
Token2latentFlowMatchingWithEmbed ¶
Bases: Token2latentFlowMatching
token_embedding instance-attribute ¶
token_embedding = EmbeddingTable(
    num_embeddings=self.vocab_size,
    embedding_dim=self.token_input_dim,
    pad_id=self.token_pad_id,
)
inference ¶
inference(
    *,
    token,
    prefix_target=None,
    spkr_embed=None,
    s_steps=10,
    cfg_alpha=2,
    rescale_logits=False,
    **kwargs,
)