API reference

vllm_omni.model_executor.models.covo_audio.token2wav

AttentionConfig module-attribute

AttentionConfig = namedtuple(
    "AttentionConfig",
    ["enable_flash", "enable_math", "enable_mem_efficient"],
)

AMPBlock1

Bases: Module

activations instance-attribute

activations = ModuleList(
    [
        (
            AliasFreeActivation1d(
                activation=SnakeBeta(
                    channels, alpha_logscale=snake_logscale
                )
            )
        )
        for _ in (range(num_layers))
    ]
)

convs1 instance-attribute

convs1 = ModuleList(
    [
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[0],
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[1],
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=dilation[2],
                causal=causal,
            )
        ),
    ]
)

convs2 instance-attribute

convs2 = ModuleList(
    [
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
        weight_norm(
            Conv1d(
                channels,
                channels,
                kernel_size,
                1,
                dilation=1,
                causal=causal,
            )
        ),
    ]
)

h instance-attribute

h = h

num_layers instance-attribute

num_layers = len(convs1) + len(convs2)

forward

forward(x)

BigVGANFlowVAE

Bases: Module

activation_post instance-attribute

activation_post = AliasFreeActivation1d(
    activation=activation_post
)

config instance-attribute

config = h

conv_post instance-attribute

conv_post = weight_norm(Conv1d(ch, 1, 7, 1, causal=causal))

conv_pre instance-attribute

conv_pre = weight_norm(
    Conv1d(
        latent_dim,
        upsample_initial_channel,
        7,
        1,
        causal=False,
    )
)

h instance-attribute

h = h

hop_size instance-attribute

hop_size = prod(downsample_rates)

num_kernels instance-attribute

num_kernels = len(resblock_kernel_sizes)

num_upsamples instance-attribute

num_upsamples = len(upsample_rates)

resblocks instance-attribute

resblocks = ModuleList()

ups instance-attribute

ups = ModuleList()

inference_from_latents

inference_from_latents(x, do_sample=True, noise_scale=1.0)

Conv1d

Bases: Conv1d

activation instance-attribute

activation = (
    activation if activation is not None else Identity()
)

bn instance-attribute

bn = BatchNorm1d(out_channels) if bn else Identity()

causal instance-attribute

causal = causal

in_channels instance-attribute

in_channels = in_channels

left_padding instance-attribute

left_padding = dilation * (kernel_size - 1)

transpose instance-attribute

transpose = input_transpose

forward

forward(x)

ConvPositionEmbed

Bases: Module

conv instance-attribute

conv = Conv1d(
    hidden_size,
    hidden_size,
    kernel_size,
    groups=groups,
    input_transpose=True,
    activation=GELU(),
)

forward

forward(x, mask=None)

ConvTranspose1d

Bases: ConvTranspose1d

causal instance-attribute

causal = causal

stride instance-attribute

stride = stride

transpose instance-attribute

transpose = input_transpose

forward

forward(x)

Dropout

Bases: Module

force_drop instance-attribute

force_drop = force_drop

inplace instance-attribute

inplace = inplace

p instance-attribute

p = p

forward

forward(x, **kwargs)

EmbeddingTable

Bases: Embedding

output_dim instance-attribute

output_dim = embedding_dim

pad_id instance-attribute

pad_id = pad_id

forward

forward(x)

FinalLayer

Bases: Module

adaLN_modulation instance-attribute

adaLN_modulation = Sequential(
    SiLU(), Linear(hidden_size, 2 * hidden_size, bias=True)
)

linear instance-attribute

linear = Linear(hidden_size, output_size, bias=True)

norm instance-attribute

norm = LayerNorm(
    hidden_size, elementwise_affine=False, eps=1e-06
)

forward

forward(x, c, mask=None)

JsonHParams

get

get(key, default=None)

items

items()

keys

keys()

values

values()

Linear

Bases: Linear

activation instance-attribute

activation = (
    activation if activation is not None else Identity()
)

output_dim instance-attribute

output_dim = out_channels

forward

forward(x, **kwargs)

Mlp

Bases: Module

act instance-attribute

act = act_layer()

drop instance-attribute

drop = Dropout(dropout)

fc1 instance-attribute

fc1 = Linear(hidden_size, ffn_hidden_size)

fc2 instance-attribute

fc2 = Linear(ffn_hidden_size, hidden_size)

forward

forward(x, **kwargs)

MultiHeadAttention

Bases: Module

attn_drop instance-attribute

attn_drop = Dropout(attn_drop)

cpu_config instance-attribute

cpu_config = AttentionConfig(True, True, True)

cuda_config instance-attribute

cuda_config = AttentionConfig(True, True, True)

head_dim instance-attribute

head_dim = hidden_size // num_heads

k_norm instance-attribute

k_norm = norm_layer(head_dim) if qk_norm else Identity()

k_proj instance-attribute

k_proj = Linear(hidden_size, hidden_size, bias=qkv_bias)

max_position_embeddings instance-attribute

max_position_embeddings = max_position_embeddings

num_heads instance-attribute

num_heads = num_heads

o_dropout instance-attribute

o_dropout = Dropout(dropout)

o_proj instance-attribute

o_proj = Linear(hidden_size, hidden_size)

q_norm instance-attribute

q_norm = norm_layer(head_dim) if qk_norm else Identity()

q_proj instance-attribute

q_proj = Linear(hidden_size, hidden_size, bias=qkv_bias)

rotary instance-attribute

rotary = RotaryEmbedding(head_dim)

rotary_bias instance-attribute

rotary_bias = rotary_bias

scale instance-attribute

scale = head_dim ** -0.5

v_proj instance-attribute

v_proj = Linear(hidden_size, hidden_size, bias=qkv_bias)

forward

forward(q, k=None, v=None, mask=None)

RotaryEmbedding

Bases: Module

device property

device

forward

forward(t)

TimestepEmbedder

Bases: Module

frequency_embedding_size instance-attribute

frequency_embedding_size = frequency_embedding_size

mlp instance-attribute

mlp = Sequential(
    Linear(
        frequency_embedding_size, hidden_size, bias=True
    ),
    SiLU(),
    Linear(hidden_size, hidden_size, bias=True),
)

forward

forward(t)

timestep_embedding staticmethod

timestep_embedding(t, dim, max_period=10000)

Token2WavDecoder

Bases: Module

config instance-attribute

config = config

global_mean_var instance-attribute

global_mean_var = getattr(config, 'global_mean_var', None)

token2latent instance-attribute

token2latent = Token2latentFlowMatchingWithEmbed(
    token2latent
)

trainable_module instance-attribute

trainable_module = ['wavegan', 'token2latent']

upsample_factor instance-attribute

upsample_factor = get('upsample_factor', 1)

wav_input_sr instance-attribute

wav_input_sr = get('wav_input_sr', 24000)

wavegan instance-attribute

wavegan = BigVGANFlowVAE(wavegan)

wavegan_hop_size instance-attribute

wavegan_hop_size = prod(downsample_rates)

inference

inference(data, **kwargs)

load_state_dict

load_state_dict(param_dict)

preprocess_infer_data

preprocess_infer_data(data)

state_dict

state_dict()

Token2latentFlowMatching

Bases: Module

blocks instance-attribute

blocks = ModuleList()

cond_proj instance-attribute

cond_proj = Linear(model_dim + spkr_embed_dim, model_dim)

config instance-attribute

config = config

conv_embed instance-attribute

conv_embed = ConvPositionEmbed(
    hidden_size=model_dim, kernel_size=31, groups=16
)

model_dim instance-attribute

model_dim = hidden_size

output_layer instance-attribute

output_layer = FinalLayer(hidden_size, target_dim)

spkr_embed_dim instance-attribute

spkr_embed_dim = get('spkr_embed_dim', 512)

target_dim instance-attribute

target_dim = z_dim

time_embedder instance-attribute

time_embedder = TimestepEmbedder(model_dim)

token_input_dim instance-attribute

token_input_dim = get('token_input_dim', model_dim)

token_pad_id instance-attribute

token_pad_id = -1

token_proj instance-attribute

token_proj = Sequential(
    Linear(token_input_dim, model_dim, bias=True),
    ConvTranspose1d(
        model_dim,
        model_dim,
        stride=upsample_factor,
        kernel_size=upsample_factor * 2,
    ),
)

transformer_input_proj instance-attribute

transformer_input_proj = Linear(
    model_dim + target_dim * 2, model_dim
)

cond_mask_spkr_embed

cond_mask_spkr_embed(x, spkr_embed)

inference

inference(
    *,
    token: Tensor,
    prefix_target: Tensor | None = None,
    spkr_embed: Tensor | None = None,
    s_steps: int | None = 10,
    cfg_alpha: float | None = 2.0,
    rescale_logits: bool = False,
    **kwargs,
)

sample

sample(
    tokens,
    audio,
    steps,
    alpha=None,
    g_cond=None,
    rescale_logits=False,
)

vectorfield_forward

vectorfield_forward(
    inputs, times, self_attn_mask, g_cond=None
)

Token2latentFlowMatchingWithEmbed

Bases: Token2latentFlowMatching

token_embedding instance-attribute

token_embedding = EmbeddingTable(
    num_embeddings=vocab_size,
    embedding_dim=token_input_dim,
    pad_id=token_pad_id,
)

vocab_size instance-attribute

vocab_size = token_vocab_size

inference

inference(
    *,
    token,
    prefix_target=None,
    spkr_embed=None,
    s_steps=10,
    cfg_alpha=2,
    rescale_logits=False,
    **kwargs,
)

TransformerBlock

Bases: Module

attn instance-attribute

attn = attention

ffn instance-attribute

ffn = ffn

modulation instance-attribute

modulation = modulation

modulation_layer instance-attribute

modulation_layer = Sequential(
    SiLU(), Linear(hidden_size, 6 * hidden_size, bias=True)
)

norm1 instance-attribute

norm1 = LayerNorm(
    hidden_size, elementwise_affine=not modulation, eps=eps
)

norm2 instance-attribute

norm2 = LayerNorm(
    hidden_size, elementwise_affine=not modulation, eps=eps
)

forward

forward(x, condition=None, mask=None)

apply_rotary_pos_emb

apply_rotary_pos_emb(pos, t)

eval_decorator

eval_decorator(fn)

exists

exists(val)

get_padding

get_padding(kernel_size, dilation=1)

init_weights

init_weights(m, mean=0.0, std=0.01)

modulate

modulate(x, shift, scale)

rotate_half

rotate_half(x)