Skip to content

vllm_omni.model_executor.models.cosyvoice3.utils

IGNORE_ID module-attribute

IGNORE_ID = -1

logger module-attribute

logger = getLogger(__name__)

concat_text_with_prompt_ids

concat_text_with_prompt_ids(
    text: Tensor,
    text_len: Tensor,
    prompt_text: Tensor,
    prompt_text_len: Tensor,
) -> tuple[Tensor, Tensor]

dynamic_range_compression_torch

dynamic_range_compression_torch(x, c=1, clip_val=1e-05)

exact_div

exact_div(x, y)

extract_speech_feat

extract_speech_feat(prompt_wav, feat_extractor, device)

extract_speech_token

extract_speech_token(
    prompt_wav, speech_tokenizer_session, device
)

extract_spk_embedding

extract_spk_embedding(prompt_wav, campplus_session, device)

extract_text_token

extract_text_token(text, tokenizer, allowed_special)

load_wav

load_wav(wav, target_sr, min_sr=16000)

log_mel_spectrogram

log_mel_spectrogram(
    audio: str | ndarray | Tensor,
    n_mels: int = 80,
    padding: int = 0,
    device: str | device | None = None,
)

Compute the log-Mel spectrogram of the given audio.

Parameters

audio: Union[str, np.ndarray, torch.Tensor], shape = (*) The path to an audio file, or a NumPy array or Tensor containing the audio waveform sampled at 16 kHz

n_mels: int

The number of Mel-frequency filters, only 80 and 128 are supported

padding: int

Number of zero samples to pad to the right

device: Optional[Union[str, torch.device]]

If given, the audio tensor is moved to this device before STFT

Returns

torch.Tensor, shape = (n_mels, n_frames) A Tensor that contains the log-Mel spectrogram

make_pad_mask

make_pad_mask(lengths: Tensor, max_len: int = 0) -> Tensor

Make mask tensor containing indices of padded part.

See description of make_non_pad_mask.

Parameters:

Name Type Description Default
lengths Tensor

Batch of lengths (B,).

required

Returns: torch.Tensor: Mask tensor containing indices of padded part.

Examples:

>>> lengths = [5, 3, 2]
>>> make_pad_mask(lengths)
masks = [[0, 0, 0, 0, 0],
         [0, 0, 0, 1, 1],
         [0, 0, 1, 1, 1]]

mel_filters cached

mel_filters(device, n_mels: int) -> Tensor

Compute mel filterbank matrix for projecting STFT into a Mel spectrogram.

mel_spectrogram

mel_spectrogram(
    y,
    n_fft,
    num_mels,
    sampling_rate,
    hop_size,
    win_size,
    fmin,
    fmax,
    center=False,
)

spectral_normalize_torch

spectral_normalize_torch(magnitudes)