Bases: PreTrainedTokenizer
vocab_file (instance attribute)
convert_tokens_to_string
convert_tokens_to_string(tokens)
from_pretrained (classmethod)
from_pretrained(pretrained_model_name_or_path, *args, **kwargs)
split_segments
split_segments(
    tokenized: list[str],
    max_text_tokens_per_segment: int = 120,
    quick_streaming_tokens: int = 0,
) -> list[list[str]]