Skip to content

vllm.v1.sample.metadata

SamplingMetadata dataclass

Source code in vllm/v1/sample/metadata.py
@dataclass
class SamplingMetadata:
    """Bundle of sampling-related fields passed around as a single object.

    This is a plain dataclass: it declares fields only and defines no
    methods. The generated ``__init__`` accepts the fields in declaration
    order, so the order of the fields below is part of the interface.

    NOTE(review): comments marked "presumably" are inferred from field
    names and type annotations only; confirm them against the code that
    builds and consumes this object.
    """

    # Optional tensor of temperatures; presumably one value per request.
    # TODO confirm under which conditions this is None.
    temperature: Optional[torch.Tensor]
    # Presumably True when every request in the batch uses greedy
    # (respectively random) sampling -- verify against the caller.
    all_greedy: bool
    all_random: bool

    # Optional top-p / top-k / min-p tensors; presumably None means the
    # corresponding filter is not applied -- TODO confirm.
    top_p: Optional[torch.Tensor]
    top_k: Optional[torch.Tensor]
    min_p: Optional[torch.Tensor]

    # int -> torch.Generator; presumably keyed by request index like the
    # other dict fields below -- TODO confirm.
    generators: dict[int, torch.Generator]

    # None means no logprobs, 0 means sampled token logprobs only
    max_num_logprobs: Optional[int]

    # Penalty-related fields. `no_penalties` is presumably a fast-path flag
    # saying none of the penalty tensors below need to be applied -- verify.
    no_penalties: bool
    # The only Optional field of this group; the three penalty tensors are
    # declared non-Optional.
    prompt_token_ids: Optional[torch.Tensor]
    frequency_penalties: torch.Tensor
    presence_penalties: torch.Tensor
    repetition_penalties: torch.Tensor

    # Nested list of token ids; presumably one inner list per request.
    output_token_ids: list[list[int]]

    # req_index -> (min_tokens, stop_token_ids)
    min_tokens: dict[int, tuple[int, set[int]]]

    # One entry per list position, each either None or an int -> float
    # mapping; presumably token id -> bias for that request -- TODO confirm.
    logit_bias: list[Optional[dict[int, float]]]

    # `allowed_token_ids_mask` is a 2D bool tensor of shape (max batch size,
    # vocab size).
    # The field is Optional, so consumers must handle None.
    allowed_token_ids_mask: Optional[torch.Tensor]

    # req_index -> bad_words_token_ids
    bad_words_token_ids: dict[int, list[list[int]]]

all_greedy instance-attribute

all_greedy: bool

all_random instance-attribute

all_random: bool

allowed_token_ids_mask instance-attribute

allowed_token_ids_mask: Optional[Tensor]

bad_words_token_ids instance-attribute

bad_words_token_ids: dict[int, list[list[int]]]

frequency_penalties instance-attribute

frequency_penalties: Tensor

generators instance-attribute

generators: dict[int, Generator]

logit_bias instance-attribute

logit_bias: list[Optional[dict[int, float]]]

max_num_logprobs instance-attribute

max_num_logprobs: Optional[int]

min_p instance-attribute

min_p: Optional[Tensor]

min_tokens instance-attribute

min_tokens: dict[int, tuple[int, set[int]]]

no_penalties instance-attribute

no_penalties: bool

output_token_ids instance-attribute

output_token_ids: list[list[int]]

presence_penalties instance-attribute

presence_penalties: Tensor

prompt_token_ids instance-attribute

prompt_token_ids: Optional[Tensor]

repetition_penalties instance-attribute

repetition_penalties: Tensor

temperature instance-attribute

temperature: Optional[Tensor]

top_k instance-attribute

top_k: Optional[Tensor]

top_p instance-attribute

top_p: Optional[Tensor]

__init__

__init__(
    temperature: Optional[Tensor],
    all_greedy: bool,
    all_random: bool,
    top_p: Optional[Tensor],
    top_k: Optional[Tensor],
    min_p: Optional[Tensor],
    generators: dict[int, Generator],
    max_num_logprobs: Optional[int],
    no_penalties: bool,
    prompt_token_ids: Optional[Tensor],
    frequency_penalties: Tensor,
    presence_penalties: Tensor,
    repetition_penalties: Tensor,
    output_token_ids: list[list[int]],
    min_tokens: dict[int, tuple[int, set[int]]],
    logit_bias: list[Optional[dict[int, float]]],
    allowed_token_ids_mask: Optional[Tensor],
    bad_words_token_ids: dict[int, list[list[int]]],
) -> None