`vllm.v1.structured_output.backend_types` ¶

Classes:

StructuredOutputBackend –

Engine-level backend for structured output requests.
StructuredOutputGrammar –

Request-level backend for structured output requests.

`StructuredOutputBackend` `dataclass` ¶

Bases: ABC

Engine-level backend for structured output requests.

Methods:

allocate_token_bitmask –

Allocates a token bitmask for the specified maximum number of sequences.
compile_grammar –

Compiles a grammar specification into a structured output grammar.
destroy –

Backend-specific cleanup.

Source code in vllm/v1/structured_output/backend_types.py

@dataclass
class StructuredOutputBackend(ABC):
    """Engine-level backend for structured output requests.

    Concrete backends must implement ``compile_grammar``,
    ``allocate_token_bitmask`` and ``destroy``.
    """

    # Dataclass fields; the generated ``__init__`` takes them in this order.
    vllm_config: VllmConfig
    tokenizer: TokenizerLike
    vocab_size: int

    @abstractmethod
    def compile_grammar(
        self, request_type: StructuredOutputOptions, grammar_spec: str
    ) -> StructuredOutputGrammar:
        """
        Build a structured output grammar from a grammar specification.

        Args:
            request_type (StructuredOutputOptions): Kind of structured
                output request being compiled.
            grammar_spec (str): Grammar specification to compile.

        Returns:
            StructuredOutputGrammar: The grammar compiled from
                ``grammar_spec``.
        """

    @abstractmethod
    def allocate_token_bitmask(self, max_num_seqs: int) -> "torch.Tensor":
        """
        Allocate a token bitmask sized for up to ``max_num_seqs`` sequences.

        Args:
            max_num_seqs (int): Maximum number of sequences the bitmask
                must be able to cover.

        Returns:
            torch.Tensor: The allocated token bitmask.
        """

    @abstractmethod
    def destroy(self) -> None:
        """
        Perform backend-specific cleanup.
        """

`allocate_token_bitmask(max_num_seqs)` `abstractmethod` ¶

Allocates a token bitmask for the specified maximum number of sequences.

Parameters:

max_num_seqs ¶
(int) –

The maximum number of sequences for which to allocate the bitmask.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def allocate_token_bitmask(self, max_num_seqs: int) -> "torch.Tensor":
    """
    Allocate a token bitmask sized for up to ``max_num_seqs`` sequences.

    Args:
        max_num_seqs (int): Maximum number of sequences the bitmask
            must be able to cover.

    Returns:
        torch.Tensor: The allocated token bitmask.
    """

`compile_grammar(request_type, grammar_spec)` `abstractmethod` ¶

Compiles a grammar specification into a structured output grammar.

Parameters:

request_type ¶
(StructuredOutputOptions) –

The type of structured output request.
grammar_spec ¶
(str) –

The grammar specification to compile.

Returns:

StructuredOutputGrammar ( StructuredOutputGrammar ) –

The compiled structured output grammar.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def compile_grammar(
    self, request_type: StructuredOutputOptions, grammar_spec: str
) -> StructuredOutputGrammar:
    """
    Build a structured output grammar from a grammar specification.

    Args:
        request_type (StructuredOutputOptions): Kind of structured
            output request being compiled.
        grammar_spec (str): Grammar specification to compile.

    Returns:
        StructuredOutputGrammar: The grammar compiled from
            ``grammar_spec``.
    """

`destroy()` `abstractmethod` ¶

Backend-specific cleanup.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def destroy(self) -> None:
    """
    Perform backend-specific cleanup.
    """

`StructuredOutputGrammar` ¶

Bases: ABC

Request-level backend for structured output requests.

Methods:

accept_tokens –

Determines whether the provided tokens are accepted for the given request.
fill_bitmask –

Fills the bitmask for a specific batch index.
is_terminated –

Checks whether the structured output process has terminated.
reset –

Resets the state of the structured output grammar.
rollback –

Rolls back the state of the grammar by a specified number of tokens.
validate_tokens –

Validates the provided tokens against the grammar.

Source code in vllm/v1/structured_output/backend_types.py

class StructuredOutputGrammar(ABC):
    """Request-level backend for structured output requests.

    Concrete grammars must implement every abstract method declared here.
    """

    @abstractmethod
    def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
        """
        Check whether the given tokens are accepted for the given request.

        Args:
            request_id (str): Unique identifier of the request.
            tokens (list[int]): Token IDs to evaluate.

        Returns:
            bool: True if the tokens are accepted, False otherwise.
        """

    @abstractmethod
    def validate_tokens(self, tokens: list[int]) -> list[int]:
        """
        Check the given tokens against the grammar without advancing the FSM.

        Args:
            tokens (list[int]): Token IDs to validate.

        Returns:
            list[int]: The accepted token IDs: a prefix of ``tokens``, or
                an empty list when none are accepted.
        """

    @abstractmethod
    def rollback(self, num_tokens: int) -> None:
        """
        Rewind the grammar state by ``num_tokens`` tokens.

        The counters tracking the number of processed tokens are
        reverted as well.

        Args:
            num_tokens (int): How many tokens to roll back.
        """

    @abstractmethod
    def fill_bitmask(self, bitmask: "torch.Tensor", batch_index: int) -> None:
        """
        Fill the bitmask for a specific batch index.

        Args:
            bitmask (torch.Tensor): Bitmask to fill.
            batch_index (int): Index in the bitmask to fill.
        """

    @abstractmethod
    def is_terminated(self) -> bool:
        """
        Report whether the structured output process has terminated.

        Returns:
            bool: True if the process is terminated, False otherwise.
        """

    @abstractmethod
    def reset(self) -> None:
        """
        Reset the state of the structured output grammar.
        """

`accept_tokens(request_id, tokens)` `abstractmethod` ¶

Determines whether the provided tokens are accepted for the given request.

Parameters:

request_id ¶
(str) –

The unique identifier for the request.
tokens ¶
(list[int]) –

A list of token IDs to evaluate.

Returns:

bool ( bool ) –

True if the tokens are accepted, False otherwise.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def accept_tokens(self, request_id: str, tokens: list[int]) -> bool:
    """
    Check whether the given tokens are accepted for the given request.

    Args:
        request_id (str): Unique identifier of the request.
        tokens (list[int]): Token IDs to evaluate.

    Returns:
        bool: True if the tokens are accepted, False otherwise.
    """

`fill_bitmask(bitmask, batch_index)` `abstractmethod` ¶

Fills the bitmask for a specific batch index.

Parameters:

bitmask ¶
(Tensor) –

The bitmask to fill
batch_index ¶
(int) –

The index in the bitmask to fill

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def fill_bitmask(self, bitmask: "torch.Tensor", batch_index: int) -> None:
    """
    Fill the bitmask for a specific batch index.

    Args:
        bitmask (torch.Tensor): Bitmask to fill.
        batch_index (int): Index in the bitmask to fill.
    """

`is_terminated()` `abstractmethod` ¶

Checks whether the structured output process has terminated.

Returns:

bool ( bool ) –

True if the process is terminated, False otherwise.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def is_terminated(self) -> bool:
    """
    Report whether the structured output process has terminated.

    Returns:
        bool: True if the process is terminated, False otherwise.
    """

`reset()` `abstractmethod` ¶

Resets the state of the structured output grammar.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def reset(self) -> None:
    """
    Reset the state of the structured output grammar.
    """

`rollback(num_tokens)` `abstractmethod` ¶

Rolls back the state of the grammar by a specified number of tokens. Will also revert counters for the number of processed tokens.

Parameters:

num_tokens ¶
(int) –

The number of tokens to roll back.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def rollback(self, num_tokens: int) -> None:
    """
    Rewind the grammar state by ``num_tokens`` tokens.

    The counters tracking the number of processed tokens are
    reverted as well.

    Args:
        num_tokens (int): How many tokens to roll back.
    """

`validate_tokens(tokens)` `abstractmethod` ¶

Validates the provided tokens against the grammar. Will not advance the FSM.

Parameters:

tokens ¶
(list[int]) –

A list of token IDs to validate.

Returns:

list[int] –

A list of accepted token IDs. Will be a prefix of the input tokens, and empty if none are accepted.

Source code in vllm/v1/structured_output/backend_types.py

@abstractmethod
def validate_tokens(self, tokens: list[int]) -> list[int]:
    """
    Check the given tokens against the grammar without advancing the FSM.

    Args:
        tokens (list[int]): Token IDs to validate.

    Returns:
        list[int]: The accepted token IDs: a prefix of ``tokens``, or
            an empty list when none are accepted.
    """

`vllm.v1.structured_output.backend_types` ¶

`StructuredOutputBackend` `dataclass` ¶

`allocate_token_bitmask(max_num_seqs)` `abstractmethod` ¶

`max_num_seqs` ¶

`compile_grammar(request_type, grammar_spec)` `abstractmethod` ¶

`request_type` ¶

`grammar_spec` ¶

`destroy()` `abstractmethod` ¶

`StructuredOutputGrammar` ¶

`accept_tokens(request_id, tokens)` `abstractmethod` ¶

`request_id` ¶

`tokens` ¶

`fill_bitmask(bitmask, batch_index)` `abstractmethod` ¶

`bitmask` ¶

`batch_index` ¶

`is_terminated()` `abstractmethod` ¶

`reset()` `abstractmethod` ¶

`rollback(num_tokens)` `abstractmethod` ¶

`num_tokens` ¶

`validate_tokens(tokens)` `abstractmethod` ¶

`tokens` ¶

vllm.v1.structured_output.backend_types ¶

StructuredOutputBackend dataclass ¶

allocate_token_bitmask(max_num_seqs) abstractmethod ¶

max_num_seqs ¶

compile_grammar(request_type, grammar_spec) abstractmethod ¶

request_type ¶

grammar_spec ¶

destroy() abstractmethod ¶

StructuredOutputGrammar ¶

accept_tokens(request_id, tokens) abstractmethod ¶

request_id ¶

tokens ¶

fill_bitmask(bitmask, batch_index) abstractmethod ¶

bitmask ¶

batch_index ¶

is_terminated() abstractmethod ¶

reset() abstractmethod ¶

rollback(num_tokens) abstractmethod ¶

num_tokens ¶

validate_tokens(tokens) abstractmethod ¶

tokens ¶

`vllm.v1.structured_output.backend_types` ¶

`StructuredOutputBackend` `dataclass` ¶

`allocate_token_bitmask(max_num_seqs)` `abstractmethod` ¶

`max_num_seqs` ¶

`compile_grammar(request_type, grammar_spec)` `abstractmethod` ¶

`request_type` ¶

`grammar_spec` ¶

`destroy()` `abstractmethod` ¶

`StructuredOutputGrammar` ¶

`accept_tokens(request_id, tokens)` `abstractmethod` ¶

`request_id` ¶

`tokens` ¶

`fill_bitmask(bitmask, batch_index)` `abstractmethod` ¶

`bitmask` ¶

`batch_index` ¶

`is_terminated()` `abstractmethod` ¶

`reset()` `abstractmethod` ¶

`rollback(num_tokens)` `abstractmethod` ¶

`num_tokens` ¶

`validate_tokens(tokens)` `abstractmethod` ¶

`tokens` ¶