Converts streaming text into terminal tokens.
The key feature is prefix-match buffering: when the text in the buffer could be the start of a multi-character terminal (e.g. "<tool_" that could become "<tool_call>"), the lexer holds the text rather than emitting it. When the next chunk arrives, it either completes the terminal or flushes the buffered text as content.
Terminals are tried in priority order (literals first, then by descending priority, then by pattern length).
Source code in vllm/parser/engine/incremental_lexer.py
| class IncrementalLexer:
"""Converts streaming text into terminal tokens.
The key feature is **prefix-match buffering**: when the text in the
buffer could be the start of a multi-character terminal (e.g.
``"<tool_"`` that could become ``"<tool_call>"``), the lexer holds
the text rather than emitting it. When the next chunk arrives, it
either completes the terminal or flushes the buffered text as
content.
Terminals are tried in priority order (literals first, then by
descending priority, then by pattern length).
"""
def __init__(
self,
terminals: list[TerminalDef] | LexerShape,
content_terminal: str = CONTENT_TERMINAL,
) -> None:
if isinstance(terminals, LexerShape):
shape = terminals
else:
shape = LexerShape(terminals)
self._shape = shape
self.terminals = shape.terminals
self.content_terminal = content_terminal
self.buffer = ""
self._literal_strings = shape.literal_strings
self._max_literal_len = shape.max_literal_len
self._literal_first_chars = shape.literal_first_chars
self._has_only_literals = shape.has_only_literals
self._prefix_set = shape.prefix_set
self._literals_by_first = shape.literals_by_first
def reset(self) -> None:
self.buffer = ""
def feed(self, text: str) -> list[LexToken]:
if not self.buffer and self._has_only_literals and self._literal_first_chars:
for ch in text:
if ch in self._literal_first_chars:
break
else:
return [LexToken(self.content_terminal, text)]
self.buffer += text
return self._drain()
def flush(self) -> list[LexToken]:
tokens: list[LexToken] = []
if self.buffer:
tokens.append(LexToken(self.content_terminal, self.buffer))
self.buffer = ""
return tokens
def _drain(self) -> list[LexToken]:
tokens: list[LexToken] = []
first_chars = self._literal_first_chars
content_terminal = self.content_terminal
has_only_literals = self._has_only_literals
literals_by_first = self._literals_by_first
prefix_set = self._prefix_set
while self.buffer:
if has_only_literals and first_chars:
has_potential = False
for ch in self.buffer:
if ch in first_chars:
has_potential = True
break
if not has_potential:
tokens.append(LexToken(content_terminal, self.buffer))
self.buffer = ""
break
best_match: tuple[str, str, int] | None = None
first = self.buffer[0]
for lit, name in literals_by_first.get(first, ()):
if self.buffer.startswith(lit) and (
best_match is None or len(lit) > best_match[2]
):
best_match = (name, lit, len(lit))
if self.buffer in prefix_set:
if best_match is not None:
tokens.append(LexToken(best_match[0], best_match[1]))
self.buffer = self.buffer[best_match[2] :]
continue
else:
break
if best_match is not None:
tokens.append(LexToken(best_match[0], best_match[1]))
self.buffer = self.buffer[best_match[2] :]
else:
content_end = self._find_content_boundary()
if content_end > 0:
tokens.append(LexToken(content_terminal, self.buffer[:content_end]))
self.buffer = self.buffer[content_end:]
else:
tokens.append(LexToken(content_terminal, self.buffer[0]))
self.buffer = self.buffer[1:]
return tokens
def _find_content_boundary(self) -> int:
buf = self.buffer
n = len(buf)
first_chars = self._literal_first_chars
for i in range(1, n):
if buf[i] not in first_chars:
continue
remaining = n - i
for lit, _ in self._literal_strings:
check_len = min(remaining, len(lit))
if buf[i : i + check_len] == lit[:check_len]:
return i
return n
|