vllm.model_executor.layers.pooler

ClassifierFn `module-attribute` ¶

# Signature of the classifier callable used by `ClassifierPooler`:
# it receives a tensor and returns a tensor.
ClassifierFn = Callable[[Tensor], Tensor]

PoolingFn `module-attribute` ¶

# Signature of a pooling callable: it receives the hidden states (a single
# tensor or a list of tensors) together with the pooling metadata, and
# returns the pooled data as a single tensor or a list of tensors.
PoolingFn = Callable[
    [Union[Tensor, list[Tensor]], PoolingMetadata],
    Union[Tensor, list[Tensor]],
]

PoolingMetadata `module-attribute` ¶

# NOTE(review): both members of this union are displayed under the same name,
# which makes the alias look self-referential. They are presumably two
# distinct metadata classes imported under different aliases
# (`V0PoolingMetadata` is referenced in `PoolerHead.forward`) -- confirm
# against the module's imports.
PoolingMetadata = Union[PoolingMetadata, PoolingMetadata]

_T `module-attribute` ¶

# Constrained type variable: a function annotated `_T -> _T` returns a tensor
# when given a tensor, and a list of tensors when given a list of tensors.
_T = TypeVar('_T', Tensor, list[Tensor])

AllPool ¶

Bases: PoolingMethod

Source code in vllm/model_executor/layers/pooler.py

class AllPool(PoolingMethod):
    """Pooling method that keeps the hidden state of every token."""

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the set of pooling tasks this method supports."""
        supported_tasks = {"encode"}
        return supported_tasks

    def forward_one(
        self,
        hidden_states: torch.Tensor,
        prompt_len: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Return the hidden states of a single prompt unchanged.

        When `prompt_len` is given, it must equal the size of the first
        dimension of `hidden_states`; otherwise the assertion fails.
        """
        if prompt_len is not None:
            assert prompt_len == hidden_states.shape[0], \
                "partial prefill not supported with ALL pooling"

        return hidden_states

    def forward_all(
        self,
        hidden_states: torch.Tensor,
        prompt_lens: torch.Tensor,
    ) -> Union[list[torch.Tensor], torch.Tensor]:
        """Split the flattened hidden states into one tensor per prompt.

        `prompt_lens` supplies the size of each consecutive chunk taken
        along the first dimension of `hidden_states`.
        """
        chunk_sizes = prompt_lens.tolist()
        per_prompt = hidden_states.split_with_sizes(chunk_sizes)
        return list(per_prompt)

forward_all ¶

forward_all(
    hidden_states: Tensor, prompt_lens: Tensor
) -> Union[list[Tensor], Tensor]

Source code in vllm/model_executor/layers/pooler.py

def forward_all(
    self,
    hidden_states: torch.Tensor,
    prompt_lens: torch.Tensor,
) -> Union[list[torch.Tensor], torch.Tensor]:
    """Split the flattened hidden states into one tensor per prompt.

    `prompt_lens` supplies the size of each consecutive chunk taken along
    the first dimension of `hidden_states`.
    """
    chunk_sizes = prompt_lens.tolist()
    per_prompt = hidden_states.split_with_sizes(chunk_sizes)
    return list(per_prompt)

forward_one ¶

forward_one(
    hidden_states: Tensor,
    prompt_len: Optional[Tensor] = None,
) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_one(
    self,
    hidden_states: torch.Tensor,
    prompt_len: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Return the hidden states of a single prompt unchanged.

    When `prompt_len` is given, it must equal the size of the first
    dimension of `hidden_states`; otherwise the assertion fails.
    """
    if prompt_len is not None:
        assert prompt_len == hidden_states.shape[0], \
            "partial prefill not supported with ALL pooling"

    return hidden_states

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the set of pooling tasks this method supports."""
    supported_tasks = {"encode"}
    return supported_tasks

BasePoolerActivation ¶

Bases: Module, ABC

Source code in vllm/model_executor/layers/pooler.py

class BasePoolerActivation(nn.Module, ABC):
    """Abstract base class for activations applied to pooled data."""

    @abstractmethod
    def forward(self, pooled_data: _T) -> _T:
        """Apply the activation to `pooled_data`.

        Subclasses must override this method; the base implementation
        always raises `NotImplementedError`.
        """
        # shape:
        # classify (& score) -> (batch_size, num_classes)
        # embed -> (batch_size, embedding_dim) or list(embedding_dim)
        #          (batch_size, dimensions) or list(dimensions) if using MRL
        raise NotImplementedError

forward `abstractmethod` ¶

forward(pooled_data: _T) -> _T

Source code in vllm/model_executor/layers/pooler.py

@abstractmethod
def forward(self, pooled_data: _T) -> _T:
    """Apply the activation to `pooled_data`.

    Subclasses must override this method; the base implementation always
    raises `NotImplementedError`.
    """
    # shape:
    # classify (& score) -> (batch_size, num_classes)
    # embed -> (batch_size, embedding_dim) or list(embedding_dim)
    #          (batch_size, dimensions) or list(dimensions) if using MRL
    raise NotImplementedError

CLSPool ¶

Bases: PoolingMethod

Source code in vllm/model_executor/layers/pooler.py

class CLSPool(PoolingMethod):
    """Pooling method that selects the hidden state of the first token."""

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the set of pooling tasks this method supports."""
        supported_tasks = {"encode", "embed", "classify", "score"}
        return supported_tasks

    def forward_one(
        self,
        hidden_states: torch.Tensor,
        prompt_len: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Return the first row of `hidden_states` for a single prompt.

        When `prompt_len` is given, it must equal the size of the first
        dimension of `hidden_states`; otherwise the assertion fails.
        """
        if prompt_len is not None:
            assert prompt_len == hidden_states.shape[0], \
                "partial prefill not supported with CLS pooling"

        first_token_state = hidden_states[0]
        return first_token_state

    def forward_all(
        self,
        hidden_states: torch.Tensor,
        prompt_lens: torch.Tensor,
    ) -> Union[list[torch.Tensor], torch.Tensor]:
        """Gather the first-token hidden state of every prompt.

        `hidden_states` holds the prompts back to back along its first
        dimension, with `prompt_lens` giving the length of each one.
        """
        prompt_end_offsets = torch.cumsum(prompt_lens, dim=0)
        first_token_positions = torch.zeros_like(prompt_lens)
        # Prompt i starts where prompt i-1 ends; the first one starts at 0.
        first_token_positions[1:] += prompt_end_offsets[:-1]
        return hidden_states[first_token_positions]

forward_all ¶

forward_all(
    hidden_states: Tensor, prompt_lens: Tensor
) -> Union[list[Tensor], Tensor]

Source code in vllm/model_executor/layers/pooler.py

def forward_all(
    self,
    hidden_states: torch.Tensor,
    prompt_lens: torch.Tensor,
) -> Union[list[torch.Tensor], torch.Tensor]:
    """Gather the first-token hidden state of every prompt.

    `hidden_states` holds the prompts back to back along its first
    dimension, with `prompt_lens` giving the length of each one.
    """
    prompt_end_offsets = torch.cumsum(prompt_lens, dim=0)
    first_token_positions = torch.zeros_like(prompt_lens)
    # Prompt i starts where prompt i-1 ends; the first one starts at 0.
    first_token_positions[1:] += prompt_end_offsets[:-1]
    return hidden_states[first_token_positions]

forward_one ¶

forward_one(
    hidden_states: Tensor,
    prompt_len: Optional[Tensor] = None,
) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_one(
    self,
    hidden_states: torch.Tensor,
    prompt_len: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Return the first row of `hidden_states` for a single prompt.

    When `prompt_len` is given, it must equal the size of the first
    dimension of `hidden_states`; otherwise the assertion fails.
    """
    if prompt_len is not None:
        assert prompt_len == hidden_states.shape[0], \
            "partial prefill not supported with CLS pooling"

    first_token_state = hidden_states[0]
    return first_token_state

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the set of pooling tasks this method supports."""
    supported_tasks = {"encode", "embed", "classify", "score"}
    return supported_tasks

ClassifierPooler ¶

Bases: Pooler

A pooling layer for classification tasks.

This layer does the following:

1. Pools the hidden states using the pooling function.
2. Applies the classifier to the pooled data.
3. Applies the activation function to the classifier output.

Source code in vllm/model_executor/layers/pooler.py

class ClassifierPooler(Pooler):
    """A pooling layer for classification tasks.

    This layer does the following:
    1. Pools the hidden states using the pooling function.
    2. Applies the classifier to the pooled data.
    3. Applies the activation function to the classifier output.
    """

    @staticmethod
    def act_fn_for_seq_cls(config: ModelConfig):
        """Return the classification activation for `config.hf_config`."""
        hf_config = config.hf_config
        return get_classification_activation_function(hf_config)

    @staticmethod
    def act_fn_for_cross_encoder(config: ModelConfig):
        """Return the cross-encoder activation for `config.hf_config`."""
        hf_config = config.hf_config
        return get_cross_encoder_activation_function(hf_config)

    def __init__(
        self,
        pooling: PoolingFn,
        classifier: ClassifierFn,
        act_fn: PoolerActivation,
    ) -> None:
        """Store the pooling callable, the classifier and the activation."""
        super().__init__()

        self.pooling = pooling
        self.classifier = classifier
        self.act_fn = act_fn

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the set of pooling tasks this pooler supports."""
        supported_tasks = {"classify", "score"}
        return supported_tasks

    def forward(
        self,
        hidden_states: Union[torch.Tensor, list[torch.Tensor]],
        pooling_metadata: PoolingMetadata,
    ) -> PoolerOutput:
        """Pool the hidden states, classify them and apply the activation.

        The result of the activation is passed to `build_output`.
        """
        pooled_data = self.pooling(hidden_states, pooling_metadata)

        # Run the classifier once over the whole batch whenever the pooled
        # items can be combined into a single tensor.
        if isinstance(pooled_data, torch.Tensor):
            logits = self.classifier(pooled_data)
        else:
            distinct_shapes = {item.shape for item in pooled_data}
            if len(distinct_shapes) > 1:
                # Shapes differ, so the items cannot be stacked.
                logits = [self.classifier(item) for item in pooled_data]
            else:
                logits = self.classifier(torch.stack(pooled_data))

        return build_output(self.act_fn(logits))

act_fn `instance-attribute` ¶

act_fn = act_fn

classifier `instance-attribute` ¶

classifier = classifier

pooling `instance-attribute` ¶

pooling = pooling

__init__ ¶

__init__(
    pooling: PoolingFn,
    classifier: ClassifierFn,
    act_fn: PoolerActivation,
) -> None

Source code in vllm/model_executor/layers/pooler.py

def __init__(
    self,
    pooling: PoolingFn,
    classifier: ClassifierFn,
    act_fn: PoolerActivation,
) -> None:
    """Store the pooling callable, the classifier and the activation.

    Args:
        pooling: Callable invoked by `forward` with the hidden states and
            the pooling metadata.
        classifier: Callable applied by `forward` to the pooled data.
        act_fn: Activation applied by `forward` to the classifier output.
    """
    super().__init__()

    self.pooling = pooling
    self.classifier = classifier
    self.act_fn = act_fn

act_fn_for_cross_encoder `staticmethod` ¶

act_fn_for_cross_encoder(config: ModelConfig)

Source code in vllm/model_executor/layers/pooler.py

@staticmethod
def act_fn_for_cross_encoder(config: ModelConfig):
    """Return the cross-encoder activation for `config.hf_config`."""
    hf_config = config.hf_config
    return get_cross_encoder_activation_function(hf_config)

act_fn_for_seq_cls `staticmethod` ¶

act_fn_for_seq_cls(config: ModelConfig)

Source code in vllm/model_executor/layers/pooler.py

@staticmethod
def act_fn_for_seq_cls(config: ModelConfig):
    """Return the classification activation for `config.hf_config`."""
    hf_config = config.hf_config
    return get_classification_activation_function(hf_config)

forward ¶

forward(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput

Source code in vllm/model_executor/layers/pooler.py

def forward(
    self,
    hidden_states: Union[torch.Tensor, list[torch.Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput:
    """Pool the hidden states, classify them and apply the activation.

    The result of the activation is passed to `build_output`.
    """
    pooled_data = self.pooling(hidden_states, pooling_metadata)

    # Run the classifier once over the whole batch whenever the pooled
    # items can be combined into a single tensor.
    if isinstance(pooled_data, torch.Tensor):
        logits = self.classifier(pooled_data)
    else:
        distinct_shapes = {item.shape for item in pooled_data}
        if len(distinct_shapes) > 1:
            # Shapes differ, so the items cannot be stacked.
            logits = [self.classifier(item) for item in pooled_data]
        else:
            logits = self.classifier(torch.stack(pooled_data))

    return build_output(self.act_fn(logits))

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the set of pooling tasks this pooler supports."""
    supported_tasks = {"classify", "score"}
    return supported_tasks

DispatchPooler ¶

Bases: Pooler

Dispatches calls to a sub-pooler based on the pooling task.

Source code in vllm/model_executor/layers/pooler.py

class DispatchPooler(Pooler):
    """Dispatches calls to a sub-pooler based on the pooling task."""

    def __init__(self, poolers_by_task: Mapping[PoolingTask, Pooler]) -> None:
        """Validate and store the task-to-pooler mapping.

        Raises:
            ValueError: If a pooler is registered under a task that is not
                among the tasks it reports as supported.
        """
        super().__init__()

        for task, pooler in poolers_by_task.items():
            if task in pooler.get_supported_tasks():
                continue

            raise ValueError(
                f"{pooler=} does not support {task=}. "
                f"Supported tasks: {pooler.get_supported_tasks()}")

        self.poolers_by_task = poolers_by_task

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the tasks that have a registered sub-pooler."""
        return set(self.poolers_by_task.keys())

    def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
        """Delegate to the sub-pooler registered for `task`."""
        sub_pooler = self.poolers_by_task[task]
        return sub_pooler.get_pooling_updates(task)

    def forward(
        self,
        hidden_states: Union[torch.Tensor, list[torch.Tensor]],
        pooling_metadata: PoolingMetadata,
    ) -> PoolerOutput:
        """Pool each run of same-task items with that task's sub-pooler.

        Raises:
            ValueError: If a task has no registered sub-pooler.
        """
        # Work on a per-item list so that runs can be sliced out by position.
        if isinstance(hidden_states, list):
            per_item_states = hidden_states
        else:
            lengths = get_prompt_lens(hidden_states, pooling_metadata)
            per_item_states = list(hidden_states.split(lengths.tolist()))

        collected = list[PoolingSequenceGroupOutput]()
        start = 0
        # `groupby` yields consecutive items that share the same task.
        for task, run in groupby(get_tasks(pooling_metadata)):
            pooler = self.poolers_by_task.get(task)
            if not pooler:
                raise ValueError(
                    f"Unsupported task: {task} "
                    f"Supported tasks: {self.get_supported_tasks()}")

            stop = start + sum(1 for _ in run)
            run_output: PoolerOutput = pooler(
                per_item_states[start:stop],
                pooling_metadata[start:stop],
            )

            collected.extend(run_output.outputs)
            start = stop

        return PoolerOutput(collected)

poolers_by_task `instance-attribute` ¶

poolers_by_task = poolers_by_task

__init__ ¶

__init__(
    poolers_by_task: Mapping[PoolingTask, Pooler],
) -> None

Source code in vllm/model_executor/layers/pooler.py

def __init__(self, poolers_by_task: Mapping[PoolingTask, Pooler]) -> None:
    """Validate and store the task-to-pooler mapping.

    Raises:
        ValueError: If a pooler is registered under a task that is not
            among the tasks it reports as supported.
    """
    super().__init__()

    for task, pooler in poolers_by_task.items():
        if task in pooler.get_supported_tasks():
            continue

        raise ValueError(
            f"{pooler=} does not support {task=}. "
            f"Supported tasks: {pooler.get_supported_tasks()}")

    self.poolers_by_task = poolers_by_task

forward ¶

forward(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput

Source code in vllm/model_executor/layers/pooler.py

def forward(
    self,
    hidden_states: Union[torch.Tensor, list[torch.Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput:
    """Pool each run of same-task items with that task's sub-pooler.

    Raises:
        ValueError: If a task has no registered sub-pooler.
    """
    # Work on a per-item list so that runs can be sliced out by position.
    if isinstance(hidden_states, list):
        per_item_states = hidden_states
    else:
        lengths = get_prompt_lens(hidden_states, pooling_metadata)
        per_item_states = list(hidden_states.split(lengths.tolist()))

    collected = list[PoolingSequenceGroupOutput]()
    start = 0
    # `groupby` yields consecutive items that share the same task.
    for task, run in groupby(get_tasks(pooling_metadata)):
        pooler = self.poolers_by_task.get(task)
        if not pooler:
            raise ValueError(
                f"Unsupported task: {task} "
                f"Supported tasks: {self.get_supported_tasks()}")

        stop = start + sum(1 for _ in run)
        run_output: PoolerOutput = pooler(
            per_item_states[start:stop],
            pooling_metadata[start:stop],
        )

        collected.extend(run_output.outputs)
        start = stop

    return PoolerOutput(collected)

get_pooling_updates ¶

get_pooling_updates(
    task: PoolingTask,
) -> PoolingParamsUpdate

Source code in vllm/model_executor/layers/pooler.py

def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
    """Delegate to the sub-pooler registered for `task`."""
    sub_pooler = self.poolers_by_task[task]
    return sub_pooler.get_pooling_updates(task)

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the tasks that have a registered sub-pooler."""
    return set(self.poolers_by_task.keys())

LambdaPoolerActivation ¶

Bases: PoolerActivation

Source code in vllm/model_executor/layers/pooler.py

class LambdaPoolerActivation(PoolerActivation):
    """Pooler activation that delegates to a user-supplied callable."""

    def __init__(self, fn: Callable[[torch.Tensor], torch.Tensor]):
        """Store `fn`, the callable applied to each chunk of pooled data."""
        super().__init__()

        self.fn = fn

    def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
        """Return the result of calling the stored `fn` on `pooled_data`."""
        activated = self.fn(pooled_data)
        return activated

fn `instance-attribute` ¶

fn = fn

__init__ ¶

__init__(fn: Callable[[Tensor], Tensor])

Source code in vllm/model_executor/layers/pooler.py

def __init__(self, fn: Callable[[torch.Tensor], torch.Tensor]):
    """Store `fn`, the callable that `forward_chunk` applies to its input."""
    super().__init__()

    self.fn = fn

forward_chunk ¶

forward_chunk(pooled_data: Tensor) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
    """Return the result of calling the stored `fn` on `pooled_data`."""
    activated = self.fn(pooled_data)
    return activated

LastPool ¶

Bases: PoolingMethod

Source code in vllm/model_executor/layers/pooler.py

class LastPool(PoolingMethod):
    """Pooling method that selects the hidden state of the last token."""

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the set of pooling tasks this method supports."""
        supported_tasks = {"encode", "embed", "classify", "score"}
        return supported_tasks

    def forward_one(
        self,
        hidden_states: torch.Tensor,
        prompt_len: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Return the last row of `hidden_states`; `prompt_len` is unused."""
        last_token_state = hidden_states[-1]
        return last_token_state

    def forward_all(
        self,
        hidden_states: torch.Tensor,
        prompt_lens: torch.Tensor,
    ) -> Union[list[torch.Tensor], torch.Tensor]:
        """Gather the last-token hidden state of every prompt.

        `hidden_states` holds the prompts back to back along its first
        dimension, with `prompt_lens` giving the length of each one.
        """
        prompt_end_offsets = prompt_lens.cumsum(dim=0)
        # The last token of a prompt sits one position before its end offset.
        last_token_positions = prompt_end_offsets - 1
        return hidden_states[last_token_positions]

forward_all ¶

forward_all(
    hidden_states: Tensor, prompt_lens: Tensor
) -> Union[list[Tensor], Tensor]

Source code in vllm/model_executor/layers/pooler.py

def forward_all(
    self,
    hidden_states: torch.Tensor,
    prompt_lens: torch.Tensor,
) -> Union[list[torch.Tensor], torch.Tensor]:
    """Gather the last-token hidden state of every prompt.

    `hidden_states` holds the prompts back to back along its first
    dimension, with `prompt_lens` giving the length of each one.
    """
    prompt_end_offsets = prompt_lens.cumsum(dim=0)
    # The last token of a prompt sits one position before its end offset.
    last_token_positions = prompt_end_offsets - 1
    return hidden_states[last_token_positions]

forward_one ¶

forward_one(
    hidden_states: Tensor,
    prompt_len: Optional[Tensor] = None,
) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_one(
    self,
    hidden_states: torch.Tensor,
    prompt_len: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Return the last row of `hidden_states`; `prompt_len` is unused."""
    last_token_state = hidden_states[-1]
    return last_token_state

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the set of pooling tasks this method supports."""
    supported_tasks = {"encode", "embed", "classify", "score"}
    return supported_tasks

MeanPool ¶

Bases: PoolingMethod

Source code in vllm/model_executor/layers/pooler.py

class MeanPool(PoolingMethod):
    """Pooling method that averages the hidden states of each prompt."""

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the set of pooling tasks this method supports."""
        supported_tasks = {"encode", "embed", "classify", "score"}
        return supported_tasks

    def forward_one(
        self,
        hidden_states: torch.Tensor,
        prompt_len: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Return the float32 mean of `hidden_states` over dimension 0.

        When `prompt_len` is given, it must equal the size of the first
        dimension of `hidden_states`; otherwise the assertion fails.
        """
        if prompt_len is not None:
            assert prompt_len == hidden_states.shape[0], \
                "partial prefill not supported with MEAN pooling"

        return hidden_states.mean(dim=0, dtype=torch.float32)

    def forward_all(
        self,
        hidden_states: torch.Tensor,
        prompt_lens: torch.Tensor,
    ) -> Union[list[torch.Tensor], torch.Tensor]:
        """Return the mean hidden state of every prompt.

        `hidden_states` holds the prompts back to back along its first
        dimension, with `prompt_lens` giving the length of each one.
        """
        # Use float32 for the running sum in MeanPool,
        # otherwise precision will be lost significantly.
        running_sum = hidden_states.cumsum(dim=0, dtype=torch.float32)

        prompt_ends = prompt_lens.cumsum(dim=0)
        leading_zero = torch.tensor([0], device=hidden_states.device)
        prompt_starts = torch.cat(
            [leading_zero, prompt_lens[:-1].cumsum(dim=0)])

        # The difference of running sums leaves out the prompt's first
        # row, so that row is added back explicitly.
        prompt_sums = (running_sum[prompt_ends - 1] -
                       running_sum[prompt_starts] +
                       hidden_states[prompt_starts])
        return prompt_sums / prompt_lens.unsqueeze(1)

forward_all ¶

forward_all(
    hidden_states: Tensor, prompt_lens: Tensor
) -> Union[list[Tensor], Tensor]

Source code in vllm/model_executor/layers/pooler.py

def forward_all(
    self,
    hidden_states: torch.Tensor,
    prompt_lens: torch.Tensor,
) -> Union[list[torch.Tensor], torch.Tensor]:
    """Return the mean hidden state of every prompt.

    `hidden_states` holds the prompts back to back along its first
    dimension, with `prompt_lens` giving the length of each one.
    """
    # Use float32 for the running sum in MeanPool,
    # otherwise precision will be lost significantly.
    running_sum = hidden_states.cumsum(dim=0, dtype=torch.float32)

    prompt_ends = prompt_lens.cumsum(dim=0)
    leading_zero = torch.tensor([0], device=hidden_states.device)
    prompt_starts = torch.cat([leading_zero, prompt_lens[:-1].cumsum(dim=0)])

    # The difference of running sums leaves out the prompt's first row,
    # so that row is added back explicitly.
    prompt_sums = (running_sum[prompt_ends - 1] -
                   running_sum[prompt_starts] + hidden_states[prompt_starts])
    return prompt_sums / prompt_lens.unsqueeze(1)

forward_one ¶

forward_one(
    hidden_states: Tensor,
    prompt_len: Optional[Tensor] = None,
) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_one(
    self,
    hidden_states: torch.Tensor,
    prompt_len: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """Return the float32 mean of `hidden_states` over dimension 0.

    When `prompt_len` is given, it must equal the size of the first
    dimension of `hidden_states`; otherwise the assertion fails.
    """
    if prompt_len is not None:
        assert prompt_len == hidden_states.shape[0], \
            "partial prefill not supported with MEAN pooling"

    return hidden_states.mean(dim=0, dtype=torch.float32)

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the set of pooling tasks this method supports."""
    supported_tasks = {"encode", "embed", "classify", "score"}
    return supported_tasks

Pooler ¶

Bases: Module, ABC

The interface required for all poolers used in pooling models in vLLM.

Source code in vllm/model_executor/layers/pooler.py

class Pooler(nn.Module, ABC):
    """The interface required for all poolers used in pooling models in vLLM."""

    @staticmethod
    def for_encode(
        pooler_config: PoolerConfig,
        *,
        default_pooling_type: PoolingType = PoolingType.ALL,
        default_normalize: bool = False,
        default_softmax: bool = False,
        default_step_tag_id: Optional[int] = None,
        default_returned_token_ids: Optional[list[int]] = None,
    ):
        """Build a pooler from `pooler_config` and the given defaults.

        Returns a `StepPooler` when the resolved pooling type is
        `PoolingType.STEP`, and a `SimplePooler` otherwise.
        """
        resolved = ResolvedPoolingConfig.from_config_with_defaults(
            pooler_config=pooler_config,
            pooling_type=default_pooling_type,
            normalize=default_normalize,
            softmax=default_softmax,
            step_tag_id=default_step_tag_id,
            returned_token_ids=default_returned_token_ids,
        )

        uses_step_pooling = resolved.pooling_type == PoolingType.STEP
        pooler_cls = StepPooler if uses_step_pooling else SimplePooler
        return pooler_cls.from_config(resolved)

    @staticmethod
    def for_embed(
        pooler_config: PoolerConfig,
        *,
        default_pooling_type: PoolingType = PoolingType.LAST,
        default_normalize: bool = True,
        default_softmax: bool = False,
    ):
        """Build a `SimplePooler` from `pooler_config` and the defaults."""
        return SimplePooler.from_config(
            ResolvedPoolingConfig.from_config_with_defaults(
                pooler_config=pooler_config,
                pooling_type=default_pooling_type,
                normalize=default_normalize,
                softmax=default_softmax,
            ))

    @staticmethod
    def for_classify(
        pooler_config: PoolerConfig,
        classifier: Optional[ClassifierFn],
        *,
        default_pooling_type: PoolingType = PoolingType.LAST,
        default_normalize: bool = False,
        default_softmax: bool = True,
    ):
        """Build a pooler, optionally wrapping a classifier.

        Without a classifier the `SimplePooler` itself is returned. With
        one, its pooling step and head activation are combined with the
        classifier in a `ClassifierPooler`.
        """
        simple_pooler = SimplePooler.from_config(
            ResolvedPoolingConfig.from_config_with_defaults(
                pooler_config=pooler_config,
                pooling_type=default_pooling_type,
                normalize=default_normalize,
                softmax=default_softmax,
            ))

        if classifier is not None:
            return ClassifierPooler(
                pooling=simple_pooler.pooling,
                classifier=classifier,
                act_fn=simple_pooler.head.activation,
            )

        return simple_pooler

    @abstractmethod
    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Determine which pooling tasks are supported."""
        raise NotImplementedError

    def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
        """
        Construct the updated pooling parameters to use for a supported task.

        The base implementation returns a default `PoolingParamsUpdate`.
        """
        default_update = PoolingParamsUpdate()
        return default_update

    @abstractmethod
    def forward(
        self,
        hidden_states: Union[list[torch.Tensor], torch.Tensor],
        pooling_metadata: PoolingMetadata,
    ) -> PoolerOutput:
        """Pool `hidden_states`; must be implemented by subclasses."""
        raise NotImplementedError

for_classify `staticmethod` ¶

for_classify(
    pooler_config: PoolerConfig,
    classifier: Optional[ClassifierFn],
    *,
    default_pooling_type: PoolingType = LAST,
    default_normalize: bool = False,
    default_softmax: bool = True,
)

Source code in vllm/model_executor/layers/pooler.py

@staticmethod
def for_classify(
    pooler_config: PoolerConfig,
    classifier: Optional[ClassifierFn],
    *,
    default_pooling_type: PoolingType = PoolingType.LAST,
    default_normalize: bool = False,
    default_softmax: bool = True,
):
    """Build a pooler, optionally wrapping a classifier.

    Without a classifier the `SimplePooler` itself is returned. With one,
    its pooling step and head activation are combined with the classifier
    in a `ClassifierPooler`.
    """
    simple_pooler = SimplePooler.from_config(
        ResolvedPoolingConfig.from_config_with_defaults(
            pooler_config=pooler_config,
            pooling_type=default_pooling_type,
            normalize=default_normalize,
            softmax=default_softmax,
        ))

    if classifier is not None:
        return ClassifierPooler(
            pooling=simple_pooler.pooling,
            classifier=classifier,
            act_fn=simple_pooler.head.activation,
        )

    return simple_pooler

for_embed `staticmethod` ¶

for_embed(
    pooler_config: PoolerConfig,
    *,
    default_pooling_type: PoolingType = LAST,
    default_normalize: bool = True,
    default_softmax: bool = False,
)

Source code in vllm/model_executor/layers/pooler.py

@staticmethod
def for_embed(
    pooler_config: PoolerConfig,
    *,
    default_pooling_type: PoolingType = PoolingType.LAST,
    default_normalize: bool = True,
    default_softmax: bool = False,
):
    """Build a `SimplePooler` from `pooler_config` and the given defaults."""
    return SimplePooler.from_config(
        ResolvedPoolingConfig.from_config_with_defaults(
            pooler_config=pooler_config,
            pooling_type=default_pooling_type,
            normalize=default_normalize,
            softmax=default_softmax,
        ))

for_encode `staticmethod` ¶

for_encode(
    pooler_config: PoolerConfig,
    *,
    default_pooling_type: PoolingType = ALL,
    default_normalize: bool = False,
    default_softmax: bool = False,
    default_step_tag_id: Optional[int] = None,
    default_returned_token_ids: Optional[list[int]] = None,
)

Source code in vllm/model_executor/layers/pooler.py

@staticmethod
def for_encode(
    pooler_config: PoolerConfig,
    *,
    default_pooling_type: PoolingType = PoolingType.ALL,
    default_normalize: bool = False,
    default_softmax: bool = False,
    default_step_tag_id: Optional[int] = None,
    default_returned_token_ids: Optional[list[int]] = None,
):
    """Build a pooler from `pooler_config` and the given defaults.

    Returns a `StepPooler` when the resolved pooling type is
    `PoolingType.STEP`, and a `SimplePooler` otherwise.
    """
    resolved = ResolvedPoolingConfig.from_config_with_defaults(
        pooler_config=pooler_config,
        pooling_type=default_pooling_type,
        normalize=default_normalize,
        softmax=default_softmax,
        step_tag_id=default_step_tag_id,
        returned_token_ids=default_returned_token_ids,
    )

    uses_step_pooling = resolved.pooling_type == PoolingType.STEP
    pooler_cls = StepPooler if uses_step_pooling else SimplePooler
    return pooler_cls.from_config(resolved)

forward `abstractmethod` ¶

forward(
    hidden_states: Union[list[Tensor], Tensor],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput

Source code in vllm/model_executor/layers/pooler.py

@abstractmethod
def forward(
    self,
    hidden_states: Union[list[torch.Tensor], torch.Tensor],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput:
    """Pool `hidden_states` using `pooling_metadata`.

    Subclasses must override this method; the base implementation always
    raises `NotImplementedError`.
    """
    raise NotImplementedError

get_pooling_updates ¶

get_pooling_updates(
    task: PoolingTask,
) -> PoolingParamsUpdate

Construct the updated pooling parameters to use for a supported task.

Source code in vllm/model_executor/layers/pooler.py

def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
    """
    Construct the updated pooling parameters to use for a supported task.

    The base implementation returns a default `PoolingParamsUpdate`.
    """
    default_update = PoolingParamsUpdate()
    return default_update

get_supported_tasks `abstractmethod` ¶

get_supported_tasks() -> Set[PoolingTask]

Determine which pooling tasks are supported.

Source code in vllm/model_executor/layers/pooler.py

@abstractmethod
def get_supported_tasks(self) -> Set[PoolingTask]:
    """Determine which pooling tasks are supported.

    Subclasses must override this method; the base implementation always
    raises `NotImplementedError`.
    """
    raise NotImplementedError

PoolerActivation ¶

Bases: BasePoolerActivation

Source code in vllm/model_executor/layers/pooler.py

class PoolerActivation(BasePoolerActivation):
    """Activation applied per tensor, to one tensor or a list of tensors."""

    @staticmethod
    def wraps(module: nn.Module):
        """Return a pooler activation corresponding to `module`.

        `nn.Identity` maps to `PoolerIdentity`, `nn.Sigmoid` and
        `nn.Softmax` map to `PoolerClassify`, and any other module is
        wrapped in a `LambdaPoolerActivation`.
        """
        if isinstance(module, nn.Identity):
            return PoolerIdentity()

        if isinstance(module, nn.Sigmoid) or isinstance(module, nn.Softmax):
            return PoolerClassify()

        return LambdaPoolerActivation(module)

    @abstractmethod
    def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
        """Apply the activation to one tensor; implemented by subclasses."""
        raise NotImplementedError

    def forward(self, pooled_data: _T) -> _T:
        """Apply `forward_chunk` to a tensor, or to each tensor in a list."""
        if not isinstance(pooled_data, list):
            return self.forward_chunk(pooled_data)

        return [self.forward_chunk(chunk) for chunk in pooled_data]

forward ¶

forward(pooled_data: _T) -> _T

Source code in vllm/model_executor/layers/pooler.py

def forward(self, pooled_data: _T) -> _T:
    """Apply `forward_chunk` to a tensor, or to each tensor in a list."""
    if not isinstance(pooled_data, list):
        return self.forward_chunk(pooled_data)

    return [self.forward_chunk(chunk) for chunk in pooled_data]

forward_chunk `abstractmethod` ¶

forward_chunk(pooled_data: Tensor) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

@abstractmethod
def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
    """Apply the activation to a single tensor.

    Subclasses must override this method; the base implementation always
    raises `NotImplementedError`.
    """
    raise NotImplementedError

wraps `staticmethod` ¶

wraps(module: Module)

Source code in vllm/model_executor/layers/pooler.py

@staticmethod
def wraps(module: nn.Module):
    """Return a pooler activation corresponding to `module`.

    `nn.Identity` maps to `PoolerIdentity`, `nn.Sigmoid` and `nn.Softmax`
    map to `PoolerClassify`, and any other module is wrapped in a
    `LambdaPoolerActivation`.
    """
    if isinstance(module, nn.Identity):
        return PoolerIdentity()

    if isinstance(module, nn.Sigmoid) or isinstance(module, nn.Softmax):
        return PoolerClassify()

    return LambdaPoolerActivation(module)

PoolerClassify ¶

Bases: PoolerActivation

Source code in vllm/model_executor/layers/pooler.py

class PoolerClassify(PoolerActivation):
    """Activation that applies sigmoid or softmax over the last dimension."""

    def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
        """Apply sigmoid (fewer than two labels) or softmax (otherwise).

        The activation is computed in float32 and the result is cast back
        to the dtype of `pooled_data`.
        """
        label_count = pooled_data.shape[-1]
        original_dtype = pooled_data.dtype

        if label_count >= 2:
            scores = F.softmax(pooled_data.float(), dim=-1)
        else:
            scores = F.sigmoid(pooled_data.float())

        return scores.to(original_dtype)

forward_chunk ¶

forward_chunk(pooled_data: Tensor) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
    """Apply sigmoid (fewer than two labels) or softmax (otherwise).

    The activation is computed in float32 and the result is cast back to
    the dtype of `pooled_data`.
    """
    label_count = pooled_data.shape[-1]
    original_dtype = pooled_data.dtype

    if label_count >= 2:
        scores = F.softmax(pooled_data.float(), dim=-1)
    else:
        scores = F.sigmoid(pooled_data.float())

    return scores.to(original_dtype)

PoolerHead ¶

Bases: Module

Source code in vllm/model_executor/layers/pooler.py

class PoolerHead(nn.Module):
    """Post-processes pooled data and applies the configured activation."""

    @classmethod
    def from_config(cls, pooler_config: ResolvedPoolingConfig) -> "PoolerHead":
        """Build a head whose activation follows `pooler_config`.

        `normalize` selects `PoolerNormalize`, `softmax` selects
        `PoolerClassify`, and neither selects `PoolerIdentity`.

        Raises:
            ValueError: If both `normalize` and `softmax` are set.
        """
        if pooler_config.normalize and pooler_config.softmax:
            raise ValueError("`normalize=True` and `softmax=True` should not "
                             "be set together")

        activation: PoolerActivation
        if pooler_config.normalize:
            activation = PoolerNormalize()
        elif pooler_config.softmax:
            activation = PoolerClassify()
        else:
            activation = PoolerIdentity()

        return cls(activation)

    def __init__(self, activation: PoolerActivation) -> None:
        """Store the activation applied at the end of `forward`."""
        super().__init__()

        self.activation = activation

    def forward(self, pooled_data: Union[list[torch.Tensor], torch.Tensor],
                pooling_metadata: PoolingMetadata):
        """Cast to float32, optionally truncate, then apply the activation.

        Args:
            pooled_data: A single tensor or a list of tensors. The
                argument itself is not modified.
            pooling_metadata: Source of the per-item `dimensions` values.
                For metadata that is not `V0PoolingMetadata`,
                `pooled_data` must be a list.

        Returns:
            The result of `self.activation` on the processed data.
        """
        # Using float32 in PoolerHead. A new list is built instead of
        # assigning into `pooled_data`, so the caller's list is untouched.
        if isinstance(pooled_data, list):
            pooled_data = [data.to(torch.float32) for data in pooled_data]
        else:
            pooled_data = pooled_data.to(torch.float32)

        # for matryoshka representation
        if isinstance(pooling_metadata, V0PoolingMetadata):
            dimensions_list = [
                pooling_param.dimensions
                for _, pooling_param in pooling_metadata.seq_groups
            ]
        else:
            assert isinstance(pooled_data, list)
            dimensions_list = [
                pooling_param.dimensions
                for pooling_param in pooling_metadata.pooling_params
            ]
        if any(d is not None for d in dimensions_list):
            # change the output dimension
            assert len(pooled_data) == len(dimensions_list)
            if len(set(dimensions_list)) == 1 and not isinstance(
                    pooled_data, list):
                # if all dimensions are the same
                d = dimensions_list[0]
                pooled_data = pooled_data[..., :d]
            else:
                # Truncate item by item; items without a requested
                # dimension keep their full size.
                pooled_data = [
                    vecs if d is None else vecs[..., :d]
                    for vecs, d in zip(pooled_data, dimensions_list)
                ]

        return self.activation(pooled_data)

activation `instance-attribute` ¶

activation = activation

__init__ ¶

__init__(activation: PoolerActivation) -> None

Source code in vllm/model_executor/layers/pooler.py

def __init__(self, activation: PoolerActivation) -> None:
    """Store `activation`, which `forward` applies to the pooled data."""
    super().__init__()

    self.activation = activation

forward ¶

forward(
    pooled_data: Union[list[Tensor], Tensor],
    pooling_metadata: PoolingMetadata,
)

Source code in vllm/model_executor/layers/pooler.py

def forward(self, pooled_data: Union[list[torch.Tensor], torch.Tensor],
            pooling_metadata: PoolingMetadata):
    """Cast pooled data to float32, truncate it and apply the activation.

    Args:
        pooled_data: Pooled output, either one tensor or a list of
            tensors. A list is updated in place by the float32 cast.
        pooling_metadata: Metadata carrying the pooling parameters whose
            `dimensions` field requests a truncated output size.

    Returns:
        The result of calling `self.activation` on the (possibly
        truncated) pooled data.
    """
    # Using float32 in PoolerHead
    if isinstance(pooled_data, list):
        for idx, tensor in enumerate(pooled_data):
            pooled_data[idx] = tensor.to(torch.float32)
    else:
        pooled_data = pooled_data.to(torch.float32)

    # for matryoshka representation: gather the requested output size
    # of every entry described by the metadata.
    if isinstance(pooling_metadata, V0PoolingMetadata):
        params = [param for _, param in pooling_metadata.seq_groups]
    else:
        assert isinstance(pooled_data, list)
        params = pooling_metadata.pooling_params
    dimensions_list = [param.dimensions for param in params]

    # Nothing to truncate when no entry requests a dimension.
    if all(dim is None for dim in dimensions_list):
        return self.activation(pooled_data)

    # change the output dimension
    assert len(pooled_data) == len(dimensions_list)
    same_dimension = len(set(dimensions_list)) == 1
    if same_dimension and not isinstance(pooled_data, list):
        # A single tensor with one shared size is sliced in one go.
        pooled_data = pooled_data[..., :dimensions_list[0]]
    else:
        # Otherwise slice entry by entry; `None` keeps the full vector.
        pooled_data = [
            vecs if dim is None else vecs[..., :dim]
            for vecs, dim in zip(pooled_data, dimensions_list)
        ]

    return self.activation(pooled_data)

from_config `classmethod` ¶

from_config(
    pooler_config: ResolvedPoolingConfig,
) -> PoolerHead

Source code in vllm/model_executor/layers/pooler.py

@classmethod
def from_config(cls, pooler_config: ResolvedPoolingConfig) -> "PoolerHead":
    """Build a head whose activation follows the resolved pooling config.

    `normalize` selects `PoolerNormalize`, `softmax` selects
    `PoolerClassify`, and `PoolerIdentity` is used when neither is set.

    Raises:
        ValueError: If both `normalize` and `softmax` are enabled.
    """
    wants_normalize = pooler_config.normalize
    wants_softmax = pooler_config.softmax

    if wants_normalize and wants_softmax:
        raise ValueError("`normalize=True` and `softmax=True` should not "
                         "be set together")

    if wants_normalize:
        return cls(PoolerNormalize())
    if wants_softmax:
        return cls(PoolerClassify())
    return cls(PoolerIdentity())

PoolerIdentity ¶

Bases: PoolerActivation

Source code in vllm/model_executor/layers/pooler.py

class PoolerIdentity(PoolerActivation):
    """Activation that leaves the pooled data unchanged."""

    def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
        """Return `pooled_data` as is (the same tensor object)."""
        return pooled_data

forward_chunk ¶

forward_chunk(pooled_data: Tensor) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
    """Return `pooled_data` as is (the same tensor object)."""
    return pooled_data

PoolerNormalize ¶

Bases: PoolerActivation

Source code in vllm/model_executor/layers/pooler.py

class PoolerNormalize(PoolerActivation):
    """Activation that L2-normalizes pooled data along the last dimension."""

    def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
        """Normalize in float32, then cast back to the input dtype."""
        input_dtype = pooled_data.dtype
        as_float = pooled_data.float()
        normalized = F.normalize(as_float, p=2, dim=-1)
        return normalized.to(input_dtype)

forward_chunk ¶

forward_chunk(pooled_data: Tensor) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
    """L2-normalize along the last dimension.

    The computation runs in float32 and the result is cast back to the
    dtype of `pooled_data`.
    """
    input_dtype = pooled_data.dtype
    as_float = pooled_data.float()
    normalized = F.normalize(as_float, p=2, dim=-1)
    return normalized.to(input_dtype)

PoolerScore ¶

Bases: PoolerActivation

Source code in vllm/model_executor/layers/pooler.py

class PoolerScore(PoolerActivation):
    """Activation that applies a sigmoid only to single-label outputs."""

    def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
        """Apply sigmoid when the last dimension is smaller than 2.

        Inputs with two or more labels are returned untouched. The
        sigmoid is computed in float32 and cast back to the input dtype.
        """
        if pooled_data.shape[-1] >= 2:
            return pooled_data

        squashed = F.sigmoid(pooled_data.float())
        return squashed.to(pooled_data.dtype)

forward_chunk ¶

forward_chunk(pooled_data: Tensor) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def forward_chunk(self, pooled_data: torch.Tensor) -> torch.Tensor:
    """Apply sigmoid when the last dimension is smaller than 2.

    Inputs with two or more labels are returned untouched. The sigmoid is
    computed in float32 and cast back to the input dtype.
    """
    if pooled_data.shape[-1] >= 2:
        return pooled_data

    squashed = F.sigmoid(pooled_data.float())
    return squashed.to(pooled_data.dtype)

PoolingMethod ¶

Bases: Module, ABC

Source code in vllm/model_executor/layers/pooler.py

class PoolingMethod(nn.Module, ABC):
    """Base class for strategies that pool hidden states.

    Subclasses implement `forward_one` (one entry of a list input) and
    `forward_all` (a single tensor input); `forward` picks between them.
    """

    @staticmethod
    def from_pooling_type(pooling_type: PoolingType) -> "PoolingMethod":
        """Instantiate the pooling method that matches `pooling_type`.

        Raises:
            NotImplementedError: If `pooling_type` is not one of `LAST`,
                `ALL`, `CLS` or `MEAN`.
        """
        candidates = (
            (PoolingType.LAST, LastPool),
            (PoolingType.ALL, AllPool),
            (PoolingType.CLS, CLSPool),
            (PoolingType.MEAN, MeanPool),
        )
        for supported_type, method_cls in candidates:
            if pooling_type == supported_type:
                return method_cls()

        raise NotImplementedError(f"Unsupported method: {pooling_type}")

    @abstractmethod
    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the pooling tasks this method can serve."""
        raise NotImplementedError

    def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
        """Return a default `PoolingParamsUpdate()`; `task` is ignored."""
        return PoolingParamsUpdate()

    @abstractmethod
    def forward_one(
        self,
        hidden_states: torch.Tensor,
        prompt_len: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        """Pool the hidden states of a single list entry.

        Note:
            `prompt_len=None` means `prompt_len=len(hidden_states)`.
        """
        raise NotImplementedError

    @abstractmethod
    def forward_all(
        self,
        hidden_states: torch.Tensor,
        prompt_lens: torch.Tensor,
    ) -> Union[list[torch.Tensor], torch.Tensor]:
        """Pool hidden states that arrive as one tensor."""
        raise NotImplementedError

    def forward(
        self,
        hidden_states: Union[torch.Tensor, list[torch.Tensor]],
        pooling_metadata: PoolingMetadata,
    ) -> Union[list[torch.Tensor], torch.Tensor]:
        """Pool `hidden_states` using prompt lengths from the metadata.

        A list input is pooled entry by entry with `forward_one`; any
        other input is handed to `forward_all` in one call.
        """
        prompt_lens = get_prompt_lens(hidden_states, pooling_metadata)

        if not isinstance(hidden_states, list):
            return self.forward_all(hidden_states, prompt_lens)

        return [
            self.forward_one(states, length)
            for states, length in zip(hidden_states, prompt_lens)
        ]

forward ¶

forward(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> Union[list[Tensor], Tensor]

Source code in vllm/model_executor/layers/pooler.py

def forward(
    self,
    hidden_states: Union[torch.Tensor, list[torch.Tensor]],
    pooling_metadata: PoolingMetadata,
) -> Union[list[torch.Tensor], torch.Tensor]:
    """Pool `hidden_states` using prompt lengths from the metadata.

    A list input is pooled entry by entry with `forward_one`; any other
    input is handed to `forward_all` in one call.
    """
    prompt_lens = get_prompt_lens(hidden_states, pooling_metadata)

    if not isinstance(hidden_states, list):
        return self.forward_all(hidden_states, prompt_lens)

    return [
        self.forward_one(states, length)
        for states, length in zip(hidden_states, prompt_lens)
    ]

forward_all `abstractmethod` ¶

forward_all(
    hidden_states: Tensor, prompt_lens: Tensor
) -> Union[list[Tensor], Tensor]

Source code in vllm/model_executor/layers/pooler.py

@abstractmethod
def forward_all(
    self,
    hidden_states: torch.Tensor,
    prompt_lens: torch.Tensor,
) -> Union[list[torch.Tensor], torch.Tensor]:
    """Pool hidden states that arrive as one tensor.

    `forward` dispatches here whenever `hidden_states` is not a list.
    Subclasses must override this method.
    """
    # NOTE(review): presumably `hidden_states` holds all prompts
    # concatenated along the first dimension - confirm against callers.
    raise NotImplementedError

forward_one `abstractmethod` ¶

forward_one(
    hidden_states: Tensor,
    prompt_len: Optional[Tensor] = None,
) -> Tensor

Note

prompt_len=None means prompt_len=len(hidden_states).

Source code in vllm/model_executor/layers/pooler.py

@abstractmethod
def forward_one(
    self,
    hidden_states: torch.Tensor,
    prompt_len: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    """
    Pool the hidden states of a single list entry.

    `forward` calls this once per element when `hidden_states` is a list.
    Subclasses must override this method.

    Note:
        `prompt_len=None` means `prompt_len=len(hidden_states)`.
    """
    raise NotImplementedError

from_pooling_type `staticmethod` ¶

from_pooling_type(
    pooling_type: PoolingType,
) -> PoolingMethod

Source code in vllm/model_executor/layers/pooler.py

@staticmethod
def from_pooling_type(pooling_type: PoolingType) -> "PoolingMethod":
    """Instantiate the pooling method that matches `pooling_type`.

    Raises:
        NotImplementedError: If `pooling_type` is not one of `LAST`,
            `ALL`, `CLS` or `MEAN`.
    """
    candidates = (
        (PoolingType.LAST, LastPool),
        (PoolingType.ALL, AllPool),
        (PoolingType.CLS, CLSPool),
        (PoolingType.MEAN, MeanPool),
    )
    for supported_type, method_cls in candidates:
        if pooling_type == supported_type:
            return method_cls()

    raise NotImplementedError(f"Unsupported method: {pooling_type}")

get_pooling_updates ¶

get_pooling_updates(
    task: PoolingTask,
) -> PoolingParamsUpdate

Source code in vllm/model_executor/layers/pooler.py

def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
    """Return the pooling-parameter update for `task`.

    This implementation ignores `task` and returns a default
    `PoolingParamsUpdate()`.
    """
    return PoolingParamsUpdate()

get_supported_tasks `abstractmethod` ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

@abstractmethod
def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the pooling tasks this method can serve.

    Subclasses must override this method.
    """
    raise NotImplementedError

PoolingParamsUpdate `dataclass` ¶

Source code in vllm/model_executor/layers/pooler.py

@dataclass(frozen=True)
class PoolingParamsUpdate:
    """Immutable set of changes that a pooler applies to `PoolingParams`."""
    requires_token_ids: bool = False
    """Set this flag to enable `get_prompt_token_ids` for your pooler."""

    def apply(self, params: PoolingParams) -> None:
        """Copy `requires_token_ids` onto `params`, mutating it in place."""
        params.requires_token_ids = self.requires_token_ids

requires_token_ids `class-attribute` `instance-attribute` ¶

requires_token_ids: bool = False

Set this flag to enable get_prompt_token_ids for your pooler.

__init__ ¶

__init__(requires_token_ids: bool = False) -> None

apply ¶

apply(params: PoolingParams) -> None

Source code in vllm/model_executor/layers/pooler.py

def apply(self, params: PoolingParams) -> None:
    """Copy `requires_token_ids` onto `params`, mutating it in place."""
    params.requires_token_ids = self.requires_token_ids

PoolingType ¶

Bases: IntEnum

Enumeration for different types of pooling methods.

Source code in vllm/model_executor/layers/pooler.py

class PoolingType(IntEnum):
    """Enumeration for different types of pooling methods."""
    # Mapped to `LastPool` by `PoolingMethod.from_pooling_type`.
    LAST = 0
    # Mapped to `AllPool` by `PoolingMethod.from_pooling_type`.
    ALL = 1
    # Mapped to `CLSPool` by `PoolingMethod.from_pooling_type`.
    CLS = 2
    # Not handled by `PoolingMethod.from_pooling_type`;
    # `StepPooler.from_config` asserts this type.
    STEP = 3
    # Mapped to `MeanPool` by `PoolingMethod.from_pooling_type`.
    MEAN = 4

ALL `class-attribute` `instance-attribute` ¶

ALL = 1

CLS `class-attribute` `instance-attribute` ¶

CLS = 2

LAST `class-attribute` `instance-attribute` ¶

LAST = 0

MEAN `class-attribute` `instance-attribute` ¶

MEAN = 4

STEP `class-attribute` `instance-attribute` ¶

STEP = 3

ResolvedPoolingConfig `dataclass` ¶

Source code in vllm/model_executor/layers/pooler.py

@dataclass(frozen=True)
class ResolvedPoolingConfig:
    """Pooling settings after config overrides are merged with defaults."""
    pooling_type: PoolingType

    normalize: bool
    softmax: bool
    step_tag_id: Optional[int]
    returned_token_ids: Optional[list[int]]

    @classmethod
    def from_config_with_defaults(
        cls,
        pooler_config: PoolerConfig,
        pooling_type: PoolingType,
        normalize: bool,
        softmax: bool,
        step_tag_id: Optional[int] = None,
        returned_token_ids: Optional[list[int]] = None,
    ) -> "ResolvedPoolingConfig":
        """Merge `pooler_config` with the given defaults.

        Each field of `pooler_config` that is not `None` replaces the
        default passed as the argument of the same name.
        `pooler_config.pooling_type` is looked up by member name in
        `PoolingType`.
        """
        if pooler_config.pooling_type is not None:
            pooling_type = PoolingType[pooler_config.pooling_type]
        if pooler_config.normalize is not None:
            normalize = pooler_config.normalize
        if pooler_config.softmax is not None:
            softmax = pooler_config.softmax
        if pooler_config.step_tag_id is not None:
            step_tag_id = pooler_config.step_tag_id
        if pooler_config.returned_token_ids is not None:
            returned_token_ids = pooler_config.returned_token_ids

        return cls(
            pooling_type=pooling_type,
            normalize=normalize,
            softmax=softmax,
            step_tag_id=step_tag_id,
            returned_token_ids=returned_token_ids,
        )

normalize `instance-attribute` ¶

normalize: bool

pooling_type `instance-attribute` ¶

pooling_type: PoolingType

returned_token_ids `instance-attribute` ¶

returned_token_ids: Optional[list[int]]

softmax `instance-attribute` ¶

softmax: bool

step_tag_id `instance-attribute` ¶

step_tag_id: Optional[int]

__init__ ¶

__init__(
    pooling_type: PoolingType,
    normalize: bool,
    softmax: bool,
    step_tag_id: Optional[int],
    returned_token_ids: Optional[list[int]],
) -> None

from_config_with_defaults `classmethod` ¶

from_config_with_defaults(
    pooler_config: PoolerConfig,
    pooling_type: PoolingType,
    normalize: bool,
    softmax: bool,
    step_tag_id: Optional[int] = None,
    returned_token_ids: Optional[list[int]] = None,
) -> ResolvedPoolingConfig

Source code in vllm/model_executor/layers/pooler.py

@classmethod
def from_config_with_defaults(
    cls,
    pooler_config: PoolerConfig,
    pooling_type: PoolingType,
    normalize: bool,
    softmax: bool,
    step_tag_id: Optional[int] = None,
    returned_token_ids: Optional[list[int]] = None,
) -> "ResolvedPoolingConfig":
    """Merge `pooler_config` with the given defaults.

    Each field of `pooler_config` that is not `None` replaces the default
    passed as the argument of the same name. `pooler_config.pooling_type`
    is looked up by member name in `PoolingType`.
    """
    if pooler_config.pooling_type is not None:
        pooling_type = PoolingType[pooler_config.pooling_type]
    if pooler_config.normalize is not None:
        normalize = pooler_config.normalize
    if pooler_config.softmax is not None:
        softmax = pooler_config.softmax
    if pooler_config.step_tag_id is not None:
        step_tag_id = pooler_config.step_tag_id
    if pooler_config.returned_token_ids is not None:
        returned_token_ids = pooler_config.returned_token_ids

    return cls(
        pooling_type=pooling_type,
        normalize=normalize,
        softmax=softmax,
        step_tag_id=step_tag_id,
        returned_token_ids=returned_token_ids,
    )

SimplePooler ¶

Bases: Pooler

A layer that pools specific information from hidden states.

This layer does the following:

1. Extracts specific tokens or aggregates data based on pooling method.
2. Normalizes output if specified.
3. Returns structured results as `PoolerOutput`.

Source code in vllm/model_executor/layers/pooler.py

class SimplePooler(Pooler):
    """Pooler composed of a pooling method followed by a head.

    The forward pass:
    1. Runs the pooling method on the hidden states.
    2. Passes the pooled data through the head.
    3. Wraps the result with `build_output` into a `PoolerOutput`.
    """

    @classmethod
    def from_config(
        cls,
        pooler_config: ResolvedPoolingConfig,
    ) -> "SimplePooler":
        """Create the pooling method and head described by the config."""
        return cls(
            PoolingMethod.from_pooling_type(pooler_config.pooling_type),
            PoolerHead.from_config(pooler_config),
        )

    def __init__(self, pooling: PoolingMethod, head: PoolerHead) -> None:
        """Keep the pooling method and head used by `forward`."""
        super().__init__()

        self.pooling = pooling
        self.head = head

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Report the tasks supported by the pooling method."""
        return self.pooling.get_supported_tasks()

    def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
        """Forward the request to the pooling method."""
        return self.pooling.get_pooling_updates(task)

    def forward(
        self,
        hidden_states: Union[torch.Tensor, list[torch.Tensor]],
        pooling_metadata: PoolingMetadata,
    ) -> PoolerOutput:
        """Pool, apply the head and package the result."""
        pooled = self.pooling(hidden_states, pooling_metadata)
        head_output = self.head(pooled, pooling_metadata)
        return build_output(head_output)

head `instance-attribute` ¶

head = head

pooling `instance-attribute` ¶

pooling = pooling

__init__ ¶

__init__(pooling: PoolingMethod, head: PoolerHead) -> None

Source code in vllm/model_executor/layers/pooler.py

def __init__(self, pooling: PoolingMethod, head: PoolerHead) -> None:
    """Keep the pooling method and head used by `forward`.

    Args:
        pooling: Applied first, to the hidden states.
        head: Applied second, to the pooled data.
    """
    super().__init__()

    self.pooling = pooling
    self.head = head

forward ¶

forward(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput

Source code in vllm/model_executor/layers/pooler.py

def forward(
    self,
    hidden_states: Union[torch.Tensor, list[torch.Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput:
    """Pool the hidden states, apply the head and package the result.

    Returns:
        The `PoolerOutput` produced by `build_output` from the head output.
    """
    pooled = self.pooling(hidden_states, pooling_metadata)
    head_output = self.head(pooled, pooling_metadata)
    return build_output(head_output)

from_config `classmethod` ¶

from_config(
    pooler_config: ResolvedPoolingConfig,
) -> SimplePooler

Source code in vllm/model_executor/layers/pooler.py

@classmethod
def from_config(
    cls,
    pooler_config: ResolvedPoolingConfig,
) -> "SimplePooler":
    """Create the pooling method and head described by `pooler_config`."""
    return cls(
        PoolingMethod.from_pooling_type(pooler_config.pooling_type),
        PoolerHead.from_config(pooler_config),
    )

get_pooling_updates ¶

get_pooling_updates(
    task: PoolingTask,
) -> PoolingParamsUpdate

Source code in vllm/model_executor/layers/pooler.py

def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
    """Delegate to the pooling method's `get_pooling_updates`."""
    return self.pooling.get_pooling_updates(task)

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Delegate to the pooling method's `get_supported_tasks`."""
    return self.pooling.get_supported_tasks()

StepPooler ¶

Bases: Pooler

Source code in vllm/model_executor/layers/pooler.py

class StepPooler(Pooler):
    """Pooler that keeps selected rows and columns of the `AllPool` output.

    Rows are kept where the prompt token equals `step_tag_id`; columns are
    restricted to `returned_token_ids`. Either filter is skipped when its
    setting is unset.
    """

    @classmethod
    def from_config(cls, pooler_config: ResolvedPoolingConfig) -> "StepPooler":
        """Build a step pooler; the config must use `PoolingType.STEP`."""
        assert pooler_config.pooling_type == PoolingType.STEP

        head = PoolerHead.from_config(pooler_config)
        return cls(
            head,
            step_tag_id=pooler_config.step_tag_id,
            returned_token_ids=pooler_config.returned_token_ids,
        )

    def __init__(
        self,
        head: PoolerHead,
        *,
        step_tag_id: Optional[int] = None,
        returned_token_ids: Optional[list[int]] = None,
    ) -> None:
        """Store the head and the filter settings; pooling is `AllPool`."""
        super().__init__()

        self.pooling = AllPool()
        self.head = head
        self.step_tag_id = step_tag_id
        self.returned_token_ids = returned_token_ids

    def extract_states(
        self,
        hidden_states: Union[torch.Tensor, list[torch.Tensor]],
        pooling_metadata: PoolingMetadata,
    ) -> Union[list[torch.Tensor], torch.Tensor]:
        """Pool with `AllPool`, then filter each prompt's states.

        Returns:
            One tensor per prompt, collected in a list.
        """
        per_prompt_states = self.pooling(hidden_states, pooling_metadata)
        per_prompt_token_ids = get_prompt_token_ids(pooling_metadata)

        keep_ids = self.returned_token_ids
        tag = self.step_tag_id
        # The column filter only applies to a non-empty id list.
        select_columns = keep_ids is not None and len(keep_ids) > 0

        extracted: list[torch.Tensor] = []
        for states, token_ids in zip(per_prompt_states, per_prompt_token_ids):
            if select_columns:
                states = states[:, keep_ids]

            if tag is not None:
                # Boolean mask: keep rows whose prompt token is the tag.
                states = states[token_ids == tag]
            extracted.append(states)

        return extracted

    def get_supported_tasks(self) -> Set[PoolingTask]:
        """Return the single supported task, `"encode"`."""
        return {"encode"}

    def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
        """Request prompt token ids, which `extract_states` reads."""
        return PoolingParamsUpdate(requires_token_ids=True)

    def forward(
        self,
        hidden_states: Union[torch.Tensor, list[torch.Tensor]],
        pooling_metadata: PoolingMetadata,
    ) -> PoolerOutput:
        """Extract the states, apply the head and package the result."""
        extracted = self.extract_states(hidden_states, pooling_metadata)
        head_output = self.head(extracted, pooling_metadata)
        return build_output(head_output)

head `instance-attribute` ¶

head = head

pooling `instance-attribute` ¶

pooling = AllPool()

returned_token_ids `instance-attribute` ¶

returned_token_ids = returned_token_ids

step_tag_id `instance-attribute` ¶

step_tag_id = step_tag_id

__init__ ¶

__init__(
    head: PoolerHead,
    *,
    step_tag_id: Optional[int] = None,
    returned_token_ids: Optional[list[int]] = None,
) -> None

Source code in vllm/model_executor/layers/pooler.py

def __init__(
    self,
    head: PoolerHead,
    *,
    step_tag_id: Optional[int] = None,
    returned_token_ids: Optional[list[int]] = None,
) -> None:
    """Store the head and the filter settings used by `extract_states`.

    Args:
        head: Applied to the extracted states in `forward`.
        step_tag_id: When set, `extract_states` keeps only the rows whose
            prompt token equals this id.
        returned_token_ids: When set and non-empty, `extract_states` keeps
            only these column indices.
    """
    super().__init__()

    # The pooling method is always `AllPool`, regardless of configuration.
    self.pooling = AllPool()
    self.head = head
    self.step_tag_id = step_tag_id
    self.returned_token_ids = returned_token_ids

extract_states ¶

extract_states(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> Union[list[Tensor], Tensor]

Source code in vllm/model_executor/layers/pooler.py

def extract_states(
    self,
    hidden_states: Union[torch.Tensor, list[torch.Tensor]],
    pooling_metadata: PoolingMetadata,
) -> Union[list[torch.Tensor], torch.Tensor]:
    """Pool with `self.pooling`, then filter each prompt's states.

    Columns are restricted to `self.returned_token_ids` when that list is
    non-empty, and rows are kept only where the prompt token equals
    `self.step_tag_id` when it is set.

    Returns:
        One tensor per prompt, collected in a list.
    """
    per_prompt_states = self.pooling(hidden_states, pooling_metadata)
    per_prompt_token_ids = get_prompt_token_ids(pooling_metadata)

    keep_ids = self.returned_token_ids
    tag = self.step_tag_id
    # The column filter only applies to a non-empty id list.
    select_columns = keep_ids is not None and len(keep_ids) > 0

    extracted: list[torch.Tensor] = []
    for states, token_ids in zip(per_prompt_states, per_prompt_token_ids):
        if select_columns:
            states = states[:, keep_ids]

        if tag is not None:
            # Boolean mask: keep rows whose prompt token is the tag.
            states = states[token_ids == tag]
        extracted.append(states)

    return extracted

forward ¶

forward(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput

Source code in vllm/model_executor/layers/pooler.py

def forward(
    self,
    hidden_states: Union[torch.Tensor, list[torch.Tensor]],
    pooling_metadata: PoolingMetadata,
) -> PoolerOutput:
    """Extract the states, apply the head and package the result.

    Returns:
        The `PoolerOutput` produced by `build_output` from the head output.
    """
    extracted = self.extract_states(hidden_states, pooling_metadata)
    head_output = self.head(extracted, pooling_metadata)
    return build_output(head_output)

from_config `classmethod` ¶

from_config(
    pooler_config: ResolvedPoolingConfig,
) -> StepPooler

Source code in vllm/model_executor/layers/pooler.py

@classmethod
def from_config(cls, pooler_config: ResolvedPoolingConfig) -> "StepPooler":
    """Build a step pooler from a config that uses `PoolingType.STEP`.

    The head comes from `PoolerHead.from_config`; `step_tag_id` and
    `returned_token_ids` are taken from `pooler_config` unchanged.
    """
    assert pooler_config.pooling_type == PoolingType.STEP

    head = PoolerHead.from_config(pooler_config)
    return cls(
        head,
        step_tag_id=pooler_config.step_tag_id,
        returned_token_ids=pooler_config.returned_token_ids,
    )

get_pooling_updates ¶

get_pooling_updates(
    task: PoolingTask,
) -> PoolingParamsUpdate

Source code in vllm/model_executor/layers/pooler.py

def get_pooling_updates(self, task: PoolingTask) -> PoolingParamsUpdate:
    """Request prompt token ids for every task; `task` is ignored."""
    return PoolingParamsUpdate(requires_token_ids=True)

get_supported_tasks ¶

get_supported_tasks() -> Set[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_supported_tasks(self) -> Set[PoolingTask]:
    """Return the single supported task, `"encode"`."""
    return {"encode"}

build_output ¶

build_output(
    all_data: Union[Tensor, list[Tensor]],
) -> PoolerOutput

Source code in vllm/model_executor/layers/pooler.py

def build_output(
    all_data: Union[torch.Tensor, list[torch.Tensor]], ) -> PoolerOutput:
    """Wrap each item of `all_data` and collect them in a `PoolerOutput`.

    Iterating `all_data` yields one item per output; each item is wrapped
    in a `PoolingSequenceGroupOutput`.
    """
    wrapped = list(map(PoolingSequenceGroupOutput, all_data))
    return PoolerOutput(outputs=wrapped)

get_classification_activation_function ¶

get_classification_activation_function(
    config: PretrainedConfig,
)

Source code in vllm/model_executor/layers/pooler.py

def get_classification_activation_function(config: PretrainedConfig):
    """Return a new `PoolerClassify` activation.

    `config` is accepted but not read by this implementation.
    """
    return PoolerClassify()

get_cross_encoder_activation_function ¶

get_cross_encoder_activation_function(
    config: PretrainedConfig,
)

Source code in vllm/model_executor/layers/pooler.py

def get_cross_encoder_activation_function(config: PretrainedConfig):
    """Select the activation used for cross-encoder scores.

    The activation's qualified name is read from
    `config.sentence_transformers["activation_fn"]` when that key exists,
    otherwise from `config.sbert_ce_default_activation_function` when it is
    set. If neither provides a name, `PoolerScore` is used.

    Args:
        config: Model config that may name an activation class.

    Returns:
        The named activation, instantiated without arguments and wrapped
        with `PoolerActivation.wraps`, or a `PoolerScore` instance.

    Raises:
        ValueError: If the configured name does not start with
            `torch.nn.modules.`.
    """
    function_name: Optional[str] = None
    if (hasattr(config, "sentence_transformers")
            and "activation_fn" in config.sentence_transformers):
        function_name = config.sentence_transformers["activation_fn"]
    elif (hasattr(config, "sbert_ce_default_activation_function")
          and config.sbert_ce_default_activation_function is not None):
        function_name = config.sbert_ce_default_activation_function

    if function_name is None:
        return PoolerScore()

    # The name comes from a model config and is resolved to an arbitrary
    # object, so the allow-list is enforced with an explicit raise: an
    # `assert` would be stripped when Python runs with `-O`.
    if not function_name.startswith("torch.nn.modules."):
        raise ValueError("Loading of activation functions is restricted to "
                         "torch.nn.modules for security reasons")

    fn = resolve_obj_by_qualname(function_name)()
    return PoolerActivation.wraps(fn)

get_prompt_lens ¶

get_prompt_lens(
    hidden_states: Union[Tensor, list[Tensor]],
    pooling_metadata: PoolingMetadata,
) -> Tensor

Source code in vllm/model_executor/layers/pooler.py

def get_prompt_lens(
    hidden_states: Union[torch.Tensor, list[torch.Tensor]],
    pooling_metadata: PoolingMetadata,
) -> torch.Tensor:
    """Return the prompt lengths described by `pooling_metadata`.

    `V1PoolingMetadata` carries the lengths directly. Any other metadata
    is converted with `PoolingTensors.from_pooling_metadata`, using the
    device of `hidden_states[0]`.
    """
    if not isinstance(pooling_metadata, V1PoolingMetadata):
        pooling_tensors = PoolingTensors.from_pooling_metadata(
            pooling_metadata, hidden_states[0].device)
        return pooling_tensors.prompt_lens

    return pooling_metadata.prompt_lens

get_prompt_token_ids ¶

get_prompt_token_ids(
    pooling_metadata: PoolingMetadata,
) -> list[Tensor]

Source code in vllm/model_executor/layers/pooler.py

def get_prompt_token_ids(
        pooling_metadata: PoolingMetadata) -> list[torch.Tensor]:
    """Return one tensor of prompt token ids per prompt.

    For `V1PoolingMetadata`, row `i` of `prompt_token_ids` is cut to the
    `i`-th prompt length. For any other metadata, a new tensor is built
    from each entry of `seq_data`.
    """
    if not isinstance(pooling_metadata, V1PoolingMetadata):
        return [
            torch.tensor(seq.prompt_token_ids)
            for seq in pooling_metadata.seq_data.values()
        ]

    token_ids = pooling_metadata.prompt_token_ids
    assert token_ids is not None, (
        "Please set `requires_token_ids=True` in `get_pooling_updates`")

    return [
        token_ids[row, :length]
        for row, length in enumerate(pooling_metadata.prompt_lens)
    ]

get_tasks ¶

get_tasks(
    pooling_metadata: PoolingMetadata,
) -> list[PoolingTask]

Source code in vllm/model_executor/layers/pooler.py

def get_tasks(pooling_metadata: PoolingMetadata) -> list[PoolingTask]:
    """Return the task of every pooling parameter in the metadata.

    The pooling parameters come from `seq_groups` for `V0PoolingMetadata`
    and from `pooling_params` otherwise. An assertion checks that every
    parameter has a task set.
    """
    if isinstance(pooling_metadata, V0PoolingMetadata):
        params = [param for _, param in pooling_metadata.seq_groups]
    else:
        params = pooling_metadata.pooling_params

    # Read each task once and drop the unset ones, so the length check
    # below detects any parameter without a task.
    tasks: list[PoolingTask] = [
        task for task in (param.task for param in params)
        if task is not None
    ]
    assert len(params) == len(tasks)

    return tasks

vllm.model_executor.layers.pooler

ClassifierFn module-attribute ¶

PoolingFn module-attribute ¶

PoolingMetadata module-attribute ¶

_T module-attribute ¶

AllPool ¶

forward_all ¶

forward_one ¶

get_supported_tasks ¶

BasePoolerActivation ¶

forward abstractmethod ¶

CLSPool ¶

forward_all ¶

forward_one ¶

get_supported_tasks ¶

ClassifierPooler ¶

act_fn instance-attribute ¶

classifier instance-attribute ¶

pooling instance-attribute ¶

__init__ ¶

act_fn_for_cross_encoder staticmethod ¶

act_fn_for_seq_cls staticmethod ¶

forward ¶

get_supported_tasks ¶

DispatchPooler ¶

poolers_by_task instance-attribute ¶

__init__ ¶

forward ¶

get_pooling_updates ¶

get_supported_tasks ¶

LambdaPoolerActivation ¶

fn instance-attribute ¶

__init__ ¶

forward_chunk ¶

LastPool ¶

forward_all ¶

forward_one ¶

get_supported_tasks ¶

MeanPool ¶

forward_all ¶

forward_one ¶

get_supported_tasks ¶

Pooler ¶

for_classify staticmethod ¶

for_embed staticmethod ¶

for_encode staticmethod ¶

forward abstractmethod ¶

get_pooling_updates ¶

get_supported_tasks abstractmethod ¶

PoolerActivation ¶

forward ¶

forward_chunk abstractmethod ¶

wraps staticmethod ¶

PoolerClassify ¶

forward_chunk ¶

PoolerHead ¶

activation instance-attribute ¶

__init__ ¶

forward ¶

from_config classmethod ¶

PoolerIdentity ¶

forward_chunk ¶

PoolerNormalize ¶

forward_chunk ¶

PoolerScore ¶

forward_chunk ¶

PoolingMethod ¶

forward ¶

forward_all abstractmethod ¶

forward_one abstractmethod ¶

from_pooling_type staticmethod ¶

get_pooling_updates ¶

get_supported_tasks abstractmethod ¶

PoolingParamsUpdate dataclass ¶

requires_token_ids class-attribute instance-attribute ¶

__init__ ¶

apply ¶

PoolingType ¶

ALL class-attribute instance-attribute ¶

CLS class-attribute instance-attribute ¶

ClassifierFn `module-attribute` ¶

PoolingFn `module-attribute` ¶

PoolingMetadata `module-attribute` ¶

_T `module-attribute` ¶

forward `abstractmethod` ¶

act_fn `instance-attribute` ¶

classifier `instance-attribute` ¶

pooling `instance-attribute` ¶

init ¶

act_fn_for_cross_encoder `staticmethod` ¶

act_fn_for_seq_cls `staticmethod` ¶

poolers_by_task `instance-attribute` ¶

init ¶

fn `instance-attribute` ¶

init ¶

for_classify `staticmethod` ¶

for_embed `staticmethod` ¶

for_encode `staticmethod` ¶

forward `abstractmethod` ¶

get_supported_tasks `abstractmethod` ¶

forward_chunk `abstractmethod` ¶

wraps `staticmethod` ¶

activation `instance-attribute` ¶

init ¶

from_config `classmethod` ¶

forward_all `abstractmethod` ¶

forward_one `abstractmethod` ¶

from_pooling_type `staticmethod` ¶

get_supported_tasks `abstractmethod` ¶

PoolingParamsUpdate `dataclass` ¶

requires_token_ids `class-attribute` `instance-attribute` ¶

init ¶

ALL `class-attribute` `instance-attribute` ¶

CLS `class-attribute` `instance-attribute` ¶

LAST `class-attribute` `instance-attribute` ¶

MEAN `class-attribute` `instance-attribute` ¶

STEP `class-attribute` `instance-attribute` ¶

ResolvedPoolingConfig `dataclass` ¶

normalize `instance-attribute` ¶

pooling_type `instance-attribute` ¶

returned_token_ids `instance-attribute` ¶

softmax `instance-attribute` ¶

step_tag_id `instance-attribute` ¶

init ¶

from_config_with_defaults `classmethod` ¶

head `instance-attribute` ¶

pooling `instance-attribute` ¶

init ¶

from_config `classmethod` ¶

head `instance-attribute` ¶

pooling `instance-attribute` ¶

returned_token_ids `instance-attribute` ¶

step_tag_id `instance-attribute` ¶

init ¶

from_config `classmethod` ¶