vllm.model_executor.models.bert
BertAttention
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
output
instance-attribute
¶
output = BertSelfOutput(
hidden_size=hidden_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
self
instance-attribute
¶
self = BertSelfAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__
¶
__init__(
hidden_size: int,
num_attention_heads: int,
layer_norm_eps: float,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
BertEmbedding
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
position_embeddings
instance-attribute
¶
position_embeddings = VocabParallelEmbedding(
max_position_embeddings, hidden_size
)
token_type_embeddings
instance-attribute
¶
token_type_embeddings = VocabParallelEmbedding(
type_vocab_size, hidden_size
)
word_embeddings
instance-attribute
¶
word_embeddings = VocabParallelEmbedding(
vocab_size, hidden_size
)
__init__
¶
Source code in vllm/model_executor/models/bert.py
forward
¶
forward(
input_ids: Tensor,
seq_lens: Tensor,
position_ids: Tensor,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
BertEmbeddingModel
¶
Bases: Module, SupportsV0Only, SupportsQuant
A model that uses Bert to provide embedding functionalities.
This class encapsulates the BertModel and provides an interface for embedding operations and customized pooling functions.
Attributes:
| Name | Type | Description |
|---|---|---|
| `model` | | An instance of BertModel used for forward operations. |
| `_pooler` | | An instance of Pooler used for pooling operations. |
Source code in vllm/model_executor/models/bert.py
model
instance-attribute
¶
model = _build_model(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "model"),
)
__init__
¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
_build_model
¶
_build_model(
vllm_config: VllmConfig, prefix: str = ""
) -> BertModel
_build_pooler
¶
_build_pooler(pooler_config: PoolerConfig) -> Pooler
Source code in vllm/model_executor/models/bert.py
forward
¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
load_weights
¶
Source code in vllm/model_executor/models/bert.py
BertEncoder
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
layer
instance-attribute
¶
layer = ModuleList(
[
(
BertLayer(
config=config,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.layer.{layer_idx}",
)
)
for layer_idx in (range(num_hidden_layers))
]
)
__init__
¶
__init__(vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
BertForSequenceClassification
¶
Bases: Module, SupportsV0Only, SupportsCrossEncoding, SupportsQuant
A model that uses Bert to provide sequence classification and cross-encoder scoring functionalities.
This class encapsulates a BertPoolingModel and provides a task-dispatching pooler with entries for the "encode", "classify", and "score" tasks.
Attributes:
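| Name | Type | Description |
|---|---|---|
| `bert` | `BertPoolingModel` | The wrapped BertPoolingModel, constructed with `embedding_class=BertEmbedding`. |
| `pooler` | `DispatchPooler` | A DispatchPooler mapping the "encode", "classify", and "score" tasks to their poolers. |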
Source code in vllm/model_executor/models/bert.py
bert
instance-attribute
¶
bert = BertPoolingModel(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, "bert"),
embedding_class=BertEmbedding,
)
pooler
instance-attribute
¶
pooler = DispatchPooler(
{
"encode": for_encode(pooler_config),
"classify": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_seq_cls(model_config),
),
"score": ClassifierPooler(
pooling=pooler,
classifier=classifier,
act_fn=act_fn_for_cross_encoder(model_config),
),
}
)
__init__
¶
__init__(*, vllm_config: VllmConfig, prefix: str = '')
Source code in vllm/model_executor/models/bert.py
forward
¶
forward(
input_ids: Optional[Tensor],
positions: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
BertIntermediate
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense
instance-attribute
¶
dense = ColumnParallelLinear(
input_size=hidden_size,
output_size=intermediate_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__
¶
__init__(
hidden_size: int,
intermediate_size: int,
hidden_act: str,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward
¶
BertLayer
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attention
instance-attribute
¶
attention = BertAttention(
hidden_size=hidden_size,
num_attention_heads=num_attention_heads,
layer_norm_eps=layer_norm_eps,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attention",
)
intermediate
instance-attribute
¶
intermediate = BertIntermediate(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
hidden_act=hidden_act,
quant_config=quant_config,
prefix=f"{prefix}.intermediate",
)
output
instance-attribute
¶
output = BertOutput(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
layer_norm_eps=layer_norm_eps,
quant_config=quant_config,
prefix=f"{prefix}.output",
)
__init__
¶
__init__(
config: BertConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
BertModel
¶
Bases: Module, SupportsQuant
Source code in vllm/model_executor/models/bert.py
(source lines 340–417)
encoder
instance-attribute
¶
encoder = BertEncoder(
vllm_config=vllm_config, prefix=f"{prefix}.encoder"
)
packed_modules_mapping
class-attribute
instance-attribute
¶
__init__
¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
_load_weights
¶
Source code in vllm/model_executor/models/bert.py
forward
¶
forward(
input_ids: Tensor,
position_ids: Tensor,
intermediate_tensors: Optional[
IntermediateTensors
] = None,
inputs_embeds: Optional[Tensor] = None,
token_type_ids: Optional[Tensor] = None,
) -> Tensor
Source code in vllm/model_executor/models/bert.py
load_weights
¶
Source code in vllm/model_executor/models/bert.py
BertOutput
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense
instance-attribute
¶
dense = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__
¶
__init__(
hidden_size: int,
intermediate_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward
¶
BertPooler
¶
Bases: Pooler
Source code in vllm/model_executor/models/bert.py
__init__
¶
forward
¶
forward(
hidden_states: Union[Tensor, list[Tensor]],
pooling_metadata: PoolingMetadata,
) -> Union[Tensor, list[Tensor]]
Source code in vllm/model_executor/models/bert.py
get_pooling_updates
¶
get_pooling_updates(
task: PoolingTask,
) -> PoolingParamsUpdate
get_supported_tasks
¶
get_supported_tasks() -> Set[PoolingTask]
BertPoolingModel
¶
Bases: BertModel
Source code in vllm/model_executor/models/bert.py
__init__
¶
__init__(
*,
vllm_config: VllmConfig,
prefix: str = "",
embedding_class: type[Module] = BertEmbedding,
) -> None
Source code in vllm/model_executor/models/bert.py
load_weights
¶
Source code in vllm/model_executor/models/bert.py
BertSelfAttention
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
attn
instance-attribute
¶
attn = Attention(
num_heads=num_heads,
head_size=head_dim,
scale=scaling,
num_kv_heads=num_kv_heads,
cache_config=cache_config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
attn_type=ENCODER_ONLY,
)
qkv_proj
instance-attribute
¶
qkv_proj = QKVParallelLinear(
hidden_size=hidden_size,
head_size=head_dim,
total_num_heads=total_num_heads,
total_num_kv_heads=total_num_kv_heads,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.qkv_proj",
)
__init__
¶
__init__(
hidden_size: int,
num_attention_heads: int,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)
Source code in vllm/model_executor/models/bert.py
forward
¶
BertSelfOutput
¶
Bases: Module
Source code in vllm/model_executor/models/bert.py
dense
instance-attribute
¶
dense = RowParallelLinear(
input_size=hidden_size,
output_size=hidden_size,
bias=True,
quant_config=quant_config,
prefix=f"{prefix}.dense",
)
__init__
¶
__init__(
hidden_size: int,
layer_norm_eps: float,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
)