vllm.model_executor.models.aimv2
AIMv2Attention
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
__init__
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
forward
Source code in vllm/model_executor/models/aimv2.py
AIMv2Block
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
attn
instance-attribute
attn = AIMv2Attention(
config,
quant_config=quant_config,
prefix=f"{prefix}.attn",
)
mlp
instance-attribute
mlp = AIMv2SwiGLUFFN(
config,
quant_config=quant_config,
prefix=f"{prefix}.mlp",
)
__init__
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
forward
AIMv2Model
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
trunk
instance-attribute
trunk = AIMv2Transformer(
config,
quant_config=quant_config,
prefix=f"{prefix}.trunk",
)
__init__
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str = "",
)
Source code in vllm/model_executor/models/aimv2.py
forward
AIMv2PatchEmbed
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
proj
instance-attribute
proj = Conv2d(
num_channels,
hidden_size,
kernel_size=(patch_size, patch_size),
stride=(patch_size, patch_size),
)
AIMv2SwiGLUFFN
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
fc1
instance-attribute
fc1 = ReplicatedLinear(
in_features,
hidden_features,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.fc1",
)
fc2
instance-attribute
fc2 = ReplicatedLinear(
hidden_features,
in_features,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.fc2",
)
fc3
instance-attribute
fc3 = ReplicatedLinear(
in_features,
hidden_features,
bias=bias,
quant_config=quant_config,
prefix=f"{prefix}.fc3",
)
__init__
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
AIMv2Transformer
Bases: Module
Source code in vllm/model_executor/models/aimv2.py
blocks
instance-attribute
blocks = ModuleList(
[
AIMv2Block(
config,
quant_config,
prefix=f"{prefix}.blocks.{i}",
)
for i in range(num_hidden_layers)
]
)
__init__
__init__(
config: AIMv2Config,
quant_config: QuantizationConfig,
prefix: str,
)
Source code in vllm/model_executor/models/aimv2.py
forward
Source code in vllm/model_executor/models/aimv2.py
AIMv2ViTPreprocessor
Bases: Module