vllm_omni.diffusion.models.t5_encoder.t5_gemma_encoder ¶
T5GemmaAttention ¶
Bases: Module
o_proj instance-attribute ¶
o_proj = RowParallelLinear(
input_size=total_num_heads * head_dim,
output_size=hidden_size,
bias=False,
input_is_parallel=True,
)
qkv_proj instance-attribute ¶
qkv_proj = QKVParallelLinear(
hidden_size=hidden_size,
head_size=head_dim,
total_num_heads=total_num_heads,
total_num_kv_heads=total_num_kv_heads,
bias=False,
)
rotary_emb instance-attribute ¶
rotary_emb = get_rope(
head_dim,
max_position=max_position_embeddings,
is_neox_style=True,
rope_parameters={"base": rope_theta},
)
forward ¶
forward(
positions: Tensor,
hidden_states: Tensor,
attention_mask: Tensor | None = None,
) -> Tensor
T5GemmaEncoderLayer ¶
Bases: Module
mlp instance-attribute ¶
mlp = T5GemmaMLP(
hidden_size=hidden_size,
intermediate_size=intermediate_size,
hidden_act=hidden_activation,
)
post_feedforward_layernorm instance-attribute ¶
post_feedforward_layernorm = T5GemmaRMSNorm(
hidden_size, eps=rms_norm_eps
)
post_self_attn_layernorm instance-attribute ¶
post_self_attn_layernorm = T5GemmaRMSNorm(
hidden_size, eps=rms_norm_eps
)
pre_feedforward_layernorm instance-attribute ¶
pre_feedforward_layernorm = T5GemmaRMSNorm(
hidden_size, eps=rms_norm_eps
)
pre_self_attn_layernorm instance-attribute ¶
pre_self_attn_layernorm = T5GemmaRMSNorm(
hidden_size, eps=rms_norm_eps
)
self_attn instance-attribute ¶
self_attn = T5GemmaAttention(
hidden_size=hidden_size,
num_heads=num_attention_heads,
num_kv_heads=num_key_value_heads,
head_dim=head_dim,
max_position_embeddings=max_position_embeddings,
rope_theta=rope_theta,
)
forward ¶
T5GemmaEncoderModelTP ¶
Bases: Module
layers instance-attribute ¶
layers = ModuleList(
[
T5GemmaEncoderLayer(config)
for _ in range(num_hidden_layers)
]
)
T5GemmaMLP ¶
Bases: Module
down_proj instance-attribute ¶
down_proj = RowParallelLinear(
input_size=intermediate_size,
output_size=hidden_size,
bias=False,
input_is_parallel=True,
)
gate_up_proj instance-attribute ¶
gate_up_proj = MergedColumnParallelLinear(
input_size=hidden_size,
output_sizes=[intermediate_size, intermediate_size],
bias=False,
gather_output=False,
)