Skip to content

vllm_omni.diffusion.models.t5_encoder.t5_gemma_encoder

T5GemmaAttention

Bases: Module

head_dim instance-attribute

head_dim = head_dim

hidden_size instance-attribute

hidden_size = hidden_size

kv_size instance-attribute

kv_size = num_kv_heads * head_dim

num_heads instance-attribute

num_heads = total_num_heads // tp_size

num_kv_heads instance-attribute

num_kv_heads = max(1, total_num_kv_heads // tp_size)

o_proj instance-attribute

o_proj = RowParallelLinear(
    input_size=total_num_heads * head_dim,
    output_size=hidden_size,
    bias=False,
    input_is_parallel=True,
)

q_size instance-attribute

q_size = num_heads * head_dim

qkv_proj instance-attribute

qkv_proj = QKVParallelLinear(
    hidden_size=hidden_size,
    head_size=head_dim,
    total_num_heads=total_num_heads,
    total_num_kv_heads=total_num_kv_heads,
    bias=False,
)

rotary_emb instance-attribute

rotary_emb = get_rope(
    head_dim,
    max_position=max_position_embeddings,
    is_neox_style=True,
    rope_parameters={"base": rope_theta},
)

total_num_heads instance-attribute

total_num_heads = num_heads

total_num_kv_heads instance-attribute

total_num_kv_heads = num_kv_heads

forward

forward(
    positions: Tensor,
    hidden_states: Tensor,
    attention_mask: Tensor | None = None,
) -> Tensor

T5GemmaEncoderLayer

Bases: Module

mlp instance-attribute

mlp = T5GemmaMLP(
    hidden_size=hidden_size,
    intermediate_size=intermediate_size,
    hidden_act=hidden_activation,
)

post_feedforward_layernorm instance-attribute

post_feedforward_layernorm = T5GemmaRMSNorm(
    hidden_size, eps=rms_norm_eps
)

post_self_attn_layernorm instance-attribute

post_self_attn_layernorm = T5GemmaRMSNorm(
    hidden_size, eps=rms_norm_eps
)

pre_feedforward_layernorm instance-attribute

pre_feedforward_layernorm = T5GemmaRMSNorm(
    hidden_size, eps=rms_norm_eps
)

pre_self_attn_layernorm instance-attribute

pre_self_attn_layernorm = T5GemmaRMSNorm(
    hidden_size, eps=rms_norm_eps
)

self_attn instance-attribute

self_attn = T5GemmaAttention(
    hidden_size=hidden_size,
    num_heads=num_attention_heads,
    num_kv_heads=num_key_value_heads,
    head_dim=head_dim,
    max_position_embeddings=max_position_embeddings,
    rope_theta=rope_theta,
)

forward

forward(
    positions: Tensor,
    hidden_states: Tensor,
    attention_mask: Tensor | None,
) -> Tensor

T5GemmaEncoderModelTP

Bases: Module

config instance-attribute

config = config

device property

device: device

dtype property

dtype: dtype

embed_tokens instance-attribute

embed_tokens = VocabParallelEmbedding(
    vocab_size, hidden_size
)

layers instance-attribute

layers = ModuleList(
    [
        T5GemmaEncoderLayer(config)
        for _ in range(num_hidden_layers)
    ]
)

norm instance-attribute

norm = T5GemmaRMSNorm(hidden_size, eps=rms_norm_eps)

vocab_size instance-attribute

vocab_size = vocab_size

forward

forward(
    input_ids: Tensor, attention_mask: Tensor | None = None
) -> Tensor

load_weights

load_weights(
    weights: Iterable[tuple[str, Tensor]],
) -> set[str]

T5GemmaMLP

Bases: Module

act_fn instance-attribute

act_fn = get_act_fn(hidden_act)

down_proj instance-attribute

down_proj = RowParallelLinear(
    input_size=intermediate_size,
    output_size=hidden_size,
    bias=False,
    input_is_parallel=True,
)

gate_up_proj instance-attribute

gate_up_proj = MergedColumnParallelLinear(
    input_size=hidden_size,
    output_sizes=[intermediate_size, intermediate_size],
    bias=False,
    gather_output=False,
)

forward

forward(x: Tensor) -> Tensor

T5GemmaRMSNorm

Bases: Module

variance_epsilon instance-attribute

variance_epsilon = eps

weight instance-attribute

weight = Parameter(zeros(hidden_size))

forward

forward(hidden_states: Tensor) -> Tensor