Skip to content

vllm_omni.diffusion.attention.backends.sage_attn3

logger (module attribute)

logger = init_logger(__name__)

SageAttention3Backend

Bases: AttentionBackend

accept_output_buffer (class attribute, instance attribute)

accept_output_buffer: bool = True

get_impl_cls (static method)

get_impl_cls() -> type[SageAttention3Impl]

get_name (static method)

get_name() -> str

get_supported_head_sizes (static method)

get_supported_head_sizes() -> list[int]

SageAttention3Impl

Bases: AttentionImpl

causal (instance attribute)

causal = causal

dropout (instance attribute)

dropout = get('dropout_p', 0.0)

softmax_scale (instance attribute)

softmax_scale = softmax_scale

forward_cuda

forward_cuda(
    query: Tensor,
    key: Tensor,
    value: Tensor,
    attn_metadata: AttentionMetadata | None = None,
) -> Tensor