vllm_omni.attention.fish_kvcache_attn

FISH_KVCACHE_LONG_SPLIT_TOKENS `module-attribute`

FISH_KVCACHE_LONG_SPLIT_TOKENS = 1024

FISH_KVCACHE_SMALL_PATH_MAX_SEQ_LEN `module-attribute`

FISH_KVCACHE_SMALL_PATH_MAX_SEQ_LEN = 1024

can_use_fish_kvcache_attn

can_use_fish_kvcache_attn(
    *,
    query: Tensor,
    key_cache: Tensor,
    value_cache: Tensor,
    block_table: Tensor | None,
    seq_lens: Tensor,
    max_query_len: int,
    max_seq_len: int,
    dcp_world_size: int,
    use_cascade: bool,
    alibi_slopes: Any,
    sliding_window: Any,
    output_scale: Tensor | None = None,
    output_block_scale: Tensor | None = None,
) -> bool

fish_decode_kvcache_attn

fish_decode_kvcache_attn(
    query: Tensor,
    key_cache: Tensor,
    value_cache: Tensor,
    block_table: Tensor,
    seq_lens: Tensor,
    out: Tensor,
    *,
    scale: float,
    max_seq_len: int,
) -> Tensor

is_available

is_available() -> bool

is_fish_kvcache_attn_enabled

is_fish_kvcache_attn_enabled() -> bool

is_fish_kvcache_attn_required

is_fish_kvcache_attn_required() -> bool

load_error

load_error() -> Exception | None

prewarm_fish_kvcache_attn_workspace

prewarm_fish_kvcache_attn_workspace(
    query: Tensor, max_seq_len: int
) -> None