vllm_omni.attention.fish_kvcache_attn

FISH_KVCACHE_LONG_SPLIT_TOKENS (module attribute)

FISH_KVCACHE_LONG_SPLIT_TOKENS = 1024

FISH_KVCACHE_SMALL_PATH_MAX_SEQ_LEN (module attribute)

FISH_KVCACHE_SMALL_PATH_MAX_SEQ_LEN = 1024

can_use_fish_kvcache_attn

can_use_fish_kvcache_attn(
    *,
    query: Tensor,
    key_cache: Tensor,
    value_cache: Tensor,
    block_table: Tensor | None,
    seq_lens: Tensor,
    max_query_len: int,
    max_seq_len: int,
    dcp_world_size: int,
    use_cascade: bool,
    alibi_slopes: Any,
    sliding_window: Any,
    output_scale: Tensor | None = None,
    output_block_scale: Tensor | None = None,
) -> bool

fish_decode_kvcache_attn

fish_decode_kvcache_attn(
    query: Tensor,
    key_cache: Tensor,
    value_cache: Tensor,
    block_table: Tensor,
    seq_lens: Tensor,
    out: Tensor,
    *,
    scale: float,
    max_seq_len: int,
) -> Tensor

is_available

is_available() -> bool

is_fish_kvcache_attn_enabled

is_fish_kvcache_attn_enabled() -> bool

is_fish_kvcache_attn_required

is_fish_kvcache_attn_required() -> bool

load_error

load_error() -> Exception | None

prewarm_fish_kvcache_attn_workspace

prewarm_fish_kvcache_attn_workspace(
    query: Tensor, max_seq_len: int
) -> None