vllm_omni.attention.fish_kvcache_triton
fish_decode_kvcache_attn_triton
fish_decode_kvcache_attn_triton(
    query: Tensor,
    key_cache: Tensor,
    value_cache: Tensor,
    block_table: Tensor,
    seq_lens: Tensor,
    out: Tensor,
    *,
    scale: float,
    max_seq_len: int,
    small_path_max_seq_len: int,
    long_split_tokens: int,
    partial_m: Tensor,
    partial_l: Tensor,
    partial_acc: Tensor,
) -> Tensor