# vllm.utils.flashinfer

Compatibility wrappers for FlashInfer API changes.

Users of vLLM should import FlashInfer functionality only through these wrappers rather than from `flashinfer` directly.
### `__all__` *(module attribute)*
# Names exported by this module, i.e. what `from vllm.utils.flashinfer import *`
# brings into scope.
__all__ = [
    "has_flashinfer",
    "flashinfer_trtllm_fp8_block_scale_moe",
    "flashinfer_cutlass_fused_moe",
    "fp4_quantize",
    "block_scale_interleave",
    "autotune",
    "has_flashinfer_moe",
    "has_flashinfer_cutlass_fused_moe",
]
### `autotune` *(module attribute)*
# Lazy wrapper around `flashinfer.autotuner.autotune`.
# The fallback ignores whatever it is called with and hands back a no-op
# context manager (`nullcontext()`).
# NOTE(review): presumably the fallback is what runs when FlashInfer cannot be
# imported -- confirm against `_lazy_import_wrapper`.
autotune = _lazy_import_wrapper(
    module_name="flashinfer.autotuner",
    attr_name="autotune",
    fallback_fn=lambda *_args, **_kwargs: nullcontext(),
)
### `block_scale_interleave` *(module attribute)*
# Lazy wrapper around `flashinfer.block_scale_interleave`.
# No explicit fallback is given, so the wrapper's default `fallback_fn` applies.
block_scale_interleave = _lazy_import_wrapper(
    module_name="flashinfer",
    attr_name="block_scale_interleave",
)
### `flashinfer_cutlass_fused_moe` *(module attribute)*
# Lazy wrapper around `flashinfer.fused_moe.cutlass_fused_moe`, re-exported
# here under a `flashinfer_`-prefixed name.
# No explicit fallback is given, so the wrapper's default `fallback_fn` applies.
flashinfer_cutlass_fused_moe = _lazy_import_wrapper(
    module_name="flashinfer.fused_moe",
    attr_name="cutlass_fused_moe",
)
### `flashinfer_trtllm_fp8_block_scale_moe` *(module attribute)*
# Lazy wrapper around `flashinfer.fused_moe.trtllm_fp8_block_scale_moe`,
# re-exported here under a `flashinfer_`-prefixed name.
# No explicit fallback is given, so the wrapper's default `fallback_fn` applies.
flashinfer_trtllm_fp8_block_scale_moe = _lazy_import_wrapper(
    module_name="flashinfer.fused_moe",
    attr_name="trtllm_fp8_block_scale_moe",
)
### `_get_submodule`

Safely import a submodule and return it, or `None` if it is not available.
### `_lazy_import_wrapper`

```python
_lazy_import_wrapper(
    module_name: str,
    attr_name: str,
    fallback_fn: Callable[..., Any] = _missing,
)
```

Create a lazy import wrapper for a specific function.

Source code in `vllm/utils/flashinfer.py`
### `_missing`

Placeholder for an unavailable FlashInfer backend.

Source code in `vllm/utils/flashinfer.py`
### `has_flashinfer` *(cached)*

```python
has_flashinfer() -> bool
```

Return `True` if FlashInfer is available.

Source code in `vllm/utils/flashinfer.py`
### `has_flashinfer_cutlass_fused_moe` *(cached)*

```python
has_flashinfer_cutlass_fused_moe() -> bool
```

Return `True` if the FlashInfer CUTLASS fused MoE kernel is available.