Skip to content

Summary

Full package overview

Modules:

Name Description
attention
distributed
envs
extension
models
ops
platform
utils
v1

register

register()

Register the HPU platform.

Source code in vllm_gaudi/__init__.py
5
6
7
8
def register():
    """Register the HPU platform."""
    HpuPlatform.set_torch_compile()
    return "vllm_gaudi.platform.HpuPlatform"

register_models

register_models()
Source code in vllm_gaudi/__init__.py
def register_models():
    import vllm_gaudi.models.utils  # noqa: F401
    import vllm_gaudi.models.interfaces  # noqa: F401
    from .models import register_model
    register_model()

register_ops

register_ops()

Register custom PluggableLayers for the HPU platform

Source code in vllm_gaudi/__init__.py
def register_ops():
    """Register custom PluggableLayers for the HPU platform"""
    import vllm_gaudi.attention.oot_mla  # noqa: F401
    """Register custom ops for the HPU platform."""
    import vllm_gaudi.v1.sample.hpu_rejection_sampler  # noqa: F401
    import vllm_gaudi.distributed.kv_transfer.kv_connector.v1.hpu_nixl_connector  # noqa: F401
    if os.getenv('VLLM_HPU_HETERO_KV_LAYOUT', 'false').lower() == 'true':
        import vllm_gaudi.distributed.kv_transfer.kv_connector.v1.hetero_hpu_nixl_connector  # noqa: F401
    import vllm_gaudi.v1.kv_offload.worker.cpu_hpu  # noqa: F401
    import vllm_gaudi.ops.hpu_attention  # noqa: F401
    import vllm_gaudi.ops.hpu_fused_moe  # noqa: F401
    import vllm_gaudi.ops.hpu_grouped_topk_router  # noqa: F401
    import vllm_gaudi.ops.hpu_layernorm  # noqa: F401
    import vllm_gaudi.ops.hpu_lora  # noqa: F401
    import vllm_gaudi.ops.hpu_mamba_mixer2  # noqa: F401
    import vllm_gaudi.ops.hpu_rotary_embedding  # noqa: F401
    import vllm_gaudi.ops.hpu_modelopt  # noqa: F401
    import vllm_gaudi.ops.hpu_compressed_tensors  # noqa: F401
    import vllm_gaudi.ops.hpu_fp8  # noqa: F401
    import vllm_gaudi.ops.hpu_gptq  # noqa: F401
    import vllm_gaudi.ops.hpu_awq  # noqa: F401
    import vllm_gaudi.ops.hpu_conv  # noqa: F401
    import vllm_gaudi.ops.hpu_mm_encoder_attention  # noqa: F401

register_utils

register_utils()

Register utility functions for the HPU platform.

Source code in vllm_gaudi/__init__.py
def register_utils():
    """Register utility functions for the HPU platform."""
    import vllm_gaudi.utils  # noqa: F401