vllm_omni.platforms.npu ¶

Modules:

Name	Description
`models`
`omni_connectors`
`platform`
`profiler`
`quant`
`worker`

NPUOmniPlatform ¶

Bases: OmniPlatform, NPUPlatform

NPU/Ascend implementation of OmniPlatform.

Inherits all NPU-specific implementations from vllm-ascend's NPUPlatform and adds the Omni-specific interfaces defined by OmniPlatform.

dist_backend `class-attribute` `instance-attribute` ¶

dist_backend: str = 'hccl'

create_autocast_context `classmethod` ¶

create_autocast_context(
    *, device_type, dtype, enabled=True
)

get_default_stage_config_path `classmethod` ¶

get_default_stage_config_path() -> str

get_device_count `classmethod` ¶

get_device_count() -> int

get_device_memory `classmethod` ¶

get_device_memory(
    device: device | None = None,
) -> tuple[int, int]

get_device_total_memory `classmethod` ¶

get_device_total_memory(device_id: int = 0) -> int

get_device_version `classmethod` ¶

get_device_version() -> str | None

get_diffusion_attn_backend_cls `classmethod` ¶

get_diffusion_attn_backend_cls(
    selected_backend: str | None, head_size: int
) -> str

get_diffusion_model_impl_qualname `classmethod` ¶

get_diffusion_model_impl_qualname(op_name: str) -> str

get_diffusion_packed_modules_mapping `classmethod` ¶

get_diffusion_packed_modules_mapping(
    model_class: type[Module],
) -> dict[str, list[str]] | None

get_free_memory `classmethod` ¶

get_free_memory(device: device | None = None) -> int

get_graph_wrapper_cls `classmethod` ¶

get_graph_wrapper_cls() -> type

get_omni_ar_worker_cls `classmethod` ¶

get_omni_ar_worker_cls() -> str

get_omni_generation_worker_cls `classmethod` ¶

get_omni_generation_worker_cls() -> str

get_profiler_cls `classmethod` ¶

get_profiler_cls() -> str

get_torch_device `classmethod` ¶

get_torch_device(local_rank: int | None = None) -> device

init_diffusion_worker_vllm_config `classmethod` ¶

init_diffusion_worker_vllm_config(vllm_config: Any) -> None

prepare_diffusion_op_runtime `classmethod` ¶

prepare_diffusion_op_runtime(
    op_name: str, **kwargs: Any
) -> None

set_device `classmethod` ¶

set_device(device: device) -> None

set_forward_context `classmethod` ¶

set_forward_context(
    attn_metadata,
    vllm_config,
    *,
    cudagraph_runtime_mode,
    batch_descriptor,
)

supports_torch_inductor `classmethod` ¶

supports_torch_inductor() -> bool

synchronize `classmethod` ¶

synchronize() -> None

vllm_omni.platforms.npu ¶

NPUOmniPlatform ¶

dist_backend class-attribute instance-attribute ¶

create_autocast_context classmethod ¶

get_default_stage_config_path classmethod ¶

get_device_count classmethod ¶

get_device_memory classmethod ¶

get_device_total_memory classmethod ¶

get_device_version classmethod ¶

get_diffusion_attn_backend_cls classmethod ¶

get_diffusion_model_impl_qualname classmethod ¶

get_diffusion_packed_modules_mapping classmethod ¶

get_free_memory classmethod ¶

get_graph_wrapper_cls classmethod ¶

get_omni_ar_worker_cls classmethod ¶

get_omni_generation_worker_cls classmethod ¶

get_profiler_cls classmethod ¶

get_torch_device classmethod ¶

init_diffusion_worker_vllm_config classmethod ¶

prepare_diffusion_op_runtime classmethod ¶

set_device classmethod ¶

set_forward_context classmethod ¶

supports_torch_inductor classmethod ¶

synchronize classmethod ¶