Skip to content

vLLM Hardware Plugin for Intel® Gaudi®

hpu_row_parallel_linear

vllm_gaudi.lora.layers.hpu_row_parallel_linear ¶

HPU-specific LoRA layer for RowParallelLinear.

This module registers HPU-aware LoRA wrappers that can handle HPURowParallelLinear instances created via out-of-tree (OOT) registration.

HPURowParallelLinearWithLoRA ¶

Bases: RowParallelLinearWithLoRA

LoRA wrapper that can handle HPURowParallelLinear instances.

Source code in vllm_gaudi/lora/layers/hpu_row_parallel_linear.py

class HPURowParallelLinearWithLoRA(RowParallelLinearWithLoRA):
    """Row-parallel LoRA wrapper whose layer matching also covers HPU layers.

    Only ``can_replace_layer`` is overridden here; all other behavior is
    inherited from ``RowParallelLinearWithLoRA``.
    """

    @classmethod
    @_not_fully_sharded_can_replace
    def can_replace_layer(
        cls,
        source_layer: nn.Module,
        lora_config: LoRAConfig,
        packed_modules_list: list,
        model_config: PretrainedConfig | None = None,
    ) -> bool:
        """Tell whether ``source_layer`` is a ``RowParallelLinear`` instance.

        Args:
            source_layer: The candidate layer to be wrapped.
            lora_config: Not read by this body.
            packed_modules_list: Not read by this body.
            model_config: Not read by this body.

        Returns:
            ``True`` when ``source_layer`` is a ``RowParallelLinear`` or an
            instance of one of its subclasses, otherwise ``False``.

        NOTE(review): the ``_not_fully_sharded_can_replace`` decorator is
        defined outside this block and may further condition the result.
        """
        # An isinstance test (rather than an exact type comparison) is what
        # lets HPURowParallelLinear instances match alongside the base class.
        is_row_parallel = isinstance(source_layer, RowParallelLinear)
        return is_row_parallel

can_replace_layer `classmethod` ¶

can_replace_layer(
    source_layer: Module,
    lora_config: LoRAConfig,
    packed_modules_list: list,
    model_config: PretrainedConfig | None = None,
) -> bool

Source code in vllm_gaudi/lora/layers/hpu_row_parallel_linear.py

@classmethod
@_not_fully_sharded_can_replace
def can_replace_layer(
    cls,
    source_layer: nn.Module,
    lora_config: LoRAConfig,
    packed_modules_list: list,
    model_config: PretrainedConfig | None = None,
) -> bool:
    """Tell whether ``source_layer`` is a ``RowParallelLinear`` instance.

    Args:
        source_layer: The candidate layer to be wrapped.
        lora_config: Not read by this body.
        packed_modules_list: Not read by this body.
        model_config: Not read by this body.

    Returns:
        ``True`` when ``source_layer`` is a ``RowParallelLinear`` or an
        instance of one of its subclasses, otherwise ``False``.

    NOTE(review): the ``_not_fully_sharded_can_replace`` decorator is
    defined outside this block and may further condition the result.
    """
    # An isinstance test (rather than an exact type comparison) is what
    # lets HPURowParallelLinear instances match alongside the base class.
    is_row_parallel = isinstance(source_layer, RowParallelLinear)
    return is_row_parallel

HPURowParallelLinearWithShardedLoRA ¶

Bases: RowParallelLinearWithShardedLoRA

Sharded LoRA wrapper that can handle HPURowParallelLinear instances.

Source code in vllm_gaudi/lora/layers/hpu_row_parallel_linear.py

class HPURowParallelLinearWithShardedLoRA(RowParallelLinearWithShardedLoRA):
    """Sharded row-parallel LoRA wrapper whose matching also covers HPU layers.

    Only ``can_replace_layer`` is overridden here; all other behavior is
    inherited from ``RowParallelLinearWithShardedLoRA``.
    """

    @classmethod
    @_fully_sharded_can_replace
    def can_replace_layer(
        cls,
        source_layer: nn.Module,
        lora_config: LoRAConfig,
        packed_modules_list: list,
        model_config: PretrainedConfig | None = None,
    ) -> bool:
        """Tell whether ``source_layer`` is a ``RowParallelLinear`` instance.

        Args:
            source_layer: The candidate layer to be wrapped.
            lora_config: Not read by this body.
            packed_modules_list: Not read by this body.
            model_config: Not read by this body.

        Returns:
            ``True`` when ``source_layer`` is a ``RowParallelLinear`` or an
            instance of one of its subclasses, otherwise ``False``.

        NOTE(review): the ``_fully_sharded_can_replace`` decorator is
        defined outside this block and may further condition the result.
        """
        # HPURowParallelLinear subclasses RowParallelLinear, so an isinstance
        # test accepts both the base layer and the HPU variant.
        is_row_parallel = isinstance(source_layer, RowParallelLinear)
        return is_row_parallel

can_replace_layer `classmethod` ¶

can_replace_layer(
    source_layer: Module,
    lora_config: LoRAConfig,
    packed_modules_list: list,
    model_config: PretrainedConfig | None = None,
) -> bool

Source code in vllm_gaudi/lora/layers/hpu_row_parallel_linear.py

@classmethod
@_fully_sharded_can_replace
def can_replace_layer(
    cls,
    source_layer: nn.Module,
    lora_config: LoRAConfig,
    packed_modules_list: list,
    model_config: PretrainedConfig | None = None,
) -> bool:
    """Tell whether ``source_layer`` is a ``RowParallelLinear`` instance.

    Args:
        source_layer: The candidate layer to be wrapped.
        lora_config: Not read by this body.
        packed_modules_list: Not read by this body.
        model_config: Not read by this body.

    Returns:
        ``True`` when ``source_layer`` is a ``RowParallelLinear`` or an
        instance of one of its subclasses, otherwise ``False``.

    NOTE(review): the ``_fully_sharded_can_replace`` decorator is
    defined outside this block and may further condition the result.
    """
    # HPURowParallelLinear subclasses RowParallelLinear, so an isinstance
    # test accepts both the base layer and the HPU variant.
    is_row_parallel = isinstance(source_layer, RowParallelLinear)
    return is_row_parallel

register_hpu_lora_layers ¶

register_hpu_lora_layers()

Register HPU LoRA layers in the global lora_utils._all_lora_classes set.

This must be called before LoRA model creation so that the HPU-aware classes are used in place of the upstream ones, which are removed from the set.

Source code in vllm_gaudi/lora/layers/hpu_row_parallel_linear.py

def register_hpu_lora_layers():
    """Swap the upstream row-parallel LoRA classes for the HPU-aware ones.

    Operates on the global ``lora_utils._all_lora_classes`` set: the two
    upstream classes are discarded and the two HPU-aware subclasses defined
    in this module are added in their place.

    This must be called before LoRA model creation.
    """
    registry = lora_utils._all_lora_classes

    # Drop the upstream classes first (they use strict type check).
    # ``discard`` does not raise when an entry is already absent.
    for upstream_cls in (
        RowParallelLinearWithLoRA,
        RowParallelLinearWithShardedLoRA,
    ):
        registry.discard(upstream_cls)

    # Then register the HPU-aware replacements.
    for hpu_cls in (
        HPURowParallelLinearWithLoRA,
        HPURowParallelLinearWithShardedLoRA,
    ):
        registry.add(hpu_cls)