`vllm.model_executor.layers.quantization.inc.schemes.inc_scheme` ¶

Classes:

INCScheme –

One class per quant type. Single registration point for the factory.

`INCScheme` ¶

Bases: ABC

One class per quant type. Single registration point for the factory.

Each subclass defines:

- `can_handle()`: when does this scheme apply?
- `get_linear_method()`: required — how to quantize Linear layers
- `get_moe_method()`: optional — how to quantize MoE layers
- `get_kvcache_method()`: optional — how to quantize KV cache

Schemes that don't support MoE or KV cache quantization inherit the default implementations, which raise `NotImplementedError`.

Methods:

get_kvcache_method –

Optional. Override if this scheme supports KV cache quantization.
get_moe_method –

Optional. Override if this scheme supports MoE.

Source code in vllm/model_executor/layers/quantization/inc/schemes/inc_scheme.py

class INCScheme(ABC):
    """Base class for INC quantization schemes, one subclass per quant type.

    Acts as the single registration point for the factory. A subclass
    supplies:
      - can_handle(): static predicate — does this scheme apply to the
        given layer config?
      - get_linear_method(): required — quantization method for Linear layers
      - get_moe_method(): optional — quantization method for MoE layers
      - get_kvcache_method(): optional — quantization method for the KV cache

    The two optional hooks raise NotImplementedError unless a subclass
    overrides them.
    """

    @staticmethod
    @abstractmethod
    def can_handle(layer_config: "INCLayerConfig") -> bool:
        """Report whether this scheme applies to ``layer_config``.

        Abstract: the base implementation only raises NotImplementedError.
        """
        raise NotImplementedError

    @abstractmethod
    def get_linear_method(
        self,
        config: "INCConfig",
        layer: "torch.nn.Module",
        prefix: str,
        layer_config: "INCLayerConfig",
    ) -> "LinearMethodBase":
        """Return the quantization method for a Linear layer.

        Abstract: the base implementation only raises NotImplementedError.
        """
        raise NotImplementedError

    def get_moe_method(
        self,
        config: "INCConfig",
        layer: "torch.nn.Module",
        prefix: str,
        layer_config: "INCLayerConfig",
    ) -> "FusedMoEMethodBase | None":
        """Optional hook: return the quantization method for an MoE layer.

        Raises:
            NotImplementedError: always, in this default implementation.
                The message names the concrete scheme class and includes
                ``layer_config``.
        """
        message = (
            f"{type(self).__name__} does not support MoE layers. "
            f"Layer config: {layer_config}"
        )
        raise NotImplementedError(message)

    def get_kvcache_method(
        self,
        config: "INCConfig",
        layer: "torch.nn.Module",
        prefix: str,
        layer_config: "INCLayerConfig",
    ) -> "QuantizationMethods":
        """Optional hook: return the quantization method for the KV cache.

        Raises:
            NotImplementedError: always, in this default implementation.
                The message names the concrete scheme class and includes
                ``layer_config``.
        """
        message = (
            f"{type(self).__name__} does not support KV cache quantization. "
            f"Layer config: {layer_config}"
        )
        raise NotImplementedError(message)

`get_kvcache_method(config, layer, prefix, layer_config)` ¶

Optional. Override if this scheme supports KV cache quantization. Default raises NotImplementedError.

Source code in vllm/model_executor/layers/quantization/inc/schemes/inc_scheme.py

def get_kvcache_method(
    self,
    config: "INCConfig",
    layer: "torch.nn.Module",
    prefix: str,
    layer_config: "INCLayerConfig",
) -> "QuantizationMethods":
    """Optional hook: return the quantization method for the KV cache.

    Raises:
        NotImplementedError: always, in this default implementation.
            The message names the concrete scheme class and includes
            ``layer_config``.
    """
    message = (
        f"{type(self).__name__} does not support KV cache quantization. "
        f"Layer config: {layer_config}"
    )
    raise NotImplementedError(message)

`get_moe_method(config, layer, prefix, layer_config)` ¶

Optional. Override if this scheme supports MoE. Default raises NotImplementedError.

Source code in vllm/model_executor/layers/quantization/inc/schemes/inc_scheme.py

def get_moe_method(
    self,
    config: "INCConfig",
    layer: "torch.nn.Module",
    prefix: str,
    layer_config: "INCLayerConfig",
) -> "FusedMoEMethodBase | None":
    """Optional hook: return the quantization method for an MoE layer.

    Raises:
        NotImplementedError: always, in this default implementation.
            The message names the concrete scheme class and includes
            ``layer_config``.
    """
    message = (
        f"{type(self).__name__} does not support MoE layers. "
        f"Layer config: {layer_config}"
    )
    raise NotImplementedError(message)

vllm.model_executor.layers.quantization.inc.schemes.inc_scheme ¶

INCScheme ¶

get_kvcache_method(config, layer, prefix, layer_config) ¶

get_moe_method(config, layer, prefix, layer_config) ¶

`vllm.model_executor.layers.quantization.inc.schemes.inc_scheme` ¶

`INCScheme` ¶

`get_kvcache_method(config, layer, prefix, layer_config)` ¶

`get_moe_method(config, layer, prefix, layer_config)` ¶