vllm.model_executor.models.interfaces

MultiModalEmbeddings module-attribute

MultiModalEmbeddings = Union[
    list[Tensor], Tensor, tuple[Tensor, ...]
]

The output embeddings must be one of the following formats:

  • A list or tuple of 2D tensors, where each tensor corresponds to one input multimodal data item (e.g., an image).
  • A single 3D tensor, with the batch dimension grouping the 2D tensors.
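
For illustration, a minimal sketch of the two accepted shapes; the tensor sizes below are made up:

import torch

# Format 1: a list (or tuple) of 2D tensors, one per multimodal item.
# Per-item lengths may differ, so a list/tuple is the natural container.
embeddings_as_list = [
    torch.randn(576, 4096),  # e.g. image 1
    torch.randn(256, 4096),  # e.g. image 2
]

# Format 2: a single 3D tensor whose batch dimension groups the 2D tensors
# (only possible when every item has the same length).
embeddings_as_batch = torch.randn(2, 576, 4096)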

logger module-attribute

logger = init_logger(__name__)

HasInnerState

Bases: Protocol

The interface required for all models that have inner state.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class HasInnerState(Protocol):
    """The interface required for all models that has inner state."""

    has_inner_state: ClassVar[Literal[True]] = True
    """
        A flag that indicates this model has inner state.
        Models that has inner state usually need access to the scheduler_config
        for max_num_seqs, etc. True for e.g. both Mamba and Jamba.
    """

has_inner_state class-attribute

has_inner_state: Literal[True] = True

A flag that indicates this model has inner state. Models that have inner state usually need access to the scheduler_config for max_num_seqs, etc. True for both Mamba and Jamba, for example.
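
As a hedged illustration (the model class below is hypothetical; only HasInnerState and has_inner_state come from this module), a model opts in by inheriting the protocol, which sets the flag through the MRO:

import torch.nn as nn

from vllm.model_executor.models.interfaces import HasInnerState, has_inner_state

class MyMambaLikeModel(nn.Module, HasInnerState):
    """Hypothetical model; inheriting HasInnerState sets has_inner_state = True."""

assert has_inner_state(MyMambaLikeModel)    # the check works on the class
assert has_inner_state(MyMambaLikeModel())  # and on an instance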

HasNoOps

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class HasNoOps(Protocol):
    has_noops: ClassVar[Literal[True]] = True

has_noops class-attribute

has_noops: Literal[True] = True

IsAttentionFree

Bases: Protocol

The interface required for all models like Mamba that lack attention, but do have state whose size is constant with respect to the number of tokens.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class IsAttentionFree(Protocol):
    """The interface required for all models like Mamba that lack attention,
    but do have state whose size is constant wrt the number of tokens."""

    is_attention_free: ClassVar[Literal[True]] = True
    """
        A flag that indicates this model has no attention.
        Used for block manager and attention backend selection.
        True for Mamba but not Jamba.
    """

is_attention_free class-attribute

is_attention_free: Literal[True] = True

A flag that indicates this model has no attention. Used for block manager and attention backend selection. True for Mamba but not Jamba.

IsHybrid

Bases: Protocol

The interface required for all models like Jamba that have both attention and mamba blocks; it also indicates that the model's hf_config has 'layers_block_type'.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class IsHybrid(Protocol):
    """The interface required for all models like Jamba that have both
    attention and mamba blocks, indicates that 
    hf_config has 'layers_block_type'"""

    is_hybrid: ClassVar[Literal[True]] = True
    """
        A flag that indicates this model has both mamba and attention blocks
        , also indicates that the model's hf_config has 
        'layers_block_type' """

is_hybrid class-attribute

is_hybrid: Literal[True] = True

A flag that indicates this model has both mamba and attention blocks; it also indicates that the model's hf_config has 'layers_block_type'.
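
As a rough illustration (the values below are made up), the flag implies that per-layer block types can be read from the HF config, for example:

# Hypothetical hf_config entry for a hybrid model: one entry per decoder layer.
layers_block_type = ["mamba", "mamba", "attention", "mamba"]

attention_layer_ids = [
    i for i, block in enumerate(layers_block_type) if block == "attention"
]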

SupportsCrossEncoding

Bases: Protocol

The interface required for all models that support cross encoding.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class SupportsCrossEncoding(Protocol):
    """The interface required for all models that support cross encoding."""

    supports_cross_encoding: ClassVar[Literal[True]] = True

supports_cross_encoding class-attribute

supports_cross_encoding: Literal[True] = True

SupportsLoRA

Bases: Protocol

The interface required for all models that support LoRA.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class SupportsLoRA(Protocol):
    """The interface required for all models that support LoRA."""

    supports_lora: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model supports LoRA.

    Note:
        There is no need to redefine this flag if this class is in the
        MRO of your model class.
    """
    # The `embedding_module` and `embedding_padding_modules`
    # are empty by default.
    embedding_modules: ClassVar[dict[str, str]] = {}
    embedding_padding_modules: ClassVar[list[str]] = []
    packed_modules_mapping: ClassVar[dict[str, list[str]]] = {}

embedding_modules class-attribute

embedding_modules: dict[str, str] = {}

embedding_padding_modules class-attribute

embedding_padding_modules: list[str] = []

packed_modules_mapping class-attribute

packed_modules_mapping: dict[str, list[str]] = {}

supports_lora class-attribute

supports_lora: Literal[True] = True

A flag that indicates this model supports LoRA.

Note

There is no need to redefine this flag if this class is in the MRO of your model class.
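
As a hedged sketch (the module names below are illustrative, not taken from any specific model), a model typically inherits the protocol and declares its LoRA-related attributes:

import torch.nn as nn

from vllm.model_executor.models.interfaces import SupportsLoRA

class MyModelForCausalLM(nn.Module, SupportsLoRA):
    """Hypothetical model; inheriting SupportsLoRA sets supports_lora = True."""

    # Illustrative module names; a real model lists its own layers.
    packed_modules_mapping = {
        "qkv_proj": ["q_proj", "k_proj", "v_proj"],
        "gate_up_proj": ["gate_proj", "up_proj"],
    }
    embedding_modules = {
        "embed_tokens": "input_embeddings",
        "lm_head": "output_embeddings",
    }
    embedding_padding_modules = ["lm_head"]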

SupportsMultiModal

Bases: Protocol

The interface required for all multi-modal models.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class SupportsMultiModal(Protocol):
    """The interface required for all multi-modal models."""

    supports_multimodal: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model supports multi-modal inputs.

    Note:
        There is no need to redefine this flag if this class is in the
        MRO of your model class.
    """

    def get_multimodal_embeddings(self,
                                  **kwargs: object) -> MultiModalEmbeddings:
        """
        Returns multimodal embeddings generated from multimodal kwargs 
        to be merged with text embeddings.

        Note:
            The returned multimodal embeddings must be in the same order as
            the appearances of their corresponding multimodal data item in the
            input prompt.
        """
        ...

    def get_language_model(self) -> torch.nn.Module:
        """
        Returns the underlying language model used for text generation.

        This is typically the `torch.nn.Module` instance responsible for 
        processing the merged multimodal embeddings and producing hidden states

        Returns:
            torch.nn.Module: The core language model component.
        """
        ...

    # Only for models that support v0 chunked prefill
    # TODO(ywang96): Remove this overload once v0 is deprecated
    @overload
    def get_input_embeddings(
        self,
        input_ids: Tensor,
        multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
        attn_metadata: Optional["AttentionMetadata"] = None,
    ) -> Tensor:
        ...

    @overload
    def get_input_embeddings(
        self,
        input_ids: Tensor,
        multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
    ) -> Tensor:
        """
        Returns the input embeddings merged from the text embeddings from 
        input_ids and the multimodal embeddings generated from multimodal 
        kwargs.
        """
        ...

supports_multimodal class-attribute

supports_multimodal: Literal[True] = True

A flag that indicates this model supports multi-modal inputs.

Note

There is no need to redefine this flag if this class is in the MRO of your model class.

get_language_model

get_language_model() -> Module

Returns the underlying language model used for text generation.

This is typically the torch.nn.Module instance responsible for processing the merged multimodal embeddings and producing hidden states.

Returns:

  • torch.nn.Module: The core language model component.

Source code in vllm/model_executor/models/interfaces.py
def get_language_model(self) -> torch.nn.Module:
    """
    Returns the underlying language model used for text generation.

    This is typically the `torch.nn.Module` instance responsible for 
    processing the merged multimodal embeddings and producing hidden states

    Returns:
        torch.nn.Module: The core language model component.
    """
    ...

get_multimodal_embeddings

get_multimodal_embeddings(
    **kwargs: object,
) -> MultiModalEmbeddings

Returns multimodal embeddings generated from multimodal kwargs to be merged with text embeddings.

Note

The returned multimodal embeddings must be in the same order as the appearances of their corresponding multimodal data item in the input prompt.

Source code in vllm/model_executor/models/interfaces.py
def get_multimodal_embeddings(self,
                              **kwargs: object) -> MultiModalEmbeddings:
    """
    Returns multimodal embeddings generated from multimodal kwargs 
    to be merged with text embeddings.

    Note:
        The returned multimodal embeddings must be in the same order as
        the appearances of their corresponding multimodal data item in the
        input prompt.
    """
    ...
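
A hedged implementation sketch; pixel_values and self.vision_encoder are hypothetical stand-ins for whatever the concrete model actually receives and owns:

def get_multimodal_embeddings(self, **kwargs: object) -> MultiModalEmbeddings:
    # Hypothetical: one entry per image, already in prompt order.
    pixel_values = kwargs["pixel_values"]
    # Return one 2D tensor per image, preserving the order in which the
    # corresponding images appear in the input prompt.
    return [self.vision_encoder(image) for image in pixel_values]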

SupportsPP

Bases: Protocol

The interface required for all models that support pipeline parallel.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class SupportsPP(Protocol):
    """The interface required for all models that support pipeline parallel."""

    supports_pp: ClassVar[Literal[True]] = True
    """
    A flag that indicates this model supports pipeline parallel.

    Note:
        There is no need to redefine this flag if this class is in the
        MRO of your model class.
    """

    def make_empty_intermediate_tensors(
        self,
        batch_size: int,
        dtype: torch.dtype,
        device: torch.device,
    ) -> "IntermediateTensors":
        """Called when PP rank > 0 for profiling purposes."""
        ...

    def forward(
        self,
        *,
        intermediate_tensors: Optional["IntermediateTensors"],
    ) -> Union[Tensor, "IntermediateTensors"]:
        """
        Accept [`IntermediateTensors`][vllm.sequence.IntermediateTensors] when
        PP rank > 0.

        Return [`IntermediateTensors`][vllm.sequence.IntermediateTensors] only
        for the last PP rank.
        """
        ...

supports_pp class-attribute

supports_pp: Literal[True] = True

A flag that indicates this model supports pipeline parallel.

Note

There is no need to redefine this flag if this class is in the MRO of your model class.

forward

forward(
    *, intermediate_tensors: Optional[IntermediateTensors]
) -> Union[Tensor, IntermediateTensors]

Accept IntermediateTensors when PP rank > 0.

Return IntermediateTensors only for the last PP rank.

Source code in vllm/model_executor/models/interfaces.py
def forward(
    self,
    *,
    intermediate_tensors: Optional["IntermediateTensors"],
) -> Union[Tensor, "IntermediateTensors"]:
    """
    Accept [`IntermediateTensors`][vllm.sequence.IntermediateTensors] when
    PP rank > 0.

    Return [`IntermediateTensors`][vllm.sequence.IntermediateTensors] only
    for the last PP rank.
    """
    ...
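
A rough skeleton of that contract; embed_inputs, run_local_layers, and is_last_pp_rank are hypothetical helpers, not part of this interface:

def forward(self, *, intermediate_tensors):
    if intermediate_tensors is not None:
        # PP rank > 0: resume from hidden states produced by the previous rank.
        hidden_states = intermediate_tensors["hidden_states"]
    else:
        # First PP rank: compute hidden states from the inputs.
        hidden_states = self.embed_inputs()

    hidden_states = self.run_local_layers(hidden_states)

    if not self.is_last_pp_rank:
        # Intermediate ranks hand off partial results to the next rank.
        return IntermediateTensors({"hidden_states": hidden_states})
    return hidden_states  # only the last rank returns a plain tensor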

make_empty_intermediate_tensors

make_empty_intermediate_tensors(
    batch_size: int, dtype: dtype, device: device
) -> IntermediateTensors

Called when PP rank > 0 for profiling purposes.

Source code in vllm/model_executor/models/interfaces.py
def make_empty_intermediate_tensors(
    self,
    batch_size: int,
    dtype: torch.dtype,
    device: torch.device,
) -> "IntermediateTensors":
    """Called when PP rank > 0 for profiling purposes."""
    ...

SupportsQuant

The interface required for all models that support quantization.

Source code in vllm/model_executor/models/interfaces.py
class SupportsQuant:
    """The interface required for all models that support quantization."""

    packed_modules_mapping: ClassVar[dict[str, list[str]]] = {}
    quant_config: Optional[QuantizationConfig] = None

    def __new__(cls, *args, **kwargs) -> Self:
        instance = super().__new__(cls)
        quant_config = cls._find_quant_config(*args, **kwargs)
        if quant_config is not None:
            instance.quant_config = quant_config
            instance.quant_config.packed_modules_mapping.update(
                cls.packed_modules_mapping)
        return instance

    @staticmethod
    def _find_quant_config(*args, **kwargs) -> Optional[QuantizationConfig]:
        from vllm.config import VllmConfig  # avoid circular import

        args_values = list(args) + list(kwargs.values())
        for arg in args_values:
            if isinstance(arg, VllmConfig):
                return arg.quant_config

            if isinstance(arg, QuantizationConfig):
                return arg

        return None

packed_modules_mapping class-attribute

packed_modules_mapping: dict[str, list[str]] = {}

quant_config class-attribute instance-attribute

quant_config: Optional[QuantizationConfig] = None

__new__

__new__(*args, **kwargs) -> Self
Source code in vllm/model_executor/models/interfaces.py
def __new__(cls, *args, **kwargs) -> Self:
    instance = super().__new__(cls)
    quant_config = cls._find_quant_config(*args, **kwargs)
    if quant_config is not None:
        instance.quant_config = quant_config
        instance.quant_config.packed_modules_mapping.update(
            cls.packed_modules_mapping)
    return instance

_find_quant_config staticmethod

_find_quant_config(
    *args, **kwargs
) -> Optional[QuantizationConfig]
Source code in vllm/model_executor/models/interfaces.py
@staticmethod
def _find_quant_config(*args, **kwargs) -> Optional[QuantizationConfig]:
    from vllm.config import VllmConfig  # avoid circular import

    args_values = list(args) + list(kwargs.values())
    for arg in args_values:
        if isinstance(arg, VllmConfig):
            return arg.quant_config

        if isinstance(arg, QuantizationConfig):
            return arg

    return None
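
A hedged usage sketch: because discovery happens in __new__, a subclass only needs a VllmConfig (or QuantizationConfig) somewhere among its constructor arguments. Everything below except SupportsQuant itself is hypothetical:

import torch.nn as nn

from vllm.model_executor.models.interfaces import SupportsQuant

class MyQuantizableModel(nn.Module, SupportsQuant):
    packed_modules_mapping = {"qkv_proj": ["q_proj", "k_proj", "v_proj"]}

    def __init__(self, *, vllm_config, prefix: str = ""):
        super().__init__()
        # By the time __init__ runs, SupportsQuant.__new__ has already copied
        # vllm_config.quant_config onto self.quant_config (when one is set)
        # and merged this class's packed_modules_mapping into it.
        if self.quant_config is not None:
            pass  # e.g. build quantized layers here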

SupportsTranscription

Bases: Protocol

The interface required for all models that support transcription.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class SupportsTranscription(Protocol):
    """The interface required for all models that support transcription."""

    supports_transcription: ClassVar[Literal[True]] = True

supports_transcription class-attribute

supports_transcription: Literal[True] = True

SupportsV0Only

Bases: Protocol

Models with this interface are not compatible with V1 vLLM.

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class SupportsV0Only(Protocol):
    """Models with this interface are not compatible with V1 vLLM."""

    supports_v0_only: ClassVar[Literal[True]] = True

supports_v0_only class-attribute

supports_v0_only: Literal[True] = True

_HasInnerStateType

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class _HasInnerStateType(Protocol):
    has_inner_state: ClassVar[Literal[True]]

has_inner_state class-attribute

has_inner_state: Literal[True]

_HasNoOpsType

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class _HasNoOpsType(Protocol):
    has_noops: ClassVar[Literal[True]]

has_noops class-attribute

has_noops: Literal[True]

_IsAttentionFreeType

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class _IsAttentionFreeType(Protocol):
    is_attention_free: ClassVar[Literal[True]]

is_attention_free class-attribute

is_attention_free: Literal[True]

_IsHybridType

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class _IsHybridType(Protocol):
    is_hybrid: ClassVar[Literal[True]]

is_hybrid class-attribute

is_hybrid: Literal[True]

_SupportsLoRAType

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class _SupportsLoRAType(Protocol):
    supports_lora: Literal[True]

    packed_modules_mapping: dict[str, list[str]]
    embedding_modules: dict[str, str]
    embedding_padding_modules: list[str]

embedding_modules instance-attribute

embedding_modules: dict[str, str]

embedding_padding_modules instance-attribute

embedding_padding_modules: list[str]

packed_modules_mapping instance-attribute

packed_modules_mapping: dict[str, list[str]]

supports_lora instance-attribute

supports_lora: Literal[True]

_SupportsMultiModalType

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class _SupportsMultiModalType(Protocol):
    supports_multimodal: Literal[True]

supports_multimodal instance-attribute

supports_multimodal: Literal[True]

_SupportsPPType

Bases: Protocol

Source code in vllm/model_executor/models/interfaces.py
@runtime_checkable
class _SupportsPPType(Protocol):
    supports_pp: Literal[True]

    def make_empty_intermediate_tensors(
        self,
        batch_size: int,
        dtype: torch.dtype,
        device: torch.device,
    ) -> "IntermediateTensors":
        ...

    def forward(
        self,
        *,
        intermediate_tensors: Optional["IntermediateTensors"],
    ) -> Union[Tensor, "IntermediateTensors"]:
        ...

supports_pp instance-attribute

supports_pp: Literal[True]

forward

forward(
    *, intermediate_tensors: Optional[IntermediateTensors]
) -> Union[Tensor, IntermediateTensors]
Source code in vllm/model_executor/models/interfaces.py
def forward(
    self,
    *,
    intermediate_tensors: Optional["IntermediateTensors"],
) -> Union[Tensor, "IntermediateTensors"]:
    ...

make_empty_intermediate_tensors

make_empty_intermediate_tensors(
    batch_size: int, dtype: dtype, device: device
) -> IntermediateTensors
Source code in vllm/model_executor/models/interfaces.py
def make_empty_intermediate_tensors(
    self,
    batch_size: int,
    dtype: torch.dtype,
    device: torch.device,
) -> "IntermediateTensors":
    ...

_supports_cross_encoding

_supports_cross_encoding(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[SupportsCrossEncoding]],
    TypeIs[SupportsCrossEncoding],
]
Source code in vllm/model_executor/models/interfaces.py
def _supports_cross_encoding(
    model: Union[type[object], object],
) -> Union[TypeIs[type[SupportsCrossEncoding]], TypeIs[SupportsCrossEncoding]]:

    if isinstance(model, type):
        return isinstance(model, SupportsCrossEncoding)

    return isinstance(model, SupportsCrossEncoding)

_supports_lora

_supports_lora(model: Union[type[object], object]) -> bool
Source code in vllm/model_executor/models/interfaces.py
def _supports_lora(model: Union[type[object], object]) -> bool:
    if isinstance(model, type):
        return isinstance(model, _SupportsLoRAType)

    return isinstance(model, SupportsLoRA)

_supports_pp_attributes

_supports_pp_attributes(
    model: Union[type[object], object],
) -> bool
Source code in vllm/model_executor/models/interfaces.py
def _supports_pp_attributes(model: Union[type[object], object]) -> bool:
    if isinstance(model, type):
        return isinstance(model, _SupportsPPType)

    return isinstance(model, SupportsPP)

_supports_pp_inspect

_supports_pp_inspect(
    model: Union[type[object], object],
) -> bool
Source code in vllm/model_executor/models/interfaces.py
def _supports_pp_inspect(model: Union[type[object], object]) -> bool:
    model_forward = getattr(model, "forward", None)
    if not callable(model_forward):
        return False

    return supports_kw(model_forward, "intermediate_tensors")

has_inner_state

has_inner_state(model: object) -> TypeIs[HasInnerState]
has_inner_state(
    model: type[object],
) -> TypeIs[type[HasInnerState]]
has_inner_state(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[HasInnerState]], TypeIs[HasInnerState]
]
Source code in vllm/model_executor/models/interfaces.py
def has_inner_state(
    model: Union[type[object], object]
) -> Union[TypeIs[type[HasInnerState]], TypeIs[HasInnerState]]:
    if isinstance(model, type):
        return isinstance(model, _HasInnerStateType)

    return isinstance(model, HasInnerState)

has_noops

has_noops(model: object) -> TypeIs[HasNoOps]
has_noops(model: type[object]) -> TypeIs[type[HasNoOps]]
has_noops(
    model: Union[type[object], object],
) -> Union[TypeIs[type[HasNoOps]], TypeIs[HasNoOps]]
Source code in vllm/model_executor/models/interfaces.py
def has_noops(
    model: Union[type[object], object]
) -> Union[TypeIs[type[HasNoOps]], TypeIs[HasNoOps]]:
    if isinstance(model, type):
        return isinstance(model, _HasNoOpsType)

    return isinstance(model, HasNoOps)

is_attention_free

is_attention_free(model: object) -> TypeIs[IsAttentionFree]
is_attention_free(
    model: type[object],
) -> TypeIs[type[IsAttentionFree]]
is_attention_free(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[IsAttentionFree]], TypeIs[IsAttentionFree]
]
Source code in vllm/model_executor/models/interfaces.py
def is_attention_free(
    model: Union[type[object], object]
) -> Union[TypeIs[type[IsAttentionFree]], TypeIs[IsAttentionFree]]:
    if isinstance(model, type):
        return isinstance(model, _IsAttentionFreeType)

    return isinstance(model, IsAttentionFree)

is_hybrid

is_hybrid(model: object) -> TypeIs[IsHybrid]
is_hybrid(model: type[object]) -> TypeIs[type[IsHybrid]]
is_hybrid(
    model: Union[type[object], object],
) -> Union[TypeIs[type[IsHybrid]], TypeIs[IsHybrid]]
Source code in vllm/model_executor/models/interfaces.py
def is_hybrid(
    model: Union[type[object], object]
) -> Union[TypeIs[type[IsHybrid]], TypeIs[IsHybrid]]:
    if isinstance(model, type):
        return isinstance(model, _IsHybridType)

    return isinstance(model, IsHybrid)

supports_cross_encoding

supports_cross_encoding(
    model: type[object],
) -> TypeIs[type[SupportsCrossEncoding]]
supports_cross_encoding(
    model: object,
) -> TypeIs[SupportsCrossEncoding]
supports_cross_encoding(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[SupportsCrossEncoding]],
    TypeIs[SupportsCrossEncoding],
]
Source code in vllm/model_executor/models/interfaces.py
def supports_cross_encoding(
    model: Union[type[object], object],
) -> Union[TypeIs[type[SupportsCrossEncoding]], TypeIs[SupportsCrossEncoding]]:
    return is_pooling_model(model) and _supports_cross_encoding(model)

supports_lora

supports_lora(
    model: type[object],
) -> TypeIs[type[SupportsLoRA]]
supports_lora(model: object) -> TypeIs[SupportsLoRA]
supports_lora(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[SupportsLoRA]], TypeIs[SupportsLoRA]
]
Source code in vllm/model_executor/models/interfaces.py
def supports_lora(
    model: Union[type[object], object],
) -> Union[TypeIs[type[SupportsLoRA]], TypeIs[SupportsLoRA]]:
    result = _supports_lora(model)

    if not result:
        lora_attrs = (
            "packed_modules_mapping",
            "embedding_modules",
            "embedding_padding_modules",
        )
        missing_attrs = tuple(attr for attr in lora_attrs
                              if not hasattr(model, attr))

        if getattr(model, "supports_lora", False):
            if missing_attrs:
                logger.warning(
                    "The model (%s) sets `supports_lora=True`, "
                    "but is missing LoRA-specific attributes: %s",
                    model,
                    missing_attrs,
                )
        else:
            if not missing_attrs:
                logger.warning(
                    "The model (%s) contains all LoRA-specific attributes, "
                    "but does not set `supports_lora=True`.", model)

    return result

supports_multimodal

supports_multimodal(
    model: type[object],
) -> TypeIs[type[SupportsMultiModal]]
supports_multimodal(
    model: object,
) -> TypeIs[SupportsMultiModal]
supports_multimodal(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[SupportsMultiModal]],
    TypeIs[SupportsMultiModal],
]
Source code in vllm/model_executor/models/interfaces.py
def supports_multimodal(
    model: Union[type[object], object],
) -> Union[TypeIs[type[SupportsMultiModal]], TypeIs[SupportsMultiModal]]:
    if isinstance(model, type):
        return isinstance(model, _SupportsMultiModalType)

    return isinstance(model, SupportsMultiModal)

supports_pp

supports_pp(
    model: type[object],
) -> TypeIs[type[SupportsPP]]
supports_pp(model: object) -> TypeIs[SupportsPP]
supports_pp(
    model: Union[type[object], object],
) -> Union[
    bool, TypeIs[type[SupportsPP]], TypeIs[SupportsPP]
]
Source code in vllm/model_executor/models/interfaces.py
def supports_pp(
    model: Union[type[object], object],
) -> Union[bool, TypeIs[type[SupportsPP]], TypeIs[SupportsPP]]:
    supports_attributes = _supports_pp_attributes(model)
    supports_inspect = _supports_pp_inspect(model)

    if supports_attributes and not supports_inspect:
        logger.warning(
            "The model (%s) sets `supports_pp=True`, but does not accept "
            "`intermediate_tensors` in its `forward` method", model)

    if not supports_attributes:
        pp_attrs = ("make_empty_intermediate_tensors", )
        missing_attrs = tuple(attr for attr in pp_attrs
                              if not hasattr(model, attr))

        if getattr(model, "supports_pp", False):
            if missing_attrs:
                logger.warning(
                    "The model (%s) sets `supports_pp=True`, "
                    "but is missing PP-specific attributes: %s",
                    model,
                    missing_attrs,
                )
        else:
            if not missing_attrs:
                logger.warning(
                    "The model (%s) contains all PP-specific attributes, "
                    "but does not set `supports_pp=True`.", model)

    return supports_attributes and supports_inspect

supports_transcription

supports_transcription(
    model: type[object],
) -> TypeIs[type[SupportsTranscription]]
supports_transcription(
    model: object,
) -> TypeIs[SupportsTranscription]
supports_transcription(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[SupportsTranscription]],
    TypeIs[SupportsTranscription],
]
Source code in vllm/model_executor/models/interfaces.py
def supports_transcription(
    model: Union[type[object], object],
) -> Union[TypeIs[type[SupportsTranscription]], TypeIs[SupportsTranscription]]:
    if isinstance(model, type):
        return isinstance(model, SupportsTranscription)

    return isinstance(model, SupportsTranscription)

supports_v0_only

supports_v0_only(
    model: type[object],
) -> TypeIs[type[SupportsV0Only]]
supports_v0_only(model: object) -> TypeIs[SupportsV0Only]
supports_v0_only(
    model: Union[type[object], object],
) -> Union[
    TypeIs[type[SupportsV0Only]], TypeIs[SupportsV0Only]
]
Source code in vllm/model_executor/models/interfaces.py
def supports_v0_only(
    model: Union[type[object], object],
) -> Union[TypeIs[type[SupportsV0Only]], TypeIs[SupportsV0Only]]:
    if isinstance(model, type):
        return isinstance(model, SupportsV0Only)

    return isinstance(model, SupportsV0Only)
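
Taken together, these predicates let calling code branch on model capabilities at runtime. A hedged usage sketch (describe_model is a hypothetical helper):

from vllm.model_executor.models.interfaces import (
    supports_lora, supports_multimodal, supports_pp)

def describe_model(model_cls: type) -> list[str]:
    """Hypothetical helper: list capabilities advertised by a model class."""
    capabilities = []
    if supports_multimodal(model_cls):
        capabilities.append("multi-modal")
    if supports_lora(model_cls):
        capabilities.append("LoRA")
    if supports_pp(model_cls):
        capabilities.append("pipeline parallel")
    return capabilities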