vllm.multimodal.profiling#

Module Contents#

Classes#

BaseDummyInputsBuilder

Abstract base class that constructs the dummy data to profile multi-modal models.

DummyDecoderData

Dummy data used for profiling.

DummyEncoderData

Dummy data used for profiling.

MultiModalProfiler

Contains code for running memory profiling for multi-modal models.

ProcessorInputs

Represents the keyword arguments to vllm.multimodal.processing.BaseMultiModalProcessor.apply().

Data#

API#

class vllm.multimodal.profiling.BaseDummyInputsBuilder(info: vllm.multimodal.profiling._I)[source]#

Bases: abc.ABC, typing.Generic[vllm.multimodal.profiling._I]

Abstract base class that constructs the dummy data to profile multi-modal models.

Initialization

abstract get_dummy_mm_data(seq_len: int, mm_counts: collections.abc.Mapping[str, int]) → vllm.multimodal.inputs.MultiModalDataDict[source]#

Build the multimodal input which, after processing, results in the maximum possible number of placeholder tokens.

get_dummy_processor_inputs(seq_len: int, mm_counts: collections.abc.Mapping[str, int]) → vllm.multimodal.profiling.ProcessorInputs[source]#

Build the input which, after processing, results in the maximum possible number of placeholder tokens.

get_dummy_text(mm_counts: collections.abc.Mapping[str, int]) → str[source]#

Build the text input corresponding to mm_counts.

class vllm.multimodal.profiling.DummyDecoderData[source]#

Bases: typing.NamedTuple

Dummy data used for profiling.

multi_modal_data: vllm.multimodal.inputs.MultiModalKwargs[source]#

None

multi_modal_placeholders: vllm.multimodal.inputs.MultiModalPlaceholderDict[source]#

None

prompt_token_ids: list[int][source]#

None

class vllm.multimodal.profiling.DummyEncoderData[source]#

Bases: typing.NamedTuple

Dummy data used for profiling.

prompt_token_ids: list[int][source]#

None

class vllm.multimodal.profiling.MultiModalProfiler(processor: vllm.multimodal.processing.BaseMultiModalProcessor[vllm.multimodal.profiling._I])[source]#

Bases: typing.Generic[vllm.multimodal.profiling._I]

Contains code for running memory profiling for multi-modal models.

Initialization

property dummy_inputs: vllm.multimodal.profiling.BaseDummyInputsBuilder[vllm.multimodal.profiling._I][source]#
get_decoder_dummy_data(seq_len: int, mm_counts: Optional[collections.abc.Mapping[str, int]] = None) → vllm.multimodal.profiling.DummyDecoderData[source]#
get_encoder_dummy_data(seq_len: int, mm_counts: Optional[collections.abc.Mapping[str, int]] = None) → vllm.multimodal.profiling.DummyEncoderData[source]#
get_mm_limits() → collections.abc.Mapping[str, int][source]#
get_mm_max_tokens(seq_len: int, mm_counts: Optional[collections.abc.Mapping[str, int]] = None) → collections.abc.Mapping[str, int][source]#
property processing_info: vllm.multimodal.processing.BaseProcessingInfo[source]#
class vllm.multimodal.profiling.ProcessorInputs[source]#

Represents the keyword arguments to vllm.multimodal.processing.BaseMultiModalProcessor.apply().

hf_processor_mm_kwargs: collections.abc.Mapping[str, object][source]#

'field(...)'

mm_data: vllm.multimodal.inputs.MultiModalDataDict[source]#

None

prompt_text: str[source]#

None

vllm.multimodal.profiling.logger[source]#

'init_logger(...)'