Index A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z A ABORT (vllm.v1.engine.EngineCoreRequestType attribute) (vllm.v1.engine.FinishReason attribute) abort() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) abort_request() (vllm.engine.async_llm_engine.RequestTracker method) (vllm.engine.llm_engine.LLMEngine method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.metrics.stats.LoRARequestStates method) abort_requests() (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.output_processor.OutputProcessor method) abort_requests_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.DPAsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) abort_seq_group() (vllm.core.scheduler.Scheduler method) AbsolutePositionalEncoding (class in vllm.model_executor.models.phi4mm_utils) AbstractWorkerManager (class in vllm.adapter_commons.worker_manager) accept_output_buffer (vllm.attention.backends.abstract.AttentionBackend attribute) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionBackend attribute) (vllm.attention.backends.flash_attn.FlashAttentionBackend attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend attribute) (vllm.v1.attention.backends.flash_attn.FlashAttentionBackend attribute) (vllm.v1.attention.backends.flashinfer.FlashInferBackend attribute) (vllm.v1.attention.backends.mla.common.MLACommonBackend attribute) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend attribute) accept_tokens() (vllm.v1.structured_output.backend_guidance.GuidanceGrammar method) (vllm.v1.structured_output.backend_types.StructuredOutputGrammar method) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar method) acceptance_method (vllm.config.SpeculativeConfig attribute) accepted_tokens (vllm.spec_decode.metrics.SpecDecodeWorkerMetrics attribute) access_all_blocks_in_seq() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) acquire_read() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue method) acquire_write() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue method) act_type (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearLayerConfig attribute) activate_adapter() (vllm.adapter_commons.models.AdapterModelManager method) (vllm.lora.models.LoRAModelManager method) (vllm.lora.models.LRUCacheLoRAModelManager method) (vllm.prompt_adapter.models.LRUCachePromptAdapterModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) ACTIVATION_SCHEMES (in module vllm.model_executor.layers.quantization.fp8) (in module vllm.model_executor.layers.quantization.ptpc_fp8) (in module vllm.model_executor.layers.quantization.tpu_int8) ActivationQuantFusionPass (class in vllm.compilation.activation_quant_fusion) active_head_range (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) active_lora_ids (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta attribute) adapter_bias (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) adapter_id (vllm.adapter_commons.request.AdapterRequest property) (vllm.lora.request.LoRARequest property) (vllm.prompt_adapter.request.PromptAdapterRequest property) adapter_slots (vllm.adapter_commons.models.AdapterModelManager property) (vllm.lora.models.LoRAModelManager property) (vllm.prompt_adapter.models.PromptAdapterModelManager property) AdapterLRUCache (class in vllm.adapter_commons.models) AdapterMapping (class in vllm.adapter_commons.layers) AdapterModel (class in vllm.adapter_commons.models) AdapterModelManager (class in vllm.adapter_commons.models) AdapterRequest (class in vllm.adapter_commons.request) adaptive_enc_mask() (in module vllm.model_executor.models.phi4mm_utils) ADD (vllm.v1.engine.EngineCoreRequestType attribute) add() (vllm.compilation.pass_manager.PostGradPassManager method) (vllm.core.evictor.Evictor method) (vllm.core.evictor.LRUEvictor method) (vllm.entrypoints.chat_utils.BaseMultiModalItemTracker method) (vllm.outputs.RequestOutput method) add_adapter() (in module vllm.adapter_commons.utils) (vllm.adapter_commons.models.AdapterModelManager method) (vllm.adapter_commons.worker_manager.AbstractWorkerManager method) (vllm.lora.models.LoRAModelManager method) (vllm.lora.models.LRUCacheLoRAModelManager method) (vllm.lora.worker_manager.LRUCacheWorkerLoRAManager method) (vllm.lora.worker_manager.WorkerLoRAManager method) (vllm.prompt_adapter.models.LRUCachePromptAdapterModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) (vllm.prompt_adapter.worker_manager.LRUCacheWorkerPromptAdapterManager method) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) add_adapter_worker() (in module vllm.adapter_commons.utils) add_arguments() (vllm.utils.SortedHelpFormatter method) add_bias() (vllm.model_executor.layers.quantization.kernels.scaled_mm.xla.XLAScaledMMLinearKernel method) add_cli_args() (in module vllm.benchmarks.latency) (in module vllm.benchmarks.serve) (in module vllm.benchmarks.throughput) (vllm.engine.arg_utils.AsyncEngineArgs static method) (vllm.engine.arg_utils.EngineArgs static method) (vllm.entrypoints.cli.benchmark.base.BenchmarkSubcommandBase method) (vllm.entrypoints.cli.benchmark.latency.BenchmarkLatencySubcommand method) (vllm.entrypoints.cli.benchmark.serve.BenchmarkServingSubcommand method) (vllm.entrypoints.cli.benchmark.throughput.BenchmarkThroughputSubcommand method) (vllm.model_executor.model_loader.tensorizer.TensorizerArgs static method) add_dummy_lora() (vllm.lora.worker_manager.WorkerLoRAManager method) add_dummy_prompt_adapter() (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) add_expand() (vllm.lora.punica_wrapper.punica_base.PunicaWrapperABC method) (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase method) (vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPU method) (vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU method) (vllm.lora.punica_wrapper.punica_hpu.PunicaWrapperHPU method) (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) add_generation_prompt (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) add_image_newline() (vllm.model_executor.models.phi3v.Phi3HDImageEmbedding method) add_logger() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) add_lora() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.executor.executor_base.ExecutorBase method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.LoRANotSupportedWorkerBase method) (vllm.worker.worker_base.WorkerBase method) add_lora_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) add_lora_embedding() (vllm.lora.punica_wrapper.punica_base.PunicaWrapperABC method) (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase method) (vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPU method) (vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU method) (vllm.lora.punica_wrapper.punica_hpu.PunicaWrapperHPU method) (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) add_lora_linear() (vllm.lora.punica_wrapper.punica_base.PunicaWrapperABC method) (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase method) (vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPU method) (vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU method) (vllm.lora.punica_wrapper.punica_hpu.PunicaWrapperHPU method) (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) add_lora_logits() (vllm.lora.punica_wrapper.punica_base.PunicaWrapperABC method) (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase method) (vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPU method) (vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU method) (vllm.lora.punica_wrapper.punica_hpu.PunicaWrapperHPU method) (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) add_new_req() (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorMetadata method) add_num_batched_tokens() (vllm.core.scheduler.SchedulingBudget method) add_num_seqs() (vllm.core.scheduler.SchedulingBudget method) add_pending_message() (vllm.v1.engine.core_client.MPClient method) add_pos_emb() (vllm.model_executor.models.molmo.VisionTransformer method) add_pre_mm_projector_layer_norm (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) add_prompt_adapter() (vllm.engine.llm_engine.LLMEngine method) (vllm.executor.executor_base.ExecutorBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) add_remote_agent() (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorWorker method) add_request() (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnectorMetadata method) (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.async_llm_engine.RequestTracker method) (vllm.engine.llm_engine.LLMEngine method) (vllm.sequence.ParallelSampleSequenceGroup static method) (vllm.sequence.SequenceGroupBase static method) (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.DPEngineCoreProc method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.engine.output_processor.OutputProcessor method) (vllm.v1.metrics.stats.LoRARequestStates method) (vllm.v1.worker.gpu_input_batch.InputBatch method) add_request_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.DPAsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) add_row() (vllm.v1.worker.block_table.BlockTable method) add_sampler_output() (vllm.worker.multi_step_model_runner.StatefulModelInput method) add_seq() (vllm.core.block.prefix_caching_block.LastAccessBlocksTracker method) add_seq_group() (vllm.core.scheduler.Scheduler method) (vllm.worker.cpu_model_runner.ModelInputForCPUBuilder method) (vllm.worker.model_runner.ModelInputForGPUBuilder method) (vllm.worker.model_runner_base.ModelRunnerInputBuilderBase method) (vllm.worker.xpu_model_runner.ModelInputForXPUBuilder method) add_shrink() (vllm.lora.punica_wrapper.punica_base.PunicaWrapperABC method) (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase method) (vllm.lora.punica_wrapper.punica_cpu.PunicaWrapperCPU method) (vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU method) (vllm.lora.punica_wrapper.punica_hpu.PunicaWrapperHPU method) (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) add_special_tokens (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeCompletionRequest attribute) added_vocab_end_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) added_vocab_start_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) additional_config (vllm.config.VllmConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) additional_data (vllm.entrypoints.openai.protocol.ClassificationRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) (vllm.entrypoints.openai.protocol.RerankRequest attribute) (vllm.entrypoints.openai.protocol.ScoreRequest attribute) (vllm.pooling_params.PoolingParams attribute) additional_env_vars (vllm.platforms.interface.Platform attribute) (vllm.platforms.tpu.TpuPlatform attribute) ADDITIONAL_VOCAB_SIZE (in module vllm.model_executor.models.molmo) adjust_bitblas_shard() (in module vllm.model_executor.layers.linear) adjust_bitsandbytes_4bit_shard() (in module vllm.model_executor.layers.linear) adjust_marlin_shard() (in module vllm.model_executor.layers.linear) adjust_rank() (vllm.worker.worker_base.WorkerWrapperBase method) adjust_request() (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParser method) (vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser.Internlm2ToolParser method) (vllm.entrypoints.openai.tool_parsers.jamba_tool_parser.JambaToolParser method) (vllm.entrypoints.openai.tool_parsers.mistral_tool_parser.MistralToolParser method) adjust_scalar_to_fused_array() (in module vllm.model_executor.layers.linear) adjust_shard_indexes_for_packing() (vllm.model_executor.parameter.PackedColumnParameter method) (vllm.model_executor.parameter.PackedvLLMParameter method) adjusted_rank (vllm.executor.ray_distributed_executor.RayWorkerMetaData attribute) advance_step() (vllm.attention.backends.abstract.AttentionBackend method) (vllm.attention.backends.flash_attn.FlashAttentionMetadata method) (vllm.attention.backends.flashinfer.FlashInferMetadata method) (vllm.attention.backends.flashmla.FlashMLAMetadata method) (vllm.attention.backends.mla.common.MLACommonMetadata method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata method) advance_step_flashattn() (in module vllm._custom_ops) advance_step_flashinfer() (in module vllm._custom_ops) after_profile (vllm.utils.MemoryProfilingResult attribute) agent_metadata (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlAgentMetadata attribute) AIMODataset (class in vllm.benchmarks.datasets) AIMv2Attention (class in vllm.model_executor.models.aimv2) AIMv2Block (class in vllm.model_executor.models.aimv2) AIMv2Config (class in vllm.transformers_utils.configs.ovis) AIMv2Model (class in vllm.model_executor.models.aimv2) AIMv2PatchEmbed (class in vllm.model_executor.models.aimv2) AIMv2SwiGLUFFN (class in vllm.model_executor.models.aimv2) AIMv2Transformer (class in vllm.model_executor.models.aimv2) Aimv2VisualTokenizerConfig (class in vllm.transformers_utils.configs.ovis) AIMv2ViTPreprocessor (class in vllm.model_executor.models.aimv2) AIOHTTP_TIMEOUT (in module vllm.benchmarks.endpoint_request_func) aiter_mla_decode_fwd() (in module vllm.attention.ops.rocm_aiter_mla) AiterMLABackend (class in vllm.attention.backends.rocm_aiter_mla) (class in vllm.v1.attention.backends.mla.rocm_aiter_mla) AiterMLADecodeMetadata (class in vllm.v1.attention.backends.mla.rocm_aiter_mla) AiterMLAImpl (class in vllm.attention.backends.rocm_aiter_mla) (class in vllm.v1.attention.backends.mla.rocm_aiter_mla) AiterMLAMetadata (class in vllm.attention.backends.rocm_aiter_mla) (class in vllm.v1.attention.backends.mla.rocm_aiter_mla) AiterMLAMetadataBuilder (class in vllm.attention.backends.rocm_aiter_mla) (class in vllm.v1.attention.backends.mla.rocm_aiter_mla) AiterMLAState (class in vllm.attention.backends.rocm_aiter_mla) AITERPagedAttention (class in vllm.attention.ops.rocm_aiter_paged_attn) AiterScaledMMLinearKernel (class in vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter) alibi_slopes (vllm.attention.ops.triton_flash_attention.MetaData attribute) align_to_block_size() (in module vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector) align_workers() (in module vllm.worker.hpu_model_runner) ALL (vllm.model_executor.layers.pooler.PoolingType attribute) all_block_ids (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator property) (vllm.core.block.interfaces.BlockAllocator property) (vllm.core.block.interfaces.DeviceAwareBlockAllocator property) (vllm.core.block.naive_block.NaiveBlockAllocator property) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator property) all_close_1d() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) ALL_DECODER_LAYER_TYPES (in module vllm.model_executor.models.bamba) (in module vllm.model_executor.models.glm4) (in module vllm.model_executor.models.granitemoehybrid) (in module vllm.model_executor.models.jamba) (in module vllm.model_executor.models.qwen3) all_dims_equivalent() (vllm.compilation.noop_elimination.NoOpEliminationPass method) all_gather() (in module vllm.distributed.parallel_state) (vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase method) (vllm.distributed.device_communicators.cpu_communicator.CpuCommunicator method) (vllm.distributed.device_communicators.hpu_communicator.HpuCommunicator method) (vllm.distributed.device_communicators.neuron_communicator.NeuronCommunicator method) (vllm.distributed.device_communicators.pynccl.PyNcclCommunicator method) (vllm.distributed.device_communicators.tpu_communicator.TpuCommunicator method) (vllm.distributed.parallel_state.GroupCoordinator method) all_gather_fake() (in module vllm.distributed.parallel_state) all_gather_interleave() (in module vllm.model_executor.models.qwen2_5_vl) all_gather_obj() (vllm.distributed.utils.StatelessProcessGroup method) all_greedy (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) (vllm.v1.worker.gpu_input_batch.InputBatch property) all_mm_data() (vllm.entrypoints.chat_utils.AsyncMultiModalItemTracker method) (vllm.entrypoints.chat_utils.MultiModalItemTracker method) ALL_PINNED_SENTINEL (in module vllm.utils) all_random (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.worker.gpu_input_batch.InputBatch property) all_reduce() (in module vllm._custom_ops) (in module vllm.distributed.parallel_state) (vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase method) (vllm.distributed.device_communicators.cpu_communicator.CpuCommunicator method) (vllm.distributed.device_communicators.cuda_communicator.CudaCommunicator method) (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce method) (vllm.distributed.device_communicators.hpu_communicator.HpuCommunicator method) (vllm.distributed.device_communicators.neuron_communicator.NeuronCommunicator method) (vllm.distributed.device_communicators.pynccl.PyNcclCommunicator method) (vllm.distributed.device_communicators.tpu_communicator.TpuCommunicator method) (vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator method) (vllm.distributed.parallel_state.GroupCoordinator method) all_reduce_fake() (in module vllm.distributed.parallel_state) all_special_ids (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) all_special_tokens (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) all_special_tokens_extended (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) all_stop_token_ids (vllm.sampling_params.SamplingParams property) AllBlocksCleared (class in vllm.distributed.kv_events) allocate() (vllm.core.block.block_table.BlockTable method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.v1.core.encoder_cache_manager.EncoderCacheManager method) allocate_immutable_block() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) allocate_immutable_blocks() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) allocate_managed_buffer() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) allocate_mutable_block() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) allocate_new_blocks() (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) allocate_or_get_null_block() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) allocate_shared_buffer_and_handle() (in module vllm._custom_ops) allocate_slots() (vllm.v1.core.kv_cache_manager.KVCacheManager method) allocate_token_bitmask() (vllm.v1.structured_output.backend_guidance.GuidanceBackend method) (vllm.v1.structured_output.backend_types.StructuredOutputBackend method) (vllm.v1.structured_output.backend_xgrammar.XgrammarBackend method) AllocationData (class in vllm.device_allocator.cumem) AllocStatus (class in vllm.core.interfaces) allow_async_output_proc (vllm.engine.llm_engine.SchedulerOutputState attribute) allow_create_engine (vllm.entrypoints.openai.protocol.ModelPermission attribute) allow_fine_tuning (vllm.entrypoints.openai.protocol.ModelPermission attribute) allow_gpu_advance_step (in module vllm.spec_decode.draft_model_runner) allow_logprobs (vllm.entrypoints.openai.protocol.ModelPermission attribute) allow_patterns_overrides (vllm.model_executor.model_loader.default_loader.DefaultModelLoader.Source attribute) allow_sampling (vllm.entrypoints.openai.protocol.ModelPermission attribute) allow_search_indices (vllm.entrypoints.openai.protocol.ModelPermission attribute) allow_view (vllm.entrypoints.openai.protocol.ModelPermission attribute) allowed_local_media_path (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.entrypoints.chat_utils.BaseMultiModalItemTracker property) allowed_token_ids (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) allowed_token_ids_mask (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) AllowedTokenIdsLogitsProcessor (class in vllm.entrypoints.openai.logits_processors) AllPool (class in vllm.model_executor.layers.pooler) AllReduceRMSNormPattern (class in vllm.compilation.sequence_parallelism) ALLSPARK_AMPERE_K_ALIGN (in module vllm.model_executor.layers.quantization.utils.allspark_utils) ALLSPARK_AMPERE_M_CUBLAS_THRESHOLD (in module vllm.model_executor.layers.quantization.utils.allspark_utils) ALLSPARK_AMPERE_N_ALIGN (in module vllm.model_executor.layers.quantization.utils.allspark_utils) allspark_repack_weight() (in module vllm._custom_ops) ALLSPARK_SUPPORTED_QUANT_TYPES (in module vllm.model_executor.layers.quantization.utils.allspark_utils) allspark_w8a16_gemm() (in module vllm._custom_ops) AllSparkLinearKernel (class in vllm.model_executor.layers.quantization.kernels.mixed_precision.allspark) ALPHANUMERIC (in module vllm.entrypoints.openai.tool_parsers.mistral_tool_parser) always_start_with_space (vllm.model_executor.models.molmo.MolmoProcessorWrapper property) AlwaysHitShapeEnv (class in vllm.compilation.compiler_interface) anon_repr() (vllm.v1.core.sched.output.NewRequestData method) any_whitespace (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) AnyFuture (in module vllm.v1.engine.core_client) AnyRequest (in module vllm.entrypoints.openai.serving_engine) AnyResponse (in module vllm.entrypoints.openai.serving_engine) AnyResponseFormat (in module vllm.entrypoints.openai.protocol) AnyTokenizer (in module vllm.transformers_utils.tokenizer) API_SERVER (vllm.usage.usage_lib.UsageContext attribute) api_url (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) app (in module vllm.entrypoints.api_server) append() (vllm.core.block.common.BlockList method) (vllm.v1.core.kv_cache_utils.FreeKVCacheBlockQueue method) (vllm.v1.utils.ConstantList method) append_items_from_seq_group() (vllm.multimodal.base.MultiModalPlaceholderMap method) append_output() (vllm.engine.llm_engine.SchedulerContext method) append_output_token_ids() (vllm.v1.request.Request method) append_row() (vllm.v1.worker.block_table.BlockTable method) append_slots() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) append_token_id() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) append_token_ids() (vllm.core.block.block_table.BlockTable method) (vllm.core.block.common.BlockList method) (vllm.core.block.cpu_gpu_block_allocator.NullBlock method) (vllm.core.block.interfaces.Block method) (vllm.core.block.naive_block.NaiveBlock method) (vllm.core.block.prefix_caching_block.PrefixCachingBlock method) apply() (vllm.lora.fully_sharded_layers.ColumnParallelLinearWithShardedLoRA method) (vllm.lora.fully_sharded_layers.MergedColumnParallelLinearWithShardedLoRA method) (vllm.lora.fully_sharded_layers.MergedQKVParallelLinearWithShardedLoRA method) (vllm.lora.fully_sharded_layers.QKVParallelLinearWithShardedLoRA method) (vllm.lora.fully_sharded_layers.RowParallelLinearWithShardedLoRA method) (vllm.lora.layers.BaseLinearLayerWithLoRA method) (vllm.model_executor.layers.fused_moe.layer.FusedMoEMethodBase method) (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod method) (vllm.model_executor.layers.linear.LinearMethodBase method) (vllm.model_executor.layers.linear.UnquantizedLinearMethod method) (vllm.model_executor.layers.quantization.aqlm.AQLMLinearMethod method) (vllm.model_executor.layers.quantization.awq.AWQLinearMethod method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinLinearMethod method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMoEMethod method) (vllm.model_executor.layers.quantization.base_config.QuantizeMethodBase method) (vllm.model_executor.layers.quantization.bitblas.BitBLASLinearMethod method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesLinearMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsLinearMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Fp8MoECutlassMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Fp8MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Int8MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsWNA16MarlinMoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsWNA16MoEMethod method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPLinearMethod method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8MoEMethod method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8LinearMethod method) (vllm.model_executor.layers.quantization.fp8.Fp8LinearMethod method) (vllm.model_executor.layers.quantization.fp8.Fp8MoEMethod method) (vllm.model_executor.layers.quantization.gguf.GGUFLinearMethod method) (vllm.model_executor.layers.quantization.gguf.GGUFMoEMethod method) (vllm.model_executor.layers.quantization.gptq.GPTQLinearMethod method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASLinearMethod method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinLinearMethod method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinMoEMethod method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24LinearMethod method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinMethod method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXAWQLinearMethod method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXGPTQLinearMethod method) (vllm.model_executor.layers.quantization.kv_cache.BaseKVCacheMethod method) (vllm.model_executor.layers.quantization.marlin.MarlinLinearMethod method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8LinearMethod method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4FusedMoE method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4LinearMethod method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Method method) (vllm.model_executor.layers.quantization.ptpc_fp8.PTPCFp8LinearMethod method) (vllm.model_executor.layers.quantization.qqq.QQQLinearMethod method) (vllm.model_executor.layers.quantization.quark.quark.QuarkLinearMethod method) (vllm.model_executor.layers.quantization.quark.quark_moe.QuarkW8A8Fp8MoEMethod method) (vllm.model_executor.layers.quantization.torchao.TorchAOLinearMethod method) (vllm.model_executor.layers.quantization.tpu_int8.TPUInt8LinearMethod method) (vllm.model_executor.layers.quantization.utils.w8a8_utils.Fp8LinearOp method) (vllm.model_executor.layers.vocab_parallel_embedding.UnquantizedEmbeddingMethod method) (vllm.model_executor.models.llava.MantisMultiModalProcessor method) (vllm.model_executor.models.mllama.MllamaMultiModalProcessor method) (vllm.model_executor.models.paligemma.PaliGemmaMultiModalProcessor method) (vllm.model_executor.models.prithvi_geospatial_mae.PrithviGeoSpatialMAEMultiModalProcessor method) (vllm.model_executor.models.utils.WeightsMapper method) (vllm.multimodal.processing.BaseMultiModalProcessor method) (vllm.multimodal.processing.EncDecMultiModalProcessor method) apply_adapters_worker() (in module vllm.adapter_commons.utils) apply_all_penalties() (in module vllm.v1.sample.ops.penalties) apply_allowed_token_ids() (vllm.v1.sample.sampler.Sampler method) apply_awq_marlin_linear() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) apply_bad_words() (in module vllm.v1.sample.ops.bad_words) (vllm.v1.sample.sampler.Sampler method) apply_chat_template() (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) apply_class_embedding() (vllm.model_executor.models.mllama.MllamaVisionModel method) apply_delta() (vllm.sequence.SequenceData method) (vllm.sequence.SequenceGroupMetadata method) apply_fp4_marlin_linear() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) apply_fp8_marlin_linear() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) apply_gptq() (vllm.model_executor.layers.quantization.bitblas.BitBLASLinearMethod method) apply_gptq_bitblas_linear() (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel method) apply_gptq_marlin_linear() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) apply_grammar_bitmask() (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) apply_hf_chat_template() (in module vllm.entrypoints.chat_utils) apply_logits_bias() (vllm.v1.sample.sampler.Sampler method) apply_min_p() (vllm.v1.sample.sampler.Sampler method) (vllm.v1.sample.tpu.sampler.Sampler method) apply_min_token_penalties() (in module vllm.v1.sample.ops.penalties) apply_mistral_chat_template() (in module vllm.entrypoints.chat_utils) apply_model() (vllm.entrypoints.llm.LLM method) (vllm.executor.executor_base.ExecutorBase method) apply_multimodal_chat_transformation() (vllm.benchmarks.datasets.BenchmarkDataset method) apply_penalties() (in module vllm.model_executor.layers.utils) (vllm.v1.sample.sampler.Sampler method) apply_pooling() (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) apply_rope() (in module vllm.model_executor.models.moonvit) apply_rotary_emb_torch() (in module vllm.model_executor.models.qwen2_vl) apply_rotary_emb_vit() (in module vllm.model_executor.models.pixtral) apply_rotary_pos_emb_vision() (in module vllm.model_executor.models.qwen2_vl) apply_softcap() (in module vllm.attention.ops.triton_unified_attention) apply_temperature() (vllm.v1.sample.sampler.Sampler method) (vllm.v1.sample.tpu.sampler.Sampler method) apply_text_matches() (in module vllm.multimodal.processing) apply_token_matches() (in module vllm.multimodal.processing) apply_top_k_only() (in module vllm.v1.sample.ops.topk_topp_sampler) apply_top_k_top_p() (in module vllm.v1.sample.ops.topk_topp_sampler) apply_top_k_top_p_tpu() (in module vllm.v1.sample.ops.topk_topp_sampler) apply_w8a8_block_fp8_linear() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) apply_w8a8_block_fp8_linear_fake() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) apply_w8a8_block_int8_linear() (in module vllm.model_executor.layers.quantization.utils.int8_utils) apply_weights() (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_24.CompressedTensors24 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_scheme.CompressedTensorsScheme method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24.CompressedTensorsW4A16Sparse24 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_nvfp4.CompressedTensorsW4A16Fp4 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8.CompressedTensorsW8A16Fp8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_fp8.CompressedTensorsW8A8Fp8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8.CompressedTensorsW8A8Int8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16.CompressedTensorsWNA16 method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.allspark.AllSparkLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama.ExllamaLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.machete.MacheteLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.marlin.MarlinLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter.AiterScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.cutlass.CutlassScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel.ScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.triton.TritonScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.xla.XLAScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.quark.schemes.quark_scheme.QuarkScheme method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w4a4_mxfp4.QuarkW4A4MXFP4 method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8.QuarkW8A8Fp8 method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8.QuarkW8A8Int8 method) aqlm_dequant() (in module vllm._custom_ops) aqlm_gemm() (in module vllm._custom_ops) AQLMConfig (class in vllm.model_executor.layers.quantization.aqlm) AQLMLinearMethod (class in vllm.model_executor.layers.quantization.aqlm) architectures (vllm.config.ModelConfig property) ARCTIC_PRETRAINED_CONFIG_ARCHIVE_MAP (in module vllm.transformers_utils.configs.arctic) ArcticAttention (class in vllm.model_executor.models.arctic) ArcticConfig (class in vllm.transformers_utils.configs.arctic) ArcticDecoderLayer (class in vllm.model_executor.models.arctic) ArcticForCausalLM (class in vllm.model_executor.models.arctic) ArcticLoRAConfig (class in vllm.transformers_utils.configs.arctic) ArcticMLP (class in vllm.model_executor.models.arctic) ArcticModel (class in vllm.model_executor.models.arctic) ArcticMoE (class in vllm.model_executor.models.arctic) ArcticQuantizationConfig (class in vllm.transformers_utils.configs.arctic) args (vllm.entrypoints.openai.protocol.LogitsProcessorConstructor attribute) argtypes (vllm.distributed.device_communicators.cuda_wrapper.Function attribute) (vllm.distributed.device_communicators.pynccl_wrapper.Function attribute) arguments (vllm.entrypoints.openai.protocol.DeltaFunctionCall attribute) (vllm.entrypoints.openai.protocol.FunctionCall attribute) AriaDummyInputsBuilder (class in vllm.model_executor.models.aria) AriaForConditionalGeneration (class in vllm.model_executor.models.aria) AriaFusedMoE (class in vllm.model_executor.models.aria) AriaImagePixelInputs (class in vllm.model_executor.models.aria) AriaMultiModalProcessor (class in vllm.model_executor.models.aria) AriaProcessingInfo (class in vllm.model_executor.models.aria) AriaProjector (class in vllm.model_executor.models.aria) AriaProjectorMLP (class in vllm.model_executor.models.aria) AriaTextDecoderLayer (class in vllm.model_executor.models.aria) AriaTextModel (class in vllm.model_executor.models.aria) AriaTextMoELayer (class in vllm.model_executor.models.aria) AriaVisionTransformer (class in vllm.model_executor.models.aria) ARM (vllm.platforms.interface.CpuArchEnum attribute) array_full() (in module vllm.sequence) arrival_time (vllm.sequence.RequestMetrics attribute) (vllm.v1.engine.EngineCoreRequest attribute) (vllm.v1.metrics.stats.RequestStateStats attribute) arrival_ts_s (vllm.v1.stats.common.RequestStats attribute) ARRIVED (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) ARTIFICIAL_PREEMPTION_MAX_CNT (in module vllm.core.scheduler) ARTIFICIAL_PREEMPTION_PROB (in module vllm.core.scheduler) as_broadcastable_tensor_dict() (vllm.worker.cpu_enc_dec_model_runner.EncoderDecoderModelInputForCPU method) (vllm.worker.cpu_model_runner.ModelInputForCPU method) (vllm.worker.cpu_model_runner.ModelInputForCPUWithSamplingMetadata method) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelInput method) (vllm.worker.hpu_model_runner.ModelInputForHPU method) (vllm.worker.hpu_model_runner.ModelInputForHPUWithSamplingMetadata method) (vllm.worker.model_runner.ModelInputForGPU method) (vllm.worker.model_runner.ModelInputForGPUWithSamplingMetadata method) (vllm.worker.model_runner_base.BroadcastableModelInput method) (vllm.worker.multi_step_model_runner.StatefulModelInput method) (vllm.worker.neuron_model_runner.ModelInputForNeuron method) (vllm.worker.tpu_model_runner.ModelInputForTPU method) (vllm.worker.worker_base.WorkerInput method) (vllm.worker.xpu_model_runner.ModelInputForXPU method) (vllm.worker.xpu_model_runner.ModelInputForXPUWithSamplingMetadata method) as_classification_model() (in module vllm.model_executor.models.adapters) as_embedding_model() (in module vllm.model_executor.models.adapters) as_kwargs() (vllm.multimodal.inputs.MultiModalKwargs static method) as_readonly() (vllm.core.block.common.RefCounter method) as_reward_model() (in module vllm.model_executor.models.adapters) as_version_str() (vllm.platforms.interface.DeviceCapability method) asdict_zerocopy() (vllm.attention.backends.abstract.AttentionMetadata method) (vllm.attention.backends.flashinfer.FlashInferMetadata method) aspect_ratio_ids (vllm.model_executor.models.mllama.MllamaImagePixelInputs attribute) aspect_ratio_mask (vllm.model_executor.models.mllama.MllamaImagePixelInputs attribute) aspect_ratios (vllm.model_executor.models.mllama4.Llama4ImagePatchInputs attribute) assembled_seq_group (vllm.sequence.SequenceGroupBase attribute) assert_enc_dec_mr_supported_scenario() (in module vllm.worker.utils) assert_hashable() (in module vllm.config) assert_prefix_caching_block_or_none() (in module vllm.core.block.prefix_caching_block) assert_ray_available() (in module vllm.executor.ray_utils) ASSET_DIR (in module vllm.assets.audio) async_callback (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) async_download_file() (vllm.connections.HTTPConnection method) async_get_bytes() (vllm.connections.HTTPConnection method) async_get_json() (vllm.connections.HTTPConnection method) async_get_text() (vllm.connections.HTTPConnection method) ASYNC_REQUEST_FUNCS (in module vllm.benchmarks.endpoint_request_func) async_request_openai_completions() (in module vllm.benchmarks.endpoint_request_func) async_tensor_h2d() (in module vllm.utils) AsyncEngineArgs (class in vllm.engine.arg_utils) AsyncEngineDeadError AsyncLLM (class in vllm.v1.engine.async_llm) AsyncLLMEngine (class in vllm.engine.async_llm_engine) AsyncMetricsCollector (class in vllm.spec_decode.metrics) AsyncMPClient (class in vllm.v1.engine.core_client) AsyncMultiModalContentParser (class in vllm.entrypoints.chat_utils) AsyncMultiModalItemTracker (class in vllm.entrypoints.chat_utils) AsyncStream (class in vllm.engine.async_llm_engine) at_layer_idx() (vllm.model_executor.models.mamba_cache.MambaCacheParams method) (vllm.model_executor.models.minimax_cache.MinimaxCacheParams method) AtomicCounter (class in vllm.utils) AttBlock (class in vllm.model_executor.models.phi4mm_utils) Attention (class in vllm.attention.layer) (class in vllm.model_executor.models.pixtral) attention (vllm.model_executor.models.module_mapping.ModelKeys attribute) (vllm.utils.LayerBlockType attribute) attention() (vllm.model_executor.models.qwen_vl.VisualAttentionBlock method) attention_qkvpacked() (vllm.model_executor.models.moonvit.MoonVitEncoderLayer method) AttentionBackend (class in vllm.attention.backends.abstract) AttentionImpl (class in vllm.attention.backends.abstract) AttentionLayer (class in vllm.attention.backends.abstract) AttentionMetadata (class in vllm.attention.backends.abstract) AttentionMetadataBuilder (class in vllm.attention.backends.abstract) AttentionSpec (class in vllm.v1.kv_cache_interface) AttentionState (class in vllm.attention.backends.abstract) AttentionType (class in vllm.attention.backends.abstract) AttModule (class in vllm.model_executor.models.phi4mm_utils) attn_bias (vllm.attention.backends.hpu_attn.HPUAttentionMetadata attribute) attn_config_defaults (in module vllm.transformers_utils.configs.mpt) attn_fwd() (in module vllm.attention.ops.triton_flash_attention) attn_metadata (vllm.forward_context.ForwardContext attribute) (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) attribute_map (vllm.transformers_utils.configs.chatglm.ChatGLMConfig attribute) (vllm.transformers_utils.configs.dbrx.DbrxConfig attribute) (vllm.transformers_utils.configs.exaone.ExaoneConfig attribute) (vllm.transformers_utils.configs.falcon.RWConfig attribute) (vllm.transformers_utils.configs.jais.JAISConfig attribute) (vllm.transformers_utils.configs.mlp_speculator.MLPSpeculatorConfig attribute) (vllm.transformers_utils.configs.mpt.MPTConfig attribute) (vllm.transformers_utils.configs.telechat2.Telechat2Config attribute) attributes (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor attribute) (vllm.transformers_utils.processors.ovis.OvisProcessor attribute) audio (vllm.multimodal.inputs.MultiModalDataBuiltins attribute) audio_and_sample_rate (vllm.assets.audio.AudioAsset property) audio_embed_sizes (vllm.model_executor.models.granite_speech.GraniteSpeechAudioInputs attribute) audio_embeds (vllm.model_executor.models.minicpmo.MiniCPMOAudioEmbeddingInputs attribute) audio_feature_lens (vllm.model_executor.models.minicpmo.MiniCPMOAudioFeatureInputs attribute) audio_features (vllm.model_executor.models.minicpmo.MiniCPMOAudioFeatureInputs attribute) audio_pattern (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo attribute) audio_tokens (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo property) audio_url (vllm.entrypoints.chat_utils.ChatCompletionContentPartAudioParam attribute) (vllm.entrypoints.chat_utils.CustomChatCompletionContentSimpleAudioParam attribute) AudioAsset (class in vllm.assets.audio) AudioAssetName (in module vllm.assets.audio) AudioEmbedding (class in vllm.model_executor.models.phi4mm_audio) AudioEmbeddingItems (class in vllm.multimodal.parse) AudioItem (in module vllm.multimodal.inputs) AudioMediaIO (class in vllm.multimodal.audio) AudioProcessorItems (class in vllm.multimodal.parse) AudioResampler (class in vllm.multimodal.audio) AudioResponseFormat (in module vllm.entrypoints.openai.protocol) AudioURL (class in vllm.entrypoints.chat_utils) AUTO (vllm.config.LoadFormat attribute) (vllm.config.ModelImpl attribute) (vllm.transformers_utils.config.ConfigFormat attribute) auto_measure (vllm.utils.MemorySnapshot attribute) AutoWeightsLoader (class in vllm.model_executor.models.utils) avg_logprob (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) awq_dequantize() (in module vllm._custom_ops) awq_dequantize_kernel() (in module vllm.model_executor.layers.quantization.awq_triton) awq_dequantize_triton() (in module vllm.model_executor.layers.quantization.awq_triton) awq_gemm() (in module vllm._custom_ops) awq_gemm_kernel() (in module vllm.model_executor.layers.quantization.awq_triton) awq_gemm_triton() (in module vllm.model_executor.layers.quantization.awq_triton) awq_marlin_moe_repack() (in module vllm._custom_ops) awq_marlin_quantize() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test) awq_marlin_repack() (in module vllm._custom_ops) awq_pack() (in module vllm.model_executor.layers.quantization.utils.quant_utils) awq_to_marlin_zero_points() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) AWQ_TRITON_SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.awq_triton) AWQConfig (class in vllm.model_executor.layers.quantization.awq) AWQLinearMethod (class in vllm.model_executor.layers.quantization.awq) AWQMarlinConfig (class in vllm.model_executor.layers.quantization.awq_marlin) AWQMarlinLinearMethod (class in vllm.model_executor.layers.quantization.awq_marlin) AWQMoEMethod (class in vllm.model_executor.layers.quantization.awq_marlin) AyaVisionDummyInputsBuilder (class in vllm.model_executor.models.aya_vision) AyaVisionForConditionalGeneration (class in vllm.model_executor.models.aya_vision) AyaVisionImagePixelInputs (class in vllm.model_executor.models.aya_vision) AyaVisionMultiModalProcessor (class in vllm.model_executor.models.aya_vision) AyaVisionMultiModalProjector (class in vllm.model_executor.models.aya_vision) AyaVisionProcessingInfo (class in vllm.model_executor.models.aya_vision) B backend (vllm.config.CompilationConfig attribute) (vllm.config.DecodingConfig attribute) (vllm.sampling_params.GuidedDecodingParams attribute) backend_name_to_enum() (in module vllm.attention.selector) backend_was_auto (vllm.sampling_params.GuidedDecodingParams attribute) BackgroundProcHandle (class in vllm.v1.utils) BackgroundResources (class in vllm.v1.engine.core_client) backward() (vllm.model_executor.models.phimoe.mp static method) bad_words (vllm.sampling_params.SamplingParams attribute) bad_words_token_ids (vllm.sampling_params.SamplingParams property) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) BaiChuanAttention (class in vllm.model_executor.models.baichuan) BaiChuanBaseForCausalLM (class in vllm.model_executor.models.baichuan) BaiChuanDecoderLayer (class in vllm.model_executor.models.baichuan) BaiChuanForCausalLM (class in vllm.model_executor.models.baichuan) BaichuanForCausalLM (class in vllm.model_executor.models.baichuan) BaiChuanMLP (class in vllm.model_executor.models.baichuan) BaiChuanModel (class in vllm.model_executor.models.baichuan) BambaAttentionDecoderLayer (class in vllm.model_executor.models.bamba) BambaForCausalLM (class in vllm.model_executor.models.bamba) BambaMixerDecoderLayer (class in vllm.model_executor.models.bamba) BambaMLP (class in vllm.model_executor.models.bamba) BambaModel (class in vllm.model_executor.models.bamba) barrier() (vllm.distributed.parallel_state.GroupCoordinator method) (vllm.distributed.utils.StatelessProcessGroup method) BartCrossAttention (class in vllm.model_executor.models.bart) BartDecoder (class in vllm.model_executor.models.bart) BartDecoderLayer (class in vllm.model_executor.models.bart) BartDecoderSelfAttention (class in vllm.model_executor.models.bart) BartEncoder (class in vllm.model_executor.models.bart) BartEncoderAttention (class in vllm.model_executor.models.bart) BartEncoderLayer (class in vllm.model_executor.models.bart) BartForConditionalGeneration (class in vllm.model_executor.models.bart) BartLearnedPositionalEmbedding (class in vllm.model_executor.models.bart) BartModel (class in vllm.model_executor.models.bart) BartParallelLMHead (class in vllm.model_executor.models.bart) BartScaledWordEmbedding (class in vllm.model_executor.models.bart) base() (in module vllm.entrypoints.openai.api_server) BASE_BLOCK (in module vllm.attention.ops.prefix_prefill) base_image_input_size() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) base_model_name (vllm.entrypoints.openai.serving_models.LoRAModulePath attribute) (vllm.lora.request.LoRARequest attribute) base_model_prefix (vllm.model_executor.models.bart.BartForConditionalGeneration attribute) (vllm.model_executor.models.mllama.MllamaForCausalLM attribute) (vllm.model_executor.models.mllama.MllamaTextModel attribute) (vllm.model_executor.models.ultravox.ModifiedWhisperEncoder attribute) base_output_proc_callback (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) BaseDummyInputsBuilder (class in vllm.multimodal.profiling) BaseIncrementalDetokenizer (class in vllm.v1.engine.detokenizer) BaseInternVLProcessingInfo (class in vllm.model_executor.models.internvl) BaseInternVLProcessor (class in vllm.model_executor.models.internvl) BaseKVCacheMethod (class in vllm.model_executor.layers.quantization.kv_cache) BaseLayerWithLoRA (class in vllm.lora.layers) BaseLinearLayerWithLoRA (class in vllm.lora.layers) BaseLlavaMultiModalProcessor (class in vllm.model_executor.models.llava) BaseLlavaNextMultiModalProcessor (class in vllm.model_executor.models.llava_next) BaseLlavaProcessingInfo (class in vllm.model_executor.models.llava) (class in vllm.model_executor.models.mistral3) BaseLogitsProcessor (class in vllm.model_executor.guided_decoding.outlines_logits_processors) BaseModelLoader (class in vllm.model_executor.model_loader.base_loader) BaseModelPath (class in vllm.entrypoints.openai.serving_models) BaseMultiModalContentParser (class in vllm.entrypoints.chat_utils) BaseMultiModalField (class in vllm.multimodal.inputs) BaseMultiModalItemTracker (class in vllm.entrypoints.chat_utils) BaseMultiModalProcessor (class in vllm.multimodal.processing) BaseProcessingInfo (class in vllm.multimodal.processing) BaseResampler (class in vllm.model_executor.layers.resampler) BaseSkyworkR1VProcessingInfo (class in vllm.model_executor.models.skyworkr1v) BaseSkyworkR1VProcessor (class in vllm.model_executor.models.skyworkr1v) BaseVisualTokenizerConfig (class in vllm.transformers_utils.configs.ovis) BasevLLMParameter (class in vllm.model_executor.parameter) batch() (vllm.multimodal.inputs.MultiModalKwargs static method) batch_decode() (vllm.transformers_utils.processors.ovis.OvisProcessor method) batch_size (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) (vllm.multimodal.inputs.MultiModalSharedField attribute) batch_size_padded (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) batched() (vllm.multimodal.inputs.MultiModalFieldConfig static method) batched_rotary_embedding() (in module vllm._custom_ops) (vllm._ipex_ops.ipex_ops static method) BatchedTensorInputs (in module vllm.multimodal.inputs) BatchExpansionTop1Scorer (class in vllm.spec_decode.batch_expansion) BatchProgressTracker (class in vllm.entrypoints.openai.run_batch) BatchRequestInput (class in vllm.entrypoints.openai.protocol) BatchRequestOutput (class in vllm.entrypoints.openai.protocol) BatchResponseData (class in vllm.entrypoints.openai.protocol) batchsize_forward_time (in module vllm.forward_context) batchsize_logging_interval (in module vllm.forward_context) BatchType (class in vllm.worker.hpu_model_runner) beam_search() (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) beam_width (vllm.sampling_params.BeamSearchParams attribute) BeamSearchInstance (class in vllm.beam_search) BeamSearchOutput (class in vllm.beam_search) BeamSearchParams (class in vllm.sampling_params) BeamSearchSequence (class in vllm.beam_search) before_create (vllm.utils.MemoryProfilingResult attribute) before_profile (vllm.utils.MemoryProfilingResult attribute) begin (vllm.entrypoints.openai.protocol.StructuralTag attribute) begin() (vllm.compilation.vllm_inductor_pass.VllmInductorPass method) begin_forward() (vllm.attention.backends.abstract.AttentionState method) (vllm.attention.backends.flashinfer.FlashInferMetadata method) (vllm.attention.backends.flashinfer.FlashInferState method) (vllm.attention.backends.mla.common.MLACommonState method) (vllm.attention.backends.utils.CommonAttentionState method) benchmark() (in module vllm.benchmarks.serve) BENCHMARK_CMD_MODULES (in module vllm.entrypoints.cli.benchmark.main) BenchmarkDataset (class in vllm.benchmarks.datasets) BenchmarkLatencySubcommand (class in vllm.entrypoints.cli.benchmark.latency) BenchmarkMetrics (class in vllm.benchmarks.serve) BenchmarkServingSubcommand (class in vllm.entrypoints.cli.benchmark.serve) BenchmarkSubcommand (class in vllm.entrypoints.cli.benchmark.main) BenchmarkSubcommandBase (class in vllm.entrypoints.cli.benchmark.base) BenchmarkThroughputSubcommand (class in vllm.entrypoints.cli.benchmark.throughput) BertAttention (class in vllm.model_executor.models.bert) BertEmbedding (class in vllm.model_executor.models.bert) BertEmbeddingModel (class in vllm.model_executor.models.bert) BertEncoder (class in vllm.model_executor.models.bert) BertForSequenceClassification (class in vllm.model_executor.models.bert) BertIntermediate (class in vllm.model_executor.models.bert) BertLayer (class in vllm.model_executor.models.bert) BertModel (class in vllm.model_executor.models.bert) BertOutput (class in vllm.model_executor.models.bert) BertPooler (class in vllm.model_executor.models.bert) BertSelfAttention (class in vllm.model_executor.models.bert) BertSelfOutput (class in vllm.model_executor.models.bert) BertWithRope (class in vllm.model_executor.models.bert_with_rope) BertWithRopeAttention (class in vllm.model_executor.models.bert_with_rope) BertWithRopeBlock (class in vllm.model_executor.models.bert_with_rope) BertWithRopeEmbedding (class in vllm.model_executor.models.bert_with_rope) BertWithRopeEncoder (class in vllm.model_executor.models.bert_with_rope) BertWithRopeGatedMLP (class in vllm.model_executor.models.bert_with_rope) BertWithRopeMLP (class in vllm.model_executor.models.bert_with_rope) best_of (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) bfloat16 (vllm.scalar_type.scalar_types attribute) bgmv_expand() (in module vllm.lora.ops.torch_ops.lora_ops) (in module vllm.lora.ops.xla_ops.lora_ops) bgmv_expand_slice() (in module vllm.lora.ops.torch_ops.lora_ops) (in module vllm.lora.ops.xla_ops.lora_ops) bgmv_non_xla() (in module vllm.lora.ops.xla_ops.pallas) bgmv_shape_function() (in module vllm.lora.ops.xla_ops.pallas) bgmv_shrink() (in module vllm.lora.ops.torch_ops.lora_ops) (in module vllm.lora.ops.xla_ops.lora_ops) bgmv_xla() (in module vllm.lora.ops.xla_ops.pallas) bias (vllm.attention.ops.triton_flash_attention.MetaData attribute) (vllm.lora.layers.BaseLinearLayerWithLoRA property) (vllm.lora.peft_helper.PEFTHelper attribute) (vllm.scalar_type.ScalarType attribute) bias_enabled (vllm.config.LoRAConfig attribute) binary_mask_to_bias() (in module vllm.attention.ops.blocksparse_attention.utils) bind() (vllm.multimodal.processing.PromptUpdate method) bind_connector_metadata() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) bind_kv_cache() (in module vllm.utils) (in module vllm.v1.utils) BITBLAS_DTYPES (vllm.model_executor.layers.quantization.bitblas.BitBLASLinearMethod attribute) (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel attribute) bitblas_is_k_full() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) bitblas_make_empty_g_idx() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) bitblas_make_empty_zp() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) bitblas_matmul (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel attribute) BITBLAS_MIN_WEIGHT_SIZE_K (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) BITBLAS_MIN_WEIGHT_SIZE_N (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) BITBLAS_OPTIMIZE_FEATURES (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) BITBLAS_OPTIMIZE_FEATURES_CONTIGUOUS (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) bitblas_repeat_scales_on_all_ranks() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) bitblas_sort_g_idx() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) BITBLAS_SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) BITBLAS_SUPPORTED_NUM_BITS (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) BITBLAS_SUPPORTED_SYM (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) bitblas_tile_size (vllm.model_executor.parameter.PackedColumnParameter property) (vllm.model_executor.parameter.PackedvLLMParameter property) BitBLASConfig (class in vllm.model_executor.layers.quantization.bitblas) BitBLASLinearKernel (class in vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas) BitBLASLinearMethod (class in vllm.model_executor.layers.quantization.bitblas) BITSANDBYTES (vllm.config.LoadFormat attribute) BitsAndBytesConfig (class in vllm.model_executor.layers.quantization.bitsandbytes) BitsAndBytesLinearMethod (class in vllm.model_executor.layers.quantization.bitsandbytes) BitsAndBytesModelLoader (class in vllm.model_executor.model_loader.bitsandbytes_loader) Blip2DummyInputsBuilder (class in vllm.model_executor.models.blip2) Blip2ForConditionalGeneration (class in vllm.model_executor.models.blip2) Blip2ImageEmbeddingInputs (class in vllm.model_executor.models.blip2) Blip2ImageInputs (in module vllm.model_executor.models.blip2) Blip2ImagePixelInputs (class in vllm.model_executor.models.blip2) Blip2MultiModalProcessor (class in vllm.model_executor.models.blip2) Blip2ProcessingInfo (class in vllm.model_executor.models.blip2) Blip2QFormerAttention (class in vllm.model_executor.models.blip2) Blip2QFormerEncoder (class in vllm.model_executor.models.blip2) Blip2QFormerIntermediate (class in vllm.model_executor.models.blip2) Blip2QFormerLayer (class in vllm.model_executor.models.blip2) Blip2QFormerModel (class in vllm.model_executor.models.blip2) Blip2QFormerMultiHeadAttention (class in vllm.model_executor.models.blip2) Blip2QFormerOutput (class in vllm.model_executor.models.blip2) Blip2QFormerSelfOutput (class in vllm.model_executor.models.blip2) BlipAttention (class in vllm.model_executor.models.blip) BlipEncoder (class in vllm.model_executor.models.blip) BlipEncoderLayer (class in vllm.model_executor.models.blip) BlipMLP (class in vllm.model_executor.models.blip) BlipVisionEmbeddings (class in vllm.model_executor.models.blip) BlipVisionModel (class in vllm.model_executor.models.blip) Block (class in vllm.core.block.interfaces) BLOCK (vllm.model_executor.layers.fused_moe.layer.FusedMoeWeightScaleSupported attribute) Block.Factory (class in vllm.core.block.interfaces) block_dequant() (in module vllm.model_executor.layers.quantization.utils.int8_utils) block_groups (vllm.attention.ops.hpu_paged_attn.HPUPagedAttentionMetadata attribute) block_hash (vllm.v1.core.kv_cache_utils.KVCacheBlock property) block_hashes (vllm.distributed.kv_events.BlockRemoved attribute) (vllm.distributed.kv_events.BlockStored attribute) block_id (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) (vllm.v1.core.kv_cache_utils.KVCacheBlock attribute) block_ids (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) block_if_full() (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe method) block_indices (vllm.attention.ops.hpu_paged_attn.HPUPagedAttentionMetadata attribute) block_is_computed() (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) block_list (vllm.attention.ops.hpu_paged_attn.HPUPagedAttentionMetadata attribute) block_mapping (vllm.attention.ops.hpu_paged_attn.HPUPagedAttentionMetadata attribute) block_offsets (vllm.attention.ops.hpu_paged_attn.HPUPagedAttentionMetadata attribute) block_quant_to_tensor_quant() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) block_size (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) (vllm.config.CacheConfig attribute) (vllm.core.block.common.CacheMetricData attribute) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) (vllm.distributed.kv_events.BlockStored attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.v1.kv_cache_interface.KVCacheSpec attribute) block_table (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonDecodeMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata attribute) block_table_bound (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadata attribute) BLOCK_TABLE_EXTENDER (vllm.attention.backends.mla.common.MLACommonMetadataBuilder attribute) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadataBuilder attribute) block_tables (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.pallas.PallasMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.ops.paged_attn.PagedAttentionMetadata attribute) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.sequence.SequenceGroupMetadataDelta attribute) (vllm.v1.attention.backends.pallas.PallasMetadata attribute) block_tables_intra (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) block_tables_succ (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) block_usage (vllm.attention.ops.hpu_paged_attn.HPUPagedAttentionMetadata attribute) BlockAllocator (class in vllm.core.block.interfaces) BlockBase (class in vllm.model_executor.models.phi4mm_utils) BlockCollection (class in vllm.model_executor.models.molmo) BlockHashType (class in vllm.v1.core.kv_cache_utils) BlockId (in module vllm.core.block.common) (in module vllm.core.block.interfaces) BlockList (class in vllm.core.block.common) BlockMetaData (class in vllm.core.evictor) BlockPool (class in vllm.core.block.common) (class in vllm.v1.core.block_pool) BlockQuantScaleParameter (class in vllm.model_executor.parameter) BlockRemoved (class in vllm.distributed.kv_events) blocks (vllm.core.block.block_table.BlockTable property) (vllm.v1.core.kv_cache_manager.KVCacheBlocks attribute) blocks_to_copy (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.core.scheduler.SchedulerRunningOutputs attribute) (vllm.core.scheduler.SchedulerSwappedInOutputs attribute) (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.worker_base.WorkerInput attribute) blocks_to_swap_in (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.core.scheduler.SchedulerSwappedInOutputs attribute) (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.worker_base.WorkerInput attribute) blocks_to_swap_out (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.core.scheduler.SchedulerRunningOutputs attribute) (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.worker_base.WorkerInput attribute) BlockSize (in module vllm.config) BlockSpaceManager (class in vllm.core.interfaces) blocksparse_flash_attn_varlen_fwd() (in module vllm.attention.ops.blocksparse_attention.blocksparse_attention_kernel) BlocksparseFlashAttentionBackend (class in vllm.attention.backends.blocksparse_attn) BlocksparseFlashAttentionImpl (class in vllm.attention.backends.blocksparse_attn) BlocksparseFlashAttentionMetadata (class in vllm.attention.backends.blocksparse_attn) BlocksparseFlashAttentionMetadataBuilder (class in vllm.attention.backends.blocksparse_attn) BlocksparseParams (class in vllm.attention.backends.blocksparse_attn) BlockStored (class in vllm.distributed.kv_events) BlockTable (class in vllm.core.block.block_table) (class in vllm.v1.worker.block_table) BlockTracker (class in vllm.core.block.prefix_caching_block) BloomAttention (class in vllm.model_executor.models.bloom) BloomBlock (class in vllm.model_executor.models.bloom) BloomForCausalLM (class in vllm.model_executor.models.bloom) BloomMLP (class in vllm.model_executor.models.bloom) BloomModel (class in vllm.model_executor.models.bloom) body (vllm.entrypoints.openai.protocol.BatchRequestInput attribute) (vllm.entrypoints.openai.protocol.BatchResponseData attribute) bonus_logits_indices (vllm.v1.spec_decode.metadata.SpecDecodeMetadata attribute) bos_id (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor property) bos_token_id (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) BoundPromptUpdate (class in vllm.multimodal.processing) bpe2img() (vllm.model_executor.models.chameleon.ChameleonImageVocabularyMapping method) bpe2img_search_tensors() (vllm.model_executor.models.chameleon.ChameleonImageVocabularyMapping method) break_fp4_bytes() (in module vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils) broadcast() (vllm.distributed.device_communicators.pynccl.PyNcclCommunicator method) (vllm.distributed.parallel_state.GroupCoordinator method) broadcast_obj() (vllm.distributed.utils.StatelessProcessGroup method) broadcast_object() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue method) (vllm.distributed.parallel_state.GroupCoordinator method) broadcast_object_list() (vllm.distributed.parallel_state.GroupCoordinator method) broadcast_recv_src_counter (vllm.distributed.utils.StatelessProcessGroup attribute) broadcast_send_counter (vllm.distributed.utils.StatelessProcessGroup attribute) broadcast_tensor_dict() (in module vllm.distributed.communication_op) (vllm.distributed.parallel_state.GroupCoordinator method) BroadcastableModelInput (class in vllm.worker.model_runner_base) BrokenPipeException bs_to_padded_graph_size (vllm.config.CompilationConfig attribute) buffer_handle (vllm.distributed.device_communicators.shm_broadcast.Handle attribute) buffer_steps (vllm.config.KVEventsConfig attribute) buffer_type (in module vllm.distributed.device_communicators.pynccl_wrapper) build() (vllm.attention.backends.abstract.AttentionMetadataBuilder method) (vllm.attention.backends.cpu_mla.CPUMLAMetadataBuilder method) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadataBuilder method) (vllm.attention.backends.flash_attn.FlashAttentionMetadataBuilder method) (vllm.attention.backends.flashinfer.FlashInferMetadataBuilder method) (vllm.attention.backends.flashmla.FlashMLAMetadataBuilder method) (vllm.attention.backends.mla.common.MLACommonMetadataBuilder method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadataBuilder method) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadataBuilder method) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadataBuilder method) (vllm.attention.backends.utils.CommonMetadataBuilder method) (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadataBuilder method) (vllm.v1.attention.backends.flashinfer.FlashInferMetadataBuilder method) (vllm.v1.attention.backends.mla.common.MLACommonMetadataBuilder method) (vllm.worker.cpu_model_runner.ModelInputForCPUBuilder method) (vllm.worker.model_runner.ModelInputForGPUBuilder method) (vllm.worker.model_runner_base.ModelRunnerInputBuilderBase method) (vllm.worker.xpu_model_runner.ModelInputForXPUBuilder method) build_1_2_3_5_8_buckets() (in module vllm.engine.metrics) build_1_2_5_buckets() (in module vllm.engine.metrics) (in module vllm.v1.metrics.loggers) build_app() (in module vllm.entrypoints.api_server) (in module vllm.entrypoints.openai.api_server) build_async_engine_client() (in module vllm.entrypoints.openai.api_server) build_async_engine_client_from_engine_args() (in module vllm.entrypoints.openai.api_server) build_buckets() (in module vllm.engine.metrics) (in module vllm.v1.metrics.loggers) build_connector_meta() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector.LMCacheConnectorV1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorScheduler method) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnector method) build_elems() (vllm.multimodal.inputs.BaseMultiModalField method) (vllm.multimodal.inputs.MultiModalBatchedField method) (vllm.multimodal.inputs.MultiModalFieldConfig method) (vllm.multimodal.inputs.MultiModalFlatField method) (vllm.multimodal.inputs.MultiModalSharedField method) build_explicit_enc_dec_prompt() (in module vllm.inputs.data) build_guided_decoding_logits_processor_async() (in module vllm.engine.async_llm_engine) build_output() (vllm.model_executor.layers.pooler.SimplePooler method) build_transform() (in module vllm.model_executor.models.internvl) (in module vllm.model_executor.models.skyworkr1v) builder (vllm.worker.cpu_model_runner.CPUModelRunnerBase attribute) (vllm.worker.model_runner.GPUModelRunnerBase attribute) builtin_platform_plugins (in module vllm.platforms) BurstGPTDataset (class in vllm.benchmarks.datasets) bytecode_hook() (vllm.compilation.wrapper.TorchCompileWrapperWithCustomDispatcher method) bytes (vllm.entrypoints.openai.protocol.ChatCompletionLogProb attribute) bytestr (in module vllm.v1.serial_utils) C cache (vllm.model_executor.models.constant_size_cache.ConstantSizeCache property) (vllm.model_executor.models.mamba_cache.MambaCacheManager property) (vllm.model_executor.models.minimax_cache.MinimaxCacheManager property) (vllm.utils.LRUCache property) cache_blocks() (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) cache_config (vllm.config.VllmConfig attribute) cache_dir (vllm.config.CompilationConfig attribute) cache_dtype (vllm.config.CacheConfig attribute) cache_full_blocks() (vllm.v1.core.block_pool.BlockPool method) cache_salt (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.inputs.data.EmbedsInputs attribute) (vllm.inputs.data.EmbedsPrompt attribute) (vllm.inputs.data.TextPrompt attribute) (vllm.inputs.data.TokenInputs attribute) (vllm.inputs.data.TokensPrompt attribute) (vllm.multimodal.inputs.MultiModalInputs attribute) (vllm.v1.engine.EngineCoreRequest attribute) CacheConfig (class in vllm.config) cached_child_sampling_params (vllm.v1.engine.parallel_sampling.ParentRequest attribute) cached_feature_extractor_from_config() (in module vllm.transformers_utils.processor) cached_get_feature_extractor (in module vllm.transformers_utils.processor) cached_get_image_processor (in module vllm.transformers_utils.processor) cached_get_processor (in module vllm.transformers_utils.processor) cached_get_tokenizer (in module vllm.transformers_utils.tokenizer) cached_image_processor_from_config() (in module vllm.transformers_utils.processor) cached_outputs (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) cached_processor_from_config() (in module vllm.transformers_utils.processor) cached_tokenizer_from_config() (in module vllm.transformers_utils.tokenizer) cached_tokenizers (vllm.model_executor.guided_decoding.guidance_logits_processors.GuidanceLogitsProcessor attribute) cached_tokens (vllm.entrypoints.openai.protocol.PromptTokenUsageInfo attribute) CachedRequestData (class in vllm.v1.core.sched.output) CachedRequestState (class in vllm.v1.worker.gpu_input_batch) CacheDType (in module vllm.config) CacheEngine (class in vllm.worker.cache_engine) CacheInfo (class in vllm.utils) CacheMetricData (class in vllm.core.block.common) calc_kv_scales() (vllm.attention.layer.Attention method) calc_length() (in module vllm.model_executor.models.phi4mm_utils) calc_token_per_chunk() (in module vllm.model_executor.models.mllama) calculate_h2ovl_targets() (in module vllm.model_executor.models.h2ovl) calculate_hs_mask() (vllm.model_executor.models.phi4mm_audio.ConformerEncoder method) calculate_internvl_targets() (in module vllm.model_executor.models.internvl) calculate_kv_scales (vllm.config.CacheConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) calculate_metrics() (in module vllm.benchmarks.serve) calculate_skyworkr1v_targets() (in module vllm.model_executor.models.skyworkr1v) call_hf_processor() (vllm.inputs.registry.InputProcessingContext method) call_id (vllm.v1.engine.UtilityOutput attribute) call_module() (vllm.compilation.backends.PiecewiseCompileInterpreter method) call_utility() (vllm.v1.engine.core_client.SyncMPClient method) call_utility_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.DPAsyncMPClient method) CallableInductorPass (class in vllm.compilation.inductor_pass) can_actually_p2p() (in module vllm.distributed.device_communicators.custom_all_reduce_utils) can_allocate() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.v1.core.encoder_cache_manager.EncoderCacheManager method) can_append_slots() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) can_implement() (vllm.model_executor.layers.quantization.kernels.mixed_precision.allspark.AllSparkLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama.ExllamaLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.machete.MacheteLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.marlin.MarlinLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter.AiterScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.cutlass.CutlassScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel.ScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.triton.TritonScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.xla.XLAScaledMMLinearKernel class method) can_replace_layer() (vllm.lora.fully_sharded_layers.ColumnParallelLinearWithShardedLoRA class method) (vllm.lora.fully_sharded_layers.MergedColumnParallelLinearWithShardedLoRA class method) (vllm.lora.fully_sharded_layers.MergedQKVParallelLinearWithShardedLoRA class method) (vllm.lora.fully_sharded_layers.QKVParallelLinearWithShardedLoRA class method) (vllm.lora.fully_sharded_layers.RowParallelLinearWithShardedLoRA class method) (vllm.lora.layers.BaseLayerWithLoRA class method) (vllm.lora.layers.ColumnParallelLinearWithLoRA class method) (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA class method) (vllm.lora.layers.LogitsProcessorWithLoRA class method) (vllm.lora.layers.MergedColumnParallelLinearWithLoRA class method) (vllm.lora.layers.MergedQKVParallelLinearWithLoRA class method) (vllm.lora.layers.QKVParallelLinearWithLoRA class method) (vllm.lora.layers.ReplicatedLinearWithLoRA class method) (vllm.lora.layers.RowParallelLinearWithLoRA class method) (vllm.lora.layers.VocabParallelEmbeddingWithLoRA class method) can_schedule() (vllm.core.scheduler.PartialPrefillMetadata method) (vllm.core.scheduler.SchedulingBudget method) can_swap_in() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) can_swap_out() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) can_update_inplace() (vllm.platforms.interface.Platform class method) (vllm.platforms.tpu.TpuPlatform class method) candidate_resolutions (vllm.transformers_utils.configs.deepseek_vl2.DeepseekVLV2Config attribute) capacity (vllm.adapter_commons.models.AdapterModelManager property) (vllm.lora.models.LoRAModelManager property) (vllm.prompt_adapter.models.PromptAdapterModelManager property) (vllm.utils.LRUCache property) capture() (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce method) (vllm.worker.model_runner.CUDAGraphRunner method) capture_model() (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) capture_seq_group_metadata_stats() (vllm.worker.hpu_model_runner.HabanaProfilerCounterHelper method) cascade_attention() (in module vllm.v1.attention.backends.flash_attn) cascade_wrapper (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) cast_overflow_tensors() (in module vllm.model_executor.models.utils) cat_with_pad() (in module vllm.model_executor.models.phi4mm) causal (vllm.attention.ops.triton_flash_attention.MetaData attribute) causal_conv1d_fn() (in module vllm.model_executor.layers.mamba.ops.causal_conv1d) causal_conv1d_fwd() (in module vllm._custom_ops) causal_conv1d_update() (in module vllm._custom_ops) (in module vllm.model_executor.layers.mamba.ops.causal_conv1d) CausalConv1D (class in vllm.model_executor.models.phi4mm_utils) CausalConv2D (class in vllm.model_executor.models.phi4mm_utils) ccol_row_to_dense() (in module vllm.attention.ops.blocksparse_attention.utils) cdiv() (in module vllm.utils) cdiv_fn() (in module vllm.attention.ops.chunked_prefill_paged_decode) (in module vllm.attention.ops.triton_flash_attention) (in module vllm.attention.ops.triton_unified_attention) ceil_div() (in module vllm.attention.ops.nki_flash_attn) (in module vllm.model_executor.layers.fused_moe.moe_align_block_size) CFGLogitsProcessor (class in vllm.model_executor.guided_decoding.outlines_logits_processors) ChameleonAttention (class in vllm.model_executor.models.chameleon) ChameleonDecoderLayer (class in vllm.model_executor.models.chameleon) ChameleonDummyInputsBuilder (class in vllm.model_executor.models.chameleon) ChameleonForConditionalGeneration (class in vllm.model_executor.models.chameleon) ChameleonImagePixelInputs (class in vllm.model_executor.models.chameleon) ChameleonImageVocabularyMapping (class in vllm.model_executor.models.chameleon) ChameleonLayerNorm (class in vllm.model_executor.models.chameleon) ChameleonMLP (class in vllm.model_executor.models.chameleon) ChameleonModel (class in vllm.model_executor.models.chameleon) ChameleonMultiModalProcessor (class in vllm.model_executor.models.chameleon) ChameleonProcessingInfo (class in vllm.model_executor.models.chameleon) ChameleonSwinDecoderLayer (class in vllm.model_executor.models.chameleon) ChameleonVQVAE (class in vllm.model_executor.models.chameleon) ChameleonVQVAEEncoder (class in vllm.model_executor.models.chameleon) ChameleonVQVAEEncoderAttnBlock (class in vllm.model_executor.models.chameleon) ChameleonVQVAEEncoderConvDownsample (class in vllm.model_executor.models.chameleon) ChameleonVQVAEEncoderResnetBlock (class in vllm.model_executor.models.chameleon) ChameleonVQVAEVectorQuantizer (class in vllm.model_executor.models.chameleon) change_subsampling_conv_chunking_factor() (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) CHANNEL (vllm.model_executor.layers.fused_moe.layer.FusedMoeWeightScaleSupported attribute) channel_chunked_conv() (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) ChannelAttention (class in vllm.model_executor.models.florence2) ChannelBlock (class in vllm.model_executor.models.florence2) ChannelQuantScaleParameter (class in vllm.model_executor.parameter) chat() (in module vllm.entrypoints.cli.openai) (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.llm.LLM method) chat_completion_full_generator() (vllm.entrypoints.openai.serving_chat.OpenAIServingChat method) chat_completion_stream_generator() (vllm.entrypoints.openai.serving_chat.OpenAIServingChat method) chat_template (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) (vllm.entrypoints.openai.serving_engine.EmbeddingServeContext attribute) chat_template_content_format (vllm.entrypoints.openai.serving_engine.EmbeddingServeContext attribute) chat_template_kwargs (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) CHAT_TEMPLATES_DIR (in module vllm.transformers_utils.chat_templates.registry) ChatCommand (class in vllm.entrypoints.cli.openai) ChatCompletionContentPartAudioParam (class in vllm.entrypoints.chat_utils) ChatCompletionContentPartImageEmbedsParam (class in vllm.entrypoints.chat_utils) ChatCompletionContentPartParam (in module vllm.entrypoints.chat_utils) ChatCompletionContentPartVideoParam (class in vllm.entrypoints.chat_utils) ChatCompletionLogProb (class in vllm.entrypoints.openai.protocol) ChatCompletionLogProbs (class in vllm.entrypoints.openai.protocol) ChatCompletionLogProbsContent (class in vllm.entrypoints.openai.protocol) ChatCompletionMessageParam (in module vllm.entrypoints.chat_utils) ChatCompletionNamedFunction (class in vllm.entrypoints.openai.protocol) ChatCompletionNamedToolChoiceParam (class in vllm.entrypoints.openai.protocol) ChatCompletionRequest (class in vllm.entrypoints.openai.protocol) ChatCompletionResponse (class in vllm.entrypoints.openai.protocol) ChatCompletionResponseChoice (class in vllm.entrypoints.openai.protocol) ChatCompletionResponseStreamChoice (class in vllm.entrypoints.openai.protocol) ChatCompletionStreamResponse (class in vllm.entrypoints.openai.protocol) ChatCompletionToolsParam (class in vllm.entrypoints.openai.protocol) ChatGLMBaseModel (class in vllm.model_executor.models.chatglm) ChatGLMConfig (class in vllm.transformers_utils.configs.chatglm) ChatGLMForCausalLM (class in vllm.model_executor.models.chatglm) ChatGLMModel (class in vllm.model_executor.models.chatglm) ChatLikeRequest (in module vllm.entrypoints.openai.serving_engine) ChatMessage (class in vllm.entrypoints.openai.protocol) ChatTemplateContentFormatOption (in module vllm.entrypoints.chat_utils) ChatTemplatePath (in module vllm.transformers_utils.chat_templates.registry) check_24() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) check_allspark_supported_dtype_shape() (in module vllm.model_executor.layers.quantization.utils.allspark_utils) check_and_maybe_quantize_qkv() (in module vllm.attention.ops.triton_flash_attention) check_and_update_config() (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.hpu.HpuPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.neuron.NeuronPlatform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform class method) check_args() (vllm.attention.ops.triton_flash_attention.MetaData method) check_bitblas_supported() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) check_bitblas_supports_shape() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) check_cache_salt_support() (vllm.entrypoints.openai.protocol.ChatCompletionRequest class method) check_current_rank() (vllm.model_executor.layers.quantization.schema.KVCacheQuantSchema method) check_enough_kv_cache_memory() (in module vllm.v1.core.kv_cache_utils) check_equal_or_regex_match() (in module vllm.model_executor.layers.quantization.compressed_tensors.utils) (in module vllm.model_executor.layers.quantization.quark.utils) check_error() (vllm.v1.structured_output.backend_guidance.GuidanceGrammar method) check_for_ending_compilation() (vllm.compilation.backends.PiecewiseBackend method) check_generation_prompt() (vllm.entrypoints.openai.protocol.ChatCompletionRequest class method) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest class method) (vllm.entrypoints.openai.protocol.TokenizeChatRequest class method) check_gguf_file() (in module vllm.transformers_utils.utils) check_goodput_args() (in module vllm.benchmarks.serve) check_guided_decoding_count() (vllm.entrypoints.openai.protocol.ChatCompletionRequest class method) (vllm.entrypoints.openai.protocol.CompletionRequest class method) check_health() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.executor.executor_base.ExecutorBase method) (vllm.executor.mp_distributed_executor.MultiprocessingDistributedExecutor method) (vllm.executor.ray_distributed_executor.RayDistributedExecutor method) (vllm.executor.uniproc_executor.UniProcExecutor method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.executor.multiproc_executor.MultiprocExecutor method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.v1.worker.worker_base.WorkerBase method) check_health_async() (vllm.executor.executor_base.ExecutorBase method) check_is_fp8() (vllm.model_executor.layers.quantization.schema.KVCacheQuantSchema method) check_logprobs() (vllm.entrypoints.openai.protocol.ChatCompletionRequest class method) (vllm.entrypoints.openai.protocol.CompletionRequest class method) check_lora_name() (vllm.lora.models.LoRAModel method) check_machete_supports_shape() (in module vllm.model_executor.layers.quantization.utils.machete_utils) check_marlin_supported() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) check_marlin_supports_layer() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) check_marlin_supports_shape() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) check_model_type() (vllm.model_executor.layers.quantization.schema.QuantParamSchema method) check_moe_marlin_supports_layer() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) check_no_caching_or_swa_for_blockmgr_encdec() (in module vllm.core.block.utils) check_port() (vllm.utils.FlexibleArgumentParser method) check_release_file() (in module vllm.collect_env) check_stop() (in module vllm.v1.core.sched.utils) check_stop_strings() (vllm.engine.output_processor.stop_checker.StopChecker static method) check_tool_usage() (vllm.entrypoints.openai.protocol.ChatCompletionRequest class method) check_tp_ranks() (vllm.model_executor.layers.quantization.schema.KVCacheQuantSchema method) check_type_for_url() (vllm.entrypoints.openai.protocol.BatchRequestInput class method) check_use_alibi() (in module vllm.utils) check_valid_update() (vllm.v1.stats.common.RequestStatsUpdate static method) child_requests (vllm.v1.engine.parallel_sampling.ParentRequest attribute) CHOICE (vllm.model_executor.guided_decoding.outlines_decoding.GuidedDecodingMode attribute) choice (vllm.sampling_params.GuidedDecodingParams attribute) CHOICE (vllm.v1.structured_output.backend_types.StructuredOutputOptions attribute) choice_as_grammar() (in module vllm.v1.structured_output.utils) (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig static method) choices (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.ChatCompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.CompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.TranscriptionStreamResponse attribute) choose_mp_linear_kernel() (in module vllm.model_executor.layers.quantization.kernels.mixed_precision) choose_scaled_mm_linear_kernel() (in module vllm.model_executor.layers.quantization.kernels.scaled_mm) chunk_indices (vllm.model_executor.layers.mamba.mamba2_metadata.Mamba2Metadata attribute) chunk_list() (in module vllm.utils) chunk_offsets (vllm.model_executor.layers.mamba.mamba2_metadata.Mamba2Metadata attribute) chunk_size (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) (vllm.model_executor.layers.mamba.mamba2_metadata.Mamba2Metadata attribute) chunk_state_varlen() (in module vllm.model_executor.layers.mamba.ops.ssd_chunk_state) chunked_context (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata attribute) chunked_prefill (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) chunked_prefill_enabled (vllm.config.SchedulerConfig attribute) chunked_prefill_paged_decode() (in module vllm.attention.ops.chunked_prefill_paged_decode) clamp_prompt_logprobs() (in module vllm.entrypoints.openai.serving_engine) class_token (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) ClassificationData (class in vllm.entrypoints.openai.protocol) ClassificationMixin (class in vllm.entrypoints.openai.serving_classification) ClassificationOutput (class in vllm.outputs) ClassificationRequest (class in vllm.entrypoints.openai.protocol) ClassificationRequestOutput (class in vllm.outputs) ClassificationResponse (class in vllm.entrypoints.openai.protocol) ClassificationServeContext (in module vllm.entrypoints.openai.serving_engine) classify() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.llm.LLM method) ClassRegistry (class in vllm.utils) cleanup() (vllm.engine.multiprocessing.engine.MQLLMEngine method) cleanup_dist_env_and_memory() (in module vllm.distributed.parallel_state) CLEANUP_THRESHOLD (vllm.core.evictor.LRUEvictor attribute) clear() (vllm.utils.LRUCache method) (vllm.v1.utils.ConstantList method) (vllm.v1.worker.block_table.BlockTable method) clear_backend() (vllm.v1.structured_output.StructuredOutputManager method) clear_connector_metadata() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) clear_copy_on_writes() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) clear_cows() (vllm.core.block.common.CopyOnWriteTracker method) clear_inf() (vllm.benchmarks.utils.InfEncoder method) cli_env_setup() (in module vllm.entrypoints.utils) CLIP_VIT_LARGE_PATCH14_336_CONFIG (in module vllm.model_executor.models.phi3v) CLIPAttention (class in vllm.model_executor.models.clip) CLIPEncoder (class in vllm.model_executor.models.clip) CLIPEncoderInfo (class in vllm.model_executor.models.clip) CLIPEncoderLayer (class in vllm.model_executor.models.clip) CLIPMLP (class in vllm.model_executor.models.clip) CLIPVisionEmbeddings (class in vllm.model_executor.models.clip) CLIPVisionModel (class in vllm.model_executor.models.clip) CLIPVisionTransformer (class in vllm.model_executor.models.clip) CLISubcommand (class in vllm.entrypoints.cli.types) clone() (vllm.compilation.counter.CompilationCounter method) (vllm.lora.models.LoRAModel method) (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor method) (vllm.pooling_params.PoolingParams method) (vllm.sampling_params.SamplingParams method) (vllm.sequence.ExecuteModelRequest method) close() (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce method) (vllm.distributed.kv_transfer.kv_connector.base.KVConnectorBase method) (vllm.distributed.kv_transfer.kv_connector.lmcache_connector.LMCacheConnector method) (vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector.MooncakeStoreConnector method) (vllm.distributed.kv_transfer.kv_connector.simple_connector.SimpleConnector method) (vllm.distributed.kv_transfer.kv_connector_agent.KVTransferAgent method) (vllm.distributed.kv_transfer.kv_lookup_buffer.base.KVCacheBufferBase method) (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStore method) (vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer.SimpleBuffer method) (vllm.distributed.kv_transfer.kv_pipe.base.KVPipeBase method) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakePipe method) (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.executor.multiproc_worker_utils.ResultHandler method) (vllm.executor.multiproc_worker_utils.WorkerMonitor method) (vllm.v1.engine.core_client.CoreEngine method) CLS (vllm.model_executor.layers.pooler.PoolingType attribute) cls_to_become (vllm.model_executor.layers.quantization.gguf.GGUFUninitializedParameter attribute) CLSPool (class in vllm.model_executor.layers.pooler) cmd() (vllm.entrypoints.cli.benchmark.base.BenchmarkSubcommandBase static method) (vllm.entrypoints.cli.benchmark.latency.BenchmarkLatencySubcommand static method) (vllm.entrypoints.cli.benchmark.main.BenchmarkSubcommand static method) (vllm.entrypoints.cli.benchmark.serve.BenchmarkServingSubcommand static method) (vllm.entrypoints.cli.benchmark.throughput.BenchmarkThroughputSubcommand static method) (vllm.entrypoints.cli.collect_env.CollectEnvSubcommand static method) (vllm.entrypoints.cli.openai.ChatCommand static method) (vllm.entrypoints.cli.openai.CompleteCommand static method) (vllm.entrypoints.cli.serve.ServeSubcommand static method) (vllm.entrypoints.cli.types.CLISubcommand static method) cmd_init() (in module vllm.entrypoints.cli.benchmark.latency) (in module vllm.entrypoints.cli.benchmark.main) (in module vllm.entrypoints.cli.benchmark.serve) (in module vllm.entrypoints.cli.benchmark.throughput) (in module vllm.entrypoints.cli.collect_env) (in module vllm.entrypoints.cli.openai) (in module vllm.entrypoints.cli.serve) CMD_MODULES (in module vllm.entrypoints.cli.main) code (vllm.entrypoints.openai.protocol.ErrorResponse attribute) code_revision (vllm.config.ModelConfig attribute) (vllm.config.SpeculativeConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) Cohere2Config (class in vllm.transformers_utils.configs.cohere2) CohereAttention (class in vllm.model_executor.models.commandr) CohereDecoderLayer (class in vllm.model_executor.models.commandr) CohereForCausalLM (class in vllm.model_executor.models.commandr) CohereMLP (class in vllm.model_executor.models.commandr) CohereModel (class in vllm.model_executor.models.commandr) collect_detailed_traces (vllm.config.ObservabilityConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) collect_from_async_generator() (in module vllm.utils) collect_model_execute_time() (vllm.config.ObservabilityConfig method) collect_model_forward_time() (vllm.config.ObservabilityConfig method) CollectEnvSubcommand (class in vllm.entrypoints.cli.collect_env) collective_rpc() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.entrypoints.llm.LLM method) (vllm.executor.executor_base.DistributedExecutorBase method) (vllm.executor.executor_base.ExecutorBase method) (vllm.executor.uniproc_executor.UniProcExecutor method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.executor.multiproc_executor.MultiprocExecutor method) collective_rpc_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) ColumnParallelConv2dPatch (class in vllm.model_executor.models.mllama) ColumnParallelLinear (class in vllm.model_executor.layers.linear) ColumnParallelLinearWithLoRA (class in vllm.lora.layers) ColumnParallelLinearWithShardedLoRA (class in vllm.lora.fully_sharded_layers) combine_hidden_states() (vllm.model_executor.models.llama_eagle3.Eagle3LlamaForCausalLM method) commit() (vllm.v1.worker.block_table.BlockTable method) common_attention_args() (vllm.attention.backends.hpu_attn.HPUAttentionImpl method) common_prefix_len (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) CommonAttentionMetadata (class in vllm.v1.attention.backends.utils) CommonAttentionState (class in vllm.attention.backends.utils) CommonMetadataBuilder (class in vllm.attention.backends.utils) compilation_config (vllm.compilation.backends.VllmBackend attribute) (vllm.config.VllmConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) compilation_counter (in module vllm.compilation.counter) compilation_start_time (in module vllm.compilation.backends) compilation_time (vllm.config.CompilationConfig attribute) CompilationConfig (class in vllm.config) CompilationCounter (class in vllm.compilation.counter) CompilationLevel (class in vllm.config) compile() (vllm.compilation.backends.CompilerManager method) (vllm.compilation.compiler_interface.CompilerInterface method) (vllm.compilation.compiler_interface.EagerAdaptor method) (vllm.compilation.compiler_interface.InductorAdaptor method) (vllm.compilation.compiler_interface.InductorStandaloneAdaptor method) compile_grammar() (vllm.v1.structured_output.backend_guidance.GuidanceBackend method) (vllm.v1.structured_output.backend_types.StructuredOutputBackend method) (vllm.v1.structured_output.backend_xgrammar.XgrammarBackend method) compile_model() (in module vllm.model_executor.model_loader.neuronx_distributed) compile_or_warm_up_model() (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.v1.worker.worker_base.WorkerBase method) compile_sizes (vllm.config.CompilationConfig attribute) compiled (vllm.compilation.backends.ConcreteSizeEntry attribute) compiler_manager (vllm.compilation.backends.VllmBackend attribute) CompilerInterface (class in vllm.compilation.compiler_interface) CompilerManager (class in vllm.compilation.backends) CompleteCommand (class in vllm.entrypoints.cli.openai) completed (vllm.benchmarks.serve.BenchmarkMetrics attribute) completed() (vllm.entrypoints.openai.run_batch.BatchProgressTracker method) completed_block_cache_hit_rate (vllm.core.block.common.CacheMetricData attribute) completion() (in module vllm.entrypoints.openai.api_server) completion_seq_group_output_builder() (in module vllm.worker.multi_step_model_runner) completion_stream_generator() (vllm.entrypoints.openai.serving_completion.OpenAIServingCompletion method) completion_tokens (vllm.entrypoints.openai.protocol.UsageInfo attribute) CompletionLikeRequest (in module vllm.entrypoints.openai.serving_engine) CompletionLogProbs (class in vllm.entrypoints.openai.protocol) CompletionOutput (class in vllm.outputs) CompletionRequest (class in vllm.entrypoints.openai.protocol) CompletionResponse (class in vllm.entrypoints.openai.protocol) CompletionResponseChoice (class in vllm.entrypoints.openai.protocol) CompletionResponseStreamChoice (class in vllm.entrypoints.openai.protocol) CompletionSequenceGroupOutput (class in vllm.sequence) CompletionStreamResponse (class in vllm.entrypoints.openai.protocol) composed_weight_loader() (in module vllm.model_executor.model_loader.weight_utils) compress_quantized_24_weight() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) CompressedTensors24 (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_24) CompressedTensorsConfig (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors) CompressedTensorsKVCacheMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors) CompressedTensorsLinearMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors) CompressedTensorsMoEMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) CompressedTensorsScheme (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_scheme) CompressedTensorsW4A16Fp4 (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_nvfp4) CompressedTensorsW4A16Sparse24 (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24) CompressedTensorsW8A16Fp8 (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8) CompressedTensorsW8A8Fp8 (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_fp8) CompressedTensorsW8A8Fp8MoECutlassMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) CompressedTensorsW8A8Fp8MoEMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) CompressedTensorsW8A8Int8 (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8) CompressedTensorsW8A8Int8MoEMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) CompressedTensorsWNA16 (class in vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16) CompressedTensorsWNA16MarlinMoEMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) CompressedTensorsWNA16MoEMethod (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) compression_ratio (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) compute_alibi_block() (in module vllm.attention.ops.triton_flash_attention) compute_alibi_tensor() (in module vllm.attention.ops.triton_flash_attention) compute_attn_mask_seqlen() (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionTransformer method) compute_encoder_budget() (in module vllm.v1.core.encoder_cache_manager) compute_hash() (in module vllm.envs) (vllm.compilation.backends.CompilerManager method) (vllm.compilation.compiler_interface.CompilerInterface method) (vllm.compilation.compiler_interface.InductorAdaptor method) (vllm.compilation.compiler_interface.InductorStandaloneAdaptor method) (vllm.config.CacheConfig method) (vllm.config.CompilationConfig method) (vllm.config.DecodingConfig method) (vllm.config.DeviceConfig method) (vllm.config.KVTransferConfig method) (vllm.config.LoadConfig method) (vllm.config.LoRAConfig method) (vllm.config.ModelConfig method) (vllm.config.MultiModalConfig method) (vllm.config.ObservabilityConfig method) (vllm.config.ParallelConfig method) (vllm.config.PoolerConfig method) (vllm.config.PromptAdapterConfig method) (vllm.config.SchedulerConfig method) (vllm.config.SpeculativeConfig method) (vllm.config.SupportsHash method) (vllm.config.VllmConfig method) compute_lens_change() (vllm.model_executor.models.phi4mm_audio.TransformerEncoderBase method) compute_logits() (vllm.model_executor.model_loader.neuron.NeuronCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronMllamaForCausalLM method) (vllm.model_executor.models.arctic.ArcticForCausalLM method) (vllm.model_executor.models.aria.AriaForConditionalGeneration method) (vllm.model_executor.models.aya_vision.AyaVisionForConditionalGeneration method) (vllm.model_executor.models.baichuan.BaiChuanBaseForCausalLM method) (vllm.model_executor.models.bamba.BambaForCausalLM method) (vllm.model_executor.models.bart.BartForConditionalGeneration method) (vllm.model_executor.models.blip2.Blip2ForConditionalGeneration method) (vllm.model_executor.models.bloom.BloomForCausalLM method) (vllm.model_executor.models.chameleon.ChameleonForConditionalGeneration method) (vllm.model_executor.models.chatglm.ChatGLMBaseModel method) (vllm.model_executor.models.commandr.CohereForCausalLM method) (vllm.model_executor.models.dbrx.DbrxForCausalLM method) (vllm.model_executor.models.deepseek.DeepseekForCausalLM method) (vllm.model_executor.models.deepseek_mtp.DeepSeekMTP method) (vllm.model_executor.models.deepseek_mtp.DeepSeekMultiTokenPredictor method) (vllm.model_executor.models.deepseek_v2.DeepseekV2ForCausalLM method) (vllm.model_executor.models.deepseek_vl2.DeepseekVLV2ForCausalLM method) (vllm.model_executor.models.eagle.EAGLE method) (vllm.model_executor.models.exaone.ExaoneForCausalLM method) (vllm.model_executor.models.falcon.FalconForCausalLM method) (vllm.model_executor.models.florence2.Florence2ForConditionalGeneration method) (vllm.model_executor.models.florence2.Florence2LanguageForConditionalGeneration method) (vllm.model_executor.models.fuyu.FuyuForCausalLM method) (vllm.model_executor.models.gemma.GemmaForCausalLM method) (vllm.model_executor.models.gemma2.Gemma2ForCausalLM method) (vllm.model_executor.models.gemma3.Gemma3ForCausalLM method) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) (vllm.model_executor.models.glm4.Glm4ForCausalLM method) (vllm.model_executor.models.gpt2.GPT2LMHeadModel method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeForCausalLM method) (vllm.model_executor.models.gpt_j.GPTJForCausalLM method) (vllm.model_executor.models.gpt_neox.GPTNeoXForCausalLM method) (vllm.model_executor.models.granite.GraniteForCausalLM method) (vllm.model_executor.models.granite_speech.GraniteSpeechForConditionalGeneration method) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM method) (vllm.model_executor.models.grok1.Grok1ForCausalLM method) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration method) (vllm.model_executor.models.interfaces_base.VllmModelForTextGeneration method) (vllm.model_executor.models.internlm2.InternLM2ForCausalLM method) (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.jais.JAISLMHeadModel method) (vllm.model_executor.models.jamba.JambaForCausalLM method) (vllm.model_executor.models.kimi_vl.KimiVLForConditionalGeneration method) (vllm.model_executor.models.llama.LlamaForCausalLM method) (vllm.model_executor.models.llama_eagle3.Eagle3LlamaForCausalLM method) (vllm.model_executor.models.llava.LlavaForConditionalGeneration method) (vllm.model_executor.models.llava_next.LlavaNextForConditionalGeneration method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoForConditionalGeneration method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) (vllm.model_executor.models.mamba.MambaForCausalLM method) (vllm.model_executor.models.mamba2.Mamba2ForCausalLM method) (vllm.model_executor.models.medusa.Medusa method) (vllm.model_executor.models.mimo.MiMoForCausalLM method) (vllm.model_executor.models.mimo_mtp.MiMoMTP method) (vllm.model_executor.models.mimo_mtp.MiMoMultiTokenPredictor method) (vllm.model_executor.models.minicpm.MiniCPMForCausalLM method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01ForCausalLM method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ForConditionalGeneration method) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration method) (vllm.model_executor.models.mixtral.MixtralForCausalLM method) (vllm.model_executor.models.mixtral_quant.MixtralForCausalLM method) (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration method) (vllm.model_executor.models.molmo.MolmoForCausalLM method) (vllm.model_executor.models.mpt.MPTForCausalLM method) (vllm.model_executor.models.nemotron.NemotronForCausalLM method) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM method) (vllm.model_executor.models.olmo.OlmoForCausalLM method) (vllm.model_executor.models.olmo2.Olmo2ForCausalLM method) (vllm.model_executor.models.olmoe.OlmoeForCausalLM method) (vllm.model_executor.models.opt.OPTForCausalLM method) (vllm.model_executor.models.orion.OrionForCausalLM method) (vllm.model_executor.models.ovis.Ovis method) (vllm.model_executor.models.paligemma.PaliGemmaForConditionalGeneration method) (vllm.model_executor.models.persimmon.PersimmonForCausalLM method) (vllm.model_executor.models.phi.PhiForCausalLM method) (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) (vllm.model_executor.models.phi3v.Phi3VForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM method) (vllm.model_executor.models.pixtral.PixtralForConditionalGeneration method) (vllm.model_executor.models.plamo2.Plamo2ForCausalLM method) (vllm.model_executor.models.qwen.QWenBaseModel method) (vllm.model_executor.models.qwen2.Qwen2ForCausalLM method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioForConditionalGeneration method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeForCausalLM method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) (vllm.model_executor.models.qwen3.Qwen3ForCausalLM method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeForCausalLM method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) (vllm.model_executor.models.solar.SolarForCausalLM method) (vllm.model_executor.models.stablelm.StablelmForCausalLM method) (vllm.model_executor.models.starcoder2.Starcoder2ForCausalLM method) (vllm.model_executor.models.transformers.TransformersForCausalLM method) (vllm.model_executor.models.ultravox.UltravoxModel method) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration method) (vllm.model_executor.models.zamba2.Zamba2ForCausalLM method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.hpu_model_runner.HpuModelAdapter method) compute_logprobs() (vllm.v1.sample.sampler.Sampler method) (vllm.v1.sample.tpu.sampler.Sampler method) compute_meta() (in module vllm.lora.punica_wrapper.utils) compute_probs() (in module vllm.v1.sample.rejection_sampler) compute_probs_and_sample_next_token() (in module vllm.v1.spec_decode.eagle) compute_slot_mapping() (in module vllm.attention.backends.utils) compute_slot_mapping_start_idx() (in module vllm.attention.backends.utils) computed (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) computed_block_nums (vllm.sequence.SequenceGroupMetadata attribute) (vllm.sequence.SequenceGroupMetadataDelta attribute) ComputedBlocksTracker (class in vllm.core.block.prefix_caching_block) concat_and_cache_mla() (in module vllm._custom_ops) ConcreteSizeEntry (class in vllm.compilation.backends) condense() (vllm.v1.worker.gpu_input_batch.InputBatch method) config (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) config() (in module vllm.config) config_class (vllm.model_executor.models.blip.BlipVisionModel attribute) (vllm.model_executor.models.clip.CLIPVisionModel attribute) (vllm.model_executor.models.mllama.MllamaForCausalLM attribute) (vllm.model_executor.models.mllama.MllamaTextModel attribute) (vllm.model_executor.models.moonvit.MoonVitPretrainedModel attribute) (vllm.model_executor.models.siglip.SiglipVisionModel attribute) config_format (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) config_home (vllm.executor.ray_distributed_executor.RayDistributedExecutor attribute) config_verify() (vllm.model_executor.models.bert_with_rope.BertWithRope method) (vllm.model_executor.models.bert_with_rope.GteModel method) (vllm.model_executor.models.bert_with_rope.JinaRobertaModel method) (vllm.model_executor.models.bert_with_rope.NomicBertModel method) ConfigFormat (class in vllm.transformers_utils.config) ConfigT (in module vllm.config) configure() (vllm.compilation.pass_manager.PostGradPassManager method) configure_bitblas_matmul() (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel method) configure_post_pass() (vllm.compilation.backends.VllmBackend method) configure_quant_config() (in module vllm.model_executor.model_loader.utils) ConformerEncoder (class in vllm.model_executor.models.phi4mm_audio) ConformerEncoderLayer (class in vllm.model_executor.models.phi4mm_audio) connector (vllm.model_executor.models.module_mapping.MultiModelKeys attribute) ConstantList (class in vllm.v1.utils) ConstantSizeCache (class in vllm.model_executor.models.constant_size_cache) construct_image_indicators() (vllm.transformers_utils.processors.ovis.OvisProcessor method) construct_image_placeholders() (vllm.transformers_utils.processors.ovis.OvisProcessor method) consume_space() (in module vllm.entrypoints.openai.tool_parsers.utils) consumer() (in module vllm.distributed.device_communicators.custom_all_reduce_utils) contains() (vllm.utils.ClassRegistry method) contains_object_print() (in module vllm.config) contains_trace_headers() (in module vllm.tracing) contains_type() (in module vllm.engine.arg_utils) content (vllm.entrypoints.chat_utils.ConversationMessage attribute) (vllm.entrypoints.chat_utils.CustomChatCompletionMessageParam attribute) (vllm.entrypoints.openai.protocol.ChatCompletionLogProbs attribute) (vllm.entrypoints.openai.protocol.ChatMessage attribute) (vllm.entrypoints.openai.protocol.DeltaMessage attribute) (vllm.entrypoints.openai.protocol.ExtractedToolCallInformation attribute) (vllm.inputs.parse.ParsedEmbedsPrompt attribute) (vllm.inputs.parse.ParsedStrPrompt attribute) (vllm.inputs.parse.ParsedText attribute) (vllm.inputs.parse.ParsedTextPrompt attribute) (vllm.inputs.parse.ParsedTokens attribute) (vllm.inputs.parse.ParsedTokensPrompt attribute) (vllm.multimodal.processing.BoundPromptUpdate property) (vllm.multimodal.processing.PromptInsertion property) (vllm.multimodal.processing.PromptReplacement property) (vllm.multimodal.processing.PromptUpdate property) content_hash (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) context_attention_fwd() (in module vllm.attention.ops.prefix_prefill) context_chunk_cu_seq_lens (vllm.attention.backends.mla.common.MLACommonMetadata attribute) context_chunk_max_seq_lens (vllm.attention.backends.mla.common.MLACommonMetadata attribute) context_chunk_seq_tot (vllm.attention.backends.mla.common.MLACommonMetadata attribute) context_chunk_starts (vllm.attention.backends.mla.common.MLACommonMetadata attribute) context_chunk_workspace (vllm.attention.backends.mla.common.MLACommonMetadata attribute) context_length (vllm.lora.peft_helper.PEFTHelper attribute) context_lens (vllm.attention.backends.pallas.PallasMetadata attribute) (vllm.v1.attention.backends.pallas.PallasMetadata attribute) context_lens_tensor (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.hpu_attn.HPUAttentionMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) context_manager (in module vllm.compilation.monitor) continue_final_message (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) continuous_usage_stats (vllm.entrypoints.openai.protocol.StreamOptions attribute) conv_split_by_batch() (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) conv_split_by_channel() (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) conv_state (vllm.model_executor.models.mamba_cache.MambaCacheParams attribute) ConvEmbed (class in vllm.model_executor.models.florence2) ConversationDataset (class in vllm.benchmarks.datasets) ConversationMessage (class in vllm.entrypoints.chat_utils) convert_bin_to_safetensor_file() (in module vllm.model_executor.model_loader.weight_utils) convert_dense_cross_attention_mask_to_tensor() (in module vllm.model_executor.models.mllama) convert_fp8() (in module vllm._custom_ops) convert_ids_list_to_tokens() (in module vllm.transformers_utils.detokenizer_utils) convert_ids_to_tokens() (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) convert_img2bpe() (vllm.model_executor.models.chameleon.ChameleonImageVocabularyMapping method) convert_lark_to_ebnf() (in module vllm.v1.structured_output.utils) convert_lark_to_gbnf() (in module vllm.model_executor.guided_decoding.utils) convert_mapping() (in module vllm.lora.punica_wrapper.utils) (in module vllm.prompt_adapter.models) convert_prompt_ids_to_tokens() (in module vllm.transformers_utils.detokenizer_utils) convert_pyslice_to_tensor() (in module vllm.model_executor.model_loader.weight_utils) convert_sparse_cross_attention_mask_to_dense() (in module vllm.model_executor.models.mllama) convert_stats_to_dict() (vllm.profiler.layerwise_profile.LayerwiseProfileResults method) convert_swizzled_to_linear() (in module vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils) convert_to_channelwise() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) convert_to_embedding_indices() (in module vllm.prompt_adapter.models) convert_to_pytorch_benchmark_format() (in module vllm.benchmarks.utils) convert_tokens_to_string() (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) convert_vertical_slash_indexes() (in module vllm._custom_ops) convert_vertical_slash_indexes_mergehead() (in module vllm._custom_ops) ConvModule (class in vllm.model_executor.models.phi4mm_utils) copy() (vllm.worker.cache_engine.CacheEngine method) (vllm.worker.cpu_worker.CPUCacheEngine method) copy_blocks() (in module vllm._custom_ops) (vllm._ipex_ops.ipex_ops static method) (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.hpu_attn.HPUAttentionBackend static method) (vllm.attention.backends.ipex_attn.IpexAttnBackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.backends.pallas.PallasAttentionBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.attention.ops.hpu_paged_attn.HPUPagedAttention static method) (vllm.attention.ops.paged_attn.PagedAttention static method) copy_blocks_mla() (in module vllm._custom_ops) copy_inputs_before_cuda_graphs() (vllm.model_executor.models.bamba.BambaForCausalLM method) (vllm.model_executor.models.constant_size_cache.ConstantSizeCache method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM method) (vllm.model_executor.models.jamba.JambaForCausalLM method) (vllm.model_executor.models.mamba.MambaForCausalLM method) (vllm.model_executor.models.mamba2.Mamba2ForCausalLM method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01ForCausalLM method) (vllm.model_executor.models.plamo2.Plamo2ForCausalLM method) (vllm.model_executor.models.zamba2.Zamba2ForCausalLM method) copy_slice() (in module vllm.v1.utils) CopyOnWriteTracker (class in vllm.core.block.common) core_engines (vllm.v1.engine.core_client.BackgroundResources attribute) CoreEngine (class in vllm.v1.engine.core_client) count (vllm.entrypoints.openai.protocol.TokenizeResponse attribute) Counter (class in vllm.utils) counter_after_loading_weights (vllm.model_executor.model_loader.default_loader.DefaultModelLoader attribute) counter_before_loading_weights (vllm.model_executor.model_loader.default_loader.DefaultModelLoader attribute) cow_block_if_not_appendable() (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) cprofile() (in module vllm.utils) cprofile_context() (in module vllm.utils) CPU (vllm.platforms.interface.PlatformEnum attribute) (vllm.utils.Device attribute) cpu_backup_tensor (vllm.device_allocator.cumem.AllocationData attribute) cpu_cache_usage_sys (vllm.engine.metrics_types.Stats attribute) CPU_DEVICE (in module vllm.model_executor.models.minicpmo) cpu_group (vllm.distributed.parallel_state.GroupCoordinator attribute) cpu_offload_gb (vllm.config.CacheConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) cpu_platform_plugin() (in module vllm.platforms) cpu_prefix_cache_hit_rate (vllm.engine.metrics_types.Stats attribute) cpu_time_us (vllm.profiler.layerwise_profile.ModelStatsEntry attribute) CpuArchEnum (class in vllm.platforms.interface) CPUCacheEngine (class in vllm.worker.cpu_worker) CpuCommunicator (class in vllm.distributed.device_communicators.cpu_communicator) CPUEncoderDecoderModelRunner (class in vllm.worker.cpu_enc_dec_model_runner) CpuGpuBlockAllocator (class in vllm.core.block.cpu_gpu_block_allocator) CPUMLABackend (class in vllm.attention.backends.cpu_mla) CPUMLAImpl (class in vllm.attention.backends.cpu_mla) CPUMLAMetadata (class in vllm.attention.backends.cpu_mla) CPUMLAMetadataBuilder (class in vllm.attention.backends.cpu_mla) CPUModelRunner (class in vllm.worker.cpu_model_runner) CPUModelRunnerBase (class in vllm.worker.cpu_model_runner) CpuPlatform (class in vllm.platforms.cpu) CPUPoolingModelRunner (class in vllm.worker.cpu_pooling_model_runner) CPUWorker (class in vllm.worker.cpu_worker) create() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator static method) (vllm.distributed.kv_events.EventPublisherFactory class method) (vllm.distributed.utils.StatelessProcessGroup static method) (vllm.outputs.RequestOutputFactory static method) create_and_map() (in module vllm.device_allocator.cumem) create_attention_instances() (vllm.model_executor.models.transformers.TransformersModel method) create_chat_completion() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_chat.OpenAIServingChat method) create_classify() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_classification.ServingClassification method) create_completion() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_completion.OpenAIServingCompletion method) create_connector_v0() (vllm.distributed.kv_transfer.kv_connector.factory.KVConnectorFactory class method) create_connector_v1() (vllm.distributed.kv_transfer.kv_connector.factory.KVConnectorFactory class method) create_decoder_prompt() (vllm.model_executor.models.florence2.Florence2MultiModalProcessor method) (vllm.multimodal.processing.EncDecMultiModalProcessor method) create_detokenize() (vllm.entrypoints.openai.serving_tokenization.OpenAIServingTokenization method) create_draft_parallel_config() (vllm.config.SpeculativeConfig static method) create_dummy_lora() (vllm.lora.models.LoRAModelManager method) create_dummy_lora_weights() (vllm.lora.lora.LoRALayerWeights class method) create_dummy_seq_group_metadata() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) create_embedding() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_embedding.OpenAIServingEmbedding method) create_empty() (vllm.core.scheduler.SchedulerPrefillOutputs class method) (vllm.core.scheduler.SchedulerRunningOutputs class method) (vllm.core.scheduler.SchedulerSwappedInOutputs class method) (vllm.v1.core.kv_cache_manager.KVCacheBlocks class method) create_encoder_prompt() (vllm.model_executor.models.florence2.Florence2MultiModalProcessor method) (vllm.model_executor.models.mllama.MllamaMultiModalProcessor method) (vllm.model_executor.models.whisper.WhisperMultiModalProcessor method) (vllm.multimodal.processing.EncDecMultiModalProcessor method) create_engine_config() (vllm.engine.arg_utils.EngineArgs method) create_error_response() (in module vllm.entrypoints.openai.serving_models) (vllm.entrypoints.openai.serving_engine.OpenAIServing method) create_from_handle() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue static method) create_from_process_group() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue static method) create_input_mapper() (vllm.multimodal.registry.MultiModalRegistry method) create_kv_cache_group_specs() (in module vllm.v1.core.kv_cache_utils) create_kv_caches_with_random() (in module vllm.utils) create_kv_caches_with_random_flash() (in module vllm.utils) create_load_config() (vllm.engine.arg_utils.EngineArgs method) create_logprobs_output() (in module vllm.spec_decode.util) create_lora_manager() (in module vllm.lora.models) (vllm.lora.worker_manager.LRUCacheWorkerLoRAManager method) (vllm.lora.worker_manager.WorkerLoRAManager method) create_lora_mask() (vllm.worker.hpu_model_runner.HPUModelRunner method) create_lora_weights() (vllm.lora.layers.BaseLayerWithLoRA method) (vllm.lora.layers.BaseLinearLayerWithLoRA method) (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA method) (vllm.lora.layers.LogitsProcessorWithLoRA method) (vllm.lora.layers.MergedColumnParallelLinearWithLoRA method) (vllm.lora.layers.MergedQKVParallelLinearWithLoRA method) (vllm.lora.layers.VocabParallelEmbeddingWithLoRA method) create_model_config() (vllm.engine.arg_utils.EngineArgs method) create_output_by_sequence_group() (in module vllm.engine.output_processor.util) create_output_processor() (vllm.engine.output_processor.interfaces.SequenceGroupOutputProcessor static method) create_parser() (vllm.entrypoints.chat_utils.AsyncMultiModalItemTracker method) (vllm.entrypoints.chat_utils.BaseMultiModalItemTracker method) (vllm.entrypoints.chat_utils.MultiModalItemTracker method) create_parser_for_docs() (in module vllm.entrypoints.openai.cli_args) create_pooling() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_pooling.OpenAIServingPooling method) create_position_ids_from_input_ids() (in module vllm.model_executor.models.roberta) create_processor() (vllm.multimodal.registry.MultiModalRegistry method) create_prompt_adapter_manager() (in module vllm.prompt_adapter.models) (vllm.prompt_adapter.worker_manager.LRUCacheWorkerPromptAdapterManager method) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) create_prompt_adapter_weights() (vllm.prompt_adapter.layers.VocabParallelEmbeddingWithPromptAdapter method) create_score() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_score.ServingScores method) create_score_v1() (in module vllm.entrypoints.openai.api_server) create_sequence_group_output() (in module vllm.spec_decode.util) create_server_socket() (in module vllm.entrypoints.openai.api_server) create_shared_buffer() (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce static method) create_sort_beams_key_function() (in module vllm.beam_search) create_spec_worker() (in module vllm.spec_decode.spec_decode_worker) create_speculative_config() (vllm.engine.arg_utils.EngineArgs method) create_streaming_error_response() (vllm.entrypoints.openai.serving_engine.OpenAIServing method) create_tokenize() (vllm.entrypoints.openai.serving_tokenization.OpenAIServingTokenization method) create_trace_span() (vllm.engine.llm_engine.LLMEngine method) create_transcription() (vllm.entrypoints.openai.serving_transcription.OpenAIServingTranscription method) create_transcriptions() (in module vllm.entrypoints.openai.api_server) create_weights() (vllm.model_executor.layers.fused_moe.layer.FusedMoEMethodBase method) (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod method) (vllm.model_executor.layers.linear.LinearMethodBase method) (vllm.model_executor.layers.linear.UnquantizedLinearMethod method) (vllm.model_executor.layers.quantization.aqlm.AQLMLinearMethod method) (vllm.model_executor.layers.quantization.awq.AWQLinearMethod method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinLinearMethod method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMoEMethod method) (vllm.model_executor.layers.quantization.base_config.QuantizeMethodBase method) (vllm.model_executor.layers.quantization.bitblas.BitBLASLinearMethod method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesLinearMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsLinearMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Fp8MoECutlassMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Fp8MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Int8MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsWNA16MarlinMoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsWNA16MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_24.CompressedTensors24 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_scheme.CompressedTensorsScheme method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24.CompressedTensorsW4A16Sparse24 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_nvfp4.CompressedTensorsW4A16Fp4 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8.CompressedTensorsW8A16Fp8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_fp8.CompressedTensorsW8A8Fp8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8.CompressedTensorsW8A8Int8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16.CompressedTensorsWNA16 method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPLinearMethod method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8MoEMethod method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8LinearMethod method) (vllm.model_executor.layers.quantization.fp8.Fp8LinearMethod method) (vllm.model_executor.layers.quantization.fp8.Fp8MoEMethod method) (vllm.model_executor.layers.quantization.gguf.GGUFLinearMethod method) (vllm.model_executor.layers.quantization.gguf.GGUFMoEMethod method) (vllm.model_executor.layers.quantization.gptq.GPTQLinearMethod method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASLinearMethod method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinLinearMethod method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinMoEMethod method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24LinearMethod method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinMethod method) (vllm.model_executor.layers.quantization.kv_cache.BaseKVCacheMethod method) (vllm.model_executor.layers.quantization.marlin.MarlinLinearMethod method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8LinearMethod method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4FusedMoE method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4LinearMethod method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Method method) (vllm.model_executor.layers.quantization.qqq.QQQLinearMethod method) (vllm.model_executor.layers.quantization.quark.quark.QuarkLinearMethod method) (vllm.model_executor.layers.quantization.quark.quark_moe.QuarkW8A8Fp8MoEMethod method) (vllm.model_executor.layers.quantization.quark.schemes.quark_scheme.QuarkScheme method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w4a4_mxfp4.QuarkW4A4MXFP4 method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8.QuarkW8A8Fp8 method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8.QuarkW8A8Int8 method) (vllm.model_executor.layers.quantization.torchao.TorchAOLinearMethod method) (vllm.model_executor.layers.quantization.tpu_int8.TPUInt8LinearMethod method) (vllm.model_executor.layers.vocab_parallel_embedding.UnquantizedEmbeddingMethod method) create_weights_gptq() (vllm.model_executor.layers.quantization.bitblas.BitBLASLinearMethod method) create_worker() (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker class method) created (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.ChatCompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.ClassificationResponse attribute) (vllm.entrypoints.openai.protocol.CompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponse attribute) (vllm.entrypoints.openai.protocol.ModelCard attribute) (vllm.entrypoints.openai.protocol.ModelPermission attribute) (vllm.entrypoints.openai.protocol.PoolingResponse attribute) (vllm.entrypoints.openai.protocol.ScoreResponse attribute) (vllm.entrypoints.openai.protocol.TranscriptionStreamResponse attribute) created_rank (vllm.executor.ray_distributed_executor.RayWorkerMetaData attribute) created_time (vllm.entrypoints.openai.serving_engine.ServeContext attribute) cross_block_table (vllm.sequence.SequenceGroupMetadata attribute) cross_block_tables (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) cross_slot_mapping (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) CrossEncodingPooler (class in vllm.model_executor.layers.pooler) crow_col_to_dense() (in module vllm.attention.ops.blocksparse_attention.utils) csr_matrix (class in vllm.attention.ops.blocksparse_attention.utils) ctx (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) (vllm.v1.engine.core_client.BackgroundResources attribute) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar attribute) cu_num_draft_tokens (vllm.v1.spec_decode.metadata.SpecDecodeMetadata attribute) cu_prefix_query_lens (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) cu_seq_lens (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata.ChunkedContextMetadata attribute) cu_seqlens_k (vllm.attention.ops.triton_flash_attention.MetaData attribute) cu_seqlens_q (vllm.attention.ops.triton_flash_attention.MetaData attribute) cu_tokens_across_dp_cpu (vllm.forward_context.DPMetadata attribute) CUDA (vllm.platforms.interface.PlatformEnum attribute) cuda_device_count_stateless() (in module vllm.utils) cuda_get_device_properties() (in module vllm.utils) cuda_graph_sizes (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) cuda_is_initialized() (in module vllm.utils) cuda_memory (vllm.utils.MemorySnapshot attribute) cuda_platform_plugin() (in module vllm.platforms) cuda_time_us (vllm.profiler.layerwise_profile.ModelStatsEntry attribute) (vllm.profiler.layerwise_profile.SummaryStatsEntry attribute) CudaCommunicator (class in vllm.distributed.device_communicators.cuda_communicator) cudaDeviceReset() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaDeviceSynchronize() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaError_t (in module vllm.distributed.device_communicators.cuda_wrapper) cudaFree() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaGetErrorString() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudagraph (vllm.compilation.backends.ConcreteSizeEntry attribute) cudagraph_capture_sizes (vllm.config.CompilationConfig attribute) cudagraph_copy_inputs (vllm.config.CompilationConfig attribute) cudagraph_num_of_warmups (vllm.config.CompilationConfig attribute) CUDAGraphRunner (class in vllm.worker.model_runner) cudaIpcGetMemHandle() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaIpcMemHandle_t (class in vllm.distributed.device_communicators.cuda_wrapper) cudaIpcOpenMemHandle() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaMalloc() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaMemcpy() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaMemcpyKind (in module vllm.distributed.device_communicators.cuda_wrapper) cudaMemset() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) CudaPlatform (in module vllm.platforms.cuda) CudaPlatformBase (class in vllm.platforms.cuda) CUDART_CHECK() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) CudaRTLibrary (class in vllm.distributed.device_communicators.cuda_wrapper) cudaSetDevice() (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary method) cudaStream_t (in module vllm.distributed.device_communicators.pynccl_wrapper) cum_logprob (vllm.beam_search.BeamSearchSequence attribute) cumem_available (in module vllm.device_allocator.cumem) CuMemAllocator (class in vllm.device_allocator.cumem) CUMULATIVE (vllm.sampling_params.RequestOutputKind attribute) cumulative_logprob (vllm.outputs.CompletionOutput attribute) (vllm.sequence.SequenceData property) (vllm.v1.engine.logprobs.LogprobsProcessor attribute) current_memory_usage() (vllm.utils.DeviceMemoryProfiler method) current_run_tensors() (vllm.model_executor.models.constant_size_cache.ConstantSizeCache method) (vllm.model_executor.models.mamba_cache.MambaCacheManager method) current_step (vllm.sequence.ExecuteModelRequest property) (vllm.sequence.SequenceGroupState attribute) (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) current_stream() (in module vllm.utils) current_tool_index (vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser.PythonicToolParser property) current_wave (vllm.v1.engine.EngineCoreRequest attribute) custom_all_reduce() (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce method) custom_id (vllm.entrypoints.openai.protocol.BatchRequestInput attribute) (vllm.entrypoints.openai.protocol.BatchRequestOutput attribute) custom_ops (vllm.config.CompilationConfig attribute) custom_routing_function() (vllm.model_executor.models.llama4.Llama4MoE static method) CUSTOM_TYPE_CLOUDPICKLE (in module vllm.v1.serial_utils) CUSTOM_TYPE_PICKLE (in module vllm.v1.serial_utils) CUSTOM_TYPE_RAW_VIEW (in module vllm.v1.serial_utils) CustomAllreduce (class in vllm.distributed.device_communicators.custom_all_reduce) CustomChatCompletionContentSimpleAudioParam (class in vllm.entrypoints.chat_utils) CustomChatCompletionContentSimpleImageParam (class in vllm.entrypoints.chat_utils) CustomChatCompletionContentSimpleVideoParam (class in vllm.entrypoints.chat_utils) CustomChatCompletionMessageParam (class in vllm.entrypoints.chat_utils) CustomOp (class in vllm.model_executor.custom_op) CUTLASS_BLOCK_FP8_SUPPORTED (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) cutlass_block_fp8_supported() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) cutlass_fp4_moe_mm() (in module vllm._custom_ops) cutlass_fp4_supported() (in module vllm.model_executor.layers.quantization.modelopt) CUTLASS_FP8_SUPPORTED (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) cutlass_fp8_supported() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) cutlass_group_gemm_supported() (in module vllm._custom_ops) (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) cutlass_mla_decode() (in module vllm._custom_ops) cutlass_moe_fp4() (in module vllm.model_executor.layers.fused_moe.cutlass_moe) cutlass_moe_fp8() (in module vllm.model_executor.layers.fused_moe.cutlass_moe) cutlass_moe_mm() (in module vllm._custom_ops) cutlass_scaled_fp4_mm() (in module vllm._custom_ops) cutlass_scaled_mm() (in module vllm._custom_ops) cutlass_scaled_mm_azp() (in module vllm._custom_ops) cutlass_scaled_mm_supports_block_fp8() (in module vllm._custom_ops) cutlass_scaled_mm_supports_fp4() (in module vllm._custom_ops) cutlass_scaled_mm_supports_fp8() (in module vllm._custom_ops) cutlass_scaled_sparse_mm() (in module vllm._custom_ops) cutlass_sparse_compress() (in module vllm._custom_ops) cutlass_sparse_scaled_mm_supported() (in module vllm._custom_ops) cutlass_w8a8_scaled_mm() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) CutlassScaledMMLinearKernel (class in vllm.model_executor.layers.quantization.kernels.scaled_mm.cutlass) CYAN (in module vllm.executor.multiproc_worker_utils) D D (in module vllm.v1.attention.backends.mla.common) data (vllm.entrypoints.openai.protocol.ClassificationResponse attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponse attribute) (vllm.entrypoints.openai.protocol.ModelList attribute) (vllm.entrypoints.openai.protocol.PoolingResponse attribute) (vllm.entrypoints.openai.protocol.PoolingResponseData attribute) (vllm.entrypoints.openai.protocol.ScoreResponse attribute) (vllm.model_executor.models.blip2.Blip2ImageEmbeddingInputs attribute) (vllm.model_executor.models.blip2.Blip2ImagePixelInputs attribute) (vllm.model_executor.models.chameleon.ChameleonImagePixelInputs attribute) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ImagePixelInputs attribute) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2VImageEmbeddingInputs attribute) (vllm.model_executor.models.florence2.Florence2ImagePixelInputs attribute) (vllm.model_executor.models.glm4v.GLMVImagePixelInputs attribute) (vllm.model_executor.models.idefics3.Idefics3ImageEmbeddingInputs attribute) (vllm.model_executor.models.internvl.InternVLImageEmbeddingInputs attribute) (vllm.model_executor.models.llava.LlavaImageEmbeddingInputs attribute) (vllm.model_executor.models.llava_next.LlavaNextImageEmbeddingInputs attribute) (vllm.model_executor.models.llava_next_video.LlavaNextVideoPixelInputs attribute) (vllm.model_executor.models.llava_onevision.LlavaOnevisionImageEmbeddingInputs attribute) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ImageEmbeddingInputs attribute) (vllm.model_executor.models.mllama.MllamaImagePixelInputs attribute) (vllm.model_executor.models.paligemma.PaliGemmaImageEmbeddingInputs attribute) (vllm.model_executor.models.paligemma.PaliGemmaImagePixelInputs attribute) (vllm.model_executor.models.phi3v.Phi3VImageEmbeddingInputs attribute) (vllm.model_executor.models.phi3v.Phi3VImagePixelInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMAudioEmbeddingInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMAudioFeatureInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMImageEmbeddingInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMImagePixelInputs attribute) (vllm.model_executor.models.qwen_vl.QwenImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen_vl.QwenImagePixelInputs attribute) (vllm.model_executor.models.skyworkr1v.SkyworkR1VImageEmbeddingInputs attribute) (vllm.model_executor.models.ultravox.UltravoxAudioEmbeddingInputs attribute) (vllm.model_executor.models.ultravox.UltravoxAudioFeatureInputs attribute) (vllm.multimodal.inputs.MultiModalFieldElem attribute) (vllm.outputs.PoolingOutput attribute) (vllm.sequence.PoolingSequenceGroupOutput attribute) data_container (vllm.model_executor.layers.quantization.gguf.GGUFUninitializedParameter attribute) data_expiration_seconds (vllm.distributed.utils.StatelessProcessGroup attribute) data_parallel_master_ip (vllm.config.ParallelConfig attribute) data_parallel_master_port (vllm.config.ParallelConfig attribute) data_parallel_rank (vllm.config.ParallelConfig attribute) data_parallel_rank_local (vllm.config.ParallelConfig property) data_parallel_size (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) data_type (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) DaViT (class in vllm.model_executor.models.florence2) DBRX_PRETRAINED_CONFIG_ARCHIVE_MAP (in module vllm.transformers_utils.configs.dbrx) DbrxAttention (class in vllm.model_executor.models.dbrx) DbrxAttentionConfig (class in vllm.transformers_utils.configs.dbrx) DbrxBlock (class in vllm.model_executor.models.dbrx) DbrxConfig (class in vllm.transformers_utils.configs.dbrx) DbrxExperts (class in vllm.model_executor.models.dbrx) DbrxFFNConfig (class in vllm.transformers_utils.configs.dbrx) DbrxForCausalLM (class in vllm.model_executor.models.dbrx) DbrxFusedNormAttention (class in vllm.model_executor.models.dbrx) DbrxModel (class in vllm.model_executor.models.dbrx) DbrxMoE (class in vllm.model_executor.models.dbrx) DbrxRouter (class in vllm.model_executor.models.dbrx) deactivate_adapter() (in module vllm.adapter_commons.utils) (vllm.adapter_commons.models.AdapterModelManager method) (vllm.lora.models.LoRAModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) dead_error (vllm.engine.async_llm_engine.AsyncLLMEngine property) (vllm.engine.multiprocessing.client.MQLLMEngineClient property) (vllm.engine.multiprocessing.engine.MQLLMEngine property) (vllm.engine.protocol.EngineClient property) (vllm.v1.engine.async_llm.AsyncLLM property) debug_advance_input (in module vllm.spec_decode.draft_model_runner) debug_dump_path (vllm.config.CompilationConfig attribute) dec() (vllm.utils.AtomicCounter method) dec_hook() (vllm.v1.serial_utils.MsgpackDecoder method) DeciLMDecoderLayer (class in vllm.model_executor.models.nemotron_nas) DeciLMForCausalLM (class in vllm.model_executor.models.nemotron_nas) DeciModel (class in vllm.model_executor.models.nemotron_nas) DECODE (vllm.sequence.SequenceStage attribute) decode (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) DECODE (vllm.worker.hpu_model_runner.BatchType attribute) (vllm.worker.hpu_model_runner.PhaseType attribute) (vllm.worker.tpu_model_runner.ExecutionMode attribute) decode() (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor method) (vllm.transformers_utils.processors.ovis.OvisProcessor method) (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) (vllm.v1.serial_utils.MsgpackDecoder method) decode_attention_fwd() (in module vllm.attention.ops.triton_decode_attention) decode_attention_fwd_grouped() (in module vllm.attention.ops.triton_decode_attention) decode_attention_fwd_normal() (in module vllm.attention.ops.triton_decode_attention) decode_hook() (in module vllm.executor.msgspec_utils) decode_latency_s (vllm.v1.stats.common.RequestStats property) decode_metadata (vllm.attention.backends.abstract.AttentionMetadata property) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata property) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata property) (vllm.attention.backends.flash_attn.FlashAttentionMetadata property) (vllm.attention.backends.flashinfer.FlashInferMetadata property) (vllm.attention.backends.flashmla.FlashMLAMetadata property) (vllm.attention.backends.ipex_attn.IpexAttnMetadata property) (vllm.attention.backends.mla.common.MLACommonMetadata property) (vllm.attention.backends.pallas.PallasMetadata property) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata property) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadata property) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata property) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata property) (vllm.attention.backends.xformers.XFormersMetadata property) decode_next() (vllm.v1.engine.detokenizer.BaseIncrementalDetokenizer method) (vllm.v1.engine.detokenizer.FastIncrementalDetokenizer method) (vllm.v1.engine.detokenizer.SlowIncrementalDetokenizer method) decode_num_splits (vllm.attention.backends.flashmla.FlashMLAMetadata attribute) decode_prompt_logprobs_inplace() (vllm.transformers_utils.detokenizer.Detokenizer method) decode_query_len (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) decode_seq_groups (vllm.core.scheduler.SchedulerRunningOutputs attribute) (vllm.core.scheduler.SchedulerSwappedInOutputs attribute) decode_seq_groups_list (vllm.core.scheduler.SchedulerRunningOutputs attribute) decode_sequence_inplace() (vllm.transformers_utils.detokenizer.Detokenizer method) decode_tile_scheduler_metadata (vllm.attention.backends.flashmla.FlashMLAMetadata attribute) decode_time (vllm.v1.metrics.stats.FinishedRequestStats attribute) decode_tokens() (in module vllm.transformers_utils.tokenizer) decode_url (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig attribute) decode_wrapper (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) decoded_token (vllm.sequence.Logprob attribute) DECODER (vllm.attention.backends.abstract.AttentionType attribute) decoder (vllm.inputs.data.EncoderDecoderInputs attribute) decoder_prompt (vllm.inputs.data.ExplicitEncoderDecoderPrompt attribute) DecoderOnlyInputs (in module vllm.inputs.data) DECODING (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) decoding_config (vllm.config.VllmConfig attribute) decoding_ts_s_lst (vllm.v1.stats.common.RequestStats attribute) DecodingConfig (class in vllm.config) decr() (vllm.core.block.common.ReadOnlyRefCounter method) (vllm.core.block.common.RefCounter method) (vllm.core.block.common.RefCounterProtocol method) decr_ref() (vllm.v1.core.kv_cache_utils.KVCacheBlock method) decrement_server_load() (in module vllm.entrypoints.utils) deep_compare() (in module vllm.model_executor.layers.quantization.quark.utils) deep_gemm_moe_fp8() (in module vllm.model_executor.layers.fused_moe.deep_gemm_moe) DeepseekAttention (class in vllm.model_executor.models.deepseek) DeepseekDecoderLayer (class in vllm.model_executor.models.deepseek) DeepseekForCausalLM (class in vllm.model_executor.models.deepseek) DeepseekMLP (class in vllm.model_executor.models.deepseek) DeepseekModel (class in vllm.model_executor.models.deepseek) DeepseekMoE (class in vllm.model_executor.models.deepseek) DeepSeekMTP (class in vllm.model_executor.models.deepseek_mtp) DeepSeekMultiTokenPredictor (class in vllm.model_executor.models.deepseek_mtp) DeepSeekMultiTokenPredictorLayer (class in vllm.model_executor.models.deepseek_mtp) DeepSeekR1ReasoningParser (class in vllm.reasoning.deepseek_r1_reasoning_parser) DeepseekScalingRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) DeepseekV2Attention (class in vllm.model_executor.models.deepseek_v2) DeepseekV2Config (class in vllm.transformers_utils.configs.deepseek_vl2) DeepseekV2DecoderLayer (class in vllm.model_executor.models.deepseek_v2) DeepseekV2ForCausalLM (class in vllm.model_executor.models.deepseek_v2) DeepseekV2MLAAttention (class in vllm.model_executor.models.deepseek_v2) DeepseekV2MLP (class in vllm.model_executor.models.deepseek_v2) DeepseekV2Model (class in vllm.model_executor.models.deepseek_v2) DeepseekV2MoE (class in vllm.model_executor.models.deepseek_v2) DeepseekV3ForCausalLM (class in vllm.model_executor.models.deepseek_v2) DeepSeekV3ToolParser (class in vllm.entrypoints.openai.tool_parsers.deepseekv3_tool_parser) DeepseekVL2DummyInputsBuilder (class in vllm.model_executor.models.deepseek_vl2) DeepseekVL2ImageInputs (in module vllm.model_executor.models.deepseek_vl2) DeepseekVL2ImagePixelInputs (class in vllm.model_executor.models.deepseek_vl2) DeepseekVL2MultiModalProcessor (class in vllm.model_executor.models.deepseek_vl2) DeepseekVL2ProcessingInfo (class in vllm.model_executor.models.deepseek_vl2) DeepseekVL2VImageEmbeddingInputs (class in vllm.model_executor.models.deepseek_vl2) DeepseekVLV2Config (class in vllm.transformers_utils.configs.deepseek_vl2) DeepseekVLV2ForCausalLM (class in vllm.model_executor.models.deepseek_vl2) DeepseekVLV2Processor (class in vllm.transformers_utils.processors.deepseek_vl2) DeepSpeedFPConfig (class in vllm.model_executor.layers.quantization.deepspeedfp) DeepSpeedFPLinearMethod (class in vllm.model_executor.layers.quantization.deepspeedfp) DeepSpeedFPParameter (class in vllm.model_executor.layers.quantization.deepspeedfp) DEFAULT_ATTN_OUTPUT_MULTIPLIER (in module vllm.model_executor.models.grok1) DEFAULT_CONDA_PATTERNS (in module vllm.collect_env) default_eight_bit_dtype_torch (in module vllm.attention.ops.triton_flash_attention) default_eight_bit_dtype_triton (in module vllm.attention.ops.triton_flash_attention) DEFAULT_EMBEDDING_MULTIPLIER_SCALE (in module vllm.model_executor.models.grok1) default_float8_info (in module vllm.attention.ops.triton_flash_attention) DEFAULT_GLOBAL_SEGMENT_SIZE (in module vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store) DEFAULT_INPUT_LEN (vllm.benchmarks.datasets.RandomDataset attribute) (vllm.benchmarks.datasets.SonnetDataset attribute) DEFAULT_LN (in module vllm.model_executor.layers.resampler) (in module vllm.model_executor.models.minicpmv) DEFAULT_LOCAL_BUFFER_SIZE (in module vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store) DEFAULT_LOGGING_CONFIG (in module vllm.logger) default_on() (vllm.model_executor.custom_op.CustomOp static method) DEFAULT_OUTPUT_LEN (vllm.benchmarks.datasets.InstructCoderDataset attribute) (vllm.benchmarks.datasets.RandomDataset attribute) (vllm.benchmarks.datasets.SonnetDataset attribute) (vllm.benchmarks.datasets.VisionArenaDataset attribute) DEFAULT_OUTPUT_MULTIPLIER_SCALE (in module vllm.model_executor.models.grok1) DEFAULT_PATTERN (vllm.model_executor.model_loader.sharded_state_loader.ShardedStateLoader attribute) DEFAULT_PIP_PATTERNS (in module vllm.collect_env) DEFAULT_PREFIX_LEN (vllm.benchmarks.datasets.RandomDataset attribute) (vllm.benchmarks.datasets.SonnetDataset attribute) DEFAULT_RANGE_RATIO (vllm.benchmarks.datasets.RandomDataset attribute) DEFAULT_SAMPLING_PARAMS (in module vllm.v1.sample.tpu.metadata) DEFAULT_SEED (vllm.benchmarks.datasets.BenchmarkDataset attribute) DEFAULT_SIMPLE_SAMPLING_PARAMS (in module vllm.spec_decode.batch_expansion) default_tag (vllm.device_allocator.cumem.CuMemAllocator attribute) DEFAULT_VOCAB_PADDING_SIZE (in module vllm.model_executor.layers.vocab_parallel_embedding) default_weight_loader() (in module vllm.model_executor.model_loader.weight_utils) DefaultModelLoader (class in vllm.model_executor.model_loader.default_loader) DefaultModelLoader.Source (class in vllm.model_executor.model_loader.default_loader) deferred_pythonize_logprobs() (in module vllm.worker.multi_step_model_runner) deferred_sample_results_args (vllm.model_executor.layers.sampler.SamplerOutput attribute) DeferredLogprobsReturnType (in module vllm.worker.multi_step_model_runner) defunctionalize() (vllm.compilation.fix_functionalization.FixFunctionalizationPass method) delay_factor (vllm.config.SchedulerConfig attribute) DelegateWorkerBase (class in vllm.worker.worker_base) delta (vllm.entrypoints.openai.protocol.ChatCompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.TranscriptionResponseStreamChoice attribute) DELTA (vllm.sampling_params.RequestOutputKind attribute) DeltaFunctionCall (class in vllm.entrypoints.openai.protocol) DeltaMessage (class in vllm.entrypoints.openai.protocol) DeltaToolCall (class in vllm.entrypoints.openai.protocol) dense_to_ccol_row() (in module vllm.attention.ops.blocksparse_attention.utils) dense_to_crow_col() (in module vllm.attention.ops.blocksparse_attention.utils) DenseMLP (class in vllm.model_executor.models.plamo2) deprecate_args() (in module vllm.utils) DEPRECATE_INIT_POSARGS (vllm.entrypoints.llm.LLM attribute) deprecate_kwargs() (in module vllm.utils) DEPRECATE_LEGACY (vllm.entrypoints.llm.LLM attribute) deprecate_legacy_api() (vllm.entrypoints.llm.LLM class method) depth (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) DepthWiseConv2d (class in vllm.model_executor.models.florence2) DepthWiseSeperableConv1d (class in vllm.model_executor.models.phi4mm_utils) DEQUANT_TYPES (in module vllm.model_executor.layers.quantization.gguf) dequantize_gemm() (in module vllm.model_executor.layers.quantization.aqlm) dequantize_to_dtype() (in module vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils) dequantize_weight() (in module vllm.model_executor.layers.quantization.aqlm) dequeue() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue method) description (vllm.entrypoints.openai.protocol.FunctionDefinition attribute) (vllm.entrypoints.openai.protocol.JsonSchemaResponseFormat attribute) deserialize() (vllm.model_executor.model_loader.tensorizer.TensorizerAgent method) dest (vllm.multimodal.base.MultiModalPlaceholderMap.IndexMap attribute) dest_len (vllm.multimodal.base.MultiModalPlaceholderMap attribute) dest_ranges (vllm.multimodal.base.MultiModalPlaceholderMap attribute) destroy() (vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase method) (vllm.distributed.device_communicators.cuda_communicator.CudaCommunicator method) (vllm.distributed.parallel_state.GroupCoordinator method) (vllm.v1.structured_output.backend_guidance.GuidanceBackend method) (vllm.v1.structured_output.backend_types.StructuredOutputBackend method) (vllm.v1.structured_output.backend_xgrammar.XgrammarBackend method) destroy_distributed_environment() (in module vllm.distributed.parallel_state) destroy_model_parallel() (in module vllm.distributed.parallel_state) DetailedTraceModules (in module vllm.config) determine_available_memory() (vllm.v1.executor.abstract.Executor method) (vllm.v1.executor.abstract.ExecutorWithExternalLauncher method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_worker.TPUWorker method) determine_expert_map() (in module vllm.model_executor.layers.fused_moe.layer) determine_num_available_blocks() (vllm.executor.executor_base.ExecutorBase method) (vllm.executor.uniproc_executor.ExecutorWithExternalLauncher method) (vllm.spec_decode.proposer_worker_base.NonLLMProposerWorkerBase method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.neuron_worker.NeuronWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.WorkerBase method) (vllm.worker.xpu_worker.XPUWorker method) deterministic (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) detokenize (vllm.sampling_params.SamplingParams attribute) detokenize() (in module vllm.entrypoints.openai.api_server) detokenize_incrementally() (in module vllm.transformers_utils.detokenizer_utils) DETOKENIZED (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) Detokenizer (class in vllm.transformers_utils.detokenizer) DetokenizeRequest (class in vllm.entrypoints.openai.protocol) DetokenizeResponse (class in vllm.entrypoints.openai.protocol) Device (class in vllm.utils) (in module vllm.config) device (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.config.DeviceConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.engine.multiprocessing.RPCResetPrefixCacheRequest attribute) (vllm.model_executor.models.clip.CLIPVisionModel property) (vllm.model_executor.models.molmo.MolmoVisionBackbone property) (vllm.model_executor.models.ovis.VisualEmbedding property) (vllm.model_executor.models.ovis.VisualTokenizer property) (vllm.model_executor.models.pixtral.VisionTransformer property) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer property) (vllm.model_executor.models.qwen2_vl.Qwen2VisionTransformer property) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker property) device_communicator (vllm.distributed.parallel_state.GroupCoordinator attribute) device_config (vllm.config.VllmConfig attribute) device_control_env_var (vllm.platforms.cuda.CudaPlatformBase attribute) (vllm.platforms.hpu.HpuPlatform attribute) (vllm.platforms.interface.Platform attribute) (vllm.platforms.neuron.NeuronPlatform attribute) (vllm.platforms.rocm.RocmPlatform attribute) (vllm.platforms.tpu.TpuPlatform attribute) (vllm.platforms.xpu.XPUPlatform attribute) device_group (vllm.distributed.parallel_state.GroupCoordinator attribute) device_id_to_physical_device_id() (vllm.platforms.interface.Platform class method) device_loading_context() (in module vllm.model_executor.model_loader.utils) device_name (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig attribute) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig attribute) (vllm.platforms.cpu.CpuPlatform attribute) (vllm.platforms.cuda.CudaPlatformBase attribute) (vllm.platforms.hpu.HpuPlatform attribute) (vllm.platforms.interface.Platform attribute) (vllm.platforms.neuron.NeuronPlatform attribute) (vllm.platforms.rocm.RocmPlatform attribute) (vllm.platforms.tpu.TpuPlatform attribute) (vllm.platforms.xpu.XPUPlatform attribute) device_support_bf16() (vllm.platforms.xpu.XPUPlatform class method) device_type (vllm.config.DeviceConfig attribute) (vllm.platforms.cpu.CpuPlatform attribute) (vllm.platforms.cuda.CudaPlatformBase attribute) (vllm.platforms.hpu.HpuPlatform attribute) (vllm.platforms.interface.Platform attribute) (vllm.platforms.interface.UnspecifiedPlatform attribute) (vllm.platforms.neuron.NeuronPlatform attribute) (vllm.platforms.rocm.RocmPlatform attribute) (vllm.platforms.tpu.TpuPlatform attribute) (vllm.platforms.xpu.XPUPlatform attribute) DeviceAwareBlockAllocator (class in vllm.core.block.interfaces) DeviceCapability (class in vllm.platforms.interface) DeviceCommunicatorBase (class in vllm.distributed.device_communicators.base_device_communicator) DeviceConfig (class in vllm.config) DeviceMemoryProfiler (class in vllm.utils) DictEmbeddingItems (class in vllm.multimodal.parse) dim (vllm.multimodal.inputs.MultiModalFlatField attribute) DIM_BLOCK_SIZE (in module vllm.lora.ops.xla_ops.pallas) dim_out (vllm.model_executor.models.florence2.DaViT property) dimensions (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) (vllm.pooling_params.PoolingParams attribute) dims_equivalent() (vllm.compilation.noop_elimination.NoOpEliminationPass method) direct_register_custom_op() (in module vllm.utils) disable() (vllm.core.block.prefix_caching_block.BlockTracker method) disable_additional_properties (vllm.config.DecodingConfig attribute) (vllm.sampling_params.GuidedDecodingParams attribute) disable_any_whitespace (vllm.config.DecodingConfig attribute) (vllm.sampling_params.GuidedDecodingParams attribute) disable_async_output_proc (vllm.engine.arg_utils.EngineArgs attribute) disable_by_batch_size (vllm.config.SpeculativeConfig attribute) disable_cascade_attn (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) disable_chunked_mm_input (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) disable_custom_all_reduce (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) disable_fallback (vllm.config.DecodingConfig attribute) (vllm.sampling_params.GuidedDecodingParams attribute) disable_log_requests (vllm.engine.arg_utils.AsyncEngineArgs attribute) disable_log_stats (vllm.config.SpeculativeConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) disable_logprobs (vllm.config.SpeculativeConfig attribute) disable_mm_preprocessor_cache (vllm.config.ModelConfig attribute) (vllm.config.MultiModalConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) disable_mqa_scorer (vllm.config.SpeculativeConfig attribute) disable_sliding_window (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) disabled_custom_ops (vllm.config.CompilationConfig attribute) DisabledTqdm (class in vllm.model_executor.model_loader.weight_utils) dispatch_cuda_rmsnorm_func() (in module vllm.model_executor.layers.layernorm) dispatch_forward() (vllm.model_executor.custom_op.CustomOp method) dispatch_fused_experts_func() (in module vllm.model_executor.layers.fused_moe.fused_moe) dispatch_key (vllm.platforms.cpu.CpuPlatform attribute) (vllm.platforms.cuda.CudaPlatformBase attribute) (vllm.platforms.hpu.HpuPlatform attribute) (vllm.platforms.interface.Platform attribute) (vllm.platforms.rocm.RocmPlatform attribute) (vllm.platforms.tpu.TpuPlatform attribute) (vllm.platforms.xpu.XPUPlatform attribute) dispatch_to_code() (vllm.compilation.wrapper.TorchCompileWrapperWithCustomDispatcher method) dispatch_topk_func() (in module vllm.model_executor.layers.fused_moe.fused_moe) dispatch_unquantized_gemm() (in module vllm.model_executor.layers.utils) dispatch_w8a8_scaled_mm() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) dispose() (in module vllm._custom_ops) distributed_executor_backend (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) DistributedExecutorBackend (in module vllm.config) DistributedExecutorBase (class in vllm.executor.executor_base) divide() (in module vllm.distributed.utils) do_expand_kernel() (in module vllm.lora.ops.triton_ops.kernel_utils) do_log_stats() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) do_metadata_broadcast (vllm.worker.cpu_worker.CPUWorker property) (vllm.worker.hpu_worker.HPUWorker property) (vllm.worker.neuron_worker.NeuronWorker property) (vllm.worker.tpu_worker.TPUWorker property) (vllm.worker.worker.Worker property) (vllm.worker.worker_base.LocalOrDistributedWorkerBase property) do_rerank() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_score.ServingScores method) do_rerank_v1() (in module vllm.entrypoints.openai.api_server) do_rerank_v2() (in module vllm.entrypoints.openai.api_server) do_sample (vllm.model_executor.sampling_metadata.SequenceGroupToSample property) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.sequence.SequenceGroupMetadataDelta attribute) do_shrink_kernel() (in module vllm.lora.ops.triton_ops.kernel_utils) do_tracing() (vllm.engine.llm_engine.LLMEngine method) DO_VALIDATE_OUTPUT (vllm.engine.llm_engine.LLMEngine attribute) document (vllm.entrypoints.openai.protocol.RerankResult attribute) documents (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.RerankRequest attribute) down_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) download_dir (vllm.config.LoadConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) download_file() (vllm.connections.HTTPConnection method) download_model() (vllm.model_executor.model_loader.base_loader.BaseModelLoader method) (vllm.model_executor.model_loader.bitsandbytes_loader.BitsAndBytesModelLoader method) (vllm.model_executor.model_loader.default_loader.DefaultModelLoader method) (vllm.model_executor.model_loader.dummy_loader.DummyModelLoader method) (vllm.model_executor.model_loader.gguf_loader.GGUFModelLoader method) (vllm.model_executor.model_loader.runai_streamer_loader.RunaiModelStreamerLoader method) (vllm.model_executor.model_loader.sharded_state_loader.ShardedStateLoader method) (vllm.model_executor.model_loader.tensorizer_loader.TensorizerLoader method) download_safetensors_index_file_from_hf() (in module vllm.model_executor.model_loader.weight_utils) download_video_asset() (in module vllm.assets.video) download_weights_from_hf() (in module vllm.model_executor.model_loader.weight_utils) downsample_ratio (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) dp_metadata (vllm.forward_context.ForwardContext attribute) DPAsyncMPClient (class in vllm.v1.engine.core_client) DPEngineCoreProc (class in vllm.v1.engine.core) DPMetadata (class in vllm.forward_context) draft_acceptance_rate (vllm.spec_decode.metrics.SpecDecodeWorkerMetrics attribute) draft_model_config (vllm.config.SpeculativeConfig attribute) draft_parallel_config (vllm.config.SpeculativeConfig attribute) draft_tensor_parallel_size (vllm.config.SpeculativeConfig attribute) draft_token_ids (vllm.v1.spec_decode.metadata.SpecDecodeMetadata attribute) draft_tokens (vllm.spec_decode.metrics.SpecDecodeWorkerMetrics attribute) drop_select() (vllm.distributed.kv_transfer.kv_lookup_buffer.base.KVLookupBufferBase method) (vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer.SimpleBuffer method) drop_select_handler() (vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer.SimpleBuffer method) ds_dequantize() (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPParameter method) ds_quantize_() (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPParameter method) ds_selective_dequantize() (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPParameter method) dtype (vllm.compilation.fusion.QuantKey attribute) (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.model_executor.layers.quantization.schema.KVCacheQuantSchema attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) (vllm.model_executor.models.aya_vision.AyaVisionForConditionalGeneration property) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration property) (vllm.model_executor.models.molmo.MolmoVisionBackbone property) (vllm.model_executor.models.ovis.VisualEmbedding property) (vllm.model_executor.models.ovis.VisualTokenizer property) (vllm.model_executor.models.pixtral.VisionTransformer property) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer property) (vllm.model_executor.models.qwen2_vl.Qwen2VisionTransformer property) (vllm.v1.kv_cache_interface.AttentionSpec attribute) DualChunkFlashAttentionBackend (class in vllm.attention.backends.dual_chunk_flash_attn) DualChunkFlashAttentionImpl (class in vllm.attention.backends.dual_chunk_flash_attn) DualChunkFlashAttentionMetadata (class in vllm.attention.backends.dual_chunk_flash_attn) DualChunkFlashAttentionMetadataBuilder (class in vllm.attention.backends.dual_chunk_flash_attn) DualChunkRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) DUMMY (vllm.config.LoadFormat attribute) dummy_data_for_profiling() (vllm.inputs.registry.InputRegistry method) dummy_inputs (vllm.multimodal.profiling.MultiModalProfiler property) dummy_lora_cache() (vllm.lora.worker_manager.WorkerLoRAManager method) dummy_run() (vllm.v1.spec_decode.eagle.EagleProposer method) DUMMY_TOKEN_ID (in module vllm.worker.hpu_model_runner) DummyData (class in vllm.inputs.registry) DummyDecoderData (class in vllm.multimodal.profiling) DummyEncoderData (class in vllm.multimodal.profiling) DummyInputLayerNorm (class in vllm.model_executor.models.eagle) DummyInputsBuilderFactory (class in vllm.multimodal.registry) DummyModelLoader (class in vllm.model_executor.model_loader.dummy_loader) DummyOutputNorm (class in vllm.model_executor.models.eagle) dump_engine_exception() (in module vllm.logging_utils.dump_input) dump_graph() (vllm.compilation.vllm_inductor_pass.VllmInductorPass method) dump_graph_dir (vllm.config.PassConfig attribute) dump_graph_stages (vllm.config.PassConfig attribute) duration (vllm.entrypoints.openai.protocol.TranscriptionResponseVerbose attribute) dynamic_preprocess_h2ovl() (in module vllm.model_executor.models.h2ovl) dynamic_preprocess_internvl() (in module vllm.model_executor.models.internvl) dynamic_preprocess_skyworkr1v() (in module vllm.model_executor.models.skyworkr1v) DynamicNTKScalingRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) DYNAMO_AS_IS (vllm.config.CompilationLevel attribute) DYNAMO_ONCE (vllm.config.CompilationLevel attribute) E e2e_latency (vllm.v1.metrics.stats.FinishedRequestStats attribute) e2e_latency_s (vllm.v1.stats.common.RequestStats property) EagerAdaptor (class in vllm.compilation.compiler_interface) EAGLE (class in vllm.model_executor.models.eagle) Eagle3LlamaForCausalLM (class in vllm.model_executor.models.llama_eagle3) EAGLEConfig (class in vllm.transformers_utils.configs.eagle) EagleLlamaForCausalLM (class in vllm.model_executor.models.llama_eagle) EagleProposer (class in vllm.v1.spec_decode.eagle) echo (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) effective_query_lens (vllm.attention.backends.pallas.PallasMetadata attribute) eight_bit (vllm.attention.ops.triton_flash_attention.MetaData attribute) eight_bit_dtype_torch (vllm.attention.ops.triton_flash_attention.MetaData attribute) eight_bit_dtype_triton (vllm.attention.ops.triton_flash_attention.MetaData attribute) embed() (vllm.entrypoints.llm.LLM method) embed_multimodal() (in module vllm.model_executor.models.utils) embedding (vllm.entrypoints.openai.protocol.EmbeddingResponseData attribute) (vllm.model_executor.models.module_mapping.ModelKeys attribute) (vllm.outputs.EmbeddingOutput attribute) (vllm.outputs.PoolingOutput property) (vllm.outputs.ScoringOutput property) embedding() (in module vllm.entrypoints.openai.api_server) (vllm.model_executor.layers.quantization.base_config.QuantizeMethodBase method) (vllm.model_executor.layers.quantization.gguf.GGUFEmbeddingMethod method) (vllm.model_executor.layers.vocab_parallel_embedding.UnquantizedEmbeddingMethod method) embedding_modules (vllm.model_executor.models.bamba.BambaForCausalLM attribute) (vllm.model_executor.models.commandr.CohereForCausalLM attribute) (vllm.model_executor.models.exaone.ExaoneForCausalLM attribute) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeForCausalLM attribute) (vllm.model_executor.models.granite.GraniteForCausalLM attribute) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM attribute) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM attribute) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM attribute) (vllm.model_executor.models.interfaces.SupportsLoRA attribute) (vllm.model_executor.models.jamba.JambaForCausalLM attribute) (vllm.model_executor.models.llama.LlamaForCausalLM attribute) (vllm.model_executor.models.minicpm.MiniCPMForCausalLM attribute) (vllm.model_executor.models.mixtral.MixtralForCausalLM attribute) (vllm.model_executor.models.nemotron.NemotronForCausalLM attribute) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM attribute) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM attribute) (vllm.model_executor.models.solar.SolarForCausalLM attribute) (vllm.model_executor.models.transformers.TransformersForCausalLM attribute) embedding_padding_modules (vllm.model_executor.models.bamba.BambaForCausalLM attribute) (vllm.model_executor.models.exaone.ExaoneForCausalLM attribute) (vllm.model_executor.models.granite.GraniteForCausalLM attribute) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM attribute) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM attribute) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM attribute) (vllm.model_executor.models.interfaces.SupportsLoRA attribute) (vllm.model_executor.models.jamba.JambaForCausalLM attribute) (vllm.model_executor.models.llama.LlamaForCausalLM attribute) (vllm.model_executor.models.minicpm.MiniCPMForCausalLM attribute) (vllm.model_executor.models.mixtral.MixtralForCausalLM attribute) (vllm.model_executor.models.nemotron.NemotronForCausalLM attribute) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM attribute) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM attribute) (vllm.model_executor.models.solar.SolarForCausalLM attribute) (vllm.model_executor.models.transformers.TransformersForCausalLM attribute) EmbeddingAllReduceRMSNormPattern (class in vllm.compilation.sequence_parallelism) EmbeddingChatRequest (class in vllm.entrypoints.openai.protocol) EmbeddingCompletionRequest (class in vllm.entrypoints.openai.protocol) EmbeddingItems (class in vllm.multimodal.parse) EmbeddingMixin (class in vllm.entrypoints.openai.serving_embedding) EmbeddingOutput (class in vllm.outputs) EmbeddingRequest (in module vllm.entrypoints.openai.protocol) EmbeddingRequestOutput (class in vllm.outputs) EmbeddingResponse (class in vllm.entrypoints.openai.protocol) EmbeddingResponseData (class in vllm.entrypoints.openai.protocol) embeddings_indices (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase property) (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU property) EmbeddingServeContext (class in vllm.entrypoints.openai.serving_engine) embeds_inputs() (in module vllm.inputs.data) EmbedsInputs (class in vllm.inputs.data) EmbedsPrompt (class in vllm.inputs.data) emitted_tokens (vllm.spec_decode.metrics.SpecDecodeWorkerMetrics attribute) empty() (vllm.worker.hpu_model_runner.PrepareDecodeMetadata class method) (vllm.worker.hpu_model_runner.PreparePromptMetadata class method) empty_bf16() (in module vllm.compilation.activation_quant_fusion) (in module vllm.compilation.fusion) empty_cpu() (vllm.v1.outputs.LogprobsTensors static method) empty_fp32() (in module vllm.compilation.activation_quant_fusion) (in module vllm.compilation.fusion) empty_fp8() (in module vllm.compilation.activation_quant_fusion) EMPTY_MODEL_RUNNER_OUTPUT (in module vllm.v1.outputs) enable() (vllm.core.block.prefix_caching_block.BlockTracker method) ENABLE_ARTIFICIAL_PREEMPT (in module vllm.core.scheduler) enable_chunked_prefill (vllm.config.SchedulerConfig attribute) (vllm.config.SpeculativeConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) enable_expert_parallel (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) enable_fusion (vllm.config.PassConfig attribute) enable_hf_transfer() (in module vllm.model_executor.model_loader.weight_utils) enable_kv_cache_events (vllm.config.KVEventsConfig attribute) enable_kv_scales_calculation (vllm.attention.backends.abstract.AttentionMetadata attribute) enable_lora (vllm.engine.arg_utils.EngineArgs attribute) enable_lora_bias (vllm.engine.arg_utils.EngineArgs attribute) enable_noop (vllm.config.PassConfig attribute) enable_output_validation() (vllm.engine.llm_engine.LLMEngine class method) enable_prefix_caching (vllm.config.CacheConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) enable_prompt_adapter (vllm.engine.arg_utils.EngineArgs attribute) enable_prompt_embeds (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) enable_reasoning (vllm.engine.arg_utils.EngineArgs attribute) enable_sequence_parallelism (vllm.config.PassConfig attribute) enable_sleep_mode (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) enable_tqdm() (in module vllm.model_executor.model_loader.weight_utils) enable_trace_function_call() (in module vllm.logger) enable_trace_function_call_for_thread() (in module vllm.utils) ENABLE_TUNING (vllm.model_executor.layers.quantization.bitblas.BitBLASLinearMethod attribute) (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel attribute) enabled() (vllm.model_executor.custom_op.CustomOp class method) enabled_custom_ops (vllm.config.CompilationConfig attribute) enc_hook() (vllm.v1.serial_utils.MsgpackEncoder method) EncDecMultiModalProcessor (class in vllm.multimodal.processing) encode() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.model_executor.models.chameleon.ChameleonVQVAE method) (vllm.model_executor.models.ovis.VisualTokenizer method) (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor method) (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizer_group.TokenizerGroup method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.serial_utils.MsgpackEncoder method) encode_async() (vllm.transformers_utils.tokenizer_group.TokenizerGroup method) encode_audio_base64() (in module vllm.multimodal.utils) encode_base64() (vllm.multimodal.audio.AudioMediaIO method) (vllm.multimodal.image.ImageEmbeddingMediaIO method) (vllm.multimodal.image.ImageMediaIO method) (vllm.multimodal.video.VideoMediaIO method) encode_hook() (in module vllm.executor.msgspec_utils) encode_image() (vllm.model_executor.models.molmo.MolmoVisionBackbone method) encode_image_base64() (in module vllm.multimodal.utils) encode_into() (vllm.v1.serial_utils.MsgpackEncoder method) encode_one() (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) encode_tokens() (in module vllm.transformers_utils.tokenizer) encode_video_base64() (in module vllm.multimodal.utils) encoded_vocab (vllm.model_executor.guided_decoding.xgrammar_decoding.TokenizerData attribute) ENCODER (vllm.attention.backends.abstract.AttentionType attribute) encoder (vllm.inputs.data.EncoderDecoderInputs attribute) encoder_cache_size (vllm.config.SchedulerConfig attribute) ENCODER_DECODER (vllm.attention.backends.abstract.AttentionType attribute) encoder_input_positions (vllm.worker.cpu_enc_dec_model_runner.EncoderDecoderModelInputForCPU attribute) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelInput attribute) encoder_input_tokens (vllm.worker.cpu_enc_dec_model_runner.EncoderDecoderModelInputForCPU attribute) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelInput attribute) ENCODER_ONLY (vllm.attention.backends.abstract.AttentionType attribute) encoder_prompt (vllm.inputs.data.ExplicitEncoderDecoderPrompt attribute) (vllm.multimodal.inputs.MultiModalEncDecInputs attribute) (vllm.sequence.SequenceGroup property) encoder_prompt_token_ids (vllm.multimodal.inputs.MultiModalEncDecInputs attribute) (vllm.sequence.SequenceGroup property) encoder_seq_data (vllm.sequence.SequenceGroupMetadata attribute) encoder_seq_lens (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) encoder_seq_lens_tensor (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) encoder_seq_start_loc (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) encoder_token_type_ids (vllm.multimodal.inputs.MultiModalEncDecInputs attribute) EncoderCacheManager (class in vllm.v1.core.encoder_cache_manager) EncoderDecoderInputs (class in vllm.inputs.data) EncoderDecoderModelInput (class in vllm.worker.enc_dec_model_runner) EncoderDecoderModelInputForCPU (class in vllm.worker.cpu_enc_dec_model_runner) EncoderDecoderModelRunner (class in vllm.worker.enc_dec_model_runner) EncoderSeqId (in module vllm.core.block_manager) Encoding (class in vllm.transformers_utils.tokenizers.mistral) encoding_format (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) encryption_keyfile (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) end (vllm.entrypoints.openai.protocol.StructuralTag attribute) (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) (vllm.entrypoints.openai.protocol.TranscriptionWord attribute) end() (vllm.multimodal.processing.PromptIndexTargets static method) end_and_log() (vllm.compilation.vllm_inductor_pass.VllmInductorPass method) end_idx (vllm.multimodal.processing.PromptTargetMatch property) end_monitoring_torch_compile() (in module vllm.compilation.monitor) END_SEQ (vllm.distributed.kv_events.ZmqEventPublisher attribute) end_token (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser attribute) end_token_id (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser attribute) endpoint (vllm.config.KVEventsConfig attribute) enforce_eager (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) engine (in module vllm.entrypoints.api_server) engine_client() (in module vllm.entrypoints.openai.api_server) ENGINE_CONTEXT (vllm.usage.usage_lib.UsageContext attribute) ENGINE_CORE_DEAD (vllm.v1.engine.core.EngineCoreProc attribute) engine_core_process_stats (vllm.v1.stats.common.EngineCoreStatsSnapshot attribute) engine_dead (vllm.v1.engine.core_client.BackgroundResources attribute) ENGINE_DEAD_ERROR() (in module vllm.engine.multiprocessing) engine_id (vllm.config.KVTransferConfig attribute) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlAgentMetadata attribute) engine_index (vllm.v1.engine.EngineCoreOutputs attribute) ENGINE_ITERATION_TIMEOUT_S (in module vllm.engine.async_llm_engine) engine_prompts (vllm.entrypoints.openai.serving_engine.RequestProcessingMixin attribute) engine_step() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) EngineArgs (class in vllm.engine.arg_utils) EngineClient (class in vllm.engine.protocol) EngineCore (class in vllm.v1.engine.core) EngineCoreClient (class in vllm.v1.engine.core_client) EngineCoreEvent (class in vllm.v1.engine) EngineCoreEventType (class in vllm.v1.engine) EngineCoreOutput (class in vllm.v1.engine) EngineCoreOutputs (class in vllm.v1.engine) EngineCoreProc (class in vllm.v1.engine.core) EngineCoreProcessStats (class in vllm.v1.stats.common) EngineCoreRequest (class in vllm.v1.engine) EngineCoreRequestType (class in vllm.v1.engine) EngineCoreStatsSnapshot (class in vllm.v1.stats.common) EngineDeadError EngineGenerateError enqueue() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue method) ensure_alive() (vllm.v1.engine.core_client.MPClient method) ensure_divisibility() (in module vllm.distributed.utils) ensure_kv_transfer_initialized() (in module vllm.distributed.kv_transfer.kv_transfer_state) ensure_model_parallel_initialized() (in module vllm.distributed.parallel_state) ensure_num_empty_slots() (vllm.core.block.block_table.BlockTable method) entries (vllm.distributed.utils.StatelessProcessGroup attribute) env_info_fmt (in module vllm.collect_env) environment_variables (in module vllm.envs) eos_id (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor property) eos_token_id (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) (vllm.v1.engine.EngineCoreRequest attribute) error (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) (vllm.entrypoints.openai.protocol.BatchRequestOutput attribute) error_loader() (in module vllm.model_executor.layers.quantization.hqq_marlin) errored (vllm.engine.async_llm_engine.AsyncLLMEngine property) (vllm.engine.multiprocessing.client.MQLLMEngineClient property) (vllm.engine.protocol.EngineClient property) (vllm.v1.engine.async_llm.AsyncLLM property) ErrorResponse (class in vllm.entrypoints.openai.protocol) escape_ebnf_string() (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig static method) estimate_max_model_len() (in module vllm.v1.core.kv_cache_utils) EVA2CLIPAttention (class in vllm.model_executor.models.glm4v) EVA2CLIPGLU (class in vllm.model_executor.models.glm4v) EVA2CLIPMLP (class in vllm.model_executor.models.glm4v) EVA2CLIPModel (class in vllm.model_executor.models.glm4v) EVA2CLIPPatchEmbedding (class in vllm.model_executor.models.glm4v) EVA2CLIPTransformer (class in vllm.model_executor.models.glm4v) EVA2CLIPTransformerLayer (class in vllm.model_executor.models.glm4v) evaluate_guards_expression() (vllm.compilation.compiler_interface.AlwaysHitShapeEnv method) event_arg_repr() (in module vllm.profiler.utils) event_has_module() (in module vllm.profiler.utils) event_is_torch_op() (in module vllm.profiler.utils) event_module_repr() (in module vllm.profiler.utils) event_torch_op_repr() (in module vllm.profiler.utils) event_torch_op_stack_trace() (in module vllm.profiler.utils) EventBatch (class in vllm.distributed.kv_events) EventPublisher (class in vllm.distributed.kv_events) EventPublisherFactory (class in vllm.distributed.kv_events) events (vllm.distributed.kv_events.EventBatch attribute) (vllm.distributed.kv_events.KVEventBatch attribute) (vllm.v1.engine.EngineCoreOutput attribute) evict() (vllm.core.evictor.Evictor method) (vllm.core.evictor.LRUEvictor method) EvictionPolicy (class in vllm.core.evictor) Evictor (class in vllm.core.evictor) EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP (in module vllm.transformers_utils.configs.exaone) ExaoneAttention (class in vllm.model_executor.models.exaone) ExaoneBlockAttention (class in vllm.model_executor.models.exaone) ExaoneConfig (class in vllm.transformers_utils.configs.exaone) ExaoneDecoderLayer (class in vllm.model_executor.models.exaone) ExaoneForCausalLM (class in vllm.model_executor.models.exaone) ExaoneGatedMLP (class in vllm.model_executor.models.exaone) ExaoneModel (class in vllm.model_executor.models.exaone) exception (vllm.engine.multiprocessing.RPCError attribute) (vllm.executor.multiproc_worker_utils.Result attribute) execute_dummy_batch() (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.worker.gpu_worker.Worker method) execute_dummy_batch_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) execute_method() (vllm.executor.multiproc_worker_utils.ProcessWorkerWrapper method) (vllm.worker.worker_base.WorkerWrapperBase method) execute_method_async() (vllm.executor.multiproc_worker_utils.ProcessWorkerWrapper method) execute_model() (vllm.executor.executor_base.DistributedExecutorBase method) (vllm.executor.executor_base.ExecutorBase method) (vllm.executor.ray_distributed_executor.RayDistributedExecutor method) (vllm.spec_decode.draft_model_runner.TP1DraftModelRunner method) (vllm.spec_decode.proposer_worker_base.NonLLMProposerWorkerBase method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.executor.abstract.Executor method) (vllm.v1.executor.multiproc_executor.MultiprocExecutor method) (vllm.v1.executor.ray_distributed_executor.RayDistributedExecutor method) (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.worker.cpu_enc_dec_model_runner.CPUEncoderDecoderModelRunner method) (vllm.worker.cpu_model_runner.CPUModelRunner method) (vllm.worker.cpu_pooling_model_runner.CPUPoolingModelRunner method) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelRunner method) (vllm.worker.hpu_model_runner.HPUModelRunner method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.ModelRunner method) (vllm.worker.model_runner_base.ModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) (vllm.worker.multi_step_neuron_model_runner.MultiStepNeuronModelRunner method) (vllm.worker.multi_step_neuronx_distributed_model_runner.MultiStepNeuronxDistributedModelRunner method) (vllm.worker.neuron_model_runner.NeuronModelRunner method) (vllm.worker.neuronx_distributed_model_runner.NeuronxDistributedModelRunner method) (vllm.worker.pooling_model_runner.PoolingModelRunner method) (vllm.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.LocalOrDistributedWorkerBase method) (vllm.worker.worker_base.WorkerBase method) (vllm.worker.xpu_model_runner.XPUModelRunner method) execute_model_async() (vllm.executor.executor_base.DistributedExecutorBase method) (vllm.executor.executor_base.ExecutorBase method) (vllm.executor.ray_distributed_executor.RayDistributedExecutor method) EXECUTE_MODEL_TIMEOUT_S (in module vllm.v1.executor.multiproc_executor) execute_worker() (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.neuron_worker.NeuronWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.LocalOrDistributedWorkerBase method) ExecuteModelRequest (class in vllm.sequence) ExecutionMode (class in vllm.worker.tpu_model_runner) Executor (class in vllm.v1.executor.abstract) EXECUTOR_FAILED (vllm.v1.engine.EngineCoreRequestType attribute) ExecutorBase (class in vllm.executor.executor_base) ExecutorWithExternalLauncher (class in vllm.executor.uniproc_executor) (class in vllm.v1.executor.abstract) ExllamaLinearKernel (class in vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama) ExllamaState (class in vllm.model_executor.layers.quantization.gptq) expand() (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) expand_batch_to_tokens() (in module vllm.v1.sample.rejection_sampler) expand_kernel() (in module vllm.v1.sample.rejection_sampler) expand_slice() (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) expand_weights() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) expand_with_bonus_tokens() (vllm.sequence.HiddenStates method) expect() (vllm.compilation.counter.CompilationCounter method) expected_output_len (vllm.benchmarks.datasets.SampleRequest attribute) ExpertsInt8Config (class in vllm.model_executor.layers.quantization.experts_int8) ExpertsInt8MoEMethod (class in vllm.model_executor.layers.quantization.experts_int8) expire_data() (vllm.distributed.utils.StatelessProcessGroup method) ExplicitEncoderDecoderPrompt (class in vllm.inputs.data) exponent (vllm.scalar_type.ScalarType attribute) export_handle() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue method) export_model_stats_table_csv() (vllm.profiler.layerwise_profile.LayerwiseProfileResults method) export_summary_stats_table_csv() (vllm.profiler.layerwise_profile.LayerwiseProfileResults method) exported_functions (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary attribute) (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary attribute) ext_hook() (vllm.v1.serial_utils.MsgpackDecoder method) EXTD_RANGE_MAX_MIN (vllm.scalar_type.NanRepr attribute) extend() (vllm.multimodal.base.MultiModalPlaceholderMap method) (vllm.v1.utils.ConstantList method) extend_pe() (vllm.model_executor.models.phi4mm_utils.AbsolutePositionalEncoding method) extra_args (vllm.sampling_params.SamplingParams attribute) extra_body (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) extra_config (vllm.config.TokenizerPoolConfig attribute) extra_groups_for_head_shards() (in module vllm.model_executor.layers.mamba.mamba_mixer2) extra_hash (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) extra_hash() (vllm.sequence.Sequence method) extra_keys (vllm.v1.core.kv_cache_utils.BlockHashType attribute) extra_multi_layer_output_idxs (vllm.model_executor.models.phi4mm_audio.ConformerEncoder attribute) extra_repr() (vllm.attention.layer.Attention method) (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA method) (vllm.model_executor.layers.activation.GeluAndMul method) (vllm.model_executor.layers.fused_moe.layer.FusedMoE method) (vllm.model_executor.layers.layernorm.RMSNorm method) (vllm.model_executor.layers.linear.ColumnParallelLinear method) (vllm.model_executor.layers.linear.QKVCrossParallelLinear method) (vllm.model_executor.layers.linear.ReplicatedLinear method) (vllm.model_executor.layers.linear.RowParallelLinear method) (vllm.model_executor.layers.logits_processor.LogitsProcessor method) (vllm.model_executor.layers.rotary_embedding.DualChunkRotaryEmbedding method) (vllm.model_executor.layers.rotary_embedding.RotaryEmbedding method) (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbedding method) (vllm.model_executor.models.mllama.MllamaTextRMSNorm method) (vllm.model_executor.models.moonvit.Rope2DPosEmb method) extra_special_tokens() (vllm.transformers_utils.processors.ovis.OvisProcessor method) extra_vocab_size (vllm.lora.lora.LoRALayerWeights property) (vllm.lora.models.LoRAModel property) extract_content_ids() (vllm.reasoning.abs_reasoning_parsers.ReasoningParser method) (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser method) (vllm.reasoning.qwen3_reasoning_parser.Qwen3ReasoningParser method) extract_feature() (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) extract_intermediate_diff() (in module vllm.entrypoints.openai.tool_parsers.utils) extract_layer_index() (in module vllm.model_executor.models.utils) extract_previous_hidden_states() (in module vllm.worker.worker_base) extract_reasoning_content() (vllm.reasoning.abs_reasoning_parsers.ReasoningParser method) (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser method) (vllm.reasoning.granite_reasoning_parser.GraniteReasoningParser method) (vllm.reasoning.qwen3_reasoning_parser.Qwen3ReasoningParser method) extract_reasoning_content_streaming() (vllm.reasoning.abs_reasoning_parsers.ReasoningParser method) (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser method) (vllm.reasoning.granite_reasoning_parser.GraniteReasoningParser method) (vllm.reasoning.qwen3_reasoning_parser.Qwen3ReasoningParser method) extract_states() (vllm.model_executor.layers.pooler.AllPool method) (vllm.model_executor.layers.pooler.CLSPool method) (vllm.model_executor.layers.pooler.LastPool method) (vllm.model_executor.layers.pooler.MeanPool method) (vllm.model_executor.layers.pooler.SimplePooler method) (vllm.model_executor.layers.pooler.StepPool method) extract_tool_call_required_streaming() (vllm.entrypoints.openai.serving_chat.OpenAIServingChat method) extract_tool_calls() (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParser method) (vllm.entrypoints.openai.tool_parsers.deepseekv3_tool_parser.DeepSeekV3ToolParser method) (vllm.entrypoints.openai.tool_parsers.granite_20b_fc_tool_parser.Granite20bFCToolParser method) (vllm.entrypoints.openai.tool_parsers.granite_tool_parser.GraniteToolParser method) (vllm.entrypoints.openai.tool_parsers.hermes_tool_parser.Hermes2ProToolParser method) (vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser.Internlm2ToolParser method) (vllm.entrypoints.openai.tool_parsers.jamba_tool_parser.JambaToolParser method) (vllm.entrypoints.openai.tool_parsers.llama_tool_parser.Llama3JsonToolParser method) (vllm.entrypoints.openai.tool_parsers.mistral_tool_parser.MistralToolParser method) (vllm.entrypoints.openai.tool_parsers.phi4mini_tool_parser.Phi4MiniJsonToolParser method) (vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser.PythonicToolParser method) extract_tool_calls_streaming() (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParser method) (vllm.entrypoints.openai.tool_parsers.deepseekv3_tool_parser.DeepSeekV3ToolParser method) (vllm.entrypoints.openai.tool_parsers.granite_20b_fc_tool_parser.Granite20bFCToolParser method) (vllm.entrypoints.openai.tool_parsers.granite_tool_parser.GraniteToolParser method) (vllm.entrypoints.openai.tool_parsers.hermes_tool_parser.Hermes2ProToolParser method) (vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser.Internlm2ToolParser method) (vllm.entrypoints.openai.tool_parsers.jamba_tool_parser.JambaToolParser method) (vllm.entrypoints.openai.tool_parsers.llama_tool_parser.Llama3JsonToolParser method) (vllm.entrypoints.openai.tool_parsers.mistral_tool_parser.MistralToolParser method) (vllm.entrypoints.openai.tool_parsers.phi4mini_tool_parser.Phi4MiniJsonToolParser method) (vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser.PythonicToolParser method) extract_trace_context() (in module vllm.tracing) extract_trace_headers() (in module vllm.tracing) ExtractedToolCallInformation (class in vllm.entrypoints.openai.protocol) F F (in module vllm.utils) FAILURE (vllm.v1.executor.multiproc_executor.WorkerProc.ResponseStatus attribute) failure_message (vllm.v1.engine.UtilityOutput attribute) FailureCallback (in module vllm.v1.executor.abstract) Fairseq2LlamaForCausalLM (class in vllm.model_executor.models.fairseq2_llama) FalconAttention (class in vllm.model_executor.models.falcon) FalconConfig (in module vllm.model_executor.models.falcon) FalconDecoderLayer (class in vllm.model_executor.models.falcon) FalconForCausalLM (class in vllm.model_executor.models.falcon) FalconMLP (class in vllm.model_executor.models.falcon) FalconModel (class in vllm.model_executor.models.falcon) fall_back_to_pt (vllm.model_executor.model_loader.default_loader.DefaultModelLoader.Source attribute) fall_back_to_pt_during_load (vllm.model_executor.models.deepseek.DeepseekModel attribute) (vllm.model_executor.models.deepseek_v2.DeepseekV2Model attribute) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM attribute) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM attribute) (vllm.model_executor.models.grok1.Grok1ForCausalLM attribute) (vllm.model_executor.models.mixtral.MixtralForCausalLM attribute) (vllm.model_executor.models.mixtral_quant.MixtralForCausalLM attribute) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM attribute) (vllm.model_executor.models.qwen2_moe.Qwen2MoeForCausalLM attribute) (vllm.model_executor.models.qwen3_moe.Qwen3MoeForCausalLM attribute) fast_topk() (in module vllm.model_executor.models.utils) FastGELU (class in vllm.model_executor.layers.activation) FastIncrementalDetokenizer (class in vllm.v1.engine.detokenizer) FASTSAFETENSORS (vllm.config.LoadFormat attribute) fastsafetensors_weights_iterator() (in module vllm.model_executor.model_loader.weight_utils) FatreluAndMul (class in vllm.model_executor.layers.activation) FBGEMMFp8Config (class in vllm.model_executor.layers.quantization.fbgemm_fp8) FBGEMMFp8LinearMethod (class in vllm.model_executor.layers.quantization.fbgemm_fp8) feat_is_patch (vllm.model_executor.models.molmo.MolmoImageInputs attribute) feature_attention_mask (vllm.model_executor.models.qwen2_audio.Qwen2AudioInputs attribute) feed_forward_chunk() (vllm.model_executor.models.blip2.Blip2QFormerLayer method) feed_forward_chunk_query() (vllm.model_executor.models.blip2.Blip2QFormerLayer method) FeedForward (class in vllm.model_executor.models.phi4mm_utils) (class in vllm.model_executor.models.pixtral) fetch_audio (in module vllm.multimodal.utils) fetch_audio() (vllm.multimodal.utils.MediaConnector method) fetch_audio_async() (vllm.multimodal.utils.MediaConnector method) fetch_image (in module vllm.multimodal.utils) fetch_image() (vllm.multimodal.utils.MediaConnector method) fetch_image_async() (vllm.multimodal.utils.MediaConnector method) fetch_image_embedding() (vllm.multimodal.utils.MediaConnector method) fetch_video (in module vllm.multimodal.utils) fetch_video() (vllm.multimodal.utils.MediaConnector method) fetch_video_async() (vllm.multimodal.utils.MediaConnector method) ffn_config_defaults (in module vllm.transformers_utils.configs.mpt) field (vllm.multimodal.inputs.MultiModalFieldElem attribute) field_names (vllm.entrypoints.openai.protocol.ChatCompletionLogProbsContent attribute) (vllm.entrypoints.openai.protocol.OpenAIBaseModel attribute) file (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) file_exists() (in module vllm.transformers_utils.config) file_or_path_exists() (in module vllm.transformers_utils.config) filename (vllm.assets.audio.AudioAsset property) (vllm.assets.video.VideoAsset property) fileno() (vllm.v1.utils.BackgroundProcHandle method) FilesystemResolver (class in vllm.plugins.lora_resolvers.filesystem_resolver) fill_bitmask() (vllm.v1.structured_output.backend_guidance.GuidanceGrammar method) (vllm.v1.structured_output.backend_types.StructuredOutputGrammar method) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar method) filter_duplicate_safetensors_files() (in module vllm.model_executor.model_loader.weight_utils) filter_files_not_needed_for_inference() (in module vllm.model_executor.model_loader.weight_utils) FINAL_ONLY (vllm.sampling_params.RequestOutputKind attribute) final_res_batch (vllm.entrypoints.openai.serving_engine.ResponseGenerationMixin attribute) final_usage_info (vllm.entrypoints.openai.protocol.RequestResponseMetadata attribute) find_all_indices() (in module vllm.entrypoints.openai.tool_parsers.utils) find_auto_fn() (in module vllm.compilation.fx_utils) (vllm.compilation.multi_output_match.MultiOutputMatch method) find_auto_fn_maybe() (in module vllm.compilation.fx_utils) find_cached_blocks_prefix() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) find_closest_aspect_ratio() (in module vllm.model_executor.models.internvl) (in module vllm.model_executor.models.skyworkr1v) find_common_prefix() (in module vllm.entrypoints.openai.tool_parsers.utils) find_common_suffix() (in module vllm.entrypoints.openai.tool_parsers.utils) find_getitem() (in module vllm.compilation.fx_utils) find_getitem_maybe() (in module vllm.compilation.fx_utils) find_library() (in module vllm.utils) find_loaded_library() (in module vllm.device_allocator.cumem) (in module vllm.distributed.device_communicators.cuda_wrapper) find_longest_cache_hit() (vllm.v1.core.single_type_kv_cache_manager.FullAttentionManager method) (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) (vllm.v1.core.single_type_kv_cache_manager.SlidingWindowManager method) find_matched_target() (in module vllm.model_executor.layers.quantization.compressed_tensors.utils) find_mm_placeholders() (in module vllm.multimodal.processing) find_nccl_library() (in module vllm.utils) find_process_using_port() (in module vllm.utils) find_specified_fn() (in module vllm.compilation.fx_utils) find_specified_fn_maybe() (in module vllm.compilation.fx_utils) find_text_matches() (in module vllm.multimodal.processing) find_token_matches() (in module vllm.multimodal.processing) find_tokenizer_file() (in module vllm.transformers_utils.tokenizers.mistral) finish() (vllm.engine.async_llm_engine.AsyncStream method) finish_measurements() (vllm.worker.hpu_model_runner.HPUModelRunner method) (vllm.worker.hpu_worker.HPUWorker method) finish_reason (vllm.beam_search.BeamSearchSequence attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.CompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.CompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.TranscriptionResponseStreamChoice attribute) (vllm.outputs.CompletionOutput attribute) (vllm.v1.engine.EngineCoreOutput attribute) (vllm.v1.metrics.stats.FinishedRequestStats attribute) (vllm.v1.stats.common.RequestStats attribute) (vllm.v1.stats.common.RequestStatsUpdate attribute) FINISH_REASON_STRINGS (in module vllm.v1.engine) finish_request() (vllm.v1.metrics.stats.LoRARequestStates method) finish_requests() (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) finish_seq() (vllm.sequence.SequenceGroupBase method) finish_step() (vllm.sequence.SequenceGroupMetadata method) finished (vllm.engine.async_llm_engine.AsyncStream property) (vllm.v1.engine.EngineCoreOutput property) FINISHED (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) finished() (vllm.outputs.CompletionOutput method) FINISHED_ABORTED (vllm.sequence.SequenceStatus attribute) (vllm.v1.request.RequestStatus attribute) FINISHED_IGNORED (vllm.sequence.SequenceStatus attribute) (vllm.v1.request.RequestStatus attribute) FINISHED_LENGTH_CAPPED (vllm.sequence.SequenceStatus attribute) (vllm.v1.request.RequestStatus attribute) finished_reason_requests (vllm.engine.metrics_types.Stats attribute) finished_recving (vllm.v1.outputs.ModelRunnerOutput attribute) finished_req_ids (vllm.v1.core.sched.output.SchedulerOutput attribute) finished_reqs (vllm.sequence.SequenceGroupBase attribute) finished_requests (vllm.v1.engine.EngineCoreOutputs attribute) finished_requests_ids (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) finished_sending (vllm.v1.outputs.ModelRunnerOutput attribute) FINISHED_STOPPED (vllm.sequence.SequenceStatus attribute) (vllm.v1.request.RequestStatus attribute) finished_time (vllm.sequence.RequestMetrics attribute) finished_ts_s (vllm.v1.stats.common.RequestStats attribute) FinishedRequestStats (class in vllm.v1.metrics.stats) FinishReason (class in vllm.v1.engine) first_rank (vllm.distributed.parallel_state.GroupCoordinator property) first_scheduled_time (vllm.sequence.RequestMetrics attribute) first_token_latency_s (vllm.v1.stats.common.RequestStats property) first_token_time (vllm.sequence.RequestMetrics attribute) first_token_ts (vllm.v1.metrics.stats.RequestStateStats attribute) first_token_ts_s (vllm.v1.stats.common.RequestStats attribute) FixFunctionalizationPass (class in vllm.compilation.fix_functionalization) flag_sharded_weights() (vllm.model_executor.models.fairseq2_llama.Fairseq2LlamaForCausalLM method) flash_attn_varlen_nkifunc() (in module vllm.attention.ops.nki_flash_attn) flash_mla_with_kvcache() (in module vllm._custom_ops) (in module vllm.attention.ops.flashmla) flash_paged_attention() (in module vllm.attention.ops.nki_flash_attn) FlashAttentionBackend (class in vllm.attention.backends.flash_attn) (class in vllm.v1.attention.backends.flash_attn) FlashAttentionImpl (class in vllm.attention.backends.flash_attn) (class in vllm.v1.attention.backends.flash_attn) FlashAttentionMetadata (class in vllm.attention.backends.flash_attn) (class in vllm.v1.attention.backends.flash_attn) FlashAttentionMetadata.LocalAttentionMetadata (class in vllm.v1.attention.backends.flash_attn) FlashAttentionMetadataBuilder (class in vllm.attention.backends.flash_attn) (class in vllm.v1.attention.backends.flash_attn) FLASHINFER_KV_CACHE_LAYOUT (in module vllm.attention.backends.flashinfer) flashinfer_sample() (in module vllm.v1.sample.ops.topk_topp_sampler) FLASHINFER_WORKSPACE_BUFFER_SIZE (in module vllm.v1.attention.backends.flashinfer) FlashInferBackend (class in vllm.attention.backends.flashinfer) (class in vllm.v1.attention.backends.flashinfer) FlashInferImpl (class in vllm.attention.backends.flashinfer) (class in vllm.v1.attention.backends.flashinfer) FlashInferMetadata (class in vllm.attention.backends.flashinfer) (class in vllm.v1.attention.backends.flashinfer) FlashInferMetadataBuilder (class in vllm.attention.backends.flashinfer) (class in vllm.v1.attention.backends.flashinfer) FlashInferState (class in vllm.attention.backends.flashinfer) FlashMLABackend (class in vllm.attention.backends.flashmla) (class in vllm.v1.attention.backends.mla.flashmla) FlashMLADecodeMetadata (class in vllm.v1.attention.backends.mla.flashmla) FlashMLAImpl (class in vllm.attention.backends.flashmla) (class in vllm.v1.attention.backends.mla.flashmla) FlashMLAMetadata (class in vllm.attention.backends.flashmla) (class in vllm.v1.attention.backends.mla.flashmla) FlashMLAMetadataBuilder (class in vllm.attention.backends.flashmla) (class in vllm.v1.attention.backends.mla.flashmla) FlashMLAState (class in vllm.attention.backends.flashmla) flat() (vllm.multimodal.inputs.MultiModalFieldConfig static method) flat_data (vllm.model_executor.models.fuyu.FuyuImagePatchInputs attribute) (vllm.model_executor.models.mllama4.Llama4ImagePatchInputs attribute) (vllm.model_executor.models.ovis.OvisImagePatchInputs attribute) flat_encoder_result() (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) flat_from_sizes() (vllm.multimodal.inputs.MultiModalFieldConfig static method) flatten() (in module vllm.worker.hpu_model_runner) flatten_2d_lists() (in module vllm.utils) flatten_bn() (in module vllm.model_executor.models.utils) FlexibleArgumentParser (class in vllm.utils) float16 (vllm.scalar_type.scalar_types attribute) float16_e5m10 (vllm.scalar_type.scalar_types attribute) float16_e8m7 (vllm.scalar_type.scalar_types attribute) FLOAT4_E2M1_MAX (in module vllm.model_executor.layers.fused_moe.cutlass_moe) float4_e2m1f (vllm.scalar_type.scalar_types attribute) float6_e3m2f (vllm.scalar_type.scalar_types attribute) FLOAT8_E4M3_MAX (in module vllm.model_executor.layers.fused_moe.cutlass_moe) float8_e4m3fn (vllm.scalar_type.scalar_types attribute) float8_e5m2 (vllm.scalar_type.scalar_types attribute) float_() (vllm.scalar_type.ScalarType class method) float_IEEE754() (vllm.scalar_type.ScalarType class method) Florence2DummyInputsBuilder (class in vllm.model_executor.models.florence2) Florence2ForConditionalGeneration (class in vllm.model_executor.models.florence2) Florence2ImagePixelInputs (class in vllm.model_executor.models.florence2) Florence2LanguageForConditionalGeneration (class in vllm.model_executor.models.florence2) Florence2LanguageModel (class in vllm.model_executor.models.florence2) Florence2MultiModalProcessor (class in vllm.model_executor.models.florence2) Florence2ProcessingInfo (class in vllm.model_executor.models.florence2) forced_attn_backend (in module vllm.attention.selector) fork() (vllm.core.block.block_table.BlockTable method) (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.sequence.Sequence method) fork_seq() (vllm.core.scheduler.Scheduler method) format() (vllm.logging_utils.formatter.NewLineFormatter method) forward() (vllm.attention.backends.abstract.AttentionImpl method) (vllm.attention.backends.abstract.AttentionLayer method) (vllm.attention.backends.abstract.MLAAttentionImpl method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionImpl method) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionImpl method) (vllm.attention.backends.flash_attn.FlashAttentionImpl method) (vllm.attention.backends.flashinfer.FlashInferImpl method) (vllm.attention.backends.hpu_attn.HPUAttentionImpl method) (vllm.attention.backends.ipex_attn.IpexAttnBackendImpl method) (vllm.attention.backends.mla.common.MLACommonImpl method) (vllm.attention.backends.pallas.PallasAttentionBackendImpl method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionImpl method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionImpl method) (vllm.attention.backends.torch_sdpa.TorchSDPABackendImpl method) (vllm.attention.backends.xformers.XFormersImpl method) (vllm.attention.layer.Attention method) (vllm.attention.layer.MultiHeadAttention method) (vllm.attention.ops.blocksparse_attention.interface.LocalStridedBlockSparseAttn method) (vllm.compilation.wrapper.TorchCompileWrapperWithCustomDispatcher method) (vllm.lora.layers.ColumnParallelLinearWithLoRA method) (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA method) (vllm.lora.layers.LogitsProcessorWithLoRA method) (vllm.lora.layers.ReplicatedLinearWithLoRA method) (vllm.lora.layers.RowParallelLinearWithLoRA method) (vllm.lora.layers.VocabParallelEmbeddingWithLoRA method) (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.activation.ScaledActivation method) (vllm.model_executor.layers.fused_moe.layer.FusedMoE method) (vllm.model_executor.layers.linear.ColumnParallelLinear method) (vllm.model_executor.layers.linear.LinearBase method) (vllm.model_executor.layers.linear.QKVCrossParallelLinear method) (vllm.model_executor.layers.linear.ReplicatedLinear method) (vllm.model_executor.layers.linear.RowParallelLinear method) (vllm.model_executor.layers.logits_processor.LogitsProcessor method) (vllm.model_executor.layers.pooler.CrossEncodingPooler method) (vllm.model_executor.layers.pooler.PoolerHead method) (vllm.model_executor.layers.pooler.SimplePooler method) (vllm.model_executor.layers.rejection_sampler.RejectionSampler method) (vllm.model_executor.layers.resampler.Resampler2 method) (vllm.model_executor.layers.rotary_embedding.DeepseekScalingRotaryEmbedding method) (vllm.model_executor.layers.rotary_embedding.DualChunkRotaryEmbedding method) (vllm.model_executor.layers.rotary_embedding.Llama4VisionRotaryEmbedding method) (vllm.model_executor.layers.rotary_embedding.MRotaryEmbedding method) (vllm.model_executor.layers.rotary_embedding.Phi3LongRoPEScaledRotaryEmbedding method) (vllm.model_executor.layers.sampler.Sampler method) (vllm.model_executor.layers.spec_decode_base_sampler.SpecDecodeDeterministicBaseSampler method) (vllm.model_executor.layers.spec_decode_base_sampler.SpecDecodeStochasticBaseSampler method) (vllm.model_executor.layers.typical_acceptance_sampler.TypicalAcceptanceSampler method) (vllm.model_executor.layers.vocab_parallel_embedding.ParallelLMHead method) (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbedding method) (vllm.model_executor.model_loader.neuron.NeuronCausalLM method) (vllm.model_executor.model_loader.neuron.NeuronSpeculationCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronMllamaForCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronSpeculationCausalLM method) (vllm.model_executor.models.aimv2.AIMv2Attention method) (vllm.model_executor.models.aimv2.AIMv2Block method) (vllm.model_executor.models.aimv2.AIMv2Model method) (vllm.model_executor.models.aimv2.AIMv2PatchEmbed method) (vllm.model_executor.models.aimv2.AIMv2SwiGLUFFN method) (vllm.model_executor.models.aimv2.AIMv2Transformer method) (vllm.model_executor.models.aimv2.AIMv2ViTPreprocessor method) (vllm.model_executor.models.arctic.ArcticAttention method) (vllm.model_executor.models.arctic.ArcticDecoderLayer method) (vllm.model_executor.models.arctic.ArcticForCausalLM method) (vllm.model_executor.models.arctic.ArcticMLP method) (vllm.model_executor.models.arctic.ArcticModel method) (vllm.model_executor.models.arctic.ArcticMoE method) (vllm.model_executor.models.aria.AriaForConditionalGeneration method) (vllm.model_executor.models.aria.AriaProjector method) (vllm.model_executor.models.aria.AriaProjectorMLP method) (vllm.model_executor.models.aria.AriaTextMoELayer method) (vllm.model_executor.models.aya_vision.AyaVisionForConditionalGeneration method) (vllm.model_executor.models.aya_vision.AyaVisionMultiModalProjector method) (vllm.model_executor.models.baichuan.BaiChuanAttention method) (vllm.model_executor.models.baichuan.BaiChuanBaseForCausalLM method) (vllm.model_executor.models.baichuan.BaiChuanDecoderLayer method) (vllm.model_executor.models.baichuan.BaiChuanMLP method) (vllm.model_executor.models.baichuan.BaiChuanModel method) (vllm.model_executor.models.bamba.BambaAttentionDecoderLayer method) (vllm.model_executor.models.bamba.BambaForCausalLM method) (vllm.model_executor.models.bamba.BambaMixerDecoderLayer method) (vllm.model_executor.models.bamba.BambaMLP method) (vllm.model_executor.models.bamba.BambaModel method) (vllm.model_executor.models.bart.BartCrossAttention method) (vllm.model_executor.models.bart.BartDecoder method) (vllm.model_executor.models.bart.BartDecoderLayer method) (vllm.model_executor.models.bart.BartDecoderSelfAttention method) (vllm.model_executor.models.bart.BartEncoder method) (vllm.model_executor.models.bart.BartEncoderAttention method) (vllm.model_executor.models.bart.BartEncoderLayer method) (vllm.model_executor.models.bart.BartForConditionalGeneration method) (vllm.model_executor.models.bart.BartLearnedPositionalEmbedding method) (vllm.model_executor.models.bart.BartModel method) (vllm.model_executor.models.bart.BartParallelLMHead method) (vllm.model_executor.models.bart.BartScaledWordEmbedding method) (vllm.model_executor.models.bert.BertAttention method) (vllm.model_executor.models.bert.BertEmbedding method) (vllm.model_executor.models.bert.BertEmbeddingModel method) (vllm.model_executor.models.bert.BertEncoder method) (vllm.model_executor.models.bert.BertForSequenceClassification method) (vllm.model_executor.models.bert.BertIntermediate method) (vllm.model_executor.models.bert.BertLayer method) (vllm.model_executor.models.bert.BertModel method) (vllm.model_executor.models.bert.BertOutput method) (vllm.model_executor.models.bert.BertPooler method) (vllm.model_executor.models.bert.BertSelfAttention method) (vllm.model_executor.models.bert.BertSelfOutput method) (vllm.model_executor.models.bert_with_rope.BertWithRope method) (vllm.model_executor.models.bert_with_rope.BertWithRopeAttention method) (vllm.model_executor.models.bert_with_rope.BertWithRopeBlock method) (vllm.model_executor.models.bert_with_rope.BertWithRopeEmbedding method) (vllm.model_executor.models.bert_with_rope.BertWithRopeEncoder method) (vllm.model_executor.models.bert_with_rope.BertWithRopeGatedMLP method) (vllm.model_executor.models.bert_with_rope.BertWithRopeMLP method) (vllm.model_executor.models.bert_with_rope.JinaRobertaModel method) (vllm.model_executor.models.bert_with_rope.NomicExpertMLP method) (vllm.model_executor.models.bert_with_rope.NomicExperts method) (vllm.model_executor.models.bert_with_rope.NomicMoELayer method) (vllm.model_executor.models.bert_with_rope.NomicRouter method) (vllm.model_executor.models.blip.BlipAttention method) (vllm.model_executor.models.blip.BlipEncoder method) (vllm.model_executor.models.blip.BlipEncoderLayer method) (vllm.model_executor.models.blip.BlipMLP method) (vllm.model_executor.models.blip.BlipVisionEmbeddings method) (vllm.model_executor.models.blip.BlipVisionModel method) (vllm.model_executor.models.blip2.Blip2ForConditionalGeneration method) (vllm.model_executor.models.blip2.Blip2QFormerAttention method) (vllm.model_executor.models.blip2.Blip2QFormerEncoder method) (vllm.model_executor.models.blip2.Blip2QFormerIntermediate method) (vllm.model_executor.models.blip2.Blip2QFormerLayer method) (vllm.model_executor.models.blip2.Blip2QFormerModel method) (vllm.model_executor.models.blip2.Blip2QFormerMultiHeadAttention method) (vllm.model_executor.models.blip2.Blip2QFormerOutput method) (vllm.model_executor.models.blip2.Blip2QFormerSelfOutput method) (vllm.model_executor.models.bloom.BloomAttention method) (vllm.model_executor.models.bloom.BloomBlock method) (vllm.model_executor.models.bloom.BloomForCausalLM method) (vllm.model_executor.models.bloom.BloomMLP method) (vllm.model_executor.models.bloom.BloomModel method) (vllm.model_executor.models.chameleon.ChameleonAttention method) (vllm.model_executor.models.chameleon.ChameleonDecoderLayer method) (vllm.model_executor.models.chameleon.ChameleonForConditionalGeneration method) (vllm.model_executor.models.chameleon.ChameleonLayerNorm method) (vllm.model_executor.models.chameleon.ChameleonMLP method) (vllm.model_executor.models.chameleon.ChameleonModel method) (vllm.model_executor.models.chameleon.ChameleonSwinDecoderLayer method) (vllm.model_executor.models.chameleon.ChameleonVQVAEEncoder method) (vllm.model_executor.models.chameleon.ChameleonVQVAEEncoderAttnBlock method) (vllm.model_executor.models.chameleon.ChameleonVQVAEEncoderConvDownsample method) (vllm.model_executor.models.chameleon.ChameleonVQVAEEncoderResnetBlock method) (vllm.model_executor.models.chameleon.ChameleonVQVAEVectorQuantizer method) (vllm.model_executor.models.chatglm.ChatGLMForCausalLM method) (vllm.model_executor.models.chatglm.ChatGLMModel method) (vllm.model_executor.models.chatglm.GLMAttention method) (vllm.model_executor.models.chatglm.GLMBlock method) (vllm.model_executor.models.chatglm.GLMMLP method) (vllm.model_executor.models.chatglm.GLMTransformer method) (vllm.model_executor.models.clip.CLIPAttention method) (vllm.model_executor.models.clip.CLIPEncoder method) (vllm.model_executor.models.clip.CLIPEncoderLayer method) (vllm.model_executor.models.clip.CLIPMLP method) (vllm.model_executor.models.clip.CLIPVisionEmbeddings method) (vllm.model_executor.models.clip.CLIPVisionModel method) (vllm.model_executor.models.clip.CLIPVisionTransformer method) (vllm.model_executor.models.commandr.CohereAttention method) (vllm.model_executor.models.commandr.CohereDecoderLayer method) (vllm.model_executor.models.commandr.CohereForCausalLM method) (vllm.model_executor.models.commandr.CohereMLP method) (vllm.model_executor.models.commandr.CohereModel method) (vllm.model_executor.models.commandr.LayerNorm method) (vllm.model_executor.models.dbrx.DbrxAttention method) (vllm.model_executor.models.dbrx.DbrxBlock method) (vllm.model_executor.models.dbrx.DbrxForCausalLM method) (vllm.model_executor.models.dbrx.DbrxFusedNormAttention method) (vllm.model_executor.models.dbrx.DbrxModel method) (vllm.model_executor.models.dbrx.DbrxMoE method) (vllm.model_executor.models.dbrx.DbrxRouter method) (vllm.model_executor.models.deepseek.DeepseekAttention method) (vllm.model_executor.models.deepseek.DeepseekDecoderLayer method) (vllm.model_executor.models.deepseek.DeepseekForCausalLM method) (vllm.model_executor.models.deepseek.DeepseekMLP method) (vllm.model_executor.models.deepseek.DeepseekModel method) (vllm.model_executor.models.deepseek.DeepseekMoE method) (vllm.model_executor.models.deepseek_mtp.DeepSeekMTP method) (vllm.model_executor.models.deepseek_mtp.DeepSeekMultiTokenPredictor method) (vllm.model_executor.models.deepseek_mtp.DeepSeekMultiTokenPredictorLayer method) (vllm.model_executor.models.deepseek_mtp.SharedHead method) (vllm.model_executor.models.deepseek_v2.DeepseekV2Attention method) (vllm.model_executor.models.deepseek_v2.DeepseekV2DecoderLayer method) (vllm.model_executor.models.deepseek_v2.DeepseekV2ForCausalLM method) (vllm.model_executor.models.deepseek_v2.DeepseekV2MLAAttention method) (vllm.model_executor.models.deepseek_v2.DeepseekV2MLP method) (vllm.model_executor.models.deepseek_v2.DeepseekV2Model method) (vllm.model_executor.models.deepseek_v2.DeepseekV2MoE method) (vllm.model_executor.models.deepseek_vl2.DeepseekVLV2ForCausalLM method) (vllm.model_executor.models.deepseek_vl2.MlpProjector method) (vllm.model_executor.models.eagle.DummyInputLayerNorm method) (vllm.model_executor.models.eagle.DummyOutputNorm method) (vllm.model_executor.models.eagle.EAGLE method) (vllm.model_executor.models.exaone.ExaoneAttention method) (vllm.model_executor.models.exaone.ExaoneBlockAttention method) (vllm.model_executor.models.exaone.ExaoneDecoderLayer method) (vllm.model_executor.models.exaone.ExaoneForCausalLM method) (vllm.model_executor.models.exaone.ExaoneGatedMLP method) (vllm.model_executor.models.exaone.ExaoneModel method) (vllm.model_executor.models.falcon.FalconAttention method) (vllm.model_executor.models.falcon.FalconDecoderLayer method) (vllm.model_executor.models.falcon.FalconForCausalLM method) (vllm.model_executor.models.falcon.FalconMLP method) (vllm.model_executor.models.falcon.FalconModel method) (vllm.model_executor.models.florence2.ChannelAttention method) (vllm.model_executor.models.florence2.ChannelBlock method) (vllm.model_executor.models.florence2.ConvEmbed method) (vllm.model_executor.models.florence2.DaViT method) (vllm.model_executor.models.florence2.DepthWiseConv2d method) (vllm.model_executor.models.florence2.Florence2ForConditionalGeneration method) (vllm.model_executor.models.florence2.Florence2LanguageForConditionalGeneration method) (vllm.model_executor.models.florence2.Florence2LanguageModel method) (vllm.model_executor.models.florence2.LearnedAbsolutePositionEmbedding2D method) (vllm.model_executor.models.florence2.Mlp method) (vllm.model_executor.models.florence2.MySequential method) (vllm.model_executor.models.florence2.PositionalEmbeddingCosine1D method) (vllm.model_executor.models.florence2.PreNorm method) (vllm.model_executor.models.florence2.SpatialBlock method) (vllm.model_executor.models.florence2.WindowAttention method) (vllm.model_executor.models.fuyu.FuyuForCausalLM method) (vllm.model_executor.models.gemma.GemmaAttention method) (vllm.model_executor.models.gemma.GemmaDecoderLayer method) (vllm.model_executor.models.gemma.GemmaForCausalLM method) (vllm.model_executor.models.gemma.GemmaMLP method) (vllm.model_executor.models.gemma.GemmaModel method) (vllm.model_executor.models.gemma2.Gemma2Attention method) (vllm.model_executor.models.gemma2.Gemma2DecoderLayer method) (vllm.model_executor.models.gemma2.Gemma2ForCausalLM method) (vllm.model_executor.models.gemma2.Gemma2MLP method) (vllm.model_executor.models.gemma2.Gemma2Model method) (vllm.model_executor.models.gemma3.Gemma3Attention method) (vllm.model_executor.models.gemma3.Gemma3DecoderLayer method) (vllm.model_executor.models.gemma3.Gemma3ForCausalLM method) (vllm.model_executor.models.gemma3.Gemma3MLP method) (vllm.model_executor.models.gemma3.Gemma3Model method) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) (vllm.model_executor.models.gemma3_mm.Gemma3MultiModalProjector method) (vllm.model_executor.models.glm4.Glm4Attention method) (vllm.model_executor.models.glm4.Glm4DecoderLayer method) (vllm.model_executor.models.glm4.Glm4ForCausalLM method) (vllm.model_executor.models.glm4v.EVA2CLIPAttention method) (vllm.model_executor.models.glm4v.EVA2CLIPGLU method) (vllm.model_executor.models.glm4v.EVA2CLIPMLP method) (vllm.model_executor.models.glm4v.EVA2CLIPModel method) (vllm.model_executor.models.glm4v.EVA2CLIPPatchEmbedding method) (vllm.model_executor.models.glm4v.EVA2CLIPTransformer method) (vllm.model_executor.models.glm4v.EVA2CLIPTransformerLayer method) (vllm.model_executor.models.glm4v.GLM4VForCausalLM method) (vllm.model_executor.models.gpt2.GPT2Attention method) (vllm.model_executor.models.gpt2.GPT2Block method) (vllm.model_executor.models.gpt2.GPT2LMHeadModel method) (vllm.model_executor.models.gpt2.GPT2MLP method) (vllm.model_executor.models.gpt2.GPT2Model method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeAttention method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeBlock method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeForCausalLM method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeModel method) (vllm.model_executor.models.gpt_bigcode.GPTBigMLP method) (vllm.model_executor.models.gpt_j.GPTJAttention method) (vllm.model_executor.models.gpt_j.GPTJBlock method) (vllm.model_executor.models.gpt_j.GPTJForCausalLM method) (vllm.model_executor.models.gpt_j.GPTJMLP method) (vllm.model_executor.models.gpt_j.GPTJModel method) (vllm.model_executor.models.gpt_neox.GPTNeoXAttention method) (vllm.model_executor.models.gpt_neox.GPTNeoXForCausalLM method) (vllm.model_executor.models.gpt_neox.GPTNeoXLayer method) (vllm.model_executor.models.gpt_neox.GPTNeoXMLP method) (vllm.model_executor.models.gpt_neox.GPTNeoXModel method) (vllm.model_executor.models.granite.GraniteAttention method) (vllm.model_executor.models.granite.GraniteDecoderLayer method) (vllm.model_executor.models.granite.GraniteForCausalLM method) (vllm.model_executor.models.granite.GraniteMLP method) (vllm.model_executor.models.granite.GraniteModel method) (vllm.model_executor.models.granite_speech.GraniteSpeechConformerAttention method) (vllm.model_executor.models.granite_speech.GraniteSpeechConformerBlock method) (vllm.model_executor.models.granite_speech.GraniteSpeechConformerConvModule method) (vllm.model_executor.models.granite_speech.GraniteSpeechConformerDepthWiseConv1d method) (vllm.model_executor.models.granite_speech.GraniteSpeechConformerFeedForward method) (vllm.model_executor.models.granite_speech.GraniteSpeechCTCEncoder method) (vllm.model_executor.models.granite_speech.GraniteSpeechEncoderProjector method) (vllm.model_executor.models.granite_speech.GraniteSpeechForConditionalGeneration method) (vllm.model_executor.models.granitemoe.GraniteMoeAttention method) (vllm.model_executor.models.granitemoe.GraniteMoeDecoderLayer method) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM method) (vllm.model_executor.models.granitemoe.GraniteMoeModel method) (vllm.model_executor.models.granitemoe.GraniteMoeMoE method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridAttention method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridAttentionDecoderLayer method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridMambaDecoderLayer method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridModel method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedDecoderLayer method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedMLP method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedModel method) (vllm.model_executor.models.gritlm.GritLM method) (vllm.model_executor.models.gritlm.GritLMPooler method) (vllm.model_executor.models.grok1.Grok1Attention method) (vllm.model_executor.models.grok1.Grok1DecoderLayer method) (vllm.model_executor.models.grok1.Grok1ForCausalLM method) (vllm.model_executor.models.grok1.Grok1Model method) (vllm.model_executor.models.grok1.Grok1MoE method) (vllm.model_executor.models.idefics2_vision_model.Idefics2Encoder method) (vllm.model_executor.models.idefics2_vision_model.Idefics2EncoderLayer method) (vllm.model_executor.models.idefics2_vision_model.Idefics2VisionAttention method) (vllm.model_executor.models.idefics2_vision_model.Idefics2VisionEmbeddings method) (vllm.model_executor.models.idefics2_vision_model.Idefics2VisionMLP method) (vllm.model_executor.models.idefics2_vision_model.Idefics2VisionTransformer method) (vllm.model_executor.models.idefics3.Idefics3Connector method) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration method) (vllm.model_executor.models.idefics3.Idefics3Model method) (vllm.model_executor.models.idefics3.Idefics3SimpleMLP method) (vllm.model_executor.models.interfaces.SupportsPP method) (vllm.model_executor.models.interfaces_base.VllmModel method) (vllm.model_executor.models.intern_vit.InternMLP method) (vllm.model_executor.models.intern_vit.InternParallelAttention method) (vllm.model_executor.models.intern_vit.InternSdpaAttention method) (vllm.model_executor.models.intern_vit.InternVisionEmbeddings method) (vllm.model_executor.models.intern_vit.InternVisionEncoder method) (vllm.model_executor.models.intern_vit.InternVisionEncoderLayer method) (vllm.model_executor.models.intern_vit.InternVisionModel method) (vllm.model_executor.models.intern_vit.InternVisionPatchModel method) (vllm.model_executor.models.internlm2.InternLM2Attention method) (vllm.model_executor.models.internlm2.InternLM2ForCausalLM method) (vllm.model_executor.models.internlm2.InternLM2ForRewardModel method) (vllm.model_executor.models.internlm2.InternLM2MLP method) (vllm.model_executor.models.internlm2.InternLM2Model method) (vllm.model_executor.models.internlm2.InternLMDecoderLayer method) (vllm.model_executor.models.internlm2_ve.InternLM2VEDecoderLayer method) (vllm.model_executor.models.internlm2_ve.InternLM2VEModel method) (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.jais.JAISAttention method) (vllm.model_executor.models.jais.JAISBlock method) (vllm.model_executor.models.jais.JAISLMHeadModel method) (vllm.model_executor.models.jais.JAISMLP method) (vllm.model_executor.models.jais.JAISModel method) (vllm.model_executor.models.jais.SwiGLUActivation method) (vllm.model_executor.models.jamba.JambaAttentionDecoderLayer method) (vllm.model_executor.models.jamba.JambaForCausalLM method) (vllm.model_executor.models.jamba.JambaMambaDecoderLayer method) (vllm.model_executor.models.jamba.JambaModel method) (vllm.model_executor.models.jamba.JambaMoE method) (vllm.model_executor.models.kimi_vl.KimiVLForConditionalGeneration method) (vllm.model_executor.models.kimi_vl.KimiVLMultiModalProjector method) (vllm.model_executor.models.llama.LlamaAttention method) (vllm.model_executor.models.llama.LlamaDecoderLayer method) (vllm.model_executor.models.llama.LlamaForCausalLM method) (vllm.model_executor.models.llama.LlamaMLP method) (vllm.model_executor.models.llama.LlamaModel method) (vllm.model_executor.models.llama4.Llama4Attention method) (vllm.model_executor.models.llama4.Llama4DecoderLayer method) (vllm.model_executor.models.llama4.Llama4MoE method) (vllm.model_executor.models.llama_eagle.EagleLlamaForCausalLM method) (vllm.model_executor.models.llama_eagle.LlamaModel method) (vllm.model_executor.models.llama_eagle3.Eagle3LlamaForCausalLM method) (vllm.model_executor.models.llama_eagle3.LlamaDecoderLayer method) (vllm.model_executor.models.llama_eagle3.LlamaModel method) (vllm.model_executor.models.llava.LlavaForConditionalGeneration method) (vllm.model_executor.models.llava.LlavaMultiModalProjector method) (vllm.model_executor.models.llava_next.LlavaNextForConditionalGeneration method) (vllm.model_executor.models.llava_next_video.LlavaNextMultiModalProjector method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoForConditionalGeneration method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoPooler method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionMultiModalProjector method) (vllm.model_executor.models.mamba.MambaDecoderLayer method) (vllm.model_executor.models.mamba.MambaForCausalLM method) (vllm.model_executor.models.mamba.MambaModel method) (vllm.model_executor.models.mamba2.Mamba2DecoderLayer method) (vllm.model_executor.models.mamba2.Mamba2ForCausalLM method) (vllm.model_executor.models.mamba2.Mamba2Model method) (vllm.model_executor.models.medusa.Medusa method) (vllm.model_executor.models.medusa.ResidualBlock method) (vllm.model_executor.models.mimo.MiMoModel method) (vllm.model_executor.models.mimo_mtp.MiMoMTP method) (vllm.model_executor.models.mimo_mtp.MiMoMultiTokenPredictor method) (vllm.model_executor.models.mimo_mtp.MiMoMultiTokenPredictorLayer method) (vllm.model_executor.models.minicpm.MiniCPMAttention method) (vllm.model_executor.models.minicpm.MiniCPMDecoderLayer method) (vllm.model_executor.models.minicpm.MiniCPMForCausalLM method) (vllm.model_executor.models.minicpm.MiniCPMMLP method) (vllm.model_executor.models.minicpm.MiniCPMModel method) (vllm.model_executor.models.minicpm.MiniCPMMoE method) (vllm.model_executor.models.minicpm3.MiniCPM3Attention method) (vllm.model_executor.models.minicpmo.MiniCPMWhisperEncoder method) (vllm.model_executor.models.minicpmo.MiniCPMWhisperEncoderLayer method) (vllm.model_executor.models.minicpmo.MultiModalProjector method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) (vllm.model_executor.models.minicpmv.Resampler2_5 method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01Attention method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01DecoderLayer method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01ForCausalLM method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01LinearAttention method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01MLP method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01Model method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01MoE method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01RMSNormTP method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01RotaryEmbedding method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ForConditionalGeneration method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01MultiModalProjector method) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration method) (vllm.model_executor.models.mistral3.Mistral3MultiModalProjector method) (vllm.model_executor.models.mistral3.Mistral3PatchMerger method) (vllm.model_executor.models.mixtral.MixtralAttention method) (vllm.model_executor.models.mixtral.MixtralDecoderLayer method) (vllm.model_executor.models.mixtral.MixtralForCausalLM method) (vllm.model_executor.models.mixtral.MixtralModel method) (vllm.model_executor.models.mixtral.MixtralMoE method) (vllm.model_executor.models.mixtral_quant.MixtralAttention method) (vllm.model_executor.models.mixtral_quant.MixtralDecoderLayer method) (vllm.model_executor.models.mixtral_quant.MixtralForCausalLM method) (vllm.model_executor.models.mixtral_quant.MixtralMLP method) (vllm.model_executor.models.mixtral_quant.MixtralModel method) (vllm.model_executor.models.mixtral_quant.MixtralMoE method) (vllm.model_executor.models.mllama.ColumnParallelConv2dPatch method) (vllm.model_executor.models.mllama.MllamaCrossAttentionDecoderLayer method) (vllm.model_executor.models.mllama.MllamaForCausalLM method) (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) (vllm.model_executor.models.mllama.MllamaPrecomputedAspectRatioEmbedding method) (vllm.model_executor.models.mllama.MllamaPrecomputedPositionEmbedding method) (vllm.model_executor.models.mllama.MllamaTextCrossAttention method) (vllm.model_executor.models.mllama.MllamaTextModel method) (vllm.model_executor.models.mllama.MllamaTextRMSNorm method) (vllm.model_executor.models.mllama.MllamaVisionEncoder method) (vllm.model_executor.models.mllama.MllamaVisionEncoderLayer method) (vllm.model_executor.models.mllama.MllamaVisionModel method) (vllm.model_executor.models.mllama.MllamaVisionSdpaAttention method) (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration method) (vllm.model_executor.models.mllama4.Llama4MultiModalProjector method) (vllm.model_executor.models.mllama4.Llama4UnfoldConvolution method) (vllm.model_executor.models.mllama4.Llama4VisionAttention method) (vllm.model_executor.models.mllama4.Llama4VisionEncoder method) (vllm.model_executor.models.mllama4.Llama4VisionEncoderLayer method) (vllm.model_executor.models.mllama4.Llama4VisionMLP method) (vllm.model_executor.models.mllama4.Llama4VisionModel method) (vllm.model_executor.models.mllama4.Llama4VisionPixelShuffleMLP method) (vllm.model_executor.models.mlp_speculator.MLPSpeculatorLayerNorm method) (vllm.model_executor.models.modernbert.ModernBertAttention method) (vllm.model_executor.models.modernbert.ModernBertEmbeddings method) (vllm.model_executor.models.modernbert.ModernBertEncoderLayer method) (vllm.model_executor.models.modernbert.ModernBertForSequenceClassification method) (vllm.model_executor.models.modernbert.ModernBertLayer method) (vllm.model_executor.models.modernbert.ModernBertMLP method) (vllm.model_executor.models.modernbert.ModernBertModel method) (vllm.model_executor.models.modernbert.ModernBertPooler method) (vllm.model_executor.models.molmo.BlockCollection method) (vllm.model_executor.models.molmo.ImageProjectorMLP method) (vllm.model_executor.models.molmo.LanguageModelMLP method) (vllm.model_executor.models.molmo.MolmoAttention method) (vllm.model_executor.models.molmo.MolmoDecoderLayer method) (vllm.model_executor.models.molmo.MolmoDecoderNormAfterLayer method) (vllm.model_executor.models.molmo.MolmoForCausalLM method) (vllm.model_executor.models.molmo.MolmoModel method) (vllm.model_executor.models.molmo.MolmoVisionBackbone method) (vllm.model_executor.models.molmo.MultiHeadDotProductAttention method) (vllm.model_executor.models.molmo.ResidualAttentionBlock method) (vllm.model_executor.models.molmo.VisionTransformer method) (vllm.model_executor.models.molmo.ViTMLP method) (vllm.model_executor.models.moonvit.Learnable2DInterpPosEmb method) (vllm.model_executor.models.moonvit.MLP2 method) (vllm.model_executor.models.moonvit.MoonVisionPatchEmbed method) (vllm.model_executor.models.moonvit.MoonVitEncoder method) (vllm.model_executor.models.moonvit.MoonVitEncoderLayer method) (vllm.model_executor.models.moonvit.MoonVitPretrainedModel method) (vllm.model_executor.models.moonvit.MoonVitVLProjector method) (vllm.model_executor.models.mpt.MPTAttention method) (vllm.model_executor.models.mpt.MPTBlock method) (vllm.model_executor.models.mpt.MPTForCausalLM method) (vllm.model_executor.models.mpt.MPTMLP method) (vllm.model_executor.models.mpt.MPTModel method) (vllm.model_executor.models.nemotron.NemotronAttention method) (vllm.model_executor.models.nemotron.NemotronDecoderLayer method) (vllm.model_executor.models.nemotron.NemotronForCausalLM method) (vllm.model_executor.models.nemotron.NemotronLayerNorm1P method) (vllm.model_executor.models.nemotron.NemotronMLP method) (vllm.model_executor.models.nemotron.NemotronModel method) (vllm.model_executor.models.nemotron_nas.DeciLMDecoderLayer method) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM method) (vllm.model_executor.models.nemotron_nas.DeciModel method) (vllm.model_executor.models.olmo.OlmoAttention method) (vllm.model_executor.models.olmo.OlmoDecoderLayer method) (vllm.model_executor.models.olmo.OlmoForCausalLM method) (vllm.model_executor.models.olmo.OlmoMLP method) (vllm.model_executor.models.olmo.OlmoModel method) (vllm.model_executor.models.olmo2.Olmo2Attention method) (vllm.model_executor.models.olmo2.Olmo2DecoderLayer method) (vllm.model_executor.models.olmo2.Olmo2ForCausalLM method) (vllm.model_executor.models.olmo2.Olmo2MLP method) (vllm.model_executor.models.olmo2.Olmo2Model method) (vllm.model_executor.models.olmoe.OlmoeAttention method) (vllm.model_executor.models.olmoe.OlmoeDecoderLayer method) (vllm.model_executor.models.olmoe.OlmoeForCausalLM method) (vllm.model_executor.models.olmoe.OlmoeModel method) (vllm.model_executor.models.olmoe.OlmoeMoE method) (vllm.model_executor.models.opt.OPTAttention method) (vllm.model_executor.models.opt.OPTDecoder method) (vllm.model_executor.models.opt.OPTDecoderLayer method) (vllm.model_executor.models.opt.OPTForCausalLM method) (vllm.model_executor.models.opt.OPTLearnedPositionalEmbedding method) (vllm.model_executor.models.opt.OPTModel method) (vllm.model_executor.models.orion.OrionAttention method) (vllm.model_executor.models.orion.OrionDecoderLayer method) (vllm.model_executor.models.orion.OrionForCausalLM method) (vllm.model_executor.models.orion.OrionMLP method) (vllm.model_executor.models.orion.OrionModel method) (vllm.model_executor.models.ovis.Ovis method) (vllm.model_executor.models.ovis.VisualEmbedding method) (vllm.model_executor.models.ovis.VisualTokenizer method) (vllm.model_executor.models.paligemma.PaliGemmaForConditionalGeneration method) (vllm.model_executor.models.paligemma.PaliGemmaMultiModalProjector method) (vllm.model_executor.models.persimmon.PersimmonAttention method) (vllm.model_executor.models.persimmon.PersimmonDecoderLayer method) (vllm.model_executor.models.persimmon.PersimmonForCausalLM method) (vllm.model_executor.models.persimmon.PersimmonMLP method) (vllm.model_executor.models.persimmon.PersimmonModel method) (vllm.model_executor.models.phi.PhiAttention method) (vllm.model_executor.models.phi.PhiForCausalLM method) (vllm.model_executor.models.phi.PhiLayer method) (vllm.model_executor.models.phi.PhiMLP method) (vllm.model_executor.models.phi.PhiModel method) (vllm.model_executor.models.phi3_small.Phi3SmallDecoderLayer method) (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) (vllm.model_executor.models.phi3_small.Phi3SmallMLP method) (vllm.model_executor.models.phi3_small.Phi3SmallModel method) (vllm.model_executor.models.phi3_small.Phi3SmallSelfAttention method) (vllm.model_executor.models.phi3v.Phi3HDImageEmbedding method) (vllm.model_executor.models.phi3v.Phi3VForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMImageEncoder method) (vllm.model_executor.models.phi4mm_audio.AudioEmbedding method) (vllm.model_executor.models.phi4mm_audio.ConformerEncoder method) (vllm.model_executor.models.phi4mm_audio.ConformerEncoderLayer method) (vllm.model_executor.models.phi4mm_audio.TransformerEncoderBase method) (vllm.model_executor.models.phi4mm_audio.WindowQformer method) (vllm.model_executor.models.phi4mm_utils.AbsolutePositionalEncoding method) (vllm.model_executor.models.phi4mm_utils.AttModule method) (vllm.model_executor.models.phi4mm_utils.CausalConv1D method) (vllm.model_executor.models.phi4mm_utils.CausalConv2D method) (vllm.model_executor.models.phi4mm_utils.ConvModule method) (vllm.model_executor.models.phi4mm_utils.DepthWiseSeperableConv1d method) (vllm.model_executor.models.phi4mm_utils.FeedForward method) (vllm.model_executor.models.phi4mm_utils.GLU method) (vllm.model_executor.models.phi4mm_utils.GLULinear method) (vllm.model_executor.models.phi4mm_utils.GLUPointWiseConv method) (vllm.model_executor.models.phi4mm_utils.MeanVarianceNormLayer method) (vllm.model_executor.models.phi4mm_utils.MultiHeadedAttention method) (vllm.model_executor.models.phi4mm_utils.MultiSequential method) (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) (vllm.model_executor.models.phi4mm_utils.Swish method) (vllm.model_executor.models.phi4mm_utils.T5RelativeAttentionLogitBias method) (vllm.model_executor.models.phimoe.mp static method) (vllm.model_executor.models.phimoe.PhiMoE method) (vllm.model_executor.models.phimoe.PhiMoEAttention method) (vllm.model_executor.models.phimoe.PhiMoEDecoderLayer method) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM method) (vllm.model_executor.models.phimoe.PhiMoEModel method) (vllm.model_executor.models.pixtral.Attention method) (vllm.model_executor.models.pixtral.FeedForward method) (vllm.model_executor.models.pixtral.PatchMerger method) (vllm.model_executor.models.pixtral.PixtralForConditionalGeneration method) (vllm.model_executor.models.pixtral.PixtralHFAttention method) (vllm.model_executor.models.pixtral.PixtralHFMLP method) (vllm.model_executor.models.pixtral.PixtralHFTransformer method) (vllm.model_executor.models.pixtral.PixtralHFTransformerBlock method) (vllm.model_executor.models.pixtral.PixtralHFVisionModel method) (vllm.model_executor.models.pixtral.Transformer method) (vllm.model_executor.models.pixtral.TransformerBlock method) (vllm.model_executor.models.pixtral.VisionLanguageAdapter method) (vllm.model_executor.models.pixtral.VisionTransformer method) (vllm.model_executor.models.plamo2.DenseMLP method) (vllm.model_executor.models.plamo2.Plamo2AttentionMixer method) (vllm.model_executor.models.plamo2.Plamo2Decoder method) (vllm.model_executor.models.plamo2.Plamo2DecoderLayer method) (vllm.model_executor.models.plamo2.Plamo2ForCausalLM method) (vllm.model_executor.models.plamo2.Plamo2MambaMixer method) (vllm.model_executor.models.plamo2.Plamo2Model method) (vllm.model_executor.models.prithvi_geospatial_mae.PrithviGeoSpatialMAE method) (vllm.model_executor.models.qwen.QWenAttention method) (vllm.model_executor.models.qwen.QWenBlock method) (vllm.model_executor.models.qwen.QWenLMHeadModel method) (vllm.model_executor.models.qwen.QWenMLP method) (vllm.model_executor.models.qwen.QWenModel method) (vllm.model_executor.models.qwen2.Qwen2Attention method) (vllm.model_executor.models.qwen2.Qwen2DecoderLayer method) (vllm.model_executor.models.qwen2.Qwen2EmbeddingModel method) (vllm.model_executor.models.qwen2.Qwen2ForCausalLM method) (vllm.model_executor.models.qwen2.Qwen2MLP method) (vllm.model_executor.models.qwen2.Qwen2Model method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionAttention method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionBlock method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionMLP method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionPatchEmbed method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionPatchMerger method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionRotaryEmbedding method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioForConditionalGeneration method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioMultiModalProjector method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeAttention method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeDecoderLayer method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeForCausalLM method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeMLP method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeModel method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeSparseMoeBlock method) (vllm.model_executor.models.qwen2_rm.Qwen2RewardBaseModel method) (vllm.model_executor.models.qwen2_rm.ReLU method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionAttention method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionBlock method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionMLP method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionPatchEmbed method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionPatchMerger method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionRotaryEmbedding method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionTransformer method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) (vllm.model_executor.models.qwen3.Qwen3Attention method) (vllm.model_executor.models.qwen3.Qwen3DecoderLayer method) (vllm.model_executor.models.qwen3.Qwen3ForCausalLM method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeAttention method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeDecoderLayer method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeForCausalLM method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeMLP method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeModel method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeSparseMoeBlock method) (vllm.model_executor.models.qwen_vl.QwenVLForConditionalGeneration method) (vllm.model_executor.models.qwen_vl.QwenVLMLP method) (vllm.model_executor.models.qwen_vl.TransformerBlock method) (vllm.model_executor.models.qwen_vl.VisionTransformer method) (vllm.model_executor.models.qwen_vl.VisualAttention method) (vllm.model_executor.models.qwen_vl.VisualAttentionBlock method) (vllm.model_executor.models.roberta.RobertaClassificationHead method) (vllm.model_executor.models.roberta.RobertaEmbedding method) (vllm.model_executor.models.roberta.RobertaForSequenceClassification method) (vllm.model_executor.models.siglip.SiglipAttention method) (vllm.model_executor.models.siglip.SiglipEncoder method) (vllm.model_executor.models.siglip.SiglipEncoderLayer method) (vllm.model_executor.models.siglip.SiglipMLP method) (vllm.model_executor.models.siglip.SiglipMultiheadAttentionPoolingHead method) (vllm.model_executor.models.siglip.SiglipVisionEmbeddings method) (vllm.model_executor.models.siglip.SiglipVisionModel method) (vllm.model_executor.models.siglip.SiglipVisionTransformer method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) (vllm.model_executor.models.solar.SolarAttention method) (vllm.model_executor.models.solar.SolarDecoderLayer method) (vllm.model_executor.models.solar.SolarForCausalLM method) (vllm.model_executor.models.solar.SolarMLP method) (vllm.model_executor.models.solar.SolarModel method) (vllm.model_executor.models.stablelm.StablelmAttention method) (vllm.model_executor.models.stablelm.StablelmDecoderLayer method) (vllm.model_executor.models.stablelm.StableLMEpochModel method) (vllm.model_executor.models.stablelm.StablelmForCausalLM method) (vllm.model_executor.models.stablelm.StablelmMLP method) (vllm.model_executor.models.starcoder2.Starcoder2Attention method) (vllm.model_executor.models.starcoder2.Starcoder2DecoderLayer method) (vllm.model_executor.models.starcoder2.Starcoder2ForCausalLM method) (vllm.model_executor.models.starcoder2.Starcoder2MLP method) (vllm.model_executor.models.starcoder2.Starcoder2Model method) (vllm.model_executor.models.transformers.TransformersForCausalLM method) (vllm.model_executor.models.transformers.TransformersModel method) (vllm.model_executor.models.ultravox.ModifiedWhisperEncoder method) (vllm.model_executor.models.ultravox.StackAudioFrames method) (vllm.model_executor.models.ultravox.UltravoxModel method) (vllm.model_executor.models.ultravox.UltravoxProjector method) (vllm.model_executor.models.utils.PPMissingLayer method) (vllm.model_executor.models.whisper.WhisperAttention method) (vllm.model_executor.models.whisper.WhisperCrossAttention method) (vllm.model_executor.models.whisper.WhisperDecoder method) (vllm.model_executor.models.whisper.WhisperDecoderLayer method) (vllm.model_executor.models.whisper.WhisperEncoder method) (vllm.model_executor.models.whisper.WhisperEncoderLayer method) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration method) (vllm.model_executor.models.whisper.WhisperMLP method) (vllm.model_executor.models.whisper.WhisperModel method) (vllm.model_executor.models.whisper.WhisperPositionalEmbedding method) (vllm.model_executor.models.zamba2.Zamba2Attention method) (vllm.model_executor.models.zamba2.Zamba2AttentionDecoderLayer method) (vllm.model_executor.models.zamba2.Zamba2ForCausalLM method) (vllm.model_executor.models.zamba2.Zamba2HybridLayer method) (vllm.model_executor.models.zamba2.Zamba2LoRA method) (vllm.model_executor.models.zamba2.Zamba2MambaDecoderLayer method) (vllm.model_executor.models.zamba2.Zamba2MLP method) (vllm.model_executor.models.zamba2.Zamba2Model method) (vllm.prompt_adapter.layers.VocabParallelEmbeddingWithPromptAdapter method) (vllm.v1.attention.backends.flash_attn.FlashAttentionImpl method) (vllm.v1.attention.backends.flashinfer.FlashInferImpl method) (vllm.v1.attention.backends.mla.common.MLACommonImpl method) (vllm.v1.attention.backends.pallas.PallasAttentionBackendImpl method) (vllm.v1.attention.backends.triton_attn.TritonAttentionImpl method) (vllm.v1.sample.rejection_sampler.RejectionSampler method) (vllm.v1.sample.sampler.Sampler method) (vllm.v1.sample.tpu.sampler.Sampler method) (vllm.worker.hpu_model_runner.HpuModelAdapter method) (vllm.worker.model_runner.CUDAGraphRunner method) (vllm.worker.tpu_model_runner.ModelWrapper method) forward_cpu() (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod method) forward_cuda() (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.activation.FastGELU method) (vllm.model_executor.layers.activation.FatreluAndMul method) (vllm.model_executor.layers.activation.GeluAndMul method) (vllm.model_executor.layers.activation.MulAndSilu method) (vllm.model_executor.layers.activation.NewGELU method) (vllm.model_executor.layers.activation.QuickGELU method) (vllm.model_executor.layers.activation.ReLUSquaredActivation method) (vllm.model_executor.layers.activation.SiluAndMul method) (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod method) (vllm.model_executor.layers.layernorm.GemmaRMSNorm method) (vllm.model_executor.layers.layernorm.RMSNorm method) (vllm.model_executor.layers.mamba.mamba_mixer.MambaMixer method) (vllm.model_executor.layers.mamba.mamba_mixer2.MambaMixer2 method) (vllm.model_executor.layers.mamba.mamba_mixer2.Mixer2RMSNormGated method) (vllm.model_executor.layers.rotary_embedding.RotaryEmbedding method) (vllm.v1.sample.ops.topk_topp_sampler.TopKTopPSampler method) forward_decode() (vllm.attention.ops.hpu_paged_attn.HPUPagedAttention static method) (vllm.attention.ops.paged_attn.PagedAttention static method) (vllm.attention.ops.rocm_aiter_paged_attn.AITERPagedAttention static method) forward_embeddings() (vllm.model_executor.models.phi4mm_audio.TransformerEncoderBase method) forward_features() (vllm.model_executor.models.florence2.DaViT method) forward_features_unpool() (vllm.model_executor.models.florence2.DaViT method) forward_hip() (vllm.model_executor.custom_op.CustomOp method) forward_hpu() (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod method) (vllm.model_executor.layers.layernorm.RMSNorm method) (vllm.model_executor.layers.rotary_embedding.RotaryEmbedding method) forward_impl() (vllm.model_executor.layers.fused_moe.layer.FusedMoE method) forward_native (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod attribute) forward_native() (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.activation.FastGELU method) (vllm.model_executor.layers.activation.FatreluAndMul method) (vllm.model_executor.layers.activation.GeluAndMul method) (vllm.model_executor.layers.activation.MulAndSilu method) (vllm.model_executor.layers.activation.NewGELU method) (vllm.model_executor.layers.activation.QuickGELU method) (vllm.model_executor.layers.activation.ReLUSquaredActivation method) (vllm.model_executor.layers.activation.SiluAndMul method) (vllm.model_executor.layers.layernorm.GemmaRMSNorm method) (vllm.model_executor.layers.layernorm.RMSNorm method) (vllm.model_executor.layers.mamba.mamba_mixer.MambaMixer method) (vllm.model_executor.layers.mamba.mamba_mixer2.MambaMixer2 method) (vllm.model_executor.layers.mamba.mamba_mixer2.Mixer2RMSNormGated method) (vllm.model_executor.layers.rotary_embedding.RotaryEmbedding method) (vllm.v1.sample.ops.topk_topp_sampler.TopKTopPSampler method) forward_neuron() (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.activation.SiluAndMul method) (vllm.model_executor.layers.rotary_embedding.RotaryEmbedding method) forward_oot() (vllm.model_executor.custom_op.CustomOp method) forward_prefix() (vllm.attention.ops.paged_attn.PagedAttention static method) forward_start_time (in module vllm.forward_context) forward_static() (vllm.model_executor.layers.layernorm.GemmaRMSNorm static method) forward_tpu() (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod method) (vllm.v1.sample.ops.topk_topp_sampler.TopKTopPSampler method) forward_xpu() (vllm.model_executor.custom_op.CustomOp method) (vllm.model_executor.layers.activation.FastGELU method) (vllm.model_executor.layers.activation.GeluAndMul method) (vllm.model_executor.layers.activation.NewGELU method) (vllm.model_executor.layers.activation.QuickGELU method) (vllm.model_executor.layers.activation.SiluAndMul method) (vllm.model_executor.layers.layernorm.RMSNorm method) (vllm.model_executor.layers.rotary_embedding.RotaryEmbedding method) ForwardContext (class in vllm.forward_context) fp4_marlin_process_global_scale() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) fp4_marlin_process_scales() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) FP4_MARLIN_SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) FP8_DTYPE (in module vllm.attention.ops.rocm_aiter_paged_attn) (in module vllm.compilation.fusion) fp8_dtype() (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) fp8_fused_exponent_bias_into_scales() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) FP8_MAX (in module vllm.attention.ops.triton_flash_attention) FP8_MIN (in module vllm.attention.ops.triton_flash_attention) Fp8Config (class in vllm.model_executor.layers.quantization.fp8) Fp8KVCacheMethod (class in vllm.model_executor.layers.quantization.fp8) Fp8LinearMethod (class in vllm.model_executor.layers.quantization.fp8) Fp8LinearOp (class in vllm.model_executor.layers.quantization.utils.w8a8_utils) Fp8MoEMethod (class in vllm.model_executor.layers.quantization.fp8) free() (vllm.core.block.block_table.BlockTable method) (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.v1.core.encoder_cache_manager.EncoderCacheManager method) (vllm.v1.core.kv_cache_manager.KVCacheManager method) (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) free_block() (vllm.core.block.common.BlockPool method) free_block_hashes() (vllm.v1.core.kv_cache_manager.KVCacheManager method) free_block_id() (vllm.core.block.naive_block.NaiveBlockAllocator method) free_blocks() (vllm.v1.core.block_pool.BlockPool method) free_cross() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) free_encoder_input() (vllm.v1.core.encoder_cache_manager.EncoderCacheManager method) free_encoder_input_ids (vllm.v1.core.sched.output.SchedulerOutput attribute) free_finished_seq_groups() (vllm.core.scheduler.Scheduler method) free_managed_buffer() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) free_pending_messages() (vllm.v1.engine.core_client.MPClient method) free_seq() (vllm.core.scheduler.Scheduler method) free_shared_buffer() (in module vllm._custom_ops) (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce static method) FreeKVCacheBlockQueue (class in vllm.v1.core.kv_cache_utils) freqs_cis (vllm.model_executor.models.pixtral.VisionTransformer property) frequency_penalties (vllm.model_executor.sampling_metadata.SamplingTensors attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) frequency_penalty (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.sampling_params.SamplingParams attribute) from_base() (vllm.outputs.ClassificationOutput static method) (vllm.outputs.ClassificationRequestOutput static method) (vllm.outputs.EmbeddingOutput static method) (vllm.outputs.EmbeddingRequestOutput static method) (vllm.outputs.ScoringOutput static method) (vllm.outputs.ScoringRequestOutput static method) from_broadcasted_tensor_dict() (vllm.worker.cpu_enc_dec_model_runner.EncoderDecoderModelInputForCPU class method) (vllm.worker.cpu_model_runner.ModelInputForCPU class method) (vllm.worker.cpu_model_runner.ModelInputForCPUWithSamplingMetadata class method) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelInput class method) (vllm.worker.hpu_model_runner.ModelInputForHPU class method) (vllm.worker.hpu_model_runner.ModelInputForHPUWithSamplingMetadata class method) (vllm.worker.model_runner.ModelInputForGPU class method) (vllm.worker.model_runner.ModelInputForGPUWithSamplingMetadata class method) (vllm.worker.model_runner_base.BroadcastableModelInput class method) (vllm.worker.multi_step_model_runner.StatefulModelInput class method) (vllm.worker.neuron_model_runner.ModelInputForNeuron class method) (vllm.worker.tpu_model_runner.ModelInputForTPU class method) (vllm.worker.worker_base.WorkerInput class method) (vllm.worker.xpu_model_runner.ModelInputForXPU class method) (vllm.worker.xpu_model_runner.ModelInputForXPUWithSamplingMetadata class method) from_cli() (vllm.config.CompilationConfig class method) from_cli_args() (vllm.engine.arg_utils.EngineArgs class method) (vllm.model_executor.model_loader.tensorizer.TensorizerArgs class method) from_config() (vllm.lora.lora.LoRALayerWeights class method) (vllm.model_executor.layers.quantization.aqlm.AQLMConfig class method) (vllm.model_executor.layers.quantization.awq.AWQConfig class method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig class method) (vllm.model_executor.layers.quantization.base_config.QuantizationConfig class method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig class method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesConfig class method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig class method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPConfig class method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8Config class method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8Config class method) (vllm.model_executor.layers.quantization.fp8.Fp8Config class method) (vllm.model_executor.layers.quantization.gguf.GGUFConfig class method) (vllm.model_executor.layers.quantization.gptq.GPTQConfig class method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24Config class method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinConfig class method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig class method) (vllm.model_executor.layers.quantization.marlin.MarlinConfig class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8Config class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4Config class method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config class method) (vllm.model_executor.layers.quantization.neuron_quant.NeuronQuantConfig class method) (vllm.model_executor.layers.quantization.ptpc_fp8.PTPCFp8Config class method) (vllm.model_executor.layers.quantization.qqq.QQQConfig class method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig class method) (vllm.model_executor.layers.quantization.torchao.TorchAOConfig class method) (vllm.model_executor.layers.quantization.tpu_int8.Int8TpuConfig class method) (vllm.model_executor.models.florence2.DaViT class method) from_config_with_defaults() (vllm.model_executor.layers.pooler.Pooler class method) from_dict() (vllm.config.SpeculativeConfig class method) (vllm.lora.peft_helper.PEFTHelper class method) (vllm.transformers_utils.configs.arctic.ArcticConfig class method) from_elems() (vllm.multimodal.inputs.MultiModalKwargsItem static method) from_engine_args() (vllm.engine.async_llm_engine.AsyncLLMEngine class method) (vllm.engine.llm_engine.LLMEngine class method) (vllm.engine.multiprocessing.engine.MQLLMEngine static method) (vllm.v1.engine.async_llm.AsyncLLM class method) (vllm.v1.engine.llm_engine.LLMEngine class method) from_engine_core_request() (vllm.v1.request.Request class method) from_file() (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig static method) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig static method) from_guided_params() (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig class method) from_hf_inputs() (vllm.multimodal.inputs.MultiModalKwargs static method) from_id() (vllm.scalar_type.ScalarType class method) from_input_batch() (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata class method) from_items() (vllm.multimodal.inputs.MultiModalKwargs static method) from_layer() (in module vllm.lora.utils) from_layer_logits_processor() (in module vllm.lora.utils) from_lists() (vllm.model_executor.sampling_metadata.SamplingTensors class method) from_local_checkpoint() (vllm.adapter_commons.models.AdapterModel method) (vllm.lora.models.LoRAModel class method) (vllm.prompt_adapter.models.PromptAdapterModel class method) from_local_dir() (vllm.lora.peft_helper.PEFTHelper class method) from_lora_tensors() (vllm.lora.models.LoRAModel class method) from_new_request() (vllm.v1.engine.detokenizer.IncrementalDetokenizer class method) (vllm.v1.engine.logprobs.LogprobsProcessor class method) (vllm.v1.engine.output_processor.RequestState class method) from_optional() (vllm.sampling_params.GuidedDecodingParams static method) (vllm.sampling_params.SamplingParams static method) from_pooling_metadata() (vllm.model_executor.pooling_metadata.PoolingTensors class method) from_pooling_type() (vllm.model_executor.layers.pooler.SimplePooler static method) from_pretrained() (vllm.transformers_utils.configs.dbrx.DbrxAttentionConfig class method) (vllm.transformers_utils.configs.dbrx.DbrxFFNConfig class method) (vllm.transformers_utils.configs.eagle.EAGLEConfig class method) (vllm.transformers_utils.configs.medusa.MedusaConfig class method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer class method) from_prompt_token_counts() (vllm.sequence.SequenceData static method) from_queues() (vllm.core.scheduler.PartialPrefillMetadata class method) from_raw_dict() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVTransferParams static method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlKVTransferParams static method) from_request() (vllm.v1.core.sched.output.CachedRequestData class method) (vllm.v1.core.sched.output.NewRequestData class method) from_sampling_metadata() (vllm.model_executor.sampling_metadata.SamplingTensors class method) from_seq() (vllm.multimodal.processing.PromptUpdateDetails static method) from_seq_group() (vllm.multimodal.base.MultiModalPlaceholderMap class method) (vllm.outputs.PoolingRequestOutput static method) (vllm.outputs.RequestOutput class method) from_seqs() (vllm.sequence.SequenceData static method) from_string_field() (vllm.model_executor.models.module_mapping.MultiModelKeys static method) from_torch() (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum class method) (vllm.distributed.device_communicators.pynccl_wrapper.ncclRedOpTypeEnum class method) from_unready_handle() (vllm.v1.executor.multiproc_executor.WorkerProcHandle class method) from_vllm_config() (vllm.engine.async_llm_engine.AsyncLLMEngine class method) (vllm.engine.llm_engine.LLMEngine class method) (vllm.engine.multiprocessing.engine.MQLLMEngine class method) (vllm.v1.engine.async_llm.AsyncLLM class method) (vllm.v1.engine.llm_engine.LLMEngine class method) frozen_model_input (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) full (vllm.multimodal.processing.PromptUpdateDetails attribute) full_cuda_graph (vllm.config.CompilationConfig attribute) full_groupby() (in module vllm.utils) full_groupby_modality() (in module vllm.multimodal.processing) full_weight_shape (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearLayerConfig attribute) FullAttentionManager (class in vllm.v1.core.single_type_kv_cache_manager) FullAttentionSpec (class in vllm.v1.kv_cache_interface) fully_sharded_loras (vllm.config.LoRAConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) Function (class in vllm.distributed.device_communicators.cuda_wrapper) (class in vllm.distributed.device_communicators.pynccl_wrapper) function (vllm.entrypoints.openai.protocol.ChatCompletionNamedToolChoiceParam attribute) (vllm.entrypoints.openai.protocol.ChatCompletionToolsParam attribute) (vllm.entrypoints.openai.protocol.DeltaToolCall attribute) (vllm.entrypoints.openai.protocol.ToolCall attribute) FunctionCall (class in vllm.entrypoints.openai.protocol) FunctionDefinition (class in vllm.entrypoints.openai.protocol) fused_add (vllm.compilation.fusion.FusedRMSQuantKey attribute) fused_add_rms_norm() (in module vllm._custom_ops) (in module vllm.model_executor.layers.layernorm) (vllm._ipex_ops.ipex_ops static method) fused_experts() (in module vllm.model_executor.layers.fused_moe.fused_moe) fused_experts_impl() (in module vllm.model_executor.layers.fused_moe.fused_moe) fused_marlin_moe() (in module vllm.model_executor.layers.fused_moe.fused_marlin_moe) fused_marlin_moe_fake() (in module vllm.model_executor.layers.fused_moe.fused_marlin_moe) fused_moe() (in module vllm.model_executor.layers.fused_moe.fused_moe) (in module vllm.model_executor.layers.fused_moe.moe_pallas) (in module vllm.model_executor.layers.fused_moe.moe_torch_iterative) fused_moe_kernel() (in module vllm.model_executor.layers.fused_moe.fused_moe) fused_moe_kernel_gptq_awq() (in module vllm.model_executor.layers.fused_moe.fused_moe) FUSED_OPS (in module vllm.compilation.fusion) fused_topk() (in module vllm.model_executor.layers.fused_moe.fused_moe) FusedAddRMSNormDynamicQuantPattern (class in vllm.compilation.fusion) FusedAddRMSNormDynamicQuantPattern.Match (class in vllm.compilation.fusion) FusedAddRMSNormStaticQuantPattern (class in vllm.compilation.fusion) FusedAddRMSNormStaticQuantPattern.Match (class in vllm.compilation.fusion) FusedMoE (class in vllm.model_executor.layers.fused_moe.layer) FusedMoEMethodBase (class in vllm.model_executor.layers.fused_moe.layer) FusedMoeWeightScaleSupported (class in vllm.model_executor.layers.fused_moe.layer) FusedRMSQuantKey (class in vllm.compilation.fusion) FusionPass (class in vllm.compilation.fusion) FutureWrapper (class in vllm.v1.executor.ray_distributed_executor) FuyuDummyInputsBuilder (class in vllm.model_executor.models.fuyu) FuyuForCausalLM (class in vllm.model_executor.models.fuyu) FuyuImagePatchInputs (class in vllm.model_executor.models.fuyu) FuyuMultiModalProcessor (class in vllm.model_executor.models.fuyu) FuyuProcessingInfo (class in vllm.model_executor.models.fuyu) G g (vllm.model_executor.models.phi4mm_utils.MultiHeadedAttention attribute) gate_weight_loader() (vllm.model_executor.models.minimax_text_01.MiniMaxText01MoE static method) gather() (vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase method) (vllm.distributed.device_communicators.cpu_communicator.CpuCommunicator method) (vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator method) (vllm.distributed.parallel_state.GroupCoordinator method) gather_cache() (in module vllm._custom_ops) gather_list() (in module vllm.worker.hpu_model_runner) gather_logprobs() (vllm.v1.sample.sampler.Sampler method) (vllm.v1.sample.tpu.sampler.Sampler method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) gather_mm_placeholders() (in module vllm.v1.worker.utils) GB_bytes (in module vllm.utils) gegelu() (in module vllm.model_executor.models.phi3_small) gelu_and_mul() (vllm._ipex_ops.ipex_ops static method) gelu_fast() (vllm._ipex_ops.ipex_ops static method) gelu_new() (vllm._ipex_ops.ipex_ops static method) gelu_quick() (vllm._ipex_ops.ipex_ops static method) gelu_tanh_and_mul() (vllm._ipex_ops.ipex_ops static method) GeluAndMul (class in vllm.model_executor.layers.activation) Gemma2Attention (class in vllm.model_executor.models.gemma2) Gemma2DecoderLayer (class in vllm.model_executor.models.gemma2) Gemma2ForCausalLM (class in vllm.model_executor.models.gemma2) Gemma2MLP (class in vllm.model_executor.models.gemma2) Gemma2Model (class in vllm.model_executor.models.gemma2) Gemma3Attention (class in vllm.model_executor.models.gemma3) Gemma3DecoderLayer (class in vllm.model_executor.models.gemma3) Gemma3DummyInputsBuilder (class in vllm.model_executor.models.gemma3_mm) Gemma3ForCausalLM (class in vllm.model_executor.models.gemma3) Gemma3ForConditionalGeneration (class in vllm.model_executor.models.gemma3_mm) Gemma3ImageInputs (in module vllm.model_executor.models.gemma3_mm) Gemma3ImagePixelInputs (class in vllm.model_executor.models.gemma3_mm) Gemma3MLP (class in vllm.model_executor.models.gemma3) Gemma3Model (class in vllm.model_executor.models.gemma3) Gemma3MultiModalProcessor (class in vllm.model_executor.models.gemma3_mm) Gemma3MultiModalProjector (class in vllm.model_executor.models.gemma3_mm) Gemma3ProcessingInfo (class in vllm.model_executor.models.gemma3_mm) GemmaAttention (class in vllm.model_executor.models.gemma) GemmaDecoderLayer (class in vllm.model_executor.models.gemma) GemmaForCausalLM (class in vllm.model_executor.models.gemma) GemmaMLP (class in vllm.model_executor.models.gemma) GemmaModel (class in vllm.model_executor.models.gemma) GemmaRMSNorm (class in vllm.model_executor.layers.layernorm) GEN_AI_LATENCY_E2E (vllm.tracing.SpanAttributes attribute) GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE (vllm.tracing.SpanAttributes attribute) GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD (vllm.tracing.SpanAttributes attribute) GEN_AI_LATENCY_TIME_IN_QUEUE (vllm.tracing.SpanAttributes attribute) GEN_AI_LATENCY_TIME_IN_SCHEDULER (vllm.tracing.SpanAttributes attribute) GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN (vllm.tracing.SpanAttributes attribute) GEN_AI_REQUEST_ID (vllm.tracing.SpanAttributes attribute) GEN_AI_REQUEST_MAX_TOKENS (vllm.tracing.SpanAttributes attribute) GEN_AI_REQUEST_N (vllm.tracing.SpanAttributes attribute) GEN_AI_REQUEST_TEMPERATURE (vllm.tracing.SpanAttributes attribute) GEN_AI_REQUEST_TOP_P (vllm.tracing.SpanAttributes attribute) GEN_AI_RESPONSE_MODEL (vllm.tracing.SpanAttributes attribute) GEN_AI_USAGE_COMPLETION_TOKENS (vllm.tracing.SpanAttributes attribute) GEN_AI_USAGE_NUM_SEQUENCES (vllm.tracing.SpanAttributes attribute) GEN_AI_USAGE_PROMPT_TOKENS (vllm.tracing.SpanAttributes attribute) gen_inter_data_builder() (vllm.worker.model_runner.ModelInputForGPUBuilder method) gen_seq_group_to_sample_builder() (in module vllm.model_executor.sampling_metadata) generate() (in module vllm.entrypoints.api_server) (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.v1.engine.async_llm.AsyncLLM method) generate_block_hash_extra_keys() (in module vllm.v1.core.kv_cache_utils) generate_draft_token_ids() (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) generate_proposals() (vllm.model_executor.models.medusa.Medusa method) (vllm.model_executor.models.mlp_speculator.MLPSpeculator method) (vllm.worker.cpu_model_runner.CPUModelRunner method) generate_random_id() (vllm.entrypoints.openai.tool_parsers.mistral_tool_parser.MistralToolCall static method) generate_uniform_probs() (in module vllm.v1.sample.rejection_sampler) generated_text (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) generation_config (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) generator (vllm.model_executor.models.module_mapping.MultiModelKeys attribute) (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) generator() (vllm.engine.async_llm_engine.AsyncStream method) generators (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata property) (vllm.worker.model_runner_base.ModelRunnerBase attribute) generic_dequantize_gemm() (in module vllm.model_executor.layers.quantization.aqlm) get() (vllm.core.block.common.ReadOnlyRefCounter method) (vllm.core.block.common.RefCounter method) (vllm.core.block.common.RefCounterProtocol method) (vllm.distributed.kv_transfer.kv_lookup_buffer.base.KVStoreBufferBase method) (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStore method) (vllm.executor.multiproc_worker_utils.ResultFuture method) (vllm.multimodal.parse.DictEmbeddingItems method) (vllm.multimodal.parse.EmbeddingItems method) (vllm.multimodal.parse.ModalityDataItems method) (vllm.multimodal.parse.ProcessorBatchItems method) (vllm.multimodal.processing.ProcessingCache method) (vllm.utils.LRUCache method) (vllm.v1.engine.output_processor.RequestOutputCollector method) get_1d_sincos_pos_embed_from_grid() (in module vllm.model_executor.layers.resampler) get_2d_sincos_pos_embed() (in module vllm.model_executor.layers.resampler) get_2d_sincos_pos_embed_from_grid() (in module vllm.model_executor.layers.resampler) get_abs_pos() (in module vllm.model_executor.layers.resampler) get_act_and_mul_fn() (in module vllm.model_executor.layers.activation) get_act_fn() (in module vllm.model_executor.layers.activation) get_activation() (in module vllm.model_executor.models.phi4mm_utils) get_adapter() (in module vllm.adapter_commons.utils) (vllm.adapter_commons.models.AdapterModelManager method) (vllm.lora.models.LoRAModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) get_adapter_absolute_path() (in module vllm.lora.utils) get_added_vocab() (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) get_aiter_mla_metadata() (in module vllm.attention.ops.rocm_aiter_mla) get_all() (vllm.multimodal.parse.ModalityDataItems method) get_all_blocks_recursively() (in module vllm.core.block.common) get_all_counts() (vllm.multimodal.parse.MultiModalDataItems method) get_all_free_blocks() (vllm.v1.core.kv_cache_utils.FreeKVCacheBlockQueue method) get_all_num_logprobs() (in module vllm.spec_decode.util) get_all_seq_ids() (in module vllm.sequence) get_all_seq_ids_and_request_ids() (in module vllm.sequence) get_all_weights() (vllm.model_executor.model_loader.default_loader.DefaultModelLoader method) get_allowed_kwarg_only_overrides() (in module vllm.utils) get_allowed_mm_limits() (vllm.multimodal.processing.BaseProcessingInfo method) get_and_reset_finished_requests_ids() (vllm.core.scheduler.Scheduler method) get_and_reset_swaps() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) get_and_update_p0() (vllm.v1.engine.mm_input_cache.MirroredProcessingCache method) get_and_update_p1() (vllm.v1.engine.mm_input_cache.MirroredProcessingCache method) get_architecture_class_name() (in module vllm.model_executor.model_loader.utils) get_argments() (vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser.Internlm2ToolParser method) get_async_client() (vllm.connections.HTTPConnection method) get_async_response() (vllm.connections.HTTPConnection method) get_attention_mask_by_audio_len() (vllm.model_executor.models.ultravox.ModifiedWhisperEncoder method) get_attn_backend() (in module vllm.attention.selector) get_attn_backend_cls() (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.hpu.HpuPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform class method) get_attn_bias() (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata method) get_attn_pattern() (vllm.attention.ops.blocksparse_attention.interface.LocalStridedBlockSparseAttn method) get_attr_docs() (in module vllm.config) get_audio() (vllm.assets.video.VideoAsset method) get_audio_features() (vllm.model_executor.models.phi4mm_audio.AudioEmbedding method) get_audio_hidden_states() (vllm.model_executor.models.minicpmo.MiniCPMO method) get_audio_len_by_num_chunks() (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_audio_length() (vllm.multimodal.parse.AudioProcessorItems method) get_audio_num_frames() (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo method) get_audio_placeholder() (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_audio_prompt_texts() (vllm.model_executor.models.minicpmo.MiniCPMOMultiModalProcessor method) get_autotune_configs() (in module vllm.attention.ops.triton_flash_attention) get_bad_words_logits_processors() (in module vllm.logits_process) get_beam_search_score() (in module vllm.beam_search) get_blip_num_patches() (in module vllm.model_executor.models.blip) get_blip_patch_grid_length() (in module vllm.model_executor.models.blip) get_block_ids() (vllm.v1.core.kv_cache_manager.KVCacheBlocks method) (vllm.v1.core.kv_cache_manager.KVCacheManager method) get_block_space_manager_class() (vllm.core.interfaces.BlockSpaceManager static method) get_block_table() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) get_bos_token_id() (vllm.inputs.preprocess.InputPreprocessor method) get_bsz_seq_len() (in module vllm.model_executor.models.bart) get_builder_cls() (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionBackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.flashmla.FlashMLABackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_aiter_mla.AiterMLABackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.v1.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.v1.attention.backends.flashinfer.FlashInferBackend static method) (vllm.v1.attention.backends.mla.common.MLACommonBackend static method) (vllm.v1.attention.backends.mla.flashmla.FlashMLABackend static method) (vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend static method) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend static method) get_bytes() (vllm.connections.HTTPConnection method) get_cache_block_size() (vllm.worker.cache_engine.CacheEngine static method) (vllm.worker.cpu_worker.CPUCacheEngine static method) get_cache_block_size_bytes() (vllm.spec_decode.proposer_worker_base.NonLLMProposerWorkerBase method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.neuron_worker.NeuronWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.WorkerBase method) get_cache_dir() (in module vllm.assets.base) get_cache_scale() (vllm.model_executor.layers.quantization.base_config.QuantizationConfig method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig method) (vllm.model_executor.layers.quantization.fp8.Fp8Config method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig method) get_cached_block() (vllm.v1.core.block_pool.BlockPool method) get_cached_input_ids() (vllm.v1.core.encoder_cache_manager.EncoderCacheManager method) get_cached_seq_group_to_sample() (vllm.model_executor.sampling_metadata.SamplingMetadataCache method) get_cached_tokenizer() (in module vllm.transformers_utils.tokenizer) get_cachingallocator_config() (in module vllm.collect_env) get_candidate_tilings() (in module vllm.model_executor.models.molmo) get_cast_device() (vllm.model_executor.models.qwen_vl.TransformerBlock method) get_cast_dtype() (vllm.model_executor.models.qwen_vl.TransformerBlock method) get_cdna_autotune_configs() (in module vllm.attention.ops.triton_flash_attention) get_chat_request_role() (vllm.entrypoints.openai.serving_chat.OpenAIServingChat method) get_chat_template_fallback_path() (in module vllm.transformers_utils.chat_templates.registry) get_child_info() (vllm.v1.engine.parallel_sampling.ParentRequest method) get_chunk_length() (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_clang_version() (in module vllm.collect_env) get_class() (vllm.v1.executor.abstract.Executor static method) get_cmake_version() (in module vllm.collect_env) get_common_computed_block_ids() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) get_compiler() (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarCompilerCache class method) get_computed_blocks() (vllm.v1.core.kv_cache_manager.KVCacheManager method) get_conda_packages() (in module vllm.collect_env) get_config() (in module vllm.model_executor.layers.fused_moe) (in module vllm.transformers_utils.config) get_config_dtype_str() (in module vllm.model_executor.layers.fused_moe.fused_moe) get_config_file_name() (in module vllm.model_executor.layers.fused_moe.fused_moe) get_config_filenames() (vllm.model_executor.layers.quantization.aqlm.AQLMConfig class method) (vllm.model_executor.layers.quantization.awq.AWQConfig static method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig class method) (vllm.model_executor.layers.quantization.base_config.QuantizationConfig static method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig class method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesConfig static method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig class method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPConfig static method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8Config class method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8Config class method) (vllm.model_executor.layers.quantization.fp8.Fp8Config class method) (vllm.model_executor.layers.quantization.gguf.GGUFConfig class method) (vllm.model_executor.layers.quantization.gptq.GPTQConfig class method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24Config class method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinConfig class method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig static method) (vllm.model_executor.layers.quantization.marlin.MarlinConfig class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8Config class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4Config class method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config class method) (vllm.model_executor.layers.quantization.neuron_quant.NeuronQuantConfig static method) (vllm.model_executor.layers.quantization.qqq.QQQConfig class method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig class method) (vllm.model_executor.layers.quantization.torchao.TorchAOConfig static method) (vllm.model_executor.layers.quantization.tpu_int8.Int8TpuConfig static method) get_content() (vllm.multimodal.processing.BoundPromptUpdate method) get_core_engine_for_request() (vllm.v1.engine.core_client.DPAsyncMPClient method) get_count() (vllm.multimodal.parse.DictEmbeddingItems method) (vllm.multimodal.parse.EmbeddingItems method) (vllm.multimodal.parse.ModalityDataItems method) (vllm.multimodal.parse.MultiModalDataItems method) (vllm.multimodal.parse.ProcessorBatchItems method) get_counter_dict() (vllm.worker.hpu_model_runner.HabanaProfilerCounterHelper method) get_cpu_architecture() (vllm.platforms.interface.Platform class method) get_cpu_info() (in module vllm.collect_env) get_cpu_memory() (in module vllm.utils) get_cpu_tensor() (vllm.v1.worker.block_table.BlockTable method) get_cross_attention_mask() (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) get_cross_attention_states() (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) get_cross_block_table() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) get_cross_encoder_activation_function() (in module vllm.transformers_utils.config) get_cu_count() (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) get_cuda_module_loading_config() (in module vllm.collect_env) get_cuda_view_from_cpu_tensor() (in module vllm.utils) get_cudnn_version() (in module vllm.collect_env) get_cumulative_logprob() (vllm.sequence.Sequence method) get_current_memory_usage() (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.xpu.XPUPlatform class method) get_current_usage() (vllm.device_allocator.cumem.CuMemAllocator method) get_current_vllm_config() (in module vllm.config) get_cutlass_moe_mm_data() (in module vllm._custom_ops) get_cv2_video_api() (vllm.multimodal.video.OpenCVVideoBackend method) get_data() (vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer method) get_data_socket() (vllm.engine.multiprocessing.client.MQLLMEngineClient method) get_decoder() (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) get_decoder_dummy_data() (vllm.multimodal.profiling.MultiModalProfiler method) (vllm.multimodal.registry.MultiModalRegistry method) get_decoder_start_token_id() (vllm.inputs.preprocess.InputPreprocessor method) get_decoding_config() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) get_default_audio_pool_step() (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_default_audio_sampling_rate() (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_default_cache_root() (in module vllm.envs) get_default_config() (in module vllm.model_executor.layers.fused_moe.fused_moe) get_default_config_root() (in module vllm.envs) get_default_sampling_params() (vllm.entrypoints.llm.LLM method) get_delta_and_reset() (vllm.sequence.SequenceData method) get_device_attribute() (in module vllm._custom_ops) get_device_capability() (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.cuda.NonNvmlCudaPlatform class method) (vllm.platforms.cuda.NvmlCudaPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.xpu.XPUPlatform static method) get_device_communicator_cls() (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.hpu.HpuPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.neuron.NeuronPlatform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform class method) get_device_name() (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.cuda.NonNvmlCudaPlatform class method) (vllm.platforms.cuda.NvmlCudaPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.neuron.NeuronPlatform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform static method) get_device_tensor() (vllm.v1.worker.block_table.BlockTable method) get_device_total_memory() (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.cuda.NonNvmlCudaPlatform class method) (vllm.platforms.cuda.NvmlCudaPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform class method) get_device_uuid() (vllm.platforms.cuda.NvmlCudaPlatform class method) (vllm.platforms.interface.Platform class method) get_diff_sampling_param() (vllm.config.ModelConfig method) get_distributed_init_method() (in module vllm.utils) get_dp_group() (in module vllm.distributed.parallel_state) get_dtype_size() (in module vllm.utils) get_dummy_mm_data() (vllm.model_executor.models.aria.AriaDummyInputsBuilder method) (vllm.model_executor.models.aya_vision.AyaVisionDummyInputsBuilder method) (vllm.model_executor.models.blip2.Blip2DummyInputsBuilder method) (vllm.model_executor.models.chameleon.ChameleonDummyInputsBuilder method) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2DummyInputsBuilder method) (vllm.model_executor.models.florence2.Florence2DummyInputsBuilder method) (vllm.model_executor.models.fuyu.FuyuDummyInputsBuilder method) (vllm.model_executor.models.gemma3_mm.Gemma3DummyInputsBuilder method) (vllm.model_executor.models.glm4v.GLM4VDummyInputsBuilder method) (vllm.model_executor.models.granite_speech.GraniteSpeechDummyInputsBuilder method) (vllm.model_executor.models.idefics3.Idefics3DummyInputsBuilder method) (vllm.model_executor.models.internvl.InternVLDummyInputsBuilder method) (vllm.model_executor.models.kimi_vl.KimiVLDummyInputsBuilder method) (vllm.model_executor.models.llava.LlavaDummyInputsBuilder method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoDummyInputsBuilder method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionDummyInputsBuilder method) (vllm.model_executor.models.minicpmo.MiniCPMODummyInputsBuilder method) (vllm.model_executor.models.minicpmv.MiniCPMVDummyInputsBuilder method) (vllm.model_executor.models.mistral3.Mistral3DummyInputsBuilder method) (vllm.model_executor.models.mllama.MllamaDummyInputsBuilder method) (vllm.model_executor.models.mllama4.Mllama4DummyInputsBuilder method) (vllm.model_executor.models.molmo.MolmoDummyInputsBuilder method) (vllm.model_executor.models.nvlm_d.NVLMDummyInputsBuilder method) (vllm.model_executor.models.ovis.OvisDummyInputsBuilder method) (vllm.model_executor.models.paligemma.PaliGemmaDummyInputsBuilder method) (vllm.model_executor.models.phi3v.Phi3VDummyInputsBuilder method) (vllm.model_executor.models.phi4mm.Phi4MMDummyInputsBuilder method) (vllm.model_executor.models.pixtral.PixtralDummyInputsBuilder method) (vllm.model_executor.models.prithvi_geospatial_mae.PrithviGeoSpatialMAEInputBuilder method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerDummyInputsBuilder method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioDummyInputsBuilder method) (vllm.model_executor.models.qwen2_vl.Qwen2VLDummyInputsBuilder method) (vllm.model_executor.models.qwen_vl.QwenVLDummyInputsBuilder method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VDummyInputsBuilder method) (vllm.model_executor.models.ultravox.UltravoxDummyInputsBuilder method) (vllm.model_executor.models.whisper.WhisperDummyInputsBuilder method) (vllm.multimodal.profiling.BaseDummyInputsBuilder method) get_dummy_processor_inputs() (vllm.multimodal.profiling.BaseDummyInputsBuilder method) get_dummy_text() (vllm.model_executor.models.aria.AriaDummyInputsBuilder method) (vllm.model_executor.models.aya_vision.AyaVisionDummyInputsBuilder method) (vllm.model_executor.models.blip2.Blip2DummyInputsBuilder method) (vllm.model_executor.models.chameleon.ChameleonDummyInputsBuilder method) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2DummyInputsBuilder method) (vllm.model_executor.models.florence2.Florence2DummyInputsBuilder method) (vllm.model_executor.models.fuyu.FuyuDummyInputsBuilder method) (vllm.model_executor.models.gemma3_mm.Gemma3DummyInputsBuilder method) (vllm.model_executor.models.glm4v.GLM4VDummyInputsBuilder method) (vllm.model_executor.models.granite_speech.GraniteSpeechDummyInputsBuilder method) (vllm.model_executor.models.idefics3.Idefics3DummyInputsBuilder method) (vllm.model_executor.models.internvl.InternVLDummyInputsBuilder method) (vllm.model_executor.models.kimi_vl.KimiVLDummyInputsBuilder method) (vllm.model_executor.models.llava.LlavaDummyInputsBuilder method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoDummyInputsBuilder method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionDummyInputsBuilder method) (vllm.model_executor.models.minicpmo.MiniCPMODummyInputsBuilder method) (vllm.model_executor.models.minicpmv.MiniCPMVDummyInputsBuilder method) (vllm.model_executor.models.mistral3.Mistral3DummyInputsBuilder method) (vllm.model_executor.models.mllama.MllamaDummyInputsBuilder method) (vllm.model_executor.models.mllama4.Mllama4DummyInputsBuilder method) (vllm.model_executor.models.molmo.MolmoDummyInputsBuilder method) (vllm.model_executor.models.nvlm_d.NVLMDummyInputsBuilder method) (vllm.model_executor.models.ovis.OvisDummyInputsBuilder method) (vllm.model_executor.models.paligemma.PaliGemmaDummyInputsBuilder method) (vllm.model_executor.models.phi3v.Phi3VDummyInputsBuilder method) (vllm.model_executor.models.phi4mm.Phi4MMDummyInputsBuilder method) (vllm.model_executor.models.pixtral.PixtralDummyInputsBuilder method) (vllm.model_executor.models.prithvi_geospatial_mae.PrithviGeoSpatialMAEInputBuilder method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerDummyInputsBuilder method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioDummyInputsBuilder method) (vllm.model_executor.models.qwen2_vl.Qwen2VLDummyInputsBuilder method) (vllm.model_executor.models.qwen_vl.QwenVLDummyInputsBuilder method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VDummyInputsBuilder method) (vllm.model_executor.models.ultravox.UltravoxDummyInputsBuilder method) (vllm.model_executor.models.whisper.WhisperDummyInputsBuilder method) (vllm.multimodal.profiling.BaseDummyInputsBuilder method) get_dynamic_hd() (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo method) get_dynamic_override() (in module vllm.model_executor.layers.quantization.utils.gptq_utils) get_eagle3_aux_hidden_state_layers() (vllm.model_executor.models.llama.LlamaForCausalLM method) get_encoder_dummy_data() (vllm.multimodal.profiling.MultiModalProfiler method) (vllm.multimodal.registry.MultiModalRegistry method) get_encoder_outputs() (vllm.model_executor.models.whisper.WhisperModel method) get_encoder_seq() (vllm.sequence.SequenceGroup method) get_env_info() (in module vllm.collect_env) get_env_variable_attn_backend() (in module vllm.attention.selector) get_env_vars() (in module vllm.collect_env) get_eos_token_id() (vllm.inputs.preprocess.InputPreprocessor method) get_exception_traceback() (in module vllm.utils) get_feature_extractor() (in module vllm.transformers_utils.processor) (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerProcessingInfo method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioProcessingInfo method) (vllm.model_executor.models.ultravox.UltravoxProcessingInfo method) (vllm.model_executor.models.whisper.WhisperProcessingInfo method) get_feature_size() (vllm.multimodal.parse.EmbeddingItems method) get_field() (in module vllm.config) get_finished() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorWorker method) get_finished_kv_transfers() (vllm.v1.worker.gpu_model_runner.GPUModelRunner static method) get_finished_reason() (vllm.sequence.SequenceStatus static method) (vllm.v1.request.Request method) (vllm.v1.request.RequestStatus static method) get_finished_seqs() (vllm.sequence.SequenceGroup method) get_first_seq_id() (vllm.sequence.SequenceGroupMetadata method) get_flash_mla_metadata() (in module vllm._custom_ops) get_forward_context() (in module vllm.forward_context) get_fp8_dtype_for_flashinfer() (vllm.attention.backends.flashinfer.FlashInferBackend static method) get_frame_size() (vllm.model_executor.models.minicpmv.MiniCPMVVideoEmbeddingItems method) (vllm.multimodal.parse.VideoProcessorItems method) get_freed_ids() (vllm.v1.core.encoder_cache_manager.EncoderCacheManager method) get_freqs_cis_by_idx() (vllm.model_executor.models.moonvit.Rope2DPosEmb method) get_freqs_cis_by_seqlens() (vllm.model_executor.models.moonvit.Rope2DPosEmb method) get_from_extra_config() (vllm.config.KVTransferConfig method) get_from_keys() (vllm.model_executor.layers.quantization.base_config.QuantizationConfig static method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig static method) get_from_keys_or() (vllm.model_executor.layers.quantization.base_config.QuantizationConfig static method) get_full_text_row_masked_out_mask() (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) get_gcc_version() (in module vllm.collect_env) get_general_autotune_configs() (in module vllm.attention.ops.triton_flash_attention) get_generators() (vllm.worker.model_runner_base.ModelRunnerBase method) get_gguf_extra_tensor_names() (in module vllm.model_executor.model_loader.weight_utils) get_global_forced_attn_backend() (in module vllm.attention.selector) get_gpu_info() (in module vllm.collect_env) get_gpu_topo() (in module vllm.collect_env) get_graph_buffer_ipc_meta() (in module vllm._custom_ops) get_graph_input_buffers() (vllm.attention.backends.abstract.AttentionState method) (vllm.attention.backends.flashinfer.FlashInferState method) (vllm.attention.backends.flashmla.FlashMLAState method) (vllm.attention.backends.mla.common.MLACommonState method) (vllm.attention.backends.rocm_aiter_mla.AiterMLAState method) (vllm.attention.backends.utils.CommonAttentionState method) get_guided_decoding_logits_processor() (in module vllm.model_executor.guided_decoding) get_h2ovl_target_ratios() (in module vllm.model_executor.models.h2ovl) get_head_size() (vllm.config.ModelConfig method) get_head_sliding_step() (in module vllm.attention.ops.blocksparse_attention.utils) get_hf_config() (vllm.inputs.registry.InputContext method) (vllm.model_executor.models.aria.AriaProcessingInfo method) (vllm.model_executor.models.aya_vision.AyaVisionProcessingInfo method) (vllm.model_executor.models.blip2.Blip2ProcessingInfo method) (vllm.model_executor.models.chameleon.ChameleonProcessingInfo method) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ProcessingInfo method) (vllm.model_executor.models.florence2.Florence2ProcessingInfo method) (vllm.model_executor.models.fuyu.FuyuProcessingInfo method) (vllm.model_executor.models.gemma3_mm.Gemma3ProcessingInfo method) (vllm.model_executor.models.glm4v.GLM4VProcessingInfo method) (vllm.model_executor.models.kimi_vl.KimiVLProcessingInfo method) (vllm.model_executor.models.llava.BaseLlavaProcessingInfo method) (vllm.model_executor.models.llava_next.LlavaNextProcessingInfo method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoProcessingInfo method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ProcessingInfo method) (vllm.model_executor.models.mistral3.BaseLlavaProcessingInfo method) (vllm.model_executor.models.mllama.MllamaProcessingInfo method) (vllm.model_executor.models.mllama4.Mllama4ProcessingInfo method) (vllm.model_executor.models.ovis.OvisProcessingInfo method) (vllm.model_executor.models.paligemma.PaliGemmaProcessingInfo method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerProcessingInfo method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLProcessingInfo method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) (vllm.model_executor.models.whisper.WhisperProcessingInfo method) (vllm.multimodal.processing.BaseProcessingInfo method) get_hf_config_sliding_window() (vllm.config.ModelConfig method) get_hf_file_to_dict() (in module vllm.transformers_utils.config) get_hf_image_processor_config() (in module vllm.transformers_utils.config) (vllm.inputs.registry.InputContext method) get_hf_processor() (vllm.inputs.registry.InputContext method) (vllm.inputs.registry.InputProcessingContext method) (vllm.model_executor.models.aria.AriaProcessingInfo method) (vllm.model_executor.models.aya_vision.AyaVisionProcessingInfo method) (vllm.model_executor.models.chameleon.ChameleonProcessingInfo method) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ProcessingInfo method) (vllm.model_executor.models.florence2.Florence2ProcessingInfo method) (vllm.model_executor.models.fuyu.FuyuProcessingInfo method) (vllm.model_executor.models.gemma3_mm.Gemma3ProcessingInfo method) (vllm.model_executor.models.glm4v.GLM4VProcessingInfo method) (vllm.model_executor.models.h2ovl.H2OVLProcessingInfo method) (vllm.model_executor.models.idefics3.Idefics3ProcessingInfo method) (vllm.model_executor.models.internvl.BaseInternVLProcessingInfo method) (vllm.model_executor.models.internvl.InternVLProcessingInfo method) (vllm.model_executor.models.llava.BaseLlavaProcessingInfo method) (vllm.model_executor.models.llava.LlavaProcessingInfo method) (vllm.model_executor.models.llava.MantisProcessingInfo method) (vllm.model_executor.models.llava.PixtralHFProcessingInfo method) (vllm.model_executor.models.llava_next.LlavaNextProcessingInfo method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoProcessingInfo method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ProcessingInfo method) (vllm.model_executor.models.mistral3.BaseLlavaProcessingInfo method) (vllm.model_executor.models.mistral3.Mistral3ProcessingInfo method) (vllm.model_executor.models.mllama.MllamaProcessingInfo method) (vllm.model_executor.models.mllama4.Mllama4ProcessingInfo method) (vllm.model_executor.models.molmo.MolmoProcessingInfo method) (vllm.model_executor.models.nvlm_d.NVLMProcessingInfo method) (vllm.model_executor.models.ovis.OvisProcessingInfo method) (vllm.model_executor.models.phi3v.Phi3VProcessingInfo method) (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo method) (vllm.model_executor.models.pixtral.PixtralProcessingInfo method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerProcessingInfo method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLProcessingInfo method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) (vllm.model_executor.models.qwen_vl.QwenVLProcessingInfo method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessingInfo method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VProcessingInfo method) (vllm.model_executor.models.smolvlm.SmolVLMProcessingInfo method) (vllm.model_executor.models.ultravox.UltravoxProcessingInfo method) (vllm.model_executor.models.whisper.WhisperProcessingInfo method) (vllm.multimodal.processing.BaseProcessingInfo method) get_hf_text_config() (in module vllm.transformers_utils.config) get_hidden_size() (vllm.config.ModelConfig method) get_hit_rate() (vllm.core.block.common.CacheMetricData method) get_image_feature_grid_size() (vllm.model_executor.models.fuyu.FuyuProcessingInfo method) get_image_max_slice_num() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_image_pad_token() (vllm.model_executor.models.ovis.OvisProcessingInfo method) get_image_processor() (in module vllm.transformers_utils.processor) (vllm.model_executor.models.aya_vision.AyaVisionProcessingInfo method) (vllm.model_executor.models.fuyu.FuyuProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.ovis.OvisProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) get_image_prompt_texts() (vllm.model_executor.models.minicpmv.MiniCPMVMultiModalProcessor method) get_image_repl() (vllm.model_executor.models.gemma3_mm.Gemma3ProcessingInfo method) (vllm.model_executor.models.h2ovl.H2OVLProcessor method) (vllm.model_executor.models.idefics3.Idefics3ProcessingInfo method) (vllm.model_executor.models.internvl.BaseInternVLProcessor method) (vllm.model_executor.models.internvl.InternVLProcessor method) (vllm.model_executor.models.nvlm_d.NVLMProcessor method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessor method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VProcessor method) get_image_segment_len() (vllm.model_executor.models.ovis.OvisProcessingInfo method) get_image_size() (vllm.model_executor.models.clip.CLIPEncoderInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVImageEmbeddingItems method) (vllm.model_executor.models.pixtral.PixtralHFEncoderInfo method) (vllm.model_executor.models.siglip.SiglipEncoderInfo method) (vllm.model_executor.models.vision.VisionEncoderInfo method) (vllm.multimodal.parse.ImageProcessorItems method) (vllm.transformers_utils.processors.ovis.OvisProcessor method) get_image_size_with_most_features() (vllm.model_executor.models.aya_vision.AyaVisionProcessingInfo method) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ProcessingInfo method) (vllm.model_executor.models.fuyu.FuyuProcessingInfo method) (vllm.model_executor.models.gemma3_mm.Gemma3ProcessingInfo method) (vllm.model_executor.models.idefics3.Idefics3ProcessingInfo method) (vllm.model_executor.models.internvl.BaseInternVLProcessingInfo method) (vllm.model_executor.models.llava.BaseLlavaProcessingInfo method) (vllm.model_executor.models.llava_next.LlavaNextProcessingInfo method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoProcessingInfo method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.mistral3.BaseLlavaProcessingInfo method) (vllm.model_executor.models.mllama.MllamaProcessingInfo method) (vllm.model_executor.models.mllama4.Mllama4ProcessingInfo method) (vllm.model_executor.models.molmo.MolmoProcessingInfo method) (vllm.model_executor.models.ovis.OvisProcessingInfo method) (vllm.model_executor.models.phi3v.Phi3VProcessingInfo method) (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo method) (vllm.model_executor.models.pixtral.PixtralProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessingInfo method) get_image_tokens() (vllm.model_executor.models.chameleon.ChameleonModel method) get_img_features() (vllm.model_executor.models.phi3v.Phi3ImageEmbeddingBase method) (vllm.model_executor.models.phi4mm.Phi4MMImageEncoder method) get_impl_cls() (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionBackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.flashmla.FlashMLABackend static method) (vllm.attention.backends.hpu_attn.HPUAttentionBackend static method) (vllm.attention.backends.ipex_attn.IpexAttnBackend static method) (vllm.attention.backends.pallas.PallasAttentionBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_aiter_mla.AiterMLABackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.triton_mla.TritonMLABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.v1.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.v1.attention.backends.flashinfer.FlashInferBackend static method) (vllm.v1.attention.backends.mla.flashmla.FlashMLABackend static method) (vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend static method) (vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend static method) (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend static method) get_inductor_factors() (in module vllm.compilation.compiler_interface) get_infinity_values() (vllm.platforms.interface.Platform class method) (vllm.platforms.tpu.TpuPlatform class method) get_initial_dt_bias() (in module vllm.model_executor.models.plamo2) get_input_embeddings() (vllm.model_executor.models.arctic.ArcticForCausalLM method) (vllm.model_executor.models.arctic.ArcticModel method) (vllm.model_executor.models.aria.AriaForConditionalGeneration method) (vllm.model_executor.models.aya_vision.AyaVisionForConditionalGeneration method) (vllm.model_executor.models.baichuan.BaiChuanBaseForCausalLM method) (vllm.model_executor.models.baichuan.BaiChuanModel method) (vllm.model_executor.models.bamba.BambaForCausalLM method) (vllm.model_executor.models.bamba.BambaModel method) (vllm.model_executor.models.blip2.Blip2ForConditionalGeneration method) (vllm.model_executor.models.bloom.BloomForCausalLM method) (vllm.model_executor.models.bloom.BloomModel method) (vllm.model_executor.models.chameleon.ChameleonForConditionalGeneration method) (vllm.model_executor.models.chameleon.ChameleonModel method) (vllm.model_executor.models.chatglm.ChatGLMModel method) (vllm.model_executor.models.commandr.CohereForCausalLM method) (vllm.model_executor.models.commandr.CohereModel method) (vllm.model_executor.models.dbrx.DbrxForCausalLM method) (vllm.model_executor.models.dbrx.DbrxModel method) (vllm.model_executor.models.deepseek.DeepseekForCausalLM method) (vllm.model_executor.models.deepseek.DeepseekModel method) (vllm.model_executor.models.deepseek_v2.DeepseekV2ForCausalLM method) (vllm.model_executor.models.deepseek_v2.DeepseekV2Model method) (vllm.model_executor.models.deepseek_vl2.DeepseekVLV2ForCausalLM method) (vllm.model_executor.models.eagle.EAGLE method) (vllm.model_executor.models.exaone.ExaoneForCausalLM method) (vllm.model_executor.models.exaone.ExaoneModel method) (vllm.model_executor.models.falcon.FalconForCausalLM method) (vllm.model_executor.models.falcon.FalconModel method) (vllm.model_executor.models.florence2.Florence2ForConditionalGeneration method) (vllm.model_executor.models.florence2.Florence2LanguageForConditionalGeneration method) (vllm.model_executor.models.fuyu.FuyuForCausalLM method) (vllm.model_executor.models.gemma.GemmaForCausalLM method) (vllm.model_executor.models.gemma.GemmaModel method) (vllm.model_executor.models.gemma2.Gemma2ForCausalLM method) (vllm.model_executor.models.gemma2.Gemma2Model method) (vllm.model_executor.models.gemma3.Gemma3ForCausalLM method) (vllm.model_executor.models.gemma3.Gemma3Model method) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) (vllm.model_executor.models.glm4.Glm4ForCausalLM method) (vllm.model_executor.models.glm4v.GLM4VForCausalLM method) (vllm.model_executor.models.gpt2.GPT2LMHeadModel method) (vllm.model_executor.models.gpt2.GPT2Model method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeForCausalLM method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeModel method) (vllm.model_executor.models.gpt_j.GPTJForCausalLM method) (vllm.model_executor.models.gpt_j.GPTJModel method) (vllm.model_executor.models.gpt_neox.GPTNeoXForCausalLM method) (vllm.model_executor.models.gpt_neox.GPTNeoXModel method) (vllm.model_executor.models.granite.GraniteForCausalLM method) (vllm.model_executor.models.granite.GraniteModel method) (vllm.model_executor.models.granite_speech.GraniteSpeechForConditionalGeneration method) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM method) (vllm.model_executor.models.granitemoe.GraniteMoeModel method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridModel method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedModel method) (vllm.model_executor.models.grok1.Grok1ForCausalLM method) (vllm.model_executor.models.grok1.Grok1Model method) (vllm.model_executor.models.idefics2_vision_model.Idefics2VisionTransformer method) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration method) (vllm.model_executor.models.idefics3.Idefics3Model method) (vllm.model_executor.models.intern_vit.InternVisionModel method) (vllm.model_executor.models.intern_vit.InternVisionPatchModel method) (vllm.model_executor.models.internlm2.InternLM2ForCausalLM method) (vllm.model_executor.models.internlm2.InternLM2Model method) (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.jais.JAISLMHeadModel method) (vllm.model_executor.models.jais.JAISModel method) (vllm.model_executor.models.jamba.JambaForCausalLM method) (vllm.model_executor.models.jamba.JambaModel method) (vllm.model_executor.models.kimi_vl.KimiVLForConditionalGeneration method) (vllm.model_executor.models.llama.LlamaForCausalLM method) (vllm.model_executor.models.llama.LlamaModel method) (vllm.model_executor.models.llava.LlavaForConditionalGeneration method) (vllm.model_executor.models.llava_next.LlavaNextForConditionalGeneration method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoForConditionalGeneration method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) (vllm.model_executor.models.mamba.MambaForCausalLM method) (vllm.model_executor.models.mamba.MambaModel method) (vllm.model_executor.models.mamba2.Mamba2ForCausalLM method) (vllm.model_executor.models.mamba2.Mamba2Model method) (vllm.model_executor.models.minicpm.MiniCPMForCausalLM method) (vllm.model_executor.models.minicpm.MiniCPMModel method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01ForCausalLM method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01Model method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ForConditionalGeneration method) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration method) (vllm.model_executor.models.mixtral.MixtralForCausalLM method) (vllm.model_executor.models.mixtral.MixtralModel method) (vllm.model_executor.models.mixtral_quant.MixtralForCausalLM method) (vllm.model_executor.models.mixtral_quant.MixtralModel method) (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration method) (vllm.model_executor.models.molmo.MolmoForCausalLM method) (vllm.model_executor.models.molmo.MolmoModel method) (vllm.model_executor.models.mpt.MPTForCausalLM method) (vllm.model_executor.models.mpt.MPTModel method) (vllm.model_executor.models.nemotron.NemotronForCausalLM method) (vllm.model_executor.models.nemotron.NemotronModel method) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM method) (vllm.model_executor.models.nemotron_nas.DeciModel method) (vllm.model_executor.models.olmo.OlmoForCausalLM method) (vllm.model_executor.models.olmo.OlmoModel method) (vllm.model_executor.models.olmoe.OlmoeForCausalLM method) (vllm.model_executor.models.olmoe.OlmoeModel method) (vllm.model_executor.models.opt.OPTDecoder method) (vllm.model_executor.models.opt.OPTForCausalLM method) (vllm.model_executor.models.opt.OPTModel method) (vllm.model_executor.models.orion.OrionForCausalLM method) (vllm.model_executor.models.orion.OrionModel method) (vllm.model_executor.models.ovis.Ovis method) (vllm.model_executor.models.paligemma.PaliGemmaForConditionalGeneration method) (vllm.model_executor.models.persimmon.PersimmonForCausalLM method) (vllm.model_executor.models.persimmon.PersimmonModel method) (vllm.model_executor.models.phi.PhiForCausalLM method) (vllm.model_executor.models.phi.PhiModel method) (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) (vllm.model_executor.models.phi3_small.Phi3SmallModel method) (vllm.model_executor.models.phi3v.Phi3VForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM method) (vllm.model_executor.models.phimoe.PhiMoEModel method) (vllm.model_executor.models.pixtral.PixtralForConditionalGeneration method) (vllm.model_executor.models.qwen.QWenModel method) (vllm.model_executor.models.qwen2.Qwen2ForCausalLM method) (vllm.model_executor.models.qwen2.Qwen2Model method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioForConditionalGeneration method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeForCausalLM method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeModel method) (vllm.model_executor.models.qwen2_rm.Qwen2RewardBaseModel method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) (vllm.model_executor.models.qwen3.Qwen3ForCausalLM method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeForCausalLM method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeModel method) (vllm.model_executor.models.qwen_vl.QwenVLForConditionalGeneration method) (vllm.model_executor.models.siglip.SiglipVisionModel method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) (vllm.model_executor.models.solar.SolarModel method) (vllm.model_executor.models.stablelm.StableLMEpochModel method) (vllm.model_executor.models.stablelm.StablelmForCausalLM method) (vllm.model_executor.models.starcoder2.Starcoder2ForCausalLM method) (vllm.model_executor.models.starcoder2.Starcoder2Model method) (vllm.model_executor.models.teleflm.TeleFLMModel method) (vllm.model_executor.models.transformers.TransformersModel method) (vllm.model_executor.models.ultravox.UltravoxModel method) (vllm.model_executor.models.whisper.WhisperDecoder method) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration method) (vllm.model_executor.models.zamba2.Zamba2ForCausalLM method) (vllm.model_executor.models.zamba2.Zamba2Model method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) get_input_embeddings_v0() (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) get_input_positions() (vllm.model_executor.layers.rotary_embedding.MRotaryEmbedding class method) get_input_positions_tensor() (vllm.model_executor.layers.rotary_embedding.MRotaryEmbedding class method) get_input_preprocessor() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) get_inputs() (vllm.compilation.sequence_parallelism.EmbeddingAllReduceRMSNormPattern method) (vllm.compilation.sequence_parallelism.LastAllReduceRMSNormPattern method) (vllm.compilation.sequence_parallelism.MiddleAllReduceRMSNormPattern method) get_instance() (vllm.device_allocator.cumem.CuMemAllocator static method) get_int_dtype() (in module vllm.model_executor.layers.quantization.aqlm) get_internvl_target_ratios() (in module vllm.model_executor.models.internvl) get_ip() (in module vllm.utils) get_item() (vllm.multimodal.inputs.MultiModalKwargs method) (vllm.multimodal.processing.ProcessingCache method) get_item_count() (vllm.multimodal.inputs.MultiModalKwargs method) get_items() (vllm.multimodal.inputs.MultiModalKwargs method) (vllm.multimodal.parse.MultiModalDataItems method) get_json() (vllm.connections.HTTPConnection method) get_kv_cache_config() (in module vllm.v1.core.kv_cache_utils) get_kv_cache_layout() (vllm.attention.backends.flashinfer.FlashInferState method) get_kv_cache_shape() (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.hpu_attn.HPUAttentionBackend static method) (vllm.attention.backends.ipex_attn.IpexAttnBackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.backends.pallas.PallasAttentionBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.attention.ops.hpu_paged_attn.HPUPagedAttention static method) (vllm.attention.ops.paged_attn.PagedAttention static method) (vllm.v1.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.v1.attention.backends.flashinfer.FlashInferBackend static method) (vllm.v1.attention.backends.mla.common.MLACommonBackend static method) (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend static method) get_kv_cache_spec() (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.v1.worker.worker_base.WorkerBase method) get_kv_cache_specs() (vllm.v1.executor.abstract.Executor method) get_kv_cache_stride_order() (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) get_kv_cache_torch_dtype() (in module vllm.utils) get_kv_connector() (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) get_kv_from_cache() (vllm.distributed.kv_transfer.kv_connector.utils.model_aware_kv_ops_helper method) get_kv_transfer_group() (in module vllm.distributed.kv_transfer.kv_transfer_state) get_kwargs() (in module vllm.engine.arg_utils) get_language_model() (vllm.model_executor.models.aria.AriaForConditionalGeneration method) (vllm.model_executor.models.aya_vision.AyaVisionForConditionalGeneration method) (vllm.model_executor.models.blip2.Blip2ForConditionalGeneration method) (vllm.model_executor.models.chameleon.ChameleonForConditionalGeneration method) (vllm.model_executor.models.deepseek_vl2.DeepseekVLV2ForCausalLM method) (vllm.model_executor.models.florence2.Florence2ForConditionalGeneration method) (vllm.model_executor.models.fuyu.FuyuForCausalLM method) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) (vllm.model_executor.models.glm4v.GLM4VForCausalLM method) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration method) (vllm.model_executor.models.interfaces.SupportsMultiModal method) (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.kimi_vl.KimiVLForConditionalGeneration method) (vllm.model_executor.models.llava.LlavaForConditionalGeneration method) (vllm.model_executor.models.llava_next.LlavaNextForConditionalGeneration method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoForConditionalGeneration method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ForConditionalGeneration method) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration method) (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration method) (vllm.model_executor.models.molmo.MolmoForCausalLM method) (vllm.model_executor.models.ovis.Ovis method) (vllm.model_executor.models.paligemma.PaliGemmaForConditionalGeneration method) (vllm.model_executor.models.phi3v.Phi3VForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.pixtral.PixtralForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioForConditionalGeneration method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) (vllm.model_executor.models.qwen_vl.QwenVLForConditionalGeneration method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) (vllm.model_executor.models.ultravox.UltravoxModel method) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration method) get_last_token_id() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_last_token_latency() (vllm.sequence.SequenceGroup method) get_layers_from_vllm_config() (in module vllm.config) get_layers_start_end_indices() (vllm.config.ModelConfig method) get_len() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_libc_version() (in module vllm.collect_env) get_limit_per_prompt() (vllm.config.MultiModalConfig method) get_linear_method() (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPConfig method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig method) get_linear_quant_method() (in module vllm.model_executor.layers.quantization.utils.gptq_utils) get_local_guidance_guided_decoding_logits_processor() (in module vllm.model_executor.guided_decoding.guidance_decoding) get_local_guided_decoding_logits_processor() (in module vllm.model_executor.guided_decoding) get_local_lm_format_enforcer_guided_decoding_logits_processor() (in module vllm.model_executor.guided_decoding.lm_format_enforcer_decoding) get_local_outlines_guided_decoding_logits_processor() (in module vllm.model_executor.guided_decoding.outlines_decoding) get_local_path() (vllm.assets.audio.AudioAsset method) get_local_xgrammar_guided_decoding_logits_processor() (in module vllm.model_executor.guided_decoding.xgrammar_decoding) get_lock() (in module vllm.model_executor.model_loader.weight_utils) get_logits_processors() (in module vllm.entrypoints.openai.logits_processors) (in module vllm.entrypoints.openai.protocol) get_logprobs() (in module vllm.model_executor.layers.sampler) get_lora() (vllm.lora.models.LoRAModel method) get_lora_config() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) get_lora_id() (in module vllm.lora.models) get_lora_tokenizer() (in module vllm.transformers_utils.tokenizer) (vllm.transformers_utils.tokenizer_group.TokenizerGroup method) get_lora_tokenizer_async (in module vllm.transformers_utils.tokenizer) get_lora_tokenizer_async() (vllm.transformers_utils.tokenizer_group.TokenizerGroup method) get_lora_vocab_padding_size() (vllm.platforms.interface.Platform class method) (vllm.platforms.tpu.TpuPlatform class method) get_lru_cache() (vllm.multimodal.processing.ProcessingCache static method) get_lsb_version() (in module vllm.collect_env) get_mac_version() (in module vllm.collect_env) get_manager_for_kv_cache_spec() (in module vllm.v1.core.single_type_kv_cache_manager) get_masked_input_and_mask() (in module vllm.model_executor.layers.vocab_parallel_embedding) get_match_index (vllm.multimodal.processing.PromptIndex attribute) get_max_audio_chunks_with_most_features() (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_max_audio_len() (vllm.model_executor.models.granite_speech.GraniteSpeechMultiModalProcessingInfo method) get_max_audio_tokens() (vllm.model_executor.models.granite_speech.GraniteSpeechMultiModalProcessingInfo method) (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_max_audio_tokens_per_chunk() (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) get_max_block_per_batch() (vllm.worker.model_runner.GPUModelRunnerBase method) get_max_image_tokens() (vllm.model_executor.models.llava.BaseLlavaProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) get_max_input_len() (vllm.transformers_utils.tokenizer_group.TokenizerGroup method) get_max_multimodal_tokens() (vllm.multimodal.registry.MultiModalRegistry method) get_max_num_emitted_tokens() (vllm.spec_decode.metrics.AsyncMetricsCollector static method) get_max_num_running_seqs() (vllm.sequence.SequenceGroup method) get_max_num_tiles() (vllm.model_executor.models.mllama4.Mllama4ProcessingInfo method) get_max_shared_memory_bytes() (in module vllm.utils) get_max_shared_memory_per_block_device_attribute() (in module vllm._custom_ops) get_max_tokens_by_modality() (vllm.multimodal.registry.MultiModalRegistry method) get_max_tokens_per_item_by_modality() (vllm.multimodal.registry.MultiModalRegistry method) get_max_tokens_per_item_by_nonzero_modality() (vllm.multimodal.registry.MultiModalRegistry method) get_max_video_frame_tokens() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_max_video_frames() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_max_video_tokens() (vllm.model_executor.models.llava_onevision.LlavaOnevisionProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) GET_META_MSG (in module vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) get_metadata() (vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer method) get_metadata_cls() (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionBackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.flashmla.FlashMLABackend static method) (vllm.attention.backends.hpu_attn.HPUAttentionBackend static method) (vllm.attention.backends.ipex_attn.IpexAttnBackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.backends.pallas.PallasAttentionBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_aiter_mla.AiterMLABackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.v1.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.v1.attention.backends.flashinfer.FlashInferBackend static method) (vllm.v1.attention.backends.mla.common.MLACommonBackend static method) (vllm.v1.attention.backends.mla.flashmla.FlashMLABackend static method) (vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend static method) (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend static method) get_min_capability() (vllm.model_executor.layers.quantization.aqlm.AQLMConfig class method) (vllm.model_executor.layers.quantization.awq.AWQConfig class method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig class method) (vllm.model_executor.layers.quantization.base_config.QuantizationConfig class method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig class method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesConfig class method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_24.CompressedTensors24 class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_scheme.CompressedTensorsScheme class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24.CompressedTensorsW4A16Sparse24 class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_nvfp4.CompressedTensorsW4A16Fp4 class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8.CompressedTensorsW8A16Fp8 class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_fp8.CompressedTensorsW8A8Fp8 class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8.CompressedTensorsW8A8Int8 class method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16.CompressedTensorsWNA16 class method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPConfig class method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8Config class method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8Config class method) (vllm.model_executor.layers.quantization.fp8.Fp8Config class method) (vllm.model_executor.layers.quantization.gguf.GGUFConfig class method) (vllm.model_executor.layers.quantization.gptq.GPTQConfig class method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24Config class method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinConfig class method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.allspark.AllSparkLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama.ExllamaLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.machete.MacheteLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.marlin.MarlinLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter.AiterScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.cutlass.CutlassScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel.ScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.triton.TritonScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.xla.XLAScaledMMLinearKernel class method) (vllm.model_executor.layers.quantization.marlin.MarlinConfig class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8Config class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4Config class method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config class method) (vllm.model_executor.layers.quantization.neuron_quant.NeuronQuantConfig class method) (vllm.model_executor.layers.quantization.qqq.QQQConfig class method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig class method) (vllm.model_executor.layers.quantization.quark.schemes.quark_scheme.QuarkScheme class method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w4a4_mxfp4.QuarkW4A4MXFP4 class method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8.QuarkW8A8Fp8 class method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8.QuarkW8A8Int8 class method) (vllm.model_executor.layers.quantization.torchao.TorchAOConfig class method) (vllm.model_executor.layers.quantization.tpu_int8.Int8TpuConfig class method) get_min_page_size() (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) get_min_sliding_window() (in module vllm.config) get_mla_dims() (in module vllm.attention.backends.utils) get_mla_metadata() (in module vllm.attention.ops.flashmla) get_mm_config() (vllm.inputs.registry.InputContext method) get_mm_limits() (vllm.multimodal.profiling.MultiModalProfiler method) get_mm_limits_per_prompt() (vllm.multimodal.registry.MultiModalRegistry method) get_mm_mapping() (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) (vllm.model_executor.models.glm4v.GLM4VForCausalLM method) (vllm.model_executor.models.granite_speech.GraniteSpeechForConditionalGeneration method) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration method) (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) (vllm.model_executor.models.molmo.MolmoForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) (vllm.model_executor.models.qwen_vl.QwenVLForConditionalGeneration method) (vllm.model_executor.models.ultravox.UltravoxModel method) get_mm_max_tokens() (vllm.multimodal.profiling.MultiModalProfiler method) get_model() (in module vllm.model_executor.model_loader) (vllm.spec_decode.ngram_worker.NGramWorker method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.model_runner_base.ModelRunnerBase method) (vllm.worker.neuron_model_runner.NeuronModelRunner method) (vllm.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.LocalOrDistributedWorkerBase method) (vllm.worker.worker_base.WorkerBase method) (vllm.worker.xpu_model_runner.XPUModelRunner method) get_model_architecture() (in module vllm.model_executor.model_loader.utils) get_model_args() (vllm.distributed.kv_transfer.kv_connector.utils.model_aware_kv_ops_helper method) get_model_config() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.llm_engine.LLMEngine method) get_model_loader() (in module vllm.model_executor.model_loader) get_model_version() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_moe_configs() (in module vllm.model_executor.layers.fused_moe.fused_moe) get_moe_method() (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsMoEMethod static method) (vllm.model_executor.layers.quantization.quark.quark_moe.QuarkMoEMethod static method) get_moe_wna16_block_config() (in module vllm.model_executor.layers.fused_moe.fused_moe) get_mp_context() (in module vllm.utils) get_multi_modal_data_neuron() (vllm.worker.neuronx_distributed_model_runner.NeuronxDistributedModelRunner method) get_multimodal_config() (vllm.config.ModelConfig method) get_multimodal_embeddings() (vllm.model_executor.models.aria.AriaForConditionalGeneration method) (vllm.model_executor.models.aya_vision.AyaVisionForConditionalGeneration method) (vllm.model_executor.models.blip2.Blip2ForConditionalGeneration method) (vllm.model_executor.models.chameleon.ChameleonForConditionalGeneration method) (vllm.model_executor.models.deepseek_vl2.DeepseekVLV2ForCausalLM method) (vllm.model_executor.models.florence2.Florence2ForConditionalGeneration method) (vllm.model_executor.models.fuyu.FuyuForCausalLM method) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) (vllm.model_executor.models.glm4v.GLM4VForCausalLM method) (vllm.model_executor.models.granite_speech.GraniteSpeechForConditionalGeneration method) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration method) (vllm.model_executor.models.interfaces.SupportsMultiModal method) (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.kimi_vl.KimiVLForConditionalGeneration method) (vllm.model_executor.models.llava.LlavaForConditionalGeneration method) (vllm.model_executor.models.llava_next.LlavaNextForConditionalGeneration method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoForConditionalGeneration method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ForConditionalGeneration method) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration method) (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration method) (vllm.model_executor.models.molmo.MolmoForCausalLM method) (vllm.model_executor.models.ovis.Ovis method) (vllm.model_executor.models.paligemma.PaliGemmaForConditionalGeneration method) (vllm.model_executor.models.phi3v.Phi3VForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.pixtral.PixtralForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioForConditionalGeneration method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) (vllm.model_executor.models.qwen_vl.QwenVLForConditionalGeneration method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) (vllm.model_executor.models.ultravox.UltravoxModel method) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) get_multimodal_embeddings_v0() (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) get_name() (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionBackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.flashmla.FlashMLABackend static method) (vllm.attention.backends.hpu_attn.HPUAttentionBackend static method) (vllm.attention.backends.ipex_attn.IpexAttnBackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.backends.pallas.PallasAttentionBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_aiter_mla.AiterMLABackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.triton_mla.TritonMLABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.model_executor.layers.quantization.aqlm.AQLMConfig class method) (vllm.model_executor.layers.quantization.awq.AWQConfig method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig class method) (vllm.model_executor.layers.quantization.base_config.QuantizationConfig method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig class method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesConfig class method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPConfig class method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8Config class method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8Config class method) (vllm.model_executor.layers.quantization.fp8.Fp8Config class method) (vllm.model_executor.layers.quantization.gguf.GGUFConfig method) (vllm.model_executor.layers.quantization.gptq.GPTQConfig class method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24Config class method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinConfig class method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig class method) (vllm.model_executor.layers.quantization.marlin.MarlinConfig class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8Config class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4Config class method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config class method) (vllm.model_executor.layers.quantization.neuron_quant.NeuronQuantConfig method) (vllm.model_executor.layers.quantization.ptpc_fp8.PTPCFp8Config class method) (vllm.model_executor.layers.quantization.qqq.QQQConfig class method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig method) (vllm.model_executor.layers.quantization.torchao.TorchAOConfig method) (vllm.model_executor.layers.quantization.tpu_int8.Int8TpuConfig method) (vllm.v1.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.v1.attention.backends.flashinfer.FlashInferBackend static method) (vllm.v1.attention.backends.mla.common.MLACommonBackend static method) (vllm.v1.attention.backends.mla.flashmla.FlashMLABackend static method) (vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend static method) (vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend static method) (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend static method) get_navit_vision_model() (in module vllm.model_executor.models.phi4mm) get_neuron_eagle_speculation_model() (in module vllm.model_executor.model_loader.neuron) get_neuron_framework_to_use() (vllm.platforms.neuron.NeuronPlatform method) get_neuron_model() (in module vllm.model_executor.model_loader.neuron) (in module vllm.model_executor.model_loader.neuronx_distributed) get_neuron_sdk_version() (in module vllm.collect_env) get_neuron_speculation_model() (in module vllm.model_executor.model_loader.neuron) (in module vllm.model_executor.model_loader.neuronx_distributed) get_neuronx_distributed_model_runner() (vllm.worker.neuron_worker.NeuronWorker method) get_new_and_aborted_requests() (vllm.engine.async_llm_engine.RequestTracker method) get_new_blocks() (vllm.v1.core.block_pool.BlockPool method) get_next_dp_init_port() (vllm.config.ParallelConfig method) get_next_input_positions() (vllm.model_executor.layers.rotary_embedding.MRotaryEmbedding static method) get_next_input_positions_tensor() (vllm.model_executor.layers.rotary_embedding.MRotaryEmbedding static method) get_next_output_text() (vllm.v1.engine.detokenizer.BaseIncrementalDetokenizer method) (vllm.v1.engine.detokenizer.IncrementalDetokenizer method) get_nowait() (vllm.v1.engine.output_processor.RequestOutputCollector method) get_num_attention_heads() (vllm.config.ModelConfig method) get_num_audio_tokens() (vllm.model_executor.models.whisper.WhisperProcessingInfo method) get_num_blocks_to_allocate() (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) get_num_blocks_touched_by_append_slots() (vllm.core.block.block_table.BlockTable method) get_num_cached_tokens() (vllm.core.block.prefix_caching_block.ComputedBlocksTracker method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.sequence.SequenceData method) get_num_common_prefix_blocks() (vllm.v1.core.kv_cache_manager.KVCacheManager method) (vllm.v1.core.single_type_kv_cache_manager.FullAttentionManager method) (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) (vllm.v1.core.single_type_kv_cache_manager.SlidingWindowManager method) get_num_computed_tokens() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_num_crops() (vllm.model_executor.models.gemma3_mm.Gemma3ProcessingInfo method) get_num_embeds() (vllm.multimodal.inputs.PlaceholderRange method) get_num_encoder_tokens() (vllm.v1.request.Request method) get_num_frames() (vllm.model_executor.models.minicpmv.MiniCPMVVideoEmbeddingItems method) (vllm.multimodal.parse.VideoProcessorItems method) get_num_frames_with_most_features() (vllm.model_executor.models.llava_next_video.LlavaNextVideoProcessingInfo method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionProcessingInfo method) (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) get_num_free_blocks() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.v1.core.block_pool.BlockPool method) get_num_free_cpu_blocks() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) get_num_free_gpu_blocks() (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) get_num_full_blocks_touched() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) get_num_image_feature_tokens() (vllm.model_executor.models.glm4v.GLM4VProcessingInfo method) get_num_image_tokens() (vllm.model_executor.models.aria.AriaProcessingInfo method) (vllm.model_executor.models.blip2.Blip2ProcessingInfo method) (vllm.model_executor.models.chameleon.ChameleonProcessingInfo method) (vllm.model_executor.models.clip.CLIPEncoderInfo method) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ProcessingInfo method) (vllm.model_executor.models.florence2.Florence2ProcessingInfo method) (vllm.model_executor.models.fuyu.FuyuProcessingInfo method) (vllm.model_executor.models.gemma3_mm.Gemma3ProcessingInfo method) (vllm.model_executor.models.glm4v.GLM4VProcessingInfo method) (vllm.model_executor.models.h2ovl.H2OVLProcessingInfo method) (vllm.model_executor.models.h2ovl.H2OVLProcessor method) (vllm.model_executor.models.idefics3.Idefics3ProcessingInfo method) (vllm.model_executor.models.internvl.BaseInternVLProcessingInfo method) (vllm.model_executor.models.internvl.BaseInternVLProcessor method) (vllm.model_executor.models.kimi_vl.KimiVLProcessingInfo method) (vllm.model_executor.models.llava.BaseLlavaProcessingInfo method) (vllm.model_executor.models.llava_next.LlavaNextProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.mistral3.BaseLlavaProcessingInfo method) (vllm.model_executor.models.molmo.MolmoProcessingInfo method) (vllm.model_executor.models.paligemma.PaliGemmaProcessingInfo method) (vllm.model_executor.models.phi3v.Phi3VProcessingInfo method) (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo method) (vllm.model_executor.models.pixtral.PixtralHFEncoderInfo method) (vllm.model_executor.models.pixtral.PixtralProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) (vllm.model_executor.models.qwen_vl.QwenVLProcessingInfo method) (vllm.model_executor.models.siglip.SiglipEncoderInfo method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessingInfo method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessor method) (vllm.model_executor.models.vision.VisionEncoderInfo method) get_num_kv_heads() (vllm.config.ModelConfig method) get_num_layers() (vllm.config.ModelConfig method) get_num_layers_by_block_type() (vllm.config.ModelConfig method) get_num_new_matched_tokens() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector.LMCacheConnectorV1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorScheduler method) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnector method) get_num_new_tokens() (vllm.sequence.Sequence method) get_num_nodes_in_placement_group() (in module vllm.executor.ray_utils) get_num_patches() (in module vllm.model_executor.models.molmo) (vllm.model_executor.models.aya_vision.AyaVisionProcessingInfo method) (vllm.model_executor.models.idefics3.Idefics3ProcessingInfo method) get_num_prefill_decode_query_kv_tokens() (in module vllm.attention.backends.utils) get_num_required_blocks() (vllm.core.block.block_table.BlockTable static method) get_num_tiles_per_image() (vllm.model_executor.models.mllama.MllamaProcessingInfo method) get_num_total_blocks() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) get_num_tpu_nodes() (in module vllm.executor.ray_utils) get_num_uncomputed_tokens() (vllm.sequence.SequenceData method) (vllm.sequence.SequenceGroup method) get_num_unfinished_requests() (vllm.engine.llm_engine.LLMEngine method) (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.engine.output_processor.OutputProcessor method) get_num_unfinished_seq_groups() (vllm.core.scheduler.Scheduler method) get_num_video_tokens() (vllm.model_executor.models.llava_next_video.LlavaNextVideoProcessingInfo method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) get_numpy_array() (vllm.v1.worker.block_table.BlockTable method) get_nvidia_driver_version() (in module vllm.collect_env) get_nvidia_smi() (in module vllm.collect_env) get_nxd_sampling_params() (vllm.worker.neuronx_distributed_model_runner.NeuronxDistributedModelRunner method) get_object() (vllm.utils.PyObjectCache method) get_offset() (in module vllm.model_executor.models.phi4mm_utils) (vllm.model_executor.models.phi4mm_audio.TransformerEncoderBase method) get_open_port() (in module vllm.utils) get_open_zmq_inproc_path() (in module vllm.utils) get_open_zmq_ipc_path() (in module vllm.utils) get_os() (in module vllm.collect_env) get_outlines_guided_decoding_logits_processor() (in module vllm.model_executor.guided_decoding.outlines_decoding) get_output() (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) get_output_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) get_output_embeddings() (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) get_output_len() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_output_text_to_return() (vllm.sequence.Sequence method) get_output_token_ids() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_output_token_ids_to_return() (vllm.sequence.Sequence method) get_outputs() (vllm.v1.engine.parallel_sampling.ParentRequest method) get_pack_factor() (in module vllm.model_executor.layers.quantization.utils.quant_utils) get_page_size() (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) get_parallel_config() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) get_pass_context() (in module vllm.compilation.inductor_pass) get_passthrough_data() (vllm.multimodal.parse.DictEmbeddingItems method) (vllm.multimodal.parse.EmbeddingItems method) (vllm.multimodal.parse.ModalityDataItems method) (vllm.multimodal.parse.ProcessorBatchItems method) get_patch_grid_length() (vllm.model_executor.models.clip.CLIPEncoderInfo method) (vllm.model_executor.models.pixtral.PixtralHFEncoderInfo method) (vllm.model_executor.models.siglip.SiglipEncoderInfo method) (vllm.model_executor.models.vision.VisionEncoderInfo method) get_patch_grid_size() (vllm.model_executor.models.pixtral.PixtralHFEncoderInfo method) get_patch_per_chunk() (vllm.model_executor.models.mllama4.Mllama4ProcessingInfo static method) get_patch_size() (vllm.model_executor.models.clip.CLIPEncoderInfo method) (vllm.model_executor.models.pixtral.PixtralHFEncoderInfo method) (vllm.model_executor.models.siglip.SiglipEncoderInfo method) (vllm.model_executor.models.vision.VisionEncoderInfo method) get_patches_grid_size() (in module vllm.model_executor.models.molmo) (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) get_per_layer_parameters() (in module vllm.attention.backends.flashinfer) (in module vllm.v1.attention.backends.flashinfer) get_physical_block_id() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) get_pip_packages() (in module vllm.collect_env) get_pipeline_model_parallel_group (in module vllm.distributed.parallel_state) get_platform() (in module vllm.collect_env) get_pluggable_allocator() (in module vllm.device_allocator.cumem) get_pooling_config() (in module vllm.transformers_utils.config) get_pooling_config_name() (in module vllm.transformers_utils.config) get_pp_group() (in module vllm.distributed.parallel_state) get_pp_indices() (in module vllm.distributed.utils) get_pp_missing_layer_names() (in module vllm.model_executor.models.utils) get_prefix_cache_hit_rate() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.core.scheduler.Scheduler method) get_prefix_token_ids() (vllm.sequence.SequenceData method) get_pretty_env_info() (in module vllm.collect_env) get_processor() (in module vllm.transformers_utils.processor) get_processor_data() (vllm.multimodal.parse.DictEmbeddingItems method) (vllm.multimodal.parse.EmbeddingItems method) (vllm.multimodal.parse.ModalityDataItems method) (vllm.multimodal.parse.ProcessorBatchItems method) get_prompt_adapter_id() (in module vllm.prompt_adapter.models) get_prompt_len() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_prompt_lens() (vllm.model_executor.layers.pooler.SimplePooler method) get_prompt_token_ids() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_pruned_guards() (vllm.compilation.compiler_interface.AlwaysHitShapeEnv method) get_punica_wrapper() (in module vllm.lora.punica_wrapper.punica_selector) (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.hpu.HpuPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) get_python_platform() (in module vllm.collect_env) get_pythonized_sample_results() (in module vllm.model_executor.layers.sampler) get_qqq_scale_perms() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_qqq) get_qqq_weight_perm() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_qqq) get_quant_config() (in module vllm.model_executor.model_loader.weight_utils) get_quant_method() (vllm.model_executor.layers.quantization.aqlm.AQLMConfig method) (vllm.model_executor.layers.quantization.awq.AWQConfig method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig method) (vllm.model_executor.layers.quantization.base_config.QuantizationConfig method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesConfig method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPConfig method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8Config method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8Config method) (vllm.model_executor.layers.quantization.fp8.Fp8Config method) (vllm.model_executor.layers.quantization.gguf.GGUFConfig method) (vllm.model_executor.layers.quantization.gptq.GPTQConfig method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24Config method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinConfig method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig method) (vllm.model_executor.layers.quantization.marlin.MarlinConfig method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8Config method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4Config method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config method) (vllm.model_executor.layers.quantization.neuron_quant.NeuronQuantConfig method) (vllm.model_executor.layers.quantization.ptpc_fp8.PTPCFp8Config method) (vllm.model_executor.layers.quantization.qqq.QQQConfig method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig method) (vllm.model_executor.layers.quantization.torchao.TorchAOConfig method) (vllm.model_executor.layers.quantization.tpu_int8.Int8TpuConfig method) get_quantization_config() (in module vllm.model_executor.layers.quantization) (vllm.config.VllmConfig static method) (vllm.model_executor.layers.quantization.neuron_quant.NeuronQuantConfig method) get_random_lora_request() (vllm.benchmarks.datasets.BenchmarkDataset method) get_rdna_autotune_configs() (in module vllm.attention.ops.triton_flash_attention) get_reasoning_parser() (vllm.reasoning.abs_reasoning_parsers.ReasoningParserManager class method) get_request() (in module vllm.benchmarks.serve) get_requests() (in module vllm.benchmarks.throughput) get_response() (vllm.connections.HTTPConnection method) get_rocm_version() (in module vllm.collect_env) get_rope() (in module vllm.model_executor.layers.rotary_embedding) get_running_cuda_version() (in module vllm.collect_env) get_sampled_token_logprobs() (in module vllm.spec_decode.util) get_sampler() (in module vllm.model_executor.layers.sampler) get_sampling_frames() (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) get_scale_perms() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) get_scale_perms_24() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) get_scaled_act_names() (vllm.model_executor.layers.quantization.torchao.TorchAOConfig method) get_scheduler_config() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) get_scheme() (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig method) get_sentence_transformer_tokenizer_config() (in module vllm.transformers_utils.config) get_seq_len_block_table_args() (in module vllm.attention.backends.utils) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata method) get_seq_lens() (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata method) get_seqlen_agnostic_capture_inputs() (vllm.model_executor.models.bamba.BambaForCausalLM method) (vllm.model_executor.models.constant_size_cache.ConstantSizeCache method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM method) (vllm.model_executor.models.jamba.JambaForCausalLM method) (vllm.model_executor.models.mamba.MambaForCausalLM method) (vllm.model_executor.models.mamba2.Mamba2ForCausalLM method) (vllm.model_executor.models.mamba_cache.MambaCacheManager method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01ForCausalLM method) (vllm.model_executor.models.plamo2.Plamo2ForCausalLM method) (vllm.model_executor.models.zamba2.Zamba2ForCausalLM method) get_seqs() (vllm.sequence.SequenceGroup method) get_served_model_name() (in module vllm.config) get_server_load_metrics() (in module vllm.entrypoints.openai.api_server) get_shape_from_layout() (in module vllm.attention.ops.triton_flash_attention) get_sharded_to_full_mapping() (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbedding method) get_skyworkr1v_target_ratios() (in module vllm.model_executor.models.skyworkr1v) get_slice_image_placeholder() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_sliced_grid() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_sliding_window() (vllm.config.ModelConfig method) get_span_exporter() (in module vllm.tracing) get_sparse_attention_config() (in module vllm.model_executor.model_loader.weight_utils) get_sparse_attn_mask() (in module vllm.attention.ops.blocksparse_attention.utils) get_spec_layer_idx_from_weight_name() (in module vllm.model_executor.models.deepseek_v2) (in module vllm.model_executor.models.kimi_vl) get_spec_proposals() (vllm.spec_decode.interfaces.SpeculativeProposer method) (vllm.spec_decode.medusa_worker.MedusaWorker method) (vllm.spec_decode.multi_step_worker.MultiStepWorker method) (vllm.spec_decode.ngram_worker.NGramWorker method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.top1_proposer.Top1Proposer method) get_state_cls() (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.flashmla.FlashMLABackend static method) (vllm.attention.backends.hpu_attn.HPUAttentionBackend static method) (vllm.attention.backends.ipex_attn.IpexAttnBackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.backends.pallas.PallasAttentionBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_aiter_mla.AiterMLABackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) get_stats() (vllm.v1.metrics.stats.LoRARequestStates method) get_streaming_cache_size() (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) get_strides_from_layout() (in module vllm.attention.ops.triton_flash_attention) get_structured_output_key() (in module vllm.v1.structured_output.request) get_sub_grids() (in module vllm.model_executor.models.pixtral) get_sub_modules() (vllm.model_executor.model_loader.utils.ParamMapping method) get_supported_act_dtypes() (vllm.model_executor.layers.quantization.aqlm.AQLMConfig class method) (vllm.model_executor.layers.quantization.awq.AWQConfig method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig class method) (vllm.model_executor.layers.quantization.base_config.QuantizationConfig method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig class method) (vllm.model_executor.layers.quantization.bitsandbytes.BitsAndBytesConfig class method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig method) (vllm.model_executor.layers.quantization.deepspeedfp.DeepSpeedFPConfig class method) (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8Config class method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8Config class method) (vllm.model_executor.layers.quantization.fp8.Fp8Config class method) (vllm.model_executor.layers.quantization.gguf.GGUFConfig method) (vllm.model_executor.layers.quantization.gptq.GPTQConfig class method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24Config class method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinConfig class method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig class method) (vllm.model_executor.layers.quantization.marlin.MarlinConfig class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8Config class method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4Config class method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config class method) (vllm.model_executor.layers.quantization.neuron_quant.NeuronQuantConfig method) (vllm.model_executor.layers.quantization.qqq.QQQConfig class method) (vllm.model_executor.layers.quantization.quark.quark.QuarkConfig method) (vllm.model_executor.layers.quantization.torchao.TorchAOConfig method) (vllm.model_executor.layers.quantization.tpu_int8.Int8TpuConfig method) get_supported_head_sizes() (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.ops.hpu_paged_attn.HPUPagedAttention static method) (vllm.attention.ops.paged_attn.PagedAttention static method) (vllm.v1.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.v1.attention.backends.flashinfer.FlashInferBackend static method) (vllm.v1.attention.backends.mla.common.MLACommonBackend static method) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend static method) get_supported_lora_modules() (in module vllm.lora.utils) get_supported_mm_limits() (vllm.model_executor.models.aria.AriaProcessingInfo method) (vllm.model_executor.models.aya_vision.AyaVisionProcessingInfo method) (vllm.model_executor.models.blip2.Blip2ProcessingInfo method) (vllm.model_executor.models.chameleon.ChameleonProcessingInfo method) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ProcessingInfo method) (vllm.model_executor.models.florence2.Florence2ProcessingInfo method) (vllm.model_executor.models.fuyu.FuyuProcessingInfo method) (vllm.model_executor.models.gemma3_mm.Gemma3ProcessingInfo method) (vllm.model_executor.models.glm4v.GLM4VProcessingInfo method) (vllm.model_executor.models.granite_speech.GraniteSpeechMultiModalProcessingInfo method) (vllm.model_executor.models.idefics3.Idefics3ProcessingInfo method) (vllm.model_executor.models.internvl.BaseInternVLProcessingInfo method) (vllm.model_executor.models.kimi_vl.KimiVLProcessingInfo method) (vllm.model_executor.models.llava.BaseLlavaProcessingInfo method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoProcessingInfo method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionProcessingInfo method) (vllm.model_executor.models.minicpmo.MiniCPMOProcessingInfo method) (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ProcessingInfo method) (vllm.model_executor.models.mistral3.BaseLlavaProcessingInfo method) (vllm.model_executor.models.mllama.MllamaProcessingInfo method) (vllm.model_executor.models.mllama4.Mllama4ProcessingInfo method) (vllm.model_executor.models.molmo.MolmoProcessingInfo method) (vllm.model_executor.models.ovis.OvisProcessingInfo method) (vllm.model_executor.models.paligemma.PaliGemmaProcessingInfo method) (vllm.model_executor.models.phi3v.Phi3VProcessingInfo method) (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo method) (vllm.model_executor.models.pixtral.PixtralProcessingInfo method) (vllm.model_executor.models.prithvi_geospatial_mae.PrithviGeoSpatialMAEProcessingInfo method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerProcessingInfo method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioProcessingInfo method) (vllm.model_executor.models.qwen2_vl.Qwen2VLProcessingInfo method) (vllm.model_executor.models.qwen_vl.QwenVLProcessingInfo method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessingInfo method) (vllm.model_executor.models.ultravox.UltravoxProcessingInfo method) (vllm.model_executor.models.whisper.WhisperProcessingInfo method) (vllm.multimodal.processing.BaseProcessingInfo method) get_sync_client() (vllm.connections.HTTPConnection method) get_tensor_model_parallel_group (in module vllm.distributed.parallel_state) get_tensor_model_parallel_rank() (in module vllm.distributed.parallel_state) get_tensor_model_parallel_world_size() (in module vllm.distributed.parallel_state) get_text() (vllm.connections.HTTPConnection method) get_throughput() (in module vllm.engine.metrics) get_tnx_model_runner() (vllm.worker.neuron_worker.NeuronWorker method) get_token_bin_counts_and_mask() (in module vllm.model_executor.layers.utils) get_token_embeddings() (vllm.sequence.SequenceData method) get_token_id() (vllm.v1.worker.gpu_input_batch.CachedRequestState method) get_token_ids() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) get_token_per_chunk_from_config() (vllm.model_executor.models.mllama.MllamaProcessingInfo method) get_token_value() (vllm.transformers_utils.processors.ovis.OvisProcessor method) get_tokenizer() (in module vllm.transformers_utils.tokenizer) (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.model_executor.models.pixtral.PixtralProcessingInfo method) (vllm.model_executor.models.qwen_vl.QwenVLProcessingInfo method) (vllm.multimodal.processing.BaseProcessingInfo method) (vllm.transformers_utils.tokenizer_base.TokenizerRegistry static method) (vllm.v1.engine.async_llm.AsyncLLM method) get_tokenizer_data() (vllm.model_executor.guided_decoding.xgrammar_decoding.TokenizerDataCache class method) get_tokenizer_for_seq() (vllm.transformers_utils.detokenizer.Detokenizer method) get_tokenizer_group() (vllm.engine.llm_engine.LLMEngine method) (vllm.inputs.preprocess.InputPreprocessor method) (vllm.v1.engine.llm_engine.LLMEngine method) get_tool_parser() (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParserManager class method) get_total_num_kv_heads() (vllm.config.ModelConfig method) get_tp_group() (in module vllm.distributed.parallel_state) get_type() (in module vllm.engine.arg_utils) get_unhashed_block_ids() (vllm.v1.core.kv_cache_manager.KVCacheBlocks method) get_unseen_token_ids() (vllm.core.block.block_table.BlockTable method) get_usage() (vllm.v1.core.block_pool.BlockPool method) get_version_by_config() (in module vllm.model_executor.models.minicpmv) get_video_frame_size_with_most_features() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_video_max_slice_num() (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo method) get_video_prompt_texts() (vllm.model_executor.models.minicpmv.MiniCPMVMultiModalProcessor method) get_vision_config() (vllm.model_executor.models.aria.AriaProcessingInfo method) (vllm.model_executor.models.pixtral.PixtralProcessingInfo method) get_vision_encoder_info() (in module vllm.model_executor.models.vision) (vllm.model_executor.models.llava.BaseLlavaProcessingInfo method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoProcessingInfo method) (vllm.model_executor.models.mistral3.BaseLlavaProcessingInfo method) (vllm.model_executor.models.paligemma.PaliGemmaProcessingInfo method) get_vision_hidden_states() (vllm.model_executor.models.minicpmv.MiniCPMV2_0 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_5 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_6 method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) get_vit_attn_backend() (in module vllm.model_executor.models.vision) get_vllm_config() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.llm_engine.LLMEngine method) get_vllm_optional_dependencies() (in module vllm.utils) get_vllm_public_assets() (in module vllm.assets.base) get_vllm_version() (in module vllm.collect_env) get_vocab() (vllm.transformers_utils.tokenizer_base.TokenizerBase method) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer method) get_vocab_size() (vllm.config.ModelConfig method) get_w8a8_block_fp8_configs() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) get_w8a8_block_int8_configs() (in module vllm.model_executor.layers.quantization.utils.int8_utils) get_weight_loader() (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Method static method) get_weight_perm() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test) get_weight_perm_24() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) get_window_index() (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer method) get_windows_version() (in module vllm.collect_env) get_world_group() (in module vllm.distributed.parallel_state) getitem_users() (vllm.compilation.fix_functionalization.FixFunctionalizationPass method) ggml_dequantize() (in module vllm._custom_ops) ggml_moe_a8() (in module vllm._custom_ops) ggml_moe_a8_vec() (in module vllm._custom_ops) ggml_moe_get_block_size() (in module vllm._custom_ops) ggml_mul_mat_a8() (in module vllm._custom_ops) ggml_mul_mat_vec_a8() (in module vllm._custom_ops) GGUF (vllm.config.LoadFormat attribute) gguf_quant_weights_iterator() (in module vllm.model_executor.model_loader.weight_utils) GGUFConfig (class in vllm.model_executor.layers.quantization.gguf) GGUFEmbeddingMethod (class in vllm.model_executor.layers.quantization.gguf) GGUFLinearMethod (class in vllm.model_executor.layers.quantization.gguf) GGUFModelLoader (class in vllm.model_executor.model_loader.gguf_loader) GGUFMoEMethod (class in vllm.model_executor.layers.quantization.gguf) GGUFUninitializedParameter (class in vllm.model_executor.layers.quantization.gguf) GiB_bytes (in module vllm.utils) Glm4Attention (class in vllm.model_executor.models.glm4) Glm4DecoderLayer (class in vllm.model_executor.models.glm4) Glm4ForCausalLM (class in vllm.model_executor.models.glm4) Glm4Model (class in vllm.model_executor.models.glm4) GLM4VDummyInputsBuilder (class in vllm.model_executor.models.glm4v) GLM4VForCausalLM (class in vllm.model_executor.models.glm4v) GLM4VModel (class in vllm.model_executor.models.glm4v) GLM4VMultiModalProcessor (class in vllm.model_executor.models.glm4v) GLM4VProcessingInfo (class in vllm.model_executor.models.glm4v) GLM4VProcessor (class in vllm.model_executor.models.glm4v) GLMAttention (class in vllm.model_executor.models.chatglm) GLMBlock (class in vllm.model_executor.models.chatglm) GlmForCausalLM (class in vllm.model_executor.models.glm) GLMMLP (class in vllm.model_executor.models.chatglm) GLMTransformer (class in vllm.model_executor.models.chatglm) GLMVImagePixelInputs (class in vllm.model_executor.models.glm4v) glob() (in module vllm.transformers_utils.s3_utils) global_force_attn_backend() (in module vllm.attention.selector) global_force_attn_backend_context_manager() (in module vllm.attention.selector) global_graph_pool (in module vllm.compilation.backends) global_http_connection (in module vllm.connections) global_media_connector (in module vllm.multimodal.utils) global_pool (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) global_segment_size (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig attribute) global_thread_pool (in module vllm.model_executor.guided_decoding.outlines_decoding) global_view_pos (vllm.transformers_utils.configs.deepseek_vl2.DeepseekVLV2Config attribute) GLU (class in vllm.model_executor.models.phi4mm_utils) GLULinear (class in vllm.model_executor.models.phi4mm_utils) GLUPointWiseConv (class in vllm.model_executor.models.phi4mm_utils) GPT2Attention (class in vllm.model_executor.models.gpt2) GPT2Block (class in vllm.model_executor.models.gpt2) GPT2LMHeadModel (class in vllm.model_executor.models.gpt2) GPT2MLP (class in vllm.model_executor.models.gpt2) GPT2Model (class in vllm.model_executor.models.gpt2) GPTBigCodeAttention (class in vllm.model_executor.models.gpt_bigcode) GPTBigCodeBlock (class in vllm.model_executor.models.gpt_bigcode) GPTBigCodeForCausalLM (class in vllm.model_executor.models.gpt_bigcode) GPTBigCodeModel (class in vllm.model_executor.models.gpt_bigcode) GPTBigMLP (class in vllm.model_executor.models.gpt_bigcode) GPTJAttention (class in vllm.model_executor.models.gpt_j) GPTJBlock (class in vllm.model_executor.models.gpt_j) GPTJForCausalLM (class in vllm.model_executor.models.gpt_j) GPTJMLP (class in vllm.model_executor.models.gpt_j) GPTJModel (class in vllm.model_executor.models.gpt_j) GPTNeoXAttention (class in vllm.model_executor.models.gpt_neox) GPTNeoXForCausalLM (class in vllm.model_executor.models.gpt_neox) GPTNeoXLayer (class in vllm.model_executor.models.gpt_neox) GPTNeoXMLP (class in vllm.model_executor.models.gpt_neox) GPTNeoXModel (class in vllm.model_executor.models.gpt_neox) GPTQ_BITBLAS_MAX_PARALLEL (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) GPTQ_BITBLAS_STORAGE_DTYPE (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig attribute) GPTQ_CKPT_STORAGE_DTYPE (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig attribute) gptq_gemm() (in module vllm._custom_ops) gptq_marlin_24_gemm() (in module vllm._custom_ops) GPTQ_MARLIN_24_MAX_PARALLEL (in module vllm.model_executor.layers.quantization.gptq_marlin_24) GPTQ_MARLIN_24_MIN_THREAD_K (in module vllm.model_executor.layers.quantization.gptq_marlin_24) GPTQ_MARLIN_24_MIN_THREAD_N (in module vllm.model_executor.layers.quantization.gptq_marlin_24) GPTQ_MARLIN_24_SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.gptq_marlin_24) GPTQ_MARLIN_24_SUPPORTED_QUANT_TYPES (in module vllm.model_executor.layers.quantization.gptq_marlin_24) GPTQ_MARLIN_24_TILE (in module vllm.model_executor.layers.quantization.gptq_marlin_24) gptq_marlin_gemm() (in module vllm._custom_ops) GPTQ_MARLIN_MAX_PARALLEL (in module vllm.model_executor.layers.quantization.utils.marlin_utils) GPTQ_MARLIN_MIN_THREAD_K (in module vllm.model_executor.layers.quantization.utils.marlin_utils) GPTQ_MARLIN_MIN_THREAD_N (in module vllm.model_executor.layers.quantization.utils.marlin_utils) gptq_marlin_moe_repack() (in module vllm._custom_ops) gptq_marlin_repack() (in module vllm._custom_ops) GPTQ_MARLIN_TILE (in module vllm.model_executor.layers.quantization.utils.marlin_utils) gptq_pack() (in module vllm.model_executor.layers.quantization.utils.quant_utils) gptq_quantize_weights() (in module vllm.model_executor.layers.quantization.utils.quant_utils) gptq_shuffle() (in module vllm._custom_ops) GPTQBitBLASConfig (class in vllm.model_executor.layers.quantization.gptq_bitblas) GPTQBitBLASLinearMethod (class in vllm.model_executor.layers.quantization.gptq_bitblas) GPTQConfig (class in vllm.model_executor.layers.quantization.gptq) GPTQLinearMethod (class in vllm.model_executor.layers.quantization.gptq) GPTQMarlin24Config (class in vllm.model_executor.layers.quantization.gptq_marlin_24) GPTQMarlin24LinearMethod (class in vllm.model_executor.layers.quantization.gptq_marlin_24) GPTQMarlinConfig (class in vllm.model_executor.layers.quantization.gptq_marlin) GPTQMarlinLinearMethod (class in vllm.model_executor.layers.quantization.gptq_marlin) GPTQMarlinMoEMethod (class in vllm.model_executor.layers.quantization.gptq_marlin) GPTQMarlinState (class in vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) GPU (vllm.utils.Device attribute) gpu_cache_usage (vllm.v1.metrics.stats.SchedulerStats attribute) gpu_cache_usage_sys (vllm.engine.metrics_types.Stats attribute) (vllm.v1.stats.common.KVCacheStats attribute) gpu_memory_utilization (vllm.config.CacheConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) gpu_p2p_access_check() (in module vllm.distributed.device_communicators.custom_all_reduce_utils) gpu_prefix_cache_hit_rate (vllm.engine.metrics_types.Stats attribute) (vllm.v1.stats.common.KVCacheStats attribute) GPUModelRunner (class in vllm.v1.worker.gpu_model_runner) GPUModelRunnerBase (class in vllm.worker.model_runner) GRAMMAR (vllm.model_executor.guided_decoding.outlines_decoding.GuidedDecodingMode attribute) grammar (vllm.sampling_params.GuidedDecodingParams attribute) GRAMMAR (vllm.v1.structured_output.backend_types.StructuredOutputOptions attribute) grammar (vllm.v1.structured_output.request.StructuredOutputRequest property) grammar_bitmask (vllm.v1.core.sched.output.SchedulerOutput attribute) grammar_bitmask() (vllm.v1.structured_output.StructuredOutputManager method) grammar_init() (vllm.v1.structured_output.StructuredOutputManager method) grammar_is_likely_lark() (in module vllm.model_executor.guided_decoding.utils) (in module vllm.v1.structured_output.utils) grammar_str (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) GrammarCompilerCache (class in vllm.model_executor.guided_decoding.xgrammar_decoding) GrammarConfig (class in vllm.model_executor.guided_decoding.xgrammar_decoding) Granite20bFCToolParser (class in vllm.entrypoints.openai.tool_parsers.granite_20b_fc_tool_parser) GraniteAttention (class in vllm.model_executor.models.granite) GraniteDecoderLayer (class in vllm.model_executor.models.granite) GraniteForCausalLM (class in vllm.model_executor.models.granite) GraniteMLP (class in vllm.model_executor.models.granite) GraniteModel (class in vllm.model_executor.models.granite) GraniteMoeAttention (class in vllm.model_executor.models.granitemoe) GraniteMoeDecoderLayer (class in vllm.model_executor.models.granitemoe) GraniteMoeForCausalLM (class in vllm.model_executor.models.granitemoe) GraniteMoeHybridAttention (class in vllm.model_executor.models.granitemoehybrid) GraniteMoeHybridAttentionDecoderLayer (class in vllm.model_executor.models.granitemoehybrid) GraniteMoeHybridForCausalLM (class in vllm.model_executor.models.granitemoehybrid) GraniteMoeHybridMambaDecoderLayer (class in vllm.model_executor.models.granitemoehybrid) GraniteMoeHybridModel (class in vllm.model_executor.models.granitemoehybrid) GraniteMoeModel (class in vllm.model_executor.models.granitemoe) GraniteMoeMoE (class in vllm.model_executor.models.granitemoe) GraniteMoeSharedDecoderLayer (class in vllm.model_executor.models.granitemoeshared) GraniteMoeSharedForCausalLM (class in vllm.model_executor.models.granitemoeshared) GraniteMoeSharedMLP (class in vllm.model_executor.models.granitemoeshared) GraniteMoeSharedModel (class in vllm.model_executor.models.granitemoeshared) GraniteReasoningParser (class in vllm.reasoning.granite_reasoning_parser) GraniteSpeechAudioInputs (class in vllm.model_executor.models.granite_speech) GraniteSpeechConformerAttention (class in vllm.model_executor.models.granite_speech) GraniteSpeechConformerBlock (class in vllm.model_executor.models.granite_speech) GraniteSpeechConformerConvModule (class in vllm.model_executor.models.granite_speech) GraniteSpeechConformerDepthWiseConv1d (class in vllm.model_executor.models.granite_speech) GraniteSpeechConformerFeedForward (class in vllm.model_executor.models.granite_speech) GraniteSpeechCTCEncoder (class in vllm.model_executor.models.granite_speech) GraniteSpeechDummyInputsBuilder (class in vllm.model_executor.models.granite_speech) GraniteSpeechEncoderProjector (class in vllm.model_executor.models.granite_speech) GraniteSpeechForConditionalGeneration (class in vllm.model_executor.models.granite_speech) GraniteSpeechMultiModalProcessingInfo (class in vllm.model_executor.models.granite_speech) GraniteSpeechMultiModalProcessor (class in vllm.model_executor.models.granite_speech) GraniteToolParser (class in vllm.entrypoints.openai.tool_parsers.granite_tool_parser) graph (vllm.compilation.backends.SplitItem attribute) (vllm.compilation.backends.VllmBackend attribute) (vllm.compilation.multi_output_match.MultiOutputMatch property) (vllm.worker.model_runner.CUDAGraphRunner property) graph_capture() (in module vllm.distributed.parallel_state) (vllm.attention.backends.abstract.AttentionState method) (vllm.attention.backends.flashinfer.FlashInferState method) (vllm.attention.backends.flashmla.FlashMLAState method) (vllm.attention.backends.mla.common.MLACommonState method) (vllm.attention.backends.rocm_aiter_mla.AiterMLAState method) (vllm.attention.backends.utils.CommonAttentionState method) (vllm.distributed.parallel_state.GroupCoordinator method) graph_capture_get_metadata_for_batch() (vllm.attention.backends.abstract.AttentionState method) (vllm.attention.backends.flashinfer.FlashInferState method) (vllm.attention.backends.flashmla.FlashMLAState method) (vllm.attention.backends.mla.common.MLACommonState method) (vllm.attention.backends.rocm_aiter_mla.AiterMLAState method) (vllm.attention.backends.utils.CommonAttentionState method) graph_clone() (vllm.attention.backends.abstract.AttentionState method) (vllm.attention.backends.flashinfer.FlashInferState method) (vllm.attention.backends.mla.common.MLACommonState method) (vllm.attention.backends.utils.CommonAttentionState method) graph_id (vllm.compilation.backends.SplitItem attribute) graph_pool (vllm.compilation.backends.VllmBackend attribute) GraphCaptureContext (class in vllm.distributed.parallel_state) GREEDY (vllm.sampling_params.SamplingType attribute) greedy_sample() (vllm.v1.sample.sampler.Sampler method) (vllm.v1.sample.tpu.sampler.Sampler method) greedy_samples (vllm.model_executor.layers.sampler.SampleResultArgsType attribute) GREEDY_TEMPERATURE (in module vllm.v1.sample.rejection_sampler) GritLM (class in vllm.model_executor.models.gritlm) GritLMPooler (class in vllm.model_executor.models.gritlm) Grok1Attention (class in vllm.model_executor.models.grok1) Grok1DecoderLayer (class in vllm.model_executor.models.grok1) Grok1ForCausalLM (class in vllm.model_executor.models.grok1) Grok1Model (class in vllm.model_executor.models.grok1) Grok1MoE (class in vllm.model_executor.models.grok1) group (vllm.entrypoints.openai.protocol.ModelPermission attribute) GROUP (vllm.model_executor.layers.fused_moe.layer.FusedMoeWeightScaleSupported attribute) group_broadcast() (in module vllm.model_executor.layers.quantization.utils.quant_utils) group_id (vllm.sequence.SequenceGroupBase attribute) group_mm_inputs_by_modality() (in module vllm.multimodal.utils) group_size (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearLayerConfig attribute) (vllm.transformers_utils.configs.arctic.ArcticQuantizationConfig attribute) GroupCoordinator (class in vllm.distributed.parallel_state) grouped_topk() (in module vllm.model_executor.layers.fused_moe.fused_moe) GroupQuantScaleParameter (class in vllm.model_executor.parameter) GteModel (class in vllm.model_executor.models.bert_with_rope) GuidanceBackend (class in vllm.v1.structured_output.backend_guidance) GuidanceGrammar (class in vllm.v1.structured_output.backend_guidance) GuidanceLogitsProcessor (class in vllm.model_executor.guided_decoding.guidance_logits_processors) guided_choice (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.LLMGuidedOptions attribute) guided_decoding (vllm.sampling_params.SamplingParams attribute) guided_decoding_backend (vllm.config.DecodingConfig property) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.LLMGuidedOptions attribute) guided_decoding_disable_additional_properties (vllm.engine.arg_utils.EngineArgs attribute) guided_decoding_disable_any_whitespace (vllm.engine.arg_utils.EngineArgs attribute) guided_decoding_disable_fallback (vllm.engine.arg_utils.EngineArgs attribute) guided_grammar (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.LLMGuidedOptions attribute) guided_json (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.LLMGuidedOptions attribute) guided_json_object (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.LLMGuidedOptions attribute) guided_regex (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.LLMGuidedOptions attribute) guided_whitespace_pattern (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.LLMGuidedOptions attribute) GuidedDecodingBackend (in module vllm.config) GuidedDecodingBackendV0 (in module vllm.config) GuidedDecodingBackendV1 (in module vllm.config) GuidedDecodingMode (class in vllm.model_executor.guided_decoding.outlines_decoding) GuidedDecodingParams (class in vllm.sampling_params) GuidedDecodingRequest (class in vllm.model_executor.guided_decoding.guided_fields) H h (vllm.model_executor.models.phi4mm_utils.MultiHeadedAttention attribute) H2OVLChatConfig (class in vllm.transformers_utils.configs.h2ovl) H2OVLChatModel (class in vllm.model_executor.models.h2ovl) H2OVLMultiModalProcessor (class in vllm.model_executor.models.h2ovl) H2OVLProcessingInfo (class in vllm.model_executor.models.h2ovl) H2OVLProcessor (class in vllm.model_executor.models.h2ovl) h_k (vllm.model_executor.models.phi4mm_utils.MultiHeadedAttention attribute) HabanaProfilerCounterHelper (class in vllm.worker.hpu_model_runner) Handle (class in vllm.distributed.device_communicators.shm_broadcast) handle (vllm.device_allocator.cumem.AllocationData attribute) handle() (vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer method) (vllm.entrypoints.openai.serving_engine.OpenAIServing method) handle_new_input() (vllm.engine.multiprocessing.engine.MQLLMEngine method) HandleType (in module vllm.device_allocator.cumem) has_bias() (vllm.scalar_type.ScalarType method) has_cache() (vllm.v1.core.encoder_cache_manager.EncoderCacheManager method) has_cdna_target() (in module vllm.attention.ops.triton_flash_attention) has_deep_gemm (in module vllm.model_executor.layers.fused_moe.deep_gemm_moe) (in module vllm.model_executor.layers.quantization.fp8) has_device_capability() (vllm.platforms.cuda.NvmlCudaPlatform class method) (vllm.platforms.interface.Platform class method) has_finished_requests() (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) has_g_idx (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearLayerConfig attribute) has_infs() (vllm.scalar_type.ScalarType method) has_initial_states (vllm.model_executor.layers.mamba.mamba2_metadata.Mamba2Metadata attribute) has_inner_state (vllm.model_executor.models.interfaces.HasInnerState attribute) has_inner_state() (in module vllm.model_executor.models.interfaces) has_kv_transfer_group() (in module vllm.distributed.kv_transfer.kv_transfer_state) has_lmf_unsupported_json_features() (in module vllm.model_executor.guided_decoding.utils) has_nans() (vllm.scalar_type.ScalarType method) has_new_requests() (vllm.engine.async_llm_engine.RequestTracker method) has_noops (vllm.model_executor.models.interfaces.HasNoOps attribute) has_noops() (in module vllm.model_executor.models.interfaces) has_processor() (vllm.multimodal.registry.MultiModalRegistry method) has_requests() (vllm.v1.core.sched.interface.SchedulerInterface method) HAS_TRITON (in module vllm.triton_utils.importing) has_unfinished_dp() (vllm.config.ParallelConfig static method) has_unfinished_requests() (vllm.engine.llm_engine.LLMEngine method) (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.engine.output_processor.OutputProcessor method) has_unfinished_requests_dp() (vllm.v1.engine.llm_engine.LLMEngine method) has_unfinished_requests_for_virtual_engine() (vllm.engine.llm_engine.LLMEngine method) has_unfinished_seqs() (vllm.core.scheduler.Scheduler method) has_xgrammar_unsupported_json_features() (in module vllm.model_executor.guided_decoding.utils) (in module vllm.v1.structured_output.backend_xgrammar) hash_block_tokens() (in module vllm.v1.core.kv_cache_utils) (vllm.core.block.prefix_caching_block.PrefixCachingBlock class method) hash_dict() (vllm.compilation.inductor_pass.InductorPass static method) hash_kwargs() (vllm.multimodal.hasher.MultiModalHasher class method) hash_of_block() (vllm.sequence.Sequence method) hash_prompt_mm_data() (vllm.multimodal.hasher.MultiModalHasher class method) hash_request_tokens() (in module vllm.v1.core.kv_cache_utils) hash_source() (vllm.compilation.inductor_pass.InductorPass static method) hash_value (vllm.v1.core.kv_cache_utils.BlockHashType attribute) HashableDict (class in vllm.transformers_utils.processor) HashableList (class in vllm.transformers_utils.processor) HasInnerState (class in vllm.model_executor.models.interfaces) HasNoOps (class in vllm.model_executor.models.interfaces) hd_feature_transform() (vllm.model_executor.models.phi3v.Phi3HDImageEmbedding method) head_dim (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.transformers_utils.configs.falcon.RWConfig property) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) head_size (vllm.v1.kv_cache_interface.AttentionSpec attribute) head_sliding_step (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) HeadMajorColumnParallelLinear (class in vllm.model_executor.models.phi3_small) HeadMajorQKVParallelLinear (class in vllm.model_executor.models.phi3_small) heads (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) health() (in module vllm.entrypoints.api_server) (in module vllm.entrypoints.openai.api_server) HEALTHY_RESPONSE (in module vllm.engine.multiprocessing.engine) height (vllm.model_executor.models.kimi_vl.MaxImageTokenMeta attribute) (vllm.multimodal.parse.ImageSize attribute) help (vllm.entrypoints.cli.benchmark.base.BenchmarkSubcommandBase property) (vllm.entrypoints.cli.benchmark.latency.BenchmarkLatencySubcommand property) (vllm.entrypoints.cli.benchmark.serve.BenchmarkServingSubcommand property) (vllm.entrypoints.cli.benchmark.throughput.BenchmarkThroughputSubcommand property) Hermes2ProToolParser (class in vllm.entrypoints.openai.tool_parsers.hermes_tool_parser) HF (vllm.transformers_utils.config.ConfigFormat attribute) hf_config (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) hf_config_override() (vllm.config.SpeculativeConfig static method) hf_config_path (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) hf_overrides (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) hf_processor_mm_kwargs (vllm.multimodal.profiling.ProcessorInputs attribute) hf_to_vllm_mapper (vllm.model_executor.models.aria.AriaForConditionalGeneration attribute) (vllm.model_executor.models.bert.BertEmbeddingModel attribute) (vllm.model_executor.models.bert_with_rope.BertWithRope attribute) (vllm.model_executor.models.bert_with_rope.GteModel attribute) (vllm.model_executor.models.bert_with_rope.JinaRobertaModel attribute) (vllm.model_executor.models.bert_with_rope.NomicBertModel attribute) (vllm.model_executor.models.chatglm.ChatGLMBaseModel attribute) (vllm.model_executor.models.deepseek_vl2.DeepseekVLV2ForCausalLM attribute) (vllm.model_executor.models.modernbert.ModernBertModel attribute) (vllm.model_executor.models.molmo.MolmoForCausalLM attribute) (vllm.model_executor.models.opt.OPTForCausalLM attribute) (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM attribute) (vllm.model_executor.models.phi3v.Phi3VForCausalLM attribute) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM attribute) (vllm.model_executor.models.qwen2.Qwen2EmbeddingModel attribute) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration attribute) (vllm.model_executor.models.telechat2.TeleChat2ForCausalLM attribute) (vllm.model_executor.models.transformers.TransformersForCausalLM property) (vllm.model_executor.models.ultravox.UltravoxModel attribute) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration attribute) (vllm.model_executor.models.zamba2.Zamba2ForCausalLM attribute) HF_TOKEN (in module vllm.transformers_utils.config) hf_token (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) HfAudioItem (in module vllm.multimodal.inputs) HfImageItem (in module vllm.multimodal.inputs) HfOverrides (in module vllm.config) HfVideoItem (in module vllm.multimodal.inputs) hidden_size (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) (vllm.model_executor.models.plamo2.Plamo2Config attribute) (vllm.outputs.EmbeddingOutput property) hidden_size_per_head (vllm.model_executor.models.plamo2.Plamo2Config attribute) hidden_states (vllm.model_executor.layers.sampler.SamplerOutput attribute) (vllm.sequence.HiddenStates attribute) (vllm.spec_decode.interfaces.SpeculativeScores attribute) HiddenStates (class in vllm.sequence) hit_rate (vllm.v1.core.kv_cache_utils.PrefixCachingMetrics property) hit_ratio (vllm.utils.CacheInfo property) hits (vllm.utils.CacheInfo attribute) (vllm.v1.metrics.stats.PrefixCacheStats attribute) homo_head (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) homo_head_group (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) HPU (vllm.platforms.interface.PlatformEnum attribute) hpu_platform_plugin() (in module vllm.platforms) HPUAttentionBackend (class in vllm.attention.backends.hpu_attn) HPUAttentionImpl (class in vllm.attention.backends.hpu_attn) HPUAttentionMetadata (class in vllm.attention.backends.hpu_attn) HPUCacheEngine (class in vllm.worker.hpu_worker) HpuCommunicator (class in vllm.distributed.device_communicators.hpu_communicator) HpuModelAdapter (class in vllm.worker.hpu_model_runner) HPUModelRunner (class in vllm.worker.hpu_model_runner) HPUModelRunnerBase (class in vllm.worker.hpu_model_runner) HPUPagedAttention (class in vllm.attention.ops.hpu_paged_attn) HPUPagedAttentionMetadata (class in vllm.attention.ops.hpu_paged_attn) HpuPlatform (class in vllm.platforms.hpu) HPUWorker (class in vllm.worker.hpu_worker) HQQEmptyParameter (class in vllm.model_executor.layers.quantization.hqq_marlin) HQQMarlinConfig (class in vllm.model_executor.layers.quantization.hqq_marlin) HQQMarlinMethod (class in vllm.model_executor.layers.quantization.hqq_marlin) HQQweightParameter (class in vllm.model_executor.layers.quantization.hqq_marlin) HQQZeroScaleParameter (class in vllm.model_executor.layers.quantization.hqq_marlin) HTTPConnection (class in vllm.connections) HuggingFaceDataset (class in vllm.benchmarks.datasets) human_readable_int() (in module vllm.engine.arg_utils) hwm (vllm.config.KVEventsConfig attribute) I id (vllm.entrypoints.openai.protocol.BatchRequestOutput attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.ChatCompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.ClassificationResponse attribute) (vllm.entrypoints.openai.protocol.CompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.DeltaToolCall attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponse attribute) (vllm.entrypoints.openai.protocol.ModelCard attribute) (vllm.entrypoints.openai.protocol.ModelPermission attribute) (vllm.entrypoints.openai.protocol.PoolingResponse attribute) (vllm.entrypoints.openai.protocol.RerankResponse attribute) (vllm.entrypoints.openai.protocol.ScoreResponse attribute) (vllm.entrypoints.openai.protocol.ToolCall attribute) (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) (vllm.entrypoints.openai.protocol.TranscriptionStreamResponse attribute) (vllm.entrypoints.openai.tool_parsers.mistral_tool_parser.MistralToolCall attribute) id() (vllm.scalar_type.ScalarType method) Idefics2Encoder (class in vllm.model_executor.models.idefics2_vision_model) Idefics2EncoderLayer (class in vllm.model_executor.models.idefics2_vision_model) Idefics2VisionAttention (class in vllm.model_executor.models.idefics2_vision_model) Idefics2VisionEmbeddings (class in vllm.model_executor.models.idefics2_vision_model) Idefics2VisionMLP (class in vllm.model_executor.models.idefics2_vision_model) Idefics2VisionTransformer (class in vllm.model_executor.models.idefics2_vision_model) Idefics3Connector (class in vllm.model_executor.models.idefics3) Idefics3DummyInputsBuilder (class in vllm.model_executor.models.idefics3) Idefics3ForConditionalGeneration (class in vllm.model_executor.models.idefics3) Idefics3ImageEmbeddingInputs (class in vllm.model_executor.models.idefics3) Idefics3ImagePixelInputs (class in vllm.model_executor.models.idefics3) Idefics3Model (class in vllm.model_executor.models.idefics3) Idefics3MultiModalProcessor (class in vllm.model_executor.models.idefics3) Idefics3ProcessingInfo (class in vllm.model_executor.models.idefics3) Idefics3SimpleMLP (class in vllm.model_executor.models.idefics3) identity() (in module vllm.utils) ids() (vllm.core.block.common.BlockList method) IEEE_754 (vllm.scalar_type.NanRepr attribute) ignore_eos (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.BeamSearchParams attribute) (vllm.sampling_params.SamplingParams attribute) ignore_head (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) IGNORE_ID (in module vllm.transformers_utils.configs.ovis) (in module vllm.transformers_utils.processors.ovis) ignore_patterns (vllm.config.LoadConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) ignored_seq_groups (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.core.scheduler.SchedulerPrefillOutputs attribute) im_col_id() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) IM_COL_TOKEN (in module vllm.model_executor.models.molmo) im_end_id() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) IM_END_TOKEN (in module vllm.model_executor.models.molmo) im_start_id() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) IM_START_TOKEN (in module vllm.model_executor.models.molmo) image (vllm.multimodal.inputs.MultiModalDataBuiltins attribute) IMAGE_ATOM_ID (in module vllm.transformers_utils.configs.ovis) image_attention_mask (vllm.model_executor.models.phi4mm.Phi4MMImagePixelInputs attribute) image_break_id() (vllm.model_executor.models.pixtral.PixtralProcessorAdapter method) image_default_input_size (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_emb_dim (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_embeds (vllm.assets.image.ImageAsset property) (vllm.entrypoints.chat_utils.ChatCompletionContentPartImageEmbedsParam attribute) (vllm.model_executor.models.minicpmv.MiniCPMVImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLImageEmbeddingInputs attribute) image_end_id() (vllm.model_executor.models.pixtral.PixtralProcessorAdapter method) image_end_tag (vllm.model_executor.models.qwen_vl.QwenVLProcessor property) image_grid_hws (vllm.model_executor.models.kimi_vl.KimiVLImagePixelInputs attribute) image_grid_pinpoints (vllm.model_executor.models.llava_next.LlavaNextLikeConfig attribute) image_grid_thw (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLImagePixelInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLImagePixelInputs attribute) IMAGE_INDICATOR_IDS (in module vllm.model_executor.models.ovis) (in module vllm.transformers_utils.configs.ovis) image_indicators_to_visual_tokens() (vllm.model_executor.models.ovis.OvisMultiModalProcessor method) image_masks (vllm.model_executor.models.molmo.MolmoImageInputs attribute) image_mlp_activations (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_mlp_dim (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_norm_eps (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_num_heads (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_num_key_value_heads (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_num_layers (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_num_patch (vllm.model_executor.models.molmo.VisionBackboneConfig property) image_num_pos (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_pad_tag (vllm.model_executor.models.qwen_vl.QwenVLProcessor property) IMAGE_PAD_TOKEN_ID_MAP (in module vllm.model_executor.models.ovis) IMAGE_PAD_TOKEN_MAP (in module vllm.model_executor.models.ovis) image_patch_id() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) image_patch_size (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_patch_size() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) IMAGE_PATCH_TOKEN (in module vllm.model_executor.models.molmo) image_pattern (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo attribute) image_pixels_to_features() (vllm.model_executor.models.idefics3.Idefics3Model method) image_pos_patch_size (vllm.model_executor.models.molmo.VisionBackboneConfig attribute) image_processor (vllm.model_executor.models.pixtral.PixtralProcessorAdapter property) image_processor_class (vllm.transformers_utils.processors.ovis.OvisProcessor attribute) image_size (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) image_size() (vllm.model_executor.models.pixtral.PixtralProcessorAdapter method) image_sizes (vllm.model_executor.models.llava_next.LlavaNextImagePixelInputs attribute) (vllm.model_executor.models.llava_onevision.LlavaOnevisionImagePixelInputs attribute) (vllm.model_executor.models.phi3v.Phi3VImagePixelInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMImagePixelInputs attribute) image_start_tag (vllm.model_executor.models.qwen_vl.QwenVLProcessor property) image_to_pixel_values_h2ovl() (in module vllm.model_executor.models.h2ovl) image_to_pixel_values_internvl() (in module vllm.model_executor.models.internvl) image_to_pixel_values_skyworkr1v() (in module vllm.model_executor.models.skyworkr1v) IMAGE_TOKEN (in module vllm.model_executor.models.ovis) (in module vllm.transformers_utils.configs.ovis) image_token (vllm.model_executor.models.llava.LlavaLikeProcessor attribute) (vllm.model_executor.models.mistral3.LlavaLikeProcessor attribute) IMAGE_TOKEN_ID (in module vllm.transformers_utils.configs.ovis) image_token_id (vllm.model_executor.models.h2ovl.H2OVLProcessor property) (vllm.model_executor.models.internvl.BaseInternVLProcessor property) (vllm.model_executor.models.internvl.InternVLProcessor property) (vllm.model_executor.models.kimi_vl.KimiVLProcessingInfo property) (vllm.model_executor.models.nvlm_d.NVLMProcessor property) (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessor property) (vllm.model_executor.models.skyworkr1v.SkyworkR1VProcessor property) image_token_id() (vllm.model_executor.models.pixtral.PixtralProcessorAdapter method) image_token_index (vllm.model_executor.models.llava.LlavaLikeConfig attribute) (vllm.model_executor.models.mistral3.LlavaLikeConfig attribute) image_token_length_h() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) image_token_length_w() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) image_tokens (vllm.model_executor.models.phi4mm.Phi4MMProcessingInfo property) image_tokens() (vllm.model_executor.models.chameleon.ChameleonImageVocabularyMapping method) image_url (vllm.entrypoints.chat_utils.CustomChatCompletionContentSimpleImageParam attribute) ImageAsset (class in vllm.assets.image) ImageAssetName (in module vllm.assets.image) ImageEmbeddingItems (class in vllm.multimodal.parse) ImageEmbeddingMediaIO (class in vllm.multimodal.image) ImageInputs (in module vllm.model_executor.models.idefics3) ImageItem (in module vllm.multimodal.inputs) ImageMediaIO (class in vllm.multimodal.image) IMAGENET_MEAN (in module vllm.model_executor.models.internvl) (in module vllm.model_executor.models.skyworkr1v) IMAGENET_STD (in module vllm.model_executor.models.internvl) (in module vllm.model_executor.models.skyworkr1v) ImageProcessorItems (class in vllm.multimodal.parse) ImageProjectorMLP (class in vllm.model_executor.models.molmo) images (vllm.model_executor.models.molmo.MolmoImageInputs attribute) (vllm.model_executor.models.pixtral.PixtralImagePixelInputs attribute) images_spatial_crop (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ImagePixelInputs attribute) ImageSize (class in vllm.multimodal.parse) ImageTransform (class in vllm.transformers_utils.processors.deepseek_vl2) IMATRIX_QUANT_TYPES (in module vllm.model_executor.layers.quantization.gguf) img2bpe() (vllm.model_executor.models.chameleon.ChameleonImageVocabularyMapping method) img2bpe_mapping_tensor() (vllm.model_executor.models.chameleon.ChameleonImageVocabularyMapping method) IMG_CONTEXT (in module vllm.model_executor.models.internvl) (in module vllm.model_executor.models.skyworkr1v) IMG_END (in module vllm.model_executor.models.internvl) (in module vllm.model_executor.models.skyworkr1v) IMG_PAD (in module vllm.model_executor.models.nvlm_d) IMG_START (in module vllm.model_executor.models.internvl) (in module vllm.model_executor.models.skyworkr1v) import_from_path() (in module vllm.utils) import_pynvml() (in module vllm.utils) import_reasoning_parser() (vllm.reasoning.abs_reasoning_parsers.ReasoningParserManager class method) import_tool_parser() (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParserManager class method) in_the_same_node_as() (in module vllm.distributed.parallel_state) in_wsl() (in module vllm.platforms.interface) inc() (vllm.utils.AtomicCounter method) include_gpu_probs_tensor (vllm.lora.layers.LogitsProcessorWithLoRA property) include_stop_str_in_output (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.BeamSearchParams attribute) (vllm.sampling_params.SamplingParams attribute) include_usage (vllm.entrypoints.openai.protocol.StreamOptions attribute) incr() (vllm.core.block.common.ReadOnlyRefCounter method) (vllm.core.block.common.RefCounter method) (vllm.core.block.common.RefCounterProtocol method) incr_ref() (vllm.v1.core.kv_cache_utils.KVCacheBlock method) increase_pool() (vllm.core.block.common.BlockPool method) IncrementalDetokenizer (class in vllm.v1.engine.detokenizer) indent_string() (in module vllm.profiler.utils) index (vllm.entrypoints.openai.protocol.ChatCompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.ClassificationData attribute) (vllm.entrypoints.openai.protocol.CompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.CompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.DeltaToolCall attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponseData attribute) (vllm.entrypoints.openai.protocol.PoolingResponseData attribute) (vllm.entrypoints.openai.protocol.RerankResult attribute) (vllm.entrypoints.openai.protocol.ScoreResponseData attribute) (vllm.outputs.CompletionOutput attribute) index() (vllm.v1.utils.ConstantList method) index_map() (vllm.multimodal.base.MultiModalPlaceholderMap method) index_mapping (vllm.adapter_commons.layers.AdapterMapping attribute) inducator_tokens (vllm.model_executor.models.ovis.OvisImagePatchInputs attribute) inductor_compile_config (vllm.config.CompilationConfig attribute) inductor_passes (vllm.config.CompilationConfig attribute) InductorAdaptor (class in vllm.compilation.compiler_interface) InductorPass (class in vllm.compilation.inductor_pass) InductorStandaloneAdaptor (class in vllm.compilation.compiler_interface) infeasible_seq_groups (vllm.core.scheduler.SchedulerSwappedInOutputs attribute) InfEncoder (class in vllm.benchmarks.utils) infer_device() (in module vllm.prompt_adapter.utils) infer_global_hyperparameters() (in module vllm.attention.backends.flashinfer) (in module vllm.v1.attention.backends.flashinfer) inference_latency_s (vllm.v1.stats.common.RequestStats property) inference_mode() (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.hpu.HpuPlatform static method) (vllm.platforms.interface.Platform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform static method) inference_time (vllm.v1.metrics.stats.FinishedRequestStats attribute) info() (vllm.engine.metrics.LoggingStatLogger method) (vllm.engine.metrics.PrometheusStatLogger method) (vllm.engine.metrics.RayPrometheusStatLogger method) (vllm.engine.metrics_types.StatLoggerBase method) init_app() (in module vllm.entrypoints.api_server) init_app_state() (in module vllm.entrypoints.openai.api_server) init_audio_module() (vllm.model_executor.models.minicpmo.MiniCPMO method) init_backend() (vllm.config.CompilationConfig method) init_block() (vllm.core.block.common.BlockPool method) init_buffers() (vllm.model_executor.models.transformers.TransformersModel method) init_cached_hf_modules() (in module vllm.utils) init_cached_inter_data() (vllm.worker.model_runner.ModelInputForGPUBuilder method) init_config_defaults (in module vllm.transformers_utils.configs.mpt) init_custom_ar() (in module vllm._custom_ops) init_device() (vllm.spec_decode.medusa_worker.MedusaWorker method) (vllm.spec_decode.multi_step_worker.MultiStepWorker method) (vllm.spec_decode.ngram_worker.NGramWorker method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.neuron_worker.NeuronWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.WorkerBase method) (vllm.worker.worker_base.WorkerWrapperBase method) (vllm.worker.xpu_worker.XPUWorker method) init_distributed_environment() (in module vllm.distributed.parallel_state) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.neuron_worker.NeuronWorker method) init_gpu_tensors() (vllm.model_executor.layers.spec_decode_base_sampler.SpecDecodeBaseSampler method) (vllm.spec_decode.metrics.AsyncMetricsCollector method) init_llm() (vllm.model_executor.models.minicpmv.MiniCPMV2_0 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_5 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_6 method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) init_logger() (in module vllm.logger) init_mm_limits_per_prompt() (vllm.multimodal.registry.MultiModalRegistry method) init_model_parallel_group() (in module vllm.distributed.parallel_state) init_multi_step() (vllm.sequence.SequenceGroup method) init_multi_step_from_lookahead_slots() (vllm.sequence.SequenceGroup method) init_parameters() (vllm.model_executor.models.transformers.TransformersModel method) init_processor() (vllm.inputs.registry.InputContext method) init_relative_attention_bias() (vllm.model_executor.models.phi4mm_audio.ConformerEncoder method) init_resampler() (vllm.model_executor.models.minicpmo.MiniCPMO method) (vllm.model_executor.models.minicpmv.MiniCPMV2_0 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_5 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_6 method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) init_static_loras() (vllm.entrypoints.openai.serving_models.OpenAIServingModels method) init_tensors() (vllm.model_executor.layers.spec_decode_base_sampler.SpecDecodeBaseSampler method) (vllm.spec_decode.metrics.AsyncMetricsCollector method) init_tokenizer_from_configs() (in module vllm.transformers_utils.tokenizer_group) init_tpu_worker_distributed_environment() (in module vllm.v1.worker.tpu_worker) init_tracer() (in module vllm.tracing) init_vision_module() (vllm.model_executor.models.minicpmo.MiniCPMO method) (vllm.model_executor.models.minicpmv.MiniCPMV2_0 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_5 method) (vllm.model_executor.models.minicpmv.MiniCPMV2_6 method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) init_vision_tower_for_llava() (in module vllm.model_executor.models.llava) (in module vllm.model_executor.models.mistral3) init_vllm_registered_model() (in module vllm.model_executor.models.utils) init_with_cudagraph_sizes() (vllm.config.CompilationConfig method) init_worker() (vllm.worker.worker_base.WorkerWrapperBase method) init_worker_distributed_environment() (in module vllm.v1.worker.gpu_worker) (in module vllm.worker.hpu_worker) (in module vllm.worker.worker) (vllm.worker.xpu_worker.XPUWorker method) init_world_group() (in module vllm.distributed.parallel_state) INITIAL_INCREMENTAL_DETOKENIZATION_OFFSET (in module vllm.transformers_utils.detokenizer_utils) initialize() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) initialize_cache() (vllm.compilation.backends.CompilerManager method) (vllm.compilation.compiler_interface.CompilerInterface method) (vllm.compilation.compiler_interface.InductorAdaptor method) (vllm.compilation.compiler_interface.InductorStandaloneAdaptor method) (vllm.executor.executor_base.ExecutorBase method) (vllm.spec_decode.proposer_worker_base.NonLLMProposerWorkerBase method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.neuron_worker.NeuronWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.WorkerBase method) initialize_dummy_weights() (in module vllm.model_executor.model_loader.weight_utils) initialize_from_config() (vllm.v1.executor.abstract.Executor method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.worker.worker_base.WorkerWrapperBase method) initialize_kv_cache() (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) initialize_model() (in module vllm.model_executor.model_loader.utils) initialize_model_parallel() (in module vllm.distributed.parallel_state) initialize_ray_cluster() (in module vllm.executor.ray_utils) inject_24() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) inplace_fused_experts() (in module vllm.model_executor.layers.fused_moe.fused_moe) inplace_fused_experts_fake() (in module vllm.model_executor.layers.fused_moe.fused_moe) InprocClient (class in vllm.v1.engine.core_client) input (vllm.entrypoints.openai.protocol.ClassificationRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) input_addresses (vllm.compilation.backends.ConcreteSizeEntry attribute) input_block_ids (vllm.worker.neuron_model_runner.ModelInputForNeuron attribute) input_buffers (vllm.compilation.backends.VllmBackend attribute) input_dim (vllm.lora.lora.LoRALayerWeights property) (vllm.lora.lora.PackedLoRALayerWeights property) (vllm.model_executor.parameter.RowvLLMParameter property) (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) input_features (vllm.model_executor.models.granite_speech.GraniteSpeechAudioInputs attribute) (vllm.model_executor.models.qwen2_audio.Qwen2AudioInputs attribute) (vllm.model_executor.models.whisper.WhisperAudioInputs attribute) input_features_mask (vllm.model_executor.models.granite_speech.GraniteSpeechAudioInputs attribute) input_ids (vllm.transformers_utils.tokenizers.mistral.Encoding attribute) input_lens (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) input_positions (vllm.attention.backends.cpu_mla.CPUMLAMetadata attribute) (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.neuron_model_runner.ModelInputForNeuron attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) INPUT_PROCESSED (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) input_processor_end_ts_s (vllm.v1.stats.common.RequestStats attribute) input_queue_size (vllm.v1.stats.common.EngineCoreProcessStats attribute) INPUT_REGISTRY (in module vllm.inputs) input_socket (vllm.v1.engine.core_client.BackgroundResources attribute) input_symmetric (vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel.ScaledMMLinearLayerConfig attribute) input_to_float8() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) input_to_int8() (in module vllm.model_executor.layers.quantization.utils.int8_utils) input_tokens (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.neuron_model_runner.ModelInputForNeuron attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) InputBatch (class in vllm.v1.worker.gpu_input_batch) InputContext (class in vllm.inputs.registry) InputPreprocessor (class in vllm.inputs.preprocess) InputProcessingContext (class in vllm.inputs.registry) InputProcessingError InputRegistry (class in vllm.inputs.registry) inputs_embeds (vllm.worker.model_runner.ModelInputForGPU attribute) INSERT (vllm.multimodal.processing.UpdateMode attribute) insert() (vllm.distributed.kv_transfer.kv_connector.simple_connector.SimpleConnector method) (vllm.distributed.kv_transfer.kv_lookup_buffer.base.KVLookupBufferBase method) (vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer.SimpleBuffer method) (vllm.v1.utils.ConstantList method) insert_auto_fn() (vllm.compilation.multi_output_match.MultiOutputMatch method) insert_defunctionalized() (vllm.compilation.fix_functionalization.FixFunctionalizationPass method) insert_fused_node() (vllm.compilation.fusion.QuantMultiOutputMatch method) insert_getitems() (vllm.compilation.multi_output_match.MultiOutputMatch method) inserting_after_match() (vllm.compilation.multi_output_match.MultiOutputMatch method) insertion (vllm.multimodal.processing.PromptInsertion attribute) instance (vllm.device_allocator.cumem.CuMemAllocator attribute) instance() (vllm.compilation.fusion.FusionPass class method) instance_id (vllm.config.VllmConfig attribute) InstructCoderDataset (class in vllm.benchmarks.datasets) int4 (vllm.scalar_type.scalar_types attribute) int8 (vllm.scalar_type.scalar_types attribute) Int8TpuConfig (class in vllm.model_executor.layers.quantization.tpu_int8) int_() (vllm.scalar_type.ScalarType class method) intermediate_size (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) (vllm.model_executor.models.plamo2.Plamo2Config attribute) IntermediateTensors (class in vllm.sequence) InternLM2Attention (class in vllm.model_executor.models.internlm2) InternLM2ForCausalLM (class in vllm.model_executor.models.internlm2) InternLM2ForRewardModel (class in vllm.model_executor.models.internlm2) InternLM2MLP (class in vllm.model_executor.models.internlm2) InternLM2Model (class in vllm.model_executor.models.internlm2) Internlm2ToolParser (class in vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser) InternLM2VEDecoderLayer (class in vllm.model_executor.models.internlm2_ve) InternLM2VEForCausalLM (class in vllm.model_executor.models.internlm2_ve) InternLM2VEModel (class in vllm.model_executor.models.internlm2_ve) InternLMDecoderLayer (class in vllm.model_executor.models.internlm2) InternMLP (class in vllm.model_executor.models.intern_vit) InternParallelAttention (class in vllm.model_executor.models.intern_vit) InternSdpaAttention (class in vllm.model_executor.models.intern_vit) InternVisionEmbeddings (class in vllm.model_executor.models.intern_vit) InternVisionEncoder (class in vllm.model_executor.models.intern_vit) InternVisionEncoderLayer (class in vllm.model_executor.models.intern_vit) InternVisionModel (class in vllm.model_executor.models.intern_vit) InternVisionPatchModel (class in vllm.model_executor.models.intern_vit) InternVLChatConfig (class in vllm.transformers_utils.configs.internvl) InternVLChatModel (class in vllm.model_executor.models.internvl) InternVLDummyInputsBuilder (class in vllm.model_executor.models.internvl) InternVLImageEmbeddingInputs (class in vllm.model_executor.models.internvl) InternVLImageInputs (in module vllm.model_executor.models.internvl) InternVLImagePixelInputs (class in vllm.model_executor.models.internvl) InternVLMultiModalProcessor (class in vllm.model_executor.models.internvl) InternVLProcessingInfo (class in vllm.model_executor.models.internvl) InternVLProcessor (class in vllm.model_executor.models.internvl) interpolate_pos_encoding() (vllm.model_executor.models.siglip.SiglipVisionEmbeddings method) inv_sqrt_d_k (vllm.model_executor.models.phi4mm_utils.MultiHeadedAttention attribute) INVALID_TOKEN_ID (in module vllm.v1.worker.tpu_model_runner) inverse_packed_mapping (vllm.model_executor.model_loader.utils.ParamMapping attribute) invocations (vllm.profiler.layerwise_profile.SummaryStatsEntry attribute) invocations() (in module vllm.entrypoints.openai.api_server) invoke_fused_moe_kernel() (in module vllm.model_executor.layers.fused_moe.fused_moe) ip (vllm.executor.ray_distributed_executor.RayWorkerMetaData attribute) IPC_DATA_EXT (in module vllm.engine.multiprocessing) IPC_HEALTH_EXT (in module vllm.engine.multiprocessing) IPC_INPUT_EXT (in module vllm.engine.multiprocessing) IPC_OUTPUT_EXT (in module vllm.engine.multiprocessing) ipex_ops (class in vllm._ipex_ops) IPEX_QUANT_METHOD_MAP (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig attribute) IpexAttnBackend (class in vllm.attention.backends.ipex_attn) IpexAttnBackendImpl (class in vllm.attention.backends.ipex_attn) IpexAttnMetadata (class in vllm.attention.backends.ipex_attn) IPEXAWQLinearMethod (class in vllm.model_executor.layers.quantization.ipex_quant) IPEXConfig (class in vllm.model_executor.layers.quantization.ipex_quant) IPEXGPTQLinearMethod (class in vllm.model_executor.layers.quantization.ipex_quant) is_activation_quantization_format() (in module vllm.model_executor.layers.quantization.compressed_tensors.utils) is_aiter_mla_enabled() (in module vllm.attention.backends.rocm_aiter_mla) (in module vllm.v1.attention.backends.mla.rocm_aiter_mla) is_all_cross_attn_metadata_set (vllm.attention.backends.flash_attn.FlashAttentionMetadata property) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata property) (vllm.attention.backends.xformers.XFormersMetadata property) is_all_cross_attn_metadata_set() (in module vllm.attention.backends.utils) is_all_encoder_attn_metadata_set (vllm.attention.backends.flash_attn.FlashAttentionMetadata property) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata property) (vllm.attention.backends.xformers.XFormersMetadata property) is_all_encoder_attn_metadata_set() (in module vllm.attention.backends.utils) is_appendable() (vllm.core.block.common.CopyOnWriteTracker method) is_applicable_for_shape() (vllm.compilation.inductor_pass.InductorPass method) (vllm.compilation.sequence_parallelism.SequenceParallelismPass method) is_async (vllm.engine.llm_engine.OutputData attribute) is_async_output_supported() (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.hpu.HpuPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.neuron.NeuronPlatform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform class method) is_attention_free (vllm.config.CacheConfig attribute) (vllm.model_executor.models.interfaces.IsAttentionFree attribute) is_attention_free() (in module vllm.model_executor.models.interfaces) is_awq_marlin_compatible() (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig class method) is_base_model() (vllm.entrypoints.openai.serving_models.OpenAIServingModels method) is_block_cached() (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) is_block_tables_empty() (in module vllm.attention.backends.utils) is_blocking (vllm.entrypoints.openai.protocol.ModelPermission attribute) is_channelwise (vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel.ScaledMMLinearLayerConfig attribute) is_complete_json() (in module vllm.entrypoints.openai.tool_parsers.utils) is_composition (vllm.transformers_utils.configs.internvl.InternVLChatConfig attribute) (vllm.transformers_utils.configs.skyworkr1v.SkyworkR1VChatConfig attribute) (vllm.transformers_utils.configs.ultravox.UltravoxConfig attribute) IS_COMPUTE_8_OR_ABOVE (in module vllm.attention.ops.blocksparse_attention.interface) is_cpu() (vllm.platforms.interface.Platform method) is_cross_encoder (vllm.config.ModelConfig property) is_cuda() (vllm.platforms.interface.Platform method) is_cuda_alike() (vllm.platforms.interface.Platform method) is_deepseek_mla (vllm.config.ModelConfig property) is_driver_worker (vllm.worker.worker_base.LocalOrDistributedWorkerBase attribute) is_embed (vllm.multimodal.inputs.PlaceholderRange attribute) (vllm.multimodal.processing.PlaceholderFeaturesInfo attribute) (vllm.multimodal.processing.PromptUpdateDetails attribute) is_empty() (vllm.core.scheduler.SchedulerOutputs method) is_enabled (vllm.adapter_commons.worker_manager.AbstractWorkerManager property) (vllm.lora.worker_manager.WorkerLoRAManager property) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager property) is_encoder_decoder (vllm.config.ModelConfig property) is_encoder_decoder() (in module vllm.transformers_utils.config) (vllm.sequence.SequenceGroup method) is_engine_errored (vllm.engine.multiprocessing.RPCError attribute) is_explicit_encoder_decoder_prompt() (in module vllm.inputs.parse) is_fast (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) is_finished (vllm.v1.stats.common.RequestStats property) is_finished() (vllm.sequence.Sequence method) (vllm.sequence.SequenceGroup method) (vllm.sequence.SequenceStatus static method) (vllm.v1.request.Request method) (vllm.v1.request.RequestStatus static method) is_first_multi_step (vllm.sequence.ExecuteModelRequest property) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) is_first_rank (vllm.distributed.parallel_state.GroupCoordinator property) is_first_step_output (vllm.engine.llm_engine.OutputData attribute) is_flashmla_supported() (in module vllm.attention.ops.flashmla) is_floating_point() (vllm.scalar_type.ScalarType method) is_fp4_marlin_supported() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) is_fp8() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) is_fp8_fnuz() (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) is_fp8_marlin_supported() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) is_full (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) is_fully_connected() (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.cuda.NonNvmlCudaPlatform class method) (vllm.platforms.cuda.NvmlCudaPlatform class method) (vllm.platforms.rocm.RocmPlatform static method) is_func() (in module vllm.compilation.fx_utils) is_gptq_bitblas_compatible() (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig class method) is_gptq_marlin_compatible() (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig class method) is_grammar_ready (vllm.v1.structured_output.request.StructuredOutputRequest property) is_hip (in module vllm.attention.backends.mla.common) is_hip_ (in module vllm.attention.ops.triton_decode_attention) is_hpu() (vllm.platforms.interface.Platform method) is_hybrid (vllm.model_executor.models.interfaces.IsHybrid attribute) is_hybrid() (in module vllm.model_executor.models.interfaces) is_ieee_754() (vllm.scalar_type.ScalarType method) is_in_doc_build() (in module vllm.utils) is_in_ray_actor() (in module vllm.utils) is_init_field() (in module vllm.config) is_integer() (vllm.scalar_type.ScalarType method) is_kv_cache_type_uniform() (in module vllm.v1.core.kv_cache_utils) is_kv_consumer (vllm.config.KVTransferConfig property) is_kv_producer (vllm.config.KVTransferConfig property) is_kv_transfer_instance (vllm.config.KVTransferConfig property) is_last_rank (vllm.distributed.parallel_state.GroupCoordinator property) is_last_step (vllm.engine.llm_engine.OutputData attribute) (vllm.sequence.ExecuteModelRequest property) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) is_layer_excluded() (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4Config method) is_layer_skipped() (in module vllm.model_executor.layers.quantization.utils.quant_utils) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinConfig method) is_layer_skipped_awq() (in module vllm.model_executor.layers.quantization.awq) is_layer_skipped_bnb() (in module vllm.model_executor.layers.quantization.bitsandbytes) is_layer_skipped_quant() (in module vllm.model_executor.layers.quantization.moe_wna16) is_list_of() (in module vllm.utils) is_mamba() (in module vllm.model_executor.models.plamo2) is_matryoshka (vllm.config.ModelConfig property) is_moe_wna16_compatible() (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config class method) is_multi_step (vllm.config.SchedulerConfig property) (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) is_multimodal_model (vllm.config.ModelConfig property) (vllm.config.SchedulerConfig attribute) is_neuron() (vllm.platforms.interface.Platform method) is_neuronx_distributed_inference() (vllm.platforms.neuron.NeuronPlatform class method) is_not_builtin() (in module vllm.engine.arg_utils) is_otel_available() (in module vllm.tracing) is_out_of_tree() (vllm.platforms.interface.Platform method) is_packed (vllm.lora.lora.LoRALayerWeights property) (vllm.lora.lora.PackedLoRALayerWeights property) is_pin_memory_available() (in module vllm.utils) (vllm.platforms.cpu.CpuPlatform class method) (vllm.platforms.hpu.HpuPlatform class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.neuron.NeuronPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) (vllm.platforms.xpu.XPUPlatform class method) is_pooling_model() (in module vllm.model_executor.models.interfaces_base) is_power_of_2() (in module vllm.attention.ops.nki_flash_attn) is_pp_missing_parameter() (in module vllm.model_executor.models.utils) is_prefill (vllm.lora.layers.LoRAMapping attribute) is_prefill() (vllm.sequence.Sequence method) (vllm.sequence.SequenceGroup method) (vllm.worker.tpu_model_runner.ExecutionMode method) is_profile_run (vllm.attention.backends.cpu_mla.CPUMLAMetadata attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) is_prompt (vllm.attention.backends.hpu_attn.HPUAttentionMetadata attribute) (vllm.attention.backends.ipex_attn.IpexAttnMetadata attribute) (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.sequence.SequenceGroupMetadataDelta attribute) (vllm.worker.cpu_model_runner.ModelInputForCPUWithSamplingMetadata attribute) (vllm.worker.hpu_model_runner.ModelInputForHPUWithSamplingMetadata attribute) (vllm.worker.model_runner.ModelInputForGPUWithSamplingMetadata attribute) is_quantized_kv_cache() (in module vllm.attention.backends.abstract) is_reasoning_end() (vllm.reasoning.abs_reasoning_parsers.ReasoningParser method) (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser method) (vllm.reasoning.qwen3_reasoning_parser.Qwen3ReasoningParser method) is_regex_target_modules() (in module vllm.lora.utils) is_rocm() (vllm.platforms.interface.Platform method) is_rocm_aiter_moe_enabled() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) is_rocm_aiter_paged_attn_enabled() (in module vllm.attention.backends.rocm_flash_attn) is_rocm_aiter_rmsnorm_enabled() (in module vllm.model_executor.layers.layernorm) is_rocm_cdna() (in module vllm.attention.ops.triton_flash_attention) is_running (vllm.engine.async_llm_engine.AsyncLLMEngine property) (vllm.engine.multiprocessing.client.MQLLMEngineClient property) (vllm.engine.protocol.EngineClient property) (vllm.v1.engine.async_llm.AsyncLLM property) is_s3() (in module vllm.transformers_utils.utils) IS_SERVER_READY (vllm.engine.multiprocessing.RPCStartupRequest attribute) is_set() (in module vllm.envs) is_signed() (vllm.scalar_type.ScalarType method) is_single_step_prompt (vllm.sequence.SequenceGroupMetadata property) is_sleep_mode_available() (vllm.platforms.interface.Platform method) is_sleeping (vllm.engine.multiprocessing.RPCIsSleepingResponse attribute) is_sleeping() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) is_sleeping_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) is_spec_decode_supported() (in module vllm.v1.spec_decode.utils) is_splitting_graph (vllm.compilation.backends.SplitItem attribute) is_static_input_scheme (vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel.ScaledMMLinearLayerConfig attribute) is_stopped (vllm.engine.async_llm_engine.AsyncLLMEngine property) (vllm.engine.multiprocessing.client.MQLLMEngineClient property) (vllm.engine.protocol.EngineClient property) (vllm.v1.engine.async_llm.AsyncLLM property) is_store (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.ReqMeta attribute) is_terminated() (vllm.v1.structured_output.backend_guidance.GuidanceGrammar method) (vllm.v1.structured_output.backend_types.StructuredOutputGrammar method) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar method) is_text_generation_model() (in module vllm.model_executor.models.interfaces_base) is_tokens (vllm.inputs.parse.ParsedText attribute) (vllm.inputs.parse.ParsedTokens attribute) is_torch_equal_or_newer() (in module vllm.utils) is_tpu() (vllm.platforms.interface.Platform method) is_tracing_enabled() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) is_transformers_neuronx() (vllm.platforms.neuron.NeuronPlatform class method) IS_TURING (in module vllm.attention.ops.prefix_prefill) is_type() (in module vllm.engine.arg_utils) is_unsupported_config() (vllm.engine.multiprocessing.client.MQLLMEngineClient static method) is_usage_stats_enabled() (in module vllm.usage.usage_lib) is_uva_available() (in module vllm.utils) is_v1_compatible (vllm.config.ModelConfig property) is_v1_kv_transfer_group() (in module vllm.distributed.kv_transfer.kv_transfer_state) is_valid_id() (vllm.entrypoints.openai.tool_parsers.mistral_tool_parser.MistralToolCall static method) is_valid_ipv6_address() (in module vllm.utils) is_valid_sequence() (in module vllm.benchmarks.datasets) is_vllm_model() (in module vllm.model_executor.models.interfaces_base) is_vllm_tensorized() (in module vllm.model_executor.model_loader.tensorizer) is_weak_contiguous() (in module vllm.distributed.device_communicators.custom_all_reduce) (in module vllm.model_executor.layers.quantization.compressed_tensors.triton_scaled_mm) is_xnnpack_available() (in module vllm.collect_env) is_xpu() (vllm.platforms.interface.Platform method) IsAttentionFree (class in vllm.model_executor.models.interfaces) IsHybrid (class in vllm.model_executor.models.interfaces) ISO639_1_OTHER_LANGS (in module vllm.entrypoints.openai.serving_transcription) ISO639_1_SUPPORTED_LANGS (in module vllm.entrypoints.openai.serving_transcription) item_idx (vllm.multimodal.processing.PlaceholderFeaturesInfo attribute) item_to_bytes() (vllm.multimodal.hasher.MultiModalHasher class method) items() (vllm.sequence.IntermediateTensors method) iter_item_to_bytes() (vllm.multimodal.hasher.MultiModalHasher class method) iter_token_matches() (in module vllm.multimodal.processing) iterate_over_files() (vllm.model_executor.model_loader.sharded_state_loader.ShardedStateLoader method) IterationStats (class in vllm.v1.metrics.stats) iterencode() (vllm.benchmarks.utils.InfEncoder method) itl (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) J JAISAttention (class in vllm.model_executor.models.jais) JAISBlock (class in vllm.model_executor.models.jais) JAISConfig (class in vllm.transformers_utils.configs.jais) JAISLMHeadModel (class in vllm.model_executor.models.jais) JAISMLP (class in vllm.model_executor.models.jais) JAISModel (class in vllm.model_executor.models.jais) JambaAttentionDecoderLayer (class in vllm.model_executor.models.jamba) JambaForCausalLM (class in vllm.model_executor.models.jamba) JambaForSequenceClassification (class in vllm.model_executor.models.jamba) JambaMambaDecoderLayer (class in vllm.model_executor.models.jamba) JambaMLP (class in vllm.model_executor.models.jamba) JambaModel (class in vllm.model_executor.models.jamba) JambaMoE (class in vllm.model_executor.models.jamba) JambaToolParser (class in vllm.entrypoints.openai.tool_parsers.jamba_tool_parser) jina_merge_lora_weights() (vllm.model_executor.models.bert_with_rope.JinaRobertaModel method) jina_to_vllm_mapper (vllm.model_executor.models.roberta.RobertaForSequenceClassification attribute) JinaRobertaModel (class in vllm.model_executor.models.bert_with_rope) jit_linear_forward_prefix() (vllm.model_executor.models.minimax_text_01.MiniMaxText01LinearKernel static method) JOIN_TIMEOUT_S (in module vllm.executor.multiproc_worker_utils) JSON (vllm.model_executor.guided_decoding.outlines_decoding.GuidedDecodingMode attribute) json (vllm.sampling_params.GuidedDecodingParams attribute) JSON (vllm.v1.structured_output.backend_types.StructuredOutputOptions attribute) JSON_GRAMMAR (in module vllm.model_executor.guided_decoding.outlines_decoding) json_iter_leaves() (in module vllm.jsontree) json_map_leaves() (in module vllm.jsontree) json_object (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) (vllm.sampling_params.GuidedDecodingParams attribute) JSON_OBJECT (vllm.v1.structured_output.backend_types.StructuredOutputOptions attribute) json_reduce_leaves() (in module vllm.jsontree) json_schema (vllm.entrypoints.openai.protocol.JsonSchemaResponseFormat attribute) (vllm.entrypoints.openai.protocol.ResponseFormat attribute) json_str (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) JSONLogitsProcessor (class in vllm.model_executor.guided_decoding.outlines_logits_processors) JsonSchemaResponseFormat (class in vllm.entrypoints.openai.protocol) JSONTree (in module vllm.jsontree) K k_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) kE2M1ToFloat (in module vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils) kernel_paged_attention_2d() (in module vllm.attention.ops.chunked_prefill_paged_decode) kernel_type (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASLinearMethod attribute) kernel_unified_attention_2d() (in module vllm.attention.ops.triton_unified_attention) key (vllm.multimodal.inputs.MultiModalFieldElem attribute) (vllm.multimodal.processing.ProcessingCacheItem attribute) (vllm.multimodal.processing.ProcessingCacheOptionalItem attribute) keys_to_ignore_at_inference (vllm.model_executor.models.phimoe.PhiMoEConfig attribute) (vllm.transformers_utils.configs.arctic.ArcticConfig attribute) (vllm.transformers_utils.configs.cohere2.Cohere2Config attribute) (vllm.transformers_utils.configs.deepseek_vl2.DeepseekV2Config attribute) (vllm.transformers_utils.configs.exaone.ExaoneConfig attribute) (vllm.transformers_utils.configs.falcon.RWConfig attribute) (vllm.transformers_utils.configs.jais.JAISConfig attribute) (vllm.transformers_utils.configs.minimax_text_01.MiniMaxText01Config attribute) (vllm.transformers_utils.configs.nemotron.NemotronConfig attribute) (vllm.transformers_utils.configs.solar.SolarConfig attribute) (vllm.transformers_utils.configs.telechat2.Telechat2Config attribute) kFp8DynamicTensorSym (in module vllm.compilation.fusion) kFp8DynamicTokenSym (in module vllm.compilation.fusion) kFp8StaticTensorSym (in module vllm.compilation.fusion) kill_process_tree() (in module vllm.utils) kill_worker() (vllm.executor.multiproc_worker_utils.ProcessWorkerWrapper method) KimiVLConfig (class in vllm.transformers_utils.configs.kimi_vl) KimiVLDummyInputsBuilder (class in vllm.model_executor.models.kimi_vl) KimiVLForConditionalGeneration (class in vllm.model_executor.models.kimi_vl) KimiVLImageInputs (in module vllm.model_executor.models.kimi_vl) KimiVLImagePixelInputs (class in vllm.model_executor.models.kimi_vl) KimiVLMultiModalProcessor (class in vllm.model_executor.models.kimi_vl) KimiVLMultiModalProjector (class in vllm.model_executor.models.kimi_vl) KimiVLProcessingInfo (class in vllm.model_executor.models.kimi_vl) KQUANT_TYPES (in module vllm.model_executor.layers.quantization.gguf) kv_buffer_device (vllm.config.KVTransferConfig attribute) kv_buffer_size (vllm.config.KVTransferConfig attribute) kv_cache (vllm.model_executor.layers.quantization.schema.QuantParamSchema attribute) (vllm.worker.cpu_worker.CPUWorker property) (vllm.worker.hpu_worker.HPUWorker property) (vllm.worker.neuron_worker.NeuronWorker property) (vllm.worker.tpu_worker.TPUWorker property) (vllm.worker.worker.Worker property) (vllm.worker.worker_base.LocalOrDistributedWorkerBase property) kv_cache_dtype (vllm.engine.arg_utils.EngineArgs attribute) kv_cache_groups (vllm.v1.kv_cache_interface.KVCacheConfig attribute) KV_CACHE_QUANT_ALGOS (in module vllm.model_executor.layers.quantization.modelopt) kv_cache_spec (vllm.v1.kv_cache_interface.KVCacheGroupSpec attribute) kv_cache_stats (vllm.v1.stats.common.SchedulerStats attribute) kv_caches_base_addr (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlAgentMetadata attribute) kv_connector (vllm.config.KVTransferConfig attribute) kv_connector_extra_config (vllm.config.KVTransferConfig attribute) kv_connector_metadata (vllm.v1.core.sched.output.SchedulerOutput attribute) kv_connector_no_forward() (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) kv_events_config (vllm.config.VllmConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) kv_ip (vllm.config.KVTransferConfig attribute) kv_lora_rank (vllm.attention.backends.utils.MLADims attribute) kv_parallel_size (vllm.config.KVTransferConfig attribute) kv_port (vllm.config.KVTransferConfig attribute) kv_proj_encoder (vllm.model_executor.layers.linear.QKVCrossParallelLinear property) kv_rank (vllm.config.KVTransferConfig attribute) kv_role (vllm.config.KVTransferConfig attribute) kv_start_loc (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) kv_transfer_config (vllm.config.VllmConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) kv_transfer_params (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionResponse attribute) (vllm.v1.engine.EngineCoreOutput attribute) kva_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) kvb_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) KVCache (in module vllm.model_executor.models.mamba) (in module vllm.model_executor.models.mamba2) KVCacheBlock (class in vllm.v1.core.kv_cache_utils) KVCacheBlocks (class in vllm.v1.core.kv_cache_manager) KVCacheBufferBase (class in vllm.distributed.kv_transfer.kv_lookup_buffer.base) KVCacheConfig (class in vllm.v1.kv_cache_interface) KVCacheEvent (class in vllm.distributed.kv_events) KVCacheGroupSpec (class in vllm.v1.kv_cache_interface) KVCacheManager (class in vllm.v1.core.kv_cache_manager) KVCacheQuantSchema (class in vllm.model_executor.layers.quantization.schema) KVCacheSpec (class in vllm.v1.kv_cache_interface) KVCacheStats (class in vllm.v1.stats.common) KVCacheTensor (class in vllm.v1.kv_cache_interface) KVConnectorBase (class in vllm.distributed.kv_transfer.kv_connector.base) KVConnectorBase_V1 (class in vllm.distributed.kv_transfer.kv_connector.v1.base) KVConnectorBaseType (in module vllm.distributed.kv_transfer.kv_connector.base) KVConnectorFactory (class in vllm.distributed.kv_transfer.kv_connector.factory) KVConnectorMetadata (class in vllm.distributed.kv_transfer.kv_connector.v1.base) KVConnectorRole (class in vllm.distributed.kv_transfer.kv_connector.v1.base) KVConsumer (in module vllm.config) KVEventBatch (class in vllm.distributed.kv_events) KVEventsConfig (class in vllm.config) KVLookupBufferBase (class in vllm.distributed.kv_transfer.kv_lookup_buffer.base) KVPipeBase (class in vllm.distributed.kv_transfer.kv_pipe.base) KVProducer (in module vllm.config) KVRole (in module vllm.config) KVStoreBufferBase (class in vllm.distributed.kv_transfer.kv_lookup_buffer.base) KVTransferAgent (class in vllm.distributed.kv_transfer.kv_connector_agent) KVTransferConfig (class in vllm.config) KVTransferParams (class in vllm.distributed.kv_transfer.kv_connector.v1.base) kwargs (vllm.entrypoints.openai.protocol.LogitsProcessorConstructor attribute) L label (vllm.entrypoints.openai.protocol.ClassificationData attribute) labelname_finish_reason (vllm.engine.metrics.Metrics attribute) labelname_max_lora (vllm.engine.metrics.Metrics attribute) labelname_running_lora_adapters (vllm.engine.metrics.Metrics attribute) labelname_waiting_lora_adapters (vllm.engine.metrics.Metrics attribute) language (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionResponseVerbose attribute) language_model (vllm.model_executor.models.module_mapping.MultiModelKeys attribute) LanguageModelMLP (class in vllm.model_executor.models.molmo) LAST (vllm.model_executor.layers.pooler.PoolingType attribute) last_accessed (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) last_logging_time (in module vllm.forward_context) last_output (vllm.engine.llm_engine.SchedulerOutputState attribute) last_rank (vllm.distributed.parallel_state.GroupCoordinator property) last_sampled_token_ids (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) last_token_time (vllm.sequence.RequestMetrics attribute) last_token_ts (vllm.v1.metrics.stats.RequestStateStats attribute) last_update_type (vllm.v1.stats.common.RequestStats attribute) last_updated_ts_s (vllm.v1.stats.common.RequestStats attribute) LastAccessBlocksTracker (class in vllm.core.block.prefix_caching_block) LastAllReduceRMSNormPattern (class in vllm.compilation.sequence_parallelism) LastPool (class in vllm.model_executor.layers.pooler) latency (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) LATER (vllm.core.interfaces.AllocStatus attribute) layer_names (vllm.v1.kv_cache_interface.KVCacheGroupSpec attribute) layer_norm_func() (in module vllm.model_executor.models.commandr) LayerBlockType (class in vllm.utils) LayerFn (class in vllm.model_executor.models.utils) LayerNorm (class in vllm.model_executor.models.commandr) layers (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) layerwise_profile (class in vllm.profiler.layerwise_profile) LayerwiseProfileResults (class in vllm.profiler.layerwise_profile) layout (vllm.attention.ops.triton_flash_attention.MetaData attribute) LazyDict (class in vllm.utils) LazyLoader (class in vllm.utils) Learnable2DInterpPosEmb (class in vllm.model_executor.models.moonvit) LearnedAbsolutePositionEmbedding2D (class in vllm.model_executor.models.florence2) left_shift_bitsandbytes_4bit_shard() (in module vllm.model_executor.layers.linear) length (vllm.multimodal.inputs.PlaceholderRange attribute) (vllm.multimodal.processing.PlaceholderFeaturesInfo property) LENGTH (vllm.v1.engine.FinishReason attribute) length_penalty (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.BeamSearchParams attribute) lens (vllm.model_executor.models.ultravox.UltravoxAudioFeatureInputs attribute) level (vllm.config.CompilationConfig attribute) lifespan() (in module vllm.entrypoints.openai.api_server) lightning_attention() (in module vllm.model_executor.layers.lightning_attn) lightning_attention_ (in module vllm.model_executor.layers.lightning_attn) limit_mm_per_prompt (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) limit_per_prompt (vllm.config.MultiModalConfig attribute) linear_decode_forward_triton() (in module vllm.model_executor.layers.lightning_attn) LinearBase (class in vllm.model_executor.layers.linear) LinearMethodBase (class in vllm.model_executor.layers.linear) LinearScalingRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) LinearScalingRotaryEmbeddingWithLoRA (class in vllm.lora.layers) list() (vllm.core.block.common.BlockList method) list_adapters() (in module vllm.adapter_commons.utils) (vllm.adapter_commons.models.AdapterModelManager method) (vllm.adapter_commons.worker_manager.AbstractWorkerManager method) (vllm.lora.models.LoRAModelManager method) (vllm.lora.models.LRUCacheLoRAModelManager method) (vllm.lora.worker_manager.WorkerLoRAManager method) (vllm.prompt_adapter.models.LRUCachePromptAdapterModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) list_adapters_worker() (in module vllm.adapter_commons.utils) list_files() (in module vllm.transformers_utils.s3_utils) list_local_repo_files() (in module vllm.transformers_utils.tokenizers.mistral) list_loras() (vllm.engine.llm_engine.LLMEngine method) (vllm.executor.executor_base.ExecutorBase method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin method) (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.LoRANotSupportedWorkerBase method) (vllm.worker.worker_base.WorkerBase method) list_loras_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) list_prompt_adapters() (vllm.engine.llm_engine.LLMEngine method) (vllm.executor.executor_base.ExecutorBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) list_repo_files() (in module vllm.transformers_utils.config) listen_for_disconnect() (in module vllm.entrypoints.utils) literal_to_kwargs() (in module vllm.engine.arg_utils) ll_matcher (vllm.v1.structured_output.backend_guidance.GuidanceGrammar attribute) ll_tokenizer (vllm.v1.structured_output.backend_guidance.GuidanceGrammar attribute) Llama3JsonToolParser (class in vllm.entrypoints.openai.tool_parsers.llama_tool_parser) Llama3RotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) Llama4Attention (class in vllm.model_executor.models.llama4) Llama4DecoderLayer (class in vllm.model_executor.models.llama4) Llama4ForCausalLM (class in vllm.model_executor.models.llama4) Llama4ForConditionalGeneration (class in vllm.model_executor.models.mllama4) Llama4ImagePatchInputs (class in vllm.model_executor.models.mllama4) Llama4Model (class in vllm.model_executor.models.llama4) Llama4MoE (class in vllm.model_executor.models.llama4) Llama4MultiModalProjector (class in vllm.model_executor.models.mllama4) Llama4UnfoldConvolution (class in vllm.model_executor.models.mllama4) Llama4VisionAttention (class in vllm.model_executor.models.mllama4) Llama4VisionEncoder (class in vllm.model_executor.models.mllama4) Llama4VisionEncoderLayer (class in vllm.model_executor.models.mllama4) Llama4VisionMLP (class in vllm.model_executor.models.mllama4) Llama4VisionModel (class in vllm.model_executor.models.mllama4) Llama4VisionPixelShuffleMLP (class in vllm.model_executor.models.mllama4) Llama4VisionRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) LlamaAttention (class in vllm.model_executor.models.llama) LlamaDecoderLayer (class in vllm.model_executor.models.llama) (class in vllm.model_executor.models.llama_eagle) (class in vllm.model_executor.models.llama_eagle3) LlamaForCausalLM (class in vllm.model_executor.models.llama) LlamaMLP (class in vllm.model_executor.models.llama) LlamaModel (class in vllm.model_executor.models.llama) (class in vllm.model_executor.models.llama_eagle) (class in vllm.model_executor.models.llama_eagle3) LlavaDummyInputsBuilder (class in vllm.model_executor.models.llava) LlavaForConditionalGeneration (class in vllm.model_executor.models.llava) LlavaImageEmbeddingInputs (class in vllm.model_executor.models.llava) LlavaImageInputs (in module vllm.model_executor.models.llava) LlavaImagePixelInputs (class in vllm.model_executor.models.llava) LlavaLikeConfig (class in vllm.model_executor.models.llava) (class in vllm.model_executor.models.mistral3) LlavaLikeProcessor (class in vllm.model_executor.models.llava) (class in vllm.model_executor.models.mistral3) LlavaMultiModalProcessor (class in vllm.model_executor.models.llava) LlavaMultiModalProjector (class in vllm.model_executor.models.llava) LlavaNextForConditionalGeneration (class in vllm.model_executor.models.llava_next) LlavaNextImageEmbeddingInputs (class in vllm.model_executor.models.llava_next) LlavaNextImageInputs (in module vllm.model_executor.models.llava_next) LlavaNextImagePixelInputs (class in vllm.model_executor.models.llava_next) LlavaNextLikeConfig (class in vllm.model_executor.models.llava_next) LlavaNextMultiModalProcessor (class in vllm.model_executor.models.llava_next) LlavaNextMultiModalProjector (class in vllm.model_executor.models.llava_next_video) LlavaNextProcessingInfo (class in vllm.model_executor.models.llava_next) LlavaNextVideoDummyInputsBuilder (class in vllm.model_executor.models.llava_next_video) LlavaNextVideoForConditionalGeneration (class in vllm.model_executor.models.llava_next_video) LlavaNextVideoMultiModalProcessor (class in vllm.model_executor.models.llava_next_video) LlavaNextVideoPixelInputs (class in vllm.model_executor.models.llava_next_video) LlavaNextVideoPooler (class in vllm.model_executor.models.llava_next_video) LlavaNextVideoProcessingInfo (class in vllm.model_executor.models.llava_next_video) LlavaOnevisionDummyInputsBuilder (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionForConditionalGeneration (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionImageEmbeddingInputs (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionImageInputs (in module vllm.model_executor.models.llava_onevision) LlavaOnevisionImagePixelInputs (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionLikeConfig (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionMultiInputs (in module vllm.model_executor.models.llava_onevision) LlavaOnevisionMultiModalProcessor (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionMultiModalProjector (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionProcessingInfo (class in vllm.model_executor.models.llava_onevision) LlavaOnevisionVideoPixelInputs (class in vllm.model_executor.models.llava_onevision) LlavaProcessingInfo (class in vllm.model_executor.models.llava) LLM (class in vllm.entrypoints.llm) LLM_CLASS (vllm.usage.usage_lib.UsageContext attribute) LLMEngine (class in vllm.engine.llm_engine) (class in vllm.v1.engine.llm_engine) LLMGuidedOptions (class in vllm.model_executor.guided_decoding.guided_fields) LLMM1() (in module vllm._custom_ops) lm_head_weight_loader() (vllm.model_executor.models.baichuan.BaiChuanBaseForCausalLM method) LMCacheConnector (class in vllm.distributed.kv_transfer.kv_connector.lmcache_connector) LMCacheConnectorV1 (class in vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector) load() (vllm.compilation.backends.CompilerManager method) (vllm.compilation.compiler_interface.CompilerInterface method) (vllm.compilation.compiler_interface.InductorAdaptor method) (vllm.compilation.compiler_interface.InductorStandaloneAdaptor method) load_aware_call() (in module vllm.entrypoints.utils) load_base64() (vllm.multimodal.audio.AudioMediaIO method) (vllm.multimodal.base.MediaIO method) (vllm.multimodal.image.ImageEmbeddingMediaIO method) (vllm.multimodal.image.ImageMediaIO method) (vllm.multimodal.video.VideoMediaIO method) load_block_tables() (in module vllm.attention.ops.nki_flash_attn) load_bytes() (vllm.multimodal.audio.AudioMediaIO method) (vllm.multimodal.base.MediaIO method) (vllm.multimodal.image.ImageEmbeddingMediaIO method) (vllm.multimodal.image.ImageMediaIO method) (vllm.multimodal.video.OpenCVVideoBackend class method) (vllm.multimodal.video.VideoLoader class method) (vllm.multimodal.video.VideoMediaIO method) load_chat_template() (in module vllm.entrypoints.chat_utils) load_column_parallel_weight() (in module vllm.model_executor.models.phi3_small) (vllm.model_executor.parameter.BasevLLMParameter method) (vllm.model_executor.parameter.PerTensorScaleParameter method) load_config (vllm.config.VllmConfig attribute) load_data() (vllm.benchmarks.datasets.BenchmarkDataset method) (vllm.benchmarks.datasets.BurstGPTDataset method) (vllm.benchmarks.datasets.HuggingFaceDataset method) (vllm.benchmarks.datasets.ShareGPTDataset method) (vllm.benchmarks.datasets.SonnetDataset method) load_file() (vllm.multimodal.audio.AudioMediaIO method) (vllm.multimodal.base.MediaIO method) (vllm.multimodal.image.ImageEmbeddingMediaIO method) (vllm.multimodal.image.ImageMediaIO method) (vllm.multimodal.video.VideoMediaIO method) load_format (vllm.config.LoadConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) load_from_env() (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig static method) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig static method) load_from_url() (vllm.multimodal.utils.MediaConnector method) load_from_url_async() (vllm.multimodal.utils.MediaConnector method) load_general_plugins() (in module vllm.plugins) load_kv_tile_from_cache() (in module vllm.attention.ops.nki_flash_attn) load_lora_adapter() (vllm.entrypoints.openai.serving_models.OpenAIServingModels method) load_lora_model() (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin method) load_merged_column_weight() (vllm.model_executor.layers.quantization.hqq_marlin.HQQEmptyParameter method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQweightParameter method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQZeroScaleParameter method) (vllm.model_executor.parameter.BasevLLMParameter method) (vllm.model_executor.parameter.PerTensorScaleParameter method) load_model() (vllm.model_executor.model_loader.base_loader.BaseModelLoader method) (vllm.model_executor.model_loader.bitsandbytes_loader.BitsAndBytesModelLoader method) (vllm.model_executor.model_loader.default_loader.DefaultModelLoader method) (vllm.model_executor.model_loader.dummy_loader.DummyModelLoader method) (vllm.model_executor.model_loader.gguf_loader.GGUFModelLoader method) (vllm.model_executor.model_loader.runai_streamer_loader.RunaiModelStreamerLoader method) (vllm.model_executor.model_loader.sharded_state_loader.ShardedStateLoader method) (vllm.model_executor.model_loader.tensorizer_loader.TensorizerLoader method) (vllm.spec_decode.ngram_worker.NGramWorker method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.v1.spec_decode.eagle.EagleProposer method) (vllm.v1.spec_decode.ngram_proposer.NgramProposer method) (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) (vllm.v1.worker.tpu_worker.TPUWorker method) (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) (vllm.worker.multi_step_neuron_model_runner.MultiStepNeuronModelRunner method) (vllm.worker.multi_step_neuronx_distributed_model_runner.MultiStepNeuronxDistributedModelRunner method) (vllm.worker.neuron_model_runner.NeuronModelRunner method) (vllm.worker.neuron_worker.NeuronWorker method) (vllm.worker.neuronx_distributed_model_runner.NeuronxDistributedModelRunner method) (vllm.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.WorkerBase method) (vllm.worker.xpu_model_runner.XPUModelRunner method) load_moe_expert_weights() (vllm.model_executor.models.llama4.Llama4Model method) load_params_config() (in module vllm.transformers_utils.config) load_peft_weights() (in module vllm.prompt_adapter.utils) load_plugins_by_group() (in module vllm.plugins) load_qkv_weight() (vllm.model_executor.layers.quantization.hqq_marlin.HQQEmptyParameter method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQweightParameter method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQZeroScaleParameter method) (vllm.model_executor.parameter.BasevLLMParameter method) (vllm.model_executor.parameter.PerTensorScaleParameter method) load_row_parallel_weight() (vllm.model_executor.layers.quantization.hqq_marlin.HQQEmptyParameter method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQweightParameter method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQZeroScaleParameter method) (vllm.model_executor.parameter.BasevLLMParameter method) (vllm.model_executor.parameter.PerTensorScaleParameter method) (vllm.model_executor.parameter.RowvLLMParameter method) load_v_tile() (in module vllm.attention.ops.nki_flash_attn) load_weights() (vllm.model_executor.model_loader.neuron.NeuronCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronMllamaForCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronSpeculationCausalLM method) (vllm.model_executor.models.arctic.ArcticForCausalLM method) (vllm.model_executor.models.aria.AriaForConditionalGeneration method) (vllm.model_executor.models.aria.AriaTextModel method) (vllm.model_executor.models.aria.AriaVisionTransformer method) (vllm.model_executor.models.aya_vision.AyaVisionForConditionalGeneration method) (vllm.model_executor.models.baichuan.BaiChuanBaseForCausalLM method) (vllm.model_executor.models.baichuan.BaiChuanModel method) (vllm.model_executor.models.bamba.BambaForCausalLM method) (vllm.model_executor.models.bamba.BambaModel method) (vllm.model_executor.models.bart.BartForConditionalGeneration method) (vllm.model_executor.models.bert.BertEmbeddingModel method) (vllm.model_executor.models.bert.BertForSequenceClassification method) (vllm.model_executor.models.bert.BertModel method) (vllm.model_executor.models.bert_with_rope.BertWithRope method) (vllm.model_executor.models.bert_with_rope.GteModel method) (vllm.model_executor.models.bert_with_rope.JinaRobertaModel method) (vllm.model_executor.models.blip.BlipVisionModel method) (vllm.model_executor.models.blip2.Blip2ForConditionalGeneration method) (vllm.model_executor.models.bloom.BloomForCausalLM method) (vllm.model_executor.models.chameleon.ChameleonForConditionalGeneration method) (vllm.model_executor.models.chatglm.ChatGLMBaseModel method) (vllm.model_executor.models.chatglm.ChatGLMModel method) (vllm.model_executor.models.clip.CLIPVisionModel method) (vllm.model_executor.models.commandr.CohereForCausalLM method) (vllm.model_executor.models.dbrx.DbrxForCausalLM method) (vllm.model_executor.models.deepseek.DeepseekForCausalLM method) (vllm.model_executor.models.deepseek.DeepseekModel method) (vllm.model_executor.models.deepseek_mtp.DeepSeekMTP method) (vllm.model_executor.models.deepseek_v2.DeepseekV2ForCausalLM method) (vllm.model_executor.models.deepseek_vl2.DeepseekVLV2ForCausalLM method) (vllm.model_executor.models.eagle.EAGLE method) (vllm.model_executor.models.exaone.ExaoneForCausalLM method) (vllm.model_executor.models.exaone.ExaoneModel method) (vllm.model_executor.models.fairseq2_llama.Fairseq2LlamaForCausalLM method) (vllm.model_executor.models.falcon.FalconForCausalLM method) (vllm.model_executor.models.falcon.FalconModel method) (vllm.model_executor.models.florence2.Florence2ForConditionalGeneration method) (vllm.model_executor.models.florence2.Florence2LanguageForConditionalGeneration method) (vllm.model_executor.models.fuyu.FuyuForCausalLM method) (vllm.model_executor.models.gemma.GemmaForCausalLM method) (vllm.model_executor.models.gemma.GemmaModel method) (vllm.model_executor.models.gemma2.Gemma2ForCausalLM method) (vllm.model_executor.models.gemma2.Gemma2Model method) (vllm.model_executor.models.gemma3.Gemma3ForCausalLM method) (vllm.model_executor.models.gemma3.Gemma3Model method) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) (vllm.model_executor.models.glm4.Glm4ForCausalLM method) (vllm.model_executor.models.gpt2.GPT2LMHeadModel method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeForCausalLM method) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeModel method) (vllm.model_executor.models.gpt_j.GPTJForCausalLM method) (vllm.model_executor.models.gpt_j.GPTJModel method) (vllm.model_executor.models.gpt_neox.GPTNeoXForCausalLM method) (vllm.model_executor.models.gpt_neox.GPTNeoXModel method) (vllm.model_executor.models.granite.GraniteForCausalLM method) (vllm.model_executor.models.granite.GraniteModel method) (vllm.model_executor.models.granite_speech.GraniteSpeechForConditionalGeneration method) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM method) (vllm.model_executor.models.granitemoe.GraniteMoeModel method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM method) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridModel method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedModel method) (vllm.model_executor.models.grok1.Grok1ForCausalLM method) (vllm.model_executor.models.grok1.Grok1Model method) (vllm.model_executor.models.idefics2_vision_model.Idefics2VisionTransformer method) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration method) (vllm.model_executor.models.intern_vit.InternVisionModel method) (vllm.model_executor.models.internlm2.InternLM2ForCausalLM method) (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.jais.JAISLMHeadModel method) (vllm.model_executor.models.jamba.JambaForCausalLM method) (vllm.model_executor.models.jamba.JambaForSequenceClassification method) (vllm.model_executor.models.kimi_vl.KimiVLForConditionalGeneration method) (vllm.model_executor.models.llama.LlamaForCausalLM method) (vllm.model_executor.models.llama.LlamaModel method) (vllm.model_executor.models.llama4.Llama4ForCausalLM method) (vllm.model_executor.models.llama4.Llama4Model method) (vllm.model_executor.models.llama_eagle.EagleLlamaForCausalLM method) (vllm.model_executor.models.llama_eagle.LlamaModel method) (vllm.model_executor.models.llama_eagle3.Eagle3LlamaForCausalLM method) (vllm.model_executor.models.llama_eagle3.LlamaModel method) (vllm.model_executor.models.llava.LlavaForConditionalGeneration method) (vllm.model_executor.models.llava_next.LlavaNextForConditionalGeneration method) (vllm.model_executor.models.llava_next_video.LlavaNextVideoForConditionalGeneration method) (vllm.model_executor.models.llava_onevision.LlavaOnevisionForConditionalGeneration method) (vllm.model_executor.models.mamba.MambaForCausalLM method) (vllm.model_executor.models.mamba.MambaModel method) (vllm.model_executor.models.mamba2.Mamba2ForCausalLM method) (vllm.model_executor.models.medusa.Medusa method) (vllm.model_executor.models.mimo.MiMoModel method) (vllm.model_executor.models.mimo_mtp.MiMoMTP method) (vllm.model_executor.models.minicpm.MiniCPMForCausalLM method) (vllm.model_executor.models.minicpm.MiniCPMModel method) (vllm.model_executor.models.minicpmo.MiniCPMO method) (vllm.model_executor.models.minicpmv.MiniCPMVBaseModel method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01ForCausalLM method) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ForConditionalGeneration method) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration method) (vllm.model_executor.models.mixtral.MixtralForCausalLM method) (vllm.model_executor.models.mixtral.MixtralModel method) (vllm.model_executor.models.mixtral_quant.MixtralForCausalLM method) (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration method) (vllm.model_executor.models.mlp_speculator.MLPSpeculator method) (vllm.model_executor.models.modernbert.ModernBertForSequenceClassification method) (vllm.model_executor.models.modernbert.ModernBertModel method) (vllm.model_executor.models.molmo.MolmoForCausalLM method) (vllm.model_executor.models.molmo.MolmoModel method) (vllm.model_executor.models.molmo.MolmoVisionBackbone method) (vllm.model_executor.models.mpt.MPTForCausalLM method) (vllm.model_executor.models.mpt.MPTModel method) (vllm.model_executor.models.nemotron.NemotronForCausalLM method) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM method) (vllm.model_executor.models.nemotron_nas.DeciModel method) (vllm.model_executor.models.olmo.OlmoForCausalLM method) (vllm.model_executor.models.olmo2.Olmo2ForCausalLM method) (vllm.model_executor.models.olmoe.OlmoeForCausalLM method) (vllm.model_executor.models.olmoe.OlmoeModel method) (vllm.model_executor.models.opt.OPTForCausalLM method) (vllm.model_executor.models.opt.OPTModel method) (vllm.model_executor.models.orion.OrionForCausalLM method) (vllm.model_executor.models.orion.OrionModel method) (vllm.model_executor.models.ovis.Ovis method) (vllm.model_executor.models.paligemma.PaliGemmaForConditionalGeneration method) (vllm.model_executor.models.persimmon.PersimmonForCausalLM method) (vllm.model_executor.models.persimmon.PersimmonModel method) (vllm.model_executor.models.phi.PhiForCausalLM method) (vllm.model_executor.models.phi.PhiModel method) (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) (vllm.model_executor.models.phi3_small.Phi3SmallModel method) (vllm.model_executor.models.phi3v.Phi3VForCausalLM method) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM method) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM method) (vllm.model_executor.models.phimoe.PhiMoEModel method) (vllm.model_executor.models.pixtral.PixtralForConditionalGeneration method) (vllm.model_executor.models.pixtral.PixtralHFVisionModel method) (vllm.model_executor.models.plamo2.Plamo2ForCausalLM method) (vllm.model_executor.models.prithvi_geospatial_mae.PrithviGeoSpatialMAE method) (vllm.model_executor.models.qwen.QWenBaseModel method) (vllm.model_executor.models.qwen2.Qwen2EmbeddingModel method) (vllm.model_executor.models.qwen2.Qwen2ForCausalLM method) (vllm.model_executor.models.qwen2.Qwen2Model method) (vllm.model_executor.models.qwen2_5_omni_thinker.Qwen2_5OmniThinkerForConditionalGeneration method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration method) (vllm.model_executor.models.qwen2_audio.Qwen2AudioForConditionalGeneration method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeForCausalLM method) (vllm.model_executor.models.qwen2_moe.Qwen2MoeModel method) (vllm.model_executor.models.qwen2_rm.Qwen2RewardBaseModel method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionTransformer method) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration method) (vllm.model_executor.models.qwen3.Qwen3ForCausalLM method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeForCausalLM method) (vllm.model_executor.models.qwen3_moe.Qwen3MoeModel method) (vllm.model_executor.models.roberta.RobertaEmbeddingModel method) (vllm.model_executor.models.roberta.RobertaForSequenceClassification method) (vllm.model_executor.models.siglip.SiglipVisionModel method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) (vllm.model_executor.models.solar.SolarForCausalLM method) (vllm.model_executor.models.stablelm.StableLMEpochModel method) (vllm.model_executor.models.stablelm.StablelmForCausalLM method) (vllm.model_executor.models.starcoder2.Starcoder2ForCausalLM method) (vllm.model_executor.models.starcoder2.Starcoder2Model method) (vllm.model_executor.models.telechat2.TeleChat2ForCausalLM method) (vllm.model_executor.models.telechat2.TeleChat2Model method) (vllm.model_executor.models.transformers.TransformersForCausalLM method) (vllm.model_executor.models.transformers.TransformersModel method) (vllm.model_executor.models.ultravox.UltravoxModel method) (vllm.model_executor.models.utils.AutoWeightsLoader method) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration method) (vllm.model_executor.models.whisper.WhisperModel method) (vllm.model_executor.models.zamba2.Zamba2ForCausalLM method) (vllm.model_executor.models.zamba2.Zamba2Model method) load_with_tensorizer() (in module vllm.model_executor.model_loader.tensorizer) LoadConfig (class in vllm.config) LoaderFunction (in module vllm.model_executor.model_loader.weight_utils) LoadFormat (class in vllm.config) LoadLoRAAdapterRequest (class in vllm.entrypoints.openai.protocol) local_attn_metadata (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) local_block_ids (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.ReqMeta attribute) local_block_table (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata.LocalAttentionMetadata attribute) local_blocks (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) local_buffer_size (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig attribute) local_cache_dir (vllm.config.CompilationConfig attribute) local_hostname (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig attribute) local_interval_elapsed() (in module vllm.engine.metrics) local_max_query_len (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata.LocalAttentionMetadata attribute) local_max_seq_len (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata.LocalAttentionMetadata attribute) local_moe_fused() (vllm.model_executor.models.arctic.ArcticMoE method) local_path (vllm.entrypoints.openai.serving_models.PromptAdapterPath attribute) (vllm.lora.request.LoRARequest property) (vllm.prompt_adapter.request.PromptAdapterRequest property) local_query_start_loc (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata.LocalAttentionMetadata attribute) local_rank (vllm.distributed.parallel_state.GroupCoordinator attribute) local_reader_ranks (vllm.distributed.device_communicators.shm_broadcast.Handle attribute) local_scheduler_metadata (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata.LocalAttentionMetadata attribute) local_seqused_k (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata.LocalAttentionMetadata attribute) local_size (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) local_subscribe_addr (vllm.distributed.device_communicators.shm_broadcast.Handle attribute) LocalOrDistributedWorkerBase (class in vllm.worker.worker_base) LocalStridedBlockSparseAttn (class in vllm.attention.ops.blocksparse_attention.interface) log() (vllm.engine.metrics.LoggingStatLogger method) (vllm.engine.metrics.PrometheusStatLogger method) (vllm.engine.metrics_types.StatLoggerBase method) (vllm.v1.metrics.loggers.LoggingStatLogger method) (vllm.v1.metrics.loggers.StatLoggerBase method) (vllm.v1.spec_decode.metrics.SpecDecodingLogging method) log_engine_initialized() (vllm.v1.metrics.loggers.LoggingStatLogger method) (vllm.v1.metrics.loggers.PrometheusStatLogger method) (vllm.v1.metrics.loggers.StatLoggerBase method) log_graph_warmup_summary() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) log_inputs() (vllm.entrypoints.logger.RequestLogger method) log_metrics_info() (vllm.v1.metrics.loggers.PrometheusStatLogger method) log_non_default_args() (in module vllm.entrypoints.openai.cli_args) log_replacement() (in module vllm.model_executor.models.transformers) log_tracing_disabled_warning() (in module vllm.tracing) log_warmup() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) log_warnings() (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.cuda.NvmlCudaPlatform class method) logger (in module vllm._custom_ops) (in module vllm._ipex_ops) (in module vllm.adapter_commons.models) (in module vllm.attention.backends.dual_chunk_flash_attn) (in module vllm.attention.backends.flash_attn) (in module vllm.attention.backends.flashinfer) (in module vllm.attention.backends.hpu_attn) (in module vllm.attention.backends.ipex_attn) (in module vllm.attention.backends.pallas) (in module vllm.attention.backends.rocm_flash_attn) (in module vllm.attention.backends.torch_sdpa) (in module vllm.attention.backends.utils) (in module vllm.attention.backends.xformers) (in module vllm.attention.ops.flashmla) (in module vllm.attention.ops.triton_decode_attention) (in module vllm.attention.ops.triton_unified_attention) (in module vllm.attention.selector) (in module vllm.benchmarks.datasets) (in module vllm.compilation.activation_quant_fusion) (in module vllm.compilation.backends) (in module vllm.compilation.decorators) (in module vllm.compilation.fix_functionalization) (in module vllm.compilation.fusion) (in module vllm.compilation.monitor) (in module vllm.compilation.noop_elimination) (in module vllm.compilation.pass_manager) (in module vllm.compilation.sequence_parallelism) (in module vllm.compilation.vllm_inductor_pass) (in module vllm.compilation.wrapper) (in module vllm.config) (in module vllm.core.block.prefix_caching_block) (in module vllm.core.scheduler) (in module vllm.distributed.device_communicators.cuda_wrapper) (in module vllm.distributed.device_communicators.custom_all_reduce) (in module vllm.distributed.device_communicators.custom_all_reduce_utils) (in module vllm.distributed.device_communicators.pynccl) (in module vllm.distributed.device_communicators.pynccl_wrapper) (in module vllm.distributed.device_communicators.shm_broadcast) (in module vllm.distributed.device_communicators.tpu_communicator) (in module vllm.distributed.kv_events) (in module vllm.distributed.kv_transfer.kv_connector.factory) (in module vllm.distributed.kv_transfer.kv_connector.lmcache_connector) (in module vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector) (in module vllm.distributed.kv_transfer.kv_connector.simple_connector) (in module vllm.distributed.kv_transfer.kv_connector.utils) (in module vllm.distributed.kv_transfer.kv_connector.v1.base) (in module vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector) (in module vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) (in module vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector) (in module vllm.distributed.kv_transfer.kv_connector_agent) (in module vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store) (in module vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer) (in module vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe) (in module vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe) (in module vllm.distributed.parallel_state) (in module vllm.distributed.utils) (in module vllm.engine.arg_utils) (in module vllm.engine.async_llm_engine) (in module vllm.engine.llm_engine) (in module vllm.engine.metrics) (in module vllm.engine.multiprocessing.client) (in module vllm.engine.multiprocessing.engine) (in module vllm.engine.output_processor.multi_step) (in module vllm.engine.output_processor.single_step) (in module vllm.engine.protocol) (in module vllm.entrypoints.api_server) (in module vllm.entrypoints.chat_utils) (in module vllm.entrypoints.launcher) (in module vllm.entrypoints.llm) (in module vllm.entrypoints.logger) (in module vllm.entrypoints.openai.api_server) (in module vllm.entrypoints.openai.cli_args) (in module vllm.entrypoints.openai.protocol) (in module vllm.entrypoints.openai.serving_chat) (in module vllm.entrypoints.openai.serving_classification) (in module vllm.entrypoints.openai.serving_completion) (in module vllm.entrypoints.openai.serving_embedding) (in module vllm.entrypoints.openai.serving_engine) (in module vllm.entrypoints.openai.serving_models) (in module vllm.entrypoints.openai.serving_pooling) (in module vllm.entrypoints.openai.serving_score) (in module vllm.entrypoints.openai.serving_tokenization) (in module vllm.entrypoints.openai.serving_transcription) (in module vllm.entrypoints.openai.tool_parsers.abstract_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.deepseekv3_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.granite_20b_fc_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.granite_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.hermes_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.jamba_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.llama_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.mistral_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.phi4mini_tool_parser) (in module vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser) (in module vllm.entrypoints.ssl) (in module vllm.entrypoints.utils) (in module vllm.executor.executor_base) (in module vllm.executor.mp_distributed_executor) (in module vllm.executor.multiproc_worker_utils) (in module vllm.executor.ray_distributed_executor) (in module vllm.executor.ray_utils) (in module vllm.executor.uniproc_executor) (in module vllm.forward_context) (in module vllm.inputs.preprocess) (in module vllm.logger) (in module vllm.logging_utils.dump_input) (in module vllm.lora.models) (in module vllm.lora.peft_helper) (in module vllm.lora.punica_wrapper.punica_selector) (in module vllm.lora.resolver) (in module vllm.lora.utils) (in module vllm.lora.worker_manager) (in module vllm.model_executor.custom_op) (in module vllm.model_executor.guided_decoding) (in module vllm.model_executor.guided_decoding.guidance_logits_processors) (in module vllm.model_executor.guided_decoding.outlines_logits_processors) (in module vllm.model_executor.guided_decoding.xgrammar_decoding) (in module vllm.model_executor.layers.fused_moe.deep_gemm_moe) (in module vllm.model_executor.layers.fused_moe.fused_moe) (in module vllm.model_executor.layers.fused_moe.layer) (in module vllm.model_executor.layers.linear) (in module vllm.model_executor.layers.quantization.awq_marlin) (in module vllm.model_executor.layers.quantization.bitblas) (in module vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors) (in module vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe) (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8) (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16) (in module vllm.model_executor.layers.quantization.fbgemm_fp8) (in module vllm.model_executor.layers.quantization.fp8) (in module vllm.model_executor.layers.quantization.gguf) (in module vllm.model_executor.layers.quantization.gptq_bitblas) (in module vllm.model_executor.layers.quantization.gptq_marlin) (in module vllm.model_executor.layers.quantization.gptq_marlin_24) (in module vllm.model_executor.layers.quantization.hqq_marlin) (in module vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas) (in module vllm.model_executor.layers.quantization.kv_cache) (in module vllm.model_executor.layers.quantization.marlin) (in module vllm.model_executor.layers.quantization.modelopt) (in module vllm.model_executor.layers.quantization.ptpc_fp8) (in module vllm.model_executor.layers.quantization.qqq) (in module vllm.model_executor.layers.quantization.quark.quark) (in module vllm.model_executor.layers.quantization.quark.quark_moe) (in module vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8) (in module vllm.model_executor.layers.quantization.utils.fp8_utils) (in module vllm.model_executor.layers.quantization.utils.int8_utils) (in module vllm.model_executor.layers.quantization.utils.marlin_utils) (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) (in module vllm.model_executor.layers.rejection_sampler) (in module vllm.model_executor.model_loader.bitsandbytes_loader) (in module vllm.model_executor.model_loader.default_loader) (in module vllm.model_executor.model_loader.neuronx_distributed) (in module vllm.model_executor.model_loader.sharded_state_loader) (in module vllm.model_executor.model_loader.tensorizer) (in module vllm.model_executor.model_loader.tensorizer_loader) (in module vllm.model_executor.model_loader.utils) (in module vllm.model_executor.model_loader.weight_utils) (in module vllm.model_executor.models.arctic) (in module vllm.model_executor.models.bart) (in module vllm.model_executor.models.chameleon) (in module vllm.model_executor.models.eagle) (in module vllm.model_executor.models.gemma) (in module vllm.model_executor.models.gemma2) (in module vllm.model_executor.models.gemma3) (in module vllm.model_executor.models.gemma3_mm) (in module vllm.model_executor.models.gritlm) (in module vllm.model_executor.models.interfaces) (in module vllm.model_executor.models.interfaces_base) (in module vllm.model_executor.models.llama_eagle) (in module vllm.model_executor.models.llama_eagle3) (in module vllm.model_executor.models.mimo) (in module vllm.model_executor.models.mllama) (in module vllm.model_executor.models.olmoe) (in module vllm.model_executor.models.paligemma) (in module vllm.model_executor.models.phi3v) (in module vllm.model_executor.models.qwen2) (in module vllm.model_executor.models.qwen2_5_omni_thinker) (in module vllm.model_executor.models.qwen2_5_vl) (in module vllm.model_executor.models.qwen2_moe) (in module vllm.model_executor.models.qwen2_vl) (in module vllm.model_executor.models.qwen3) (in module vllm.model_executor.models.qwen3_moe) (in module vllm.model_executor.models.registry) (in module vllm.model_executor.models.transformers) (in module vllm.model_executor.models.utils) (in module vllm.model_executor.models.vision) (in module vllm.model_executor.models.whisper) (in module vllm.model_executor.parameter) (in module vllm.multimodal.hasher) (in module vllm.multimodal.processing) (in module vllm.multimodal.profiling) (in module vllm.multimodal.registry) (in module vllm.platforms) (in module vllm.platforms.cpu) (in module vllm.platforms.cuda) (in module vllm.platforms.hpu) (in module vllm.platforms.interface) (in module vllm.platforms.neuron) (in module vllm.platforms.rocm) (in module vllm.platforms.tpu) (in module vllm.platforms.xpu) (in module vllm.plugins) (in module vllm.prompt_adapter.models) (in module vllm.prompt_adapter.worker_manager) (in module vllm.reasoning.abs_reasoning_parsers) (in module vllm.reasoning.deepseek_r1_reasoning_parser) (in module vllm.reasoning.granite_reasoning_parser) (in module vllm.reasoning.qwen3_reasoning_parser) (in module vllm.sampling_params) (in module vllm.scripts) (in module vllm.spec_decode.draft_model_runner) (in module vllm.spec_decode.smaller_tp_proposer_worker) (in module vllm.spec_decode.spec_decode_worker) (in module vllm.tracing) (in module vllm.transformers_utils.chat_templates.registry) (in module vllm.transformers_utils.config) (in module vllm.transformers_utils.configs.arctic) (in module vllm.transformers_utils.configs.dbrx) (in module vllm.transformers_utils.configs.exaone) (in module vllm.transformers_utils.configs.jais) (in module vllm.transformers_utils.configs.nemotron) (in module vllm.transformers_utils.configs.solar) (in module vllm.transformers_utils.tokenizer) (in module vllm.transformers_utils.tokenizers.mistral) (in module vllm.transformers_utils.utils) (in module vllm.triton_utils.importing) (in module vllm.utils) (in module vllm.v1.attention.backends.flash_attn) (in module vllm.v1.attention.backends.flashinfer) (in module vllm.v1.attention.backends.mla.common) (in module vllm.v1.attention.backends.mla.flashmla) (in module vllm.v1.attention.backends.mla.triton_mla) (in module vllm.v1.attention.backends.pallas) (in module vllm.v1.attention.backends.triton_attn) (in module vllm.v1.core.block_pool) (in module vllm.v1.core.encoder_cache_manager) (in module vllm.v1.core.kv_cache_manager) (in module vllm.v1.core.kv_cache_utils) (in module vllm.v1.core.sched.scheduler) (in module vllm.v1.engine.async_llm) (in module vllm.v1.engine.core) (in module vllm.v1.engine.core_client) (in module vllm.v1.engine.detokenizer) (in module vllm.v1.engine.llm_engine) (in module vllm.v1.engine.logprobs) (in module vllm.v1.executor.multiproc_executor) (in module vllm.v1.kv_cache_interface) (in module vllm.v1.metrics.loggers) (in module vllm.v1.sample.ops.topk_topp_sampler) (in module vllm.v1.sample.rejection_sampler) (in module vllm.v1.serial_utils) (in module vllm.v1.spec_decode.eagle) (in module vllm.v1.spec_decode.metrics) (in module vllm.v1.structured_output) (in module vllm.v1.structured_output.backend_guidance) (in module vllm.v1.structured_output.backend_xgrammar) (in module vllm.v1.utils) (in module vllm.v1.worker.block_table) (in module vllm.v1.worker.gpu_model_runner) (in module vllm.v1.worker.gpu_worker) (in module vllm.v1.worker.lora_model_runner_mixin) (in module vllm.v1.worker.tpu_model_runner) (in module vllm.v1.worker.tpu_worker) (in module vllm.v1.worker.worker_base) (in module vllm.worker.cache_engine) (in module vllm.worker.cpu_model_runner) (in module vllm.worker.cpu_worker) (in module vllm.worker.enc_dec_model_runner) (in module vllm.worker.hpu_model_runner) (in module vllm.worker.hpu_worker) (in module vllm.worker.model_runner) (in module vllm.worker.model_runner_base) (in module vllm.worker.multi_step_model_runner) (in module vllm.worker.neuron_model_runner) (in module vllm.worker.neuron_worker) (in module vllm.worker.neuronx_distributed_model_runner) (in module vllm.worker.pooling_model_runner) (in module vllm.worker.tpu_model_runner) (in module vllm.worker.tpu_worker) (in module vllm.worker.worker) (in module vllm.worker.worker_base) (in module vllm.worker.xpu_model_runner) (in module vllm.worker.xpu_worker) LoggingStatLogger (class in vllm.engine.metrics) (class in vllm.v1.metrics.loggers) logit_bias (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) logit_bias_logits_processor() (in module vllm.entrypoints.openai.logits_processors) logits_as_input (vllm.lora.layers.LogitsProcessorWithLoRA property) logits_indices (vllm.v1.spec_decode.metadata.SpecDecodeMetadata attribute) logits_processor_pattern (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) logits_processors (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) logits_soft_cap (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.flashinfer.PerLayerParameters attribute) (vllm.v1.attention.backends.flashinfer.PerLayerParameters attribute) LogitsProcessor (class in vllm.model_executor.layers.logits_processor) (in module vllm.logits_process) LogitsProcessorConstructor (class in vllm.entrypoints.openai.protocol) LogitsProcessors (in module vllm.entrypoints.openai.protocol) LogitsProcessorWithLoRA (class in vllm.lora.layers) Logprob (class in vllm.sequence) logprob (vllm.entrypoints.openai.protocol.ChatCompletionLogProb attribute) (vllm.sequence.Logprob attribute) logprob_token_ids (vllm.v1.outputs.LogprobsLists attribute) (vllm.v1.outputs.LogprobsTensors attribute) logprobs (vllm.beam_search.BeamSearchSequence attribute) (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.CompletionResponseStreamChoice attribute) (vllm.model_executor.layers.sampler.SamplerOutput attribute) (vllm.outputs.CompletionOutput attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.sequence.SequenceOutput attribute) (vllm.spec_decode.interfaces.SpeculativeScores attribute) (vllm.v1.engine.logprobs.LogprobsProcessor attribute) (vllm.v1.outputs.LogprobsLists attribute) (vllm.v1.outputs.LogprobsTensors attribute) (vllm.v1.outputs.ModelRunnerOutput attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) (vllm.worker.multi_step_model_runner.ModelOutput attribute) logprobs_tensors (vllm.v1.outputs.SamplerOutput attribute) LogprobsLists (class in vllm.v1.outputs) LogprobsProcessor (class in vllm.v1.engine.logprobs) LogprobsTensors (class in vllm.v1.outputs) long_lora_indices (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase property) long_lora_max_len (vllm.lora.request.LoRARequest attribute) long_lora_scaling_factors (vllm.config.LoRAConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) long_prefill_token_threshold (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) long_prefills (vllm.core.scheduler.PartialPrefillMetadata attribute) LongContextLoRAContext (class in vllm.lora.models) lora_alpha (vllm.lora.peft_helper.PEFTHelper attribute) (vllm.transformers_utils.configs.arctic.ArcticLoRAConfig attribute) lora_config (vllm.config.VllmConfig attribute) lora_dtype (vllm.config.LoRAConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) lora_enabled (vllm.core.scheduler.Scheduler property) lora_extra_vocab_size (vllm.config.LoRAConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) lora_id (vllm.distributed.kv_events.BlockStored attribute) lora_ids (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) lora_index_mapping (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) lora_int_id (vllm.entrypoints.openai.protocol.UnloadLoRAAdapterRequest attribute) (vllm.lora.request.LoRARequest attribute) (vllm.sequence.Sequence property) (vllm.sequence.SequenceGroup property) (vllm.sequence.SequenceGroupMetadata property) lora_local_path (vllm.lora.request.LoRARequest attribute) lora_mapping (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) lora_name (vllm.entrypoints.openai.protocol.LoadLoRAAdapterRequest attribute) (vllm.entrypoints.openai.protocol.UnloadLoRAAdapterRequest attribute) (vllm.lora.request.LoRARequest attribute) lora_path (vllm.entrypoints.openai.protocol.LoadLoRAAdapterRequest attribute) (vllm.lora.request.LoRARequest attribute) lora_path_on_disk() (in module vllm.benchmarks.datasets) lora_prompt_mapping (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) lora_r (vllm.transformers_utils.configs.arctic.ArcticLoRAConfig attribute) LORA_RANK_BLOCK (in module vllm.lora.ops.xla_ops.pallas) lora_request (vllm.benchmarks.datasets.SampleRequest attribute) (vllm.engine.multiprocessing.RPCLoadAdapterRequest attribute) (vllm.engine.multiprocessing.RPCProcessRequest attribute) (vllm.entrypoints.openai.serving_engine.ServeContext attribute) (vllm.outputs.CompletionOutput attribute) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.engine.EngineCoreRequest attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) lora_requests (vllm.core.scheduler.SchedulerOutputs property) (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) lora_slots (vllm.lora.models.LoRAModelManager property) lora_token_start_loc (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta attribute) lora_tokenizer_cache (in module vllm.benchmarks.datasets) lora_vocab_padding_size (vllm.config.LoRAConfig attribute) LORA_WARMUP_RANK (in module vllm.worker.enc_dec_model_runner) (in module vllm.worker.hpu_model_runner) (in module vllm.worker.model_runner) (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin attribute) LoRAConfig (class in vllm.config) LoRADType (in module vllm.config) LoRAKernelMeta (class in vllm.lora.ops.triton_ops.lora_kernel_metadata) LoRALayerWeights (class in vllm.lora.lora) LoRALRUCache (class in vllm.lora.models) LoRAMapping (class in vllm.lora.layers) LoRAModel (class in vllm.lora.models) LoRAModelManager (class in vllm.lora.models) LoRAModelRunnerMixin (class in vllm.v1.worker.lora_model_runner_mixin) LoRAModulePath (class in vllm.entrypoints.openai.serving_models) LoRANotSupportedWorkerBase (class in vllm.worker.worker_base) LoRAParserAction (class in vllm.entrypoints.openai.cli_args) LoRARequest (class in vllm.lora.request) LoRARequestStates (class in vllm.v1.metrics.stats) LoRAResolver (class in vllm.lora.resolver) LoRAResolverRegistry (in module vllm.lora.resolver) LoRAStats (class in vllm.v1.metrics.stats) LRU (vllm.core.evictor.EvictionPolicy attribute) LRUCache (class in vllm.utils) LRUCacheLoRAModelManager (class in vllm.lora.models) LRUCachePromptAdapterModelManager (class in vllm.prompt_adapter.models) LRUCacheWorkerLoRAManager (class in vllm.lora.worker_manager) LRUCacheWorkerPromptAdapterManager (class in vllm.prompt_adapter.worker_manager) LRUEvictor (class in vllm.core.evictor) M M (in module vllm.v1.attention.backends.mla.common) machete_mm() (in module vllm._custom_ops) machete_prepack_B() (in module vllm._custom_ops) MACHETE_PREPACKED_BLOCK_SHAPE (in module vllm.model_executor.layers.quantization.utils.machete_utils) MACHETE_SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.utils.machete_utils) machete_supported_schedules() (in module vllm._custom_ops) MacheteLinearKernel (class in vllm.model_executor.layers.quantization.kernels.mixed_precision.machete) main() (in module vllm.benchmarks.latency) (in module vllm.benchmarks.serve) (in module vllm.benchmarks.throughput) (in module vllm.collect_env) (in module vllm.entrypoints.cli.main) (in module vllm.entrypoints.openai.run_batch) (in module vllm.scripts) main_input_name (vllm.model_executor.models.blip.BlipVisionModel attribute) (vllm.model_executor.models.clip.CLIPVisionModel attribute) (vllm.model_executor.models.siglip.SiglipVisionModel attribute) major (vllm.platforms.interface.DeviceCapability attribute) make() (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta static method) make_arg_parser() (in module vllm.entrypoints.openai.cli_args) make_async() (in module vllm.utils) make_async_error_request_output() (in module vllm.entrypoints.openai.run_batch) make_client() (vllm.v1.engine.core_client.EngineCoreClient static method) make_compiler() (in module vllm.compilation.backends) make_data_socket() (vllm.engine.multiprocessing.engine.MQLLMEngine method) make_dummy() (vllm.v1.spec_decode.metadata.SpecDecodeMetadata class method) make_empty_intermediate_tensors() (vllm.model_executor.models.deepseek_v2.DeepseekV2ForCausalLM method) (vllm.model_executor.models.granite.GraniteForCausalLM method) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM method) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM method) (vllm.model_executor.models.interfaces.SupportsPP method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01ForCausalLM method) make_empty_intermediate_tensors_factory() (in module vllm.model_executor.models.utils) make_error_request_output() (in module vllm.entrypoints.openai.run_batch) make_evictor() (in module vllm.core.evictor) make_expert_params_mapping() (vllm.model_executor.layers.fused_moe.layer.FusedMoE class method) make_layers() (in module vllm.model_executor.models.utils) make_local_attention_virtual_batches() (in module vllm.v1.attention.backends.flash_attn) make_lora_inputs() (vllm.v1.worker.gpu_input_batch.InputBatch method) make_meta() (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.ReqMeta static method) make_metadata() (vllm.attention.backends.abstract.AttentionBackend class method) make_mistral_chat_completion_request() (in module vllm.transformers_utils.tokenizers.mistral) make_model_input_from_broadcasted_tensor_dict() (vllm.worker.cpu_enc_dec_model_runner.CPUEncoderDecoderModelRunner method) (vllm.worker.cpu_model_runner.CPUModelRunner method) (vllm.worker.cpu_pooling_model_runner.CPUPoolingModelRunner method) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelRunner method) (vllm.worker.hpu_model_runner.HPUModelRunner method) (vllm.worker.model_runner.ModelRunner method) (vllm.worker.model_runner_base.ModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) (vllm.worker.neuron_model_runner.NeuronModelRunner method) (vllm.worker.pooling_model_runner.PoolingModelRunner method) (vllm.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.xpu_model_runner.XPUModelRunner method) make_ndarray_with_pad() (in module vllm.utils) make_prefix_cache_stats() (vllm.v1.core.kv_cache_manager.KVCacheManager method) make_request_output() (vllm.v1.engine.output_processor.RequestState method) make_spec_decoding_stats() (vllm.v1.core.sched.scheduler.Scheduler method) make_stats() (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) make_tensor_with_pad() (in module vllm.utils) make_worker_process() (vllm.v1.executor.multiproc_executor.WorkerProc static method) make_zmq_socket() (in module vllm.utils) mamba (vllm.utils.LayerBlockType attribute) Mamba2DecoderLayer (class in vllm.model_executor.models.mamba2) Mamba2ForCausalLM (class in vllm.model_executor.models.mamba2) Mamba2Metadata (class in vllm.model_executor.layers.mamba.mamba2_metadata) Mamba2Model (class in vllm.model_executor.models.mamba2) mamba_chunk_scan_combined() (in module vllm.model_executor.layers.mamba.ops.ssd_combined) mamba_d_conv (vllm.model_executor.models.plamo2.Plamo2Config attribute) mamba_d_state (vllm.model_executor.models.plamo2.Plamo2Config attribute) mamba_num_heads (vllm.model_executor.models.plamo2.Plamo2Config attribute) mamba_step (vllm.model_executor.models.plamo2.Plamo2Config attribute) mamba_v2_sharded_weight_loader() (in module vllm.model_executor.layers.mamba.mamba_mixer2) MambaCacheManager (class in vllm.model_executor.models.mamba_cache) MambaCacheParams (class in vllm.model_executor.models.mamba_cache) MambaDecoderLayer (class in vllm.model_executor.models.mamba) MambaForCausalLM (class in vllm.model_executor.models.mamba) MambaMixer (class in vllm.model_executor.layers.mamba.mamba_mixer) MambaMixer2 (class in vllm.model_executor.layers.mamba.mamba_mixer2) MambaModel (class in vllm.model_executor.models.mamba) MantisForConditionalGeneration (class in vllm.model_executor.models.llava) MantisMultiModalProcessor (class in vllm.model_executor.models.llava) MantisProcessingInfo (class in vllm.model_executor.models.llava) mantissa (vllm.scalar_type.ScalarType attribute) mantissa_bits (vllm.transformers_utils.configs.arctic.ArcticQuantizationConfig attribute) map_model_name_to_mtp_param_name() (vllm.model_executor.models.mimo_mtp.MiMoMTP method) MAPPING_PROMPT_FUNCS (vllm.benchmarks.datasets.NextEditPredictionDataset attribute) mark_blocks_as_accessed() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) mark_blocks_as_computed() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) marlin_24_quantize() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) marlin_gemm() (in module vllm._custom_ops) marlin_is_k_full() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_make_empty_g_idx() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_make_empty_zp() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_make_workspace() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_make_workspace_new() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_moe_permute_scales() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_permute_scales() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_permute_scales_24() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) marlin_permute_weights() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test) marlin_qqq_gemm() (in module vllm._custom_ops) MARLIN_QQQ_MAX_PARALLEL (in module vllm.model_executor.layers.quantization.qqq) MARLIN_QQQ_MIN_THREAD_K (in module vllm.model_executor.layers.quantization.qqq) MARLIN_QQQ_MIN_THREAD_N (in module vllm.model_executor.layers.quantization.qqq) marlin_qqq_permute_scales() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_qqq) marlin_qqq_quantize() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_qqq) MARLIN_QQQ_SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.qqq) MARLIN_QQQ_SUPPORTED_NUM_BITS (in module vllm.model_executor.layers.quantization.qqq) MARLIN_QQQ_SUPPORTED_SYM (in module vllm.model_executor.layers.quantization.qqq) MARLIN_QQQ_TILE (in module vllm.model_executor.layers.quantization.qqq) marlin_qqq_weights() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_qqq) marlin_quant_fp8_torch() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) marlin_quantize() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test) marlin_repeat_scales_on_all_ranks() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_sort_g_idx() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) MARLIN_SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.utils.marlin_utils) marlin_tile_size (vllm.model_executor.parameter.PackedColumnParameter property) (vllm.model_executor.parameter.PackedvLLMParameter property) marlin_weights() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test) marlin_zero_points() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) MarlinConfig (class in vllm.model_executor.layers.quantization.marlin) MarlinLinearKernel (class in vllm.model_executor.layers.quantization.kernels.mixed_precision.marlin) MarlinLinearMethod (class in vllm.model_executor.layers.quantization.marlin) MarlinWorkspace (class in vllm.model_executor.layers.quantization.utils.marlin_utils_test) mask_creator() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) masked_load() (in module vllm.attention.ops.triton_flash_attention) masked_softmax() (in module vllm.model_executor.models.phi4mm_utils) master_server_address (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig attribute) matcher (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar attribute) matchers (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) materialize_nested() (vllm.model_executor.layers.quantization.gguf.GGUFUninitializedParameter method) MATMUL_LAYOUT (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel attribute) matryoshka_dimensions (vllm.config.ModelConfig property) max() (vllm.scalar_type.ScalarType method) MAX_AUDIO_CLIP_FILESIZE_MB (in module vllm.entrypoints.openai.serving_transcription) max_capture_size (vllm.config.CompilationConfig attribute) max_completion_tokens (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) max_concurrent_batches (vllm.v1.executor.abstract.Executor property) (vllm.v1.executor.multiproc_executor.MultiprocExecutor property) (vllm.v1.executor.ray_distributed_executor.RayDistributedExecutor property) max_context_length (vllm.model_executor.models.ultravox.ModifiedWhisperEncoder property) max_cpu_loras (vllm.config.LoRAConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_cpu_prompt_adapters (vllm.config.PromptAdapterConfig attribute) max_crops() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) max_decode_query_len (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) max_decode_seq_len (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) (vllm.attention.ops.paged_attn.PagedAttentionMetadata attribute) max_encoder_seq_len (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) max_fn() (in module vllm.attention.ops.triton_flash_attention) max_kv_len (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) max_logprobs (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_long_partial_prefills (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_lora (vllm.engine.metrics_types.Stats attribute) max_lora_rank (vllm.config.LoRAConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_loras (vllm.config.LoRAConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_memory_usage_bytes() (vllm.v1.kv_cache_interface.FullAttentionSpec method) (vllm.v1.kv_cache_interface.KVCacheSpec method) (vllm.v1.kv_cache_interface.SlidingWindowSpec method) max_model_len (vllm.config.ModelConfig attribute) (vllm.config.SchedulerConfig attribute) (vllm.config.SpeculativeConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.entrypoints.openai.protocol.ModelCard attribute) (vllm.entrypoints.openai.protocol.TokenizeResponse attribute) (vllm.worker.cpu_worker.CPUWorker property) (vllm.worker.hpu_worker.HPUWorker property) (vllm.worker.worker.Worker property) max_num_batched_tokens (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_num_encoder_input_tokens (vllm.config.SchedulerConfig attribute) max_num_generation_tokens (vllm.v1.engine.parallel_sampling.ParentRequest attribute) max_num_generation_tokens_requests (vllm.engine.metrics_types.Stats attribute) max_num_logprobs (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.worker.gpu_input_batch.InputBatch property) max_num_partial_prefills (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_num_seqs (vllm.config.SchedulerConfig attribute) (vllm.core.scheduler.SchedulingBudget attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_parallel_loading_workers (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_patches_per_side (vllm.model_executor.models.pixtral.VisionTransformer property) max_prefill_seq_len (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) max_prompt_adapter_token (vllm.config.PromptAdapterConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_prompt_adapters (vllm.config.PromptAdapterConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_query_len (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata attribute) max_queue_size (vllm.config.KVEventsConfig attribute) max_seq_len (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) max_seq_len_inter (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) max_seq_len_intra (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) max_seq_len_succ (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) max_seq_len_to_capture (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) max_seq_lens (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata.ChunkedContextMetadata attribute) max_seqlen (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) (vllm.attention.backends.ipex_attn.IpexAttnMetadata attribute) max_seqlens_k (vllm.attention.ops.triton_flash_attention.MetaData attribute) max_seqlens_q (vllm.attention.ops.triton_flash_attention.MetaData attribute) MAX_SPEC_LEN (in module vllm.v1.sample.rejection_sampler) MAX_TENSOR_DIMENSIONS (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe attribute) max_threads (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) max_token_id (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) max_tokens (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.BeamSearchParams attribute) (vllm.sampling_params.SamplingParams attribute) max_tokens_param (vllm.v1.metrics.stats.FinishedRequestStats attribute) MAX_TOKENS_PER_EXPERT (in module vllm.model_executor.layers.fused_moe.cutlass_moe) max_tokens_requests (vllm.engine.metrics_types.Stats attribute) MaxImageTokenMeta (class in vllm.model_executor.models.kimi_vl) maybe_advance_frozen_model_input() (vllm.worker.multi_step_model_runner.StatefulModelInput method) maybe_advance_sampling_metadata() (vllm.worker.multi_step_model_runner.StatefulModelInput method) maybe_assemble_group() (vllm.sequence.ParallelSampleSequenceGroup method) (vllm.sequence.SequenceGroupBase method) maybe_backend_fallback() (in module vllm.model_executor.guided_decoding) maybe_collect_rejsample_metrics() (vllm.spec_decode.metrics.AsyncMetricsCollector method) maybe_convert_int() (in module vllm.envs) maybe_create_device_identity() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) maybe_dummy_run_with_lora() (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin method) maybe_increment_partial_prefills() (vllm.core.scheduler.PartialPrefillMetadata method) maybe_load_lm_head_weight() (vllm.spec_decode.multi_step_worker.MultiStepWorker method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) maybe_mock_device_tensors() (in module vllm.spec_decode.util) maybe_model_redirect() (in module vllm.transformers_utils.utils) maybe_offload_to_cpu() (in module vllm.model_executor.models.utils) maybe_oversample_requests() (vllm.benchmarks.datasets.BenchmarkDataset method) maybe_prefix() (in module vllm.model_executor.models.utils) maybe_pull_model_tokenizer_for_s3() (vllm.config.ModelConfig method) maybe_pythonize() (vllm.worker.multi_step_model_runner.ModelOutput method) maybe_quantize_fp8() (in module vllm.attention.ops.triton_flash_attention) maybe_register_config_serialize_by_value() (in module vllm.transformers_utils.config) maybe_remap_kv_scale_name() (in module vllm.model_executor.model_loader.weight_utils) maybe_remap_mistral() (vllm.model_executor.models.llama.LlamaForCausalLM method) maybe_save_kv_layer_to_connector() (in module vllm.attention.layer) maybe_serialize_tool_calls() (in module vllm.transformers_utils.tokenizers.mistral) maybe_set_first_scheduled_time() (vllm.sequence.SequenceGroup method) maybe_set_first_token_time() (vllm.sequence.SequenceGroup method) maybe_setup_kv_connector() (vllm.v1.worker.gpu_model_runner.GPUModelRunner static method) maybe_stop_sequence() (vllm.engine.output_processor.stop_checker.StopChecker method) maybe_update_spec_decode_metrics() (vllm.engine.metrics_types.StatLoggerBase method) maybe_wait_for_kv_save() (vllm.v1.worker.gpu_model_runner.GPUModelRunner static method) maybe_warn_marlin_atomic_add() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) maybe_warn_marlin_atomic_add_env() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) maybe_wrap_worker() (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker class method) MaybeDeferredSampleResultType (in module vllm.model_executor.layers.sampler) MEAN (vllm.model_executor.layers.pooler.PoolingType attribute) mean_e2el_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) mean_itl_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) mean_tpot_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) mean_ttft_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) MeanPool (class in vllm.model_executor.layers.pooler) MeanVarianceNormLayer (class in vllm.model_executor.models.phi4mm_utils) measure() (vllm.utils.MemorySnapshot method) MediaConnector (class in vllm.multimodal.utils) MediaIO (class in vllm.multimodal.base) median_e2el_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) median_itl_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) median_tpot_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) median_ttft_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) Medusa (class in vllm.model_executor.models.medusa) MedusaConfig (class in vllm.transformers_utils.configs.medusa) MedusaWorker (class in vllm.spec_decode.medusa_worker) mem_margin (vllm.worker.hpu_model_runner.HPUModelRunnerBase property) memory_dims() (vllm.model_executor.models.phi4mm_utils.AttBlock method) memory_profiling() (in module vllm.utils) MemoryProfilingResult (class in vllm.utils) MemorySnapshot (class in vllm.utils) merge_and_sort_multimodal_metadata() (in module vllm.multimodal.utils) merge_async_iterators() (in module vllm.utils) merge_attn_states() (in module vllm._custom_ops) (in module vllm.attention.ops.merge_attn_states) (in module vllm.attention.ops.triton_merge_attn_states) merge_attn_states_kernel() (in module vllm.attention.ops.triton_merge_attn_states) merge_multimodal_embeddings() (in module vllm.model_executor.models.utils) merge_multimodal_embeddings_from_map() (in module vllm.model_executor.models.utils) MergedColumnParallelLinear (class in vllm.model_executor.layers.linear) MergedColumnParallelLinearWithLoRA (class in vllm.lora.layers) MergedColumnParallelLinearWithShardedLoRA (class in vllm.lora.fully_sharded_layers) MergedQKVParallelLinearWithLoRA (class in vllm.lora.layers) MergedQKVParallelLinearWithShardedLoRA (class in vllm.lora.fully_sharded_layers) message (vllm.entrypoints.openai.protocol.ChatCompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.ErrorResponse attribute) message_format (vllm.model_executor.models.molmo.MolmoProcessorWrapper property) MessageQueue (class in vllm.distributed.device_communicators.shm_broadcast) messages (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) meta_args() (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta method) meta_size() (in module vllm._custom_ops) MetaData (class in vllm.attention.ops.triton_flash_attention) Metadata (in module vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe) metadata (vllm.model_executor.guided_decoding.xgrammar_decoding.TokenizerData attribute) metadata_backend (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig attribute) METADATA_DTYPE (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe attribute) METADATA_LENGTH (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe attribute) metadata_server (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig attribute) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig attribute) method (vllm.config.SpeculativeConfig attribute) (vllm.entrypoints.openai.protocol.BatchRequestInput attribute) method_has_implemented_embedding() (in module vllm.model_executor.layers.quantization.base_config) Metrics (class in vllm.engine.metrics) metrics_context() (vllm.compilation.compiler_interface.InductorAdaptor method) metrics_info() (vllm.config.CacheConfig method) (vllm.config.SupportsMetricsInfo method) MiddleAllReduceRMSNormPattern (class in vllm.compilation.sequence_parallelism) MILLISECONDS_TO_SECONDS_CONVERSION (in module vllm.benchmarks.serve) MiMoForCausalLM (class in vllm.model_executor.models.mimo) MiMoModel (class in vllm.model_executor.models.mimo) MiMoMTP (class in vllm.model_executor.models.mimo_mtp) MiMoMultiTokenPredictor (class in vllm.model_executor.models.mimo_mtp) MiMoMultiTokenPredictorLayer (class in vllm.model_executor.models.mimo_mtp) min() (vllm.scalar_type.ScalarType method) MIN_IPEX_VERSION (in module vllm.model_executor.layers.quantization.ipex_quant) MIN_NUM_SEQS (in module vllm.v1.worker.tpu_model_runner) min_p (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) min_ps (vllm.model_executor.sampling_metadata.SamplingTensors attribute) min_tokens (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) MiniCPM3Attention (class in vllm.model_executor.models.minicpm3) MiniCPM3DecoderLayer (class in vllm.model_executor.models.minicpm3) MiniCPM3ForCausalLM (class in vllm.model_executor.models.minicpm3) MiniCPM3Model (class in vllm.model_executor.models.minicpm3) MiniCPMAttention (class in vllm.model_executor.models.minicpm) MiniCPMDecoderLayer (class in vllm.model_executor.models.minicpm) MiniCPMForCausalLM (class in vllm.model_executor.models.minicpm) MiniCPMMLP (class in vllm.model_executor.models.minicpm) MiniCPMModel (class in vllm.model_executor.models.minicpm) MiniCPMMoE (class in vllm.model_executor.models.minicpm) MiniCPMO (class in vllm.model_executor.models.minicpmo) MiniCPMOAudioEmbeddingInputs (class in vllm.model_executor.models.minicpmo) MiniCPMOAudioEmbeddingItems (class in vllm.model_executor.models.minicpmo) MiniCPMOAudioFeatureInputs (class in vllm.model_executor.models.minicpmo) MiniCPMOAudioInputs (in module vllm.model_executor.models.minicpmo) MiniCPMODummyInputsBuilder (class in vllm.model_executor.models.minicpmo) MiniCPMOMultiModalDataParser (class in vllm.model_executor.models.minicpmo) MiniCPMOMultiModalProcessor (class in vllm.model_executor.models.minicpmo) MiniCPMOProcessingInfo (class in vllm.model_executor.models.minicpmo) MiniCPMV (class in vllm.model_executor.models.minicpmv) MiniCPMV2_0 (class in vllm.model_executor.models.minicpmv) MiniCPMV2_5 (class in vllm.model_executor.models.minicpmv) MiniCPMV2_6 (class in vllm.model_executor.models.minicpmv) MiniCPMVBaseModel (class in vllm.model_executor.models.minicpmv) MiniCPMVDummyInputsBuilder (class in vllm.model_executor.models.minicpmv) MiniCPMVImageEmbeddingInputs (class in vllm.model_executor.models.minicpmv) MiniCPMVImageEmbeddingItems (class in vllm.model_executor.models.minicpmv) MiniCPMVImageInputs (in module vllm.model_executor.models.minicpmv) MiniCPMVImagePixelInputs (class in vllm.model_executor.models.minicpmv) MiniCPMVMultiModalDataParser (class in vllm.model_executor.models.minicpmv) MiniCPMVMultiModalProcessor (class in vllm.model_executor.models.minicpmv) MiniCPMVProcessingInfo (class in vllm.model_executor.models.minicpmv) MiniCPMVVideoEmbeddingItems (class in vllm.model_executor.models.minicpmv) MiniCPMWhisperEncoder (class in vllm.model_executor.models.minicpmo) MiniCPMWhisperEncoderLayer (class in vllm.model_executor.models.minicpmo) minimax_cache (vllm.model_executor.models.minimax_cache.MinimaxCacheParams attribute) MinimaxCacheManager (class in vllm.model_executor.models.minimax_cache) MinimaxCacheParams (class in vllm.model_executor.models.minimax_cache) MiniMaxText01Attention (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01Config (class in vllm.transformers_utils.configs.minimax_text_01) MiniMaxText01DecoderLayer (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01ForCausalLM (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01LinearAttention (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01LinearKernel (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01MLP (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01Model (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01MoE (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01RMSNormTP (class in vllm.model_executor.models.minimax_text_01) MiniMaxText01RotaryEmbedding (class in vllm.model_executor.models.minimax_text_01) MiniMaxVL01Config (class in vllm.transformers_utils.configs.minimax_vl_01) MiniMaxVL01DummyInputsBuilder (class in vllm.model_executor.models.minimax_vl_01) MiniMaxVL01ForConditionalGeneration (class in vllm.model_executor.models.minimax_vl_01) MiniMaxVL01ImageEmbeddingInputs (class in vllm.model_executor.models.minimax_vl_01) MiniMaxVL01ImageInputs (in module vllm.model_executor.models.minimax_vl_01) MiniMaxVL01ImagePixelInputs (class in vllm.model_executor.models.minimax_vl_01) MiniMaxVL01MultiModalProcessor (class in vllm.model_executor.models.minimax_vl_01) MiniMaxVL01MultiModalProjector (class in vllm.model_executor.models.minimax_vl_01) MiniMaxVL01ProcessingInfo (class in vllm.model_executor.models.minimax_vl_01) MINIMUM_BITBLAS_VERSION (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) minor (vllm.platforms.interface.DeviceCapability attribute) MirroredProcessingCache (class in vllm.v1.engine.mm_input_cache) MISTRAL (vllm.config.LoadFormat attribute) (vllm.transformers_utils.config.ConfigFormat attribute) Mistral3DummyInputsBuilder (class in vllm.model_executor.models.mistral3) Mistral3ForConditionalGeneration (class in vllm.model_executor.models.mistral3) Mistral3ImagePixelInputs (class in vllm.model_executor.models.mistral3) Mistral3MultiModalProcessor (class in vllm.model_executor.models.mistral3) Mistral3MultiModalProjector (class in vllm.model_executor.models.mistral3) Mistral3PatchMerger (class in vllm.model_executor.models.mistral3) Mistral3ProcessingInfo (class in vllm.model_executor.models.mistral3) MISTRAL_CONFIG_NAME (in module vllm.transformers_utils.config) mistral_mapping (vllm.model_executor.models.llama.LlamaForCausalLM attribute) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM attribute) MistralTokenizer (class in vllm.transformers_utils.tokenizers.mistral) MistralToolCall (class in vllm.entrypoints.openai.tool_parsers.mistral_tool_parser) MistralToolParser (class in vllm.entrypoints.openai.tool_parsers.mistral_tool_parser) MIXED (vllm.worker.hpu_model_runner.BatchType attribute) Mixer2RMSNormGated (class in vllm.model_executor.layers.mamba.mamba_mixer2) MixtralAttention (class in vllm.model_executor.models.mixtral) (class in vllm.model_executor.models.mixtral_quant) MixtralDecoderLayer (class in vllm.model_executor.models.mixtral) (class in vllm.model_executor.models.mixtral_quant) MixtralForCausalLM (class in vllm.model_executor.models.mixtral) (class in vllm.model_executor.models.mixtral_quant) MixtralMLP (class in vllm.model_executor.models.mixtral_quant) MixtralModel (class in vllm.model_executor.models.mixtral) (class in vllm.model_executor.models.mixtral_quant) MixtralMoE (class in vllm.model_executor.models.mixtral) (class in vllm.model_executor.models.mixtral_quant) mla_decode_fwd_fake() (in module vllm.attention.ops.rocm_aiter_mla) mla_decode_fwd_impl() (in module vllm.attention.ops.rocm_aiter_mla) mla_decode_kvcache_cpu() (in module vllm._custom_ops) MLAAttentionImpl (class in vllm.attention.backends.abstract) MLACommonBackend (class in vllm.attention.backends.mla.common) (class in vllm.v1.attention.backends.mla.common) MLACommonDecodeMetadata (class in vllm.v1.attention.backends.mla.common) MLACommonImpl (class in vllm.attention.backends.mla.common) (class in vllm.v1.attention.backends.mla.common) MLACommonMetadata (class in vllm.attention.backends.mla.common) (class in vllm.v1.attention.backends.mla.common) MLACommonMetadataBuilder (class in vllm.attention.backends.mla.common) (class in vllm.v1.attention.backends.mla.common) MLACommonPrefillMetadata (class in vllm.v1.attention.backends.mla.common) MLACommonPrefillMetadata.ChunkedContextMetadata (class in vllm.v1.attention.backends.mla.common) MLACommonState (class in vllm.attention.backends.mla.common) MLADims (class in vllm.attention.backends.utils) Mllama4DummyInputsBuilder (class in vllm.model_executor.models.mllama4) Mllama4MultiModalProcessor (class in vllm.model_executor.models.mllama4) Mllama4ProcessingInfo (class in vllm.model_executor.models.mllama4) MllamaConfig (class in vllm.transformers_utils.configs.mllama) MllamaCrossAttentionDecoderLayer (class in vllm.model_executor.models.mllama) MllamaDummyInputsBuilder (class in vllm.model_executor.models.mllama) MllamaForCausalLM (class in vllm.model_executor.models.mllama) MllamaForConditionalGeneration (class in vllm.model_executor.models.mllama) MllamaImagePixelInputs (class in vllm.model_executor.models.mllama) MllamaMultiModalProcessor (class in vllm.model_executor.models.mllama) MllamaPrecomputedAspectRatioEmbedding (class in vllm.model_executor.models.mllama) MllamaPrecomputedPositionEmbedding (class in vllm.model_executor.models.mllama) MllamaProcessingInfo (class in vllm.model_executor.models.mllama) MllamaTextConfig (class in vllm.transformers_utils.configs.mllama) MllamaTextCrossAttention (class in vllm.model_executor.models.mllama) MllamaTextModel (class in vllm.model_executor.models.mllama) MllamaTextRMSNorm (class in vllm.model_executor.models.mllama) MllamaVisionEncoder (class in vllm.model_executor.models.mllama) MllamaVisionEncoderLayer (class in vllm.model_executor.models.mllama) MllamaVisionModel (class in vllm.model_executor.models.mllama) MllamaVisionSdpaAttention (class in vllm.model_executor.models.mllama) Mlp (class in vllm.model_executor.models.florence2) mlp (vllm.model_executor.models.module_mapping.ModelKeys attribute) MLP2 (class in vllm.model_executor.models.moonvit) mlp_ratio (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) MlpProjector (class in vllm.model_executor.models.deepseek_vl2) MlpProjectorConfig (class in vllm.transformers_utils.configs.deepseek_vl2) MLPSpeculator (class in vllm.model_executor.models.mlp_speculator) MLPSpeculatorConfig (class in vllm.transformers_utils.configs.mlp_speculator) MLPSpeculatorLayerNorm (class in vllm.model_executor.models.mlp_speculator) MLPSpeculatorWorker (class in vllm.spec_decode.mlp_speculator_worker) mm_data (vllm.multimodal.profiling.ProcessorInputs attribute) mm_hashes (vllm.multimodal.inputs.MultiModalInputs attribute) (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.engine.EngineCoreRequest attribute) mm_inputs (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.engine.EngineCoreRequest attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) mm_k() (in module vllm.lora.ops.triton_ops.kernel_utils) mm_kwargs (vllm.multimodal.inputs.MultiModalInputs attribute) MM_PARSER_MAP (in module vllm.entrypoints.chat_utils) mm_placeholder_counts() (vllm.entrypoints.chat_utils.BaseMultiModalContentParser method) mm_placeholders (vllm.multimodal.inputs.MultiModalInputs attribute) (vllm.v1.engine.EngineCoreRequest attribute) mm_positions (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) mm_processor_kwargs (vllm.beam_search.BeamSearchSequence attribute) (vllm.config.ModelConfig attribute) (vllm.config.MultiModalConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) (vllm.inputs.data.ExplicitEncoderDecoderPrompt attribute) (vllm.inputs.data.TextPrompt attribute) (vllm.inputs.data.TokensPrompt attribute) mm_projector_id (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) mm_registry (vllm.entrypoints.chat_utils.BaseMultiModalItemTracker property) (vllm.v1.engine.processor.Processor property) MMF_CLASS_TO_FACTORY (in module vllm.v1.serial_utils) MMQ_QUANT_TYPES (in module vllm.model_executor.layers.quantization.gguf) MMVQ_QUANT_TYPES (in module vllm.model_executor.layers.quantization.gguf) modalities (vllm.multimodal.inputs.MultiModalKwargs property) modality (vllm.multimodal.inputs.MultiModalFieldElem attribute) (vllm.multimodal.inputs.MultiModalKwargsItem property) (vllm.multimodal.processing.BoundPromptUpdate property) (vllm.multimodal.processing.PlaceholderFeaturesInfo attribute) (vllm.multimodal.processing.PromptTargetMatch property) (vllm.multimodal.processing.PromptUpdate attribute) ModalityData (in module vllm.multimodal.inputs) ModalityDataItems (class in vllm.multimodal.parse) ModalityDataParser (in module vllm.multimodal.parse) ModalityStr (in module vllm.entrypoints.chat_utils) mode (vllm.multimodal.processing.BoundPromptUpdate property) (vllm.multimodal.processing.PromptInsertion property) (vllm.multimodal.processing.PromptReplacement property) (vllm.multimodal.processing.PromptUpdate property) model (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) (vllm.config.ModelConfig attribute) (vllm.config.SpeculativeConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.ChatCompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.ClassificationRequest attribute) (vllm.entrypoints.openai.protocol.ClassificationResponse attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.DetokenizeRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponse attribute) (vllm.entrypoints.openai.protocol.PoolingResponse attribute) (vllm.entrypoints.openai.protocol.RerankRequest attribute) (vllm.entrypoints.openai.protocol.RerankResponse attribute) (vllm.entrypoints.openai.protocol.ScoreRequest attribute) (vllm.entrypoints.openai.protocol.ScoreResponse attribute) (vllm.entrypoints.openai.protocol.TokenizeChatRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeCompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionStreamResponse attribute) model_aware_kv_ops_helper (class in vllm.distributed.kv_transfer.kv_connector.utils) model_class (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) model_config (vllm.config.VllmConfig attribute) (vllm.entrypoints.chat_utils.BaseMultiModalItemTracker property) (vllm.entrypoints.openai.protocol.OpenAIBaseModel attribute) (vllm.entrypoints.openai.serving_engine.RequestProcessingMixin attribute) (vllm.entrypoints.openai.serving_engine.ResponseGenerationMixin attribute) (vllm.entrypoints.openai.serving_engine.ServeContext attribute) (vllm.inputs.registry.InputContext attribute) (vllm.model_executor.layers.quantization.schema.QuantParamSchema attribute) model_execute_duration_s (vllm.v1.stats.common.RequestStats attribute) model_execute_time (vllm.model_executor.layers.sampler.SamplerOutput attribute) (vllm.sequence.RequestMetrics attribute) model_execute_time_requests (vllm.engine.metrics_types.Stats attribute) model_forward_duration_s (vllm.v1.stats.common.RequestStats attribute) model_forward_time (vllm.model_executor.layers.sampler.SamplerOutput attribute) (vllm.sequence.RequestMetrics attribute) model_forward_time_requests (vllm.engine.metrics_types.Stats attribute) model_id (vllm.multimodal.processing.BaseProcessingInfo property) model_impl (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) model_input (vllm.worker.multi_step_worker.MultiStepState attribute) model_input_names (vllm.transformers_utils.processors.ovis.OvisProcessor property) model_loader_extra_config (vllm.config.LoadConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) model_name (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) (vllm.entrypoints.openai.serving_engine.ServeContext attribute) (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) model_name() (vllm.entrypoints.openai.serving_models.OpenAIServingModels method) model_or_path (vllm.model_executor.model_loader.default_loader.DefaultModelLoader.Source attribute) model_parallel_is_initialized() (in module vllm.distributed.parallel_state) model_path (vllm.entrypoints.openai.serving_models.BaseModelPath attribute) model_runner (vllm.worker.neuron_worker.NeuronWorker attribute) (vllm.worker.worker_base.LocalOrDistributedWorkerBase attribute) model_type (vllm.model_executor.layers.quantization.schema.QuantParamSchema attribute) (vllm.model_executor.models.module_mapping.ModelKeys attribute) (vllm.model_executor.models.moonvit.MoonVitPretrainedModel attribute) (vllm.model_executor.models.phimoe.PhiMoEConfig attribute) (vllm.model_executor.models.plamo2.Plamo2Config attribute) (vllm.transformers_utils.configs.arctic.ArcticConfig attribute) (vllm.transformers_utils.configs.chatglm.ChatGLMConfig attribute) (vllm.transformers_utils.configs.cohere2.Cohere2Config attribute) (vllm.transformers_utils.configs.dbrx.DbrxConfig attribute) (vllm.transformers_utils.configs.deepseek_vl2.DeepseekV2Config attribute) (vllm.transformers_utils.configs.deepseek_vl2.DeepseekVLV2Config attribute) (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) (vllm.transformers_utils.configs.eagle.EAGLEConfig attribute) (vllm.transformers_utils.configs.exaone.ExaoneConfig attribute) (vllm.transformers_utils.configs.falcon.RWConfig attribute) (vllm.transformers_utils.configs.h2ovl.H2OVLChatConfig attribute) (vllm.transformers_utils.configs.internvl.InternVLChatConfig attribute) (vllm.transformers_utils.configs.jais.JAISConfig attribute) (vllm.transformers_utils.configs.kimi_vl.KimiVLConfig attribute) (vllm.transformers_utils.configs.medusa.MedusaConfig attribute) (vllm.transformers_utils.configs.minimax_text_01.MiniMaxText01Config attribute) (vllm.transformers_utils.configs.minimax_vl_01.MiniMaxVL01Config attribute) (vllm.transformers_utils.configs.mlp_speculator.MLPSpeculatorConfig attribute) (vllm.transformers_utils.configs.moonvit.MoonViTConfig attribute) (vllm.transformers_utils.configs.mpt.MPTConfig attribute) (vllm.transformers_utils.configs.nemotron.NemotronConfig attribute) (vllm.transformers_utils.configs.nvlm_d.NVLM_D_Config attribute) (vllm.transformers_utils.configs.ovis.AIMv2Config attribute) (vllm.transformers_utils.configs.ovis.Aimv2VisualTokenizerConfig attribute) (vllm.transformers_utils.configs.ovis.OvisConfig attribute) (vllm.transformers_utils.configs.ovis.SiglipVisualTokenizerConfig attribute) (vllm.transformers_utils.configs.skyworkr1v.SkyworkR1VChatConfig attribute) (vllm.transformers_utils.configs.solar.SolarConfig attribute) (vllm.transformers_utils.configs.telechat2.Telechat2Config attribute) (vllm.transformers_utils.configs.ultravox.UltravoxConfig attribute) MODEL_WEIGHTS_S3_BUCKET (in module vllm.test_utils) ModelCard (class in vllm.entrypoints.openai.protocol) ModelConfig (class in vllm.config) ModelDType (in module vllm.config) ModelImpl (class in vllm.config) ModelInputForCPU (class in vllm.worker.cpu_model_runner) ModelInputForCPUBuilder (class in vllm.worker.cpu_model_runner) ModelInputForCPUBuilder.ModelInputData (class in vllm.worker.cpu_model_runner) ModelInputForCPUWithPoolingMetadata (class in vllm.worker.cpu_pooling_model_runner) ModelInputForCPUWithSamplingMetadata (class in vllm.worker.cpu_model_runner) ModelInputForGPU (class in vllm.worker.model_runner) ModelInputForGPUBuilder (class in vllm.worker.model_runner) ModelInputForGPUBuilder.InterDataForSeqGroup (class in vllm.worker.model_runner) ModelInputForGPUWithPoolingMetadata (class in vllm.worker.pooling_model_runner) ModelInputForGPUWithSamplingMetadata (class in vllm.worker.model_runner) ModelInputForHPU (class in vllm.worker.hpu_model_runner) ModelInputForHPUWithSamplingMetadata (class in vllm.worker.hpu_model_runner) ModelInputForNeuron (class in vllm.worker.neuron_model_runner) ModelInputForTPU (class in vllm.worker.tpu_model_runner) ModelInputForXPU (class in vllm.worker.xpu_model_runner) ModelInputForXPUBuilder (class in vllm.worker.xpu_model_runner) ModelInputForXPUWithSamplingMetadata (class in vllm.worker.xpu_model_runner) ModelKeys (class in vllm.model_executor.models.module_mapping) ModelList (class in vllm.entrypoints.openai.protocol) ModelOptFp8Config (class in vllm.model_executor.layers.quantization.modelopt) ModelOptFp8KVCacheMethod (class in vllm.model_executor.layers.quantization.modelopt) ModelOptFp8LinearMethod (class in vllm.model_executor.layers.quantization.modelopt) ModelOptNvFp4Config (class in vllm.model_executor.layers.quantization.modelopt) ModelOptNvFp4FusedMoE (class in vllm.model_executor.layers.quantization.modelopt) ModelOptNvFp4LinearMethod (class in vllm.model_executor.layers.quantization.modelopt) ModelOutput (class in vllm.worker.multi_step_model_runner) ModelPermission (class in vllm.entrypoints.openai.protocol) ModelRegistry (in module vllm.model_executor.models.registry) ModelRunner (class in vllm.worker.model_runner) ModelRunnerBase (class in vllm.worker.model_runner_base) ModelRunnerInputBase (class in vllm.worker.model_runner_base) ModelRunnerInputBuilderBase (class in vllm.worker.model_runner_base) ModelRunnerOutput (class in vllm.v1.outputs) ModelRunnerWrapperBase (class in vllm.worker.model_runner_base) models() (in module vllm.entrypoints.openai.api_server) MODELS_ON_S3 (in module vllm.test_utils) modelscope_list_repo_files() (in module vllm.transformers_utils.utils) ModelStatsEntry (class in vllm.profiler.layerwise_profile) ModelWeightParameter (class in vllm.model_executor.parameter) ModelWrapper (class in vllm.worker.tpu_model_runner) ModernBertAttention (class in vllm.model_executor.models.modernbert) ModernBertEmbeddings (class in vllm.model_executor.models.modernbert) ModernBertEncoderLayer (class in vllm.model_executor.models.modernbert) ModernBertForSequenceClassification (class in vllm.model_executor.models.modernbert) ModernBertLayer (class in vllm.model_executor.models.modernbert) ModernBertMLP (class in vllm.model_executor.models.modernbert) ModernBertModel (class in vllm.model_executor.models.modernbert) ModernBertPooler (class in vllm.model_executor.models.modernbert) ModernBertRotaryEmbedding (class in vllm.model_executor.models.modernbert) ModifiedWhisperEncoder (class in vllm.model_executor.models.ultravox) modify_decoder_layer() (in module vllm.worker.hpu_model_runner) module vllm vllm._custom_ops vllm._ipex_ops vllm.adapter_commons vllm.adapter_commons.layers vllm.adapter_commons.models vllm.adapter_commons.request vllm.adapter_commons.utils vllm.adapter_commons.worker_manager vllm.assets vllm.assets.audio vllm.assets.base vllm.assets.image vllm.assets.video vllm.attention vllm.attention.backends vllm.attention.backends.abstract vllm.attention.backends.blocksparse_attn vllm.attention.backends.cpu_mla vllm.attention.backends.dual_chunk_flash_attn vllm.attention.backends.flash_attn vllm.attention.backends.flashinfer vllm.attention.backends.flashmla vllm.attention.backends.hpu_attn vllm.attention.backends.ipex_attn vllm.attention.backends.mla vllm.attention.backends.mla.common vllm.attention.backends.pallas vllm.attention.backends.placeholder_attn vllm.attention.backends.rocm_aiter_mla vllm.attention.backends.rocm_flash_attn vllm.attention.backends.torch_sdpa vllm.attention.backends.triton_mla vllm.attention.backends.utils vllm.attention.backends.xformers vllm.attention.layer vllm.attention.ops vllm.attention.ops.blocksparse_attention vllm.attention.ops.blocksparse_attention.blocksparse_attention_kernel vllm.attention.ops.blocksparse_attention.interface vllm.attention.ops.blocksparse_attention.utils vllm.attention.ops.chunked_prefill_paged_decode vllm.attention.ops.flashmla vllm.attention.ops.hpu_paged_attn vllm.attention.ops.ipex_attn vllm.attention.ops.merge_attn_states vllm.attention.ops.nki_flash_attn vllm.attention.ops.paged_attn vllm.attention.ops.prefix_prefill vllm.attention.ops.rocm_aiter_mla vllm.attention.ops.rocm_aiter_paged_attn vllm.attention.ops.triton_decode_attention vllm.attention.ops.triton_flash_attention vllm.attention.ops.triton_merge_attn_states vllm.attention.ops.triton_unified_attention vllm.attention.selector vllm.beam_search vllm.benchmarks vllm.benchmarks.datasets vllm.benchmarks.endpoint_request_func vllm.benchmarks.latency vllm.benchmarks.serve vllm.benchmarks.throughput vllm.benchmarks.utils vllm.collect_env vllm.compilation vllm.compilation.activation_quant_fusion vllm.compilation.backends vllm.compilation.compiler_interface vllm.compilation.counter vllm.compilation.decorators vllm.compilation.fix_functionalization vllm.compilation.fusion vllm.compilation.fx_utils vllm.compilation.inductor_pass vllm.compilation.monitor vllm.compilation.multi_output_match vllm.compilation.noop_elimination vllm.compilation.pass_manager vllm.compilation.sequence_parallelism vllm.compilation.torch25_custom_graph_pass vllm.compilation.vllm_inductor_pass vllm.compilation.wrapper vllm.config vllm.connections vllm.core vllm.core.block vllm.core.block.block_table vllm.core.block.common vllm.core.block.cpu_gpu_block_allocator vllm.core.block.interfaces vllm.core.block.naive_block vllm.core.block.prefix_caching_block vllm.core.block.utils vllm.core.block_manager vllm.core.evictor vllm.core.interfaces vllm.core.placeholder_block_space_manager vllm.core.scheduler vllm.device_allocator vllm.device_allocator.cumem vllm.distributed vllm.distributed.communication_op vllm.distributed.device_communicators vllm.distributed.device_communicators.base_device_communicator vllm.distributed.device_communicators.cpu_communicator vllm.distributed.device_communicators.cuda_communicator vllm.distributed.device_communicators.cuda_wrapper vllm.distributed.device_communicators.custom_all_reduce vllm.distributed.device_communicators.custom_all_reduce_utils vllm.distributed.device_communicators.hpu_communicator vllm.distributed.device_communicators.neuron_communicator vllm.distributed.device_communicators.pynccl vllm.distributed.device_communicators.pynccl_wrapper vllm.distributed.device_communicators.shm_broadcast vllm.distributed.device_communicators.tpu_communicator vllm.distributed.device_communicators.xpu_communicator vllm.distributed.kv_events vllm.distributed.kv_transfer vllm.distributed.kv_transfer.kv_connector vllm.distributed.kv_transfer.kv_connector.base vllm.distributed.kv_transfer.kv_connector.factory vllm.distributed.kv_transfer.kv_connector.lmcache_connector vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector vllm.distributed.kv_transfer.kv_connector.simple_connector vllm.distributed.kv_transfer.kv_connector.utils vllm.distributed.kv_transfer.kv_connector.v1 vllm.distributed.kv_transfer.kv_connector.v1.base vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector vllm.distributed.kv_transfer.kv_connector_agent vllm.distributed.kv_transfer.kv_lookup_buffer vllm.distributed.kv_transfer.kv_lookup_buffer.base vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer vllm.distributed.kv_transfer.kv_pipe vllm.distributed.kv_transfer.kv_pipe.base vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe vllm.distributed.kv_transfer.kv_transfer_state vllm.distributed.parallel_state vllm.distributed.utils vllm.engine vllm.engine.arg_utils vllm.engine.async_llm_engine vllm.engine.async_timeout vllm.engine.llm_engine vllm.engine.metrics vllm.engine.metrics_types vllm.engine.multiprocessing vllm.engine.multiprocessing.client vllm.engine.multiprocessing.engine vllm.engine.output_processor vllm.engine.output_processor.interfaces vllm.engine.output_processor.multi_step vllm.engine.output_processor.single_step vllm.engine.output_processor.stop_checker vllm.engine.output_processor.util vllm.engine.protocol vllm.entrypoints vllm.entrypoints.api_server vllm.entrypoints.chat_utils vllm.entrypoints.cli vllm.entrypoints.cli.benchmark vllm.entrypoints.cli.benchmark.base vllm.entrypoints.cli.benchmark.latency vllm.entrypoints.cli.benchmark.main vllm.entrypoints.cli.benchmark.serve vllm.entrypoints.cli.benchmark.throughput vllm.entrypoints.cli.collect_env vllm.entrypoints.cli.main vllm.entrypoints.cli.openai vllm.entrypoints.cli.serve vllm.entrypoints.cli.types vllm.entrypoints.launcher vllm.entrypoints.llm vllm.entrypoints.logger vllm.entrypoints.openai vllm.entrypoints.openai.api_server vllm.entrypoints.openai.cli_args vllm.entrypoints.openai.logits_processors vllm.entrypoints.openai.protocol vllm.entrypoints.openai.run_batch vllm.entrypoints.openai.serving_chat vllm.entrypoints.openai.serving_classification vllm.entrypoints.openai.serving_completion vllm.entrypoints.openai.serving_embedding vllm.entrypoints.openai.serving_engine vllm.entrypoints.openai.serving_models vllm.entrypoints.openai.serving_pooling vllm.entrypoints.openai.serving_score vllm.entrypoints.openai.serving_tokenization vllm.entrypoints.openai.serving_transcription vllm.entrypoints.openai.tool_parsers vllm.entrypoints.openai.tool_parsers.abstract_tool_parser vllm.entrypoints.openai.tool_parsers.deepseekv3_tool_parser vllm.entrypoints.openai.tool_parsers.granite_20b_fc_tool_parser vllm.entrypoints.openai.tool_parsers.granite_tool_parser vllm.entrypoints.openai.tool_parsers.hermes_tool_parser vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser vllm.entrypoints.openai.tool_parsers.jamba_tool_parser vllm.entrypoints.openai.tool_parsers.llama_tool_parser vllm.entrypoints.openai.tool_parsers.mistral_tool_parser vllm.entrypoints.openai.tool_parsers.phi4mini_tool_parser vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser vllm.entrypoints.openai.tool_parsers.utils vllm.entrypoints.score_utils vllm.entrypoints.ssl vllm.entrypoints.utils vllm.env_override vllm.envs vllm.executor vllm.executor.executor_base vllm.executor.mp_distributed_executor vllm.executor.msgspec_utils vllm.executor.multiproc_worker_utils vllm.executor.ray_distributed_executor vllm.executor.ray_utils vllm.executor.uniproc_executor vllm.forward_context vllm.inputs vllm.inputs.data vllm.inputs.parse vllm.inputs.preprocess vllm.inputs.registry vllm.jsontree vllm.logger vllm.logging_utils vllm.logging_utils.dump_input vllm.logging_utils.formatter vllm.logits_process vllm.lora vllm.lora.fully_sharded_layers vllm.lora.layers vllm.lora.lora vllm.lora.models vllm.lora.ops vllm.lora.ops.torch_ops vllm.lora.ops.torch_ops.lora_ops vllm.lora.ops.triton_ops vllm.lora.ops.triton_ops.kernel_utils vllm.lora.ops.triton_ops.lora_expand_op vllm.lora.ops.triton_ops.lora_kernel_metadata vllm.lora.ops.triton_ops.lora_shrink_op vllm.lora.ops.triton_ops.utils vllm.lora.ops.xla_ops vllm.lora.ops.xla_ops.lora_ops vllm.lora.ops.xla_ops.pallas vllm.lora.peft_helper vllm.lora.punica_wrapper vllm.lora.punica_wrapper.punica_base vllm.lora.punica_wrapper.punica_cpu vllm.lora.punica_wrapper.punica_gpu vllm.lora.punica_wrapper.punica_hpu vllm.lora.punica_wrapper.punica_selector vllm.lora.punica_wrapper.punica_tpu vllm.lora.punica_wrapper.utils vllm.lora.request vllm.lora.resolver vllm.lora.utils vllm.lora.worker_manager vllm.model_executor vllm.model_executor.custom_op vllm.model_executor.guided_decoding vllm.model_executor.guided_decoding.guidance_decoding vllm.model_executor.guided_decoding.guidance_logits_processors vllm.model_executor.guided_decoding.guided_fields vllm.model_executor.guided_decoding.lm_format_enforcer_decoding vllm.model_executor.guided_decoding.outlines_decoding vllm.model_executor.guided_decoding.outlines_logits_processors vllm.model_executor.guided_decoding.utils vllm.model_executor.guided_decoding.xgrammar_decoding vllm.model_executor.layers vllm.model_executor.layers.activation vllm.model_executor.layers.fused_moe vllm.model_executor.layers.fused_moe.cutlass_moe vllm.model_executor.layers.fused_moe.deep_gemm_moe vllm.model_executor.layers.fused_moe.fused_marlin_moe vllm.model_executor.layers.fused_moe.fused_moe vllm.model_executor.layers.fused_moe.layer vllm.model_executor.layers.fused_moe.moe_align_block_size vllm.model_executor.layers.fused_moe.moe_pallas vllm.model_executor.layers.fused_moe.moe_permute_unpermute vllm.model_executor.layers.fused_moe.moe_torch_iterative vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe vllm.model_executor.layers.fused_moe.utils vllm.model_executor.layers.layernorm vllm.model_executor.layers.lightning_attn vllm.model_executor.layers.linear vllm.model_executor.layers.logits_processor vllm.model_executor.layers.mamba vllm.model_executor.layers.mamba.mamba2_metadata vllm.model_executor.layers.mamba.mamba_mixer vllm.model_executor.layers.mamba.mamba_mixer2 vllm.model_executor.layers.mamba.ops vllm.model_executor.layers.mamba.ops.causal_conv1d vllm.model_executor.layers.mamba.ops.mamba_ssm vllm.model_executor.layers.mamba.ops.ssd_bmm vllm.model_executor.layers.mamba.ops.ssd_chunk_scan vllm.model_executor.layers.mamba.ops.ssd_chunk_state vllm.model_executor.layers.mamba.ops.ssd_combined vllm.model_executor.layers.mamba.ops.ssd_state_passing vllm.model_executor.layers.pooler vllm.model_executor.layers.quantization vllm.model_executor.layers.quantization.aqlm vllm.model_executor.layers.quantization.awq vllm.model_executor.layers.quantization.awq_marlin vllm.model_executor.layers.quantization.awq_triton vllm.model_executor.layers.quantization.base_config vllm.model_executor.layers.quantization.bitblas vllm.model_executor.layers.quantization.bitsandbytes vllm.model_executor.layers.quantization.compressed_tensors vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe vllm.model_executor.layers.quantization.compressed_tensors.schemes vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_24 vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_scheme vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24 vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_nvfp4 vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8 vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_fp8 vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8 vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 vllm.model_executor.layers.quantization.compressed_tensors.triton_scaled_mm vllm.model_executor.layers.quantization.compressed_tensors.utils vllm.model_executor.layers.quantization.deepspeedfp vllm.model_executor.layers.quantization.experts_int8 vllm.model_executor.layers.quantization.fbgemm_fp8 vllm.model_executor.layers.quantization.fp8 vllm.model_executor.layers.quantization.gguf vllm.model_executor.layers.quantization.gptq vllm.model_executor.layers.quantization.gptq_bitblas vllm.model_executor.layers.quantization.gptq_marlin vllm.model_executor.layers.quantization.gptq_marlin_24 vllm.model_executor.layers.quantization.hqq_marlin vllm.model_executor.layers.quantization.ipex_quant vllm.model_executor.layers.quantization.kernels vllm.model_executor.layers.quantization.kernels.mixed_precision vllm.model_executor.layers.quantization.kernels.mixed_precision.allspark vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama vllm.model_executor.layers.quantization.kernels.mixed_precision.machete vllm.model_executor.layers.quantization.kernels.mixed_precision.marlin vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel vllm.model_executor.layers.quantization.kernels.scaled_mm vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter vllm.model_executor.layers.quantization.kernels.scaled_mm.cutlass vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel vllm.model_executor.layers.quantization.kernels.scaled_mm.triton vllm.model_executor.layers.quantization.kernels.scaled_mm.xla vllm.model_executor.layers.quantization.kv_cache vllm.model_executor.layers.quantization.marlin vllm.model_executor.layers.quantization.modelopt vllm.model_executor.layers.quantization.moe_wna16 vllm.model_executor.layers.quantization.neuron_quant vllm.model_executor.layers.quantization.ptpc_fp8 vllm.model_executor.layers.quantization.qqq vllm.model_executor.layers.quantization.quark vllm.model_executor.layers.quantization.quark.quark vllm.model_executor.layers.quantization.quark.quark_moe vllm.model_executor.layers.quantization.quark.schemes vllm.model_executor.layers.quantization.quark.schemes.quark_scheme vllm.model_executor.layers.quantization.quark.schemes.quark_w4a4_mxfp4 vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8 vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8 vllm.model_executor.layers.quantization.quark.utils vllm.model_executor.layers.quantization.schema vllm.model_executor.layers.quantization.torchao vllm.model_executor.layers.quantization.tpu_int8 vllm.model_executor.layers.quantization.utils vllm.model_executor.layers.quantization.utils.allspark_utils vllm.model_executor.layers.quantization.utils.bitblas_utils vllm.model_executor.layers.quantization.utils.fp8_utils vllm.model_executor.layers.quantization.utils.gptq_utils vllm.model_executor.layers.quantization.utils.int8_utils vllm.model_executor.layers.quantization.utils.layer_utils vllm.model_executor.layers.quantization.utils.machete_utils vllm.model_executor.layers.quantization.utils.marlin_utils vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 vllm.model_executor.layers.quantization.utils.marlin_utils_test vllm.model_executor.layers.quantization.utils.marlin_utils_test_24 vllm.model_executor.layers.quantization.utils.marlin_utils_test_qqq vllm.model_executor.layers.quantization.utils.mxfp4_utils vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils vllm.model_executor.layers.quantization.utils.quant_utils vllm.model_executor.layers.quantization.utils.w8a8_utils vllm.model_executor.layers.rejection_sampler vllm.model_executor.layers.resampler vllm.model_executor.layers.rotary_embedding vllm.model_executor.layers.sampler vllm.model_executor.layers.spec_decode_base_sampler vllm.model_executor.layers.typical_acceptance_sampler vllm.model_executor.layers.utils vllm.model_executor.layers.vocab_parallel_embedding vllm.model_executor.model_loader vllm.model_executor.model_loader.base_loader vllm.model_executor.model_loader.bitsandbytes_loader vllm.model_executor.model_loader.default_loader vllm.model_executor.model_loader.dummy_loader vllm.model_executor.model_loader.gguf_loader vllm.model_executor.model_loader.neuron vllm.model_executor.model_loader.neuronx_distributed vllm.model_executor.model_loader.runai_streamer_loader vllm.model_executor.model_loader.sharded_state_loader vllm.model_executor.model_loader.tensorizer vllm.model_executor.model_loader.tensorizer_loader vllm.model_executor.model_loader.utils vllm.model_executor.model_loader.weight_utils vllm.model_executor.models vllm.model_executor.models.adapters vllm.model_executor.models.aimv2 vllm.model_executor.models.arctic vllm.model_executor.models.aria vllm.model_executor.models.aya_vision vllm.model_executor.models.baichuan vllm.model_executor.models.bamba vllm.model_executor.models.bart vllm.model_executor.models.bert vllm.model_executor.models.bert_with_rope vllm.model_executor.models.blip vllm.model_executor.models.blip2 vllm.model_executor.models.bloom vllm.model_executor.models.chameleon vllm.model_executor.models.chatglm vllm.model_executor.models.clip vllm.model_executor.models.commandr vllm.model_executor.models.constant_size_cache vllm.model_executor.models.dbrx vllm.model_executor.models.deepseek vllm.model_executor.models.deepseek_mtp vllm.model_executor.models.deepseek_v2 vllm.model_executor.models.deepseek_vl2 vllm.model_executor.models.eagle vllm.model_executor.models.exaone vllm.model_executor.models.fairseq2_llama vllm.model_executor.models.falcon vllm.model_executor.models.florence2 vllm.model_executor.models.fuyu vllm.model_executor.models.gemma vllm.model_executor.models.gemma2 vllm.model_executor.models.gemma3 vllm.model_executor.models.gemma3_mm vllm.model_executor.models.glm vllm.model_executor.models.glm4 vllm.model_executor.models.glm4v vllm.model_executor.models.gpt2 vllm.model_executor.models.gpt_bigcode vllm.model_executor.models.gpt_j vllm.model_executor.models.gpt_neox vllm.model_executor.models.granite vllm.model_executor.models.granite_speech vllm.model_executor.models.granitemoe vllm.model_executor.models.granitemoehybrid vllm.model_executor.models.granitemoeshared vllm.model_executor.models.gritlm vllm.model_executor.models.grok1 vllm.model_executor.models.h2ovl vllm.model_executor.models.idefics2_vision_model vllm.model_executor.models.idefics3 vllm.model_executor.models.interfaces vllm.model_executor.models.interfaces_base vllm.model_executor.models.intern_vit vllm.model_executor.models.internlm2 vllm.model_executor.models.internlm2_ve vllm.model_executor.models.internvl vllm.model_executor.models.jais vllm.model_executor.models.jamba vllm.model_executor.models.kimi_vl vllm.model_executor.models.llama vllm.model_executor.models.llama4 vllm.model_executor.models.llama_eagle vllm.model_executor.models.llama_eagle3 vllm.model_executor.models.llava vllm.model_executor.models.llava_next vllm.model_executor.models.llava_next_video vllm.model_executor.models.llava_onevision vllm.model_executor.models.mamba vllm.model_executor.models.mamba2 vllm.model_executor.models.mamba_cache vllm.model_executor.models.medusa vllm.model_executor.models.mimo vllm.model_executor.models.mimo_mtp vllm.model_executor.models.minicpm vllm.model_executor.models.minicpm3 vllm.model_executor.models.minicpmo vllm.model_executor.models.minicpmv vllm.model_executor.models.minimax_cache vllm.model_executor.models.minimax_text_01 vllm.model_executor.models.minimax_vl_01 vllm.model_executor.models.mistral3 vllm.model_executor.models.mixtral vllm.model_executor.models.mixtral_quant vllm.model_executor.models.mllama vllm.model_executor.models.mllama4 vllm.model_executor.models.mlp_speculator vllm.model_executor.models.modernbert vllm.model_executor.models.module_mapping vllm.model_executor.models.molmo vllm.model_executor.models.moonvit vllm.model_executor.models.mpt vllm.model_executor.models.nemotron vllm.model_executor.models.nemotron_nas vllm.model_executor.models.nvlm_d vllm.model_executor.models.olmo vllm.model_executor.models.olmo2 vllm.model_executor.models.olmoe vllm.model_executor.models.opt vllm.model_executor.models.orion vllm.model_executor.models.ovis vllm.model_executor.models.paligemma vllm.model_executor.models.persimmon vllm.model_executor.models.phi vllm.model_executor.models.phi3 vllm.model_executor.models.phi3_small vllm.model_executor.models.phi3v vllm.model_executor.models.phi4mm vllm.model_executor.models.phi4mm_audio vllm.model_executor.models.phi4mm_utils vllm.model_executor.models.phimoe vllm.model_executor.models.pixtral vllm.model_executor.models.plamo2 vllm.model_executor.models.prithvi_geospatial_mae vllm.model_executor.models.qwen vllm.model_executor.models.qwen2 vllm.model_executor.models.qwen2_5_omni_thinker vllm.model_executor.models.qwen2_5_vl vllm.model_executor.models.qwen2_audio vllm.model_executor.models.qwen2_moe vllm.model_executor.models.qwen2_rm vllm.model_executor.models.qwen2_vl vllm.model_executor.models.qwen3 vllm.model_executor.models.qwen3_moe vllm.model_executor.models.qwen_vl vllm.model_executor.models.registry vllm.model_executor.models.roberta vllm.model_executor.models.siglip vllm.model_executor.models.skyworkr1v vllm.model_executor.models.smolvlm vllm.model_executor.models.solar vllm.model_executor.models.stablelm vllm.model_executor.models.starcoder2 vllm.model_executor.models.telechat2 vllm.model_executor.models.teleflm vllm.model_executor.models.transformers vllm.model_executor.models.ultravox vllm.model_executor.models.utils vllm.model_executor.models.vision vllm.model_executor.models.whisper vllm.model_executor.models.zamba2 vllm.model_executor.parameter vllm.model_executor.pooling_metadata vllm.model_executor.sampling_metadata vllm.model_executor.utils vllm.multimodal vllm.multimodal.audio vllm.multimodal.base vllm.multimodal.hasher vllm.multimodal.image vllm.multimodal.inputs vllm.multimodal.parse vllm.multimodal.processing vllm.multimodal.profiling vllm.multimodal.registry vllm.multimodal.utils vllm.multimodal.video vllm.outputs vllm.platforms vllm.platforms.cpu vllm.platforms.cuda vllm.platforms.hpu vllm.platforms.interface vllm.platforms.neuron vllm.platforms.rocm vllm.platforms.tpu vllm.platforms.xpu vllm.plugins vllm.plugins.lora_resolvers vllm.plugins.lora_resolvers.filesystem_resolver vllm.pooling_params vllm.profiler vllm.profiler.layerwise_profile vllm.profiler.utils vllm.prompt_adapter vllm.prompt_adapter.layers vllm.prompt_adapter.models vllm.prompt_adapter.request vllm.prompt_adapter.utils vllm.prompt_adapter.worker_manager vllm.reasoning vllm.reasoning.abs_reasoning_parsers vllm.reasoning.deepseek_r1_reasoning_parser vllm.reasoning.granite_reasoning_parser vllm.reasoning.qwen3_reasoning_parser vllm.sampling_params vllm.scalar_type vllm.scripts vllm.sequence vllm.spec_decode vllm.spec_decode.batch_expansion vllm.spec_decode.draft_model_runner vllm.spec_decode.interfaces vllm.spec_decode.medusa_worker vllm.spec_decode.metrics vllm.spec_decode.mlp_speculator_worker vllm.spec_decode.mqa_scorer vllm.spec_decode.multi_step_worker vllm.spec_decode.ngram_worker vllm.spec_decode.proposer_worker_base vllm.spec_decode.smaller_tp_proposer_worker vllm.spec_decode.spec_decode_worker vllm.spec_decode.target_model_runner vllm.spec_decode.top1_proposer vllm.spec_decode.util vllm.test_utils vllm.tracing vllm.transformers_utils vllm.transformers_utils.chat_templates vllm.transformers_utils.chat_templates.registry vllm.transformers_utils.config vllm.transformers_utils.configs vllm.transformers_utils.configs.arctic vllm.transformers_utils.configs.chatglm vllm.transformers_utils.configs.cohere2 vllm.transformers_utils.configs.dbrx vllm.transformers_utils.configs.deepseek_vl2 vllm.transformers_utils.configs.eagle vllm.transformers_utils.configs.exaone vllm.transformers_utils.configs.falcon vllm.transformers_utils.configs.h2ovl vllm.transformers_utils.configs.internvl vllm.transformers_utils.configs.jais vllm.transformers_utils.configs.kimi_vl vllm.transformers_utils.configs.medusa vllm.transformers_utils.configs.minimax_text_01 vllm.transformers_utils.configs.minimax_vl_01 vllm.transformers_utils.configs.mllama vllm.transformers_utils.configs.mlp_speculator vllm.transformers_utils.configs.moonvit vllm.transformers_utils.configs.mpt vllm.transformers_utils.configs.nemotron vllm.transformers_utils.configs.nvlm_d vllm.transformers_utils.configs.ovis vllm.transformers_utils.configs.skyworkr1v vllm.transformers_utils.configs.solar vllm.transformers_utils.configs.telechat2 vllm.transformers_utils.configs.ultravox vllm.transformers_utils.detokenizer vllm.transformers_utils.detokenizer_utils vllm.transformers_utils.processor vllm.transformers_utils.processors vllm.transformers_utils.processors.deepseek_vl2 vllm.transformers_utils.processors.ovis vllm.transformers_utils.s3_utils vllm.transformers_utils.tokenizer vllm.transformers_utils.tokenizer_base vllm.transformers_utils.tokenizer_group vllm.transformers_utils.tokenizers vllm.transformers_utils.tokenizers.mistral vllm.transformers_utils.utils vllm.triton_utils vllm.triton_utils.importing vllm.usage vllm.usage.usage_lib vllm.utils vllm.v1 vllm.v1.attention vllm.v1.attention.backends vllm.v1.attention.backends.flash_attn vllm.v1.attention.backends.flashinfer vllm.v1.attention.backends.mla vllm.v1.attention.backends.mla.common vllm.v1.attention.backends.mla.flashmla vllm.v1.attention.backends.mla.rocm_aiter_mla vllm.v1.attention.backends.mla.triton_mla vllm.v1.attention.backends.pallas vllm.v1.attention.backends.triton_attn vllm.v1.attention.backends.utils vllm.v1.core vllm.v1.core.block_pool vllm.v1.core.encoder_cache_manager vllm.v1.core.kv_cache_manager vllm.v1.core.kv_cache_utils vllm.v1.core.sched vllm.v1.core.sched.interface vllm.v1.core.sched.output vllm.v1.core.sched.scheduler vllm.v1.core.sched.utils vllm.v1.core.single_type_kv_cache_manager vllm.v1.engine vllm.v1.engine.async_llm vllm.v1.engine.core vllm.v1.engine.core_client vllm.v1.engine.detokenizer vllm.v1.engine.exceptions vllm.v1.engine.llm_engine vllm.v1.engine.logprobs vllm.v1.engine.mm_input_cache vllm.v1.engine.output_processor vllm.v1.engine.parallel_sampling vllm.v1.engine.processor vllm.v1.executor vllm.v1.executor.abstract vllm.v1.executor.multiproc_executor vllm.v1.executor.ray_distributed_executor vllm.v1.kv_cache_interface vllm.v1.metrics vllm.v1.metrics.loggers vllm.v1.metrics.stats vllm.v1.outputs vllm.v1.request vllm.v1.sample vllm.v1.sample.metadata vllm.v1.sample.ops vllm.v1.sample.ops.bad_words vllm.v1.sample.ops.penalties vllm.v1.sample.ops.topk_topp_sampler vllm.v1.sample.rejection_sampler vllm.v1.sample.sampler vllm.v1.sample.tpu vllm.v1.sample.tpu.metadata vllm.v1.sample.tpu.sampler vllm.v1.serial_utils vllm.v1.spec_decode vllm.v1.spec_decode.eagle vllm.v1.spec_decode.metadata vllm.v1.spec_decode.metrics vllm.v1.spec_decode.ngram_proposer vllm.v1.spec_decode.utils vllm.v1.stats vllm.v1.stats.common vllm.v1.structured_output vllm.v1.structured_output.backend_guidance vllm.v1.structured_output.backend_types vllm.v1.structured_output.backend_xgrammar vllm.v1.structured_output.request vllm.v1.structured_output.utils vllm.v1.utils vllm.v1.worker vllm.v1.worker.block_table vllm.v1.worker.gpu_input_batch vllm.v1.worker.gpu_model_runner vllm.v1.worker.gpu_worker vllm.v1.worker.lora_model_runner_mixin vllm.v1.worker.tpu_model_runner vllm.v1.worker.tpu_worker vllm.v1.worker.utils vllm.v1.worker.worker_base vllm.version vllm.worker vllm.worker.cache_engine vllm.worker.cpu_enc_dec_model_runner vllm.worker.cpu_model_runner vllm.worker.cpu_pooling_model_runner vllm.worker.cpu_worker vllm.worker.enc_dec_model_runner vllm.worker.hpu_model_runner vllm.worker.hpu_worker vllm.worker.model_runner vllm.worker.model_runner_base vllm.worker.multi_step_hpu_worker vllm.worker.multi_step_model_runner vllm.worker.multi_step_neuron_model_runner vllm.worker.multi_step_neuronx_distributed_model_runner vllm.worker.multi_step_tpu_worker vllm.worker.multi_step_worker vllm.worker.neuron_model_runner vllm.worker.neuron_worker vllm.worker.neuronx_distributed_model_runner vllm.worker.pooling_model_runner vllm.worker.tpu_model_runner vllm.worker.tpu_worker vllm.worker.utils vllm.worker.worker vllm.worker.worker_base vllm.worker.xpu_model_runner vllm.worker.xpu_worker module_list (vllm.model_executor.models.module_mapping.ModelKeys attribute) modules_to_save (vllm.lora.peft_helper.PEFTHelper attribute) moe_align_block_size() (in module vllm._custom_ops) (in module vllm.model_executor.layers.fused_moe.moe_align_block_size) moe_align_block_size_stage1() (in module vllm.model_executor.layers.fused_moe.moe_align_block_size) moe_align_block_size_stage2() (in module vllm.model_executor.layers.fused_moe.moe_align_block_size) moe_align_block_size_stage3() (in module vllm.model_executor.layers.fused_moe.moe_align_block_size) moe_align_block_size_stage4() (in module vllm.model_executor.layers.fused_moe.moe_align_block_size) moe_align_block_size_triton() (in module vllm.model_executor.layers.fused_moe.moe_align_block_size) moe_awq_to_marlin_zero_points() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) moe_forward() (in module vllm.model_executor.layers.fused_moe.layer) moe_forward_fake() (in module vllm.model_executor.layers.fused_moe.layer) moe_kernel_prepare_input() (in module vllm.model_executor.layers.fused_moe.fused_moe) moe_permute() (in module vllm.model_executor.layers.fused_moe.moe_permute_unpermute) moe_sum() (in module vllm._custom_ops) moe_unpermute() (in module vllm.model_executor.layers.fused_moe.moe_permute_unpermute) moe_wna16_gemm() (in module vllm._custom_ops) moe_wna16_marlin_gemm() (in module vllm._custom_ops) MoeWNA16Config (class in vllm.model_executor.layers.quantization.moe_wna16) MoeWNA16Method (class in vllm.model_executor.layers.quantization.moe_wna16) MolmoAttention (class in vllm.model_executor.models.molmo) MolmoDecoderLayer (class in vllm.model_executor.models.molmo) MolmoDecoderNormAfterLayer (class in vllm.model_executor.models.molmo) MolmoDummyInputsBuilder (class in vllm.model_executor.models.molmo) MolmoForCausalLM (class in vllm.model_executor.models.molmo) MolmoImageInputs (class in vllm.model_executor.models.molmo) MolmoModel (class in vllm.model_executor.models.molmo) MolmoMultiModalProcessor (class in vllm.model_executor.models.molmo) MolmoProcessingInfo (class in vllm.model_executor.models.molmo) MolmoProcessorWrapper (class in vllm.model_executor.models.molmo) MolmoVisionBackbone (class in vllm.model_executor.models.molmo) monotonic_ts_s (vllm.v1.stats.common.RequestStatsUpdate attribute) MooncakePipe (class in vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe) MooncakeStore (class in vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store) MooncakeStoreConfig (class in vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store) MooncakeStoreConnector (class in vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector) MooncakeTransferEngine (class in vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe) MooncakeTransferEngineConfig (class in vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe) MoonVisionPatchEmbed (class in vllm.model_executor.models.moonvit) MoonViTConfig (class in vllm.transformers_utils.configs.moonvit) MoonVitEncoder (class in vllm.model_executor.models.moonvit) MoonVitEncoderLayer (class in vllm.model_executor.models.moonvit) MoonVitPretrainedModel (class in vllm.model_executor.models.moonvit) MoonVitVLProjector (class in vllm.model_executor.models.moonvit) mount_metrics() (in module vllm.entrypoints.openai.api_server) move_row() (vllm.v1.worker.block_table.BlockTable method) mp (class in vllm.model_executor.models.phimoe) MPClient (class in vllm.v1.engine.core_client) MPLinearKernel (class in vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel) MPLinearLayerConfig (class in vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel) MPTAttention (class in vllm.model_executor.models.mpt) MPTBlock (class in vllm.model_executor.models.mpt) MPTConfig (class in vllm.transformers_utils.configs.mpt) MPTForCausalLM (class in vllm.model_executor.models.mpt) MPTMLP (class in vllm.model_executor.models.mpt) MPTModel (class in vllm.model_executor.models.mpt) mq_broadcaster (vllm.distributed.parallel_state.GroupCoordinator attribute) MQAScorer (class in vllm.spec_decode.mqa_scorer) MQClientClosedError MQEngineDeadError MQLLMEngine (class in vllm.engine.multiprocessing.engine) MQLLMEngineClient (class in vllm.engine.multiprocessing.client) mrope_position_delta (vllm.sequence.SequenceData property) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) mrope_positions (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) MRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) MsgpackDecoder (class in vllm.v1.serial_utils) MsgpackEncoder (class in vllm.v1.serial_utils) MulAndSilu (class in vllm.model_executor.layers.activation) multi_modal_content (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) multi_modal_data (vllm.beam_search.BeamSearchSequence attribute) (vllm.benchmarks.datasets.SampleRequest attribute) (vllm.inputs.data.TextPrompt attribute) (vllm.inputs.data.TokensPrompt attribute) (vllm.inputs.registry.DummyData attribute) (vllm.multimodal.profiling.DummyDecoderData attribute) (vllm.sequence.Sequence property) (vllm.sequence.SequenceGroup property) (vllm.sequence.SequenceGroupMetadata attribute) multi_modal_kwargs (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.neuron_model_runner.ModelInputForNeuron attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) multi_modal_placeholder_index_maps (vllm.attention.backends.abstract.AttentionMetadata attribute) multi_modal_placeholders (vllm.inputs.registry.DummyData attribute) (vllm.multimodal.profiling.DummyDecoderData attribute) (vllm.sequence.Sequence property) (vllm.sequence.SequenceGroup property) (vllm.sequence.SequenceGroupMetadata attribute) MULTI_STEP_ATTENTION_BACKENDS (in module vllm.worker.multi_step_model_runner) MULTI_STEP_CHUNKED_PREFILL_ATTENTION_BACKENDS (in module vllm.worker.multi_step_model_runner) multi_step_stream_outputs (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) multihead_attention() (in module vllm.model_executor.models.moonvit) MultiHeadAttention (class in vllm.attention.layer) MultiHeadDotProductAttention (class in vllm.model_executor.models.molmo) MultiHeadedAttention (class in vllm.model_executor.models.phi4mm_utils) MULTIMODAL_REGISTRY (in module vllm.multimodal) MultiModalBatchedField (class in vllm.multimodal.inputs) MultiModalConfig (class in vllm.config) MultiModalContentParser (class in vllm.entrypoints.chat_utils) MultiModalDataBuiltins (class in vllm.multimodal.inputs) MultiModalDataDict (in module vllm.multimodal.inputs) MultiModalDataItems (class in vllm.multimodal.parse) MultiModalDataParser (class in vllm.multimodal.parse) MultiModalEmbeddings (in module vllm.model_executor.models.interfaces) MultiModalEncDecInputs (class in vllm.multimodal.inputs) MultiModalFieldConfig (class in vllm.multimodal.inputs) MultiModalFieldElem (class in vllm.multimodal.inputs) MultiModalFlatField (class in vllm.multimodal.inputs) MultiModalHashDict (in module vllm.multimodal.hasher) MultiModalHasher (class in vllm.multimodal.hasher) MultiModalHashes (in module vllm.multimodal.processing) MultiModalInputs (class in vllm.multimodal.inputs) MultiModalItemTracker (class in vllm.entrypoints.chat_utils) MultiModalKwargs (class in vllm.multimodal.inputs) MultiModalKwargsItem (class in vllm.multimodal.inputs) MultiModalPlaceholderDict (in module vllm.multimodal.inputs) MultiModalPlaceholderMap (class in vllm.multimodal.base) MultiModalPlaceholderMap.IndexMap (class in vllm.multimodal.base) MultiModalProcessorFactory (class in vllm.multimodal.registry) MultiModalProfiler (class in vllm.multimodal.profiling) MultiModalProjector (class in vllm.model_executor.models.minicpmo) MultiModalRegistry (class in vllm.multimodal.registry) MultiModalSharedField (class in vllm.multimodal.inputs) MultiModelKeys (class in vllm.model_executor.models.module_mapping) multinomial_samples (vllm.model_executor.layers.sampler.SampleResultArgsType attribute) MultinomialSamplesType (in module vllm.model_executor.layers.sampler) MultiOutputMatch (class in vllm.compilation.multi_output_match) MultiprocessingDistributedExecutor (class in vllm.executor.mp_distributed_executor) MultiprocExecutor (class in vllm.v1.executor.multiproc_executor) MultiSequential (class in vllm.model_executor.models.phi4mm_utils) MultiStepHPUWorker (class in vllm.worker.multi_step_hpu_worker) MultiStepModelRunner (class in vllm.worker.multi_step_model_runner) MultiStepNeuronModelRunner (class in vllm.worker.multi_step_neuron_model_runner) MultiStepNeuronxDistributedModelRunner (class in vllm.worker.multi_step_neuronx_distributed_model_runner) MultiStepOutputProcessor (class in vllm.engine.output_processor.multi_step) MultiStepState (class in vllm.worker.multi_step_worker) MultiStepTPUWorker (class in vllm.worker.multi_step_tpu_worker) MultiStepWorker (class in vllm.spec_decode.multi_step_worker) (class in vllm.worker.multi_step_worker) MySequential (class in vllm.model_executor.models.florence2) N N (in module vllm.multimodal.registry) n (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.v1.engine.parallel_sampling.ParentRequest property) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) n_blocks (vllm.sequence.Sequence property) n_embed (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) n_requests (vllm.engine.metrics_types.Stats attribute) naive_attn_with_masks() (vllm.model_executor.models.gemma3.Gemma3Attention method) naive_multicast() (vllm.model_executor.layers.fused_moe.layer.FusedMoE method) NaiveBlock (class in vllm.core.block.naive_block) NaiveBlockAllocator (class in vllm.core.block.naive_block) name (vllm.assets.audio.AudioAsset attribute) (vllm.assets.image.ImageAsset attribute) (vllm.assets.video.VideoAsset attribute) (vllm.compilation.compiler_interface.CompilerInterface attribute) (vllm.compilation.compiler_interface.EagerAdaptor attribute) (vllm.compilation.compiler_interface.InductorAdaptor attribute) (vllm.compilation.compiler_interface.InductorStandaloneAdaptor attribute) (vllm.distributed.device_communicators.cuda_wrapper.Function attribute) (vllm.distributed.device_communicators.pynccl_wrapper.Function attribute) (vllm.entrypoints.chat_utils.ConversationMessage attribute) (vllm.entrypoints.chat_utils.CustomChatCompletionMessageParam attribute) (vllm.entrypoints.cli.types.CLISubcommand attribute) (vllm.entrypoints.openai.protocol.ChatCompletionNamedFunction attribute) (vllm.entrypoints.openai.protocol.DeltaFunctionCall attribute) (vllm.entrypoints.openai.protocol.FunctionCall attribute) (vllm.entrypoints.openai.protocol.FunctionDefinition attribute) (vllm.entrypoints.openai.protocol.JsonSchemaResponseFormat attribute) (vllm.entrypoints.openai.serving_models.BaseModelPath attribute) (vllm.entrypoints.openai.serving_models.LoRAModulePath attribute) (vllm.entrypoints.openai.serving_models.PromptAdapterPath attribute) (vllm.lora.request.LoRARequest property) (vllm.model_executor.models.minimax_text_01.MiniMaxText01RMSNormTP attribute) (vllm.model_executor.models.minimax_text_01.MiniMaxText01RotaryEmbedding attribute) (vllm.profiler.layerwise_profile.ModelStatsEntry attribute) (vllm.profiler.layerwise_profile.SummaryStatsEntry attribute) (vllm.prompt_adapter.request.PromptAdapterRequest property) nan_repr (vllm.scalar_type.ScalarType attribute) NanRepr (class in vllm.scalar_type) NCCL_CHECK() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclAllGather() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclAllReduce() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclAvg (vllm.distributed.device_communicators.pynccl_wrapper.ncclRedOpTypeEnum attribute) ncclBfloat16 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclBroadcast() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclChar (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclComm_t (in module vllm.distributed.device_communicators.pynccl_wrapper) ncclCommDestroy() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclCommInitRank() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclDataType_t (in module vllm.distributed.device_communicators.pynccl_wrapper) ncclDataTypeEnum (class in vllm.distributed.device_communicators.pynccl_wrapper) ncclDouble (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclFloat (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclFloat16 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclFloat32 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclFloat64 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclGetErrorString() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclGetUniqueId() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclGetVersion() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclHalf (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclInt (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclInt32 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclInt64 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclInt8 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) NCCLLibrary (class in vllm.distributed.device_communicators.pynccl_wrapper) ncclMax (vllm.distributed.device_communicators.pynccl_wrapper.ncclRedOpTypeEnum attribute) ncclMin (vllm.distributed.device_communicators.pynccl_wrapper.ncclRedOpTypeEnum attribute) ncclNumOps (vllm.distributed.device_communicators.pynccl_wrapper.ncclRedOpTypeEnum attribute) ncclNumTypes (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclProd (vllm.distributed.device_communicators.pynccl_wrapper.ncclRedOpTypeEnum attribute) ncclRecv() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclRedOp_t (in module vllm.distributed.device_communicators.pynccl_wrapper) ncclRedOpTypeEnum (class in vllm.distributed.device_communicators.pynccl_wrapper) ncclReduceScatter() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclResult_t (in module vllm.distributed.device_communicators.pynccl_wrapper) ncclSend() (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary method) ncclSum (vllm.distributed.device_communicators.pynccl_wrapper.ncclRedOpTypeEnum attribute) ncclUint32 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclUint64 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclUint8 (vllm.distributed.device_communicators.pynccl_wrapper.ncclDataTypeEnum attribute) ncclUniqueId (class in vllm.distributed.device_communicators.pynccl_wrapper) need_alibi() (vllm.attention.ops.triton_flash_attention.MetaData method) need_bias() (vllm.attention.ops.triton_flash_attention.MetaData method) need_causal() (vllm.attention.ops.triton_flash_attention.MetaData method) need_extra_keys() (in module vllm.v1.core.kv_cache_utils) need_recv_kv() (vllm.worker.model_runner.ModelRunner method) need_send_kv() (vllm.worker.model_runner.ModelRunner method) need_to_compile (vllm.compilation.backends.ConcreteSizeEntry attribute) NemoConvSubsampling (class in vllm.model_executor.models.phi4mm_utils) NemotronAttention (class in vllm.model_executor.models.nemotron) NemotronConfig (class in vllm.transformers_utils.configs.nemotron) NemotronDecoderLayer (class in vllm.model_executor.models.nemotron) NemotronForCausalLM (class in vllm.model_executor.models.nemotron) NemotronLayerNorm1P (class in vllm.model_executor.models.nemotron) NemotronMLP (class in vllm.model_executor.models.nemotron) NemotronModel (class in vllm.model_executor.models.nemotron) nested_tensors_equal() (in module vllm.multimodal.inputs) NestedTensors (in module vllm.multimodal.inputs) NEURON (vllm.platforms.interface.PlatformEnum attribute) neuron_platform_plugin() (in module vllm.platforms) NeuronCausalLM (class in vllm.model_executor.model_loader.neuron) (class in vllm.model_executor.model_loader.neuronx_distributed) NeuronCommunicator (class in vllm.distributed.device_communicators.neuron_communicator) NeuronFramework (class in vllm.platforms.neuron) NeuronMllamaForCausalLM (class in vllm.model_executor.model_loader.neuronx_distributed) NeuronModelRunner (class in vllm.worker.neuron_model_runner) NeuronPlatform (class in vllm.platforms.neuron) NeuronQuantConfig (class in vllm.model_executor.layers.quantization.neuron_quant) NeuronSpeculationCausalLM (class in vllm.model_executor.model_loader.neuron) (class in vllm.model_executor.model_loader.neuronx_distributed) NeuronWorker (class in vllm.worker.neuron_worker) NEURONX_DISTRIBUTED_INFERENCE (vllm.platforms.neuron.NeuronFramework attribute) NeuronxDistributedModelRunner (class in vllm.worker.neuronx_distributed_model_runner) NEVER (vllm.core.interfaces.AllocStatus attribute) new() (vllm.v1.spec_decode.metrics.SpecDecodingStats class method) new_block_ids (vllm.v1.core.sched.output.CachedRequestData attribute) new_cumulative_logprob (vllm.sequence.SequenceDataDelta attribute) new_event() (vllm.v1.engine.EngineCoreEvent class method) new_logprobs (vllm.v1.engine.EngineCoreOutput attribute) new_num_computed_tokens (vllm.sequence.SequenceDataDelta attribute) new_output_token_ids (vllm.sequence.SequenceDataDelta attribute) new_prompt_logprobs_tensors (vllm.v1.engine.EngineCoreOutput attribute) new_stage (vllm.sequence.SequenceDataDelta attribute) new_token_ids (vllm.v1.core.sched.output.CachedRequestData attribute) (vllm.v1.engine.EngineCoreOutput attribute) NewGELU (class in vllm.model_executor.layers.activation) NewLineFormatter (class in vllm.logging_utils.formatter) NewRequestData (class in vllm.v1.core.sched.output) next_cache_id (vllm.core.scheduler.Scheduler property) next_free_block (vllm.v1.core.kv_cache_utils.KVCacheBlock attribute) next_power_of_2() (in module vllm.utils) next_rank (vllm.distributed.parallel_state.GroupCoordinator property) NextEditPredictionDataset (class in vllm.benchmarks.datasets) NgramProposer (class in vllm.v1.spec_decode.ngram_proposer) NGramWorker (class in vllm.spec_decode.ngram_worker) NixlAgentMetadata (class in vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) NixlConnector (class in vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) NixlConnectorMetadata (class in vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) NixlConnectorScheduler (class in vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) NixlConnectorWorker (class in vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) NixlKVTransferParams (class in vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) no_add_bias() (vllm.model_executor.layers.quantization.kernels.scaled_mm.xla.XLAScaledMMLinearKernel method) no_allowed_token_ids (vllm.v1.worker.gpu_input_batch.InputBatch property) NO_COMPILATION (vllm.config.CompilationLevel attribute) no_compile_layers (vllm.forward_context.ForwardContext attribute) no_lora_flag_cpu (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta attribute) no_min_p (vllm.v1.worker.gpu_input_batch.InputBatch property) no_penalties (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) (vllm.v1.worker.gpu_input_batch.InputBatch property) no_prompt_logprob (vllm.v1.worker.gpu_input_batch.InputBatch property) no_proposals (vllm.spec_decode.interfaces.SpeculativeProposals attribute) no_speech_prob (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) no_top_k (vllm.v1.worker.gpu_input_batch.InputBatch property) no_top_p (vllm.v1.worker.gpu_input_batch.InputBatch property) NoBadWordsLogitsProcessor (class in vllm.logits_process) nodes (vllm.compilation.multi_output_match.MultiOutputMatch property) NomicBertModel (class in vllm.model_executor.models.bert_with_rope) NomicExpertMLP (class in vllm.model_executor.models.bert_with_rope) NomicExperts (class in vllm.model_executor.models.bert_with_rope) NomicMoELayer (class in vllm.model_executor.models.bert_with_rope) NomicRouter (class in vllm.model_executor.models.bert_with_rope) non_carry_over_env_vars_file (vllm.executor.ray_distributed_executor.RayDistributedExecutor attribute) non_kv_cache_memory (vllm.utils.MemoryProfilingResult attribute) non_torch_increase (vllm.utils.MemoryProfilingResult attribute) non_torch_memory (vllm.utils.MemorySnapshot attribute) NONE (vllm.scalar_type.NanRepr attribute) NONE_HASH (in module vllm.v1.core.kv_cache_utils) NONE_INT (in module vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe) NONES (in module vllm.v1.engine.logprobs) NonLLMProposerWorkerBase (class in vllm.spec_decode.proposer_worker_base) NonNvmlCudaPlatform (class in vllm.platforms.cuda) NoOpEliminationPass (class in vllm.compilation.noop_elimination) NORM2FN (in module vllm.model_executor.models.intern_vit) normalize (vllm.config.PoolerConfig attribute) normalize_e4m3fn_to_e4m3fnuz() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) now (vllm.engine.metrics_types.Stats attribute) np_cache_weights_iterator() (in module vllm.model_executor.model_loader.weight_utils) np_ndarrays (vllm.assets.video.VideoAsset property) NPCACHE (vllm.config.LoadFormat attribute) nullable_kvs() (in module vllm.engine.arg_utils) NullBlock (class in vllm.core.block.cpu_gpu_block_allocator) NullEventPublisher (class in vllm.distributed.kv_events) num_accepted_tokens (vllm.v1.spec_decode.metrics.SpecDecodingStats attribute) num_accepted_tokens_per_pos (vllm.v1.spec_decode.metrics.SpecDecodingStats attribute) num_actual_tokens (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) num_added_elements (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices property) num_added_elements_padded (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices property) num_added_vocab_padding (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices property) num_attention_heads (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) (vllm.model_executor.models.plamo2.Plamo2Config attribute) (vllm.transformers_utils.configs.medusa.MedusaConfig property) num_backend_compilations (vllm.compilation.counter.CompilationCounter attribute) num_batched_tokens (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.core.scheduler.SchedulingBudget property) num_blocks (vllm.core.evictor.Evictor property) (vllm.core.evictor.LRUEvictor property) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlAgentMetadata attribute) (vllm.v1.kv_cache_interface.KVCacheConfig attribute) num_cached_tokens (vllm.core.scheduler.SchedulingBudget property) (vllm.v1.stats.common.RequestStats attribute) (vllm.v1.stats.common.RequestStatsUpdate attribute) num_channels (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) num_classes (vllm.entrypoints.openai.protocol.ClassificationData attribute) (vllm.outputs.ClassificationOutput property) (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) num_common_prefix_blocks (vllm.v1.core.sched.output.SchedulerOutput attribute) num_completed_blocks (vllm.core.block.common.CacheMetricData attribute) num_computed_tokens (vllm.v1.core.sched.output.CachedRequestData attribute) (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.stats.common.RequestStats attribute) (vllm.v1.stats.common.RequestStatsUpdate attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) num_contexts (vllm.attention.ops.triton_flash_attention.MetaData attribute) num_cpu_blocks (vllm.config.CacheConfig attribute) num_crops (vllm.model_executor.models.molmo.MolmoImageInputs attribute) num_cudagraph_caputured (vllm.compilation.counter.CompilationCounter attribute) num_curr_seqs (vllm.core.scheduler.SchedulingBudget property) num_decode_tokens (vllm.attention.backends.abstract.AttentionMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) num_decodes (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) num_decoding_tokens_per_seq (vllm.core.scheduler.Scheduler property) num_draft_tokens (vllm.v1.spec_decode.metadata.SpecDecodeMetadata attribute) (vllm.v1.spec_decode.metrics.SpecDecodingStats attribute) num_drafts (vllm.v1.spec_decode.metrics.SpecDecodingStats attribute) num_elements_padded (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices property) num_empty_slots (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) num_encoder_tokens (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) num_finished_seqs() (vllm.sequence.SequenceGroup method) num_finished_warmup (vllm.compilation.backends.ConcreteSizeEntry attribute) num_frames (vllm.assets.video.VideoAsset attribute) num_full_slots (vllm.core.block.block_table.BlockTable property) num_generation_tokens (vllm.v1.metrics.stats.FinishedRequestStats attribute) (vllm.v1.metrics.stats.RequestStateStats attribute) num_generation_tokens_iter (vllm.engine.metrics_types.Stats attribute) num_generation_tokens_requests (vllm.engine.metrics_types.Stats attribute) num_gpu_blocks (vllm.config.CacheConfig attribute) num_gpu_blocks_override (vllm.config.CacheConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) num_graphs_seen (vllm.compilation.counter.CompilationCounter attribute) num_hashed_tokens_of_block() (vllm.sequence.Sequence method) num_heads (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) num_hidden_layers (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) (vllm.model_executor.models.plamo2.Plamo2Config attribute) num_img_tokens (vllm.model_executor.models.phi4mm.Phi4MMImagePixelInputs attribute) num_incompleted_block_hit (vllm.core.block.common.CacheMetricData attribute) num_incompleted_block_queries (vllm.core.block.common.CacheMetricData attribute) num_key_value_heads (vllm.model_executor.models.plamo2.Plamo2Config attribute) num_kv_heads (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.kv_cache_interface.AttentionSpec attribute) num_logprobs (vllm.v1.engine.logprobs.LogprobsProcessor attribute) num_lookahead_slots (vllm.config.SchedulerConfig attribute) (vllm.config.SpeculativeConfig property) (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.core.scheduler.SchedulerPrefillOutputs attribute) (vllm.core.scheduler.SchedulerRunningOutputs attribute) (vllm.core.scheduler.SchedulerSwappedInOutputs attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.sequence.ExecuteModelRequest attribute) num_lookahead_tokens (vllm.transformers_utils.configs.medusa.MedusaConfig property) num_models_seen (vllm.compilation.counter.CompilationCounter attribute) num_new_tokens (vllm.v1.stats.common.RequestStatsUpdate attribute) num_org_elements (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices property) num_org_elements_padded (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices property) num_org_vocab_padding (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices property) num_output_tokens (vllm.v1.request.Request property) (vllm.v1.stats.common.RequestStats property) num_patches (vllm.model_executor.models.aya_vision.AyaVisionImagePixelInputs attribute) (vllm.model_executor.models.gemma3_mm.Gemma3ImagePixelInputs attribute) (vllm.model_executor.models.idefics3.Idefics3ImagePixelInputs attribute) (vllm.model_executor.models.internvl.InternVLImagePixelInputs attribute) (vllm.model_executor.models.skyworkr1v.SkyworkR1VImagePixelInputs attribute) num_piecewise_capturable_graphs_seen (vllm.compilation.counter.CompilationCounter attribute) num_piecewise_graphs_seen (vllm.compilation.counter.CompilationCounter attribute) num_preemption_iter (vllm.engine.metrics_types.Stats attribute) num_prefill_groups (vllm.core.scheduler.SchedulerOutputs attribute) num_prefill_tokens (vllm.attention.backends.abstract.AttentionMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) num_prefills (vllm.attention.backends.abstract.AttentionMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) NUM_PREFIX_TOKENS (in module vllm.model_executor.models.molmo) num_processed_tokens (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar attribute) num_prompt_logprobs (vllm.v1.engine.logprobs.LogprobsProcessor attribute) num_prompt_tokens (vllm.v1.metrics.stats.FinishedRequestStats attribute) (vllm.v1.stats.common.RequestStats attribute) (vllm.v1.stats.common.RequestStatsUpdate attribute) num_prompt_tokens_iter (vllm.engine.metrics_types.Stats attribute) num_prompt_tokens_requests (vllm.engine.metrics_types.Stats attribute) num_qo_heads (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) num_queries (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) num_readers (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) num_recomputing_layers (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) num_reqs (vllm.v1.worker.gpu_input_batch.InputBatch property) num_running_reqs (vllm.v1.metrics.stats.SchedulerStats attribute) (vllm.v1.stats.common.SchedulerStats attribute) num_running_seqs (vllm.profiler.layerwise_profile.LayerwiseProfileResults attribute) num_running_sys (vllm.engine.metrics_types.Stats attribute) num_samples (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) num_scheduled_tokens (vllm.v1.core.sched.output.SchedulerOutput attribute) num_scheduler_steps (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) num_seq_groups (vllm.worker.worker_base.WorkerInput attribute) num_seqs (vllm.v1.attention.backends.pallas.PallasMetadata attribute) (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) num_seqs() (vllm.sequence.SequenceGroup method) num_single_step_prefills (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) num_slices (vllm.model_executor.models.minicpmv.MiniCPMVImagePixelInputs attribute) num_spec_tokens (vllm.spec_decode.metrics.SpecDecodeWorkerMetrics attribute) (vllm.v1.spec_decode.metrics.SpecDecodingStats attribute) num_speculative_tokens (vllm.config.SpeculativeConfig attribute) (vllm.sequence.SequenceGroupMetadata attribute) num_splits (vllm.v1.attention.backends.mla.flashmla.FlashMLADecodeMetadata attribute) num_steps (vllm.sequence.ExecuteModelRequest attribute) (vllm.sequence.SequenceGroupState attribute) (vllm.worker.worker_base.WorkerInput attribute) num_swapped_sys (vllm.engine.metrics_types.Stats attribute) num_tokens (vllm.v1.request.Request property) (vllm.v1.worker.gpu_input_batch.CachedRequestState property) num_tokens_iter (vllm.engine.metrics_types.Stats attribute) num_tokens_per_lora (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta attribute) num_tokens_total (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) num_tokens_with_spec (vllm.v1.request.Request property) num_waiting_reqs (vllm.v1.metrics.stats.SchedulerStats attribute) (vllm.v1.stats.common.SchedulerStats attribute) num_waiting_sys (vllm.engine.metrics_types.Stats attribute) NUM_WARPS (in module vllm.attention.ops.prefix_prefill) NVLM_D_Config (class in vllm.transformers_utils.configs.nvlm_d) NVLM_D_Model (class in vllm.model_executor.models.nvlm_d) NVLMDummyInputsBuilder (class in vllm.model_executor.models.nvlm_d) NVLMMultiModalProcessor (class in vllm.model_executor.models.nvlm_d) NVLMProcessingInfo (class in vllm.model_executor.models.nvlm_d) NVLMProcessor (class in vllm.model_executor.models.nvlm_d) nvml_available (in module vllm.platforms.cuda) NvmlCudaPlatform (class in vllm.platforms.cuda) nvtx_range() (in module vllm.spec_decode.util) O o_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) object (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.ChatCompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.ClassificationResponse attribute) (vllm.entrypoints.openai.protocol.CompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponse attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponseData attribute) (vllm.entrypoints.openai.protocol.ErrorResponse attribute) (vllm.entrypoints.openai.protocol.ModelCard attribute) (vllm.entrypoints.openai.protocol.ModelList attribute) (vllm.entrypoints.openai.protocol.ModelPermission attribute) (vllm.entrypoints.openai.protocol.PoolingResponse attribute) (vllm.entrypoints.openai.protocol.PoolingResponseData attribute) (vllm.entrypoints.openai.protocol.ScoreResponse attribute) (vllm.entrypoints.openai.protocol.ScoreResponseData attribute) (vllm.entrypoints.openai.protocol.TranscriptionStreamResponse attribute) observability_config (vllm.config.VllmConfig attribute) (vllm.worker.worker_base.LocalOrDistributedWorkerBase attribute) ObservabilityConfig (class in vllm.config) observe() (vllm.v1.core.kv_cache_utils.PrefixCachingMetrics method) (vllm.v1.spec_decode.metrics.SpecDecodingLogging method) (vllm.v1.spec_decode.metrics.SpecDecodingProm method) observe_draft() (vllm.v1.spec_decode.metrics.SpecDecodingStats method) observe_finished_request() (vllm.v1.engine.parallel_sampling.ParentRequest static method) observe_num_generation_tokens() (vllm.v1.engine.parallel_sampling.ParentRequest method) OCP_MX_BLOCK_SIZE (in module vllm.model_executor.layers.quantization.utils.mxfp4_utils) offset (vllm.multimodal.inputs.PlaceholderRange attribute) offsets_by_lora_id (vllm.lora.models.LongContextLoRAContext attribute) OK (vllm.core.interfaces.AllocStatus attribute) Olmo2Attention (class in vllm.model_executor.models.olmo2) Olmo2DecoderLayer (class in vllm.model_executor.models.olmo2) Olmo2ForCausalLM (class in vllm.model_executor.models.olmo2) Olmo2MLP (class in vllm.model_executor.models.olmo2) Olmo2Model (class in vllm.model_executor.models.olmo2) OlmoAttention (class in vllm.model_executor.models.olmo) OlmoDecoderLayer (class in vllm.model_executor.models.olmo) OlmoeAttention (class in vllm.model_executor.models.olmoe) OlmoeDecoderLayer (class in vllm.model_executor.models.olmoe) OlmoeForCausalLM (class in vllm.model_executor.models.olmoe) OlmoeModel (class in vllm.model_executor.models.olmoe) OlmoeMoE (class in vllm.model_executor.models.olmoe) OlmoForCausalLM (class in vllm.model_executor.models.olmo) OlmoMLP (class in vllm.model_executor.models.olmo) OlmoModel (class in vllm.model_executor.models.olmo) omni_get_updates_use_audio_in_video() (vllm.model_executor.layers.rotary_embedding.MRotaryEmbedding class method) on_mi250_mi300() (in module vllm.platforms.rocm) OOT (vllm.platforms.interface.PlatformEnum attribute) op_registry (vllm.model_executor.custom_op.CustomOp attribute) open_mem_handle() (in module vllm._custom_ops) open_stream() (vllm.model_executor.model_loader.tensorizer.TensorizerConfig method) OPENAI_API_SERVER (vllm.usage.usage_lib.UsageContext attribute) OPENAI_BATCH_RUNNER (vllm.usage.usage_lib.UsageContext attribute) OpenAIBaseModel (class in vllm.entrypoints.openai.protocol) OpenAIServing (class in vllm.entrypoints.openai.serving_engine) OpenAIServingChat (class in vllm.entrypoints.openai.serving_chat) OpenAIServingCompletion (class in vllm.entrypoints.openai.serving_completion) OpenAIServingEmbedding (class in vllm.entrypoints.openai.serving_embedding) OpenAIServingModels (class in vllm.entrypoints.openai.serving_models) OpenAIServingPooling (class in vllm.entrypoints.openai.serving_pooling) OpenAIServingTokenization (class in vllm.entrypoints.openai.serving_tokenization) OpenAIServingTranscription (class in vllm.entrypoints.openai.serving_transcription) OpenCVVideoBackend (class in vllm.multimodal.video) OPT_FEATURES (vllm.model_executor.layers.quantization.bitblas.BitBLASLinearMethod attribute) (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel attribute) OPTAttention (class in vllm.model_executor.models.opt) OPTDecoder (class in vllm.model_executor.models.opt) OPTDecoderLayer (class in vllm.model_executor.models.opt) OPTForCausalLM (class in vllm.model_executor.models.opt) optimize() (vllm.lora.lora.LoRALayerWeights method) (vllm.lora.lora.PackedLoRALayerWeights method) optimized_dequantize_gemm() (in module vllm.model_executor.layers.quantization.aqlm) optional_type() (in module vllm.engine.arg_utils) OPTLearnedPositionalEmbedding (class in vllm.model_executor.models.opt) OPTModel (class in vllm.model_executor.models.opt) order (vllm.utils.LRUCache property) org_vocab_end_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) org_vocab_size (vllm.lora.layers.LogitsProcessorWithLoRA property) org_vocab_start_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) organization (vllm.entrypoints.openai.protocol.ModelPermission attribute) orig_seq_lens (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) orig_seq_lens_tensor (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) orig_to_new_prefix (vllm.model_executor.models.utils.WeightsMapper attribute) orig_to_new_substr (vllm.model_executor.models.utils.WeightsMapper attribute) orig_to_new_suffix (vllm.model_executor.models.utils.WeightsMapper attribute) original_max_position_embeddings (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) OrionAttention (class in vllm.model_executor.models.orion) OrionDecoderLayer (class in vllm.model_executor.models.orion) OrionForCausalLM (class in vllm.model_executor.models.orion) OrionMLP (class in vllm.model_executor.models.orion) OrionModel (class in vllm.model_executor.models.orion) otel_import_error_traceback (in module vllm.tracing) OTHER (vllm.platforms.interface.CpuArchEnum attribute) otlp_traces_endpoint (vllm.config.ObservabilityConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) outplace_fused_experts() (in module vllm.model_executor.layers.fused_moe.fused_moe) outplace_fused_experts_fake() (in module vllm.model_executor.layers.fused_moe.fused_moe) output (vllm.compilation.backends.ConcreteSizeEntry attribute) (vllm.model_executor.models.module_mapping.ModelKeys attribute) output_aggregator (vllm.v1.engine.parallel_sampling.ParentRequest attribute) output_dim (vllm.lora.lora.LoRALayerWeights property) (vllm.lora.lora.PackedLoRALayerWeights property) output_dtype (vllm.attention.ops.triton_flash_attention.MetaData attribute) output_embed (vllm.sequence.SequenceOutput attribute) output_embeds (vllm.sequence.SequenceData property) output_kind (vllm.sampling_params.SamplingParams attribute) output_len (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) output_produced (vllm.sequence.SequenceGroupBase attribute) output_queue_size (vllm.v1.stats.common.EngineCoreProcessStats attribute) output_queue_task (vllm.v1.engine.core_client.BackgroundResources attribute) output_socket (vllm.v1.engine.core_client.BackgroundResources attribute) output_text_buffer_length (vllm.sampling_params.SamplingParams attribute) output_throughput (vllm.benchmarks.serve.BenchmarkMetrics attribute) output_token (vllm.sequence.SequenceOutput attribute) output_token_ids (vllm.sequence.SequenceData property) (vllm.v1.engine.detokenizer.IncrementalDetokenizer property) (vllm.v1.engine.detokenizer.SlowIncrementalDetokenizer property) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) output_token_ids_array (vllm.sequence.SequenceData property) output_token_latency_s_lst (vllm.v1.stats.common.RequestStats property) output_token_ts_s_lst (vllm.v1.stats.common.RequestStats attribute) output_tokens (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) (vllm.model_executor.sampling_metadata.SamplingTensors attribute) OutputData (class in vllm.engine.llm_engine) OutputProcessor (class in vllm.v1.engine.output_processor) OutputProcessorOutput (class in vllm.v1.engine.output_processor) outputs (vllm.engine.llm_engine.OutputData attribute) (vllm.model_executor.layers.sampler.SamplerOutput attribute) (vllm.sequence.PoolerOutput attribute) (vllm.v1.engine.EngineCoreOutputs attribute) overlap_margins() (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) override_config() (in module vllm.model_executor.layers.fused_moe) (in module vllm.model_executor.layers.quantization.utils.gptq_utils) override_generation_config (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) override_neuron_config (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) override_pooler_config (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) override_quantization_method() (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig class method) (vllm.model_executor.layers.quantization.base_config.QuantizationConfig class method) (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig class method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig class method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24Config class method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXConfig class method) (vllm.model_executor.layers.quantization.marlin.MarlinConfig class method) (vllm.model_executor.layers.quantization.moe_wna16.MoeWNA16Config class method) Ovis (class in vllm.model_executor.models.ovis) OvisConfig (class in vllm.transformers_utils.configs.ovis) OvisDummyInputsBuilder (class in vllm.model_executor.models.ovis) OvisImagePatchInputs (class in vllm.model_executor.models.ovis) OvisMultiModalProcessor (class in vllm.model_executor.models.ovis) OvisProcessingInfo (class in vllm.model_executor.models.ovis) OvisProcessor (class in vllm.transformers_utils.processors.ovis) OvisProcessorKwargs (class in vllm.transformers_utils.processors.ovis) owned_by (vllm.entrypoints.openai.protocol.ModelCard attribute) P P (in module vllm.utils) p (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) pack() (vllm.lora.lora.PackedLoRALayerWeights class method) pack_cols() (in module vllm.model_executor.layers.quantization.utils.quant_utils) pack_fp8_to_int32() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) pack_params() (vllm.model_executor.models.deepseek.DeepseekMoE method) pack_quantized_values_into_int32() (in module vllm.model_executor.layers.quantization.utils.quant_utils) pack_rows() (in module vllm.model_executor.layers.quantization.utils.quant_utils) packed_dim (vllm.model_executor.parameter.PackedColumnParameter property) (vllm.model_executor.parameter.PackedvLLMParameter property) packed_factor (vllm.model_executor.parameter.PackedColumnParameter property) (vllm.model_executor.parameter.PackedvLLMParameter property) packed_mapping (vllm.model_executor.model_loader.utils.ParamMapping attribute) packed_modules_mapping (vllm.model_executor.models.arctic.ArcticForCausalLM attribute) (vllm.model_executor.models.aria.AriaTextModel attribute) (vllm.model_executor.models.aria.AriaVisionTransformer attribute) (vllm.model_executor.models.baichuan.BaiChuanBaseForCausalLM attribute) (vllm.model_executor.models.bamba.BambaForCausalLM attribute) (vllm.model_executor.models.bart.BartForConditionalGeneration attribute) (vllm.model_executor.models.bert.BertModel attribute) (vllm.model_executor.models.blip.BlipVisionModel attribute) (vllm.model_executor.models.chameleon.ChameleonForConditionalGeneration attribute) (vllm.model_executor.models.chatglm.ChatGLMForCausalLM attribute) (vllm.model_executor.models.chatglm.ChatGLMModel attribute) (vllm.model_executor.models.clip.CLIPVisionModel attribute) (vllm.model_executor.models.commandr.CohereForCausalLM attribute) (vllm.model_executor.models.exaone.ExaoneForCausalLM attribute) (vllm.model_executor.models.falcon.FalconForCausalLM attribute) (vllm.model_executor.models.gemma.GemmaForCausalLM attribute) (vllm.model_executor.models.gemma2.Gemma2ForCausalLM attribute) (vllm.model_executor.models.gemma3.Gemma3ForCausalLM attribute) (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration attribute) (vllm.model_executor.models.glm4.Glm4ForCausalLM attribute) (vllm.model_executor.models.glm4v.GLM4VForCausalLM attribute) (vllm.model_executor.models.gpt_bigcode.GPTBigCodeForCausalLM attribute) (vllm.model_executor.models.granite.GraniteForCausalLM attribute) (vllm.model_executor.models.granite_speech.GraniteSpeechForConditionalGeneration attribute) (vllm.model_executor.models.granitemoe.GraniteMoeForCausalLM attribute) (vllm.model_executor.models.granitemoehybrid.GraniteMoeHybridForCausalLM attribute) (vllm.model_executor.models.granitemoeshared.GraniteMoeSharedForCausalLM attribute) (vllm.model_executor.models.grok1.Grok1ForCausalLM attribute) (vllm.model_executor.models.idefics3.Idefics3ForConditionalGeneration attribute) (vllm.model_executor.models.interfaces.SupportsLoRA attribute) (vllm.model_executor.models.interfaces.SupportsQuant attribute) (vllm.model_executor.models.internlm2.InternLM2ForCausalLM attribute) (vllm.model_executor.models.jamba.JambaForCausalLM attribute) (vllm.model_executor.models.llama.LlamaForCausalLM attribute) (vllm.model_executor.models.llama4.Llama4ForCausalLM attribute) (vllm.model_executor.models.llava.LlavaForConditionalGeneration attribute) (vllm.model_executor.models.minicpm.MiniCPMForCausalLM attribute) (vllm.model_executor.models.minicpm3.MiniCPM3ForCausalLM attribute) (vllm.model_executor.models.minicpmo.MiniCPMO attribute) (vllm.model_executor.models.minicpmv.MiniCPMV2_5 attribute) (vllm.model_executor.models.minicpmv.MiniCPMV2_6 attribute) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ForConditionalGeneration attribute) (vllm.model_executor.models.mistral3.Mistral3ForConditionalGeneration attribute) (vllm.model_executor.models.mixtral.MixtralForCausalLM attribute) (vllm.model_executor.models.mllama.MllamaForConditionalGeneration attribute) (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration attribute) (vllm.model_executor.models.molmo.MolmoForCausalLM attribute) (vllm.model_executor.models.molmo.MolmoVisionBackbone attribute) (vllm.model_executor.models.nemotron.NemotronForCausalLM attribute) (vllm.model_executor.models.nemotron_nas.DeciLMForCausalLM attribute) (vllm.model_executor.models.opt.OPTForCausalLM attribute) (vllm.model_executor.models.paligemma.PaliGemmaForConditionalGeneration attribute) (vllm.model_executor.models.phi.PhiForCausalLM attribute) (vllm.model_executor.models.phi3.Phi3ForCausalLM attribute) (vllm.model_executor.models.phi4mm.Phi4MMForCausalLM attribute) (vllm.model_executor.models.phimoe.PhiMoEForCausalLM attribute) (vllm.model_executor.models.plamo2.Plamo2ForCausalLM attribute) (vllm.model_executor.models.qwen.QWenLMHeadModel attribute) (vllm.model_executor.models.qwen2.Qwen2EmbeddingModel attribute) (vllm.model_executor.models.qwen2.Qwen2ForCausalLM attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLForConditionalGeneration attribute) (vllm.model_executor.models.qwen2_rm.Qwen2RewardBaseModel attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLForConditionalGeneration attribute) (vllm.model_executor.models.qwen3.Qwen3ForCausalLM attribute) (vllm.model_executor.models.qwen_vl.QwenVLForConditionalGeneration attribute) (vllm.model_executor.models.solar.SolarForCausalLM attribute) (vllm.model_executor.models.ultravox.UltravoxModel attribute) (vllm.model_executor.models.whisper.WhisperForConditionalGeneration attribute) PackedColumnParameter (class in vllm.model_executor.parameter) PackedLoRALayerWeights (class in vllm.lora.lora) PackedvLLMParameter (class in vllm.model_executor.parameter) pad_and_concat_to_dim3() (in module vllm.model_executor.models.ultravox) pad_dummy_encoder_prompt (vllm.model_executor.models.whisper.WhisperMultiModalProcessor property) (vllm.multimodal.processing.EncDecMultiModalProcessor property) pad_for_cudagraph() (vllm.config.VllmConfig method) pad_id (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor property) pad_list() (in module vllm.worker.hpu_model_runner) PAD_SLOT_ID (in module vllm.attention.backends.utils) pad_token (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) pad_vocab_size() (in module vllm.model_executor.layers.vocab_parallel_embedding) padded_added_vocab_end_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) padded_added_vocab_start_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) padded_org_vocab_end_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) padded_org_vocab_start_index (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbeddingShardIndices attribute) PADDING_SLOT_ID (in module vllm.v1.spec_decode.eagle) page_size (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) page_size_bytes (vllm.v1.kv_cache_interface.AttentionSpec property) (vllm.v1.kv_cache_interface.KVCacheSpec property) paged_attention() (in module vllm.attention.backends.pallas) paged_attention_rocm() (in module vllm._custom_ops) paged_attention_v1() (in module vllm._custom_ops) (vllm._ipex_ops.ipex_ops static method) paged_attention_v2() (in module vllm._custom_ops) (vllm._ipex_ops.ipex_ops static method) paged_kv_indices (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLADecodeMetadata attribute) paged_kv_indptr (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLADecodeMetadata attribute) paged_kv_last_page_len (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLADecodeMetadata attribute) paged_kv_last_page_lens (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadata attribute) PagedAttention (class in vllm.attention.ops.paged_attn) (in module vllm.attention.ops.ipex_attn) PagedAttentionMetadata (class in vllm.attention.ops.paged_attn) PaliGemmaDummyInputsBuilder (class in vllm.model_executor.models.paligemma) PaliGemmaForConditionalGeneration (class in vllm.model_executor.models.paligemma) PaliGemmaImageEmbeddingInputs (class in vllm.model_executor.models.paligemma) PaliGemmaImageInputs (in module vllm.model_executor.models.paligemma) PaliGemmaImagePixelInputs (class in vllm.model_executor.models.paligemma) PaliGemmaMultiModalProcessor (class in vllm.model_executor.models.paligemma) PaliGemmaMultiModalProjector (class in vllm.model_executor.models.paligemma) PaliGemmaProcessingInfo (class in vllm.model_executor.models.paligemma) PallasAttentionBackend (class in vllm.attention.backends.pallas) (class in vllm.v1.attention.backends.pallas) PallasAttentionBackendImpl (class in vllm.attention.backends.pallas) (class in vllm.v1.attention.backends.pallas) PallasMetadata (class in vllm.attention.backends.pallas) (class in vllm.v1.attention.backends.pallas) parallel_config (vllm.config.VllmConfig attribute) parallel_tool_calls (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) ParallelConfig (class in vllm.config) ParallelLMHead (class in vllm.model_executor.layers.vocab_parallel_embedding) ParallelSampleSequenceGroup (class in vllm.sequence) param (vllm.entrypoints.openai.protocol.ErrorResponse attribute) parameters (vllm.entrypoints.openai.protocol.FunctionDefinition attribute) ParamMapping (class in vllm.model_executor.model_loader.utils) params (vllm.engine.multiprocessing.RPCProcessRequest attribute) params_mapping (vllm.model_executor.models.bart.BartForConditionalGeneration attribute) parent (vllm.entrypoints.openai.protocol.ModelCard attribute) parent_block_hash (vllm.distributed.kv_events.BlockStored attribute) parent_seq_id (vllm.sequence.SequenceOutput attribute) ParentRequest (class in vllm.v1.engine.parallel_sampling) parse_and_batch_prompt() (in module vllm.inputs.parse) parse_args() (in module vllm.entrypoints.openai.run_batch) (vllm.utils.FlexibleArgumentParser method) parse_audio() (vllm.entrypoints.chat_utils.AsyncMultiModalContentParser method) (vllm.entrypoints.chat_utils.BaseMultiModalContentParser method) (vllm.entrypoints.chat_utils.MultiModalContentParser method) parse_chat_messages() (in module vllm.entrypoints.chat_utils) parse_chat_messages_futures() (in module vllm.entrypoints.chat_utils) parse_fine_tuned_lora_name() (in module vllm.lora.utils) parse_goodput() (in module vllm.benchmarks.serve) parse_image() (vllm.entrypoints.chat_utils.AsyncMultiModalContentParser method) (vllm.entrypoints.chat_utils.BaseMultiModalContentParser method) (vllm.entrypoints.chat_utils.MultiModalContentParser method) parse_image_embeds() (vllm.entrypoints.chat_utils.AsyncMultiModalContentParser method) (vllm.entrypoints.chat_utils.BaseMultiModalContentParser method) (vllm.entrypoints.chat_utils.MultiModalContentParser method) parse_input_audio() (vllm.entrypoints.chat_utils.AsyncMultiModalContentParser method) (vllm.entrypoints.chat_utils.BaseMultiModalContentParser method) (vllm.entrypoints.chat_utils.MultiModalContentParser method) parse_mm_data() (vllm.multimodal.parse.MultiModalDataParser method) parse_output() (vllm.v1.sample.rejection_sampler.RejectionSampler static method) parse_singleton_prompt() (in module vllm.inputs.parse) parse_type() (in module vllm.engine.arg_utils) parse_video() (vllm.entrypoints.chat_utils.AsyncMultiModalContentParser method) (vllm.entrypoints.chat_utils.BaseMultiModalContentParser method) (vllm.entrypoints.chat_utils.MultiModalContentParser method) ParsedEmbedsPrompt (class in vllm.inputs.parse) ParsedSingletonPrompt (in module vllm.inputs.parse) ParsedStrPrompt (class in vllm.inputs.parse) ParsedText (class in vllm.inputs.parse) ParsedTextPrompt (class in vllm.inputs.parse) ParsedTokens (class in vllm.inputs.parse) ParsedTokensPrompt (class in vllm.inputs.parse) partial_json_loads() (in module vllm.entrypoints.openai.tool_parsers.utils) PartialPrefillMetadata (class in vllm.core.scheduler) partition_weight_shape (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearLayerConfig attribute) pass_config (vllm.config.CompilationConfig attribute) pass_context() (in module vllm.compilation.inductor_pass) PassConfig (class in vllm.config) PassContext (class in vllm.compilation.inductor_pass) PATCH_MERGE (in module vllm.model_executor.models.pixtral) patch_merger() (in module vllm.model_executor.models.moonvit) patch_padding_side() (in module vllm.transformers_utils.tokenizer) patch_rope_scaling() (in module vllm.transformers_utils.config) patch_rope_scaling_dict() (in module vllm.transformers_utils.config) patch_size (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) patch_size() (vllm.model_executor.models.pixtral.PixtralProcessorAdapter method) patch_tensor_parallel_group() (in module vllm.distributed.parallel_state) patches_per_image (vllm.model_executor.models.fuyu.FuyuImagePatchInputs attribute) (vllm.model_executor.models.mllama4.Llama4ImagePatchInputs attribute) (vllm.model_executor.models.ovis.OvisImagePatchInputs attribute) PatchMerger (class in vllm.model_executor.models.pixtral) path (vllm.entrypoints.openai.serving_models.LoRAModulePath attribute) (vllm.lora.request.LoRARequest property) path_to_dict_mapping (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary attribute) (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary attribute) path_to_library_cache (vllm.distributed.device_communicators.cuda_wrapper.CudaRTLibrary attribute) (vllm.distributed.device_communicators.pynccl_wrapper.NCCLLibrary attribute) pbar() (vllm.entrypoints.openai.run_batch.BatchProgressTracker method) pct_cuda_time (vllm.profiler.layerwise_profile.ModelStatsEntry attribute) (vllm.profiler.layerwise_profile.SummaryStatsEntry attribute) PEFTHelper (class in vllm.lora.peft_helper) per_tensor (vllm.compilation.fusion.QuantKey attribute) per_tensor_dequantize() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) per_token_group_quant_fp8() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) per_token_group_quant_int8() (in module vllm.model_executor.layers.quantization.utils.int8_utils) per_token_group_quant_mxfp4() (in module vllm.model_executor.layers.quantization.utils.mxfp4_utils) per_token_quant_int8() (in module vllm.model_executor.layers.quantization.utils.int8_utils) percentiles_e2el_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) percentiles_itl_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) percentiles_tpot_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) percentiles_ttft_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) PerLayerParameters (class in vllm.attention.backends.flashinfer) (class in vllm.v1.attention.backends.flashinfer) permission (vllm.entrypoints.openai.protocol.ModelCard attribute) permute() (vllm.model_executor.models.pixtral.PatchMerger method) permute_cols() (in module vllm._custom_ops) permute_param_layout_() (in module vllm.model_executor.parameter) permute_qk_weight_for_rotary() (vllm.model_executor.models.llama4.Llama4ForCausalLM method) permute_rows() (in module vllm.model_executor.layers.quantization.utils.quant_utils) PersimmonAttention (class in vllm.model_executor.models.persimmon) PersimmonDecoderLayer (class in vllm.model_executor.models.persimmon) PersimmonForCausalLM (class in vllm.model_executor.models.persimmon) PersimmonMLP (class in vllm.model_executor.models.persimmon) PersimmonModel (class in vllm.model_executor.models.persimmon) PerTensorScaleParameter (class in vllm.model_executor.parameter) PG_WAIT_TIMEOUT (in module vllm.executor.ray_utils) PhaseType (class in vllm.worker.hpu_model_runner) Phi3ForCausalLM (class in vllm.model_executor.models.phi3) Phi3HDImageEmbedding (class in vllm.model_executor.models.phi3v) Phi3ImageEmbeddingBase (class in vllm.model_executor.models.phi3v) Phi3LongRoPEScaledRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) Phi3SmallDecoderLayer (class in vllm.model_executor.models.phi3_small) Phi3SmallForCausalLM (class in vllm.model_executor.models.phi3_small) Phi3SmallMLP (class in vllm.model_executor.models.phi3_small) Phi3SmallModel (class in vllm.model_executor.models.phi3_small) Phi3SmallSelfAttention (class in vllm.model_executor.models.phi3_small) Phi3VDummyInputsBuilder (class in vllm.model_executor.models.phi3v) Phi3VForCausalLM (class in vllm.model_executor.models.phi3v) Phi3VImageEmbeddingInputs (class in vllm.model_executor.models.phi3v) Phi3VImageInputs (in module vllm.model_executor.models.phi3v) Phi3VImagePixelInputs (class in vllm.model_executor.models.phi3v) Phi3VMultiModalProcessor (class in vllm.model_executor.models.phi3v) Phi3VProcessingInfo (class in vllm.model_executor.models.phi3v) Phi4MiniJsonToolParser (class in vllm.entrypoints.openai.tool_parsers.phi4mini_tool_parser) Phi4MMAudioEmbeddingInputs (class in vllm.model_executor.models.phi4mm) Phi4MMAudioFeatureInputs (class in vllm.model_executor.models.phi4mm) Phi4MMAudioInputs (in module vllm.model_executor.models.phi4mm) Phi4MMDummyInputsBuilder (class in vllm.model_executor.models.phi4mm) Phi4MMForCausalLM (class in vllm.model_executor.models.phi4mm) Phi4MMImageEmbeddingInputs (class in vllm.model_executor.models.phi4mm) Phi4MMImageEncoder (class in vllm.model_executor.models.phi4mm) Phi4MMImageInput (in module vllm.model_executor.models.phi4mm) Phi4MMImagePixelInputs (class in vllm.model_executor.models.phi4mm) Phi4MMMultiModalProcessor (class in vllm.model_executor.models.phi4mm) Phi4MMProcessingInfo (class in vllm.model_executor.models.phi4mm) PhiAttention (class in vllm.model_executor.models.phi) PhiForCausalLM (class in vllm.model_executor.models.phi) PhiLayer (class in vllm.model_executor.models.phi) PhiMLP (class in vllm.model_executor.models.phi) PhiModel (class in vllm.model_executor.models.phi) PhiMoE (class in vllm.model_executor.models.phimoe) phimoe_routing_function() (in module vllm.model_executor.models.phimoe) PhiMoEAttention (class in vllm.model_executor.models.phimoe) PhiMoEConfig (class in vllm.model_executor.models.phimoe) PhiMoEDecoderLayer (class in vllm.model_executor.models.phimoe) PhiMoEForCausalLM (class in vllm.model_executor.models.phimoe) PhiMoEModel (class in vllm.model_executor.models.phimoe) physical_block_ids (vllm.core.block.block_table.BlockTable property) PIECEWISE (vllm.config.CompilationLevel attribute) piecewise_graphs (vllm.compilation.backends.VllmBackend attribute) PiecewiseBackend (class in vllm.compilation.backends) PiecewiseCompileInterpreter (class in vllm.compilation.backends) pil_image (vllm.assets.image.ImageAsset property) pil_images (vllm.assets.video.VideoAsset property) pin() (vllm.utils.LRUCache method) pin_adapter() (vllm.adapter_commons.models.AdapterModelManager method) (vllm.lora.models.LoRAModelManager method) (vllm.lora.models.LRUCacheLoRAModelManager method) (vllm.lora.worker_manager.WorkerLoRAManager method) (vllm.prompt_adapter.models.LRUCachePromptAdapterModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) pin_lora() (vllm.engine.llm_engine.LLMEngine method) (vllm.executor.executor_base.ExecutorBase method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin method) (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.LoRANotSupportedWorkerBase method) (vllm.worker.worker_base.WorkerBase method) pin_lora_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) pin_prompt_adapter() (vllm.executor.executor_base.ExecutorBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) ping() (in module vllm.entrypoints.openai.api_server) pipeline_parallel() (vllm.model_executor.models.transformers.TransformersModel method) pipeline_parallel_size (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) pixel_attention_mask (vllm.model_executor.models.idefics3.Idefics3ImagePixelInputs attribute) pixel_mask (vllm.model_executor.models.aria.AriaImagePixelInputs attribute) pixel_shuffle() (in module vllm.model_executor.models.mllama4) (vllm.model_executor.models.aya_vision.AyaVisionMultiModalProjector method) (vllm.model_executor.models.idefics3.Idefics3Connector method) (vllm.model_executor.models.internvl.InternVLChatModel method) (vllm.model_executor.models.skyworkr1v.SkyworkR1VChatModel method) pixel_values (vllm.model_executor.models.aria.AriaImagePixelInputs attribute) (vllm.model_executor.models.aya_vision.AyaVisionImagePixelInputs attribute) (vllm.model_executor.models.gemma3_mm.Gemma3ImagePixelInputs attribute) (vllm.model_executor.models.idefics3.Idefics3ImagePixelInputs attribute) (vllm.model_executor.models.kimi_vl.KimiVLImagePixelInputs attribute) (vllm.model_executor.models.llava.LlavaImagePixelInputs attribute) (vllm.model_executor.models.llava.PixtralHFImagePixelInputs attribute) (vllm.model_executor.models.llava_next.LlavaNextImagePixelInputs attribute) (vllm.model_executor.models.llava_onevision.LlavaOnevisionImagePixelInputs attribute) (vllm.model_executor.models.minicpmv.MiniCPMVImagePixelInputs attribute) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ImagePixelInputs attribute) (vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLImagePixelInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLImagePixelInputs attribute) pixel_values_flat (vllm.model_executor.models.internvl.InternVLImagePixelInputs attribute) (vllm.model_executor.models.skyworkr1v.SkyworkR1VImagePixelInputs attribute) pixel_values_videos (vllm.model_executor.models.llava_onevision.LlavaOnevisionVideoPixelInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLVideoPixelInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLVideoPixelInputs attribute) PixtralDummyInputsBuilder (class in vllm.model_executor.models.pixtral) PixtralForConditionalGeneration (class in vllm.model_executor.models.pixtral) PixtralHFAttention (class in vllm.model_executor.models.pixtral) PixtralHFEncoderInfo (class in vllm.model_executor.models.pixtral) PixtralHFImagePixelInputs (class in vllm.model_executor.models.llava) PixtralHFMLP (class in vllm.model_executor.models.pixtral) PixtralHFMultiModalProcessor (class in vllm.model_executor.models.llava) PixtralHFProcessingInfo (class in vllm.model_executor.models.llava) PixtralHFTransformer (class in vllm.model_executor.models.pixtral) PixtralHFTransformerBlock (class in vllm.model_executor.models.pixtral) PixtralHFVisionModel (class in vllm.model_executor.models.pixtral) PixtralImagePixelInputs (class in vllm.model_executor.models.pixtral) PixtralMultiModalProcessor (class in vllm.model_executor.models.pixtral) PixtralProcessingInfo (class in vllm.model_executor.models.pixtral) PixtralProcessorAdapter (class in vllm.model_executor.models.pixtral) placeholder_attr() (vllm.utils.PlaceholderModule method) PLACEHOLDER_TOKEN_ID (in module vllm.v1.sample.rejection_sampler) PlaceholderAttentionBackend (class in vllm.attention.backends.placeholder_attn) PlaceholderAttentionImpl (class in vllm.attention.backends.placeholder_attn) PlaceholderAttentionMetadata (class in vllm.attention.backends.placeholder_attn) PlaceholderAttentionMetadataBuilder (class in vllm.attention.backends.placeholder_attn) PlaceholderBlockSpaceManager (class in vllm.core.placeholder_block_space_manager) PlaceholderFeaturesInfo (class in vllm.multimodal.processing) PlaceholderModule (class in vllm.utils) PlaceholderRange (class in vllm.multimodal.inputs) placement_group (vllm.config.ParallelConfig attribute) Plamo2AttentionMixer (class in vllm.model_executor.models.plamo2) Plamo2Config (class in vllm.model_executor.models.plamo2) Plamo2Decoder (class in vllm.model_executor.models.plamo2) Plamo2DecoderLayer (class in vllm.model_executor.models.plamo2) Plamo2ForCausalLM (class in vllm.model_executor.models.plamo2) Plamo2MambaMixer (class in vllm.model_executor.models.plamo2) Plamo2Model (class in vllm.model_executor.models.plamo2) Plamo2PreTrainedModel (class in vllm.model_executor.models.plamo2) Platform (class in vllm.platforms.interface) PlatformEnum (class in vllm.platforms.interface) plugins_loaded (in module vllm.plugins) policy (vllm.config.SchedulerConfig attribute) POLLING_TIMEOUT_MS (in module vllm.engine.multiprocessing.engine) (in module vllm.v1.executor.multiproc_executor) POLLING_TIMEOUT_S (in module vllm.v1.engine.core) (in module vllm.v1.executor.multiproc_executor) pool_size (vllm.config.TokenizerPoolConfig attribute) pool_type (vllm.config.TokenizerPoolConfig attribute) Pooler (class in vllm.model_executor.layers.pooler) pooler() (vllm.model_executor.models.bert.BertEmbeddingModel method) (vllm.model_executor.models.bert.BertForSequenceClassification method) (vllm.model_executor.models.gritlm.GritLM method) (vllm.model_executor.models.interfaces_base.VllmModelForPooling method) (vllm.model_executor.models.internlm2.InternLM2ForRewardModel method) (vllm.model_executor.models.jamba.JambaForSequenceClassification method) (vllm.model_executor.models.modernbert.ModernBertForSequenceClassification method) (vllm.model_executor.models.prithvi_geospatial_mae.PrithviGeoSpatialMAE method) (vllm.model_executor.models.qwen2.Qwen2EmbeddingModel method) (vllm.model_executor.models.qwen2_rm.Qwen2RewardBaseModel method) (vllm.model_executor.models.roberta.RobertaForSequenceClassification method) pooler_config (vllm.config.ModelConfig attribute) PoolerConfig (class in vllm.config) PoolerHead (class in vllm.model_executor.layers.pooler) PoolerOutput (class in vllm.sequence) pooling() (in module vllm.entrypoints.openai.api_server) pooling_metadata (vllm.worker.cpu_pooling_model_runner.ModelInputForCPUWithPoolingMetadata attribute) (vllm.worker.pooling_model_runner.ModelInputForGPUWithPoolingMetadata attribute) pooling_params (vllm.sequence.SequenceGroupMetadata attribute) POOLING_SIZE (in module vllm.model_executor.models.molmo) pooling_size (vllm.model_executor.models.molmo.MolmoProcessorWrapper property) pooling_type (vllm.config.PoolerConfig attribute) PoolingChatRequest (in module vllm.entrypoints.openai.protocol) PoolingCompletionRequest (in module vllm.entrypoints.openai.protocol) PoolingMetadata (class in vllm.model_executor.pooling_metadata) PoolingModelRunner (class in vllm.worker.pooling_model_runner) PoolingOutput (class in vllm.outputs) PoolingParams (class in vllm.pooling_params) PoolingRequest (in module vllm.entrypoints.openai.protocol) PoolingRequestOutput (class in vllm.outputs) PoolingResponse (class in vllm.entrypoints.openai.protocol) PoolingResponseData (class in vllm.entrypoints.openai.protocol) PoolingSequenceGroupOutput (class in vllm.sequence) PoolingTensors (class in vllm.model_executor.pooling_metadata) PoolingType (class in vllm.model_executor.layers.pooler) pop() (vllm.utils.LRUCache method) (vllm.v1.utils.ConstantList method) pop_prompt_logprobs() (vllm.v1.engine.logprobs.LogprobsProcessor method) popitem() (vllm.utils.LRUCache method) popleft() (vllm.v1.core.kv_cache_utils.FreeKVCacheBlockQueue method) position_ids (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) position_meshgrid() (in module vllm.model_executor.models.pixtral) PositionalEmbeddingCosine1D (class in vllm.model_executor.models.florence2) possible_config_file_names (vllm.model_executor.model_loader.bitsandbytes_loader.BitsAndBytesModelLoader attribute) post_grad_passes (vllm.compilation.backends.VllmBackend attribute) post_process_image_text_to_text() (vllm.transformers_utils.processors.ovis.OvisProcessor method) posterior_alpha (vllm.config.SpeculativeConfig attribute) posterior_threshold (vllm.config.SpeculativeConfig attribute) PostGradPassManager (class in vllm.compilation.pass_manager) POWERPC (vllm.platforms.interface.CpuArchEnum attribute) PPMissingLayer (class in vllm.model_executor.models.utils) pre_register_and_update() (vllm.platforms.interface.Platform class method) precompute_freqs_cis_2d() (in module vllm.model_executor.models.pixtral) precompute_indices_and_offsets() (in module vllm.worker.hpu_model_runner) precomputed_freqs_cis() (vllm.model_executor.models.moonvit.Rope2DPosEmb method) preempted (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.core.scheduler.SchedulerRunningOutputs attribute) PREEMPTED (vllm.v1.engine.EngineCoreEventType attribute) (vllm.v1.request.RequestStatus attribute) (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) preempted_request() (vllm.v1.metrics.stats.LoRARequestStates static method) preempted_ts_s_lst (vllm.v1.stats.common.RequestStats attribute) preemption_mode (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) PreemptionMode (class in vllm.core.scheduler) (in module vllm.config) PREFILL (vllm.sequence.SequenceStage attribute) prefill (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) PREFILL (vllm.worker.hpu_model_runner.BatchType attribute) (vllm.worker.hpu_model_runner.PhaseType attribute) (vllm.worker.tpu_model_runner.ExecutionMode attribute) prefill_block_tables (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) prefill_hidden_states (vllm.model_executor.layers.sampler.SamplerOutput attribute) prefill_latency_s (vllm.v1.stats.common.RequestStats property) prefill_metadata (vllm.attention.backends.abstract.AttentionMetadata property) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata property) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata property) (vllm.attention.backends.flash_attn.FlashAttentionMetadata property) (vllm.attention.backends.flashinfer.FlashInferMetadata property) (vllm.attention.backends.ipex_attn.IpexAttnMetadata property) (vllm.attention.backends.mla.common.MLACommonMetadata property) (vllm.attention.backends.pallas.PallasMetadata property) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata property) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadata property) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata property) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata property) (vllm.attention.backends.xformers.XFormersMetadata property) (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase property) prefill_seq_groups (vllm.core.scheduler.SchedulerRunningOutputs attribute) (vllm.core.scheduler.SchedulerSwappedInOutputs attribute) prefill_seq_groups_list (vllm.core.scheduler.SchedulerRunningOutputs attribute) prefill_start_ts_s_lst (vllm.v1.stats.common.RequestStats attribute) prefill_time (vllm.v1.metrics.stats.FinishedRequestStats attribute) prefill_ts_s (vllm.v1.stats.common.RequestStats property) prefill_url (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig attribute) prefill_wrapper (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) prefilled (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) PREFILLING (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) prefix (vllm.model_executor.model_loader.default_loader.DefaultModelLoader.Source attribute) prefix() (vllm.multimodal.processing.PromptIndexTargets static method) prefix_cache_stats (vllm.v1.metrics.stats.SchedulerStats attribute) prefix_caching_hash_algo (vllm.config.CacheConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) prefix_kv_lens (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) PREFIX_PREFILL (vllm.worker.hpu_model_runner.PhaseType attribute) (vllm.worker.tpu_model_runner.ExecutionMode attribute) prefix_scheduler_metadata (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) PrefixCacheStats (class in vllm.v1.metrics.stats) PrefixCachingBlock (class in vllm.core.block.prefix_caching_block) PrefixCachingBlockAllocator (class in vllm.core.block.prefix_caching_block) PrefixCachingHashAlgo (in module vllm.config) PrefixCachingMetrics (class in vllm.v1.core.kv_cache_utils) PrefixHash (in module vllm.core.block.prefix_caching_block) PreNorm (class in vllm.model_executor.models.florence2) prep_initial_states (vllm.model_executor.layers.mamba.mamba2_metadata.Mamba2Metadata attribute) prepare() (vllm.attention.backends.abstract.AttentionMetadataBuilder method) (vllm.attention.backends.cpu_mla.CPUMLAMetadataBuilder method) (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadataBuilder method) (vllm.attention.backends.flash_attn.FlashAttentionMetadataBuilder method) (vllm.attention.backends.flashinfer.FlashInferMetadataBuilder method) (vllm.attention.backends.mla.common.MLACommonMetadataBuilder method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadataBuilder method) (vllm.attention.backends.rocm_aiter_mla.AiterMLAMetadataBuilder method) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadataBuilder method) (vllm.attention.backends.utils.CommonMetadataBuilder method) (vllm.model_executor.sampling_metadata.SamplingMetadata static method) (vllm.worker.cpu_model_runner.ModelInputForCPUBuilder method) (vllm.worker.model_runner.ModelInputForGPUBuilder method) (vllm.worker.model_runner_base.ModelRunnerInputBuilderBase method) (vllm.worker.xpu_model_runner.ModelInputForXPUBuilder method) prepare_attn_masks() (vllm.model_executor.models.gemma3_mm.Gemma3ForConditionalGeneration method) prepare_fp4_layer_for_marlin() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) prepare_fp8_layer_for_marlin() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) prepare_graph_input_buffers() (vllm.attention.backends.abstract.AttentionState method) (vllm.attention.backends.flashinfer.FlashInferState method) (vllm.attention.backends.flashmla.FlashMLAState method) (vllm.attention.backends.mla.common.MLACommonState method) (vllm.attention.backends.rocm_aiter_mla.AiterMLAState method) (vllm.attention.backends.utils.CommonAttentionState method) prepare_input() (vllm.worker.multi_step_hpu_worker.MultiStepHPUWorker method) (vllm.worker.multi_step_tpu_worker.MultiStepTPUWorker method) (vllm.worker.multi_step_worker.MultiStepWorker method) (vllm.worker.worker_base.LocalOrDistributedWorkerBase method) prepare_input_kernel() (in module vllm.v1.spec_decode.eagle) prepare_input_tensors() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) prepare_inputs() (vllm.v1.spec_decode.eagle.EagleProposer static method) prepare_mamba2_metadata() (in module vllm.model_executor.layers.mamba.mamba2_metadata) prepare_model_input() (vllm.spec_decode.target_model_runner.TargetModelRunner method) (vllm.worker.cpu_enc_dec_model_runner.CPUEncoderDecoderModelRunner method) (vllm.worker.cpu_model_runner.CPUModelRunner method) (vllm.worker.cpu_pooling_model_runner.CPUPoolingModelRunner method) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelRunner method) (vllm.worker.hpu_model_runner.HPUModelRunner method) (vllm.worker.model_runner.ModelRunner method) (vllm.worker.model_runner_base.ModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) (vllm.worker.neuron_model_runner.NeuronModelRunner method) (vllm.worker.pooling_model_runner.PoolingModelRunner method) (vllm.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.xpu_model_runner.XPUModelRunner method) prepare_moe_fp4_layer_for_marlin() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) prepare_moe_fp8_layer_for_marlin() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8) prepare_object_to_dump() (in module vllm.logging_utils.dump_input) prepare_prefill_hidden_states() (in module vllm.spec_decode.spec_decode_worker) prepare_structured_decoding_input() (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) prepare_tensors() (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta method) prepare_worker_input() (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.neuron_worker.NeuronWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.LocalOrDistributedWorkerBase method) PrepareDecodeMetadata (class in vllm.worker.hpu_model_runner) PreparePromptMetadata (class in vllm.worker.hpu_model_runner) preprocess() (vllm.inputs.preprocess.InputPreprocessor method) preprocess_async() (vllm.inputs.preprocess.InputPreprocessor method) preprocess_image() (vllm.transformers_utils.processors.ovis.OvisProcessor method) presence_penalties (vllm.model_executor.sampling_metadata.SamplingTensors attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) presence_penalty (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.sampling_params.SamplingParams attribute) pretty_str() (in module vllm.collect_env) prev_block (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) prev_free_block (vllm.v1.core.kv_cache_utils.KVCacheBlock attribute) prev_rank (vllm.distributed.parallel_state.GroupCoordinator property) prev_set_stream (in module vllm.utils) previous_hidden_states (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) print_model_table() (vllm.profiler.layerwise_profile.LayerwiseProfileResults method) print_summary_table() (vllm.profiler.layerwise_profile.LayerwiseProfileResults method) print_table() (vllm.profiler.utils.TablePrinter method) printed_error (vllm.v1.structured_output.backend_guidance.GuidanceGrammar attribute) PrinterInductorPass (class in vllm.compilation.vllm_inductor_pass) priority (vllm.engine.multiprocessing.RPCProcessRequest attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.ClassificationRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) (vllm.entrypoints.openai.protocol.RerankRequest attribute) (vllm.entrypoints.openai.protocol.ScoreRequest attribute) PrithviGeoSpatialMAE (class in vllm.model_executor.models.prithvi_geospatial_mae) PrithviGeoSpatialMAEInputBuilder (class in vllm.model_executor.models.prithvi_geospatial_mae) PrithviGeoSpatialMAEMultiModalProcessor (class in vllm.model_executor.models.prithvi_geospatial_mae) PrithviGeoSpatialMAEProcessingInfo (class in vllm.model_executor.models.prithvi_geospatial_mae) probs (vllm.entrypoints.openai.protocol.ClassificationData attribute) (vllm.outputs.ClassificationOutput attribute) (vllm.spec_decode.interfaces.SpeculativeScores attribute) probs_dtype (vllm.model_executor.layers.spec_decode_base_sampler.SpecDecodeBaseSampler property) proc (vllm.v1.executor.multiproc_executor.UnreadyWorkerProcHandle attribute) (vllm.v1.executor.multiproc_executor.WorkerProcHandle attribute) process() (vllm.compilation.fusion.FusedAddRMSNormDynamicQuantPattern.Match method) (vllm.compilation.fusion.FusedAddRMSNormStaticQuantPattern.Match method) (vllm.compilation.fusion.RMSNormDynamicQuantPattern.Match method) (vllm.compilation.multi_output_match.MultiOutputMatch method) process_audios() (vllm.model_executor.models.minicpmo.MiniCPMOMultiModalProcessor method) process_engine_outputs() (vllm.v1.engine.core_client.DPAsyncMPClient static method) process_exception() (vllm.engine.async_llm_engine.RequestTracker method) process_for_additional_properties() (in module vllm.v1.structured_output.backend_guidance) process_image() (in module vllm.benchmarks.datasets) process_images() (vllm.model_executor.models.minicpmv.MiniCPMVMultiModalProcessor method) process_input_socket() (vllm.v1.engine.core.EngineCoreProc method) process_inputs() (vllm.v1.engine.processor.Processor method) process_matches() (vllm.compilation.fusion.FusionPass method) process_mm_inputs() (vllm.model_executor.models.minicpmo.MiniCPMOMultiModalProcessor method) (vllm.model_executor.models.minicpmv.MiniCPMVMultiModalProcessor method) process_one() (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor method) process_output_socket() (vllm.v1.engine.core.EngineCoreProc method) process_outputs() (vllm.engine.output_processor.interfaces.SequenceGroupOutputProcessor method) (vllm.engine.output_processor.multi_step.MultiStepOutputProcessor method) (vllm.engine.output_processor.single_step.SingleStepOutputProcessor method) (vllm.v1.engine.output_processor.OutputProcessor method) process_prompt_logprob() (vllm.engine.output_processor.interfaces.SequenceGroupOutputProcessor method) (vllm.engine.output_processor.multi_step.MultiStepOutputProcessor method) (vllm.engine.output_processor.single_step.SingleStepOutputProcessor method) process_request_output() (vllm.engine.async_llm_engine.RequestTracker method) process_request_outputs() (vllm.engine.async_llm_engine.AsyncLLMEngine method) process_videos() (vllm.model_executor.models.minicpmv.MiniCPMVMultiModalProcessor method) process_weights_after_loading() (in module vllm.model_executor.model_loader.utils) (vllm.attention.backends.mla.common.MLACommonImpl method) (vllm.attention.layer.Attention method) (vllm.model_executor.layers.fused_moe.layer.UnquantizedFusedMoEMethod method) (vllm.model_executor.layers.linear.QKVCrossParallelLinear method) (vllm.model_executor.layers.quantization.awq.AWQLinearMethod method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinLinearMethod method) (vllm.model_executor.layers.quantization.awq_marlin.AWQMoEMethod method) (vllm.model_executor.layers.quantization.base_config.QuantizeMethodBase method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsLinearMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Fp8MoECutlassMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Fp8MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsW8A8Int8MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsWNA16MarlinMoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.CompressedTensorsWNA16MoEMethod method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_24.CompressedTensors24 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_scheme.CompressedTensorsScheme method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24.CompressedTensorsW4A16Sparse24 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_nvfp4.CompressedTensorsW4A16Fp4 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8.CompressedTensorsW8A16Fp8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_fp8.CompressedTensorsW8A8Fp8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8.CompressedTensorsW8A8Int8 method) (vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16.CompressedTensorsWNA16 method) (vllm.model_executor.layers.quantization.fbgemm_fp8.FBGEMMFp8LinearMethod method) (vllm.model_executor.layers.quantization.fp8.Fp8LinearMethod method) (vllm.model_executor.layers.quantization.fp8.Fp8MoEMethod method) (vllm.model_executor.layers.quantization.gptq.GPTQLinearMethod method) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASLinearMethod method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinLinearMethod method) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinMoEMethod method) (vllm.model_executor.layers.quantization.gptq_marlin_24.GPTQMarlin24LinearMethod method) (vllm.model_executor.layers.quantization.hqq_marlin.HQQMarlinMethod method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXAWQLinearMethod method) (vllm.model_executor.layers.quantization.ipex_quant.IPEXGPTQLinearMethod method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.allspark.AllSparkLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama.ExllamaLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.machete.MacheteLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.marlin.MarlinLinearKernel method) (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter.AiterScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.cutlass.CutlassScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel.ScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.triton.TritonScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kernels.scaled_mm.xla.XLAScaledMMLinearKernel method) (vllm.model_executor.layers.quantization.kv_cache.BaseKVCacheMethod method) (vllm.model_executor.layers.quantization.marlin.MarlinLinearMethod method) (vllm.model_executor.layers.quantization.modelopt.ModelOptFp8LinearMethod method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4FusedMoE method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4LinearMethod method) (vllm.model_executor.layers.quantization.ptpc_fp8.PTPCFp8LinearMethod method) (vllm.model_executor.layers.quantization.qqq.QQQLinearMethod method) (vllm.model_executor.layers.quantization.quark.quark.QuarkLinearMethod method) (vllm.model_executor.layers.quantization.quark.quark_moe.QuarkW8A8Fp8MoEMethod method) (vllm.model_executor.layers.quantization.quark.schemes.quark_scheme.QuarkScheme method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w4a4_mxfp4.QuarkW4A4MXFP4 method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8.QuarkW8A8Fp8 method) (vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8.QuarkW8A8Int8 method) (vllm.model_executor.layers.quantization.tpu_int8.TPUInt8LinearMethod method) (vllm.v1.attention.backends.mla.common.MLACommonImpl method) processing_info (vllm.multimodal.profiling.MultiModalProfiler property) ProcessingCache (class in vllm.multimodal.processing) ProcessingCacheItem (class in vllm.multimodal.processing) ProcessingCacheOptionalItem (class in vllm.multimodal.processing) ProcessingInfoFactory (class in vllm.multimodal.registry) Processor (class in vllm.v1.engine.processor) ProcessorBatchItems (class in vllm.multimodal.parse) ProcessorInputs (class in vllm.multimodal.profiling) (in module vllm.inputs.data) ProcessWorkerWrapper (class in vllm.executor.multiproc_worker_utils) produce_guards_expression() (vllm.compilation.compiler_interface.AlwaysHitShapeEnv method) producer() (in module vllm.distributed.device_communicators.custom_all_reduce_utils) profile() (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.executor.abstract.Executor method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.tpu_worker.TPUWorker method) profile_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) profile_run() (vllm.v1.worker.gpu_model_runner.GPUModelRunner method) (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) (vllm.worker.enc_dec_model_runner.EncoderDecoderModelRunner method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) (vllm.worker.xpu_model_runner.XPUModelRunner method) profile_time (vllm.utils.MemoryProfilingResult attribute) projector_config (vllm.transformers_utils.configs.deepseek_vl2.DeepseekVLV2Config attribute) projector_type (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) prometheus_multiproc_dir (in module vllm.entrypoints.openai.api_server) PrometheusStatLogger (class in vllm.engine.metrics) (class in vllm.v1.metrics.loggers) promote_to_immutable_block() (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) prompt (vllm.benchmarks.datasets.SampleRequest attribute) (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) (vllm.engine.multiprocessing.RPCProcessRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.DetokenizeResponse attribute) (vllm.entrypoints.openai.protocol.TokenizeCompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.entrypoints.openai.serving_engine.TextTokensPrompt attribute) (vllm.inputs.data.TextPrompt attribute) (vllm.inputs.data.TokenInputs attribute) (vllm.multimodal.inputs.MultiModalInputs attribute) (vllm.sequence.Sequence property) (vllm.sequence.SequenceGroup property) prompt_adapter_config (vllm.config.VllmConfig attribute) prompt_adapter_dtype (vllm.config.PromptAdapterConfig attribute) prompt_adapter_id (vllm.prompt_adapter.request.PromptAdapterRequest attribute) (vllm.sequence.Sequence property) (vllm.sequence.SequenceGroup property) (vllm.sequence.SequenceGroupMetadata property) prompt_adapter_local_path (vllm.prompt_adapter.request.PromptAdapterRequest attribute) prompt_adapter_mapping (vllm.worker.model_runner.ModelInputForGPU attribute) prompt_adapter_name (vllm.prompt_adapter.request.PromptAdapterRequest attribute) prompt_adapter_num_virtual_tokens (vllm.prompt_adapter.request.PromptAdapterRequest attribute) (vllm.sequence.SequenceGroup property) (vllm.sequence.SequenceGroupMetadata property) prompt_adapter_request (vllm.engine.multiprocessing.RPCProcessRequest attribute) (vllm.entrypoints.openai.serving_engine.ServeContext attribute) (vllm.sequence.SequenceGroupMetadata attribute) prompt_adapter_requests (vllm.core.scheduler.SchedulerOutputs property) (vllm.worker.model_runner.ModelInputForGPU attribute) prompt_adapter_slots (vllm.prompt_adapter.models.PromptAdapterModelManager property) prompt_embeds (vllm.inputs.data.EmbedsInputs attribute) (vllm.inputs.data.EmbedsPrompt attribute) (vllm.sequence.SequenceData property) prompt_len (vllm.benchmarks.datasets.SampleRequest attribute) (vllm.benchmarks.endpoint_request_func.RequestFuncInput attribute) (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) prompt_lens (vllm.model_executor.pooling_metadata.PoolingTensors attribute) prompt_logprob_indices (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) prompt_logprobs (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionResponseChoice attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.sequence.CompletionSequenceGroupOutput attribute) (vllm.spec_decode.interfaces.SpeculativeScores attribute) (vllm.v1.engine.logprobs.LogprobsProcessor attribute) prompt_logprobs_dict (vllm.v1.outputs.ModelRunnerOutput attribute) prompt_lookup_max (vllm.config.SpeculativeConfig attribute) prompt_lookup_min (vllm.config.SpeculativeConfig attribute) prompt_mapping (vllm.adapter_commons.layers.AdapterMapping attribute) prompt_text (vllm.multimodal.profiling.ProcessorInputs attribute) prompt_token_ids (vllm.entrypoints.openai.serving_engine.TextTokensPrompt attribute) (vllm.inputs.data.TokenInputs attribute) (vllm.inputs.data.TokensPrompt attribute) (vllm.multimodal.inputs.MultiModalInputs attribute) (vllm.multimodal.profiling.DummyDecoderData attribute) (vllm.multimodal.profiling.DummyEncoderData attribute) (vllm.sequence.Sequence property) (vllm.sequence.SequenceData property) (vllm.sequence.SequenceGroup property) (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.engine.EngineCoreRequest attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) prompt_token_ids_array (vllm.sequence.SequenceData property) prompt_tokens (vllm.entrypoints.openai.protocol.UsageInfo attribute) (vllm.model_executor.sampling_metadata.SamplingTensors attribute) prompt_tokens_details (vllm.entrypoints.openai.protocol.UsageInfo attribute) PromptAdapterConfig (class in vllm.config) PromptAdapterLRUCache (class in vllm.prompt_adapter.models) PromptAdapterMapping (class in vllm.prompt_adapter.layers) PromptAdapterModel (class in vllm.prompt_adapter.models) PromptAdapterModelManager (class in vllm.prompt_adapter.models) PromptAdapterParserAction (class in vllm.entrypoints.openai.cli_args) PromptAdapterPath (class in vllm.entrypoints.openai.serving_models) PromptAdapterRequest (class in vllm.prompt_adapter.request) PromptIndex (class in vllm.multimodal.processing) PromptIndexTargets (class in vllm.multimodal.processing) PromptInsertion (class in vllm.multimodal.processing) PromptLogprobs (in module vllm.sequence) PromptReplacement (class in vllm.multimodal.processing) PromptSeq (in module vllm.multimodal.processing) PromptTarget (in module vllm.multimodal.processing) PromptTargetMatch (class in vllm.multimodal.processing) PromptTokenUsageInfo (class in vllm.entrypoints.openai.protocol) PromptType (in module vllm.inputs.data) PromptUpdate (class in vllm.multimodal.processing) PromptUpdateContent (in module vllm.multimodal.processing) PromptUpdateDetails (class in vllm.multimodal.processing) PromptUpdateInfo (in module vllm.multimodal.processing) propagate_error() (vllm.v1.engine.output_processor.OutputProcessor method) propagate_exception() (vllm.engine.async_llm_engine.RequestTracker method) proposal_lens (vllm.spec_decode.interfaces.SpeculativeProposals attribute) proposal_probs (vllm.spec_decode.interfaces.SpeculativeProposals attribute) proposal_token_ids (vllm.spec_decode.interfaces.SpeculativeProposals attribute) propose() (vllm.v1.spec_decode.eagle.EagleProposer method) (vllm.v1.spec_decode.ngram_proposer.NgramProposer method) ProposerWorkerBase (class in vllm.spec_decode.proposer_worker_base) protocol (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStoreConfig attribute) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngineConfig attribute) prune() (vllm.sequence.HiddenStates method) PT (vllm.config.LoadFormat attribute) pt_load_map_location (vllm.config.LoadConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) pt_weights_iterator() (in module vllm.model_executor.model_loader.weight_utils) PTPCFp8Config (class in vllm.model_executor.layers.quantization.ptpc_fp8) PTPCFp8LinearMethod (class in vllm.model_executor.layers.quantization.ptpc_fp8) publish() (vllm.distributed.kv_events.EventPublisher method) (vllm.distributed.kv_events.NullEventPublisher method) (vllm.distributed.kv_events.ZmqEventPublisher method) publisher (vllm.config.KVEventsConfig attribute) pull_files() (vllm.transformers_utils.s3_utils.S3Model method) PunicaWrapperABC (class in vllm.lora.punica_wrapper.punica_base) PunicaWrapperBase (class in vllm.lora.punica_wrapper.punica_base) PunicaWrapperCPU (class in vllm.lora.punica_wrapper.punica_cpu) PunicaWrapperGPU (class in vllm.lora.punica_wrapper.punica_gpu) PunicaWrapperHPU (class in vllm.lora.punica_wrapper.punica_hpu) PunicaWrapperTPU (class in vllm.lora.punica_wrapper.punica_tpu) put() (vllm.distributed.kv_transfer.kv_lookup_buffer.base.KVStoreBufferBase method) (vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store.MooncakeStore method) (vllm.engine.async_llm_engine.AsyncStream method) (vllm.multimodal.processing.ProcessingCache method) (vllm.utils.LRUCache method) (vllm.v1.engine.output_processor.RequestOutputCollector method) put_item() (vllm.multimodal.processing.ProcessingCache method) put_kv_to_cache() (vllm.distributed.kv_transfer.kv_connector.utils.model_aware_kv_ops_helper method) PyNcclCommunicator (class in vllm.distributed.device_communicators.pynccl) PyNcclPipe (class in vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe) pynvml (in module vllm.platforms.cuda) PyObjectCache (class in vllm.utils) python_free_callback() (vllm.device_allocator.cumem.CuMemAllocator method) python_malloc_callback() (vllm.device_allocator.cumem.CuMemAllocator method) PythonicToolParser (class in vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser) pythonization_cache (vllm.worker.multi_step_model_runner.ModelOutput attribute) PythonizationCache (class in vllm.worker.multi_step_model_runner) pythonize() (vllm.worker.multi_step_model_runner.ModelOutput method) pythonized (vllm.worker.multi_step_model_runner.ModelOutput attribute) Q q_bits (vllm.transformers_utils.configs.arctic.ArcticQuantizationConfig attribute) q_data_type (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) q_lora_rank (vllm.attention.backends.utils.MLADims attribute) q_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) q_proj_decoder (vllm.model_executor.layers.linear.QKVCrossParallelLinear property) qa_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) qb_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) qk_nope_head_dim (vllm.attention.backends.utils.MLADims attribute) qk_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) qk_rope_head_dim (vllm.attention.backends.utils.MLADims attribute) qkv_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) QKVCrossParallelLinear (class in vllm.model_executor.layers.linear) QKVCrossParallelLinearWithLoRA (class in vllm.lora.layers) QKVParallelLinear (class in vllm.model_executor.layers.linear) QKVParallelLinearWithLoRA (class in vllm.lora.layers) QKVParallelLinearWithShardedLoRA (class in vllm.lora.fully_sharded_layers) qlora_adapter_name_or_path (vllm.engine.arg_utils.EngineArgs attribute) qo_indptr (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) qqq_quantize_weights() (in module vllm.model_executor.layers.quantization.utils.quant_utils) QQQConfig (class in vllm.model_executor.layers.quantization.qqq) QQQLinearMethod (class in vllm.model_executor.layers.quantization.qqq) qualname (vllm.entrypoints.openai.protocol.LogitsProcessorConstructor attribute) quant (vllm.compilation.fusion.FusedRMSQuantKey attribute) QUANT_ALGOS (in module vllm.model_executor.layers.quantization.modelopt) quant_config (vllm.config.VllmConfig attribute) (vllm.model_executor.models.interfaces.SupportsQuant attribute) quant_fp8() (in module vllm.attention.ops.triton_flash_attention) QUANT_OPS (in module vllm.compilation.fusion) quantization (vllm.config.ModelConfig attribute) (vllm.config.SpeculativeConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) QUANTIZATION_METHODS (in module vllm.model_executor.layers.quantization) QUANTIZATION_SCHEME_MAP_TYPE (in module vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors) QuantizationConfig (class in vllm.model_executor.layers.quantization.base_config) QuantizationMethods (in module vllm.model_executor.layers.quantization) quantize_in_place_and_get_scales() (in module vllm.model_executor.layers.quantization.experts_int8) quantize_weights() (in module vllm.model_executor.layers.quantization.utils.quant_utils) QuantizeMethodBase (class in vllm.model_executor.layers.quantization.base_config) quantizing_weight_loader() (vllm.model_executor.layers.quantization.experts_int8.ExpertsInt8MoEMethod static method) QuantKey (class in vllm.compilation.fusion) QuantMultiOutputMatch (class in vllm.compilation.fusion) QuantParamSchema (class in vllm.model_executor.layers.quantization.schema) QuarkConfig (class in vllm.model_executor.layers.quantization.quark.quark) QuarkKVCacheMethod (class in vllm.model_executor.layers.quantization.quark.quark) QuarkLinearMethod (class in vllm.model_executor.layers.quantization.quark.quark) QuarkMoEMethod (class in vllm.model_executor.layers.quantization.quark.quark_moe) QuarkScheme (class in vllm.model_executor.layers.quantization.quark.schemes.quark_scheme) QuarkW4A4MXFP4 (class in vllm.model_executor.layers.quantization.quark.schemes.quark_w4a4_mxfp4) QuarkW8A8Fp8 (class in vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8) QuarkW8A8Fp8MoEMethod (class in vllm.model_executor.layers.quantization.quark.quark_moe) QuarkW8A8Int8 (class in vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8) queries (vllm.v1.metrics.stats.PrefixCacheStats attribute) query (vllm.entrypoints.openai.protocol.RerankRequest attribute) query() (vllm.core.block.common.CacheMetricData method) query_bitblas_supported_quant_types() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) query_len (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) query_lens (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) query_machete_supported_act_types() (in module vllm.model_executor.layers.quantization.utils.machete_utils) query_machete_supported_quant_types() (in module vllm.model_executor.layers.quantization.utils.machete_utils) query_marlin_supported_quant_types() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) query_start_loc (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata property) (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata attribute) (vllm.v1.attention.backends.pallas.PallasMetadata attribute) (vllm.v1.attention.backends.utils.CommonAttentionMetadata attribute) queue_duration_s (vllm.v1.stats.common.RequestStats property) QUEUED (vllm.v1.engine.EngineCoreEventType attribute) (vllm.v1.stats.common.RequestStatsUpdate.Type attribute) queued_time (vllm.v1.metrics.stats.FinishedRequestStats attribute) queued_ts (vllm.v1.metrics.stats.RequestStateStats attribute) queued_ts_s (vllm.v1.stats.common.RequestStats attribute) quick_gelu() (in module vllm.model_executor.models.phi3_small) QuickGELU (class in vllm.model_executor.layers.activation) Qwen2_5_VisionAttention (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VisionBlock (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VisionMLP (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VisionPatchEmbed (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VisionPatchMerger (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VisionRotaryEmbedding (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VisionTransformer (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLForConditionalGeneration (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLImageEmbeddingInputs (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLImageInputs (in module vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLImagePixelInputs (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLMultiModalProcessor (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLProcessingInfo (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLVideoEmbeddingInputs (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLVideoInputs (in module vllm.model_executor.models.qwen2_5_vl) Qwen2_5_VLVideoPixelInputs (class in vllm.model_executor.models.qwen2_5_vl) Qwen2_5OmniConditionalGenerationMixin (class in vllm.model_executor.models.qwen2_5_omni_thinker) Qwen2_5OmniThinkerDummyInputsBuilder (class in vllm.model_executor.models.qwen2_5_omni_thinker) Qwen2_5OmniThinkerForConditionalGeneration (class in vllm.model_executor.models.qwen2_5_omni_thinker) Qwen2_5OmniThinkerMultiModalDataParser (class in vllm.model_executor.models.qwen2_5_omni_thinker) Qwen2_5OmniThinkerMultiModalProcessor (class in vllm.model_executor.models.qwen2_5_omni_thinker) Qwen2_5OmniThinkerProcessingInfo (class in vllm.model_executor.models.qwen2_5_omni_thinker) Qwen2Attention (class in vllm.model_executor.models.qwen2) Qwen2AudioDummyInputsBuilder (class in vllm.model_executor.models.qwen2_audio) Qwen2AudioForConditionalGeneration (class in vllm.model_executor.models.qwen2_audio) Qwen2AudioInputs (class in vllm.model_executor.models.qwen2_audio) Qwen2AudioMultiModalProcessor (class in vllm.model_executor.models.qwen2_audio) Qwen2AudioMultiModalProjector (class in vllm.model_executor.models.qwen2_audio) Qwen2AudioProcessingInfo (class in vllm.model_executor.models.qwen2_audio) Qwen2DecoderLayer (class in vllm.model_executor.models.qwen2) Qwen2EmbeddingModel (class in vllm.model_executor.models.qwen2) Qwen2ForCausalLM (class in vllm.model_executor.models.qwen2) Qwen2ForProcessRewardModel (class in vllm.model_executor.models.qwen2_rm) Qwen2ForRewardModel (class in vllm.model_executor.models.qwen2_rm) Qwen2MLP (class in vllm.model_executor.models.qwen2) Qwen2Model (class in vllm.model_executor.models.qwen2) Qwen2MoeAttention (class in vllm.model_executor.models.qwen2_moe) Qwen2MoeDecoderLayer (class in vllm.model_executor.models.qwen2_moe) Qwen2MoeForCausalLM (class in vllm.model_executor.models.qwen2_moe) Qwen2MoeMLP (class in vllm.model_executor.models.qwen2_moe) Qwen2MoeModel (class in vllm.model_executor.models.qwen2_moe) Qwen2MoeSparseMoeBlock (class in vllm.model_executor.models.qwen2_moe) Qwen2RewardBaseModel (class in vllm.model_executor.models.qwen2_rm) Qwen2VisionAttention (class in vllm.model_executor.models.qwen2_vl) Qwen2VisionBlock (class in vllm.model_executor.models.qwen2_vl) Qwen2VisionMLP (class in vllm.model_executor.models.qwen2_vl) Qwen2VisionPatchEmbed (class in vllm.model_executor.models.qwen2_vl) Qwen2VisionPatchMerger (class in vllm.model_executor.models.qwen2_vl) Qwen2VisionRotaryEmbedding (class in vllm.model_executor.models.qwen2_vl) Qwen2VisionTransformer (class in vllm.model_executor.models.qwen2_vl) Qwen2VLDummyInputsBuilder (class in vllm.model_executor.models.qwen2_vl) Qwen2VLForConditionalGeneration (class in vllm.model_executor.models.qwen2_vl) Qwen2VLImageEmbeddingInputs (class in vllm.model_executor.models.qwen2_vl) Qwen2VLImageInputs (in module vllm.model_executor.models.qwen2_vl) Qwen2VLImagePixelInputs (class in vllm.model_executor.models.qwen2_vl) Qwen2VLMultiModalDataParser (class in vllm.model_executor.models.qwen2_vl) Qwen2VLMultiModalProcessor (class in vllm.model_executor.models.qwen2_vl) Qwen2VLProcessingInfo (class in vllm.model_executor.models.qwen2_vl) Qwen2VLVideoEmbeddingInputs (class in vllm.model_executor.models.qwen2_vl) Qwen2VLVideoInputs (in module vllm.model_executor.models.qwen2_vl) Qwen2VLVideoPixelInputs (class in vllm.model_executor.models.qwen2_vl) Qwen3Attention (class in vllm.model_executor.models.qwen3) Qwen3DecoderLayer (class in vllm.model_executor.models.qwen3) Qwen3ForCausalLM (class in vllm.model_executor.models.qwen3) Qwen3Model (class in vllm.model_executor.models.qwen3) Qwen3MoeAttention (class in vllm.model_executor.models.qwen3_moe) Qwen3MoeDecoderLayer (class in vllm.model_executor.models.qwen3_moe) Qwen3MoeForCausalLM (class in vllm.model_executor.models.qwen3_moe) Qwen3MoeMLP (class in vllm.model_executor.models.qwen3_moe) Qwen3MoeModel (class in vllm.model_executor.models.qwen3_moe) Qwen3MoeSparseMoeBlock (class in vllm.model_executor.models.qwen3_moe) Qwen3ReasoningParser (class in vllm.reasoning.qwen3_reasoning_parser) QWenAttention (class in vllm.model_executor.models.qwen) QWenBaseModel (class in vllm.model_executor.models.qwen) QWenBlock (class in vllm.model_executor.models.qwen) QwenImageEmbeddingInputs (class in vllm.model_executor.models.qwen_vl) QwenImageInputs (in module vllm.model_executor.models.qwen_vl) QwenImagePixelInputs (class in vllm.model_executor.models.qwen_vl) QWenLMHeadModel (class in vllm.model_executor.models.qwen) QWenMLP (class in vllm.model_executor.models.qwen) QWenModel (class in vllm.model_executor.models.qwen) QwenVLDummyInputsBuilder (class in vllm.model_executor.models.qwen_vl) QwenVLForConditionalGeneration (class in vllm.model_executor.models.qwen_vl) QwenVLMLP (class in vllm.model_executor.models.qwen_vl) QwenVLModel (class in vllm.model_executor.models.qwen_vl) QwenVLMultiModalProcessor (class in vllm.model_executor.models.qwen_vl) QwenVLProcessingInfo (class in vllm.model_executor.models.qwen_vl) QwenVLProcessor (class in vllm.model_executor.models.qwen_vl) R r (vllm.lora.peft_helper.PEFTHelper attribute) raise_if_cache_size_invalid() (in module vllm.worker.hpu_worker) (in module vllm.worker.worker) rand_marlin_weight_fp4_like() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4) RANDOM (vllm.sampling_params.SamplingType attribute) random_sample() (in module vllm.v1.sample.ops.topk_topp_sampler) RANDOM_SEED (vllm.sampling_params.SamplingType attribute) random_tool_call_id() (in module vllm.entrypoints.chat_utils) random_uuid() (in module vllm.utils) RandomDataset (class in vllm.benchmarks.datasets) rank (vllm.config.ParallelConfig attribute) (vllm.distributed.parallel_state.GroupCoordinator attribute) (vllm.distributed.utils.StatelessProcessGroup attribute) (vllm.sequence.Logprob attribute) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker property) (vllm.v1.executor.multiproc_executor.UnreadyWorkerProcHandle attribute) (vllm.v1.executor.multiproc_executor.WorkerProcHandle attribute) rank_in_group (vllm.distributed.parallel_state.GroupCoordinator attribute) ranks (vllm.distributed.parallel_state.GroupCoordinator attribute) raw_request (vllm.entrypoints.openai.serving_engine.ServeContext attribute) ray_device_key (vllm.platforms.cuda.CudaPlatformBase attribute) (vllm.platforms.hpu.HpuPlatform attribute) (vllm.platforms.interface.Platform attribute) (vllm.platforms.neuron.NeuronPlatform attribute) (vllm.platforms.rocm.RocmPlatform attribute) (vllm.platforms.tpu.TpuPlatform attribute) (vllm.platforms.xpu.XPUPlatform attribute) ray_is_available() (in module vllm.executor.ray_utils) ray_workers_use_nsight (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) RayDistributedExecutor (class in vllm.executor.ray_distributed_executor) (class in vllm.v1.executor.ray_distributed_executor) RayMetrics (class in vllm.engine.metrics) RayPrometheusStatLogger (class in vllm.engine.metrics) RayWorkerMetaData (class in vllm.executor.ray_distributed_executor) read_bytes_from_buffer() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) read_file() (in module vllm.entrypoints.openai.run_batch) ReadOnlyRefCounter (class in vllm.core.block.common) READY (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.GPTQMarlinState attribute) (vllm.model_executor.layers.quantization.gptq.ExllamaState attribute) ready_pipe (vllm.v1.executor.multiproc_executor.UnreadyWorkerProcHandle attribute) READY_STR (vllm.v1.executor.multiproc_executor.WorkerProc attribute) real_batch_size (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) reasoner (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) reasoning_backend (vllm.config.DecodingConfig attribute) reasoning_content (vllm.entrypoints.openai.protocol.ChatMessage attribute) (vllm.entrypoints.openai.protocol.DeltaMessage attribute) reasoning_parser (vllm.engine.arg_utils.EngineArgs attribute) reasoning_parsers (vllm.reasoning.abs_reasoning_parsers.ReasoningParserManager attribute) ReasoningParser (class in vllm.reasoning.abs_reasoning_parsers) ReasoningParserManager (class in vllm.reasoning.abs_reasoning_parsers) RECOMPUTE (vllm.core.scheduler.PreemptionMode attribute) record() (vllm.v1.metrics.loggers.LoggingStatLogger method) (vllm.v1.metrics.loggers.PrometheusStatLogger method) (vllm.v1.metrics.loggers.StatLoggerBase method) record_cow() (vllm.core.block.common.CopyOnWriteTracker method) record_event() (vllm.v1.request.Request method) record_match() (vllm.compilation.fusion.FusionPass method) record_step_event() (vllm.worker.multi_step_model_runner.StatefulModelInput method) recv() (vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase method) (vllm.distributed.device_communicators.cuda_communicator.CudaCommunicator method) (vllm.distributed.device_communicators.pynccl.PyNcclCommunicator method) (vllm.distributed.device_communicators.shm_broadcast.MessageQueue static method) (vllm.distributed.parallel_state.GroupCoordinator method) recv_bytes() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) recv_kv_caches_and_hidden_states() (vllm.distributed.kv_transfer.kv_connector.base.KVConnectorBase method) (vllm.distributed.kv_transfer.kv_connector.lmcache_connector.LMCacheConnector method) (vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector.MooncakeStoreConnector method) (vllm.distributed.kv_transfer.kv_connector.simple_connector.SimpleConnector method) (vllm.distributed.kv_transfer.kv_connector_agent.KVTransferAgent method) recv_obj() (vllm.distributed.utils.StatelessProcessGroup method) recv_object() (vllm.distributed.parallel_state.GroupCoordinator method) recv_src_counter (vllm.distributed.utils.StatelessProcessGroup attribute) recv_tensor() (vllm.distributed.kv_transfer.kv_pipe.base.KVPipeBase method) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakePipe method) (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe method) recv_tensor_dict() (vllm.distributed.parallel_state.GroupCoordinator method) reduce_data() (vllm.multimodal.inputs.BaseMultiModalField method) reduce_scatter() (in module vllm.distributed.parallel_state) (vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase method) (vllm.distributed.device_communicators.cuda_communicator.CudaCommunicator method) (vllm.distributed.device_communicators.pynccl.PyNcclCommunicator method) (vllm.distributed.parallel_state.GroupCoordinator method) reduce_scatter_fake() (in module vllm.distributed.parallel_state) ref_cnt (vllm.v1.core.kv_cache_utils.KVCacheBlock attribute) RefCount (in module vllm.core.block.common) Refcount (in module vllm.core.block.naive_block) RefCounter (class in vllm.core.block.common) refcounter (vllm.core.block.naive_block.NaiveBlockAllocator property) RefCounterProtocol (class in vllm.core.block.common) refresh_sampling_metadata() (vllm.v1.worker.gpu_input_batch.InputBatch method) REGEX (vllm.model_executor.guided_decoding.outlines_decoding.GuidedDecodingMode attribute) regex (vllm.sampling_params.GuidedDecodingParams attribute) REGEX (vllm.v1.structured_output.backend_types.StructuredOutputOptions attribute) regex_str (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) RegexLogitsProcessor (class in vllm.model_executor.guided_decoding.outlines_logits_processors) register() (vllm.compilation.fusion.FusedAddRMSNormDynamicQuantPattern method) (vllm.compilation.fusion.FusedAddRMSNormStaticQuantPattern method) (vllm.compilation.fusion.RMSNormDynamicQuantPattern method) (vllm.compilation.fusion.RMSNormStaticQuantPattern method) (vllm.compilation.sequence_parallelism.EmbeddingAllReduceRMSNormPattern method) (vllm.compilation.sequence_parallelism.LastAllReduceRMSNormPattern method) (vllm.compilation.sequence_parallelism.MiddleAllReduceRMSNormPattern method) (vllm.model_executor.custom_op.CustomOp class method) (vllm.transformers_utils.tokenizer_base.TokenizerRegistry static method) register_buffer() (in module vllm._custom_ops) register_chat_template_fallback_path() (in module vllm.transformers_utils.chat_templates.registry) register_connector() (vllm.distributed.kv_transfer.kv_connector.factory.KVConnectorFactory class method) register_failure_callback() (vllm.v1.executor.abstract.Executor method) (vllm.v1.executor.multiproc_executor.MultiprocExecutor method) register_filesystem_resolver() (in module vllm.plugins.lora_resolvers.filesystem_resolver) register_graph_buffers() (in module vllm._custom_ops) (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce method) register_kv_caches() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorWorker method) register_module() (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParserManager class method) (vllm.lora.models.LoRAModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) (vllm.reasoning.abs_reasoning_parsers.ReasoningParserManager class method) register_processor() (vllm.multimodal.registry.MultiModalRegistry method) register_publisher() (vllm.distributed.kv_events.EventPublisherFactory class method) register_quantization_config() (in module vllm.model_executor.layers.quantization) register_signal_handlers() (in module vllm.entrypoints.cli.main) registry (vllm.config.ModelConfig property) REGISTRY (vllm.transformers_utils.tokenizer_base.TokenizerRegistry attribute) rejection_greedy_sample_kernel() (in module vllm.v1.sample.rejection_sampler) rejection_random_sample_kernel() (in module vllm.v1.sample.rejection_sampler) rejection_sample() (in module vllm.v1.sample.rejection_sampler) RejectionSampler (class in vllm.model_executor.layers.rejection_sampler) (class in vllm.v1.sample.rejection_sampler) relevance_score (vllm.entrypoints.openai.protocol.RerankResult attribute) ReLU (class in vllm.model_executor.models.qwen2_rm) ReLUSquaredActivation (class in vllm.model_executor.layers.activation) remaining_steps (vllm.sequence.SequenceGroupState property) remaining_token_budget() (vllm.core.scheduler.SchedulingBudget method) remote_addr_ipv6 (vllm.distributed.device_communicators.shm_broadcast.Handle attribute) remote_block_ids (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.ReqMeta attribute) remote_engine_id (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.ReqMeta attribute) remote_host (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.ReqMeta attribute) remote_port (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.ReqMeta attribute) remote_subscribe_addr (vllm.distributed.device_communicators.shm_broadcast.Handle attribute) remove() (vllm.core.evictor.Evictor method) (vllm.core.evictor.LRUEvictor method) (vllm.v1.core.kv_cache_utils.FreeKVCacheBlockQueue method) (vllm.v1.utils.ConstantList method) remove_adapter() (in module vllm.adapter_commons.utils) (vllm.adapter_commons.models.AdapterModelManager method) (vllm.adapter_commons.worker_manager.AbstractWorkerManager method) (vllm.lora.models.LoRAModelManager method) (vllm.lora.worker_manager.WorkerLoRAManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) remove_all_adapters() (vllm.adapter_commons.models.AdapterModelManager method) (vllm.adapter_commons.worker_manager.AbstractWorkerManager method) (vllm.lora.models.LoRAModelManager method) (vllm.lora.worker_manager.WorkerLoRAManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) remove_all_loras() (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) remove_all_prompt_adapters() (vllm.worker.model_runner.GPUModelRunnerBase method) remove_logger() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) remove_lora() (vllm.engine.llm_engine.LLMEngine method) (vllm.executor.executor_base.ExecutorBase method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin method) (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) (vllm.worker.worker_base.DelegateWorkerBase method) (vllm.worker.worker_base.LoRANotSupportedWorkerBase method) (vllm.worker.worker_base.WorkerBase method) remove_lora_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) remove_oldest() (vllm.utils.LRUCache method) remove_oldest_adapter() (vllm.lora.models.LRUCacheLoRAModelManager method) (vllm.prompt_adapter.models.LRUCachePromptAdapterModelManager method) remove_prompt_adapter() (vllm.engine.llm_engine.LLMEngine method) (vllm.executor.executor_base.ExecutorBase method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.worker.Worker method) remove_request() (vllm.v1.worker.gpu_input_batch.InputBatch method) remove_seq() (vllm.core.block.prefix_caching_block.ComputedBlocksTracker method) (vllm.core.block.prefix_caching_block.LastAccessBlocksTracker method) remove_skipped_blocks() (vllm.v1.core.single_type_kv_cache_manager.FullAttentionManager method) (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) (vllm.v1.core.single_type_kv_cache_manager.SlidingWindowManager method) reorder_batch() (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadataBuilder method) (vllm.v1.attention.backends.flashinfer.FlashInferMetadataBuilder method) (vllm.v1.attention.backends.mla.common.MLACommonMetadataBuilder method) reorder_context_mask() (in module vllm.attention.ops.nki_flash_attn) REPACK (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe.GPTQMarlinState attribute) repack_bitblas_from_gptq() (vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas.BitBLASLinearKernel method) repeat_kv() (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionImpl method) repetition_penalties (vllm.model_executor.sampling_metadata.SamplingTensors attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) repetition_penalty (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.sampling_params.SamplingParams attribute) REPLACE (vllm.multimodal.processing.UpdateMode attribute) replace_linear_class() (in module vllm.model_executor.models.transformers) replace_parameter() (in module vllm.model_executor.layers.quantization.utils.layer_utils) replace_submodule() (in module vllm.lora.utils) (vllm.prompt_adapter.models.PromptAdapterModelManager method) replace_token_matches() (in module vllm.multimodal.processing) replace_users_with_mutated_args() (vllm.compilation.fix_functionalization.FixFunctionalizationPass method) replace_weight_name() (in module vllm.model_executor.models.minimax_text_01) replacement (vllm.multimodal.processing.PromptReplacement attribute) replay_endpoint (vllm.config.KVEventsConfig attribute) ReplicatedLinear (class in vllm.model_executor.layers.linear) ReplicatedLinearWithLoRA (class in vllm.lora.layers) report_usage() (vllm.usage.usage_lib.UsageMessage method) report_usage_stats() (in module vllm.v1.utils) req_id (vllm.v1.core.sched.output.CachedRequestData attribute) (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) req_id_to_index (vllm.v1.outputs.ModelRunnerOutput attribute) req_ids (vllm.v1.outputs.ModelRunnerOutput attribute) (vllm.v1.worker.gpu_input_batch.InputBatch property) ReqMeta (class in vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) (class in vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector) reqs_to_abort (vllm.v1.engine.output_processor.OutputProcessorOutput attribute) requantize_with_max_scale() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) Request (class in vllm.v1.request) request (vllm.entrypoints.openai.serving_engine.ServeContext attribute) request_finished() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorScheduler method) request_goodput (vllm.benchmarks.serve.BenchmarkMetrics attribute) request_id (vllm.engine.multiprocessing.RPCAbortRequest attribute) (vllm.engine.multiprocessing.RPCAdapterLoadedResponse attribute) (vllm.engine.multiprocessing.RPCError attribute) (vllm.engine.multiprocessing.RPCIsSleepingRequest attribute) (vllm.engine.multiprocessing.RPCIsSleepingResponse attribute) (vllm.engine.multiprocessing.RPCLoadAdapterRequest attribute) (vllm.engine.multiprocessing.RPCProcessRequest attribute) (vllm.entrypoints.openai.protocol.BatchResponseData attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.RequestResponseMetadata attribute) (vllm.entrypoints.openai.serving_engine.ServeContext attribute) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.sequence.SequenceGroupMetadataDelta attribute) (vllm.v1.engine.EngineCoreOutput attribute) (vllm.v1.engine.EngineCoreRequest attribute) (vllm.v1.engine.parallel_sampling.ParentRequest attribute) (vllm.v1.stats.common.RequestStats attribute) (vllm.v1.stats.common.RequestStatsUpdate attribute) request_id_prefix (vllm.entrypoints.openai.serving_classification.ServingClassification attribute) (vllm.entrypoints.openai.serving_embedding.OpenAIServingEmbedding attribute) (vllm.entrypoints.openai.serving_engine.OpenAIServing attribute) request_ids_to_seq_ids (vllm.worker.model_runner.ModelInputForGPU attribute) request_output_to_completion_response() (vllm.entrypoints.openai.serving_completion.OpenAIServingCompletion method) request_output_to_pooling_response() (vllm.entrypoints.openai.serving_pooling.OpenAIServingPooling method) request_output_to_rerank_response() (vllm.entrypoints.openai.serving_score.ServingScores method) request_output_to_score_response() (vllm.entrypoints.openai.serving_score.ServingScores method) request_outputs (vllm.v1.engine.output_processor.OutputProcessorOutput attribute) REQUEST_OUTPUTS_T (in module vllm.engine.multiprocessing) request_prompts (vllm.entrypoints.openai.serving_engine.RequestProcessingMixin attribute) request_throughput (vllm.benchmarks.serve.BenchmarkMetrics attribute) RequestFuncInput (class in vllm.benchmarks.endpoint_request_func) RequestFuncOutput (class in vllm.benchmarks.endpoint_request_func) RequestLogger (class in vllm.entrypoints.logger) RequestMetrics (class in vllm.sequence) RequestOutput (class in vllm.outputs) RequestOutputCollector (class in vllm.v1.engine.output_processor) RequestOutputFactory (class in vllm.outputs) RequestOutputKind (class in vllm.sampling_params) RequestProcessingMixin (class in vllm.entrypoints.openai.serving_engine) RequestPrompt (in module vllm.entrypoints.openai.serving_engine) RequestResponseMetadata (class in vllm.entrypoints.openai.protocol) requests (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnectorMetadata attribute) (vllm.v1.metrics.stats.PrefixCacheStats attribute) requests_stats_updates (vllm.v1.stats.common.EngineCoreStatsSnapshot attribute) RequestState (class in vllm.v1.engine.output_processor) RequestStateStats (class in vllm.v1.metrics.stats) RequestStats (class in vllm.v1.stats.common) RequestStatsUpdate (class in vllm.v1.stats.common) RequestStatsUpdate.Type (class in vllm.v1.stats.common) RequestStatus (class in vllm.v1.request) RequestT (in module vllm.entrypoints.openai.serving_engine) RequestTracker (class in vllm.engine.async_llm_engine) rerank() (in module vllm.entrypoints.openai.api_server) RerankDocument (class in vllm.entrypoints.openai.protocol) RerankRequest (class in vllm.entrypoints.openai.protocol) RerankResponse (class in vllm.entrypoints.openai.protocol) RerankResult (class in vllm.entrypoints.openai.protocol) RerankUsage (class in vllm.entrypoints.openai.protocol) resample() (vllm.multimodal.audio.AudioResampler method) resample_audio_librosa() (in module vllm.multimodal.audio) resample_audio_scipy() (in module vllm.multimodal.audio) Resampler2 (class in vllm.model_executor.layers.resampler) Resampler2_5 (class in vllm.model_executor.models.minicpmv) rescale_image_size() (in module vllm.multimodal.image) rescale_video_size() (in module vllm.multimodal.video) RESET (in module vllm.executor.multiproc_worker_utils) (vllm.engine.multiprocessing.RPCResetMultiModalCacheRequest attribute) reset (vllm.v1.metrics.stats.PrefixCacheStats attribute) reset() (vllm.core.block.common.BlockList method) (vllm.core.block.prefix_caching_block.BlockTracker method) (vllm.model_executor.sampling_metadata.SamplingMetadataCache method) (vllm.multimodal.processing.ProcessingCache method) (vllm.utils.Counter method) (vllm.utils.PyObjectCache method) (vllm.v1.core.kv_cache_utils.PrefixCachingMetrics method) (vllm.v1.engine.mm_input_cache.MirroredProcessingCache method) (vllm.v1.spec_decode.metrics.SpecDecodingLogging method) (vllm.v1.structured_output.backend_guidance.GuidanceGrammar method) (vllm.v1.structured_output.backend_types.StructuredOutputGrammar method) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar method) (vllm.worker.multi_step_model_runner.PythonizationCache method) reset_cached_inter_data() (vllm.worker.model_runner.ModelInputForGPUBuilder method) reset_dynamo_cache() (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) reset_hash() (vllm.v1.core.kv_cache_utils.KVCacheBlock method) reset_lora() (vllm.lora.layers.BaseLayerWithLoRA method) (vllm.lora.layers.BaseLinearLayerWithLoRA method) (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA method) (vllm.lora.layers.LogitsProcessorWithLoRA method) (vllm.lora.layers.VocabParallelEmbeddingWithLoRA method) reset_mm_cache() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.engine.protocol.EngineClient method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) reset_mm_cache_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) reset_parameters() (vllm.model_executor.models.moonvit.Learnable2DInterpPosEmb method) (vllm.model_executor.models.phi4mm_utils.NemoConvSubsampling method) reset_prefix_cache() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.core.scheduler.Scheduler method) (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.v1.core.block_pool.BlockPool method) (vllm.v1.core.kv_cache_manager.KVCacheManager method) (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) reset_prefix_cache_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) reset_processor_cache() (vllm.multimodal.registry.MultiModalRegistry method) reset_prompt_adapter() (vllm.prompt_adapter.layers.VocabParallelEmbeddingWithPromptAdapter method) reset_state_for_recompute() (vllm.sequence.Sequence method) (vllm.sequence.SequenceData method) reshape_and_cache() (in module vllm._custom_ops) (in module vllm.attention.ops.nki_flash_attn) (vllm._ipex_ops.ipex_ops static method) reshape_and_cache_flash() (in module vllm._custom_ops) reshape_fairseq2_weights() (vllm.model_executor.models.fairseq2_llama.Fairseq2LlamaForCausalLM method) reshape_hd_patches_2x2merge() (vllm.model_executor.models.phi3v.Phi3HDImageEmbedding method) ResidualAttentionBlock (class in vllm.model_executor.models.molmo) ResidualBlock (class in vllm.model_executor.models.medusa) resize_video() (in module vllm.multimodal.video) resolve_chat_template_content_format() (in module vllm.entrypoints.chat_utils) resolve_current_platform_cls_qualname() (in module vllm.platforms) resolve_h2ovl_min_max_num() (in module vllm.model_executor.models.h2ovl) resolve_hf_chat_template() (in module vllm.entrypoints.chat_utils) resolve_internvl_min_max_num() (in module vllm.model_executor.models.internvl) resolve_lora() (vllm.entrypoints.openai.serving_models.OpenAIServingModels method) (vllm.lora.resolver.LoRAResolver method) (vllm.plugins.lora_resolvers.filesystem_resolver.FilesystemResolver method) resolve_min_max_num() (vllm.model_executor.models.h2ovl.H2OVLProcessor method) (vllm.model_executor.models.internvl.BaseInternVLProcessor method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessor method) resolve_mistral_chat_template() (in module vllm.entrypoints.chat_utils) resolve_mm_processor_kwargs() (in module vllm.utils) resolve_obj_by_qualname() (in module vllm.utils) resolve_skyworkr1v_min_max_num() (in module vllm.model_executor.models.skyworkr1v) resolve_target_ratios() (vllm.model_executor.models.h2ovl.H2OVLProcessor method) (vllm.model_executor.models.internvl.BaseInternVLProcessor method) (vllm.model_executor.models.skyworkr1v.BaseSkyworkR1VProcessor method) resolve_transformers_arch() (in module vllm.model_executor.model_loader.utils) resolve_visual_encoder_outputs() (in module vllm.model_executor.models.vision) response (vllm.entrypoints.openai.protocol.BatchRequestOutput attribute) response_format (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) ResponseFormat (class in vllm.entrypoints.openai.protocol) ResponseGenerationMixin (class in vllm.entrypoints.openai.serving_engine) restype (vllm.distributed.device_communicators.cuda_wrapper.Function attribute) (vllm.distributed.device_communicators.pynccl_wrapper.Function attribute) Result (class in vllm.executor.multiproc_worker_utils) result (vllm.v1.engine.UtilityOutput attribute) result() (vllm.v1.executor.ray_distributed_executor.FutureWrapper method) result_generator (vllm.entrypoints.openai.serving_engine.ResponseGenerationMixin attribute) ResultFuture (class in vllm.executor.multiproc_worker_utils) ResultHandler (class in vllm.executor.multiproc_worker_utils) results (vllm.entrypoints.openai.protocol.RerankResponse attribute) resumed_from_preemption (vllm.v1.core.sched.output.CachedRequestData attribute) return_encoded_softmax (vllm.attention.ops.triton_flash_attention.MetaData attribute) return_tokens_as_token_ids (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) returned_callable (vllm.compilation.backends.VllmBackend attribute) returned_token_ids (vllm.config.PoolerConfig attribute) revision (vllm.config.ModelConfig attribute) (vllm.config.SpeculativeConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.model_executor.model_loader.default_loader.DefaultModelLoader.Source attribute) RMS_ADD_OP (in module vllm.compilation.fusion) rms_norm() (in module vllm._custom_ops) (in module vllm.model_executor.layers.layernorm) (vllm._ipex_ops.ipex_ops static method) rms_norm_dynamic_per_token_quant() (in module vllm._custom_ops) rms_norm_eps (vllm.model_executor.models.plamo2.Plamo2Config attribute) RMS_OP (in module vllm.compilation.fusion) RMSNorm (class in vllm.model_executor.layers.layernorm) RMSNormDynamicQuantPattern (class in vllm.compilation.fusion) RMSNormDynamicQuantPattern.Match (class in vllm.compilation.fusion) RMSNormQuantPattern (class in vllm.compilation.fusion) RMSNormStaticQuantPattern (class in vllm.compilation.fusion) roberta_task_weights_filter() (in module vllm.model_executor.models.roberta) RobertaClassificationHead (class in vllm.model_executor.models.roberta) RobertaEmbedding (class in vllm.model_executor.models.roberta) RobertaEmbeddingModel (class in vllm.model_executor.models.roberta) RobertaForSequenceClassification (class in vllm.model_executor.models.roberta) ROCM (vllm.platforms.interface.PlatformEnum attribute) rocm_aiter_asm_moe_fake() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_asm_moe_impl() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_asm_moe_tkw1_fake() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_asm_moe_tkw1_impl() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_ck_moe_fake() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_ck_moe_impl() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_fmoe_fp8_blockscale_g1u1_fake() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_fmoe_fp8_blockscale_g1u1_impl() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_fused_add_rms_norm() (in module vllm.model_executor.layers.layernorm) rocm_aiter_fused_experts() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_rms_norm() (in module vllm.model_executor.layers.layernorm) rocm_aiter_topk_softmax() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_topk_softmax_fake() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_aiter_topk_softmax_impl() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) rocm_per_tensor_w8a8_scaled_mm() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) rocm_platform_plugin() (in module vllm.platforms) rocm_unquantized_gemm() (in module vllm.model_executor.layers.utils) ROCmFlashAttentionBackend (class in vllm.attention.backends.rocm_flash_attn) ROCmFlashAttentionImpl (class in vllm.attention.backends.rocm_flash_attn) ROCmFlashAttentionMetadata (class in vllm.attention.backends.rocm_flash_attn) ROCmFlashAttentionMetadataBuilder (class in vllm.attention.backends.rocm_flash_attn) RocmPlatform (class in vllm.platforms.rocm) role (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 property) (vllm.entrypoints.chat_utils.ConversationMessage attribute) (vllm.entrypoints.chat_utils.CustomChatCompletionMessageParam attribute) (vllm.entrypoints.openai.protocol.ChatMessage attribute) (vllm.entrypoints.openai.protocol.DeltaMessage attribute) rollback() (vllm.v1.structured_output.backend_guidance.GuidanceGrammar method) (vllm.v1.structured_output.backend_types.StructuredOutputGrammar method) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar method) root (vllm.entrypoints.openai.protocol.ModelCard attribute) Rope2DPosEmb (class in vllm.model_executor.models.moonvit) rope_scaling (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) rope_theta (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) rot_dim (vllm.lora.models.LongContextLoRAContext attribute) rot_pos_emb() (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionTransformer method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionTransformer method) rotary (vllm.transformers_utils.configs.falcon.RWConfig property) rotary_dim (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA property) rotary_embedding() (in module vllm._custom_ops) (vllm._ipex_ops.ipex_ops static method) RotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) rotate_half() (in module vllm.model_executor.models.qwen2_vl) round_down() (in module vllm.utils) round_up() (in module vllm.utils) (in module vllm.worker.hpu_model_runner) rounding (vllm.transformers_utils.configs.arctic.ArcticQuantizationConfig attribute) router (in module vllm.entrypoints.openai.api_server) row_parallel_weight_loader() (in module vllm.model_executor.model_loader.weight_utils) RowParallelLinear (class in vllm.model_executor.layers.linear) RowParallelLinearWithLoRA (class in vllm.lora.layers) RowParallelLinearWithShardedLoRA (class in vllm.lora.fully_sharded_layers) RowvLLMParameter (class in vllm.model_executor.parameter) RPC_REQUEST_T (in module vllm.engine.multiprocessing) RPCAbortRequest (class in vllm.engine.multiprocessing) RPCAdapterLoadedResponse (class in vllm.engine.multiprocessing) RPCError (class in vllm.engine.multiprocessing) RPCIsSleepingRequest (class in vllm.engine.multiprocessing) RPCIsSleepingResponse (class in vllm.engine.multiprocessing) RPCLoadAdapterRequest (class in vllm.engine.multiprocessing) RPCProcessRequest (class in vllm.engine.multiprocessing) RPCResetMultiModalCacheRequest (class in vllm.engine.multiprocessing) RPCResetPrefixCacheRequest (class in vllm.engine.multiprocessing) RPCSleepRequest (class in vllm.engine.multiprocessing) RPCStartupRequest (class in vllm.engine.multiprocessing) RPCStartupResponse (class in vllm.engine.multiprocessing) RPCUProfileRequest (class in vllm.engine.multiprocessing) RPCWakeUpRequest (class in vllm.engine.multiprocessing) run() (in module vllm.collect_env) (vllm.compilation.backends.PiecewiseCompileInterpreter method) (vllm.executor.multiproc_worker_utils.ResultHandler method) (vllm.executor.multiproc_worker_utils.WorkerMonitor method) run_and_parse_first_match() (in module vllm.collect_env) run_and_read_all() (in module vllm.collect_env) run_and_return_first_line() (in module vllm.collect_env) run_busy_loop() (vllm.v1.engine.core.DPEngineCoreProc method) (vllm.v1.engine.core.EngineCoreProc method) run_engine_core() (vllm.v1.engine.core.EngineCoreProc static method) run_engine_loop() (vllm.engine.async_llm_engine.AsyncLLMEngine static method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) run_heartbeat_loop() (vllm.engine.multiprocessing.client.MQLLMEngineClient method) run_hf() (in module vllm.benchmarks.throughput) run_method() (in module vllm.utils) run_mp_engine() (in module vllm.engine.multiprocessing.engine) run_once() (in module vllm.utils) run_output_handler_loop() (vllm.engine.multiprocessing.client.MQLLMEngineClient method) run_request() (in module vllm.entrypoints.openai.run_batch) run_server() (in module vllm.entrypoints.api_server) (in module vllm.entrypoints.openai.api_server) run_startup_loop() (vllm.engine.multiprocessing.engine.MQLLMEngine method) run_vllm() (in module vllm.benchmarks.throughput) run_vllm_async() (in module vllm.benchmarks.throughput) run_vllm_chat() (in module vllm.benchmarks.throughput) runai_safetensors_weights_iterator() (in module vllm.model_executor.model_loader.weight_utils) RUNAI_STREAMER (vllm.config.LoadFormat attribute) RUNAI_STREAMER_SHARDED (vllm.config.LoadFormat attribute) RunaiModelStreamerLoader (class in vllm.model_executor.model_loader.runai_streamer_loader) runnable (vllm.compilation.backends.ConcreteSizeEntry attribute) runner_type (vllm.config.ModelConfig property) (vllm.config.SchedulerConfig attribute) RunnerType (in module vllm.config) RUNNING (vllm.sequence.SequenceStatus attribute) (vllm.v1.request.RequestStatus attribute) running_lora_adapters (vllm.engine.metrics_types.Stats attribute) running_queue_size (vllm.core.scheduler.SchedulerOutputs attribute) (vllm.sequence.ExecuteModelRequest attribute) running_requests (vllm.v1.metrics.stats.LoRAStats attribute) runtime_shape (vllm.compilation.backends.ConcreteSizeEntry attribute) RWConfig (class in vllm.transformers_utils.configs.falcon) S s3_access_key_id (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) s3_endpoint (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) s3_secret_access_key (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) S3Model (class in vllm.transformers_utils.s3_utils) SAFETENSORS (vllm.config.LoadFormat attribute) safetensors_weights_iterator() (in module vllm.model_executor.model_loader.weight_utils) SAFETENSORS_WEIGHTS_NAME (in module vllm.prompt_adapter.utils) sample() (vllm.benchmarks.datasets.AIMODataset method) (vllm.benchmarks.datasets.BenchmarkDataset method) (vllm.benchmarks.datasets.BurstGPTDataset method) (vllm.benchmarks.datasets.ConversationDataset method) (vllm.benchmarks.datasets.InstructCoderDataset method) (vllm.benchmarks.datasets.NextEditPredictionDataset method) (vllm.benchmarks.datasets.RandomDataset method) (vllm.benchmarks.datasets.ShareGPTDataset method) (vllm.benchmarks.datasets.SonnetDataset method) (vllm.benchmarks.datasets.VisionArenaDataset method) (vllm.model_executor.model_loader.neuron.NeuronCausalLM method) (vllm.model_executor.model_loader.neuron.NeuronSpeculationCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronMllamaForCausalLM method) (vllm.model_executor.model_loader.neuronx_distributed.NeuronSpeculationCausalLM method) (vllm.model_executor.models.medusa.Medusa method) (vllm.model_executor.models.mimo_mtp.MiMoMTP method) (vllm.v1.sample.sampler.Sampler method) (vllm.v1.sample.tpu.sampler.Sampler method) (vllm.worker.hpu_model_runner.HpuModelAdapter method) sample_frames_from_video() (in module vllm.multimodal.video) sample_from_logits() (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) sample_indices (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) sample_metadata (vllm.model_executor.layers.sampler.SampleResultArgsType attribute) sample_random_requests() (in module vllm.benchmarks.serve) sample_recovered_tokens() (in module vllm.v1.sample.rejection_sampler) sample_recovered_tokens_kernel() (in module vllm.v1.sample.rejection_sampler) sample_results_dict (vllm.model_executor.layers.sampler.SampleResultArgsType attribute) sampled_token_embeds (vllm.model_executor.layers.sampler.SamplerOutput attribute) sampled_token_ids (vllm.model_executor.layers.sampler.SamplerOutput attribute) (vllm.v1.outputs.ModelRunnerOutput attribute) (vllm.v1.outputs.SamplerOutput attribute) (vllm.worker.multi_step_model_runner.ModelOutput attribute) sampled_token_ids_cpu (vllm.model_executor.layers.sampler.SamplerOutput attribute) sampled_token_probs (vllm.model_executor.layers.sampler.SamplerOutput attribute) sampled_token_ranks (vllm.v1.outputs.LogprobsLists attribute) SampleLogprobs (in module vllm.sequence) SampleMetadataType (in module vllm.model_executor.layers.sampler) Sampler (class in vllm.model_executor.layers.sampler) (class in vllm.v1.sample.sampler) (class in vllm.v1.sample.tpu.sampler) sampler_indices (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase property) sampler_indices_padded (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase property) (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU property) sampler_output (vllm.worker.multi_step_model_runner.ModelOutput attribute) sampler_output() (vllm.spec_decode.medusa_worker.MedusaWorker method) (vllm.spec_decode.mlp_speculator_worker.MLPSpeculatorWorker method) (vllm.spec_decode.multi_step_worker.MultiStepWorker method) (vllm.spec_decode.ngram_worker.NGramWorker method) (vllm.spec_decode.proposer_worker_base.ProposerWorkerBase method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) sampler_output_ready_event (vllm.worker.multi_step_model_runner.ModelOutput attribute) sampler_output_to_torch() (in module vllm.spec_decode.util) SampleRequest (class in vllm.benchmarks.datasets) SampleResultArgsType (class in vllm.model_executor.layers.sampler) SampleResultsDictType (in module vllm.model_executor.layers.sampler) SampleResultType (in module vllm.model_executor.layers.sampler) SampleReturnType (in module vllm.model_executor.layers.sampler) SamplerOutput (class in vllm.model_executor.layers.sampler) (class in vllm.v1.outputs) samples (vllm.sequence.CompletionSequenceGroupOutput attribute) sampling_metadata (vllm.model_executor.layers.sampler.SampleResultArgsType attribute) (vllm.worker.cpu_model_runner.ModelInputForCPUWithSamplingMetadata attribute) (vllm.worker.hpu_model_runner.ModelInputForHPUWithSamplingMetadata attribute) (vllm.worker.model_runner.ModelInputForGPUWithSamplingMetadata attribute) (vllm.worker.neuron_model_runner.ModelInputForNeuron attribute) (vllm.worker.xpu_model_runner.ModelInputForXPUWithSamplingMetadata attribute) sampling_params (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.v1.core.sched.output.NewRequestData attribute) (vllm.v1.engine.EngineCoreRequest attribute) (vllm.v1.engine.parallel_sampling.ParentRequest attribute) (vllm.v1.stats.common.RequestStats attribute) (vllm.v1.stats.common.RequestStatsUpdate attribute) (vllm.v1.structured_output.request.StructuredOutputRequest attribute) (vllm.v1.worker.gpu_input_batch.CachedRequestState attribute) sampling_type() (vllm.sampling_params.SamplingParams method) SamplingMetadata (class in vllm.model_executor.sampling_metadata) (class in vllm.v1.sample.metadata) SamplingMetadataCache (class in vllm.model_executor.sampling_metadata) SamplingParams (class in vllm.sampling_params) SamplingTensors (class in vllm.model_executor.sampling_metadata) SamplingType (class in vllm.sampling_params) sanity_check_mm_encoder_outputs() (in module vllm.v1.worker.utils) save_kv_layer() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector.LMCacheConnectorV1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnector method) save_model() (vllm.model_executor.model_loader.sharded_state_loader.ShardedStateLoader static method) (vllm.model_executor.model_loader.tensorizer_loader.TensorizerLoader static method) save_new_computed_blocks() (vllm.v1.core.single_type_kv_cache_manager.SingleTypeKVCacheManager method) save_sharded_state() (vllm.executor.executor_base.ExecutorBase method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) (vllm.worker.worker.Worker method) save_sharded_state_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) save_tensorized_model() (vllm.worker.model_runner.GPUModelRunnerBase method) (vllm.worker.multi_step_model_runner.MultiStepModelRunner method) (vllm.worker.worker.Worker method) save_to_file() (vllm.compilation.backends.CompilerManager method) save_to_pytorch_benchmark_format() (in module vllm.benchmarks.latency) (in module vllm.benchmarks.serve) (in module vllm.benchmarks.throughput) scalar_types (class in vllm.scalar_type) ScalarType (class in vllm.scalar_type) scale (vllm.lora.layers.LogitsProcessorWithLoRA property) scale_fp8() (in module vllm.attention.ops.triton_flash_attention) scaled_dequantize() (in module vllm.model_executor.layers.quantization.utils.quant_utils) scaled_fp4_experts_quant() (in module vllm._custom_ops) scaled_fp4_quant() (in module vllm._custom_ops) scaled_fp8_quant() (in module vllm._custom_ops) scaled_int8_quant() (in module vllm._custom_ops) scaled_mm_kernel() (in module vllm.model_executor.layers.quantization.compressed_tensors.triton_scaled_mm) scaled_quantize() (in module vllm.model_executor.layers.quantization.utils.quant_utils) ScaledActivation (class in vllm.model_executor.layers.activation) ScaledMMLinearKernel (class in vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel) ScaledMMLinearLayerConfig (class in vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel) scaling_factor (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) (vllm.model_executor.layers.quantization.schema.KVCacheQuantSchema attribute) scaling_factor_to_offset (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA property) (vllm.model_executor.layers.rotary_embedding.LinearScalingRotaryEmbedding property) scaling_factors (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA property) (vllm.lora.models.LongContextLoRAContext attribute) scatter_mm_placeholders() (in module vllm.v1.worker.utils) sched_yield() (in module vllm.distributed.device_communicators.shm_broadcast) schedulable_prefills (vllm.core.scheduler.PartialPrefillMetadata attribute) schedule() (vllm.core.scheduler.Scheduler method) (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) SCHEDULED (vllm.v1.engine.EngineCoreEventType attribute) scheduled_cached_reqs (vllm.v1.core.sched.output.SchedulerOutput attribute) scheduled_encoder_inputs (vllm.v1.core.sched.output.SchedulerOutput attribute) scheduled_new_reqs (vllm.v1.core.sched.output.SchedulerOutput attribute) scheduled_request() (vllm.v1.metrics.stats.LoRARequestStates static method) scheduled_seq_group_builder() (in module vllm.core.scheduler) scheduled_seq_groups (vllm.core.scheduler.SchedulerOutputs attribute) scheduled_spec_decode_tokens (vllm.v1.core.sched.output.SchedulerOutput attribute) scheduled_ts (vllm.v1.metrics.stats.RequestStateStats attribute) ScheduledSequenceGroup (class in vllm.core.scheduler) Scheduler (class in vllm.core.scheduler) (class in vllm.v1.core.sched.scheduler) SCHEDULER (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorRole attribute) scheduler_cls (vllm.config.SchedulerConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) scheduler_config (vllm.config.VllmConfig attribute) (vllm.core.scheduler.PartialPrefillMetadata attribute) scheduler_delay_factor (vllm.engine.arg_utils.EngineArgs attribute) scheduler_metadata (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) scheduler_outputs (vllm.engine.llm_engine.OutputData attribute) (vllm.engine.llm_engine.SchedulerOutputState attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) scheduler_running_outputs_builder() (in module vllm.core.scheduler) scheduler_stats (vllm.v1.engine.EngineCoreOutputs attribute) (vllm.v1.stats.common.EngineCoreStatsSnapshot attribute) scheduler_time (vllm.sequence.RequestMetrics attribute) SchedulerConfig (class in vllm.config) SchedulerContext (class in vllm.engine.llm_engine) SchedulerInterface (class in vllm.v1.core.sched.interface) SchedulerOutput (class in vllm.v1.core.sched.output) SchedulerOutputs (class in vllm.core.scheduler) SchedulerOutputState (class in vllm.engine.llm_engine) SchedulerPolicy (in module vllm.config) SchedulerPrefillOutputs (class in vllm.core.scheduler) SchedulerRunningOutputs (class in vllm.core.scheduler) SchedulerStats (class in vllm.v1.metrics.stats) (class in vllm.v1.stats.common) SchedulerSwappedInOutputs (class in vllm.core.scheduler) scheduling_policy (vllm.engine.arg_utils.EngineArgs attribute) SchedulingBudget (class in vllm.core.scheduler) score (vllm.entrypoints.openai.protocol.ScoreResponseData attribute) (vllm.outputs.ScoringOutput attribute) score() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.llm.LLM method) score_proposals() (vllm.spec_decode.batch_expansion.BatchExpansionTop1Scorer method) (vllm.spec_decode.interfaces.SpeculativeScorer method) (vllm.spec_decode.mqa_scorer.MQAScorer method) ScoreRequest (class in vllm.entrypoints.openai.protocol) ScoreResponse (class in vllm.entrypoints.openai.protocol) ScoreResponseData (class in vllm.entrypoints.openai.protocol) ScoringOutput (class in vllm.outputs) ScoringRequestOutput (class in vllm.outputs) sd_worker_cls (vllm.config.ParallelConfig attribute) sdpa_attention() (in module vllm.model_executor.models.moonvit) second_last_token_hidden_states (vllm.sequence.HiddenStates attribute) second_per_grid_ts (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLVideoPixelInputs attribute) seed (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.sampling_params.SamplingParams attribute) seed_everything() (vllm.platforms.interface.Platform class method) seek (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) segments (vllm.entrypoints.openai.protocol.TranscriptionResponseVerbose attribute) select() (vllm.distributed.kv_transfer.kv_connector.simple_connector.SimpleConnector method) select_best_resolution() (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor method) select_experts() (vllm.model_executor.layers.fused_moe.layer.FusedMoE static method) select_hidden_states() (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) select_proj_params() (vllm.model_executor.layers.linear.QKVCrossParallelLinear method) select_text() (vllm.multimodal.processing.PromptUpdateDetails static method) select_tiling() (in module vllm.model_executor.models.molmo) (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) select_token_id() (vllm.multimodal.processing.PromptUpdateDetails static method) selected_token_ranks (vllm.v1.outputs.LogprobsTensors attribute) selective_scan_fn() (in module vllm.model_executor.layers.mamba.ops.mamba_ssm) selective_scan_fwd() (in module vllm._custom_ops) selective_state_update() (in module vllm.model_executor.layers.mamba.ops.mamba_ssm) self_attention() (vllm.model_executor.models.bamba.BambaAttentionDecoderLayer method) (vllm.model_executor.models.jamba.JambaAttentionDecoderLayer method) self_attn_layer_norm (vllm.model_executor.models.bart.BartDecoderLayer attribute) SelfAttnBlockSpaceManager (class in vllm.core.block_manager) send() (vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase method) (vllm.distributed.device_communicators.cuda_communicator.CudaCommunicator method) (vllm.distributed.device_communicators.pynccl.PyNcclCommunicator method) (vllm.distributed.parallel_state.GroupCoordinator method) send_bytes() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) send_delta_data (vllm.config.SchedulerConfig attribute) send_dst_counter (vllm.distributed.utils.StatelessProcessGroup attribute) send_kv_caches_and_hidden_states() (vllm.distributed.kv_transfer.kv_connector.base.KVConnectorBase method) (vllm.distributed.kv_transfer.kv_connector.lmcache_connector.LMCacheConnector method) (vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector.MooncakeStoreConnector method) (vllm.distributed.kv_transfer.kv_connector.simple_connector.SimpleConnector method) (vllm.distributed.kv_transfer.kv_connector_agent.KVTransferAgent method) send_obj() (vllm.distributed.utils.StatelessProcessGroup method) send_object() (vllm.distributed.parallel_state.GroupCoordinator method) send_tensor() (vllm.distributed.kv_transfer.kv_pipe.base.KVPipeBase method) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakePipe method) (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe method) send_tensor_dict() (vllm.distributed.parallel_state.GroupCoordinator method) send_tensor_wrapper() (vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe.PyNcclPipe method) sep_token (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) separate_weights() (vllm.model_executor.models.mllama4.Llama4ForConditionalGeneration method) seq_data (vllm.inputs.registry.DummyData attribute) (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) (vllm.sequence.SequenceGroupMetadata attribute) seq_data_delta (vllm.sequence.SequenceGroupMetadataDelta attribute) seq_group (vllm.core.scheduler.ScheduledSequenceGroup attribute) seq_group_metadata_builder() (in module vllm.core.scheduler) seq_group_metadata_list (vllm.engine.llm_engine.OutputData attribute) (vllm.engine.llm_engine.SchedulerOutputState attribute) (vllm.sequence.ExecuteModelRequest attribute) (vllm.sequence.HiddenStates attribute) seq_groups (vllm.core.scheduler.SchedulerPrefillOutputs attribute) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) seq_id_to_index (vllm.sequence.SequenceGroupBase attribute) seq_ids (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) (vllm.sequence.HiddenStates property) seq_idx (vllm.model_executor.layers.mamba.mamba2_metadata.Mamba2Metadata attribute) seq_len (vllm.model_executor.sampling_metadata.SequenceGroupToSample attribute) seq_lens (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.ipex_attn.IpexAttnMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonDecodeMetadata attribute) (vllm.v1.attention.backends.utils.CommonAttentionMetadata attribute) (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) seq_lens_inter (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) seq_lens_intra (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) seq_lens_succ (vllm.attention.backends.dual_chunk_flash_attn.DualChunkFlashAttentionMetadata attribute) seq_lens_tensor (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.hpu_attn.HPUAttentionMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) (vllm.attention.ops.paged_attn.PagedAttentionMetadata attribute) seq_output_builder() (in module vllm.worker.multi_step_model_runner) seq_start_loc (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) seq_tot (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata.ChunkedContextMetadata attribute) SeqId (in module vllm.core.block_manager) (in module vllm.spec_decode.batch_expansion) (in module vllm.spec_decode.mqa_scorer) (in module vllm.spec_decode.util) seqlen_q (vllm.attention.backends.ipex_attn.IpexAttnMetadata attribute) Sequence (class in vllm.sequence) SequenceData (class in vllm.sequence) SequenceDataDelta (class in vllm.sequence) SequenceGroup (class in vllm.sequence) SequenceGroupBase (class in vllm.sequence) SequenceGroupMetadata (class in vllm.sequence) SequenceGroupMetadataDelta (class in vllm.sequence) SequenceGroupOutput (class in vllm.sequence) SequenceGroupOutputProcessor (class in vllm.engine.output_processor.interfaces) SequenceGroupState (class in vllm.sequence) SequenceGroupToSample (class in vllm.model_executor.sampling_metadata) SequenceOutput (class in vllm.sequence) SequenceParallelismPass (class in vllm.compilation.sequence_parallelism) sequences (vllm.beam_search.BeamSearchOutput attribute) SequenceStage (class in vllm.sequence) SequenceStatus (class in vllm.sequence) serialize_guidance_grammar() (in module vllm.v1.structured_output.backend_guidance) serialize_item() (vllm.multimodal.hasher.MultiModalHasher class method) serialize_vllm_model() (in module vllm.model_executor.model_loader.tensorizer) serve_http() (in module vllm.entrypoints.launcher) ServeContext (class in vllm.entrypoints.openai.serving_engine) served_model_name (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) ServeSubcommand (class in vllm.entrypoints.cli.serve) ServingClassification (class in vllm.entrypoints.openai.serving_classification) ServingScores (class in vllm.entrypoints.openai.serving_score) set_active_adapters() (vllm.adapter_commons.worker_manager.AbstractWorkerManager method) (vllm.lora.worker_manager.WorkerLoRAManager method) (vllm.prompt_adapter.worker_manager.WorkerPromptAdapterManager method) set_active_adapters_worker() (in module vllm.adapter_commons.utils) set_active_loras() (vllm.v1.worker.lora_model_runner_mixin.LoRAModelRunnerMixin method) (vllm.worker.cpu_model_runner.CPUModelRunnerBase method) (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.model_runner.GPUModelRunnerBase method) set_active_prompt_adapters() (vllm.worker.model_runner.GPUModelRunnerBase method) set_adapter_mapping() (in module vllm.adapter_commons.utils) (vllm.adapter_commons.models.AdapterModelManager method) (vllm.lora.models.LoRAModelManager method) (vllm.prompt_adapter.models.PromptAdapterModelManager method) set_attn_bias() (vllm.attention.backends.torch_sdpa.TorchSDPAMetadata method) set_audio_embed_sizes() (vllm.model_executor.models.phi4mm_audio.AudioEmbedding method) set_audio_embeds() (vllm.model_executor.models.phi4mm_audio.AudioEmbedding method) set_aux_hidden_state_layers() (vllm.model_executor.models.llama.LlamaForCausalLM method) set_cpu_offload_max_bytes() (in module vllm.model_executor.models.utils) set_current_vllm_config() (in module vllm.config) set_custom_all_reduce() (in module vllm.distributed.parallel_state) set_decoder() (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) set_default_torch_dtype() (in module vllm.model_executor.model_loader.utils) set_eight_bit_params() (vllm.attention.ops.triton_flash_attention.MetaData method) set_errored() (vllm.engine.async_llm_engine.AsyncLLMEngine method) set_export() (vllm.model_executor.models.phi4mm_utils.AttModule method) set_finished_time() (vllm.sequence.SequenceGroup method) set_forward_context() (in module vllm.forward_context) set_in_profile_run() (vllm.worker.model_runner.GPUModelRunnerBase method) set_include_gpu_probs_tensor() (vllm.spec_decode.medusa_worker.MedusaWorker method) (vllm.spec_decode.multi_step_worker.MultiStepWorker method) (vllm.spec_decode.proposer_worker_base.ProposerWorkerBase method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) set_indices_of_seq_with_bonus_tokens() (vllm.spec_decode.draft_model_runner.TP1DraftModelRunner method) set_inductor_config() (in module vllm.compilation.compiler_interface) set_input_embeddings() (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) set_kv_transfer_params() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) set_last_token_time() (vllm.sequence.SequenceGroup method) set_lora() (vllm.lora.layers.BaseLayerWithLoRA method) (vllm.lora.layers.BaseLinearLayerWithLoRA method) (vllm.lora.layers.LinearScalingRotaryEmbeddingWithLoRA method) (vllm.lora.layers.LogitsProcessorWithLoRA method) (vllm.lora.layers.MergedColumnParallelLinearWithLoRA method) (vllm.lora.layers.VocabParallelEmbeddingWithLoRA method) set_mapping() (vllm.lora.layers.BaseLayerWithLoRA method) (vllm.prompt_adapter.layers.VocabParallelEmbeddingWithPromptAdapter method) set_multiprocessing_worker_envs() (in module vllm.executor.multiproc_worker_utils) set_ngram_window_size() (vllm.spec_decode.ngram_worker.NGramWorker method) set_output_embeddings() (vllm.model_executor.models.phi3_small.Phi3SmallForCausalLM method) set_prompt_adapter() (vllm.prompt_adapter.layers.VocabParallelEmbeddingWithPromptAdapter method) set_random_seed() (in module vllm.model_executor.utils) set_result() (vllm.executor.multiproc_worker_utils.ResultFuture method) set_runtime_usage_data() (in module vllm.usage.usage_lib) set_seq_group_list() (vllm.worker.cpu_model_runner.ModelInputForCPUBuilder method) set_should_modify_greedy_probs_inplace() (vllm.spec_decode.medusa_worker.MedusaWorker method) (vllm.spec_decode.multi_step_worker.MultiStepWorker method) (vllm.spec_decode.proposer_worker_base.ProposerWorkerBase method) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker method) set_splitting_ops_for_v1() (vllm.config.CompilationConfig method) set_tokenizer() (vllm.entrypoints.llm.LLM method) set_ulimit() (in module vllm.utils) set_varlen_params() (vllm.attention.ops.triton_flash_attention.MetaData method) set_vllm_use_v1() (in module vllm.envs) set_weight_attrs() (in module vllm.model_executor.utils) setup() (vllm.engine.multiprocessing.client.MQLLMEngineClient method) setup_default_loggers() (in module vllm.v1.metrics.loggers) setup_profiler() (in module vllm.worker.hpu_model_runner) sgl_moe_align_block_size() (in module vllm._custom_ops) sgmv_expand() (in module vllm.lora.ops.torch_ops.lora_ops) sgmv_expand_slice() (in module vllm.lora.ops.torch_ops.lora_ops) sgmv_shrink() (in module vllm.lora.ops.torch_ops.lora_ops) sha256() (in module vllm.utils) shard_base_weights (vllm.transformers_utils.configs.arctic.ArcticLoRAConfig attribute) SHARDED_STATE (vllm.config.LoadFormat attribute) sharded_weight_loader() (in module vllm.model_executor.model_loader.weight_utils) ShardedStateLoader (class in vllm.model_executor.model_loader.sharded_state_loader) shared() (vllm.multimodal.inputs.MultiModalFieldConfig static method) shared_kv_last_page_len (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) shared_kv_page_indices (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) shared_kv_page_indptr (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) shared_moe_coefficient_loader() (vllm.model_executor.models.minimax_text_01.MiniMaxText01DecoderLayer static method) shared_qo_indptr (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) SharedHead (class in vllm.model_executor.models.deepseek_mtp) SharedStorageConnector (class in vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector) SharedStorageConnectorMetadata (class in vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector) ShareGPTDataset (class in vllm.benchmarks.datasets) ShmRingBuffer (class in vllm.distributed.device_communicators.shm_broadcast) should_custom_ar() (vllm.distributed.device_communicators.custom_all_reduce.CustomAllreduce method) should_ignore_layer() (in module vllm.model_executor.layers.quantization.compressed_tensors.utils) (in module vllm.model_executor.layers.quantization.quark.utils) should_modify_greedy_probs_inplace (vllm.lora.layers.LogitsProcessorWithLoRA property) should_moe_wna16_use_cuda() (in module vllm.model_executor.layers.fused_moe.fused_moe) should_use_atomic_add_reduce() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) show_available_models() (in module vllm.entrypoints.openai.api_server) (vllm.entrypoints.openai.serving_models.OpenAIServingModels method) show_hidden_metrics() (vllm.config.ObservabilityConfig method) show_hidden_metrics_for_version (vllm.config.ObservabilityConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) show_version() (in module vllm.entrypoints.openai.api_server) shrink() (vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU method) shuffle_weights() (in module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe) shutdown() (in module vllm.v1.utils) (vllm.distributed.kv_events.EventPublisher method) (vllm.distributed.kv_events.NullEventPublisher method) (vllm.distributed.kv_events.ZmqEventPublisher method) (vllm.executor.executor_base.ExecutorBase method) (vllm.executor.mp_distributed_executor.MultiprocessingDistributedExecutor method) (vllm.executor.ray_distributed_executor.RayDistributedExecutor method) (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.DPEngineCoreProc method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.MPClient method) (vllm.v1.executor.multiproc_executor.MultiprocExecutor method) (vllm.v1.executor.multiproc_executor.WorkerProc method) (vllm.v1.utils.BackgroundProcHandle method) shutdown_background_loop() (vllm.engine.async_llm_engine.AsyncLLMEngine method) shutdown_inc() (vllm.worker.hpu_model_runner.HPUModelRunner method) (vllm.worker.hpu_worker.HPUWorker method) shutdown_path (vllm.v1.engine.core_client.BackgroundResources attribute) SHUTDOWN_TIMEOUT (vllm.distributed.kv_events.ZmqEventPublisher attribute) SIGLIP_NAME (in module vllm.model_executor.models.phi4mm) SiglipAttention (class in vllm.model_executor.models.siglip) SiglipEncoder (class in vllm.model_executor.models.siglip) SiglipEncoderInfo (class in vllm.model_executor.models.siglip) SiglipEncoderLayer (class in vllm.model_executor.models.siglip) SiglipMLP (class in vllm.model_executor.models.siglip) SiglipMultiheadAttentionPoolingHead (class in vllm.model_executor.models.siglip) SiglipVisionEmbeddings (class in vllm.model_executor.models.siglip) SiglipVisionModel (class in vllm.model_executor.models.siglip) SiglipVisionTransformer (class in vllm.model_executor.models.siglip) SiglipVisualTokenizerConfig (class in vllm.transformers_utils.configs.ovis) signal_handler() (in module vllm.engine.multiprocessing.engine) signed (vllm.scalar_type.ScalarType attribute) silu_and_mul() (vllm._ipex_ops.ipex_ops static method) silu_mul_pattern_static() (in module vllm.compilation.activation_quant_fusion) silu_mul_replacement_static() (in module vllm.compilation.activation_quant_fusion) SiluAndMul (class in vllm.model_executor.layers.activation) simple_compile_backend (vllm.platforms.interface.Platform attribute) (vllm.platforms.tpu.TpuPlatform attribute) simple_reinit() (vllm.worker.model_runner.ModelInputForGPUBuilder.InterDataForSeqGroup method) SimpleBuffer (class in vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer) SimpleConnector (class in vllm.distributed.kv_transfer.kv_connector.simple_connector) SimplePooler (class in vllm.model_executor.layers.pooler) single_step_process_prompt_logprob() (in module vllm.engine.output_processor.single_step) SingleStepOutputProcessor (class in vllm.engine.output_processor.single_step) SingletonInputs (in module vllm.inputs.data) SingletonPrompt (in module vllm.inputs.data) SingleTypeKVCacheManager (class in vllm.v1.core.single_type_kv_cache_manager) size (vllm.v1.kv_cache_interface.KVCacheTensor attribute) size_bits (vllm.scalar_type.ScalarType property) skip (vllm.engine.llm_engine.OutputData attribute) skip_attention_mask() (in module vllm.model_executor.models.mllama) skip_special_tokens (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) skip_tokenizer_init (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) SkyworkR1VChatConfig (class in vllm.transformers_utils.configs.skyworkr1v) SkyworkR1VChatModel (class in vllm.model_executor.models.skyworkr1v) SkyworkR1VDummyInputsBuilder (class in vllm.model_executor.models.skyworkr1v) SkyworkR1VImageEmbeddingInputs (class in vllm.model_executor.models.skyworkr1v) SkyworkR1VImageInputs (in module vllm.model_executor.models.skyworkr1v) SkyworkR1VImagePixelInputs (class in vllm.model_executor.models.skyworkr1v) SkyworkR1VMultiModalProcessor (class in vllm.model_executor.models.skyworkr1v) SkyworkR1VProcessingInfo (class in vllm.model_executor.models.skyworkr1v) SkyworkR1VProcessor (class in vllm.model_executor.models.skyworkr1v) sleep() (vllm.device_allocator.cumem.CuMemAllocator method) (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.executor.executor_base.ExecutorBase method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.worker.worker.Worker method) sleep_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) SLEEP_LEVEL_1 (vllm.engine.multiprocessing.RPCSleepRequest attribute) SLEEP_LEVEL_2 (vllm.engine.multiprocessing.RPCSleepRequest attribute) slice() (vllm.v1.outputs.LogprobsLists method) slice_bias() (vllm.lora.fully_sharded_layers.RowParallelLinearWithShardedLoRA method) (vllm.lora.layers.ColumnParallelLinearWithLoRA method) (vllm.lora.layers.MergedColumnParallelLinearWithLoRA method) (vllm.lora.layers.QKVParallelLinearWithLoRA method) (vllm.lora.layers.RowParallelLinearWithLoRA method) slice_lora_a() (vllm.lora.fully_sharded_layers.ColumnParallelLinearWithShardedLoRA method) (vllm.lora.fully_sharded_layers.MergedColumnParallelLinearWithShardedLoRA method) (vllm.lora.fully_sharded_layers.MergedQKVParallelLinearWithShardedLoRA method) (vllm.lora.fully_sharded_layers.QKVParallelLinearWithShardedLoRA method) (vllm.lora.layers.BaseLayerWithLoRA method) (vllm.lora.layers.ColumnParallelLinearWithLoRA method) (vllm.lora.layers.MergedColumnParallelLinearWithLoRA method) (vllm.lora.layers.RowParallelLinearWithLoRA method) slice_lora_b() (vllm.lora.fully_sharded_layers.RowParallelLinearWithShardedLoRA method) (vllm.lora.layers.BaseLayerWithLoRA method) (vllm.lora.layers.ColumnParallelLinearWithLoRA method) (vllm.lora.layers.MergedColumnParallelLinearWithLoRA method) (vllm.lora.layers.QKVParallelLinearWithLoRA method) (vllm.lora.layers.RowParallelLinearWithLoRA method) slices (vllm.multimodal.inputs.MultiModalFlatField attribute) sliding_window (vllm.config.CacheConfig attribute) (vllm.v1.kv_cache_interface.SlidingWindowSpec attribute) SlidingWindowManager (class in vllm.v1.core.single_type_kv_cache_manager) SlidingWindowSpec (class in vllm.v1.kv_cache_interface) slot_mapping (vllm.attention.backends.abstract.AttentionMetadata attribute) (vllm.attention.backends.ipex_attn.IpexAttnMetadata attribute) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.ReqMeta attribute) (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.v1.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.v1.attention.backends.pallas.PallasMetadata attribute) (vllm.worker.hpu_model_runner.PrepareDecodeMetadata attribute) (vllm.worker.hpu_model_runner.PreparePromptMetadata attribute) SlowIncrementalDetokenizer (class in vllm.v1.engine.detokenizer) sm_scale (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.flashinfer.PerLayerParameters attribute) (vllm.v1.attention.backends.flashinfer.PerLayerParameters attribute) SmallerTpProposerWorker (class in vllm.spec_decode.smaller_tp_proposer_worker) SmolVLMForConditionalGeneration (class in vllm.model_executor.models.smolvlm) SmolVLMProcessingInfo (class in vllm.model_executor.models.smolvlm) socket (vllm.distributed.utils.StatelessProcessGroup attribute) soft_cap (vllm.lora.layers.LogitsProcessorWithLoRA property) softmax (vllm.config.PoolerConfig attribute) SolarAttention (class in vllm.model_executor.models.solar) SolarConfig (class in vllm.transformers_utils.configs.solar) SolarDecoderLayer (class in vllm.model_executor.models.solar) SolarForCausalLM (class in vllm.model_executor.models.solar) SolarMLP (class in vllm.model_executor.models.solar) SolarModel (class in vllm.model_executor.models.solar) SonnetDataset (class in vllm.benchmarks.datasets) sort_weights() (in module vllm.model_executor.layers.quantization.utils.quant_utils) SortedHelpFormatter (class in vllm.utils) spaces_between_special_tokens (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) SpanAttributes (class in vllm.tracing) sparse_cutlass_supported() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) sparse_semi_structured_from_dense_cutlass() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) sparse_semi_structured_to_dense_cutlass() (in module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24) sparsemixer() (in module vllm.model_executor.models.phimoe) SPARSITY_CONFIG_NAME (in module vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors) spatial_merge_size (vllm.model_executor.models.pixtral.VisionEncoderArgs attribute) SpatialBlock (class in vllm.model_executor.models.florence2) spda() (vllm.attention.ops.blocksparse_attention.interface.LocalStridedBlockSparseAttn method) spec_decode_metrics (vllm.engine.metrics_types.Stats attribute) spec_decode_worker_metrics (vllm.model_executor.layers.sampler.SamplerOutput attribute) spec_decoding_stats (vllm.v1.metrics.stats.SchedulerStats attribute) spec_manager_map (in module vllm.v1.core.single_type_kv_cache_manager) spec_step_idx (vllm.sequence.ExecuteModelRequest attribute) spec_target_max_model_len (vllm.config.ModelConfig attribute) spec_token_acceptance_counts (vllm.sequence.RequestMetrics attribute) spec_token_ids (vllm.v1.outputs.ModelRunnerOutput attribute) SpecDecodeBaseSampler (class in vllm.model_executor.layers.spec_decode_base_sampler) SpecDecodeDeterministicBaseSampler (class in vllm.model_executor.layers.spec_decode_base_sampler) SpecDecodeMetadata (class in vllm.v1.spec_decode.metadata) SpecDecodeStochasticBaseSampler (class in vllm.model_executor.layers.spec_decode_base_sampler) SpecDecodeWorker (class in vllm.spec_decode.spec_decode_worker) SpecDecodeWorkerMetrics (class in vllm.spec_decode.metrics) SpecDecodingLogging (class in vllm.v1.spec_decode.metrics) SpecDecodingProm (class in vllm.v1.spec_decode.metrics) SpecDecodingStats (class in vllm.v1.spec_decode.metrics) SPECULATION_TERMINATION_ID (vllm.model_executor.model_loader.neuron.NeuronSpeculationCausalLM attribute) speculative_config (vllm.config.VllmConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) speculative_token_tree (vllm.config.SpeculativeConfig attribute) SpeculativeAcceptanceMethod (in module vllm.config) SpeculativeConfig (class in vllm.config) SpeculativeMethod (in module vllm.config) SpeculativeProposals (class in vllm.spec_decode.interfaces) SpeculativeProposer (class in vllm.spec_decode.interfaces) SpeculativeScorer (class in vllm.spec_decode.interfaces) SpeculativeScores (class in vllm.spec_decode.interfaces) split_batch_by_proposal_len() (in module vllm.spec_decode.util) split_enc_dec_inputs() (in module vllm.inputs.parse) split_gm (vllm.compilation.backends.VllmBackend attribute) split_graph() (in module vllm.compilation.backends) split_kv_cache() (vllm.attention.backends.ipex_attn.IpexAttnBackendImpl method) (vllm.attention.ops.hpu_paged_attn.HPUPagedAttention static method) (vllm.attention.ops.paged_attn.PagedAttention static method) split_num_cache_blocks_evenly() (in module vllm.spec_decode.spec_decode_worker) split_qkv() (vllm.model_executor.models.internlm2.InternLM2Attention method) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionAttention method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionAttention method) split_tensor_along_last_dim() (in module vllm.distributed.utils) split_up_gate_proj() (vllm.model_executor.models.bert_with_rope.GteModel method) split_zmq_path() (in module vllm.utils) SplitItem (class in vllm.compilation.backends) splitting_ops (vllm.config.CompilationConfig attribute) SQRT2 (in module vllm.model_executor.models.mlp_speculator) src (vllm.multimodal.base.MultiModalPlaceholderMap.IndexMap attribute) src_len (vllm.multimodal.base.MultiModalPlaceholderMap attribute) src_ranges (vllm.multimodal.base.MultiModalPlaceholderMap attribute) SSLCertRefresher (class in vllm.entrypoints.ssl) ssm_state (vllm.model_executor.models.mamba_cache.MambaCacheParams attribute) st_argmax() (in module vllm.model_executor.models.ovis) StablelmAttention (class in vllm.model_executor.models.stablelm) StablelmDecoderLayer (class in vllm.model_executor.models.stablelm) StableLMEpochModel (class in vllm.model_executor.models.stablelm) StablelmForCausalLM (class in vllm.model_executor.models.stablelm) StablelmMLP (class in vllm.model_executor.models.stablelm) StackAudioFrames (class in vllm.model_executor.models.ultravox) stacked_params_mapping (vllm.model_executor.models.bart.BartForConditionalGeneration attribute) stage (vllm.sequence.SequenceData property) STANDARD_QUANT_TYPES (in module vllm.model_executor.layers.quantization.gguf) Starcoder2Attention (class in vllm.model_executor.models.starcoder2) Starcoder2DecoderLayer (class in vllm.model_executor.models.starcoder2) Starcoder2ForCausalLM (class in vllm.model_executor.models.starcoder2) Starcoder2MLP (class in vllm.model_executor.models.starcoder2) Starcoder2Model (class in vllm.model_executor.models.starcoder2) start (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) (vllm.entrypoints.openai.protocol.TranscriptionWord attribute) start() (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.multimodal.processing.PromptIndexTargets static method) start_background_loop() (vllm.engine.async_llm_engine.AsyncLLMEngine method) START_DP_WAVE (vllm.v1.engine.EngineCoreRequestType attribute) start_idx (vllm.multimodal.processing.PlaceholderFeaturesInfo attribute) (vllm.multimodal.processing.PromptTargetMatch property) start_load_kv() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector.LMCacheConnectorV1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorWorker method) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnector method) start_monitoring_torch_compile() (in module vllm.compilation.monitor) START_PROFILE (vllm.engine.multiprocessing.RPCUProfileRequest attribute) start_profile() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.executor.executor_base.ExecutorBase method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) start_token (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser attribute) start_token_id (vllm.reasoning.deepseek_r1_reasoning_parser.DeepSeekR1ReasoningParser attribute) start_wave (vllm.v1.engine.EngineCoreOutputs attribute) start_worker_execution_loop() (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.worker.worker_base.WorkerBase method) start_worker_monitor() (vllm.v1.executor.multiproc_executor.MultiprocExecutor method) starts (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata.ChunkedContextMetadata attribute) STARTUP_POLL_PERIOD_MS (in module vllm.v1.engine.core_client) stat() (vllm.utils.LRUCache method) state (vllm.sequence.SequenceGroupMetadata attribute) (vllm.sequence.SequenceGroupMetadataDelta attribute) state_indices_tensor (vllm.model_executor.models.mamba_cache.MambaCacheParams attribute) (vllm.model_executor.models.minimax_cache.MinimaxCacheParams attribute) StatefulModelInput (class in vllm.worker.multi_step_model_runner) stateless_destroy_torch_distributed_process_group() (in module vllm.distributed.utils) stateless_init_dp_group() (vllm.config.ParallelConfig method) stateless_init_torch_distributed_process_group() (in module vllm.distributed.utils) StatelessProcessGroup (class in vllm.distributed.utils) static (vllm.compilation.fusion.QuantKey attribute) static_forward_context (vllm.config.CompilationConfig attribute) StatLoggerBase (class in vllm.engine.metrics_types) (class in vllm.v1.metrics.loggers) StatLoggerFactory (in module vllm.v1.metrics.loggers) Stats (class in vllm.engine.metrics_types) StatsEntry (in module vllm.profiler.layerwise_profile) status_code (vllm.entrypoints.openai.protocol.BatchResponseData attribute) std_e2el_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) std_itl_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) std_tpot_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) std_ttft_ms (vllm.benchmarks.serve.BenchmarkMetrics attribute) STEP (vllm.model_executor.layers.pooler.PoolingType attribute) step() (vllm.engine.llm_engine.LLMEngine method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.llm_engine.LLMEngine method) step_cuda_events (vllm.worker.multi_step_model_runner.StatefulModelInput attribute) step_index (vllm.sequence.CompletionSequenceGroupOutput attribute) step_tag_id (vllm.config.PoolerConfig attribute) step_with_batch_queue() (vllm.v1.engine.core.EngineCore method) StepPool (class in vllm.model_executor.layers.pooler) stop (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) STOP (vllm.v1.engine.FinishReason attribute) stop() (vllm.entrypoints.ssl.SSLCertRefresher method) STOP_ITERATION (in module vllm.engine.async_llm_engine) STOP_PROFILE (vllm.engine.multiprocessing.RPCUProfileRequest attribute) stop_profile() (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.executor.executor_base.ExecutorBase method) (vllm.spec_decode.spec_decode_worker.SpecDecodeWorker method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.worker.cpu_worker.CPUWorker method) (vllm.worker.hpu_worker.HPUWorker method) (vllm.worker.tpu_worker.TPUWorker method) (vllm.worker.worker.Worker method) stop_reason (vllm.beam_search.BeamSearchSequence attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.ChatCompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.CompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.CompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.TranscriptionResponseStreamChoice attribute) (vllm.outputs.CompletionOutput attribute) (vllm.v1.engine.EngineCoreOutput attribute) stop_remote_worker_execution_loop() (vllm.engine.llm_engine.LLMEngine method) (vllm.executor.executor_base.DistributedExecutorBase method) (vllm.executor.executor_base.ExecutorBase method) stop_remote_worker_execution_loop_async() (vllm.executor.executor_base.DistributedExecutorBase method) (vllm.executor.executor_base.ExecutorBase method) stop_token_ids (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.sampling_params.SamplingParams attribute) StopChecker (class in vllm.engine.output_processor.stop_checker) STORAGE_DTYPE (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig attribute) store (vllm.distributed.utils.StatelessProcessGroup attribute) StoreBoolean (class in vllm.utils) STR_BACKEND_ENV_VAR (in module vllm.utils) STR_DTYPE_TO_TORCH_DTYPE (in module vllm.utils) STR_DUAL_CHUNK_FLASH_ATTN_VAL (in module vllm.utils) STR_FLASH_ATTN_VAL (in module vllm.utils) STR_FLASHINFER_ATTN_VAL (in module vllm.utils) STR_INVALID_VAL (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_BACKEND (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_ERR_STRS (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_LORA (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_MM (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_PP (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_PROMPT_ADAPTER (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_ROCM_HIP (in module vllm.attention.backends.utils) STR_NOT_IMPL_ENC_DEC_SPEC_DEC (in module vllm.utils) STR_NOT_IMPL_ENC_DEC_SWA (in module vllm.utils) STR_ROCM_FLASH_ATTN_VAL (in module vllm.utils) STR_TORCH_SDPA_ATTN_VAL (in module vllm.utils) STR_XFORMERS_ATTN_VAL (in module vllm.utils) stream (vllm.distributed.parallel_state.GraphCaptureContext attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) stream_continuous_usage_stats (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) stream_include_usage (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) stream_options (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) streaming (vllm.sequence.SequenceGroupBase attribute) StreamOptions (class in vllm.entrypoints.openai.protocol) strict (vllm.entrypoints.openai.protocol.JsonSchemaResponseFormat attribute) structural_tag (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.model_executor.guided_decoding.guided_fields.GuidedDecodingRequest attribute) (vllm.sampling_params.GuidedDecodingParams attribute) STRUCTURAL_TAG (vllm.v1.structured_output.backend_types.StructuredOutputOptions attribute) structural_tag_schema (vllm.entrypoints.openai.protocol.StructuralTag attribute) StructuralTag (class in vllm.entrypoints.openai.protocol) StructuralTagResponseFormat (class in vllm.entrypoints.openai.protocol) structured_decode() (vllm.v1.worker.tpu_model_runner.TPUModelRunner method) structured_output_key() (vllm.v1.structured_output.request.StructuredOutputRequest method) structured_output_request_ids (vllm.v1.core.sched.output.SchedulerOutput attribute) StructuredOutputBackend (class in vllm.v1.structured_output.backend_types) StructuredOutputGrammar (class in vllm.v1.structured_output.backend_types) StructuredOutputKey (in module vllm.v1.structured_output.backend_types) StructuredOutputManager (class in vllm.v1.structured_output) StructuredOutputOptions (class in vllm.v1.structured_output.backend_types) StructuredOutputRequest (class in vllm.v1.structured_output.request) structures (vllm.entrypoints.openai.protocol.StructuralTagResponseFormat attribute) submitted() (vllm.entrypoints.openai.run_batch.BatchProgressTracker method) submod_name (vllm.compilation.backends.SplitItem attribute) subparser_init() (vllm.entrypoints.cli.benchmark.base.BenchmarkSubcommandBase method) (vllm.entrypoints.cli.benchmark.main.BenchmarkSubcommand method) (vllm.entrypoints.cli.collect_env.CollectEnvSubcommand method) (vllm.entrypoints.cli.openai.ChatCommand method) (vllm.entrypoints.cli.openai.CompleteCommand method) (vllm.entrypoints.cli.serve.ServeSubcommand method) (vllm.entrypoints.cli.types.CLISubcommand method) subsequent_chunk_mask() (vllm.model_executor.models.minicpmo.MiniCPMO method) subtract_num_batched_tokens() (vllm.core.scheduler.SchedulingBudget method) subtract_num_seqs() (vllm.core.scheduler.SchedulingBudget method) subtuple() (in module vllm.worker.hpu_model_runner) success (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) SUCCESS (vllm.v1.executor.multiproc_executor.WorkerProc.ResponseStatus attribute) suffix (vllm.entrypoints.openai.protocol.CompletionRequest attribute) suffix_kv_lens (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) summarize_vllm_build_flags() (in module vllm.collect_env) SummaryStatsEntry (class in vllm.profiler.layerwise_profile) support_torch_compile() (in module vllm.compilation.decorators) SUPPORTED_DATASET_PATHS (vllm.benchmarks.datasets.AIMODataset attribute) (vllm.benchmarks.datasets.ConversationDataset attribute) (vllm.benchmarks.datasets.HuggingFaceDataset attribute) (vllm.benchmarks.datasets.InstructCoderDataset attribute) (vllm.benchmarks.datasets.NextEditPredictionDataset attribute) (vllm.benchmarks.datasets.VisionArenaDataset attribute) supported_dtypes (vllm.platforms.cpu.CpuPlatform property) (vllm.platforms.cuda.CudaPlatformBase property) (vllm.platforms.interface.Platform property) SUPPORTED_GPTQ_QUANT_TYPES (in module vllm.model_executor.layers.quantization.utils.quant_utils) SUPPORTED_GROUP_SIZES (in module vllm.model_executor.layers.quantization.utils.quant_utils) SUPPORTED_LAYOUTS (in module vllm.attention.ops.triton_flash_attention) SUPPORTED_QUANT_DTYPE_LIST (in module vllm.model_executor.layers.quantization.neuron_quant) SUPPORTED_QUANT_TYPES (vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama.ExllamaLinearKernel attribute) supported_quantization (vllm.platforms.interface.Platform attribute) (vllm.platforms.neuron.NeuronPlatform attribute) (vllm.platforms.rocm.RocmPlatform attribute) (vllm.platforms.tpu.TpuPlatform attribute) supported_runner_types (vllm.config.ModelConfig property) SUPPORTED_STRATEGIES (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8) supports_cross_encoding (vllm.model_executor.models.interfaces.SupportsCrossEncoding attribute) supports_cross_encoding() (in module vllm.model_executor.models.interfaces) supports_custom_op() (in module vllm.utils) supports_cutlass_24() (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsConfig static method) supports_dynamo() (in module vllm.utils) supports_fp8() (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) supports_gpu_multi_step() (vllm.spec_decode.draft_model_runner.TP1DraftModelRunner method) supports_kw() (in module vllm.utils) supports_lora (vllm.model_executor.models.interfaces.SupportsLoRA attribute) supports_lora() (in module vllm.model_executor.models.interfaces) supports_moe_ops (in module vllm._custom_ops) supports_multimodal (vllm.model_executor.models.interfaces.SupportsMultiModal attribute) supports_multimodal() (in module vllm.model_executor.models.interfaces) supports_mx() (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) supports_pp (vllm.model_executor.models.interfaces.SupportsPP attribute) supports_pp() (in module vllm.model_executor.models.interfaces) supports_transcription (vllm.model_executor.models.interfaces.SupportsTranscription attribute) supports_transcription() (in module vllm.model_executor.models.interfaces) supports_v0_only (vllm.model_executor.models.interfaces.SupportsV0Only attribute) supports_v0_only() (in module vllm.model_executor.models.interfaces) supports_v1() (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) SupportsCrossEncoding (class in vllm.model_executor.models.interfaces) SupportsHash (class in vllm.config) SupportsLoRA (class in vllm.model_executor.models.interfaces) SupportsMetricsInfo (class in vllm.config) SupportsMultiModal (class in vllm.model_executor.models.interfaces) SupportsPP (class in vllm.model_executor.models.interfaces) SupportsQuant (class in vllm.model_executor.models.interfaces) SupportsTranscription (class in vllm.model_executor.models.interfaces) SupportsV0Only (class in vllm.model_executor.models.interfaces) SWAP (vllm.core.scheduler.PreemptionMode attribute) swap() (vllm.core.block.cpu_gpu_block_allocator.CpuGpuBlockAllocator method) (vllm.core.block.interfaces.DeviceAwareBlockAllocator method) swap_blocks() (in module vllm._custom_ops) (vllm._ipex_ops.ipex_ops static method) (vllm.attention.backends.abstract.AttentionBackend static method) (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionBackend static method) (vllm.attention.backends.cpu_mla.CPUMLABackend static method) (vllm.attention.backends.flash_attn.FlashAttentionBackend static method) (vllm.attention.backends.flashinfer.FlashInferBackend static method) (vllm.attention.backends.hpu_attn.HPUAttentionBackend static method) (vllm.attention.backends.ipex_attn.IpexAttnBackend static method) (vllm.attention.backends.mla.common.MLACommonBackend static method) (vllm.attention.backends.pallas.PallasAttentionBackend static method) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionBackend static method) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend static method) (vllm.attention.backends.torch_sdpa.TorchSDPABackend static method) (vllm.attention.backends.xformers.XFormersBackend static method) (vllm.attention.ops.hpu_paged_attn.HPUPagedAttention static method) (vllm.attention.ops.paged_attn.PagedAttention static method) (vllm.v1.attention.backends.pallas.PallasAttentionBackend static method) swap_dict_values() (in module vllm.utils) swap_in() (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.worker.cache_engine.CacheEngine method) (vllm.worker.cpu_worker.CPUCacheEngine method) swap_out() (vllm.core.block.interfaces.BlockAllocator method) (vllm.core.block.naive_block.NaiveBlockAllocator method) (vllm.core.block.prefix_caching_block.PrefixCachingBlockAllocator method) (vllm.core.block_manager.SelfAttnBlockSpaceManager method) (vllm.core.interfaces.BlockSpaceManager method) (vllm.core.placeholder_block_space_manager.PlaceholderBlockSpaceManager method) (vllm.worker.cache_engine.CacheEngine method) (vllm.worker.cpu_worker.CPUCacheEngine method) swap_row() (vllm.v1.worker.block_table.BlockTable method) swap_space (vllm.config.CacheConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) swap_states() (vllm.v1.worker.gpu_input_batch.InputBatch method) SWAPPED (vllm.sequence.SequenceStatus attribute) swapped_out (vllm.core.scheduler.SchedulerRunningOutputs attribute) SwiGLUActivation (class in vllm.model_executor.models.jais) Swish (class in vllm.model_executor.models.phi4mm_utils) swizzle_blockscale() (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4FusedMoE method) (vllm.model_executor.layers.quantization.modelopt.ModelOptNvFp4LinearMethod method) sym_tensor_indices (vllm.compilation.backends.VllmBackend attribute) symmetric (vllm.compilation.fusion.QuantKey attribute) sync_weight_attrs() (vllm.model_executor.layers.linear.QKVCrossParallelLinear method) SyncMPClient (class in vllm.v1.engine.core_client) system_efficiency (vllm.spec_decode.metrics.SpecDecodeWorkerMetrics attribute) SystemEnv (in module vllm.collect_env) T T (in module vllm.adapter_commons.models) (in module vllm.attention.backends.abstract) (in module vllm.attention.backends.mla.common) (in module vllm.config) (in module vllm.engine.arg_utils) (in module vllm.executor.multiproc_worker_utils) (in module vllm.model_executor.models.interfaces_base) (in module vllm.utils) (in module vllm.v1.utils) (in module vllm.worker.model_runner_base) t (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) T5RelativeAttentionLogitBias (class in vllm.model_executor.models.phi4mm_utils) T_co (in module vllm.model_executor.models.interfaces_base) TablePrinter (class in vllm.profiler.utils) tag (vllm.device_allocator.cumem.AllocationData attribute) tags (vllm.engine.multiprocessing.RPCWakeUpRequest attribute) take_events() (vllm.v1.core.block_pool.BlockPool method) (vllm.v1.core.kv_cache_manager.KVCacheManager method) (vllm.v1.request.Request method) tanh() (in module vllm.attention.ops.triton_decode_attention) target (vllm.multimodal.processing.BoundPromptUpdate property) (vllm.multimodal.processing.PromptUpdate attribute) target_logits_indices (vllm.v1.spec_decode.metadata.SpecDecodeMetadata attribute) target_model_config (vllm.config.SpeculativeConfig attribute) target_modules (vllm.lora.peft_helper.PEFTHelper attribute) target_parallel_config (vllm.config.SpeculativeConfig attribute) TargetModelRunner (class in vllm.spec_decode.target_model_runner) TargetSeqId (in module vllm.spec_decode.batch_expansion) (in module vllm.spec_decode.mqa_scorer) task (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) TASK_HANDLERS (in module vllm.entrypoints.openai.api_server) task_id (vllm.executor.multiproc_worker_utils.Result attribute) TaskOption (in module vllm.config) TAttentionMetadata (in module vllm.attention.backends.utils) Telechat2Config (class in vllm.transformers_utils.configs.telechat2) TeleChat2ForCausalLM (class in vllm.model_executor.models.telechat2) TeleChat2Model (class in vllm.model_executor.models.telechat2) TeleFLMForCausalLM (class in vllm.model_executor.models.teleflm) TeleFLMModel (class in vllm.model_executor.models.teleflm) temp_dir (in module vllm.model_executor.model_loader.weight_utils) temperature (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) (vllm.sampling_params.BeamSearchParams attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) temperatures (vllm.model_executor.sampling_metadata.SamplingTensors attribute) TENSOR (vllm.model_executor.layers.fused_moe.layer.FusedMoeWeightScaleSupported attribute) tensor_hash() (vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector.MooncakeStoreConnector static method) (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakePipe method) tensor_model_parallel_all_gather() (in module vllm.distributed.communication_op) tensor_model_parallel_all_reduce() (in module vllm.distributed.communication_op) tensor_model_parallel_gather() (in module vllm.distributed.communication_op) tensor_model_parallel_reduce_scatter() (in module vllm.distributed.communication_op) tensor_parallel() (vllm.model_executor.models.transformers.TransformersModel method) tensor_parallel_size (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) tensorize_vllm_model() (in module vllm.model_executor.model_loader.tensorizer) TENSORIZER (vllm.config.LoadFormat attribute) tensorizer_uri (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) tensorizer_weights_iterator() (in module vllm.model_executor.model_loader.tensorizer) TensorizerAgent (class in vllm.model_executor.model_loader.tensorizer) TensorizerArgs (class in vllm.model_executor.model_loader.tensorizer) TensorizerConfig (class in vllm.model_executor.model_loader.tensorizer) TensorizerLoader (class in vllm.model_executor.model_loader.tensorizer_loader) TensorMetadata (in module vllm.distributed.parallel_state) tensors (vllm.sequence.IntermediateTensors attribute) (vllm.v1.kv_cache_interface.KVCacheConfig attribute) terminate_if_errored() (in module vllm.entrypoints.launcher) terminate_worker() (vllm.executor.multiproc_worker_utils.ProcessWorkerWrapper method) terminated (vllm.v1.structured_output.backend_guidance.GuidanceGrammar attribute) text (vllm.beam_search.BeamSearchSequence attribute) (vllm.entrypoints.openai.protocol.CompletionResponseChoice attribute) (vllm.entrypoints.openai.protocol.CompletionResponseStreamChoice attribute) (vllm.entrypoints.openai.protocol.RerankDocument attribute) (vllm.entrypoints.openai.protocol.TranscriptionResponse attribute) (vllm.entrypoints.openai.protocol.TranscriptionResponseVerbose attribute) (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) (vllm.outputs.CompletionOutput attribute) text_1 (vllm.entrypoints.openai.protocol.ScoreRequest attribute) text_2 (vllm.entrypoints.openai.protocol.ScoreRequest attribute) text_offset (vllm.entrypoints.openai.protocol.CompletionLogProbs attribute) TextPrompt (class in vllm.inputs.data) TextTokensPrompt (class in vllm.entrypoints.openai.serving_engine) tgt_sizes (vllm.model_executor.models.minicpmv.MiniCPMVImagePixelInputs attribute) thinker_uses_mrope() (in module vllm.transformers_utils.config) tie_weights() (vllm.model_executor.layers.vocab_parallel_embedding.ParallelLMHead method) tile_scheduler_metadata (vllm.v1.attention.backends.mla.flashmla.FlashMLADecodeMetadata attribute) tile_tag (vllm.transformers_utils.configs.deepseek_vl2.DeepseekVLV2Config attribute) time_decode_requests (vllm.engine.metrics_types.Stats attribute) time_e2e_requests (vllm.engine.metrics_types.Stats attribute) time_in_queue (vllm.sequence.RequestMetrics attribute) time_in_queue_requests (vllm.engine.metrics_types.Stats attribute) time_inference_requests (vllm.engine.metrics_types.Stats attribute) time_per_output_tokens_iter (vllm.engine.metrics_types.Stats attribute) time_prefill_requests (vllm.engine.metrics_types.Stats attribute) time_queue_requests (vllm.engine.metrics_types.Stats attribute) time_to_first_tokens_iter (vllm.engine.metrics_types.Stats attribute) TIMEOUT_KEEP_ALIVE (in module vllm.entrypoints.api_server) (in module vllm.entrypoints.openai.api_server) Timer (class in vllm.spec_decode.util) (in module vllm.spec_decode.metrics) timestamp (vllm.utils.MemorySnapshot attribute) (vllm.v1.engine.EngineCoreEvent attribute) (vllm.v1.engine.EngineCoreOutputs attribute) timestamp_granularities (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) TModelInputForCPU (in module vllm.worker.cpu_model_runner) TModelInputForGPU (in module vllm.worker.model_runner) TModelInputForHPU (in module vllm.worker.hpu_model_runner) TModelInputForXPU (in module vllm.worker.xpu_model_runner) to_be_finished (vllm.sequence.SequenceGroupBase attribute) to_beam_search_params() (vllm.entrypoints.openai.protocol.ChatCompletionRequest method) (vllm.entrypoints.openai.protocol.CompletionRequest method) to_dict() (vllm.transformers_utils.configs.arctic.ArcticConfig method) to_enc_dec_tuple_list() (in module vllm.inputs.data) to_int() (vllm.platforms.interface.DeviceCapability method) to_pooling_params() (vllm.entrypoints.openai.protocol.ClassificationRequest method) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest method) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest method) (vllm.entrypoints.openai.protocol.RerankRequest method) (vllm.entrypoints.openai.protocol.ScoreRequest method) to_range() (vllm.multimodal.processing.PlaceholderFeaturesInfo method) to_sampling_params() (vllm.entrypoints.openai.protocol.ChatCompletionRequest method) (vllm.entrypoints.openai.protocol.CompletionRequest method) (vllm.entrypoints.openai.protocol.TranscriptionRequest method) token (vllm.entrypoints.openai.protocol.ChatCompletionLogProb attribute) token_bitmask (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) token_budget (vllm.core.scheduler.SchedulingBudget attribute) token_chunk_size (vllm.core.scheduler.ScheduledSequenceGroup attribute) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.sequence.SequenceGroupMetadataDelta attribute) token_id_dtype (vllm.model_executor.layers.spec_decode_base_sampler.SpecDecodeBaseSampler property) token_ids (vllm.core.block.cpu_gpu_block_allocator.NullBlock property) (vllm.core.block.interfaces.Block property) (vllm.core.block.naive_block.NaiveBlock property) (vllm.core.block.prefix_caching_block.PrefixCachingBlock property) (vllm.distributed.kv_events.BlockStored attribute) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.ReqMeta attribute) (vllm.outputs.CompletionOutput attribute) (vllm.spec_decode.interfaces.SpeculativeScores attribute) (vllm.v1.core.kv_cache_utils.BlockHashType attribute) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) token_indices_sorted_by_lora_ids (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta attribute) token_inputs() (in module vllm.inputs.data) token_len (vllm.model_executor.models.ultravox.UltravoxAudioFeatureInputs attribute) token_logprobs (vllm.entrypoints.openai.protocol.CompletionLogProbs attribute) token_lora_indices (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase property) token_lora_mapping (vllm.lora.ops.triton_ops.lora_kernel_metadata.LoRAKernelMeta attribute) token_pooling (vllm.transformers_utils.configs.deepseek_vl2.MlpProjectorConfig attribute) token_type_ids (vllm.inputs.data.TokenInputs attribute) (vllm.inputs.data.TokensPrompt attribute) (vllm.multimodal.inputs.MultiModalInputs attribute) (vllm.sequence.Sequence property) (vllm.sequence.SequenceGroup property) (vllm.sequence.SequenceGroupMetadata attribute) (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) token_types (vllm.worker.model_runner.ModelInputForGPU attribute) TokenId (in module vllm.spec_decode.batch_expansion) TokenInputs (class in vllm.inputs.data) tokenization() (in module vllm.entrypoints.openai.api_server) tokenize() (in module vllm.entrypoints.openai.api_server) (vllm.model_executor.models.ovis.VisualTokenizer method) tokenize_with_images() (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor method) TokenizeChatRequest (class in vllm.entrypoints.openai.protocol) TokenizeCompletionRequest (class in vllm.entrypoints.openai.protocol) tokenizer (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) (vllm.engine.llm_engine.LLMEngine attribute) (vllm.entrypoints.openai.serving_engine.ServeContext attribute) (vllm.inputs.registry.InputProcessingContext attribute) (vllm.multimodal.processing.BoundPromptUpdate attribute) (vllm.v1.engine.logprobs.LogprobsProcessor attribute) tokenizer_class (vllm.transformers_utils.processors.deepseek_vl2.DeepseekVLV2Processor attribute) (vllm.transformers_utils.processors.ovis.OvisProcessor attribute) tokenizer_data (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) tokenizer_hash (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig attribute) tokenizer_info (vllm.model_executor.guided_decoding.xgrammar_decoding.XGrammarLogitsProcessor attribute) tokenizer_info() (vllm.model_executor.guided_decoding.xgrammar_decoding.GrammarConfig static method) tokenizer_mode (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) tokenizer_pool_config (vllm.config.ParallelConfig attribute) tokenizer_pool_extra_config (vllm.engine.arg_utils.EngineArgs attribute) tokenizer_pool_size (vllm.engine.arg_utils.EngineArgs attribute) tokenizer_pool_type (vllm.engine.arg_utils.EngineArgs attribute) tokenizer_revision (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) TokenizerBase (class in vllm.transformers_utils.tokenizer_base) TokenizerData (class in vllm.model_executor.guided_decoding.xgrammar_decoding) TokenizerDataCache (class in vllm.model_executor.guided_decoding.xgrammar_decoding) TokenizeRequest (in module vllm.entrypoints.openai.protocol) TokenizeResponse (class in vllm.entrypoints.openai.protocol) TokenizerGroup (class in vllm.transformers_utils.tokenizer_group) TokenizerMode (in module vllm.config) TokenizerPoolConfig (class in vllm.config) TokenizerRegistry (class in vllm.transformers_utils.tokenizer_base) tokens (vllm.beam_search.BeamSearchSequence attribute) (vllm.entrypoints.openai.protocol.CompletionLogProbs attribute) (vllm.entrypoints.openai.protocol.DetokenizeRequest attribute) (vllm.entrypoints.openai.protocol.TokenizeResponse attribute) (vllm.entrypoints.openai.protocol.TranscriptionSegment attribute) (vllm.multimodal.processing.PlaceholderFeaturesInfo attribute) TOKENS_BLOCK (in module vllm.lora.ops.xla_ops.pallas) TokensPrompt (class in vllm.inputs.data) tolists() (vllm.v1.outputs.LogprobsTensors method) tool_call_id (vllm.entrypoints.chat_utils.ConversationMessage attribute) (vllm.entrypoints.chat_utils.CustomChatCompletionMessageParam attribute) TOOL_CALL_REGEX (vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser.PythonicToolParser attribute) tool_calls (vllm.entrypoints.chat_utils.ConversationMessage attribute) (vllm.entrypoints.chat_utils.CustomChatCompletionMessageParam attribute) (vllm.entrypoints.openai.protocol.ChatMessage attribute) (vllm.entrypoints.openai.protocol.DeltaMessage attribute) (vllm.entrypoints.openai.protocol.ExtractedToolCallInformation attribute) tool_choice (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) tool_parsers (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParserManager attribute) ToolCall (class in vllm.entrypoints.openai.protocol) ToolParser (class in vllm.entrypoints.openai.tool_parsers.abstract_tool_parser) ToolParserManager (class in vllm.entrypoints.openai.tool_parsers.abstract_tool_parser) tools (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) tools_called (vllm.entrypoints.openai.protocol.ExtractedToolCallInformation attribute) Top1Proposer (class in vllm.spec_decode.top1_proposer) top_k (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) top_ks (vllm.model_executor.sampling_metadata.SamplingTensors attribute) top_logprobs (vllm.entrypoints.openai.protocol.ChatCompletionLogProbsContent attribute) (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionLogProbs attribute) top_n (vllm.entrypoints.openai.protocol.RerankRequest attribute) top_p (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.TranscriptionRequest attribute) (vllm.sampling_params.SamplingParams attribute) (vllm.v1.sample.metadata.SamplingMetadata attribute) (vllm.v1.sample.tpu.metadata.TPUSupportedSamplingMetadata attribute) top_ps (vllm.model_executor.sampling_metadata.SamplingTensors attribute) topic (vllm.config.KVEventsConfig attribute) topk_softmax() (in module vllm._custom_ops) TopKTopPSampler (class in vllm.v1.sample.ops.topk_topp_sampler) Torch25CustomGraphPass (class in vllm.compilation.torch25_custom_graph_pass) TORCH_BITBLAS_STORAGE_DTYPE (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig attribute) torch_channelwise_w8a8_scaled_mm() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) torch_compile_start_time (in module vllm.compilation.monitor) TORCH_DEVICE_IDENTITY (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) TORCH_DTYPE (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig attribute) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig attribute) TORCH_DTYPE_TO_NEURON_AMP (in module vllm.model_executor.model_loader.neuron) (in module vllm.model_executor.model_loader.neuronx_distributed) TORCH_DTYPE_TO_NUMPY_DTYPE (in module vllm.utils) torch_memory (vllm.utils.MemorySnapshot attribute) torch_peak (vllm.utils.MemorySnapshot attribute) torch_peak_increase (vllm.utils.MemoryProfilingResult attribute) torch_per_tensor_w8a8_scaled_mm() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) torch_per_token_w8a8_scaled_mm() (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) TORCH_STORAGE_DTYPE (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig attribute) torch_storage_dtype (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig property) torch_vllm_inplace_fused_experts() (in module vllm.model_executor.layers.fused_moe.fused_moe) torch_vllm_outplace_fused_experts() (in module vllm.model_executor.layers.fused_moe.fused_moe) torchao_quantize_param_data() (in module vllm.model_executor.layers.quantization.torchao) TorchAOConfig (class in vllm.model_executor.layers.quantization.torchao) TorchAOLinearMethod (class in vllm.model_executor.layers.quantization.torchao) TorchCompileWrapperWithCustomDispatcher (class in vllm.compilation.wrapper) TorchSDPABackend (class in vllm.attention.backends.torch_sdpa) TorchSDPABackendImpl (class in vllm.attention.backends.torch_sdpa) TorchSDPAMetadata (class in vllm.attention.backends.torch_sdpa) TorchSDPAMetadataBuilder (class in vllm.attention.backends.torch_sdpa) total (vllm.utils.CacheInfo attribute) total_input (vllm.benchmarks.serve.BenchmarkMetrics attribute) total_num_scheduled_tokens (vllm.v1.core.sched.output.SchedulerOutput attribute) total_output (vllm.benchmarks.serve.BenchmarkMetrics attribute) total_token_throughput (vllm.benchmarks.serve.BenchmarkMetrics attribute) total_tokens (vllm.entrypoints.openai.protocol.RerankUsage attribute) (vllm.entrypoints.openai.protocol.UsageInfo attribute) touch() (vllm.utils.LRUCache method) (vllm.v1.core.block_pool.BlockPool method) tower_model (vllm.model_executor.models.module_mapping.MultiModelKeys attribute) TP1DraftModelRunner (class in vllm.spec_decode.draft_model_runner) tpot (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) TPU (vllm.platforms.interface.PlatformEnum attribute) tpu_platform_plugin() (in module vllm.platforms) TpuCommunicator (class in vllm.distributed.device_communicators.tpu_communicator) TPUInt8LinearMethod (class in vllm.model_executor.layers.quantization.tpu_int8) TPUModelRunner (class in vllm.v1.worker.tpu_model_runner) (class in vllm.worker.tpu_model_runner) TpuPlatform (class in vllm.platforms.tpu) TPUSupportedSamplingMetadata (class in vllm.v1.sample.tpu.metadata) TPUWorker (class in vllm.v1.worker.tpu_worker) (class in vllm.worker.tpu_worker) trace (vllm.profiler.layerwise_profile.ModelStatsEntry attribute) TRACE_HEADERS (in module vllm.tracing) trace_headers (vllm.engine.multiprocessing.RPCProcessRequest attribute) traced_files (vllm.config.CompilationConfig attribute) tracing_enabled (vllm.engine.multiprocessing.RPCStartupResponse attribute) track_batchsize (in module vllm.forward_context) transcription() (in module vllm.entrypoints.openai.api_server) transcription_stream_generator() (vllm.entrypoints.openai.serving_transcription.OpenAIServingTranscription method) TranscriptionRequest (class in vllm.entrypoints.openai.protocol) TranscriptionResponse (class in vllm.entrypoints.openai.protocol) TranscriptionResponseStreamChoice (class in vllm.entrypoints.openai.protocol) TranscriptionResponseVerbose (class in vllm.entrypoints.openai.protocol) TranscriptionSegment (class in vllm.entrypoints.openai.protocol) TranscriptionStreamResponse (class in vllm.entrypoints.openai.protocol) TranscriptionWord (class in vllm.entrypoints.openai.protocol) transfer_sync() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) transform_block_tables_for_indirect_load() (in module vllm.attention.ops.nki_flash_attn) Transformer (class in vllm.model_executor.models.pixtral) TransformerBlock (class in vllm.model_executor.models.pixtral) (class in vllm.model_executor.models.qwen_vl) TransformerEncoderBase (class in vllm.model_executor.models.phi4mm_audio) TRANSFORMERS (vllm.config.ModelImpl attribute) TRANSFORMERS_NEURONX (vllm.platforms.neuron.NeuronFramework attribute) TransformersForCausalLM (class in vllm.model_executor.models.transformers) TransformersModel (class in vllm.model_executor.models.transformers) transpose_and_pad() (vllm.attention.ops.blocksparse_attention.interface.LocalStridedBlockSparseAttn static method) transpose_and_unpad() (vllm.attention.ops.blocksparse_attention.interface.LocalStridedBlockSparseAttn static method) transpose_for_scores() (vllm.model_executor.models.blip2.Blip2QFormerMultiHeadAttention method) transpose_p_local() (in module vllm.attention.ops.nki_flash_attn) triggers (vllm.entrypoints.openai.protocol.StructuralTagResponseFormat attribute) trim_attn_metadata() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) trim_string_back() (in module vllm.profiler.utils) trim_string_front() (in module vllm.profiler.utils) TRITON3 (in module vllm.model_executor.layers.mamba.ops.mamba_ssm) TRITON_22 (in module vllm.model_executor.layers.mamba.ops.ssd_chunk_scan) (in module vllm.model_executor.layers.mamba.ops.ssd_combined) triton_attention() (in module vllm.attention.ops.triton_flash_attention) triton_attention_rocm (in module vllm.attention.ops.triton_flash_attention) triton_scaled_mm() (in module vllm.model_executor.layers.quantization.compressed_tensors.triton_scaled_mm) TritonAttentionBackend (class in vllm.v1.attention.backends.triton_attn) TritonAttentionImpl (class in vllm.v1.attention.backends.triton_attn) TritonLanguagePlaceholder (class in vllm.triton_utils.importing) TritonMLABackend (class in vllm.attention.backends.triton_mla) (class in vllm.v1.attention.backends.mla.triton_mla) TritonMLAImpl (class in vllm.attention.backends.triton_mla) (class in vllm.v1.attention.backends.mla.triton_mla) TritonPlaceholder (class in vllm.triton_utils.importing) TritonScaledMMLinearKernel (class in vllm.model_executor.layers.quantization.kernels.scaled_mm.triton) truncate_prompt_tokens (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.ClassificationRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) (vllm.entrypoints.openai.protocol.RerankRequest attribute) (vllm.entrypoints.openai.protocol.ScoreRequest attribute) (vllm.entrypoints.openai.serving_engine.ServeContext attribute) (vllm.sampling_params.SamplingParams attribute) truncate_tool_call_ids() (in module vllm.transformers_utils.tokenizers.mistral) trust_remote_code (vllm.config.ModelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) try_get_generation_config() (in module vllm.transformers_utils.config) (vllm.config.ModelConfig method) try_get_local_file() (in module vllm.transformers_utils.config) try_get_optimal_moe_config() (in module vllm.model_executor.layers.fused_moe.fused_moe) ts (vllm.distributed.kv_events.EventBatch attribute) ttft (vllm.benchmarks.endpoint_request_func.RequestFuncOutput attribute) type (vllm.entrypoints.chat_utils.ChatCompletionContentPartAudioParam attribute) (vllm.entrypoints.chat_utils.ChatCompletionContentPartImageEmbedsParam attribute) (vllm.entrypoints.chat_utils.ChatCompletionContentPartVideoParam attribute) (vllm.entrypoints.openai.protocol.ChatCompletionNamedToolChoiceParam attribute) (vllm.entrypoints.openai.protocol.ChatCompletionToolsParam attribute) (vllm.entrypoints.openai.protocol.DeltaToolCall attribute) (vllm.entrypoints.openai.protocol.ErrorResponse attribute) (vllm.entrypoints.openai.protocol.ResponseFormat attribute) (vllm.entrypoints.openai.protocol.StructuralTagResponseFormat attribute) (vllm.entrypoints.openai.protocol.ToolCall attribute) (vllm.inputs.data.EmbedsInputs attribute) (vllm.inputs.data.TokenInputs attribute) (vllm.inputs.parse.ParsedEmbedsPrompt attribute) (vllm.inputs.parse.ParsedStrPrompt attribute) (vllm.inputs.parse.ParsedTextPrompt attribute) (vllm.inputs.parse.ParsedTokensPrompt attribute) (vllm.model_executor.models.aya_vision.AyaVisionImagePixelInputs attribute) (vllm.model_executor.models.blip2.Blip2ImageEmbeddingInputs attribute) (vllm.model_executor.models.blip2.Blip2ImagePixelInputs attribute) (vllm.model_executor.models.chameleon.ChameleonImagePixelInputs attribute) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2ImagePixelInputs attribute) (vllm.model_executor.models.deepseek_vl2.DeepseekVL2VImageEmbeddingInputs attribute) (vllm.model_executor.models.florence2.Florence2ImagePixelInputs attribute) (vllm.model_executor.models.fuyu.FuyuImagePatchInputs attribute) (vllm.model_executor.models.gemma3_mm.Gemma3ImagePixelInputs attribute) (vllm.model_executor.models.glm4v.GLMVImagePixelInputs attribute) (vllm.model_executor.models.idefics3.Idefics3ImageEmbeddingInputs attribute) (vllm.model_executor.models.idefics3.Idefics3ImagePixelInputs attribute) (vllm.model_executor.models.internvl.InternVLImageEmbeddingInputs attribute) (vllm.model_executor.models.internvl.InternVLImagePixelInputs attribute) (vllm.model_executor.models.kimi_vl.KimiVLImagePixelInputs attribute) (vllm.model_executor.models.llava.LlavaImageEmbeddingInputs attribute) (vllm.model_executor.models.llava.LlavaImagePixelInputs attribute) (vllm.model_executor.models.llava.PixtralHFImagePixelInputs attribute) (vllm.model_executor.models.llava_next.LlavaNextImageEmbeddingInputs attribute) (vllm.model_executor.models.llava_next.LlavaNextImagePixelInputs attribute) (vllm.model_executor.models.llava_next_video.LlavaNextVideoPixelInputs attribute) (vllm.model_executor.models.llava_onevision.LlavaOnevisionImageEmbeddingInputs attribute) (vllm.model_executor.models.llava_onevision.LlavaOnevisionImagePixelInputs attribute) (vllm.model_executor.models.llava_onevision.LlavaOnevisionVideoPixelInputs attribute) (vllm.model_executor.models.minicpmo.MiniCPMOAudioEmbeddingInputs attribute) (vllm.model_executor.models.minicpmo.MiniCPMOAudioFeatureInputs attribute) (vllm.model_executor.models.minicpmv.MiniCPMVImageEmbeddingInputs attribute) (vllm.model_executor.models.minicpmv.MiniCPMVImagePixelInputs attribute) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ImageEmbeddingInputs attribute) (vllm.model_executor.models.minimax_vl_01.MiniMaxVL01ImagePixelInputs attribute) (vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs attribute) (vllm.model_executor.models.mllama.MllamaImagePixelInputs attribute) (vllm.model_executor.models.mllama4.Llama4ImagePatchInputs attribute) (vllm.model_executor.models.ovis.OvisImagePatchInputs attribute) (vllm.model_executor.models.paligemma.PaliGemmaImageEmbeddingInputs attribute) (vllm.model_executor.models.paligemma.PaliGemmaImagePixelInputs attribute) (vllm.model_executor.models.phi3v.Phi3VImageEmbeddingInputs attribute) (vllm.model_executor.models.phi3v.Phi3VImagePixelInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMAudioEmbeddingInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMAudioFeatureInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMImageEmbeddingInputs attribute) (vllm.model_executor.models.phi4mm.Phi4MMImagePixelInputs attribute) (vllm.model_executor.models.pixtral.PixtralImagePixelInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLImagePixelInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLVideoEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLVideoPixelInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLImagePixelInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLVideoEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLVideoPixelInputs attribute) (vllm.model_executor.models.qwen_vl.QwenImageEmbeddingInputs attribute) (vllm.model_executor.models.qwen_vl.QwenImagePixelInputs attribute) (vllm.model_executor.models.skyworkr1v.SkyworkR1VImageEmbeddingInputs attribute) (vllm.model_executor.models.skyworkr1v.SkyworkR1VImagePixelInputs attribute) (vllm.model_executor.models.ultravox.UltravoxAudioEmbeddingInputs attribute) (vllm.model_executor.models.ultravox.UltravoxAudioFeatureInputs attribute) (vllm.multimodal.inputs.MultiModalInputs attribute) (vllm.v1.engine.EngineCoreEvent attribute) (vllm.v1.stats.common.RequestStatsUpdate attribute) type_id (vllm.v1.kv_cache_interface.FullAttentionSpec property) (vllm.v1.kv_cache_interface.KVCacheSpec property) (vllm.v1.kv_cache_interface.SlidingWindowSpec property) TYPE_MAP (vllm.model_executor.layers.quantization.awq_marlin.AWQMarlinConfig attribute) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig attribute) (vllm.model_executor.layers.quantization.gptq_marlin.GPTQMarlinConfig attribute) TypeHint (in module vllm.engine.arg_utils) TypeHintT (in module vllm.engine.arg_utils) TypicalAcceptanceSampler (class in vllm.model_executor.layers.typical_acceptance_sampler) U U (in module vllm.utils) uint() (vllm.scalar_type.ScalarType class method) uint2b2 (vllm.scalar_type.scalar_types attribute) uint3b4 (vllm.scalar_type.scalar_types attribute) uint4 (vllm.scalar_type.scalar_types attribute) uint4b8 (vllm.scalar_type.scalar_types attribute) uint8 (vllm.scalar_type.scalar_types attribute) uint8b128 (vllm.scalar_type.scalar_types attribute) UltravoxAudioEmbeddingInputs (class in vllm.model_executor.models.ultravox) UltravoxAudioFeatureInputs (class in vllm.model_executor.models.ultravox) UltravoxAudioInputs (in module vllm.model_executor.models.ultravox) UltravoxConfig (class in vllm.transformers_utils.configs.ultravox) UltravoxDummyInputsBuilder (class in vllm.model_executor.models.ultravox) UltravoxModel (class in vllm.model_executor.models.ultravox) UltravoxMultiModalProcessor (class in vllm.model_executor.models.ultravox) UltravoxProcessingInfo (class in vllm.model_executor.models.ultravox) UltravoxProjector (class in vllm.model_executor.models.ultravox) unfold_tensor() (in module vllm.model_executor.models.phi4mm_utils) unified_attention() (in module vllm.attention.layer) (in module vllm.attention.ops.triton_unified_attention) unified_attention_fake() (in module vllm.attention.layer) unified_attention_with_output() (in module vllm.attention.layer) unified_attention_with_output_fake() (in module vllm.attention.layer) unify_hybrid_kv_cache_specs() (in module vllm.v1.core.kv_cache_utils) unify_kv_cache_configs() (in module vllm.v1.core.kv_cache_utils) UNINITIALIZED (vllm.model_executor.layers.quantization.gptq.ExllamaState attribute) union_dict_and_str() (in module vllm.engine.arg_utils) UniProcExecutor (class in vllm.executor.uniproc_executor) (class in vllm.v1.executor.abstract) UniProcExecutorAsync (in module vllm.executor.uniproc_executor) UNKNOWN (vllm.platforms.interface.CpuArchEnum attribute) UNKNOWN_CONTEXT (vllm.usage.usage_lib.UsageContext attribute) unload_lora_adapter() (vllm.entrypoints.openai.serving_models.OpenAIServingModels method) UnloadLoRAAdapterRequest (class in vllm.entrypoints.openai.protocol) unmap_and_release() (in module vllm.device_allocator.cumem) unpack_4bit_u8() (vllm.model_executor.layers.quantization.hqq_marlin.HQQweightParameter method) unpack_cols() (in module vllm.model_executor.layers.quantization.utils.quant_utils) unpack_data() (vllm.model_executor.models.mllama.MllamaForConditionalGeneration method) unpack_gptq_qweight() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) unpack_gptq_qzeros() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) unpack_int_data() (in module vllm.model_executor.layers.quantization.aqlm) unpack_quantized_values_into_int32() (in module vllm.model_executor.layers.quantization.utils.quant_utils) UNQUANTIZED_TYPES (in module vllm.model_executor.layers.quantization.gguf) UnquantizedEmbeddingMethod (class in vllm.model_executor.layers.vocab_parallel_embedding) UnquantizedFusedMoEMethod (class in vllm.model_executor.layers.fused_moe.layer) UnquantizedLinearMethod (class in vllm.model_executor.layers.linear) UnreadyWorkerProcHandle (class in vllm.v1.executor.multiproc_executor) UNSPECIFIED (vllm.platforms.interface.PlatformEnum attribute) UnspecifiedPlatform (class in vllm.platforms.interface) UNUSED (vllm.model_executor.layers.quantization.gptq.ExllamaState attribute) unwrap_model() (in module vllm.worker.hpu_model_runner) update() (vllm.core.block.block_table.BlockTable method) (vllm.core.block.common.BlockList method) (vllm.core.evictor.Evictor method) (vllm.core.evictor.LRUEvictor method) (vllm.sequence.HiddenStates method) (vllm.v1.engine.detokenizer.BaseIncrementalDetokenizer method) (vllm.v1.engine.detokenizer.IncrementalDetokenizer method) update_cache() (vllm.model_executor.models.phi4mm_utils.CausalConv1D method) update_environment_variables() (in module vllm.utils) (vllm.worker.worker_base.WorkerWrapperBase method) update_freqs_cache() (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VisionRotaryEmbedding method) (vllm.model_executor.models.qwen2_vl.Qwen2VisionRotaryEmbedding method) update_from() (vllm.v1.stats.common.RequestStats method) update_from_events() (vllm.v1.metrics.stats.IterationStats method) update_from_finished_request() (vllm.v1.metrics.stats.IterationStats method) update_from_generation_config() (vllm.sampling_params.SamplingParams method) update_from_output() (vllm.v1.core.sched.interface.SchedulerInterface method) (vllm.v1.core.sched.scheduler.Scheduler method) (vllm.v1.engine.logprobs.LogprobsProcessor method) (vllm.v1.metrics.stats.IterationStats method) update_from_tokenizer() (vllm.sampling_params.SamplingParams method) update_iteration_stats() (vllm.v1.metrics.stats.LoRARequestStates method) update_last_access() (vllm.core.block.prefix_caching_block.LastAccessBlocksTracker method) update_metadata() (vllm.lora.punica_wrapper.punica_base.PunicaWrapperABC method) (vllm.lora.punica_wrapper.punica_base.PunicaWrapperBase method) (vllm.lora.punica_wrapper.punica_gpu.PunicaWrapperGPU method) update_num_cached_tokens() (vllm.sequence.SequenceData method) update_num_computed_tokens() (vllm.sequence.SequenceData method) (vllm.sequence.SequenceGroup method) update_seq_blocks_last_access() (vllm.core.block.prefix_caching_block.LastAccessBlocksTracker method) update_sizes_for_sequence_parallelism() (vllm.config.VllmConfig method) update_state_after_alloc() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector.LMCacheConnectorV1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnectorScheduler method) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnector method) update_tensor_inplace() (in module vllm.model_executor.layers.quantization.utils.layer_utils) UpdateMode (class in vllm.multimodal.processing) upload_data() (in module vllm.entrypoints.openai.run_batch) url (vllm.assets.audio.AudioAsset property) (vllm.entrypoints.chat_utils.AudioURL attribute) (vllm.entrypoints.chat_utils.VideoURL attribute) (vllm.entrypoints.openai.protocol.BatchRequestInput attribute) usage (vllm.entrypoints.openai.protocol.ChatCompletionResponse attribute) (vllm.entrypoints.openai.protocol.ChatCompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.ClassificationResponse attribute) (vllm.entrypoints.openai.protocol.CompletionResponse attribute) (vllm.entrypoints.openai.protocol.CompletionStreamResponse attribute) (vllm.entrypoints.openai.protocol.EmbeddingResponse attribute) (vllm.entrypoints.openai.protocol.PoolingResponse attribute) (vllm.entrypoints.openai.protocol.RerankResponse attribute) (vllm.entrypoints.openai.protocol.ScoreResponse attribute) (vllm.entrypoints.openai.protocol.TranscriptionStreamResponse attribute) (vllm.utils.LRUCache property) (vllm.v1.core.kv_cache_manager.KVCacheManager property) usage_message (in module vllm.usage.usage_lib) UsageContext (class in vllm.usage.usage_lib) UsageInfo (class in vllm.entrypoints.openai.protocol) UsageMessage (class in vllm.usage.usage_lib) use_all_gather (vllm.lora.layers.LogitsProcessorWithLoRA property) use_all_gather() (vllm.platforms.interface.Platform class method) (vllm.platforms.neuron.NeuronPlatform class method) (vllm.platforms.tpu.TpuPlatform class method) use_async_output_proc (vllm.config.ModelConfig attribute) use_beam_search (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) use_cascade (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.v1.attention.backends.flashinfer.FlashInferMetadata attribute) use_cascade_attention() (in module vllm.v1.attention.backends.flash_attn) (vllm.v1.attention.backends.flash_attn.FlashAttentionMetadataBuilder method) (vllm.v1.attention.backends.flashinfer.FlashInferMetadataBuilder method) (vllm.v1.attention.backends.mla.common.MLACommonMetadataBuilder method) (vllm.v1.attention.backends.triton_attn.TritonAttentionBackend static method) use_checkpoint (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) use_cuda_graph (vllm.attention.backends.blocksparse_attn.BlocksparseFlashAttentionMetadata attribute) (vllm.attention.backends.flash_attn.FlashAttentionMetadata attribute) (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.mla.common.MLACommonMetadata attribute) (vllm.attention.backends.placeholder_attn.PlaceholderAttentionMetadata attribute) (vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionMetadata attribute) (vllm.attention.backends.xformers.XFormersMetadata attribute) use_cudagraph (vllm.compilation.backends.ConcreteSizeEntry attribute) (vllm.config.CompilationConfig attribute) use_custom_allreduce() (vllm.platforms.cuda.CudaPlatformBase class method) (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) use_device_communicator (vllm.distributed.parallel_state.GroupCoordinator attribute) use_dora (vllm.lora.peft_helper.PEFTHelper attribute) use_eagle() (vllm.config.SpeculativeConfig method) USE_FP32_REDUCE_DEFAULT (in module vllm.model_executor.layers.quantization.utils.marlin_utils) use_inductor (vllm.config.CompilationConfig attribute) use_memory_pool() (vllm.device_allocator.cumem.CuMemAllocator method) use_memory_pool_with_allocator() (in module vllm.device_allocator.cumem) use_mla (vllm.config.ModelConfig property) (vllm.v1.kv_cache_interface.AttentionSpec attribute) use_neuronx_distributed() (vllm.platforms.neuron.NeuronPlatform method) use_ray (vllm.config.ParallelConfig property) use_rocm_custom_paged_attention() (in module vllm.platforms.rocm) USE_ROWWISE_TORCH_SCALED_MM (in module vllm.model_executor.layers.quantization.utils.w8a8_utils) use_rslora (vllm.lora.peft_helper.PEFTHelper attribute) USE_SCHED_YIELD (in module vllm.distributed.device_communicators.shm_broadcast) use_structured_output (vllm.v1.request.Request property) use_tqdm_on_load (vllm.config.LoadConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) use_transformers_neuronx() (vllm.platforms.neuron.NeuronPlatform method) use_v2_block_manager (vllm.engine.arg_utils.EngineArgs attribute) user (vllm.entrypoints.openai.protocol.ChatCompletionRequest attribute) (vllm.entrypoints.openai.protocol.ClassificationRequest attribute) (vllm.entrypoints.openai.protocol.CompletionRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingChatRequest attribute) (vllm.entrypoints.openai.protocol.EmbeddingCompletionRequest attribute) uses_mrope (vllm.config.ModelConfig property) uses_mrope() (in module vllm.transformers_utils.config) uses_prompt_embeds() (vllm.sequence.SequenceGroup method) uses_ray (vllm.executor.executor_base.ExecutorBase attribute) (vllm.executor.mp_distributed_executor.MultiprocessingDistributedExecutor attribute) (vllm.executor.ray_distributed_executor.RayDistributedExecutor attribute) (vllm.executor.uniproc_executor.ExecutorWithExternalLauncher attribute) (vllm.executor.uniproc_executor.UniProcExecutor attribute) UTILITY (vllm.v1.engine.EngineCoreRequestType attribute) utility_output (vllm.v1.engine.EngineCoreOutputs attribute) UtilityOutput (class in vllm.v1.engine) uuid() (vllm.compilation.inductor_pass.CallableInductorPass method) (vllm.compilation.inductor_pass.InductorPass method) (vllm.compilation.pass_manager.PostGradPassManager method) (vllm.compilation.torch25_custom_graph_pass.Torch25CustomGraphPass method) (vllm.config.PassConfig method) V v_head_dim (vllm.attention.backends.utils.MLADims attribute) v_proj (vllm.model_executor.models.module_mapping.ModelKeys attribute) val2name() (vllm.model_executor.models.chameleon.ChameleonImageVocabularyMapping method) valid_kwargs (vllm.transformers_utils.processors.ovis.OvisProcessor attribute) VALID_MESSAGE_CONTENT_MM_PART_TYPES (in module vllm.entrypoints.chat_utils) validate() (vllm.entrypoints.cli.benchmark.main.BenchmarkSubcommand method) (vllm.entrypoints.cli.serve.ServeSubcommand method) (vllm.entrypoints.cli.types.CLISubcommand method) validate_alive() (vllm.v1.engine.core_client.BackgroundResources method) validate_args() (in module vllm.benchmarks.throughput) validate_chat_template() (in module vllm.entrypoints.chat_utils) validate_guidance_grammar() (in module vllm.v1.structured_output.backend_guidance) validate_json_request() (in module vllm.entrypoints.openai.api_server) validate_kv_cache_config() (vllm.model_executor.layers.quantization.quark.quark.QuarkKVCacheMethod static method) validate_kv_cache_scheme() (vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors.CompressedTensorsKVCacheMethod static method) validate_legal() (vllm.lora.peft_helper.PEFTHelper method) validate_output() (vllm.engine.llm_engine.LLMEngine class method) validate_outputs() (vllm.engine.llm_engine.LLMEngine class method) (vllm.v1.engine.llm_engine.LLMEngine class method) validate_parsed_serve_args() (in module vllm.entrypoints.openai.cli_args) validate_request() (vllm.platforms.interface.Platform class method) (vllm.platforms.tpu.TpuPlatform class method) validate_request_params() (in module vllm.transformers_utils.tokenizers.mistral) validate_stream_options() (vllm.entrypoints.openai.protocol.ChatCompletionRequest class method) (vllm.entrypoints.openai.protocol.CompletionRequest class method) validate_tokens() (vllm.v1.structured_output.backend_guidance.GuidanceGrammar method) (vllm.v1.structured_output.backend_types.StructuredOutputGrammar method) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar method) validate_transcription_request() (vllm.entrypoints.openai.protocol.TranscriptionRequest class method) validate_xgrammar_grammar() (in module vllm.v1.structured_output.backend_xgrammar) value (vllm.executor.multiproc_worker_utils.Result attribute) (vllm.multimodal.processing.ProcessingCacheItem attribute) (vllm.multimodal.processing.ProcessingCacheOptionalItem attribute) (vllm.utils.AtomicCounter property) varlen (vllm.attention.ops.triton_flash_attention.MetaData attribute) varlen_attention() (vllm._ipex_ops.ipex_ops static method) varlen_attn() (vllm.attention.ops.blocksparse_attention.interface.LocalStridedBlockSparseAttn method) verify() (vllm.pooling_params.PoolingParams method) verify_async_output_proc() (vllm.config.ModelConfig method) verify_bitblas_supported() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) verify_bitblas_supports_shape() (in module vllm.model_executor.layers.quantization.utils.bitblas_utils) verify_dual_chunk_attention_config() (vllm.config.ModelConfig method) verify_hash (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) verify_lora_support() (vllm.config.LoRAConfig method) verify_marlin_supported() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) verify_marlin_supports_shape() (in module vllm.model_executor.layers.quantization.utils.marlin_utils) verify_model_arch() (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) verify_quantization() (vllm.platforms.interface.Platform class method) (vllm.platforms.rocm.RocmPlatform class method) verify_with_cache_config() (vllm.config.LoRAConfig method) verify_with_model_config() (vllm.config.LoRAConfig method) (vllm.config.PromptAdapterConfig method) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig method) verify_with_parallel_config() (vllm.config.CacheConfig method) (vllm.config.ModelConfig method) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig method) vert_stride (vllm.attention.backends.blocksparse_attn.BlocksparseParams attribute) video (vllm.multimodal.inputs.MultiModalDataBuiltins attribute) video_embeds (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLVideoEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLVideoEmbeddingInputs attribute) video_grid_thw (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLVideoEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_5_vl.Qwen2_5_VLVideoPixelInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLVideoEmbeddingInputs attribute) (vllm.model_executor.models.qwen2_vl.Qwen2VLVideoPixelInputs attribute) video_pattern (vllm.model_executor.models.minicpmv.MiniCPMVProcessingInfo attribute) video_to_ndarrays() (in module vllm.assets.video) video_to_pil_images_list() (in module vllm.assets.video) video_token_index (vllm.model_executor.models.llava_onevision.LlavaOnevisionLikeConfig attribute) video_url (vllm.entrypoints.chat_utils.ChatCompletionContentPartVideoParam attribute) (vllm.entrypoints.chat_utils.CustomChatCompletionContentSimpleVideoParam attribute) VideoAsset (class in vllm.assets.video) VideoAssetName (in module vllm.assets.video) VideoEmbeddingItems (class in vllm.multimodal.parse) VideoItem (in module vllm.multimodal.inputs) VideoLoader (class in vllm.multimodal.video) VideoMediaIO (class in vllm.multimodal.video) VideoProcessorItems (class in vllm.multimodal.parse) VideoURL (class in vllm.entrypoints.chat_utils) virtual_engine (vllm.forward_context.ForwardContext attribute) (vllm.sequence.ExecuteModelRequest attribute) (vllm.worker.cpu_model_runner.ModelInputForCPU attribute) (vllm.worker.hpu_model_runner.ModelInputForHPU attribute) (vllm.worker.model_runner.ModelInputForGPU attribute) (vllm.worker.tpu_model_runner.ModelInputForTPU attribute) (vllm.worker.worker_base.WorkerInput attribute) (vllm.worker.xpu_model_runner.ModelInputForXPU attribute) vision_config (vllm.model_executor.models.llava.LlavaLikeConfig attribute) (vllm.model_executor.models.mistral3.LlavaLikeConfig attribute) (vllm.model_executor.models.vision.VisionLanguageConfig attribute) (vllm.transformers_utils.configs.deepseek_vl2.DeepseekVLV2Config attribute) VISION_ENCODER_TO_PROCESSING_CONFIG (in module vllm.model_executor.models.phi4mm) vision_feature_layer (vllm.model_executor.models.llava.LlavaLikeConfig attribute) (vllm.model_executor.models.mistral3.LlavaLikeConfig attribute) vision_feature_select_strategy (vllm.model_executor.models.llava.LlavaLikeConfig attribute) (vllm.model_executor.models.mistral3.LlavaLikeConfig attribute) VisionArenaDataset (class in vllm.benchmarks.datasets) VisionBackboneConfig (class in vllm.model_executor.models.molmo) VisionEncoderArgs (class in vllm.model_executor.models.pixtral) VisionEncoderConfig (class in vllm.transformers_utils.configs.deepseek_vl2) VisionEncoderInfo (class in vllm.model_executor.models.vision) VisionLanguageAdapter (class in vllm.model_executor.models.pixtral) VisionLanguageConfig (class in vllm.model_executor.models.vision) VisionTransformer (class in vllm.model_executor.models.molmo) (class in vllm.model_executor.models.pixtral) (class in vllm.model_executor.models.qwen_vl) VisualAttention (class in vllm.model_executor.models.qwen_vl) VisualAttentionBlock (class in vllm.model_executor.models.qwen_vl) VisualEmbedding (class in vllm.model_executor.models.ovis) VisualTokenizer (class in vllm.model_executor.models.ovis) VIT_LAYERS (in module vllm.model_executor.models.molmo) ViTMLP (class in vllm.model_executor.models.molmo) VL_VISION_ATTENTION_FUNCTIONS (in module vllm.model_executor.models.moonvit) vllm module VLLM (vllm.config.ModelImpl attribute) vllm._custom_ops module vllm._ipex_ops module vllm.adapter_commons module vllm.adapter_commons.layers module vllm.adapter_commons.models module vllm.adapter_commons.request module vllm.adapter_commons.utils module vllm.adapter_commons.worker_manager module vllm.assets module vllm.assets.audio module vllm.assets.base module vllm.assets.image module vllm.assets.video module vllm.attention module vllm.attention.backends module vllm.attention.backends.abstract module vllm.attention.backends.blocksparse_attn module vllm.attention.backends.cpu_mla module vllm.attention.backends.dual_chunk_flash_attn module vllm.attention.backends.flash_attn module vllm.attention.backends.flashinfer module vllm.attention.backends.flashmla module vllm.attention.backends.hpu_attn module vllm.attention.backends.ipex_attn module vllm.attention.backends.mla module vllm.attention.backends.mla.common module vllm.attention.backends.pallas module vllm.attention.backends.placeholder_attn module vllm.attention.backends.rocm_aiter_mla module vllm.attention.backends.rocm_flash_attn module vllm.attention.backends.torch_sdpa module vllm.attention.backends.triton_mla module vllm.attention.backends.utils module vllm.attention.backends.xformers module vllm.attention.layer module vllm.attention.ops module vllm.attention.ops.blocksparse_attention module vllm.attention.ops.blocksparse_attention.blocksparse_attention_kernel module vllm.attention.ops.blocksparse_attention.interface module vllm.attention.ops.blocksparse_attention.utils module vllm.attention.ops.chunked_prefill_paged_decode module vllm.attention.ops.flashmla module vllm.attention.ops.hpu_paged_attn module vllm.attention.ops.ipex_attn module vllm.attention.ops.merge_attn_states module vllm.attention.ops.nki_flash_attn module vllm.attention.ops.paged_attn module vllm.attention.ops.prefix_prefill module vllm.attention.ops.rocm_aiter_mla module vllm.attention.ops.rocm_aiter_paged_attn module vllm.attention.ops.triton_decode_attention module vllm.attention.ops.triton_flash_attention module vllm.attention.ops.triton_merge_attn_states module vllm.attention.ops.triton_unified_attention module vllm.attention.selector module vllm.beam_search module vllm.benchmarks module vllm.benchmarks.datasets module vllm.benchmarks.endpoint_request_func module vllm.benchmarks.latency module vllm.benchmarks.serve module vllm.benchmarks.throughput module vllm.benchmarks.utils module vllm.collect_env module vllm.compilation module vllm.compilation.activation_quant_fusion module vllm.compilation.backends module vllm.compilation.compiler_interface module vllm.compilation.counter module vllm.compilation.decorators module vllm.compilation.fix_functionalization module vllm.compilation.fusion module vllm.compilation.fx_utils module vllm.compilation.inductor_pass module vllm.compilation.monitor module vllm.compilation.multi_output_match module vllm.compilation.noop_elimination module vllm.compilation.pass_manager module vllm.compilation.sequence_parallelism module vllm.compilation.torch25_custom_graph_pass module vllm.compilation.vllm_inductor_pass module vllm.compilation.wrapper module vllm.config module vllm.connections module vllm.core module vllm.core.block module vllm.core.block.block_table module vllm.core.block.common module vllm.core.block.cpu_gpu_block_allocator module vllm.core.block.interfaces module vllm.core.block.naive_block module vllm.core.block.prefix_caching_block module vllm.core.block.utils module vllm.core.block_manager module vllm.core.evictor module vllm.core.interfaces module vllm.core.placeholder_block_space_manager module vllm.core.scheduler module vllm.device_allocator module vllm.device_allocator.cumem module vllm.distributed module vllm.distributed.communication_op module vllm.distributed.device_communicators module vllm.distributed.device_communicators.base_device_communicator module vllm.distributed.device_communicators.cpu_communicator module vllm.distributed.device_communicators.cuda_communicator module vllm.distributed.device_communicators.cuda_wrapper module vllm.distributed.device_communicators.custom_all_reduce module vllm.distributed.device_communicators.custom_all_reduce_utils module vllm.distributed.device_communicators.hpu_communicator module vllm.distributed.device_communicators.neuron_communicator module vllm.distributed.device_communicators.pynccl module vllm.distributed.device_communicators.pynccl_wrapper module vllm.distributed.device_communicators.shm_broadcast module vllm.distributed.device_communicators.tpu_communicator module vllm.distributed.device_communicators.xpu_communicator module vllm.distributed.kv_events module vllm.distributed.kv_transfer module vllm.distributed.kv_transfer.kv_connector module vllm.distributed.kv_transfer.kv_connector.base module vllm.distributed.kv_transfer.kv_connector.factory module vllm.distributed.kv_transfer.kv_connector.lmcache_connector module vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector module vllm.distributed.kv_transfer.kv_connector.simple_connector module vllm.distributed.kv_transfer.kv_connector.utils module vllm.distributed.kv_transfer.kv_connector.v1 module vllm.distributed.kv_transfer.kv_connector.v1.base module vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector module vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector module vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector module vllm.distributed.kv_transfer.kv_connector_agent module vllm.distributed.kv_transfer.kv_lookup_buffer module vllm.distributed.kv_transfer.kv_lookup_buffer.base module vllm.distributed.kv_transfer.kv_lookup_buffer.mooncake_store module vllm.distributed.kv_transfer.kv_lookup_buffer.simple_buffer module vllm.distributed.kv_transfer.kv_pipe module vllm.distributed.kv_transfer.kv_pipe.base module vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe module vllm.distributed.kv_transfer.kv_pipe.pynccl_pipe module vllm.distributed.kv_transfer.kv_transfer_state module vllm.distributed.parallel_state module vllm.distributed.utils module vllm.engine module vllm.engine.arg_utils module vllm.engine.async_llm_engine module vllm.engine.async_timeout module vllm.engine.llm_engine module vllm.engine.metrics module vllm.engine.metrics_types module vllm.engine.multiprocessing module vllm.engine.multiprocessing.client module vllm.engine.multiprocessing.engine module vllm.engine.output_processor module vllm.engine.output_processor.interfaces module vllm.engine.output_processor.multi_step module vllm.engine.output_processor.single_step module vllm.engine.output_processor.stop_checker module vllm.engine.output_processor.util module vllm.engine.protocol module vllm.entrypoints module vllm.entrypoints.api_server module vllm.entrypoints.chat_utils module vllm.entrypoints.cli module vllm.entrypoints.cli.benchmark module vllm.entrypoints.cli.benchmark.base module vllm.entrypoints.cli.benchmark.latency module vllm.entrypoints.cli.benchmark.main module vllm.entrypoints.cli.benchmark.serve module vllm.entrypoints.cli.benchmark.throughput module vllm.entrypoints.cli.collect_env module vllm.entrypoints.cli.main module vllm.entrypoints.cli.openai module vllm.entrypoints.cli.serve module vllm.entrypoints.cli.types module vllm.entrypoints.launcher module vllm.entrypoints.llm module vllm.entrypoints.logger module vllm.entrypoints.openai module vllm.entrypoints.openai.api_server module vllm.entrypoints.openai.cli_args module vllm.entrypoints.openai.logits_processors module vllm.entrypoints.openai.protocol module vllm.entrypoints.openai.run_batch module vllm.entrypoints.openai.serving_chat module vllm.entrypoints.openai.serving_classification module vllm.entrypoints.openai.serving_completion module vllm.entrypoints.openai.serving_embedding module vllm.entrypoints.openai.serving_engine module vllm.entrypoints.openai.serving_models module vllm.entrypoints.openai.serving_pooling module vllm.entrypoints.openai.serving_score module vllm.entrypoints.openai.serving_tokenization module vllm.entrypoints.openai.serving_transcription module vllm.entrypoints.openai.tool_parsers module vllm.entrypoints.openai.tool_parsers.abstract_tool_parser module vllm.entrypoints.openai.tool_parsers.deepseekv3_tool_parser module vllm.entrypoints.openai.tool_parsers.granite_20b_fc_tool_parser module vllm.entrypoints.openai.tool_parsers.granite_tool_parser module vllm.entrypoints.openai.tool_parsers.hermes_tool_parser module vllm.entrypoints.openai.tool_parsers.internlm2_tool_parser module vllm.entrypoints.openai.tool_parsers.jamba_tool_parser module vllm.entrypoints.openai.tool_parsers.llama_tool_parser module vllm.entrypoints.openai.tool_parsers.mistral_tool_parser module vllm.entrypoints.openai.tool_parsers.phi4mini_tool_parser module vllm.entrypoints.openai.tool_parsers.pythonic_tool_parser module vllm.entrypoints.openai.tool_parsers.utils module vllm.entrypoints.score_utils module vllm.entrypoints.ssl module vllm.entrypoints.utils module vllm.env_override module vllm.envs module vllm.executor module vllm.executor.executor_base module vllm.executor.mp_distributed_executor module vllm.executor.msgspec_utils module vllm.executor.multiproc_worker_utils module vllm.executor.ray_distributed_executor module vllm.executor.ray_utils module vllm.executor.uniproc_executor module vllm.forward_context module vllm.inputs module vllm.inputs.data module vllm.inputs.parse module vllm.inputs.preprocess module vllm.inputs.registry module vllm.jsontree module vllm.logger module vllm.logging_utils module vllm.logging_utils.dump_input module vllm.logging_utils.formatter module vllm.logits_process module vllm.lora module vllm.lora.fully_sharded_layers module vllm.lora.layers module vllm.lora.lora module vllm.lora.models module vllm.lora.ops module vllm.lora.ops.torch_ops module vllm.lora.ops.torch_ops.lora_ops module vllm.lora.ops.triton_ops module vllm.lora.ops.triton_ops.kernel_utils module vllm.lora.ops.triton_ops.lora_expand_op module vllm.lora.ops.triton_ops.lora_kernel_metadata module vllm.lora.ops.triton_ops.lora_shrink_op module vllm.lora.ops.triton_ops.utils module vllm.lora.ops.xla_ops module vllm.lora.ops.xla_ops.lora_ops module vllm.lora.ops.xla_ops.pallas module vllm.lora.peft_helper module vllm.lora.punica_wrapper module vllm.lora.punica_wrapper.punica_base module vllm.lora.punica_wrapper.punica_cpu module vllm.lora.punica_wrapper.punica_gpu module vllm.lora.punica_wrapper.punica_hpu module vllm.lora.punica_wrapper.punica_selector module vllm.lora.punica_wrapper.punica_tpu module vllm.lora.punica_wrapper.utils module vllm.lora.request module vllm.lora.resolver module vllm.lora.utils module vllm.lora.worker_manager module vllm.model_executor module vllm.model_executor.custom_op module vllm.model_executor.guided_decoding module vllm.model_executor.guided_decoding.guidance_decoding module vllm.model_executor.guided_decoding.guidance_logits_processors module vllm.model_executor.guided_decoding.guided_fields module vllm.model_executor.guided_decoding.lm_format_enforcer_decoding module vllm.model_executor.guided_decoding.outlines_decoding module vllm.model_executor.guided_decoding.outlines_logits_processors module vllm.model_executor.guided_decoding.utils module vllm.model_executor.guided_decoding.xgrammar_decoding module vllm.model_executor.layers module vllm.model_executor.layers.activation module vllm.model_executor.layers.fused_moe module vllm.model_executor.layers.fused_moe.cutlass_moe module vllm.model_executor.layers.fused_moe.deep_gemm_moe module vllm.model_executor.layers.fused_moe.fused_marlin_moe module vllm.model_executor.layers.fused_moe.fused_moe module vllm.model_executor.layers.fused_moe.layer module vllm.model_executor.layers.fused_moe.moe_align_block_size module vllm.model_executor.layers.fused_moe.moe_pallas module vllm.model_executor.layers.fused_moe.moe_permute_unpermute module vllm.model_executor.layers.fused_moe.moe_torch_iterative module vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe module vllm.model_executor.layers.fused_moe.utils module vllm.model_executor.layers.layernorm module vllm.model_executor.layers.lightning_attn module vllm.model_executor.layers.linear module vllm.model_executor.layers.logits_processor module vllm.model_executor.layers.mamba module vllm.model_executor.layers.mamba.mamba2_metadata module vllm.model_executor.layers.mamba.mamba_mixer module vllm.model_executor.layers.mamba.mamba_mixer2 module vllm.model_executor.layers.mamba.ops module vllm.model_executor.layers.mamba.ops.causal_conv1d module vllm.model_executor.layers.mamba.ops.mamba_ssm module vllm.model_executor.layers.mamba.ops.ssd_bmm module vllm.model_executor.layers.mamba.ops.ssd_chunk_scan module vllm.model_executor.layers.mamba.ops.ssd_chunk_state module vllm.model_executor.layers.mamba.ops.ssd_combined module vllm.model_executor.layers.mamba.ops.ssd_state_passing module vllm.model_executor.layers.pooler module vllm.model_executor.layers.quantization module vllm.model_executor.layers.quantization.aqlm module vllm.model_executor.layers.quantization.awq module vllm.model_executor.layers.quantization.awq_marlin module vllm.model_executor.layers.quantization.awq_triton module vllm.model_executor.layers.quantization.base_config module vllm.model_executor.layers.quantization.bitblas module vllm.model_executor.layers.quantization.bitsandbytes module vllm.model_executor.layers.quantization.compressed_tensors module vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors module vllm.model_executor.layers.quantization.compressed_tensors.compressed_tensors_moe module vllm.model_executor.layers.quantization.compressed_tensors.schemes module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_24 module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_scheme module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24 module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_nvfp4 module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a16_fp8 module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_fp8 module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w8a8_int8 module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 module vllm.model_executor.layers.quantization.compressed_tensors.triton_scaled_mm module vllm.model_executor.layers.quantization.compressed_tensors.utils module vllm.model_executor.layers.quantization.deepspeedfp module vllm.model_executor.layers.quantization.experts_int8 module vllm.model_executor.layers.quantization.fbgemm_fp8 module vllm.model_executor.layers.quantization.fp8 module vllm.model_executor.layers.quantization.gguf module vllm.model_executor.layers.quantization.gptq module vllm.model_executor.layers.quantization.gptq_bitblas module vllm.model_executor.layers.quantization.gptq_marlin module vllm.model_executor.layers.quantization.gptq_marlin_24 module vllm.model_executor.layers.quantization.hqq_marlin module vllm.model_executor.layers.quantization.ipex_quant module vllm.model_executor.layers.quantization.kernels module vllm.model_executor.layers.quantization.kernels.mixed_precision module vllm.model_executor.layers.quantization.kernels.mixed_precision.allspark module vllm.model_executor.layers.quantization.kernels.mixed_precision.bitblas module vllm.model_executor.layers.quantization.kernels.mixed_precision.exllama module vllm.model_executor.layers.quantization.kernels.mixed_precision.machete module vllm.model_executor.layers.quantization.kernels.mixed_precision.marlin module vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel module vllm.model_executor.layers.quantization.kernels.scaled_mm module vllm.model_executor.layers.quantization.kernels.scaled_mm.aiter module vllm.model_executor.layers.quantization.kernels.scaled_mm.cutlass module vllm.model_executor.layers.quantization.kernels.scaled_mm.ScaledMMLinearKernel module vllm.model_executor.layers.quantization.kernels.scaled_mm.triton module vllm.model_executor.layers.quantization.kernels.scaled_mm.xla module vllm.model_executor.layers.quantization.kv_cache module vllm.model_executor.layers.quantization.marlin module vllm.model_executor.layers.quantization.modelopt module vllm.model_executor.layers.quantization.moe_wna16 module vllm.model_executor.layers.quantization.neuron_quant module vllm.model_executor.layers.quantization.ptpc_fp8 module vllm.model_executor.layers.quantization.qqq module vllm.model_executor.layers.quantization.quark module vllm.model_executor.layers.quantization.quark.quark module vllm.model_executor.layers.quantization.quark.quark_moe module vllm.model_executor.layers.quantization.quark.schemes module vllm.model_executor.layers.quantization.quark.schemes.quark_scheme module vllm.model_executor.layers.quantization.quark.schemes.quark_w4a4_mxfp4 module vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_fp8 module vllm.model_executor.layers.quantization.quark.schemes.quark_w8a8_int8 module vllm.model_executor.layers.quantization.quark.utils module vllm.model_executor.layers.quantization.schema module vllm.model_executor.layers.quantization.torchao module vllm.model_executor.layers.quantization.tpu_int8 module vllm.model_executor.layers.quantization.utils module vllm.model_executor.layers.quantization.utils.allspark_utils module vllm.model_executor.layers.quantization.utils.bitblas_utils module vllm.model_executor.layers.quantization.utils.fp8_utils module vllm.model_executor.layers.quantization.utils.gptq_utils module vllm.model_executor.layers.quantization.utils.int8_utils module vllm.model_executor.layers.quantization.utils.layer_utils module vllm.model_executor.layers.quantization.utils.machete_utils module vllm.model_executor.layers.quantization.utils.marlin_utils module vllm.model_executor.layers.quantization.utils.marlin_utils_fp4 module vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 module vllm.model_executor.layers.quantization.utils.marlin_utils_test module vllm.model_executor.layers.quantization.utils.marlin_utils_test_24 module vllm.model_executor.layers.quantization.utils.marlin_utils_test_qqq module vllm.model_executor.layers.quantization.utils.mxfp4_utils module vllm.model_executor.layers.quantization.utils.nvfp4_emulation_utils module vllm.model_executor.layers.quantization.utils.quant_utils module vllm.model_executor.layers.quantization.utils.w8a8_utils module vllm.model_executor.layers.rejection_sampler module vllm.model_executor.layers.resampler module vllm.model_executor.layers.rotary_embedding module vllm.model_executor.layers.sampler module vllm.model_executor.layers.spec_decode_base_sampler module vllm.model_executor.layers.typical_acceptance_sampler module vllm.model_executor.layers.utils module vllm.model_executor.layers.vocab_parallel_embedding module vllm.model_executor.model_loader module vllm.model_executor.model_loader.base_loader module vllm.model_executor.model_loader.bitsandbytes_loader module vllm.model_executor.model_loader.default_loader module vllm.model_executor.model_loader.dummy_loader module vllm.model_executor.model_loader.gguf_loader module vllm.model_executor.model_loader.neuron module vllm.model_executor.model_loader.neuronx_distributed module vllm.model_executor.model_loader.runai_streamer_loader module vllm.model_executor.model_loader.sharded_state_loader module vllm.model_executor.model_loader.tensorizer module vllm.model_executor.model_loader.tensorizer_loader module vllm.model_executor.model_loader.utils module vllm.model_executor.model_loader.weight_utils module vllm.model_executor.models module vllm.model_executor.models.adapters module vllm.model_executor.models.aimv2 module vllm.model_executor.models.arctic module vllm.model_executor.models.aria module vllm.model_executor.models.aya_vision module vllm.model_executor.models.baichuan module vllm.model_executor.models.bamba module vllm.model_executor.models.bart module vllm.model_executor.models.bert module vllm.model_executor.models.bert_with_rope module vllm.model_executor.models.blip module vllm.model_executor.models.blip2 module vllm.model_executor.models.bloom module vllm.model_executor.models.chameleon module vllm.model_executor.models.chatglm module vllm.model_executor.models.clip module vllm.model_executor.models.commandr module vllm.model_executor.models.constant_size_cache module vllm.model_executor.models.dbrx module vllm.model_executor.models.deepseek module vllm.model_executor.models.deepseek_mtp module vllm.model_executor.models.deepseek_v2 module vllm.model_executor.models.deepseek_vl2 module vllm.model_executor.models.eagle module vllm.model_executor.models.exaone module vllm.model_executor.models.fairseq2_llama module vllm.model_executor.models.falcon module vllm.model_executor.models.florence2 module vllm.model_executor.models.fuyu module vllm.model_executor.models.gemma module vllm.model_executor.models.gemma2 module vllm.model_executor.models.gemma3 module vllm.model_executor.models.gemma3_mm module vllm.model_executor.models.glm module vllm.model_executor.models.glm4 module vllm.model_executor.models.glm4v module vllm.model_executor.models.gpt2 module vllm.model_executor.models.gpt_bigcode module vllm.model_executor.models.gpt_j module vllm.model_executor.models.gpt_neox module vllm.model_executor.models.granite module vllm.model_executor.models.granite_speech module vllm.model_executor.models.granitemoe module vllm.model_executor.models.granitemoehybrid module vllm.model_executor.models.granitemoeshared module vllm.model_executor.models.gritlm module vllm.model_executor.models.grok1 module vllm.model_executor.models.h2ovl module vllm.model_executor.models.idefics2_vision_model module vllm.model_executor.models.idefics3 module vllm.model_executor.models.interfaces module vllm.model_executor.models.interfaces_base module vllm.model_executor.models.intern_vit module vllm.model_executor.models.internlm2 module vllm.model_executor.models.internlm2_ve module vllm.model_executor.models.internvl module vllm.model_executor.models.jais module vllm.model_executor.models.jamba module vllm.model_executor.models.kimi_vl module vllm.model_executor.models.llama module vllm.model_executor.models.llama4 module vllm.model_executor.models.llama_eagle module vllm.model_executor.models.llama_eagle3 module vllm.model_executor.models.llava module vllm.model_executor.models.llava_next module vllm.model_executor.models.llava_next_video module vllm.model_executor.models.llava_onevision module vllm.model_executor.models.mamba module vllm.model_executor.models.mamba2 module vllm.model_executor.models.mamba_cache module vllm.model_executor.models.medusa module vllm.model_executor.models.mimo module vllm.model_executor.models.mimo_mtp module vllm.model_executor.models.minicpm module vllm.model_executor.models.minicpm3 module vllm.model_executor.models.minicpmo module vllm.model_executor.models.minicpmv module vllm.model_executor.models.minimax_cache module vllm.model_executor.models.minimax_text_01 module vllm.model_executor.models.minimax_vl_01 module vllm.model_executor.models.mistral3 module vllm.model_executor.models.mixtral module vllm.model_executor.models.mixtral_quant module vllm.model_executor.models.mllama module vllm.model_executor.models.mllama4 module vllm.model_executor.models.mlp_speculator module vllm.model_executor.models.modernbert module vllm.model_executor.models.module_mapping module vllm.model_executor.models.molmo module vllm.model_executor.models.moonvit module vllm.model_executor.models.mpt module vllm.model_executor.models.nemotron module vllm.model_executor.models.nemotron_nas module vllm.model_executor.models.nvlm_d module vllm.model_executor.models.olmo module vllm.model_executor.models.olmo2 module vllm.model_executor.models.olmoe module vllm.model_executor.models.opt module vllm.model_executor.models.orion module vllm.model_executor.models.ovis module vllm.model_executor.models.paligemma module vllm.model_executor.models.persimmon module vllm.model_executor.models.phi module vllm.model_executor.models.phi3 module vllm.model_executor.models.phi3_small module vllm.model_executor.models.phi3v module vllm.model_executor.models.phi4mm module vllm.model_executor.models.phi4mm_audio module vllm.model_executor.models.phi4mm_utils module vllm.model_executor.models.phimoe module vllm.model_executor.models.pixtral module vllm.model_executor.models.plamo2 module vllm.model_executor.models.prithvi_geospatial_mae module vllm.model_executor.models.qwen module vllm.model_executor.models.qwen2 module vllm.model_executor.models.qwen2_5_omni_thinker module vllm.model_executor.models.qwen2_5_vl module vllm.model_executor.models.qwen2_audio module vllm.model_executor.models.qwen2_moe module vllm.model_executor.models.qwen2_rm module vllm.model_executor.models.qwen2_vl module vllm.model_executor.models.qwen3 module vllm.model_executor.models.qwen3_moe module vllm.model_executor.models.qwen_vl module vllm.model_executor.models.registry module vllm.model_executor.models.roberta module vllm.model_executor.models.siglip module vllm.model_executor.models.skyworkr1v module vllm.model_executor.models.smolvlm module vllm.model_executor.models.solar module vllm.model_executor.models.stablelm module vllm.model_executor.models.starcoder2 module vllm.model_executor.models.telechat2 module vllm.model_executor.models.teleflm module vllm.model_executor.models.transformers module vllm.model_executor.models.ultravox module vllm.model_executor.models.utils module vllm.model_executor.models.vision module vllm.model_executor.models.whisper module vllm.model_executor.models.zamba2 module vllm.model_executor.parameter module vllm.model_executor.pooling_metadata module vllm.model_executor.sampling_metadata module vllm.model_executor.utils module vllm.multimodal module vllm.multimodal.audio module vllm.multimodal.base module vllm.multimodal.hasher module vllm.multimodal.image module vllm.multimodal.inputs module vllm.multimodal.parse module vllm.multimodal.processing module vllm.multimodal.profiling module vllm.multimodal.registry module vllm.multimodal.utils module vllm.multimodal.video module vllm.outputs module vllm.platforms module vllm.platforms.cpu module vllm.platforms.cuda module vllm.platforms.hpu module vllm.platforms.interface module vllm.platforms.neuron module vllm.platforms.rocm module vllm.platforms.tpu module vllm.platforms.xpu module vllm.plugins module vllm.plugins.lora_resolvers module vllm.plugins.lora_resolvers.filesystem_resolver module vllm.pooling_params module vllm.profiler module vllm.profiler.layerwise_profile module vllm.profiler.utils module vllm.prompt_adapter module vllm.prompt_adapter.layers module vllm.prompt_adapter.models module vllm.prompt_adapter.request module vllm.prompt_adapter.utils module vllm.prompt_adapter.worker_manager module vllm.reasoning module vllm.reasoning.abs_reasoning_parsers module vllm.reasoning.deepseek_r1_reasoning_parser module vllm.reasoning.granite_reasoning_parser module vllm.reasoning.qwen3_reasoning_parser module vllm.sampling_params module vllm.scalar_type module vllm.scripts module vllm.sequence module vllm.spec_decode module vllm.spec_decode.batch_expansion module vllm.spec_decode.draft_model_runner module vllm.spec_decode.interfaces module vllm.spec_decode.medusa_worker module vllm.spec_decode.metrics module vllm.spec_decode.mlp_speculator_worker module vllm.spec_decode.mqa_scorer module vllm.spec_decode.multi_step_worker module vllm.spec_decode.ngram_worker module vllm.spec_decode.proposer_worker_base module vllm.spec_decode.smaller_tp_proposer_worker module vllm.spec_decode.spec_decode_worker module vllm.spec_decode.target_model_runner module vllm.spec_decode.top1_proposer module vllm.spec_decode.util module vllm.test_utils module vllm.tracing module vllm.transformers_utils module vllm.transformers_utils.chat_templates module vllm.transformers_utils.chat_templates.registry module vllm.transformers_utils.config module vllm.transformers_utils.configs module vllm.transformers_utils.configs.arctic module vllm.transformers_utils.configs.chatglm module vllm.transformers_utils.configs.cohere2 module vllm.transformers_utils.configs.dbrx module vllm.transformers_utils.configs.deepseek_vl2 module vllm.transformers_utils.configs.eagle module vllm.transformers_utils.configs.exaone module vllm.transformers_utils.configs.falcon module vllm.transformers_utils.configs.h2ovl module vllm.transformers_utils.configs.internvl module vllm.transformers_utils.configs.jais module vllm.transformers_utils.configs.kimi_vl module vllm.transformers_utils.configs.medusa module vllm.transformers_utils.configs.minimax_text_01 module vllm.transformers_utils.configs.minimax_vl_01 module vllm.transformers_utils.configs.mllama module vllm.transformers_utils.configs.mlp_speculator module vllm.transformers_utils.configs.moonvit module vllm.transformers_utils.configs.mpt module vllm.transformers_utils.configs.nemotron module vllm.transformers_utils.configs.nvlm_d module vllm.transformers_utils.configs.ovis module vllm.transformers_utils.configs.skyworkr1v module vllm.transformers_utils.configs.solar module vllm.transformers_utils.configs.telechat2 module vllm.transformers_utils.configs.ultravox module vllm.transformers_utils.detokenizer module vllm.transformers_utils.detokenizer_utils module vllm.transformers_utils.processor module vllm.transformers_utils.processors module vllm.transformers_utils.processors.deepseek_vl2 module vllm.transformers_utils.processors.ovis module vllm.transformers_utils.s3_utils module vllm.transformers_utils.tokenizer module vllm.transformers_utils.tokenizer_base module vllm.transformers_utils.tokenizer_group module vllm.transformers_utils.tokenizers module vllm.transformers_utils.tokenizers.mistral module vllm.transformers_utils.utils module vllm.triton_utils module vllm.triton_utils.importing module vllm.usage module vllm.usage.usage_lib module vllm.utils module vllm.v1 module vllm.v1.attention module vllm.v1.attention.backends module vllm.v1.attention.backends.flash_attn module vllm.v1.attention.backends.flashinfer module vllm.v1.attention.backends.mla module vllm.v1.attention.backends.mla.common module vllm.v1.attention.backends.mla.flashmla module vllm.v1.attention.backends.mla.rocm_aiter_mla module vllm.v1.attention.backends.mla.triton_mla module vllm.v1.attention.backends.pallas module vllm.v1.attention.backends.triton_attn module vllm.v1.attention.backends.utils module vllm.v1.core module vllm.v1.core.block_pool module vllm.v1.core.encoder_cache_manager module vllm.v1.core.kv_cache_manager module vllm.v1.core.kv_cache_utils module vllm.v1.core.sched module vllm.v1.core.sched.interface module vllm.v1.core.sched.output module vllm.v1.core.sched.scheduler module vllm.v1.core.sched.utils module vllm.v1.core.single_type_kv_cache_manager module vllm.v1.engine module vllm.v1.engine.async_llm module vllm.v1.engine.core module vllm.v1.engine.core_client module vllm.v1.engine.detokenizer module vllm.v1.engine.exceptions module vllm.v1.engine.llm_engine module vllm.v1.engine.logprobs module vllm.v1.engine.mm_input_cache module vllm.v1.engine.output_processor module vllm.v1.engine.parallel_sampling module vllm.v1.engine.processor module vllm.v1.executor module vllm.v1.executor.abstract module vllm.v1.executor.multiproc_executor module vllm.v1.executor.ray_distributed_executor module vllm.v1.kv_cache_interface module vllm.v1.metrics module vllm.v1.metrics.loggers module vllm.v1.metrics.stats module vllm.v1.outputs module vllm.v1.request module vllm.v1.sample module vllm.v1.sample.metadata module vllm.v1.sample.ops module vllm.v1.sample.ops.bad_words module vllm.v1.sample.ops.penalties module vllm.v1.sample.ops.topk_topp_sampler module vllm.v1.sample.rejection_sampler module vllm.v1.sample.sampler module vllm.v1.sample.tpu module vllm.v1.sample.tpu.metadata module vllm.v1.sample.tpu.sampler module vllm.v1.serial_utils module vllm.v1.spec_decode module vllm.v1.spec_decode.eagle module vllm.v1.spec_decode.metadata module vllm.v1.spec_decode.metrics module vllm.v1.spec_decode.ngram_proposer module vllm.v1.spec_decode.utils module vllm.v1.stats module vllm.v1.stats.common module vllm.v1.structured_output module vllm.v1.structured_output.backend_guidance module vllm.v1.structured_output.backend_types module vllm.v1.structured_output.backend_xgrammar module vllm.v1.structured_output.request module vllm.v1.structured_output.utils module vllm.v1.utils module vllm.v1.worker module vllm.v1.worker.block_table module vllm.v1.worker.gpu_input_batch module vllm.v1.worker.gpu_model_runner module vllm.v1.worker.gpu_worker module vllm.v1.worker.lora_model_runner_mixin module vllm.v1.worker.tpu_model_runner module vllm.v1.worker.tpu_worker module vllm.v1.worker.utils module vllm.v1.worker.worker_base module vllm.version module vllm.worker module vllm.worker.cache_engine module vllm.worker.cpu_enc_dec_model_runner module vllm.worker.cpu_model_runner module vllm.worker.cpu_pooling_model_runner module vllm.worker.cpu_worker module vllm.worker.enc_dec_model_runner module vllm.worker.hpu_model_runner module vllm.worker.hpu_worker module vllm.worker.model_runner module vllm.worker.model_runner_base module vllm.worker.multi_step_hpu_worker module vllm.worker.multi_step_model_runner module vllm.worker.multi_step_neuron_model_runner module vllm.worker.multi_step_neuronx_distributed_model_runner module vllm.worker.multi_step_tpu_worker module vllm.worker.multi_step_worker module vllm.worker.neuron_model_runner module vllm.worker.neuron_worker module vllm.worker.neuronx_distributed_model_runner module vllm.worker.pooling_model_runner module vllm.worker.tpu_model_runner module vllm.worker.tpu_worker module vllm.worker.utils module vllm.worker.worker module vllm.worker.worker_base module vllm.worker.xpu_model_runner module vllm.worker.xpu_worker module vllm_config (vllm.compilation.backends.VllmBackend attribute) VLLM_CONFIGURE_LOGGING (in module vllm.logger) vllm_flash_attention_forward() (in module vllm.model_executor.models.transformers) VLLM_INVALID_TOKEN_ID (in module vllm.sequence) vllm_lib (in module vllm.utils) VLLM_LOGGING_CONFIG_PATH (in module vllm.logger) VLLM_LOGGING_LEVEL (in module vllm.logger) VLLM_LOGGING_PREFIX (in module vllm.logger) vllm_long_context_scaling_factor (vllm.lora.peft_helper.PEFTHelper attribute) vllm_lora_scaling_factor (vllm.lora.peft_helper.PEFTHelper attribute) vllm_max_position_embeddings (vllm.lora.peft_helper.PEFTHelper attribute) VLLM_RINGBUFFER_WARNING_INTERVAL (in module vllm.distributed.device_communicators.shm_broadcast) VLLM_RPC_SUCCESS_STR (in module vllm.engine.multiprocessing) VLLM_S3_BUCKET_URL (in module vllm.assets.base) vllm_tensorized (vllm.model_executor.model_loader.tensorizer.TensorizerArgs attribute) (vllm.model_executor.model_loader.tensorizer.TensorizerConfig attribute) VLLM_TOKEN_ID_ARRAY_TYPE (in module vllm.sequence) vllm_topk_softmax() (in module vllm.model_executor.layers.fused_moe.fused_moe) vllm_version_matches_substr() (in module vllm.platforms) VllmBackend (class in vllm.compilation.backends) VllmConfig (class in vllm.config) VllmInductorPass (class in vllm.compilation.vllm_inductor_pass) VllmModel (class in vllm.model_executor.models.interfaces_base) VllmModelForPooling (class in vllm.model_executor.models.interfaces_base) VllmModelForTextGeneration (class in vllm.model_executor.models.interfaces_base) VLM_IMAGES_DIR (in module vllm.assets.image) vocab() (vllm.entrypoints.openai.tool_parsers.abstract_tool_parser.ToolParser method) (vllm.model_executor.models.molmo.MolmoProcessorWrapper method) (vllm.reasoning.abs_reasoning_parsers.ReasoningParser method) vocab_range_from_global_vocab_size() (in module vllm.model_executor.layers.vocab_parallel_embedding) vocab_range_from_per_partition_vocab_size() (in module vllm.model_executor.layers.vocab_parallel_embedding) vocab_size (vllm.lora.layers.LogitsProcessorWithLoRA property) (vllm.model_executor.models.plamo2.Plamo2Config attribute) (vllm.spec_decode.smaller_tp_proposer_worker.SmallerTpProposerWorker property) (vllm.transformers_utils.tokenizer_base.TokenizerBase property) (vllm.transformers_utils.tokenizers.mistral.MistralTokenizer property) (vllm.v1.structured_output.backend_guidance.GuidanceGrammar attribute) (vllm.v1.structured_output.backend_xgrammar.XgrammarGrammar attribute) (vllm.worker.cpu_model_runner.CPUModelRunnerBase property) (vllm.worker.cpu_worker.CPUWorker property) (vllm.worker.hpu_model_runner.HPUModelRunnerBase property) (vllm.worker.hpu_worker.HPUWorker property) (vllm.worker.model_runner.GPUModelRunnerBase property) (vllm.worker.multi_step_model_runner.MultiStepModelRunner property) (vllm.worker.neuron_model_runner.NeuronModelRunner property) (vllm.worker.worker.Worker property) (vllm.worker.worker_base.WorkerBase property) (vllm.worker.xpu_model_runner.XPUModelRunner property) VocabParallelEmbedding (class in vllm.model_executor.layers.vocab_parallel_embedding) VocabParallelEmbeddingShardIndices (class in vllm.model_executor.layers.vocab_parallel_embedding) VocabParallelEmbeddingWithLoRA (class in vllm.lora.layers) VocabParallelEmbeddingWithPromptAdapter (class in vllm.prompt_adapter.layers) W W4A16SPARSE24_SUPPORTED_BITS (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24) W4A16SPARSE24_SUPPORTED_TYPES_MAP (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_w4a16_24) w8a8_block_fp8_matmul() (in module vllm.model_executor.layers.quantization.utils.fp8_utils) w8a8_block_int8_matmul() (in module vllm.model_executor.layers.quantization.utils.int8_utils) wait_for_ack() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) wait_for_kv_layer_from_connector() (in module vllm.attention.layer) wait_for_layer_load() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector.LMCacheConnectorV1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnector method) wait_for_new_requests() (vllm.engine.async_llm_engine.RequestTracker method) wait_for_ready() (vllm.v1.executor.multiproc_executor.WorkerProc static method) wait_for_save() (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorBase_V1 method) (vllm.distributed.kv_transfer.kv_connector.v1.lmcache_connector.LMCacheConnectorV1 method) (vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlConnector method) (vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector.SharedStorageConnector method) wait_previous_step() (vllm.worker.multi_step_model_runner.StatefulModelInput method) wait_until_ready() (vllm.distributed.device_communicators.shm_broadcast.MessageQueue method) WAITING (vllm.sequence.SequenceStatus attribute) (vllm.v1.request.RequestStatus attribute) WAITING_FOR_FSM (vllm.v1.request.RequestStatus attribute) WAITING_FOR_REMOTE_KVS (vllm.v1.request.RequestStatus attribute) waiting_lora_adapters (vllm.engine.metrics_types.Stats attribute) waiting_requests (vllm.v1.metrics.stats.LoRAStats attribute) wake_up() (vllm.device_allocator.cumem.CuMemAllocator method) (vllm.engine.async_llm_engine.AsyncLLMEngine method) (vllm.engine.llm_engine.LLMEngine method) (vllm.engine.multiprocessing.client.MQLLMEngineClient method) (vllm.engine.multiprocessing.engine.MQLLMEngine method) (vllm.engine.protocol.EngineClient method) (vllm.entrypoints.llm.LLM method) (vllm.executor.executor_base.ExecutorBase method) (vllm.v1.engine.async_llm.AsyncLLM method) (vllm.v1.engine.core.EngineCore method) (vllm.v1.engine.core_client.EngineCoreClient method) (vllm.v1.engine.core_client.InprocClient method) (vllm.v1.engine.core_client.SyncMPClient method) (vllm.v1.engine.llm_engine.LLMEngine method) (vllm.v1.worker.gpu_worker.Worker method) (vllm.worker.worker.Worker method) wake_up_async() (vllm.v1.engine.core_client.AsyncMPClient method) (vllm.v1.engine.core_client.EngineCoreClient method) warmup_all_buckets() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) warmup_graphs() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) warmup_model() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) (vllm.worker.tpu_model_runner.TPUModelRunner method) warmup_scenario() (vllm.worker.hpu_model_runner.HPUModelRunnerBase method) warn_for_unimplemented_methods() (in module vllm.utils) watchdog_loop() (in module vllm.entrypoints.launcher) wave_complete (vllm.v1.engine.EngineCoreOutputs attribute) weak_bind() (in module vllm.utils) weak_ref_tensor() (in module vllm.utils) weak_ref_tensors() (in module vllm.utils) weight (vllm.lora.layers.BaseLinearLayerWithLoRA property) (vllm.lora.layers.VocabParallelEmbeddingWithLoRA property) weight_direct_load() (vllm.model_executor.models.minimax_text_01.MiniMaxText01LinearAttention static method) weight_init (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) weight_loader (vllm.model_executor.parameter.BasevLLMParameter property) weight_loader() (vllm.model_executor.layers.activation.ScaledActivation method) (vllm.model_executor.layers.fused_moe.layer.FusedMoE method) (vllm.model_executor.layers.linear.ColumnParallelLinear method) (vllm.model_executor.layers.linear.MergedColumnParallelLinear method) (vllm.model_executor.layers.linear.QKVCrossParallelLinear method) (vllm.model_executor.layers.linear.QKVParallelLinear method) (vllm.model_executor.layers.linear.ReplicatedLinear method) (vllm.model_executor.layers.linear.RowParallelLinear method) (vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbedding method) (vllm.model_executor.models.arctic.ArcticMoE method) (vllm.model_executor.models.aria.AriaFusedMoE method) (vllm.model_executor.models.dbrx.DbrxExperts method) (vllm.model_executor.models.minicpm.MiniCPMMoE method) (vllm.model_executor.models.minimax_text_01.MiniMaxText01RMSNormTP static method) (vllm.model_executor.models.phi3_small.HeadMajorColumnParallelLinear method) (vllm.model_executor.models.phi3_small.HeadMajorQKVParallelLinear method) weight_loader_v2() (vllm.model_executor.layers.linear.ColumnParallelLinear method) (vllm.model_executor.layers.linear.MergedColumnParallelLinear method) (vllm.model_executor.layers.linear.QKVParallelLinear method) (vllm.model_executor.layers.linear.RowParallelLinear method) WEIGHT_LOADER_V2_SUPPORTED (in module vllm.model_executor.layers.linear) weight_loader_with_alias() (in module vllm.model_executor.models.minimax_text_01) weight_type (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearLayerConfig attribute) weights_memory (vllm.utils.MemoryProfilingResult attribute) WEIGHTS_NAME (in module vllm.prompt_adapter.utils) WeightsMapper (class in vllm.model_executor.models.utils) WeightsMapping (in module vllm.model_executor.models.utils) WhisperAttention (class in vllm.model_executor.models.whisper) WhisperAudioInputs (class in vllm.model_executor.models.whisper) WhisperCrossAttention (class in vllm.model_executor.models.whisper) WhisperDecoder (class in vllm.model_executor.models.whisper) WhisperDecoderLayer (class in vllm.model_executor.models.whisper) WhisperDummyInputsBuilder (class in vllm.model_executor.models.whisper) WhisperEncoder (class in vllm.model_executor.models.whisper) WhisperEncoderLayer (class in vllm.model_executor.models.whisper) WhisperForConditionalGeneration (class in vllm.model_executor.models.whisper) WhisperMLP (class in vllm.model_executor.models.whisper) WhisperModel (class in vllm.model_executor.models.whisper) WhisperMultiModalProcessor (class in vllm.model_executor.models.whisper) WhisperPositionalEmbedding (class in vllm.model_executor.models.whisper) WhisperProcessingInfo (class in vllm.model_executor.models.whisper) whitespace_pattern (vllm.sampling_params.GuidedDecodingParams attribute) width (vllm.model_executor.models.kimi_vl.MaxImageTokenMeta attribute) (vllm.multimodal.parse.ImageSize attribute) (vllm.transformers_utils.configs.deepseek_vl2.VisionEncoderConfig attribute) window_left (vllm.attention.backends.flashinfer.FlashInferMetadata attribute) (vllm.attention.backends.flashinfer.PerLayerParameters attribute) (vllm.v1.attention.backends.flashinfer.PerLayerParameters attribute) window_partition() (in module vllm.model_executor.models.florence2) window_reverse() (in module vllm.model_executor.models.florence2) WindowAttention (class in vllm.model_executor.models.florence2) WindowQformer (class in vllm.model_executor.models.phi4mm_audio) with_amdsmi_context() (in module vllm.platforms.rocm) with_cancellation() (in module vllm.entrypoints.utils) with_hf_config() (vllm.config.VllmConfig method) with_nvml_context() (in module vllm.platforms.cuda) with_retry() (in module vllm.transformers_utils.config) WNA16_SUPPORTED_BITS (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16) WNA16_SUPPORTED_TYPES_MAP (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16) WNA16_ZP_SUPPORTED_TYPES_MAP (in module vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16) word (vllm.entrypoints.openai.protocol.TranscriptionWord attribute) words (vllm.entrypoints.openai.protocol.TranscriptionResponseVerbose attribute) Worker (class in vllm.v1.worker.gpu_worker) (class in vllm.worker.worker) WORKER (vllm.distributed.kv_transfer.kv_connector.v1.base.KVConnectorRole attribute) worker (vllm.executor.ray_distributed_executor.RayWorkerMetaData attribute) (vllm.worker.worker_base.DelegateWorkerBase attribute) worker_busy_loop() (vllm.v1.executor.multiproc_executor.WorkerProc method) worker_cls (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) worker_extension_cls (vllm.config.ParallelConfig attribute) (vllm.engine.arg_utils.EngineArgs attribute) worker_input (vllm.worker.multi_step_worker.MultiStepState attribute) worker_main() (vllm.v1.executor.multiproc_executor.WorkerProc static method) worker_response_mq (vllm.v1.executor.multiproc_executor.WorkerProcHandle attribute) WORKER_SPECIFIC_ENV_VARS (vllm.executor.ray_distributed_executor.RayDistributedExecutor attribute) WorkerBase (class in vllm.v1.worker.worker_base) (class in vllm.worker.worker_base) WorkerInput (class in vllm.worker.worker_base) WorkerLoRAManager (class in vllm.lora.worker_manager) WorkerMonitor (class in vllm.executor.multiproc_worker_utils) WorkerProc (class in vllm.v1.executor.multiproc_executor) WorkerProc.ResponseStatus (class in vllm.v1.executor.multiproc_executor) WorkerProcHandle (class in vllm.v1.executor.multiproc_executor) WorkerPromptAdapterManager (class in vllm.prompt_adapter.worker_manager) WorkerWrapperBase (class in vllm.worker.worker_base) workspace (vllm.v1.attention.backends.mla.common.MLACommonPrefillMetadata.ChunkedContextMetadata attribute) world_size (vllm.config.ParallelConfig attribute) (vllm.distributed.parallel_state.GroupCoordinator attribute) (vllm.distributed.utils.StatelessProcessGroup attribute) world_size_across_dp (vllm.config.ParallelConfig attribute) write_bytes_to_buffer() (vllm.distributed.kv_transfer.kv_pipe.mooncake_pipe.MooncakeTransferEngine method) write_file() (in module vllm.entrypoints.openai.run_batch) write_local_file() (in module vllm.entrypoints.openai.run_batch) write_to_json() (in module vllm.benchmarks.utils) write_to_kv_cache() (in module vllm.attention.backends.pallas) (in module vllm.v1.attention.backends.pallas) write_to_paged_cache() (vllm.attention.ops.hpu_paged_attn.HPUPagedAttention static method) (vllm.attention.ops.paged_attn.PagedAttention static method) (vllm.attention.ops.rocm_aiter_paged_attn.AITERPagedAttention static method) write_zeros_to_output() (in module vllm.model_executor.layers.fused_moe.fused_moe) wvSplitK() (in module vllm._custom_ops) wvSplitKQ() (in module vllm._custom_ops) X X86 (vllm.platforms.interface.CpuArchEnum attribute) XFormersBackend (class in vllm.attention.backends.xformers) XFormersImpl (class in vllm.attention.backends.xformers) XFormersMetadata (class in vllm.attention.backends.xformers) XFormersMetadataBuilder (class in vllm.attention.backends.xformers) XgrammarBackend (class in vllm.v1.structured_output.backend_xgrammar) XgrammarGrammar (class in vllm.v1.structured_output.backend_xgrammar) XGrammarLogitsProcessor (class in vllm.model_executor.guided_decoding.xgrammar_decoding) XLAScaledMMLinearKernel (class in vllm.model_executor.layers.quantization.kernels.scaled_mm.xla) XPU (vllm.platforms.interface.PlatformEnum attribute) xpu_platform_plugin() (in module vllm.platforms) XpuCommunicator (class in vllm.distributed.device_communicators.xpu_communicator) XPUModelRunner (class in vllm.worker.xpu_model_runner) XPUPlatform (class in vllm.platforms.xpu) XPUWorker (class in vllm.worker.xpu_worker) Y yarn_get_mscale() (in module vllm.model_executor.layers.rotary_embedding) (in module vllm.model_executor.models.deepseek_v2) YaRNScalingRotaryEmbedding (class in vllm.model_executor.layers.rotary_embedding) Z Zamba2Attention (class in vllm.model_executor.models.zamba2) Zamba2AttentionDecoderLayer (class in vllm.model_executor.models.zamba2) Zamba2ForCausalLM (class in vllm.model_executor.models.zamba2) Zamba2HybridLayer (class in vllm.model_executor.models.zamba2) Zamba2LoRA (class in vllm.model_executor.models.zamba2) Zamba2MambaDecoderLayer (class in vllm.model_executor.models.zamba2) Zamba2MLP (class in vllm.model_executor.models.zamba2) Zamba2Model (class in vllm.model_executor.models.zamba2) zero_points (vllm.model_executor.layers.quantization.kernels.mixed_precision.MPLinearKernel.MPLinearLayerConfig attribute) ZEROS_MODE (vllm.model_executor.layers.quantization.bitblas.BitBLASConfig attribute) (vllm.model_executor.layers.quantization.gptq_bitblas.GPTQBitBLASConfig attribute) zeta_prompt (in module vllm.benchmarks.datasets) zip_enc_dec_prompts() (in module vllm.inputs.data) zmq_ctx() (in module vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector) zmq_socket_ctx() (in module vllm.utils) ZmqEventPublisher (class in vllm.distributed.kv_events)