Skip to content

vllm.engine.multiprocessing

Modules:

Name Description
client
engine

IPC_DATA_EXT module-attribute

IPC_DATA_EXT = '_data_socket'

IPC_HEALTH_EXT module-attribute

IPC_HEALTH_EXT = '_health_socket'

IPC_INPUT_EXT module-attribute

IPC_INPUT_EXT = '_input_socket'

IPC_OUTPUT_EXT module-attribute

IPC_OUTPUT_EXT = '_output_socket'

REQUEST_OUTPUTS_T module-attribute

VLLM_RPC_SUCCESS_STR module-attribute

VLLM_RPC_SUCCESS_STR = 'SUCCESS'

MQEngineDeadError

Bases: RuntimeError

Source code in vllm/engine/multiprocessing/__init__.py
class MQEngineDeadError(RuntimeError):
    """``RuntimeError`` subclass built by ``ENGINE_DEAD_ERROR``.

    Adds no behavior of its own; it exists so this condition can be told
    apart from other ``RuntimeError`` instances by type.
    """

RPCAbortRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCAbortRequest:
    """RPC message that names a request by id.

    The class name indicates the named request is to be aborted; how the
    receiver acts on it is not shown in this module.
    """

    # Identifier of the request this message refers to.
    request_id: str

request_id instance-attribute

request_id: str

__init__

__init__(request_id: str) -> None

RPCAdapterLoadedResponse dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCAdapterLoadedResponse:
    """RPC response that carries only a ``request_id``.

    NOTE(review): presumably the reply to ``RPCLoadAdapterRequest`` with the
    same id -- confirm against the client/engine modules.
    """

    # Identifier echoed back in the response.
    request_id: str

request_id instance-attribute

request_id: str

__init__

__init__(request_id: str) -> None

RPCError dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCError:
    """RPC message describing a failure: an exception plus its context."""

    # Id of the related request, or None.
    # NOTE(review): presumably None when the error is not tied to a single
    # request -- confirm with the sender.
    request_id: Optional[str]
    # Flag reporting whether the engine is in an errored state.
    is_engine_errored: bool
    # The exception object being reported.
    exception: BaseException

exception instance-attribute

exception: BaseException

is_engine_errored instance-attribute

is_engine_errored: bool

request_id instance-attribute

request_id: Optional[str]

__init__

__init__(
    request_id: Optional[str],
    is_engine_errored: bool,
    exception: BaseException,
) -> None

RPCIsSleepingRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCIsSleepingRequest:
    """RPC query message identified by a ``request_id``.

    When the caller does not pass an id, every instance gets its own freshly
    generated UUID4 string.
    """

    # default_factory runs once per instance, so ids are not shared.
    request_id: str = field(
        default_factory=lambda: str(uuid.uuid4()),
    )

request_id class-attribute instance-attribute

request_id: str = field(
    default_factory=lambda: str(uuid4())
)

__init__

__init__(request_id: str = (lambda: str(uuid4()))()) -> None

RPCIsSleepingResponse dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCIsSleepingResponse:
    """RPC response pairing a ``request_id`` with a sleeping flag."""

    # Identifier carried by the response.
    request_id: str
    # Reported sleeping state.
    is_sleeping: bool

is_sleeping instance-attribute

is_sleeping: bool

request_id instance-attribute

request_id: str

__init__

__init__(request_id: str, is_sleeping: bool) -> None

RPCLoadAdapterRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCLoadAdapterRequest:
    """RPC message carrying a ``LoRARequest`` together with a request id.

    When the caller does not pass an id, every instance gets its own freshly
    generated UUID4 string.
    """

    # The LoRA request this message transports.
    lora_request: LoRARequest
    # default_factory runs once per instance, so ids are not shared.
    request_id: str = field(
        default_factory=lambda: str(uuid.uuid4()),
    )

lora_request instance-attribute

lora_request: LoRARequest

request_id class-attribute instance-attribute

request_id: str = field(
    default_factory=lambda: str(uuid4())
)

__init__

__init__(
    lora_request: LoRARequest,
    request_id: str = (lambda: str(uuid4()))(),
) -> None

RPCProcessRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCProcessRequest:
    """RPC message bundling a prompt with the parameters used to process it.

    The class supplies its own ``__init__`` (which ``@dataclass`` leaves in
    place) so that the deprecated ``inputs`` keyword is still accepted as a
    substitute for ``prompt``.
    """
    # Required fields.
    prompt: PromptType
    params: Union[SamplingParams, PoolingParams]
    request_id: str
    # Optional fields; the defaults mirror the ``__init__`` defaults below.
    lora_request: Optional[LoRARequest] = None
    trace_headers: Optional[Mapping[str, str]] = None
    prompt_adapter_request: Optional[PromptAdapterRequest] = None
    priority: int = 0

    # Typing-only signature: the preferred call form using ``prompt``.
    @overload
    def __init__(
        self,
        prompt: PromptType,
        params: Union[SamplingParams, PoolingParams],
        request_id: str,
        lora_request: Optional[LoRARequest] = None,
        trace_headers: Optional[Mapping[str, str]] = None,
        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
        priority: int = 0,
    ) -> None:
        ...

    # Typing-only signature: the deprecated keyword-only form using
    # ``inputs``.
    @overload
    @deprecated("'inputs' will be renamed to 'prompt'")
    def __init__(
        self,
        *,
        inputs: PromptType,
        params: Union[SamplingParams, PoolingParams],
        request_id: str,
        lora_request: Optional[LoRARequest] = None,
        trace_headers: Optional[Mapping[str, str]] = None,
        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
        priority: int = 0,
    ) -> None:
        ...

    @deprecate_kwargs(
        "inputs",
        additional_message="Please use the 'prompt' parameter instead.",
    )
    def __init__(
            self,
            prompt: Optional[PromptType] = None,
            params: Optional[Union[SamplingParams, PoolingParams]] = None,
            request_id: Optional[str] = None,
            lora_request: Optional[LoRARequest] = None,
            trace_headers: Optional[Mapping[str, str]] = None,
            prompt_adapter_request: Optional[PromptAdapterRequest] = None,
            priority: int = 0,
            *,
            inputs: Optional[PromptType] = None,  # DEPRECATED
    ) -> None:
        """Store the request fields, accepting ``inputs`` in place of ``prompt``.

        Args:
            prompt: The prompt to store. Declared optional only so that the
                deprecated ``inputs`` keyword can supply it instead.
            params: Sampling or pooling parameters; must not be ``None``.
            request_id: Identifier for the request; must not be ``None``.
            lora_request: Optional LoRA request stored as-is.
            trace_headers: Optional string-to-string mapping stored as-is.
            prompt_adapter_request: Optional prompt adapter request stored
                as-is.
            priority: Integer priority, ``0`` by default.
            inputs: Deprecated keyword-only substitute for ``prompt``. When
                not ``None`` it replaces whatever was passed as ``prompt``.

        Raises:
            AssertionError: If the effective prompt, ``params`` or
                ``request_id`` is ``None``. This check is an ``assert`` and
                is therefore skipped when Python runs with ``-O``.
        """
        # The deprecated keyword takes precedence when it is supplied.
        if inputs is not None:
            prompt = inputs
        assert (prompt is not None and params is not None
                and request_id is not None)

        super().__init__()

        self.prompt = prompt
        self.params = params
        self.request_id = request_id
        self.lora_request = lora_request
        self.trace_headers = trace_headers
        self.prompt_adapter_request = prompt_adapter_request
        self.priority = priority

lora_request class-attribute instance-attribute

lora_request: Optional[LoRARequest] = lora_request

params instance-attribute

params: Union[SamplingParams, PoolingParams] = params

priority class-attribute instance-attribute

priority: int = priority

prompt instance-attribute

prompt: PromptType = prompt

prompt_adapter_request class-attribute instance-attribute

prompt_adapter_request: Optional[PromptAdapterRequest] = (
    prompt_adapter_request
)

request_id instance-attribute

request_id: str = request_id

trace_headers class-attribute instance-attribute

trace_headers: Optional[Mapping[str, str]] = trace_headers

__init__

__init__(
    prompt: PromptType,
    params: Union[SamplingParams, PoolingParams],
    request_id: str,
    lora_request: Optional[LoRARequest] = None,
    trace_headers: Optional[Mapping[str, str]] = None,
    prompt_adapter_request: Optional[
        PromptAdapterRequest
    ] = None,
    priority: int = 0,
) -> None
__init__(
    *,
    inputs: PromptType,
    params: Union[SamplingParams, PoolingParams],
    request_id: str,
    lora_request: Optional[LoRARequest] = None,
    trace_headers: Optional[Mapping[str, str]] = None,
    prompt_adapter_request: Optional[
        PromptAdapterRequest
    ] = None,
    priority: int = 0,
) -> None
__init__(
    prompt: Optional[PromptType] = None,
    params: Optional[
        Union[SamplingParams, PoolingParams]
    ] = None,
    request_id: Optional[str] = None,
    lora_request: Optional[LoRARequest] = None,
    trace_headers: Optional[Mapping[str, str]] = None,
    prompt_adapter_request: Optional[
        PromptAdapterRequest
    ] = None,
    priority: int = 0,
    *,
    inputs: Optional[PromptType] = None,
) -> None
Source code in vllm/engine/multiprocessing/__init__.py
@deprecate_kwargs(
    "inputs",
    additional_message="Please use the 'prompt' parameter instead.",
)
def __init__(
        self,
        prompt: Optional[PromptType] = None,
        params: Optional[Union[SamplingParams, PoolingParams]] = None,
        request_id: Optional[str] = None,
        lora_request: Optional[LoRARequest] = None,
        trace_headers: Optional[Mapping[str, str]] = None,
        prompt_adapter_request: Optional[PromptAdapterRequest] = None,
        priority: int = 0,
        *,
        inputs: Optional[PromptType] = None,  # DEPRECATED
) -> None:
    """Store the request fields, accepting ``inputs`` in place of ``prompt``.

    A non-``None`` ``inputs`` replaces ``prompt``. The effective prompt,
    ``params`` and ``request_id`` are then asserted to be non-``None``
    before every argument is copied onto ``self``.
    """
    # The deprecated keyword wins over ``prompt`` whenever it is supplied.
    prompt = prompt if inputs is None else inputs
    assert not (prompt is None or params is None or request_id is None)

    super().__init__()

    # Required fields.
    self.prompt = prompt
    self.params = params
    self.request_id = request_id
    # Optional fields.
    self.lora_request = lora_request
    self.trace_headers = trace_headers
    self.prompt_adapter_request = prompt_adapter_request
    self.priority = priority

RPCResetMultiModalCacheRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCResetMultiModalCacheRequest(Enum):
    """Single-member enum used as an RPC message value.

    NOTE(review): by its name this asks for the multi-modal cache to be
    reset -- confirm in the engine module.
    """

    RESET = 1  # the only defined value

RESET class-attribute instance-attribute

RESET = 1

RPCResetPrefixCacheRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCResetPrefixCacheRequest:
    """RPC message carrying a ``Device`` value.

    NOTE(review): by its name this asks for the prefix cache of that device
    to be reset -- confirm in the engine module.
    """

    # The device this request refers to.
    device: Device

device instance-attribute

device: Device

__init__

__init__(device: Device) -> None

RPCSleepRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCSleepRequest(Enum):
    """Enum of RPC message values, one per sleep level (1 and 2).

    What each level does is not defined in this module.
    """

    SLEEP_LEVEL_1 = 1
    SLEEP_LEVEL_2 = 2

SLEEP_LEVEL_1 class-attribute instance-attribute

SLEEP_LEVEL_1 = 1

SLEEP_LEVEL_2 class-attribute instance-attribute

SLEEP_LEVEL_2 = 2

RPCStartupRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCStartupRequest(Enum):
    """Single-member enum used as an RPC message value at startup.

    NOTE(review): by its name ``IS_SERVER_READY`` is a readiness query --
    confirm in the client/engine modules.
    """

    IS_SERVER_READY = 1  # the only defined value

IS_SERVER_READY class-attribute instance-attribute

IS_SERVER_READY = 1

RPCStartupResponse dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCStartupResponse:
    """RPC response carrying a single boolean, ``tracing_enabled``."""

    # Reported tracing state.
    tracing_enabled: bool

tracing_enabled instance-attribute

tracing_enabled: bool

__init__

__init__(tracing_enabled: bool) -> None

RPCUProfileRequest

Bases: Enum

Source code in vllm/engine/multiprocessing/__init__.py
class RPCUProfileRequest(Enum):
    """Enum of RPC message values: one to start and one to stop a profile.

    What is profiled is not defined in this module.
    """

    START_PROFILE = 1
    STOP_PROFILE = 2

START_PROFILE class-attribute instance-attribute

START_PROFILE = 1

STOP_PROFILE class-attribute instance-attribute

STOP_PROFILE = 2

RPCWakeUpRequest dataclass

Source code in vllm/engine/multiprocessing/__init__.py
@dataclass
class RPCWakeUpRequest:
    """RPC message with an optional list of string tags.

    NOTE(review): the meaning of the tags, and of leaving them as ``None``,
    is decided by the receiver -- confirm in the engine module.
    """

    # Optional tags; None when the caller passes nothing.
    tags: Optional[list[str]] = None

tags class-attribute instance-attribute

tags: Optional[list[str]] = None

__init__

__init__(tags: Optional[list[str]] = None) -> None

ENGINE_DEAD_ERROR

ENGINE_DEAD_ERROR(
    error: Optional[BaseException] = None,
) -> MQEngineDeadError
Source code in vllm/engine/multiprocessing/__init__.py
def ENGINE_DEAD_ERROR(
        error: Optional[BaseException] = None) -> MQEngineDeadError:
    """Build (without raising) the error reporting a stopped engine loop.

    Args:
        error: Optional underlying exception. When given, its ``repr`` is
            appended to the message after a colon, followed by a period.

    Returns:
        A new ``MQEngineDeadError`` carrying the assembled message.
    """
    message = ("Engine loop is not running. Inspect the stacktrace to "
               "find the original error")
    if error is not None:
        # Only the detailed variant ends with a period.
        message = f"{message}: {repr(error)}."
    return MQEngineDeadError(message)