Skip to content

vllm.connections

global_http_connection module-attribute

global_http_connection = HTTPConnection()

The global HTTPConnection instance used by vLLM.

HTTPConnection

Helper class to send HTTP requests.

Source code in vllm/connections.py
class HTTPConnection:
    """Helper class to send HTTP requests."""

    def __init__(self, *, reuse_client: bool = True) -> None:
        super().__init__()

        self.reuse_client = reuse_client

        self._sync_client: Optional[requests.Session] = None
        self._async_client: Optional[aiohttp.ClientSession] = None

    def get_sync_client(self) -> requests.Session:
        if self._sync_client is None or not self.reuse_client:
            self._sync_client = requests.Session()

        return self._sync_client

    # NOTE: We intentionally use an async function even though it is not
    # required, so that the client is only accessible inside async event loop
    async def get_async_client(self) -> aiohttp.ClientSession:
        if self._async_client is None or not self.reuse_client:
            self._async_client = aiohttp.ClientSession(trust_env=True)

        return self._async_client

    def _validate_http_url(self, url: str):
        parsed_url = urlparse(url)

        if parsed_url.scheme not in ("http", "https"):
            raise ValueError("Invalid HTTP URL: A valid HTTP URL "
                             "must have scheme 'http' or 'https'.")

    def _headers(self, **extras: str) -> MutableMapping[str, str]:
        return {"User-Agent": f"vLLM/{VLLM_VERSION}", **extras}

    def get_response(
        self,
        url: str,
        *,
        stream: bool = False,
        timeout: Optional[float] = None,
        extra_headers: Optional[Mapping[str, str]] = None,
    ):
        self._validate_http_url(url)

        client = self.get_sync_client()
        extra_headers = extra_headers or {}

        return client.get(url,
                          headers=self._headers(**extra_headers),
                          stream=stream,
                          timeout=timeout)

    async def get_async_response(
        self,
        url: str,
        *,
        timeout: Optional[float] = None,
        extra_headers: Optional[Mapping[str, str]] = None,
    ):
        self._validate_http_url(url)

        client = await self.get_async_client()
        extra_headers = extra_headers or {}

        return client.get(url,
                          headers=self._headers(**extra_headers),
                          timeout=timeout)

    def get_bytes(self, url: str, *, timeout: Optional[float] = None) -> bytes:
        with self.get_response(url, timeout=timeout) as r:
            r.raise_for_status()

            return r.content

    async def async_get_bytes(
        self,
        url: str,
        *,
        timeout: Optional[float] = None,
    ) -> bytes:
        async with await self.get_async_response(url, timeout=timeout) as r:
            r.raise_for_status()

            return await r.read()

    def get_text(self, url: str, *, timeout: Optional[float] = None) -> str:
        with self.get_response(url, timeout=timeout) as r:
            r.raise_for_status()

            return r.text

    async def async_get_text(
        self,
        url: str,
        *,
        timeout: Optional[float] = None,
    ) -> str:
        async with await self.get_async_response(url, timeout=timeout) as r:
            r.raise_for_status()

            return await r.text()

    def get_json(self, url: str, *, timeout: Optional[float] = None) -> str:
        with self.get_response(url, timeout=timeout) as r:
            r.raise_for_status()

            return r.json()

    async def async_get_json(
        self,
        url: str,
        *,
        timeout: Optional[float] = None,
    ) -> str:
        async with await self.get_async_response(url, timeout=timeout) as r:
            r.raise_for_status()

            return await r.json()

    def download_file(
        self,
        url: str,
        save_path: Path,
        *,
        timeout: Optional[float] = None,
        chunk_size: int = 128,
    ) -> Path:
        with self.get_response(url, timeout=timeout) as r:
            r.raise_for_status()

            with save_path.open("wb") as f:
                for chunk in r.iter_content(chunk_size):
                    f.write(chunk)

        return save_path

    async def async_download_file(
        self,
        url: str,
        save_path: Path,
        *,
        timeout: Optional[float] = None,
        chunk_size: int = 128,
    ) -> Path:
        async with await self.get_async_response(url, timeout=timeout) as r:
            r.raise_for_status()

            with save_path.open("wb") as f:
                async for chunk in r.content.iter_chunked(chunk_size):
                    f.write(chunk)

        return save_path

_async_client instance-attribute

_async_client: Optional[ClientSession] = None

_sync_client instance-attribute

_sync_client: Optional[Session] = None

reuse_client instance-attribute

reuse_client = reuse_client

__init__

__init__(*, reuse_client: bool = True) -> None
Source code in vllm/connections.py
def __init__(self, *, reuse_client: bool = True) -> None:
    super().__init__()

    self.reuse_client = reuse_client

    self._sync_client: Optional[requests.Session] = None
    self._async_client: Optional[aiohttp.ClientSession] = None

_headers

_headers(**extras: str) -> MutableMapping[str, str]
Source code in vllm/connections.py
def _headers(self, **extras: str) -> MutableMapping[str, str]:
    return {"User-Agent": f"vLLM/{VLLM_VERSION}", **extras}

_validate_http_url

_validate_http_url(url: str)
Source code in vllm/connections.py
def _validate_http_url(self, url: str):
    parsed_url = urlparse(url)

    if parsed_url.scheme not in ("http", "https"):
        raise ValueError("Invalid HTTP URL: A valid HTTP URL "
                         "must have scheme 'http' or 'https'.")

async_download_file async

async_download_file(
    url: str,
    save_path: Path,
    *,
    timeout: Optional[float] = None,
    chunk_size: int = 128,
) -> Path
Source code in vllm/connections.py
async def async_download_file(
    self,
    url: str,
    save_path: Path,
    *,
    timeout: Optional[float] = None,
    chunk_size: int = 128,
) -> Path:
    async with await self.get_async_response(url, timeout=timeout) as r:
        r.raise_for_status()

        with save_path.open("wb") as f:
            async for chunk in r.content.iter_chunked(chunk_size):
                f.write(chunk)

    return save_path

async_get_bytes async

async_get_bytes(
    url: str, *, timeout: Optional[float] = None
) -> bytes
Source code in vllm/connections.py
async def async_get_bytes(
    self,
    url: str,
    *,
    timeout: Optional[float] = None,
) -> bytes:
    async with await self.get_async_response(url, timeout=timeout) as r:
        r.raise_for_status()

        return await r.read()

async_get_json async

async_get_json(
    url: str, *, timeout: Optional[float] = None
) -> str
Source code in vllm/connections.py
async def async_get_json(
    self,
    url: str,
    *,
    timeout: Optional[float] = None,
) -> str:
    async with await self.get_async_response(url, timeout=timeout) as r:
        r.raise_for_status()

        return await r.json()

async_get_text async

async_get_text(
    url: str, *, timeout: Optional[float] = None
) -> str
Source code in vllm/connections.py
async def async_get_text(
    self,
    url: str,
    *,
    timeout: Optional[float] = None,
) -> str:
    async with await self.get_async_response(url, timeout=timeout) as r:
        r.raise_for_status()

        return await r.text()

download_file

download_file(
    url: str,
    save_path: Path,
    *,
    timeout: Optional[float] = None,
    chunk_size: int = 128,
) -> Path
Source code in vllm/connections.py
def download_file(
    self,
    url: str,
    save_path: Path,
    *,
    timeout: Optional[float] = None,
    chunk_size: int = 128,
) -> Path:
    with self.get_response(url, timeout=timeout) as r:
        r.raise_for_status()

        with save_path.open("wb") as f:
            for chunk in r.iter_content(chunk_size):
                f.write(chunk)

    return save_path

get_async_client async

get_async_client() -> ClientSession
Source code in vllm/connections.py
async def get_async_client(self) -> aiohttp.ClientSession:
    if self._async_client is None or not self.reuse_client:
        self._async_client = aiohttp.ClientSession(trust_env=True)

    return self._async_client

get_async_response async

get_async_response(
    url: str,
    *,
    timeout: Optional[float] = None,
    extra_headers: Optional[Mapping[str, str]] = None,
)
Source code in vllm/connections.py
async def get_async_response(
    self,
    url: str,
    *,
    timeout: Optional[float] = None,
    extra_headers: Optional[Mapping[str, str]] = None,
):
    self._validate_http_url(url)

    client = await self.get_async_client()
    extra_headers = extra_headers or {}

    return client.get(url,
                      headers=self._headers(**extra_headers),
                      timeout=timeout)

get_bytes

get_bytes(
    url: str, *, timeout: Optional[float] = None
) -> bytes
Source code in vllm/connections.py
def get_bytes(self, url: str, *, timeout: Optional[float] = None) -> bytes:
    with self.get_response(url, timeout=timeout) as r:
        r.raise_for_status()

        return r.content

get_json

get_json(
    url: str, *, timeout: Optional[float] = None
) -> str
Source code in vllm/connections.py
def get_json(self, url: str, *, timeout: Optional[float] = None) -> str:
    with self.get_response(url, timeout=timeout) as r:
        r.raise_for_status()

        return r.json()

get_response

get_response(
    url: str,
    *,
    stream: bool = False,
    timeout: Optional[float] = None,
    extra_headers: Optional[Mapping[str, str]] = None,
)
Source code in vllm/connections.py
def get_response(
    self,
    url: str,
    *,
    stream: bool = False,
    timeout: Optional[float] = None,
    extra_headers: Optional[Mapping[str, str]] = None,
):
    self._validate_http_url(url)

    client = self.get_sync_client()
    extra_headers = extra_headers or {}

    return client.get(url,
                      headers=self._headers(**extra_headers),
                      stream=stream,
                      timeout=timeout)

get_sync_client

get_sync_client() -> Session
Source code in vllm/connections.py
def get_sync_client(self) -> requests.Session:
    if self._sync_client is None or not self.reuse_client:
        self._sync_client = requests.Session()

    return self._sync_client

get_text

get_text(
    url: str, *, timeout: Optional[float] = None
) -> str
Source code in vllm/connections.py
def get_text(self, url: str, *, timeout: Optional[float] = None) -> str:
    with self.get_response(url, timeout=timeout) as r:
        r.raise_for_status()

        return r.text