Source examples/online_serving/openai_transcription_client.py.
OpenAI Transcription Client#
# SPDX-License-Identifier: Apache-2.0
import asyncio
import json
import httpx
from openai import OpenAI
from vllm.assets.audio import AudioAsset
mary_had_lamb = AudioAsset('mary_had_lamb').get_local_path()
winning_call = AudioAsset('winning_call').get_local_path()
# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
client = OpenAI(
api_key=openai_api_key,
base_url=openai_api_base,
)
def sync_openai():
with open(str(mary_had_lamb), "rb") as f:
transcription = client.audio.transcriptions.create(
file=f,
model="openai/whisper-small",
language="en",
response_format="json",
temperature=0.0)
print("transcription result:", transcription.text)
sync_openai()
# OpenAI Transcription API client does not support streaming.
async def stream_openai_response():
data = {
"language": "en",
'stream': True,
"model": "openai/whisper-large-v3",
}
url = openai_api_base + "/audio/transcriptions"
print("transcription result:", end=' ')
async with httpx.AsyncClient() as client:
with open(str(winning_call), "rb") as f:
async with client.stream('POST', url, files={'file': f},
data=data) as response:
async for line in response.aiter_lines():
# Each line is a JSON object prefixed with 'data: '
if line:
if line.startswith('data: '):
line = line[len('data: '):]
# Last chunk, stream ends
if line.strip() == '[DONE]':
break
# Parse the JSON response
chunk = json.loads(line)
# Extract and print the content
content = chunk['choices'][0].get('delta',
{}).get('content')
print(content, end='')
# Run the asynchronous function
asyncio.run(stream_openai_response())