# Source: examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py
# OpenAI Chat Completion Structured Outputs With Reasoning
# SPDX-License-Identifier: Apache-2.0
"""
This example shows how to generate structured outputs from reasoning models
like DeepSeek-R1. The thinking process is not guided by the JSON schema
provided by the user; only the final output is structured.
To run this example, you need to start the vLLM server with the reasoning
parser:
```bash
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
--reasoning-parser deepseek_r1
```
This example demonstrates how to generate chat completions from reasoning models
using the OpenAI Python client library.
"""
from enum import Enum
from openai import OpenAI
from pydantic import BaseModel
# Connection settings for vLLM's API server; main() passes both values to the
# OpenAI client. Modify the API key and API base to match your deployment.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
def print_completion_details(completion):
    """Print the reasoning trace and the final answer of the first choice.

    Args:
        completion: A chat-completion response exposing
            ``choices[0].message``.

    ``reasoning_content`` may be missing from the message when the server
    does not attach it, so it is read with a fallback: ``None`` is printed
    in that case instead of raising ``AttributeError``.
    """
    message = completion.choices[0].message
    print("reasoning_content: ", getattr(message, "reasoning_content", None))
    print("content: ", message.content)
# Guided decoding by Regex
def guided_regex_completion(client: OpenAI, model: str):
    """Request an answer restricted to a regular expression.

    Args:
        client: OpenAI client used to send the chat request.
        model: Identifier of the model to query.
    """
    question = "What is the capital of France?"
    user_message = {"role": "user", "content": question}
    # The regex constraint travels in ``extra_body`` rather than as a
    # keyword argument of ``create``.
    decoding_options = {"guided_regex": "(Paris|London)"}

    response = client.chat.completions.create(
        model=model,
        messages=[user_message],
        extra_body=decoding_options,
    )
    print_completion_details(response)
class People(BaseModel):
    # Shape of the JSON object requested by guided_json_completion.
    # Documented with ``#`` comments rather than a docstring so that the
    # output of ``model_json_schema()`` is left untouched.
    name: str
    age: int
def guided_json_completion(client: OpenAI, model: str):
    """Request a reply whose final answer must follow the ``People`` schema.

    Args:
        client: OpenAI client used to send the chat request.
        model: Identifier of the model to query.
    """
    person_schema = People.model_json_schema()
    question = "Generate a JSON with the name and age of one random person."

    # Assemble the request first, then send it in a single call.
    request = {
        "model": model,
        "messages": [{"role": "user", "content": question}],
        "extra_body": {"guided_json": person_schema},
    }
    response = client.chat.completions.create(**request)
    print_completion_details(response)
# Guided decoding by JSON using Pydantic schema
class CarType(str, Enum):
    # Closed set of body styles accepted for CarDescription.car_type.
    # Members are ``str`` instances; the values (not the member names) are
    # what ``CarType("...")`` looks up, and their casing is intentional.
    sedan = "sedan"
    suv = "SUV"
    truck = "Truck"
    coupe = "Coupe"
class CarDescription(BaseModel):
    # Shape of the JSON object requested by guided_car_json_completion.
    # Documented with ``#`` comments rather than a docstring so that the
    # output of ``model_json_schema()`` is left untouched.
    brand: str
    model: str
    # Restricted to the values declared on CarType.
    car_type: CarType
def guided_car_json_completion(client: OpenAI, model: str):
    """Request a reply whose final answer follows the ``CarDescription`` schema.

    Args:
        client: OpenAI client used to send the chat request.
        model: Identifier of the model to query.
    """
    json_schema = CarDescription.model_json_schema()
    # Adjacent string literals are joined with no separator, so the space
    # after "of" must be written explicitly (it was missing before, which
    # produced "ofthe" in the prompt).
    prompt = ("Generate a JSON with the brand, model and car_type of "
              "the most iconic car from the 90's")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={"guided_json": json_schema},
    )
    print_completion_details(completion)
# Guided decoding by Grammar
def guided_grammar_completion(client: OpenAI, model: str):
    """Request a reply whose final answer is constrained by a grammar.

    Args:
        client: OpenAI client used to send the chat request.
        model: Identifier of the model to query.
    """
    # Each rule is written with an explicit trailing newline; the resulting
    # text is the same as the former triple-quoted literal.
    # NOTE: the grammar only admits col_1/col_2 and table_1/table_2, so the
    # output cannot literally contain the names mentioned in the prompt.
    simplified_sql_grammar = (
        "\n"
        'root ::= select_statement\n'
        'select_statement ::= "SELECT " column " from " table " where " '
        'condition\n'
        'column ::= "col_1 " | "col_2 "\n'
        'table ::= "table_1 " | "table_2 "\n'
        'condition ::= column "= " number\n'
        'number ::= "1 " | "2 "\n'
    )
    # This may be very slow https://github.com/vllm-project/vllm/issues/12122
    # Adjacent string literals are joined with no separator, so the space
    # after 'email' must be written explicitly (it was missing before, which
    # produced "'email'from" in the prompt).
    prompt = ("Generate an SQL query to show the 'username' and 'email' "
              "from the 'users' table.")
    completion = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={"guided_grammar": simplified_sql_grammar},
    )
    print_completion_details(completion)
def main():
    """Run each guided-decoding demo against the first model the server lists."""
    client: OpenAI = OpenAI(api_key=openai_api_key, base_url=openai_api_base)

    # The demos target whichever model appears first in the server's list.
    available = client.models.list()
    model: str = available.data[0].id

    # (banner, demo) pairs, executed in order; every banner except the first
    # starts with a newline to separate it from the previous demo's output.
    demos = (
        ("Guided Regex Completion:", guided_regex_completion),
        ("\nGuided JSON Completion (People):", guided_json_completion),
        ("\nGuided JSON Completion (CarDescription):",
         guided_car_json_completion),
        ("\nGuided Grammar Completion:", guided_grammar_completion),
    )
    for banner, run_demo in demos:
        print(banner)
        run_demo(client, model)
# Run the demos only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()