OpenAI Chat Completion With Reasoning

Source: `examples/online_serving/openai_chat_completion_with_reasoning.py`.

````python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""
An example shows how to generate chat completions from reasoning models
like DeepSeekR1.

To run this example, you need to start the vLLM server
with the reasoning parser:

```bash
vllm serve deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \
    --reasoning-parser deepseek_r1
```
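
Then run this client script against the server:

```bash
python examples/online_serving/openai_chat_completion_with_reasoning.py
```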

This example uses the OpenAI Python client library. With the reasoning parser
enabled, the server returns the model's chain of thought in a separate
`reasoning_content` field alongside the usual `content` field.
"""

from openai import OpenAI

# Modify OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"


def main():
    client = OpenAI(
        api_key=openai_api_key,
        base_url=openai_api_base,
    )

    models = client.models.list()
    # Use the first served model; this assumes a single-model deployment.
    model = models.data[0].id

    # Round 1
    messages = [{"role": "user", "content": "9.11 and 9.8, which is greater?"}]
    # ruff: noqa: E501
    # For IBM Granite models, add: `extra_body={"chat_template_kwargs": {"thinking": True}}`
    response = client.chat.completions.create(model=model, messages=messages)
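    # A sketch of that Granite variant (assumes a Granite reasoning model is
    # being served; vLLM forwards `chat_template_kwargs` from `extra_body` to
    # the chat template):
    # response = client.chat.completions.create(
    #     model=model,
    #     messages=messages,
    #     extra_body={"chat_template_kwargs": {"thinking": True}},
    # )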

    # The reasoning parser splits the output: the chain of thought lands in
    # `reasoning_content` (a vLLM extension to the OpenAI response schema) and
    # the final answer in `content`.
    reasoning_content = response.choices[0].message.reasoning_content
    content = response.choices[0].message.content

    print("reasoning_content for Round 1:", reasoning_content)
    print("content for Round 1:", content)

    # Round 2: continue the conversation. Only the final `content` is appended
    # to the history; the Round 1 reasoning is deliberately not fed back to
    # the model.
    messages.append({"role": "assistant", "content": content})
    messages.append(
        {
            "role": "user",
            "content": "How many Rs are there in the word 'strawberry'?",
        }
    )
    response = client.chat.completions.create(model=model, messages=messages)

    reasoning_content = response.choices[0].message.reasoning_content
    content = response.choices[0].message.content

    print("reasoning_content for Round 2:", reasoning_content)
    print("content for Round 2:", content)


if __name__ == "__main__":
    main()
````
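
The same endpoint can also be streamed. A minimal sketch, assuming the server
command above (vLLM streams the reasoning text in `delta.reasoning_content`
chunks before the final answer arrives in `delta.content`):

```python
from openai import OpenAI

client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
stream = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    messages=[{"role": "user", "content": "9.11 and 9.8, which is greater?"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta
    # `reasoning_content` is a vLLM extension, so read it defensively.
    if getattr(delta, "reasoning_content", None):
        print(delta.reasoning_content, end="", flush=True)
    elif getattr(delta, "content", None):
        print(delta.content, end="", flush=True)
print()
```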