OpenAI Chat Completion Client With Tools#
Source vllm-project/vllm.
1"""
2Set up this example by starting a vLLM OpenAI-compatible server with tool call
3options enabled. For example:
4
5IMPORTANT: for mistral, you must use one of the provided mistral tool call
6templates, or your own - the model default doesn't work for tool calls with vLLM
7See the vLLM docs on OpenAI server & tool calling for more details.
8
9vllm serve --model mistralai/Mistral-7B-Instruct-v0.3 \
10 --chat-template examples/tool_chat_template_mistral.jinja \
11 --enable-auto-tool-choice --tool-call-parser mistral
12
13OR
14vllm serve --model NousResearch/Hermes-2-Pro-Llama-3-8B \
15 --chat-template examples/tool_chat_template_hermes.jinja \
16 --enable-auto-tool-choice --tool-call-parser hermes
17"""
18import json
19
20from openai import OpenAI
21
22# Modify OpenAI's API key and API base to use vLLM's API server.
23openai_api_key = "EMPTY"
24openai_api_base = "http://localhost:8000/v1"
25
26client = OpenAI(
27 # defaults to os.environ.get("OPENAI_API_KEY")
28 api_key=openai_api_key,
29 base_url=openai_api_base,
30)
31
32models = client.models.list()
33model = models.data[0].id
34
35tools = [{
36 "type": "function",
37 "function": {
38 "name": "get_current_weather",
39 "description": "Get the current weather in a given location",
40 "parameters": {
41 "type": "object",
42 "properties": {
43 "city": {
44 "type":
45 "string",
46 "description":
47 "The city to find the weather for, e.g. 'San Francisco'"
48 },
49 "state": {
50 "type":
51 "string",
52 "description":
53 "the two-letter abbreviation for the state that the city is"
54 " in, e.g. 'CA' which would mean 'California'"
55 },
56 "unit": {
57 "type": "string",
58 "description": "The unit to fetch the temperature in",
59 "enum": ["celsius", "fahrenheit"]
60 }
61 },
62 "required": ["city", "state", "unit"]
63 }
64 }
65}]
66
67messages = [{
68 "role": "user",
69 "content": "Hi! How are you doing today?"
70}, {
71 "role": "assistant",
72 "content": "I'm doing well! How can I help you?"
73}, {
74 "role":
75 "user",
76 "content":
77 "Can you tell me what the temperate will be in Dallas, in fahrenheit?"
78}]
79
80chat_completion = client.chat.completions.create(messages=messages,
81 model=model,
82 tools=tools)
83
84print("Chat completion results:")
85print(chat_completion)
86print("\n\n")
87
88tool_calls_stream = client.chat.completions.create(messages=messages,
89 model=model,
90 tools=tools,
91 stream=True)
92
93chunks = []
94for chunk in tool_calls_stream:
95 chunks.append(chunk)
96 if chunk.choices[0].delta.tool_calls:
97 print(chunk.choices[0].delta.tool_calls[0])
98 else:
99 print(chunk.choices[0].delta)
100
101arguments = []
102tool_call_idx = -1
103for chunk in chunks:
104
105 if chunk.choices[0].delta.tool_calls:
106 tool_call = chunk.choices[0].delta.tool_calls[0]
107
108 if tool_call.index != tool_call_idx:
109 if tool_call_idx >= 0:
110 print(
111 f"streamed tool call arguments: {arguments[tool_call_idx]}"
112 )
113 tool_call_idx = chunk.choices[0].delta.tool_calls[0].index
114 arguments.append("")
115 if tool_call.id:
116 print(f"streamed tool call id: {tool_call.id} ")
117
118 if tool_call.function:
119 if tool_call.function.name:
120 print(f"streamed tool call name: {tool_call.function.name}")
121
122 if tool_call.function.arguments:
123 arguments[tool_call_idx] += tool_call.function.arguments
124
125if len(arguments):
126 print(f"streamed tool call arguments: {arguments[-1]}")
127
128print("\n\n")
129
130messages.append({
131 "role": "assistant",
132 "tool_calls": chat_completion.choices[0].message.tool_calls
133})
134
135
136# Now, simulate a tool call
137def get_current_weather(city: str, state: str, unit: 'str'):
138 return ("The weather in Dallas, Texas is 85 degrees fahrenheit. It is "
139 "partly cloudly, with highs in the 90's.")
140
141
142available_tools = {"get_current_weather": get_current_weather}
143
144completion_tool_calls = chat_completion.choices[0].message.tool_calls
145for call in completion_tool_calls:
146 tool_to_call = available_tools[call.function.name]
147 args = json.loads(call.function.arguments)
148 result = tool_to_call(**args)
149 print(result)
150 messages.append({
151 "role": "tool",
152 "content": result,
153 "tool_call_id": call.id,
154 "name": call.function.name
155 })
156
157chat_completion_2 = client.chat.completions.create(messages=messages,
158 model=model,
159 tools=tools,
160 stream=False)
161print("\n\n")
162print(chat_completion_2)