Offline Chat With Tools

Source vllm-project/vllm.

  1# ruff: noqa
  2import json
  3import random
  4import string
  5
  6from vllm import LLM
  7from vllm.sampling_params import SamplingParams
  8
  9# This script is an offline demo for function calling
 10#
 11# If you want to run a server/client setup, please follow this code:
 12#
 13# - Server:
 14#
 15# ```bash
 16# vllm serve mistralai/Mistral-7B-Instruct-v0.3 --tokenizer-mode mistral --load-format mistral --config-format mistral
 17# ```
 18#
 19# - Client:
 20#
 21# ```bash
 22# curl --location 'http://<your-node-url>:8000/v1/chat/completions' \
 23# --header 'Content-Type: application/json' \
 24# --header 'Authorization: Bearer token' \
 25# --data '{
#     "model": "mistralai/Mistral-7B-Instruct-v0.3",
 27#     "messages": [
 28#       {
 29#         "role": "user",
 30#         "content": [
 31#             {"type" : "text", "text": "Describe this image in detail please."},
 32#             {"type": "image_url", "image_url": {"url": "https://s3.amazonaws.com/cms.ipressroom.com/338/files/201808/5b894ee1a138352221103195_A680%7Ejogging-edit/A680%7Ejogging-edit_hero.jpg"}},
 33#             {"type" : "text", "text": "and this one as well. Answer in French."},
 34#             {"type": "image_url", "image_url": {"url": "https://www.wolframcloud.com/obj/resourcesystem/images/a0e/a0ee3983-46c6-4c92-b85d-059044639928/6af8cfb971db031b.png"}}
 35#         ]
 36#       }
 37#     ]
 38#   }'
 39# ```
 40#
# Usage (the script reads no command-line arguments):
#     python demo.py
 44
 45model_name = "mistralai/Mistral-7B-Instruct-v0.3"
 46# or switch to "mistralai/Mistral-Nemo-Instruct-2407"
 47# or "mistralai/Mistral-Large-Instruct-2407"
 48# or any other mistral model with function calling ability
 49
 50sampling_params = SamplingParams(max_tokens=8192, temperature=0.0)
 51llm = LLM(model=model_name,
 52          tokenizer_mode="mistral",
 53          config_format="mistral",
 54          load_format="mistral")
 55
 56
 57def generate_random_id(length=9):
 58    characters = string.ascii_letters + string.digits
 59    random_id = ''.join(random.choice(characters) for _ in range(length))
 60    return random_id
 61
 62
 63# simulate an API that can be called
 64def get_current_weather(city: str, state: str, unit: 'str'):
 65    return (f"The weather in {city}, {state} is 85 degrees {unit}. It is "
 66            "partly cloudly, with highs in the 90's.")
 67
 68
 69tool_funtions = {"get_current_weather": get_current_weather}
 70
 71tools = [{
 72    "type": "function",
 73    "function": {
 74        "name": "get_current_weather",
 75        "description": "Get the current weather in a given location",
 76        "parameters": {
 77            "type": "object",
 78            "properties": {
 79                "city": {
 80                    "type":
 81                    "string",
 82                    "description":
 83                    "The city to find the weather for, e.g. 'San Francisco'"
 84                },
 85                "state": {
 86                    "type":
 87                    "string",
 88                    "description":
 89                    "the two-letter abbreviation for the state that the city is"
 90                    " in, e.g. 'CA' which would mean 'California'"
 91                },
 92                "unit": {
 93                    "type": "string",
 94                    "description": "The unit to fetch the temperature in",
 95                    "enum": ["celsius", "fahrenheit"]
 96                }
 97            },
 98            "required": ["city", "state", "unit"]
 99        }
100    }
101}]
102
103messages = [{
104    "role":
105    "user",
106    "content":
107    "Can you tell me what the temperate will be in Dallas, in fahrenheit?"
108}]
109
110outputs = llm.chat(messages, sampling_params=sampling_params, tools=tools)
111output = outputs[0].outputs[0].text.strip()
112
113# append the assistant message
114messages.append({
115    "role": "assistant",
116    "content": output,
117})
118
119# let's now actually parse and execute the model's output simulating an API call by using the
120# above defined function
121tool_calls = json.loads(output)
122tool_answers = [
123    tool_funtions[call['name']](**call['arguments']) for call in tool_calls
124]
125
126# append the answer as a tool message and let the LLM give you an answer
127messages.append({
128    "role": "tool",
129    "content": "\n\n".join(tool_answers),
130    "tool_call_id": generate_random_id(),
131})
132
133outputs = llm.chat(messages, sampling_params, tools=tools)
134
135print(outputs[0].outputs[0].text.strip())
136# yields
137#   'The weather in Dallas, TX is 85 degrees fahrenheit. '
138#   'It is partly cloudly, with highs in the 90's.'