Offline Inference Pixtral
Source: vllm-project/vllm.
1# ruff: noqa
2import argparse
3
4from vllm import LLM
5from vllm.sampling_params import SamplingParams
6
7# This script is an offline demo for running Pixtral.
8#
9# If you want to run a server/client setup, please follow this code:
10#
11# - Server:
12#
13# ```bash
14# vllm serve mistralai/Pixtral-12B-2409 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
15# ```
16#
17# - Client:
18#
19# ```bash
20# curl --location 'http://<your-node-url>:8000/v1/chat/completions' \
21# --header 'Content-Type: application/json' \
22# --header 'Authorization: Bearer token' \
23# --data '{
24# "model": "mistralai/Pixtral-12B-2409",
25# "messages": [
26# {
27# "role": "user",
28# "content": [
29# {"type" : "text", "text": "Describe this image in detail please."},
30# {"type": "image_url", "image_url": {"url": "https://s3.amazonaws.com/cms.ipressroom.com/338/files/201808/5b894ee1a138352221103195_A680%7Ejogging-edit/A680%7Ejogging-edit_hero.jpg"}},
31# {"type" : "text", "text": "and this one as well. Answer in French."},
32# {"type": "image_url", "image_url": {"url": "https://www.wolframcloud.com/obj/resourcesystem/images/a0e/a0ee3983-46c6-4c92-b85d-059044639928/6af8cfb971db031b.png"}}
33# ]
34# }
35# ]
36# }'
37# ```
38#
39# Usage:
40# python demo.py simple
41# python demo.py advanced
42
43
def run_simple_demo():
    """Ask Pixtral to describe one remote image and print the answer.

    Builds an ``LLM`` for ``mistralai/Pixtral-12B-2409`` in ``mistral``
    tokenizer mode, sends a single user turn made of a text prompt followed
    by an image URL, and prints the text of the first output of the first
    returned result.
    """
    question_part = {
        "type": "text",
        "text": "Describe this image in one sentence.",
    }
    picture_part = {
        "type": "image_url",
        "image_url": {"url": "https://picsum.photos/id/237/200/300"},
    }
    conversation = [{"role": "user", "content": [question_part, picture_part]}]

    generation_cfg = SamplingParams(max_tokens=8192)

    # On GPUs with little VRAM, pass a smaller max_num_seqs or max_model_len.
    engine = LLM(model="mistralai/Pixtral-12B-2409", tokenizer_mode="mistral")

    results = engine.chat(conversation, sampling_params=generation_cfg)
    print(results[0].outputs[0].text)
75
76
def run_advanced_demo():
    """Run a multi-turn, multi-image Pixtral chat and print the reply.

    The conversation consists of a user turn with a text prompt and two
    image URLs, a canned assistant reply, a text-only user follow-up, and a
    final user turn carrying a third image URL. The text of the first output
    of the first returned result is printed.
    """

    def image_part(url):
        # Wrap a URL in the chat-message shape used for image content.
        return {"type": "image_url", "image_url": {"url": url}}

    image_limit = 5
    token_allowance_per_image = 4096

    generation_cfg = SamplingParams(max_tokens=8192, temperature=0.7)
    engine = LLM(
        model="mistralai/Pixtral-12B-2409",
        tokenizer_mode="mistral",
        limit_mm_per_prompt={"image": image_limit},
        # Context length is the image limit times the per-image allowance.
        max_model_len=image_limit * token_allowance_per_image,
    )

    first_url = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/yosemite.png"
    second_url = "https://picsum.photos/seed/picsum/200/300"
    third_url = "https://picsum.photos/id/32/512/512"

    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe the following image."},
                image_part(first_url),
                image_part(second_url),
            ],
        },
        {"role": "assistant", "content": "The images show nature."},
        {
            "role": "user",
            "content": "More details please and answer only in French!.",
        },
        {"role": "user", "content": [image_part(third_url)]},
    ]

    results = engine.chat(messages=conversation,
                          sampling_params=generation_cfg)
    print(results[0].outputs[0].text)
142
143
def main():
    """Read the demo mode from the command line and run the matching demo.

    The single positional argument ``mode`` is restricted to ``simple`` or
    ``advanced``. A short banner is printed before the chosen demo starts.
    """
    # Mode name -> (banner printed first, demo function to call).
    demos = {
        "simple": ("Running simple demo...", run_simple_demo),
        "advanced": ("Running advanced demo...", run_advanced_demo),
    }

    cli = argparse.ArgumentParser(
        description="Run a demo in simple or advanced mode.")
    cli.add_argument(
        "mode",
        choices=list(demos),
        help="Specify the demo mode: 'simple' or 'advanced'",
    )
    selected_mode = cli.parse_args().mode

    banner, demo = demos[selected_mode]
    print(banner)
    demo()
162
163
if __name__ == "__main__":
    # Start the CLI only when executed as a script, so importing this module
    # does not parse arguments or run a demo.
    main()