Offline Inference Pixtral

Source: vllm-project/vllm.

  1# ruff: noqa
  2import argparse
  3
  4from vllm import LLM
  5from vllm.sampling_params import SamplingParams
  6
# This script is an offline demo for running Pixtral.
#
# If you want to run a server/client setup instead, use the commands below:
#
# - Server:
#
# ```bash
# vllm serve mistralai/Pixtral-12B-2409 --tokenizer-mode mistral --limit-mm-per-prompt 'image=4' --max-model-len 16384
# ```
#
# - Client:
#
# ```bash
# curl --location 'http://<your-node-url>:8000/v1/chat/completions' \
# --header 'Content-Type: application/json' \
# --header 'Authorization: Bearer token' \
# --data '{
#     "model": "mistralai/Pixtral-12B-2409",
#     "messages": [
#       {
#         "role": "user",
#         "content": [
#             {"type" : "text", "text": "Describe this image in detail please."},
#             {"type": "image_url", "image_url": {"url": "https://s3.amazonaws.com/cms.ipressroom.com/338/files/201808/5b894ee1a138352221103195_A680%7Ejogging-edit/A680%7Ejogging-edit_hero.jpg"}},
#             {"type" : "text", "text": "and this one as well. Answer in French."},
#             {"type": "image_url", "image_url": {"url": "https://www.wolframcloud.com/obj/resourcesystem/images/a0e/a0ee3983-46c6-4c92-b85d-059044639928/6af8cfb971db031b.png"}}
#         ]
#       }
#     ]
#   }'
# ```
#
# Usage:
#     python demo.py simple
#     python demo.py advanced
 42
 43
 44def run_simple_demo():
 45    model_name = "mistralai/Pixtral-12B-2409"
 46    sampling_params = SamplingParams(max_tokens=8192)
 47
 48    # Lower max_num_seqs or max_model_len on low-VRAM GPUs.
 49    llm = LLM(model=model_name, tokenizer_mode="mistral")
 50
 51    prompt = "Describe this image in one sentence."
 52    image_url = "https://picsum.photos/id/237/200/300"
 53
 54    messages = [
 55        {
 56            "role":
 57            "user",
 58            "content": [
 59                {
 60                    "type": "text",
 61                    "text": prompt
 62                },
 63                {
 64                    "type": "image_url",
 65                    "image_url": {
 66                        "url": image_url
 67                    }
 68                },
 69            ],
 70        },
 71    ]
 72    outputs = llm.chat(messages, sampling_params=sampling_params)
 73
 74    print(outputs[0].outputs[0].text)
 75
 76
 77def run_advanced_demo():
 78    model_name = "mistralai/Pixtral-12B-2409"
 79    max_img_per_msg = 5
 80    max_tokens_per_img = 4096
 81
 82    sampling_params = SamplingParams(max_tokens=8192, temperature=0.7)
 83    llm = LLM(
 84        model=model_name,
 85        tokenizer_mode="mistral",
 86        limit_mm_per_prompt={"image": max_img_per_msg},
 87        max_model_len=max_img_per_msg * max_tokens_per_img,
 88    )
 89
 90    prompt = "Describe the following image."
 91
 92    url_1 = "https://huggingface.co/datasets/patrickvonplaten/random_img/resolve/main/yosemite.png"
 93    url_2 = "https://picsum.photos/seed/picsum/200/300"
 94    url_3 = "https://picsum.photos/id/32/512/512"
 95
 96    messages = [
 97        {
 98            "role":
 99            "user",
100            "content": [
101                {
102                    "type": "text",
103                    "text": prompt
104                },
105                {
106                    "type": "image_url",
107                    "image_url": {
108                        "url": url_1
109                    }
110                },
111                {
112                    "type": "image_url",
113                    "image_url": {
114                        "url": url_2
115                    }
116                },
117            ],
118        },
119        {
120            "role": "assistant",
121            "content": "The images show nature.",
122        },
123        {
124            "role": "user",
125            "content": "More details please and answer only in French!.",
126        },
127        {
128            "role": "user",
129            "content": [
130                {
131                    "type": "image_url",
132                    "image_url": {
133                        "url": url_3
134                    }
135                },
136            ],
137        },
138    ]
139
140    outputs = llm.chat(messages=messages, sampling_params=sampling_params)
141    print(outputs[0].outputs[0].text)
142
143
def main():
    """Parse the demo mode from the command line and run the matching demo.

    The single positional argument ``mode`` must be ``simple`` or
    ``advanced``; argparse rejects any other value before a demo is started.
    """
    # Map each accepted mode to the function that implements it, so the
    # argparse choices and the dispatch below cannot drift apart.
    demos = {
        "simple": run_simple_demo,
        "advanced": run_advanced_demo,
    }

    parser = argparse.ArgumentParser(
        description="Run a demo in simple or advanced mode.")
    parser.add_argument(
        "mode",
        choices=list(demos),
        help="Specify the demo mode: 'simple' or 'advanced'",
    )
    args = parser.parse_args()

    print(f"Running {args.mode} demo...")
    demos[args.mode]()
162
163
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()