Florence2 Inference
Source: vllm-project/vllm.
'''
Demonstrate prompting of text-to-text
encoder/decoder models, specifically Florence-2
'''
# TODO(Isotr0py):
# Move to offline_inference_vision_language.py after porting vision backbone
from vllm import LLM, SamplingParams

dtype = "float"

# Create a Florence-2 encoder/decoder model instance
llm = LLM(
    model="microsoft/Florence-2-base",
    tokenizer="facebook/bart-base",
    dtype=dtype,
    trust_remote_code=True,
)

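# Florence-2 task prompts; each token selects a vision task such as
# captioning, phrase grounding, object detection, region proposals, or OCR.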
prompts = [
    "<CAPTION>", "<DETAILED_CAPTION>", "<MORE_DETAILED_CAPTION>",
    "<CAPTION_TO_PHRASE_GROUNDING>", "<OD>", "<DENSE_REGION_CAPTION>",
    "<REGION_PROPOSAL>", "<OCR>", "<OCR_WITH_REGION>"
]
# Create a sampling params object.
sampling_params = SamplingParams(
    temperature=0,
    top_p=1.0,
    min_tokens=0,
    max_tokens=20,
)

# Generate output tokens from the prompts. The output is a list of
# RequestOutput objects that contain the prompt, generated
# text, and other information.
outputs = llm.generate(prompts, sampling_params)

# Print the outputs.
for output in outputs:
    prompt = output.prompt
    encoder_prompt = output.encoder_prompt
    generated_text = output.outputs[0].text
    print(f"Encoder prompt: {encoder_prompt!r}, "
          f"Decoder prompt: {prompt!r}, "
          f"Generated text: {generated_text!r}")