Offline Inference Encoder Decoder

Source: vllm-project/vllm.

'''
Demonstrate prompting of text-to-text
encoder/decoder models, specifically BART
'''

from vllm import LLM, SamplingParams
from vllm.inputs import (ExplicitEncoderDecoderPrompt, TextPrompt,
                         TokensPrompt, zip_enc_dec_prompts)

dtype = "float"

# Create a BART encoder/decoder model instance
llm = LLM(
    model="facebook/bart-large-cnn",
    dtype=dtype,
)

# Get BART tokenizer
tokenizer = llm.llm_engine.get_tokenizer_group()

# Test prompts
#
# This section shows all of the valid ways to prompt an
# encoder/decoder model.
#
# - Helpers for building prompts
text_prompt_raw = "Hello, my name is"
text_prompt = TextPrompt(prompt="The president of the United States is")
tokens_prompt = TokensPrompt(prompt_token_ids=tokenizer.encode(
    prompt="The capital of France is"))
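#   (A TokensPrompt carries pre-computed token IDs, so the engine consumes
#   them directly instead of tokenizing the text again.)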
# - Pass a single prompt to the encoder/decoder model
#   (implicitly the encoder input prompt);
#   the decoder input prompt is assumed to be None

single_text_prompt_raw = text_prompt_raw  # Pass a string directly
single_text_prompt = text_prompt  # Pass a TextPrompt
single_tokens_prompt = tokens_prompt  # Pass a TokensPrompt

# - Pass explicit encoder and decoder input prompts within one data structure.
#   Encoder and decoder prompts can both independently be text or tokens, with
#   no requirement that they be the same prompt type. Some example prompt-type
#   combinations are shown below; note that these are not exhaustive.

enc_dec_prompt1 = ExplicitEncoderDecoderPrompt(
    # Pass encoder prompt string directly, and
    # pass decoder prompt tokens
    encoder_prompt=single_text_prompt_raw,
    decoder_prompt=single_tokens_prompt,
)
enc_dec_prompt2 = ExplicitEncoderDecoderPrompt(
    # Pass TextPrompt to encoder, and
    # pass decoder prompt string directly
    encoder_prompt=single_text_prompt,
    decoder_prompt=single_text_prompt_raw,
)
enc_dec_prompt3 = ExplicitEncoderDecoderPrompt(
    # Pass encoder prompt tokens directly, and
    # pass TextPrompt to decoder
    encoder_prompt=single_tokens_prompt,
    decoder_prompt=single_text_prompt,
)
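# - A further combination, shown here only as an illustrative sketch: both
#   encoder and decoder prompts passed as TextPrompt instances. It is not
#   added to the prompts list below.
enc_dec_prompt4 = ExplicitEncoderDecoderPrompt(
    encoder_prompt=single_text_prompt,
    decoder_prompt=single_text_prompt,
)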

# - Finally, here's a useful helper function for zipping encoder and
#   decoder prompts together into a list of ExplicitEncoderDecoderPrompt
#   instances
zipped_prompt_list = zip_enc_dec_prompts(
    ['An encoder prompt', 'Another encoder prompt'],
    ['A decoder prompt', 'Another decoder prompt'])

# - Let's put all of the above example prompts together into one list
#   which we will pass to the encoder/decoder LLM.
prompts = [
    single_text_prompt_raw, single_text_prompt, single_tokens_prompt,
    enc_dec_prompt1, enc_dec_prompt2, enc_dec_prompt3
] + zipped_prompt_list
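# The list above holds eight prompts in total: three single (implicit
# encoder-only) prompts, three explicit encoder/decoder prompts, and the
# two zipped prompts.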

print(prompts)

# Create a sampling params object.
sampling_params = SamplingParams(
    temperature=0,
    top_p=1.0,
    min_tokens=0,
    max_tokens=20,
)
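# With temperature=0 the settings above amount to greedy decoding;
# max_tokens=20 caps each generated continuation at 20 tokens.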

# Generate output tokens from the prompts. The output is a list of
# RequestOutput objects that contain the prompt, generated
# text, and other information.
outputs = llm.generate(prompts, sampling_params)

# Print the outputs.
for output in outputs:
    prompt = output.prompt
    encoder_prompt = output.encoder_prompt
    generated_text = output.outputs[0].text
    print(f"Encoder prompt: {encoder_prompt!r}, "
          f"Decoder prompt: {prompt!r}, "
          f"Generated text: {generated_text!r}")
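Since facebook/bart-large-cnn is fine-tuned for summarization, the same single-prompt pattern can be reused to summarize a passage by passing it as the implicit encoder prompt. Below is a minimal sketch along those lines; the article string and the summary_params name are illustrative and not part of the example above.

# Illustrative sketch: summarize a short passage with the same LLM instance.
article = (
    "The tower is 324 metres tall, about the same height as an "
    "81-storey building, and was the tallest man-made structure in "
    "the world for 41 years after its completion in 1889.")
summary_params = SamplingParams(temperature=0, max_tokens=60)
summary_outputs = llm.generate([article], summary_params)
print(summary_outputs[0].outputs[0].text)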