Offline Inference Embedding
Source: examples/offline_inference_embedding.py.
"""Embed a few sample prompts offline with vLLM and print each result.

For every prompt the script prints the prompt, a preview of its embedding
(at most the first 16 values), and the full length of the embedding.
"""
from vllm import LLM

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Create an LLM.
# You should pass task="embed" for embedding models
model = LLM(
    model="intfloat/e5-mistral-7b-instruct",
    task="embed",
    enforce_eager=True,
)

# Generate embedding. The output is a list of EmbeddingRequestOutputs.
outputs = model.embed(prompts)

# Print the outputs.
for prompt, output in zip(prompts, outputs):
    embeds = output.outputs.embedding
    # Long embeddings are shortened for display: take the text form of the
    # first 16 values, drop its closing "]" and append ", ...]" so the
    # preview still reads like a list. Short embeddings are printed whole.
    embeds_trimmed = ((str(embeds[:16])[:-1] +
                       ", ...]") if len(embeds) > 16 else embeds)
    print(f"Prompt: {prompt!r} | "
          f"Embeddings: {embeds_trimmed} (size={len(embeds)})")