Offline Inference Embedding

Source: vllm-project/vllm.

This example shows how to generate embeddings offline with vLLM's LLM.encode API, using the intfloat/e5-mistral-7b-instruct embedding model.

from vllm import LLM

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]

# Create an LLM. enforce_eager=True disables CUDA graph capture,
# which shortens startup at a small cost in runtime performance.
model = LLM(model="intfloat/e5-mistral-7b-instruct", enforce_eager=True)
# Generate embeddings. The output is a list of EmbeddingRequestOutput objects.
outputs = model.encode(prompts)
# Print the outputs.
for output in outputs:
    print(output.outputs.embedding)  # list of 4096 floats
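
The returned vectors can be consumed directly, for example to compare prompts by cosine similarity. The following is a minimal sketch that assumes numpy is installed and reuses the outputs and prompts lists from the example above; the cosine_similarity helper is illustrative, not part of the vLLM API.

import numpy as np

def cosine_similarity(a, b):
    # Cosine similarity between two embedding vectors.
    a, b = np.asarray(a), np.asarray(b)
    return float(a @ b / (np.linalg.norm(a) * np.linalg.norm(b)))

# Extract the raw vectors from the EmbeddingRequestOutput objects.
vectors = [output.outputs.embedding for output in outputs]

# Compare the first prompt's embedding against the others.
for prompt, vector in zip(prompts[1:], vectors[1:]):
    score = cosine_similarity(vectors[0], vector)
    print(f"{prompt!r}: {score:.4f}")

Note that instruction-tuned embedding models such as e5-mistral-7b-instruct typically expect queries to carry a task instruction prefix (see the model card for the exact format); the plain prompts above serve only as a smoke test.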