-
--block-size
-
--disable-log-stats
-
--download-dir
-
--dtype
-
--enable-prefix-caching
-
--gpu-memory-utilization
-
--load-format
-
--max-model-len
-
--max-num-batched-tokens
-
--max-num-seqs
-
--max-paddings
-
--max-parallel-loading-workers
|
-
--model
-
--pipeline-parallel-size
-
--quantization
-
--revision
-
--seed
-
--swap-space
-
--tensor-parallel-size
-
--tokenizer
-
--tokenizer-mode
-
--tokenizer-revision
-
--trust-remote-code
-
--worker-use-ray
|