def get_features():
    """Declare every feature entry and pass the list to ``split_values_and_flags``.

    The declarations are split into a handful of local lists purely for
    readability; they are concatenated in declaration order before the call,
    so the combined list has the same entries in the same sequence as a
    single flat list would.

    Returns:
        Whatever ``split_values_and_flags`` returns for the combined list.
    """
    # Names of the three implementation flags declared in
    # ``prompt_attn_features``; handed to ``ValueFromList`` below.
    attn_impl_names = ['flex_impl', 'fsdpa_impl', 'naive_impl']

    # ALiBi / softmax related entries.
    softmax_features = [
        Value('fp32_alibi_biases', True, env_var='VLLM_ALIBI_USE_FLOAT32_BIASES'),
        Value('fp32_softmax', Any(ModelType('qwen2'), ModelType('qwen2_5_vl'))),
        Value(
            'fused_block_softmax_adjustment',
            All(
                VersionRange(">=1.22.0.494"),
                Hardware('gaudi3'),
                Kernel(block_softmax_adjustment),
                Not(ModelType('qwen2')),
            ),
        ),
        Value('fused_block_softmax', False),
    ]

    # Prompt attention implementation flags plus the list-based selector.
    # NOTE(review): how ValueFromList chooses among the names is defined
    # elsewhere -- confirm before reordering ``attn_impl_names``.
    prompt_attn_features = [
        Value('flex_impl', False, env_var='VLLM_PROMPT_USE_FLEX_ATTENTION'),
        Value(
            'fsdpa_impl',
            All(Kernel(fsdpa), Not(ModelType('mllama'))),
            env_var='VLLM_PROMPT_USE_FUSEDSDPA',
        ),
        Value('naive_impl', True),
        ValueFromList('prompt_attn_impl', attn_impl_names),
    ]

    # Warmup, prefill, contiguous PA, bucketing and defrag entries.
    # 'use_contiguous_pa' and 'defrag' refer to other entries by name
    # ('prefix_caching' is not declared in this function).
    scheduling_features = [
        Value('skip_warmup', False),
        Value('merged_prefill', False),
        Value('use_contiguous_pa', Disabled('prefix_caching'), env_var='VLLM_CONTIGUOUS_PA'),
        Value('use_bucketing', True, env_var='VLLM_ENABLE_BUCKETING'),
        Value(
            'bucketing_strategy',
            'exp',
            env_var='VLLM_BUCKETING_STRATEGY',
            env_var_type=str,
            check=choice('exp', 'lin', 'pad'),
        ),
        Value('defrag', Enabled('use_contiguous_pa')),
    ]

    # Compilation switches, all typed as ``boolean``.
    compilation_features = [
        Value(
            'regional_compilation',
            True,
            env_var='VLLM_T_COMPILE_REGIONAL_COMPILATION',
            env_var_type=boolean,
        ),
        Value(
            'dynamic_shapes_compilation',
            True,
            env_var='VLLM_T_COMPILE_DYNAMIC_SHAPES',
            env_var_type=boolean,
        ),
        Value(
            'fullgraph_compilation',
            False,
            env_var='VLLM_T_COMPILE_FULLGRAPH',
            env_var_type=boolean,
        ),
    ]

    # Scale adjustment, input flattening, profiling and KV-scaling entries.
    model_and_profiling_features = [
        Value('scale_adjustment', True, env_var='VLLM_SCALE_ADJUSTMENT', env_var_type=boolean),
        Value(
            'flatten_input',
            Any(ModelType('qwen3_moe'), ModelType('granitemoe'), ModelType('glm4_moe')),
        ),
        Value(
            'high_level_profiler_enabled',
            False,
            env_var='VLLM_PROFILER_ENABLED',
            env_var_type=boolean,
        ),
        Value(
            'track_graph_compilation',
            False,
            env_var='PT_HPU_METRICS_GC_DETAILS',
            env_var_type=boolean,
        ),
        # No explicit env_var is passed for this entry.
        Value(
            'per_token_kv_scaling_support',
            All(VersionRange(">=1.24.0.350"), MinPackageVersion("neural_compressor_pt", "3.7")),
            env_var_type=boolean,
        ),
    ]

    # MoE and row-parallel chunking knobs.
    chunking_features = [
        Value('moe_chunk', "", env_var='VLLM_MOE_CHUNK', env_var_type=list_of(int)),
        Value('moe_token_boundary', "", env_var='VLLM_MOE_TOKEN_BOUNDARY', env_var_type=list_of(int)),
        Value('row_parallel_chunks', 1, env_var='VLLM_ROW_PARALLEL_CHUNKS', env_var_type=int),
        Value(
            'row_parallel_chunk_threshold',
            8192,
            env_var='VLLM_ROW_PARALLEL_CHUNK_THRESHOLD',
            env_var_type=int,
        ),
    ]

    # Remaining entries: dispatch function, aligned scale, FSDPA slicing.
    trailing_features = [
        # No explicit env_var is passed for this entry.
        Value(
            'use_dispatch_fn',
            All(VersionRange(">=1.24.0.460"), MinPackageVersion("neural_compressor_pt", "3.7")),
            env_var_type=boolean,
        ),
        Value('use_hpu_aligned_scale', False, env_var='HPU_ALIGNED_SCALE', env_var_type=boolean),
        # Conditioned on three entries declared earlier in this function
        # ('use_bucketing', 'bucketing_strategy', 'merged_prefill') and on
        # the fsdpa kernel.
        Value(
            'enable_fsdpa_slicing',
            All(
                Eq('use_bucketing', True),
                Eq('bucketing_strategy', 'pad'),
                Disabled('merged_prefill'),
                Kernel(fsdpa),
            ),
            env_var='VLLM_HPU_FSDPA_SLICE_ENABLED',
            env_var_type=boolean,
        ),
    ]

    # Concatenate the groups in their declaration order.
    return split_values_and_flags([
        *softmax_features,
        *prompt_attn_features,
        *scheduling_features,
        *compilation_features,
        *model_and_profiling_features,
        *chunking_features,
        *trailing_features,
    ])