diff --git a/vllm/attention/ops/triton_flash_attention.py b/vllm/attention/ops/triton_flash_attention.py
index 3671c2f91e3b7..2e647a8e52278 100644
--- a/vllm/attention/ops/triton_flash_attention.py
+++ b/vllm/attention/ops/triton_flash_attention.py
@@ -394,7 +394,6 @@ def get_autotune_configs():
 @triton.autotune(
     configs=autotune_configs,
     key=autotune_keys,
-    use_cuda_graph=True,
 )
 @triton.jit
 def attn_fwd(