From 4868a431615e77c953f2cd624992bb3082872b61 Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com>
Date: Wed, 6 Nov 2024 11:54:03 -0500
Subject: [PATCH] Eliminated -Wswitch-bool warning and a leftover incorrect
 import (#266)

---
 csrc/attention/attention_kernels.cu | 22 ++++++++--------------
 vllm/_custom_ops.py                 |  3 ---
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/csrc/attention/attention_kernels.cu b/csrc/attention/attention_kernels.cu
index cdfcfabdd62a9..95f7b610c1933 100644
--- a/csrc/attention/attention_kernels.cu
+++ b/csrc/attention/attention_kernels.cu
@@ -782,13 +782,10 @@ void paged_attention_v1_launcher(
       blocksparse_block_size, blocksparse_head_sliding_step);
 
 #define CALL_V1_LAUNCHER_SPARSITY(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE)   \
-  switch (is_block_sparse) {                                                 \
-    case true:                                                               \
-      CALL_V1_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, true);       \
-      break;                                                                 \
-    case false:                                                              \
-      CALL_V1_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, false);      \
-      break;                                                                 \
+  if (is_block_sparse) {                                                     \
+    CALL_V1_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, true);         \
+  } else {                                                                   \
+    CALL_V1_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, false);        \
   }
 
 // NOTE(woosuk): To reduce the compilation time, we omitted block sizes
@@ -951,13 +948,10 @@ void paged_attention_v2_launcher(
       blocksparse_head_sliding_step);
 
 #define CALL_V2_LAUNCHER_SPARSITY(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE)   \
-  switch (is_block_sparse) {                                                 \
-    case true:                                                               \
-      CALL_V2_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, true);       \
-      break;                                                                 \
-    case false:                                                              \
-      CALL_V2_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, false);      \
-      break;                                                                 \
+  if (is_block_sparse) {                                                     \
+    CALL_V2_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, true);         \
+  } else {                                                                   \
+    CALL_V2_LAUNCHER(T, CACHE_T, BLOCK_SIZE, IS_FP8_KV_CACHE, false);        \
   }
 
 // NOTE(woosuk): To reduce the compilation time, we omitted block sizes
diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py
index f71ccaf38741d..42700fee3bda0 100644
--- a/vllm/_custom_ops.py
+++ b/vllm/_custom_ops.py
@@ -38,9 +38,6 @@ def register_fake(fn):
     except ImportError:
         from torch.library import impl_abstract as register_fake
 
-with contextlib.suppress(ImportError):
-    import vllm._custom_C  # noqa: F401
-
 
 
 def hint_on_error(fn):
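
For context, the warning this patch silences is GCC's -Wswitch-bool (enabled by
default since GCC 5; Clang has the same flag), which fires whenever a switch
condition has boolean type. Below is a minimal standalone C++ sketch, not part
of the patch, that mirrors the macro's dispatch pattern; the file and function
names are hypothetical and exist only for illustration:

    // warning_demo.cpp (hypothetical, illustration only)
    // g++ -c warning_demo.cpp
    //   -> warning: switch condition has boolean value [-Wswitch-bool]
    #include <cstdio>

    void dispatch_switch(bool is_block_sparse) {
      switch (is_block_sparse) {  // triggers -Wswitch-bool
        case true:
          std::puts("block-sparse launcher");
          break;
        case false:
          std::puts("dense launcher");
          break;
      }
    }

    void dispatch_if(bool is_block_sparse) {
      if (is_block_sparse) {  // same dispatch, no diagnostic
        std::puts("block-sparse launcher");
      } else {
        std::puts("dense launcher");
      }
    }

The if/else form is what the patch substitutes inside the
CALL_V1_LAUNCHER_SPARSITY and CALL_V2_LAUNCHER_SPARSITY macros, where each
branch expands to a differently templated kernel launch; the rewrite changes
no behavior, only the diagnostic.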