diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 19f31b8ec419d..aeacf5dda5761 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -1,5 +1,4 @@ import contextlib -import functools import importlib from typing import TYPE_CHECKING, List, Optional, Tuple, Union @@ -36,34 +35,6 @@ def register_fake(fn): from torch.library import impl_abstract as register_fake -def hint_on_error(fn): - - @functools.wraps(fn) - def wrapper(*args, **kwargs): - try: - return fn(*args, **kwargs) - - except NotImplementedError as e: - msg = ( - "Error in calling custom op %s: %s\n" - "Not implemented or built, mostly likely because the current current device " - "does not support this kernel (less likely TORCH_CUDA_ARCH_LIST was set " - "incorrectly while building)") - logger.error(msg, fn.__name__, e) - raise NotImplementedError(msg % (fn.__name__, e)) from e - except AttributeError as e: - msg = ( - "Error in calling custom op %s: %s\n" - "Possibly you have built or installed an obsolete version of vllm.\n" - "Please try a clean build and install of vllm," - "or remove old built files such as vllm/*cpython*.so and build/ ." - ) - logger.error(msg, fn.__name__, e) - raise e - - return wrapper - - # activation ops def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None: torch.ops._C.silu_and_mul(out, x) @@ -1101,25 +1072,3 @@ def get_graph_buffer_ipc_meta(fa: int) -> Tuple[List[int], List[int]]: def register_graph_buffers(fa: int, handles: List[List[int]], offsets: List[List[int]]) -> None: torch.ops._C_custom_ar.register_graph_buffers(fa, handles, offsets) - - -# temporary fix for https://github.com/vllm-project/vllm/issues/5456 -# TODO: remove this in v0.6.0 -names_and_values = globals() -names_and_values_to_update = {} -# prepare variables to avoid dict size change during iteration -k, v, arg = None, None, None -fn_type = type(lambda x: x) -for k, v in names_and_values.items(): - # find functions that are defined in this file and have torch.Tensor - # in their annotations. `arg == "torch.Tensor"` is used to handle - # the case when users use `import __annotations__` to turn type - # hints into strings. - if isinstance(v, fn_type) \ - and v.__code__.co_filename == __file__ \ - and any(arg is torch.Tensor or arg == "torch.Tensor" - for arg in v.__annotations__.values()): - names_and_values_to_update[k] = hint_on_error(v) - -names_and_values.update(names_and_values_to_update) -del names_and_values_to_update, names_and_values, v, k, fn_type