From 9c485d9e252a8834ed15656838d5fbe0dc3a8f2f Mon Sep 17 00:00:00 2001 From: Jani Monoses Date: Tue, 21 Jan 2025 21:56:41 +0200 Subject: [PATCH] [Core] Free CPU pinned memory on environment cleanup (#10477) --- vllm/distributed/parallel_state.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index bf8b30cccd5f6..ffdf8b0f48087 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -1183,6 +1183,11 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False): from vllm.platforms import current_platform if not current_platform.is_cpu(): torch.cuda.empty_cache() + try: + torch._C._host_emptyCache() + except AttributeError: + logger.warning( + "torch._C._host_emptyCache() only available in Pytorch >=2.5") def in_the_same_node_as(pg: Union[ProcessGroup, StatelessProcessGroup],