From 9c485d9e252a8834ed15656838d5fbe0dc3a8f2f Mon Sep 17 00:00:00 2001
From: Jani Monoses <jani.monoses@gmail.com>
Date: Tue, 21 Jan 2025 21:56:41 +0200
Subject: [PATCH] [Core] Free CPU pinned memory on environment cleanup (#10477)

---
 vllm/distributed/parallel_state.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py
index bf8b30cccd5f6..ffdf8b0f48087 100644
--- a/vllm/distributed/parallel_state.py
+++ b/vllm/distributed/parallel_state.py
@@ -1183,6 +1183,11 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
     from vllm.platforms import current_platform
     if not current_platform.is_cpu():
         torch.cuda.empty_cache()
+    try:
+        torch._C._host_emptyCache()
+    except AttributeError:
+        logger.warning(
+            "torch._C._host_emptyCache() only available in Pytorch >=2.5")
 
 
 def in_the_same_node_as(pg: Union[ProcessGroup, StatelessProcessGroup],