From d378a0859c854476c358f9cb0e61bd4287434ca8 Mon Sep 17 00:00:00 2001
From: youkaichao <youkaichao@gmail.com>
Date: Wed, 22 Jan 2025 13:49:49 +0800
Subject: [PATCH] Convert docstring Args sections to reST :param: fields

Signed-off-by: youkaichao <youkaichao@gmail.com>
---
 vllm/device_allocator/cumem.py | 8 ++------
 vllm/entrypoints/llm.py        | 4 +---
 2 files changed, 3 insertions(+), 9 deletions(-)

diff --git a/vllm/device_allocator/cumem.py b/vllm/device_allocator/cumem.py
index 1377dbf8d0aba..3755dde6be95b 100644
--- a/vllm/device_allocator/cumem.py
+++ b/vllm/device_allocator/cumem.py
@@ -168,10 +168,8 @@ def sleep(
         All data in the memory allocation with the specified tag will be 
         offloaded to CPU memory, and others will be discarded.
 
-        Args:
-            offload_tags: The tags of the memory allocation that will be
+        :param offload_tags: The tags of the memory allocation that will be
             offloaded. The rest of the memory allocation will be discarded.
-
         """
         if offload_tags is None:
             # by default, allocated tensors are offloaded
@@ -220,10 +218,8 @@ def use_memory_pool(self, tag: Optional[str] = None):
         All memory allocation created inside the context will be allocated 
         in the memory pool, and has the specified tag.
 
-        Args:
-            tag: The tag of the memory allocation. If None, the default tag 
+        :param tag: The tag of the memory allocation. If None, the default tag
             will be used.
-
         """
         if tag is None:
             tag = CuMemAllocator.default_tag
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 500a8bbc12e10..04056f37f851b 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -1138,8 +1138,7 @@ def sleep(self, level: int = 1):
         The caller should guarantee that no requests are being processed
         during the sleep period, before `wake_up` is called.
 
-        Args:
-            level: The sleep level. Level 1 sleep will offload the model 
+        :param level: The sleep level. Level 1 sleep will offload the model
             weights and discard the kv cache. The content of kv cache is 
             forgotten. Level 1 sleep is good for sleeping and waking up the 
             engine to run the same model again. The model weights are backed 
@@ -1150,7 +1149,6 @@ def sleep(self, level: int = 1):
             waking up the engine to run a different model or update the model, 
             where previous model weights are not needed. It reduces CPU memory 
             pressure.
-
         """
         self.llm_engine.sleep(level=level)