format
Signed-off-by: youkaichao <[email protected]>
youkaichao committed Jan 22, 2025
1 parent 7414e0c commit d378a08
Showing 2 changed files with 3 additions and 9 deletions.
8 changes: 2 additions & 6 deletions vllm/device_allocator/cumem.py
```diff
@@ -168,10 +168,8 @@ def sleep(
         All data in the memory allocation with the specified tag will be
         offloaded to CPU memory, and others will be discarded.
-
-        Args:
-            offload_tags: The tags of the memory allocation that will be
+        :param offload_tags: The tags of the memory allocation that will be
             offloaded. The rest of the memory allocation will be discarded.
         """
         if offload_tags is None:
             # by default, allocated tensors are offloaded
@@ -220,10 +218,8 @@ def use_memory_pool(self, tag: Optional[str] = None):
         All memory allocation created inside the context will be allocated
         in the memory pool, and has the specified tag.
-
-        Args:
-            tag: The tag of the memory allocation. If None, the default tag
+        :param tag: The tag of the memory allocation. If None, the default tag
             will be used.
         """
         if tag is None:
             tag = CuMemAllocator.default_tag
```
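The hunks above swap Google-style `Args:` sections for Sphinx-style `:param` info fields. A minimal, self-contained sketch of the two conventions for comparison (toy functions, not the actual vLLM code):

```python
# Toy illustration of the two docstring conventions involved in this
# commit; the functions and their parameter are hypothetical, not vLLM code.

def sleep_google(offload_tags=None):
    """Put the allocator into sleep mode (Google-style docstring).

    Args:
        offload_tags: The tags of the memory allocation that will be
            offloaded. The rest of the memory allocation will be discarded.
    """


def sleep_sphinx(offload_tags=None):
    """Put the allocator into sleep mode (Sphinx-style docstring).

    :param offload_tags: The tags of the memory allocation that will be
        offloaded. The rest of the memory allocation will be discarded.
    """


# The commit replaces the former style with the latter; the parameter
# description itself is unchanged, only the field syntax differs.
print(":param offload_tags:" in sleep_sphinx.__doc__)  # True
```

Sphinx's autodoc renders `:param` fields natively, while Google-style sections need the `napoleon` extension; a commit like this typically standardizes on whichever style the project's docs build expects.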
4 changes: 1 addition & 3 deletions vllm/entrypoints/llm.py
```diff
@@ -1138,8 +1138,7 @@ def sleep(self, level: int = 1):
         The caller should guarantee that no requests are being processed
         during the sleep period, before `wake_up` is called.
-        Args:
-            level: The sleep level. Level 1 sleep will offload the model
+        :param level: The sleep level. Level 1 sleep will offload the model
             weights and discard the kv cache. The content of kv cache is
             forgotten. Level 1 sleep is good for sleeping and waking up the
             engine to run the same model again. The model weights are backed
@@ -1150,7 +1149,6 @@ def sleep(self, level: int = 1):
             waking up the engine to run a different model or update the model,
             where previous model weights are not needed. It reduces CPU memory
             pressure.
-
         """
         self.llm_engine.sleep(level=level)
```
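The docstring above distinguishes level 1 (weights offloaded to CPU, kv cache discarded) from level 2 (both discarded). A toy sketch of those semantics; `ToyEngine` and its attributes are invented for illustration and are not vLLM's implementation:

```python
# Hypothetical toy model of the sleep levels described in the docstring.
# Not vLLM code: ToyEngine and its attributes are invented for illustration.

class ToyEngine:
    def __init__(self, weights):
        self.weights = weights            # lives in "GPU" memory
        self.kv_cache = {"req-0": "kv"}   # forgotten by every sleep level
        self._cpu_backup = None

    def sleep(self, level=1):
        self.kv_cache = None              # kv cache content is always discarded
        if level == 1:
            self._cpu_backup = self.weights   # weights backed up in CPU memory
        else:                             # level 2: weights discarded too,
            self._cpu_backup = None       # reducing CPU memory pressure
        self.weights = None

    def wake_up(self):
        # After level-1 sleep the same model comes back; after level-2
        # sleep the weights are gone and a different model must be loaded.
        self.weights = self._cpu_backup
        self.kv_cache = {}


engine = ToyEngine(weights={"layer0": [0.1, 0.2]})
engine.sleep(level=1)
engine.wake_up()
assert engine.weights == {"layer0": [0.1, 0.2]}  # same model restored

engine.sleep(level=2)
engine.wake_up()
assert engine.weights is None  # previous weights were not kept
```

This mirrors the docstring's guidance: use level 1 when the same model will run again after waking, and level 2 when the weights will be replaced anyway and CPU memory pressure matters.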
