Yapf fixes
mawong-amd committed Apr 10, 2024
1 parent 4357d8c commit d64f80a
Showing 3 changed files with 17 additions and 23 deletions.
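
The three diffs below are formatting-only: yapf (the Python auto-formatter vLLM's style checks run, per the commit title) re-wraps two long call sites and one constructor signature. As a minimal sketch, assuming yapf is installed and resolves the repository's style configuration on its own, the commit could be reproduced roughly like this (a hypothetical helper, not vLLM's official format script):

    import subprocess

    # Re-run yapf over exactly the three files this commit touches.
    files = [
        "benchmarks/benchmark_throughput.py",
        "vllm/executor/torchrun_gpu_executor.py",
        "vllm/worker/worker.py",
    ]
    # `yapf --in-place` rewrites each file using whatever style yapf
    # resolves (e.g. a .style.yapf or setup.cfg in the repo, if present).
    subprocess.run(["yapf", "--in-place", *files], check=True)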
18 changes: 8 additions & 10 deletions benchmarks/benchmark_throughput.py
@@ -215,16 +215,14 @@ def main(args: argparse.Namespace):
                                    args.output_len)
 
     if args.backend == "vllm":
-        elapsed_time = run_vllm(requests, args.model, args.tokenizer,
-                                args.quantization, args.tensor_parallel_size,
-                                args.seed, args.n, args.use_beam_search,
-                                args.trust_remote_code, args.dtype,
-                                args.max_model_len, args.enforce_eager,
-                                args.kv_cache_dtype,
-                                args.quantization_param_path, args.device,
-                                args.enable_prefix_caching,
-                                args.gpu_memory_utilization,
-                                args.worker_use_ray, args.download_dir)
+        elapsed_time = run_vllm(
+            requests, args.model, args.tokenizer, args.quantization,
+            args.tensor_parallel_size, args.seed, args.n, args.use_beam_search,
+            args.trust_remote_code, args.dtype, args.max_model_len,
+            args.enforce_eager, args.kv_cache_dtype,
+            args.quantization_param_path, args.device,
+            args.enable_prefix_caching, args.gpu_memory_utilization,
+            args.worker_use_ray, args.download_dir)
     elif args.backend == "hf":
         assert args.tensor_parallel_size == 1
         elapsed_time = run_hf(requests, args.model, tokenizer, args.n,
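
The hunk above is a mechanical re-wrap: once continuation lines aligned under the opening parenthesis would overflow the column limit, yapf instead breaks immediately after `run_vllm(` and uses a fixed continuation indent, which fits more arguments per line (ten lines collapse to eight). A runnable miniature of the same transformation; `run_vllm` and the values here are stand-ins, not the real benchmark code:

    def run_vllm(*call_args):
        return len(call_args)  # stub standing in for the real helper

    requests, model, tokenizer, quantization = [], "m", "t", None

    # Old form: continuation lines aligned under the opening parenthesis.
    elapsed_time = run_vllm(requests, model, tokenizer,
                            quantization)

    # New form (what yapf produced here): break right after the open paren
    # and indent continuations a fixed amount, packing more per line.
    elapsed_time = run_vllm(
        requests, model, tokenizer, quantization)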
18 changes: 7 additions & 11 deletions vllm/executor/torchrun_gpu_executor.py
@@ -24,17 +24,13 @@
 
 class TorchrunGPUExecutor(GPUExecutor):
 
-    def __init__(
-        self,
-        model_config: ModelConfig,
-        cache_config: CacheConfig,
-        parallel_config: ParallelConfig,
-        scheduler_config: SchedulerConfig,
-        device_config: DeviceConfig,
-        lora_config: Optional[LoRAConfig],
-        vision_language_config: Optional[VisionLanguageConfig],
-        speculative_config: Optional[SpeculativeConfig]
-    ) -> None:
+    def __init__(self, model_config: ModelConfig, cache_config: CacheConfig,
+                 parallel_config: ParallelConfig,
+                 scheduler_config: SchedulerConfig,
+                 device_config: DeviceConfig,
+                 lora_config: Optional[LoRAConfig],
+                 vision_language_config: Optional[VisionLanguageConfig],
+                 speculative_config: Optional[SpeculativeConfig]) -> None:
         self.local_rank = int(os.getenv("LOCAL_RANK", "0"))
         self.is_driver_worker = self.local_rank == 0
         super().__init__(model_config, cache_config, parallel_config,
4 changes: 2 additions & 2 deletions vllm/worker/worker.py
@@ -272,8 +272,8 @@ def init_worker_distributed_environment(
         init_distributed_environment(parallel_config.world_size, rank,
                                      distributed_init_method, local_rank)
     else:
-        init_distributed_environment(parallel_config.world_size, -1,
-                                     "env://", local_rank)
+        init_distributed_environment(parallel_config.world_size, -1, "env://",
+                                     local_rank)
 
     if pynccl_utils.is_initialized():
         pynccl_world_size = pynccl_utils.get_world_size()
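
To check whether a tree is already clean without rewriting files, yapf's diff mode can double as a lint gate, sketched below (wiring this into vLLM's actual CI is an assumption, not something this commit shows):

    import subprocess

    # `yapf --diff` prints a unified diff for any file it would change and,
    # in recent yapf versions, exits non-zero when reformatting is needed.
    result = subprocess.run(
        ["yapf", "--diff", "--recursive", "benchmarks", "vllm"],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(result.stdout)  # the formatting changes yapf would make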
