From 375f770b957ae4cec8db79ce6dd9ad0351ca7590 Mon Sep 17 00:00:00 2001
From: Gregory Shtrasberg
Date: Thu, 16 Jan 2025 19:16:39 +0000
Subject: [PATCH] Format

---
 vllm/model_executor/layers/quantization/fp8.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 51d8fe60c0d0d..835a6044e6f07 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -166,7 +166,8 @@ def create_weights(
         weight_loader = extra_weight_attrs.get("weight_loader")
 
         if self.block_quant:
-            assert not envs.VLLM_FP8_PADDING, "FP8 weight padding is not supported in block quantization."
+            assert not envs.VLLM_FP8_PADDING, (
+                "FP8 weight padding is not supported in block quantization.")
             tp_size = get_tensor_model_parallel_world_size()
             assert self.quant_config.weight_block_size is not None
             block_n, block_k = (
@@ -254,14 +255,15 @@ def create_weights(
     def process_weights_after_loading(self, layer: Module) -> None:
         # Block quant doesn't need to process weights after loading
         if self.block_quant:
-            if current_platform.is_rocm() and not is_navi(): 
+            if current_platform.is_rocm() and not is_navi():
                 weight, weight_scale, _ = \
                     normalize_e4m3fn_to_e4m3fnuz(
                         weight=layer.weight,
                         weight_scale=layer.weight_scale_inv,
                         input_scale=layer.input_scale)
                 layer.weight = Parameter(weight, requires_grad=False)
-                layer.weight_scale_inv = Parameter(weight_scale, requires_grad=False)
+                layer.weight_scale_inv = Parameter(weight_scale,
+                                                   requires_grad=False)
             return
         layer.weight = torch.nn.Parameter(layer.weight.data,
                                           requires_grad=False)
@@ -533,16 +535,16 @@ def process_weights_after_loading(self, layer: Module) -> None:
                     layer.w2_input_scale)
             # Reset the parameter
             layer.w13_weight = torch.nn.Parameter(w13_weight,
-                                                   requires_grad=False)
+                                                  requires_grad=False)
             layer.w13_weight_scale_inv = torch.nn.Parameter(
                 w13_weight_scale_inv, requires_grad=False)
             if w13_input_scale is not None:
                 layer.w13_input_scale = torch.nn.Parameter(
                     w13_input_scale, requires_grad=False)
             layer.w2_weight = torch.nn.Parameter(w2_weight,
-                                                  requires_grad=False)
-            layer.w2_weight_scale_inv = torch.nn.Parameter(w2_weight_scale_inv,
-                                                           requires_grad=False)
+                                                 requires_grad=False)
+            layer.w2_weight_scale_inv = torch.nn.Parameter(
+                w2_weight_scale_inv, requires_grad=False)
             if w2_input_scale is not None:
                 layer.w2_input_scale = torch.nn.Parameter(
                     w2_input_scale, requires_grad=False)
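
Note (appended for context; not part of the patch): the ROCm branch in
process_weights_after_loading above calls normalize_e4m3fn_to_e4m3fnuz to
turn OCP float8_e4m3fn checkpoint weights into the float8_e4m3fnuz format
used on MI300-class GPUs. Below is a minimal sketch of such a conversion;
the _sketch helper name and the exact handling shown are assumptions for
illustration, not a copy of vLLM's implementation.

    import torch
    from typing import Optional, Tuple

    def normalize_e4m3fn_to_e4m3fnuz_sketch(
        weight: torch.Tensor,
        weight_scale: torch.Tensor,
        input_scale: Optional[torch.Tensor] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
        # Illustrative sketch. e4m3fnuz uses exponent bias 8 vs. 7 for
        # e4m3fn, so the same bit pattern decodes to half the value;
        # doubling the scales keeps the dequantized result unchanged.
        assert weight.dtype == torch.float8_e4m3fn
        bits = weight.view(torch.uint8)
        # 0x80 encodes -0.0 in e4m3fn but NaN in e4m3fnuz; remap to +0.0.
        bits = torch.where(bits == 0x80, torch.zeros_like(bits), bits)
        weight_fnuz = bits.view(torch.float8_e4m3fnuz)
        weight_scale = weight_scale * 2.0
        if input_scale is not None:
            input_scale = input_scale * 2.0
        return weight_fnuz, weight_scale, input_scale

Under that convention, the doubled scale stored back into
layer.weight_scale_inv compensates for the halved bit-pattern values, so
dequantization on ROCm matches the original e4m3fn result.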