[Bugfix] Fix QKVParallelLinearWithShardedLora bias bug (#10844)

The fix removes a manual bias addition that rebound `self.bias_stacked` inside `apply`, and corrects an `add_expand` call to pass `self.bias_stacked` rather than `self.bias_all`.

Signed-off-by: Jee Jee Li <[email protected]>
jeejeelee authored Dec 3, 2024 · 1 parent d746268 · commit a4cf256
Showing 2 changed files with 1 addition and 9 deletions.
.buildkite/test-pipeline.yaml (1 change: 0 additions & 1 deletion)

@@ -481,7 +481,6 @@ steps:
 
 - label: LoRA TP Test (Distributed)
   num_gpus: 4
-  soft_fail: true
   source_file_dependencies:
   - vllm/lora
   - tests/lora

Dropping `soft_fail: true` means failures in this step now fail the pipeline instead of being reported as soft failures.
vllm/lora/fully_sharded_layers.py (9 changes: 1 addition & 8 deletions)

@@ -77,13 +77,6 @@ def apply(self, x: torch.Tensor,
                                        add_input=True)
         # now have column partitioned output
 
-        if self.bias_stacked is not None:
-            self.bias_stacked = self.bias_stacked.view(
-                -1, self.bias_stacked.shape[-1])
-            self.bias_stacked = self.bias_stacked[
-                self.punica_wrapper.token_lora_indices]
-            output += self.bias_stacked
-
         output = output.view(*out_orig_shape)
         return output
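The deleted block shows the core hazard: it reassigns `self.bias_stacked` inside the forward path, so the first call permanently replaces the stacked bias with a per-token view. A minimal, self-contained sketch of that failure mode; the class and tensor shapes below are illustrative, not vLLM's actual layer:

```python
import torch

class BiasedLayer:
    """Illustrative stand-in for a LoRA layer holding a stacked bias."""

    def __init__(self, num_loras: int = 3, out_dim: int = 8):
        # (num_loras, 1, out_dim): one bias row per loaded adapter
        self.bias_stacked = torch.randn(num_loras, 1, out_dim)

    def buggy_apply(self, output: torch.Tensor,
                    token_lora_indices: torch.Tensor) -> torch.Tensor:
        # Mirrors the deleted code: rebinding the attribute corrupts it.
        self.bias_stacked = self.bias_stacked.view(
            -1, self.bias_stacked.shape[-1])
        self.bias_stacked = self.bias_stacked[token_lora_indices]
        return output + self.bias_stacked

layer = BiasedLayer()
out = torch.zeros(4, 8)
idx = torch.tensor([0, 1, 2, 0])
layer.buggy_apply(out, idx)
# After one call the per-adapter bias is gone: the attribute now has
# shape (4, 8) instead of (3, 1, 8), so the next batch indexes stale
# per-token rows instead of per-adapter biases.
print(layer.bias_stacked.shape)  # torch.Size([4, 8])
```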

@@ -222,7 +215,7 @@ def apply(self, x: torch.Tensor,
         self.punica_wrapper.add_expand(output,
                                        buffer,
                                        self.lora_b_stacked,
-                                       self.bias_all,
+                                       self.bias_stacked,
                                        add_input=True)
         # now have column partitioned output
         output = output.view(*out_orig_shape)
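With this rename, bias is applied inside the punica wrapper alongside the LoRA-B expansion, reading the layer's `bias_stacked` attribute rather than `bias_all`. Below is a toy stand-in for what such an expand-plus-bias step could look like; `add_expand_sketch` and all shapes are assumptions for illustration, not vLLM's PunicaWrapper API:

```python
import torch

def add_expand_sketch(output: torch.Tensor,
                      buffer: torch.Tensor,
                      lora_b_stacked: torch.Tensor,
                      bias_stacked: torch.Tensor | None,
                      token_lora_indices: torch.Tensor,
                      add_input: bool = True) -> None:
    """Toy per-token expand: y_i = buffer_i @ B[lora_i] (+ bias[lora_i]).

    Reads bias_stacked without rebinding it, which is the property the
    fixed code relies on.
    """
    for i, lora_idx in enumerate(token_lora_indices.tolist()):
        y = buffer[i] @ lora_b_stacked[lora_idx]
        if bias_stacked is not None:
            y = y + bias_stacked[lora_idx].view(-1)  # read-only bias use
        output[i] = output[i] + y if add_input else y

num_loras, rank, out_dim, num_tokens = 3, 4, 8, 5
output = torch.zeros(num_tokens, out_dim)
buffer = torch.randn(num_tokens, rank)
lora_b_stacked = torch.randn(num_loras, rank, out_dim)
bias_stacked = torch.randn(num_loras, 1, out_dim)
token_lora_indices = torch.randint(0, num_loras, (num_tokens,))
add_expand_sketch(output, buffer, lora_b_stacked, bias_stacked,
                  token_lora_indices)
```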
