[Bugfix] Fix k_proj's bias for whisper self attention (vllm-project#12342)

Signed-off-by: Isotr0py <[email protected]>
Isotr0py authored Jan 23, 2025
1 parent 8ae5ff2 commit c5b4b11
Showing 1 changed file with 18 additions and 3 deletions: vllm/model_executor/models/whisper.py
@@ -729,7 +729,22 @@ def sample(
     def load_weights(self, weights: Iterable[Tuple[str,
                                                    torch.Tensor]]) -> Set[str]:
         loader = AutoWeightsLoader(self, skip_prefixes=["proj_out."])
-        loaded_weights = [(name, loaded_weight)
-                          for name, loaded_weight in weights]
         mapper = WeightsMapper({".fc1.": ".mlp.fc1.", ".fc2.": ".mlp.fc2."})
-        return loader.load_weights(loaded_weights, mapper=mapper)
+        # add fake zeros bias for k_proj to state_dict
+        weights = _create_fake_bias_for_k_proj(weights)
+        return loader.load_weights(weights, mapper=mapper)
+
+
+def _create_fake_bias_for_k_proj(
+        weights: Iterable[Tuple[str, torch.Tensor]]
+) -> Iterable[Tuple[str, torch.Tensor]]:
+    """
+    Create full zeros bias for k_proj weight in self-attention layers.
+    So that the bias for k_proj in qkv_proj can be initialized with zeros.
+    """
+    for name, weight in weights:
+        if ".self_attn.k_proj.weight" in name:
+            bias = torch.zeros(weight.size(0))
+            bias_name = name.replace("weight", "bias")
+            yield from [(name, weight), (bias_name, bias)]
+        yield name, weight
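
For context, the sketch below (plain PyTorch, no vLLM imports) illustrates the idea behind the fix: Hugging Face Whisper checkpoints store biases for q_proj and v_proj but none for k_proj (k_proj is created with bias=False), while vLLM fuses the three projections into a single qkv_proj whose bias covers all of them, so an all-zeros k_proj bias has to be synthesized before loading. The helper name inject_zero_k_proj_bias and the toy layer names are illustrative only, not part of the commit, and unlike the committed helper this variant yields each checkpoint entry exactly once.

# Minimal standalone sketch of the zero-bias injection; names are hypothetical.
from typing import Iterable, Tuple

import torch


def inject_zero_k_proj_bias(
        weights: Iterable[Tuple[str, torch.Tensor]]
) -> Iterable[Tuple[str, torch.Tensor]]:
    """Emit an all-zeros bias right after each self-attention k_proj weight."""
    for name, weight in weights:
        if ".self_attn.k_proj.weight" in name:
            yield name, weight
            # output dim of k_proj == length of the missing bias vector
            yield name.replace("weight", "bias"), torch.zeros(weight.size(0))
        else:
            yield name, weight


if __name__ == "__main__":
    # Toy checkpoint: note there is no k_proj.bias entry.
    toy_weights = [
        ("model.encoder.layers.0.self_attn.q_proj.weight", torch.randn(8, 8)),
        ("model.encoder.layers.0.self_attn.q_proj.bias", torch.randn(8)),
        ("model.encoder.layers.0.self_attn.k_proj.weight", torch.randn(8, 8)),
        ("model.encoder.layers.0.self_attn.v_proj.weight", torch.randn(8, 8)),
        ("model.encoder.layers.0.self_attn.v_proj.bias", torch.randn(8)),
    ]
    for name, tensor in inject_zero_k_proj_bias(toy_weights):
        print(name, tuple(tensor.shape))
    # The stream now also contains ...self_attn.k_proj.bias with shape (8,),
    # all zeros, so a fused qkv_proj bias can be assembled without
    # special-casing k_proj during weight loading.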
