
Commit e30f96b

Update Gemma2 attention scale (#694)

* Update Gemma2 attention scale
* remove import

mntss authored Aug 11, 2024
1 parent 464325a · commit e30f96b
Showing 1 changed file with 0 additions and 3 deletions.
transformer_lens/loading_from_pretrained.py: 0 additions, 3 deletions
@@ -5,7 +5,6 @@

 import dataclasses
 import logging
-import math
 import os
 import re
 from pathlib import Path
@@ -1246,7 +1245,6 @@ def convert_hf_model_config(model_name: str, **kwargs):
     "rotary_base": 10000.0,
     "positional_embedding_type": "rotary",
     "use_attn_scale": True,
-    "attn_scale": math.sqrt(224),
     "n_key_value_heads": 4,
     "window_size": 4096,
     "use_local_attn": True,
@@ -1274,7 +1272,6 @@ def convert_hf_model_config(model_name: str, **kwargs):
     "rotary_base": 10000.0,
     "positional_embedding_type": "rotary",
     "use_attn_scale": True,
-    "attn_scale": math.sqrt(224),
     "n_key_value_heads": 8,
     "window_size": 4096,
     "use_local_attn": True,
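The two deleted "attn_scale" entries sit in Gemma 2 config blocks (4 and 8 key-value heads, likely the 2B and 9B variants). With the override gone, the model falls back to the library's default attention scale, which for TransformerLens is sqrt(d_head). Below is a minimal sketch of that fallback; effective_attn_scale and the config dicts are hypothetical illustrations, not the library's actual code path, and it assumes these checkpoints use 256-dimensional heads.

import math

# Hypothetical helper mirroring the assumed fallback: use an explicit
# "attn_scale" if the config provides one, otherwise default to sqrt(d_head).
def effective_attn_scale(cfg: dict) -> float:
    return cfg.get("attn_scale", math.sqrt(cfg["d_head"]))

# Before this commit: the hardcoded override forced sqrt(224), regardless
# of the actual head dimension.
old_cfg = {"d_head": 256, "attn_scale": math.sqrt(224)}
print(effective_attn_scale(old_cfg))  # 14.966...

# After this commit: no override, so the default sqrt(256) = 16.0 applies.
new_cfg = {"d_head": 256}
print(effective_attn_scale(new_cfg))  # 16.0

Hugging Face's Gemma2 attention divides attention logits by sqrt(query_pre_attn_scalar); if that value equals the head dimension of 256 for these checkpoints, as the reference configs suggest, the default scale now matches the upstream implementation, which appears to be the point of the fix.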
