From 15f549dfad6828285ebc2754ec01a0bf07890e0e Mon Sep 17 00:00:00 2001 From: Yu Shi Jie Date: Tue, 7 Jan 2025 20:36:55 -0500 Subject: [PATCH] fix: post_feedforward_layernorm --- litgpt/scripts/convert_hf_checkpoint.py | 1 + litgpt/scripts/convert_lit_checkpoint.py | 1 + 2 files changed, 2 insertions(+) diff --git a/litgpt/scripts/convert_hf_checkpoint.py b/litgpt/scripts/convert_hf_checkpoint.py index 733709469d..449c07a54b 100644 --- a/litgpt/scripts/convert_hf_checkpoint.py +++ b/litgpt/scripts/convert_hf_checkpoint.py @@ -471,6 +471,7 @@ def copy_weights_olmo2( "model.layers.{}.self_attn.rotary_emb.inv_freq": None, "model.layers.{}.post_attention_layernorm.weight": "transformer.h.{}.norm_2.weight", "model.layers.{}.post_attention_layernorm.bias": "transformer.h.{}.norm_2.bias", + "model.layers.{}.post_feedforward_layernorm.weight": "transformer.h.{}.post_mlp_norm.weight", "model.norm.weight": "transformer.ln_f.weight", "model.norm.bias": "transformer.ln_f.bias", "lm_head.weight": "lm_head.weight", diff --git a/litgpt/scripts/convert_lit_checkpoint.py b/litgpt/scripts/convert_lit_checkpoint.py index b7698865df..afb6608f94 100644 --- a/litgpt/scripts/convert_lit_checkpoint.py +++ b/litgpt/scripts/convert_lit_checkpoint.py @@ -355,6 +355,7 @@ def copy_weights_olmo2( "transformer.h.{}.attn.k_norm.weight": "model.layers.{}.self_attn.k_norm.weight", "transformer.h.{}.norm_2.weight": "model.layers.{}.post_attention_layernorm.weight", "transformer.h.{}.norm_2.bias": "model.layers.{}.post_attention_layernorm.bias", + "transformer.h.{}.post_mlp_norm.weight": "model.layers.{}.post_feedforward_layernorm.weight", "transformer.ln_f.weight": "model.norm.weight", "transformer.ln_f.bias": "model.norm.bias", "lm_head.weight": "lm_head.weight",