From 5f739821122a71e094795dd43c55122a352a1847 Mon Sep 17 00:00:00 2001
From: Neel Nanda <77788841+neelnanda-io@users.noreply.github.com>
Date: Sat, 4 Feb 2023 22:00:49 +0000
Subject: [PATCH] Removed broken import in demo

---
 activation_patching_in_TL_demo.py.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/activation_patching_in_TL_demo.py.ipynb b/activation_patching_in_TL_demo.py.ipynb
index a27b3e3d5..27226623d 100644
--- a/activation_patching_in_TL_demo.py.ipynb
+++ b/activation_patching_in_TL_demo.py.ipynb
@@ -1 +1 @@
-{"cells":[{"cell_type":"markdown","metadata":{},"source":[" # Activation Patching in TransformerLens Demo\n"," This is an accompaniment to [Exploratory Analysis Demo](https://neelnanda.io/exploratory-analysis-demo). That notebook explains some basic techniques for mech interp of networks, including an overview of activation patching ([summary here](https://dynalist.io/d/n2ZWtnoYHrU1s4vnFSAQ519J#z=qeWBvs-R-taFfcCq-S_hgMqx)). This demonstrates how to use the Activation Patching utils in TransformerLens.\n"]},{"cell_type":"markdown","metadata":{},"source":[" <b style=\"color: red\">To use this notebook, go to Runtime > Change Runtime Type and select GPU as the hardware accelerator.</b>\n","\n"," **Tips for reading this Colab:**\n"," * You can run all this code for yourself!\n"," * The graphs are interactive!\n"," * Use the table of contents pane in the sidebar to navigate\n"," * Collapse irrelevant sections with the dropdown arrows\n"," * Search the page using the search in the sidebar, not CTRL+F"]},{"cell_type":"markdown","metadata":{},"source":[" ## Setup (Ignore)"]},{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Running as a Jupyter notebook - intended for development only!\n"]}],"source":["# Janky code to do different setup when run in a Colab notebook vs VSCode\n","DEBUG_MODE = False\n","try:\n","    import google.colab\n","    IN_COLAB = True\n","    print(\"Running as a Colab notebook\")\n","    %pip install transformer_lens\n","    # Install my janky personal plotting utils\n","    %pip install git+https://github.com/neelnanda-io/neel-plotly.git\n","except:\n","    IN_COLAB = False\n","    print(\"Running as a Jupyter notebook - intended for development only!\")\n","    from IPython import get_ipython\n","\n","    ipython = get_ipython()\n","    # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n","    ipython.magic(\"load_ext autoreload\")\n","    ipython.magic(\"autoreload 2\")"]},{"cell_type":"code","execution_count":2,"metadata":{},"outputs":[],"source":["# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh\n","import plotly.io as pio\n","\n","if IN_COLAB or not DEBUG_MODE:\n","    # Thanks to annoying rendering issues, Plotly graphics will either show up in colab OR Vscode depending on the renderer - this is bad for developing demos! Thus creating a debug mode.\n","    pio.renderers.default = \"colab\"\n","else:\n","    pio.renderers.default = \"png\""]},{"cell_type":"code","execution_count":3,"metadata":{},"outputs":[],"source":["# Import stuff\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","import torch.optim as optim\n","import numpy as np\n","import einops\n","from fancy_einsum import einsum\n","import tqdm.notebook as tqdm\n","import random\n","from pathlib import Path\n","import plotly.express as px\n","from torch.utils.data import DataLoader\n","\n","from torchtyping import TensorType as TT\n","from typing import List, Union, Optional\n","from functools import partial\n","import copy\n","\n","import itertools\n","from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer\n","import dataclasses\n","import datasets\n","from IPython.display import HTML"]},{"cell_type":"code","execution_count":4,"metadata":{},"outputs":[],"source":["import pysvelte\n","\n","import transformer_lens\n","import transformer_lens.utils as utils\n","from transformer_lens.hook_points import (\n","    HookedRootModule,\n","    HookPoint,\n",")  # Hooking utilities\n","from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache"]},{"cell_type":"markdown","metadata":{},"source":[" We turn automatic differentiation off, to save GPU memory, as this notebook focuses on model inference not model training."]},{"cell_type":"code","execution_count":5,"metadata":{},"outputs":[{"data":{"text/plain":["<torch.autograd.grad_mode.set_grad_enabled at 0x7f6b307d0990>"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["torch.set_grad_enabled(False)"]},{"cell_type":"markdown","metadata":{},"source":[" Plotting helper functions from a janky personal library of plotting utils. The library is not documented and I recommend against trying to read it, just use your preferred plotting library if you want to do anything non-obvious:"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["from neel_plotly import line, imshow, scatter"]},{"cell_type":"code","execution_count":7,"metadata":{},"outputs":[],"source":["import transformer_lens.patching as patching"]},{"cell_type":"markdown","metadata":{},"source":[" ## Activation Patching Setup\n"," This just copies the relevant set up from Exploratory Analysis Demo, and isn't very important."]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["Using pad_token, but it is not set yet.\n"]},{"name":"stdout","output_type":"stream","text":["Loaded pretrained model gpt2-small into HookedTransformer\n"]}],"source":["model = HookedTransformer.from_pretrained(\"gpt2-small\")"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean string 0 <|endoftext|>When John and Mary went to the shops, John gave the bag to\n","Corrupted string 0 <|endoftext|>When John and Mary went to the shops, Mary gave the bag to\n","Answer token indices tensor([[ 5335,  1757],\n","        [ 1757,  5335],\n","        [ 4186,  3700],\n","        [ 3700,  4186],\n","        [ 6035, 15686],\n","        [15686,  6035],\n","        [ 5780, 14235],\n","        [14235,  5780]], device='cuda:0')\n"]}],"source":["prompts = ['When John and Mary went to the shops, John gave the bag to', 'When John and Mary went to the shops, Mary gave the bag to', 'When Tom and James went to the park, James gave the ball to', 'When Tom and James went to the park, Tom gave the ball to', 'When Dan and Sid went to the shops, Sid gave an apple to', 'When Dan and Sid went to the shops, Dan gave an apple to', 'After Martin and Amy went to the park, Amy gave a drink to', 'After Martin and Amy went to the park, Martin gave a drink to']\n","answers = [(' Mary', ' John'), (' John', ' Mary'), (' Tom', ' James'), (' James', ' Tom'), (' Dan', ' Sid'), (' Sid', ' Dan'), (' Martin', ' Amy'), (' Amy', ' Martin')]\n","\n","clean_tokens = model.to_tokens(prompts)\n","# Swap each adjacent pair, with a hacky list comprehension\n","corrupted_tokens = clean_tokens[\n","    [(i+1 if i%2==0 else i-1) for i in range(len(clean_tokens)) ]\n","    ]\n","print(\"Clean string 0\", model.to_string(clean_tokens[0]))\n","print(\"Corrupted string 0\", model.to_string(corrupted_tokens[0]))\n","\n","answer_token_indices = torch.tensor([[model.to_single_token(answers[i][j]) for j in range(2)] for i in range(len(answers))], device=model.cfg.device)\n","print(\"Answer token indices\", answer_token_indices)"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean logit diff: 3.5519\n","Corrupted logit diff: -3.5519\n"]}],"source":["def get_logit_diff(logits, answer_token_indices=answer_token_indices):\n","    if len(logits.shape)==3:\n","        # Get final logits only\n","        logits = logits[:, -1, :]\n","    correct_logits = logits.gather(1, answer_token_indices[:, 0].unsqueeze(1))\n","    incorrect_logits = logits.gather(1, answer_token_indices[:, 1].unsqueeze(1))\n","    return (correct_logits - incorrect_logits).mean()\n","\n","clean_logits, clean_cache = model.run_with_cache(clean_tokens)\n","corrupted_logits, corrupted_cache = model.run_with_cache(corrupted_tokens)\n","\n","clean_logit_diff = get_logit_diff(clean_logits, answer_token_indices).item()\n","print(f\"Clean logit diff: {clean_logit_diff:.4f}\")\n","\n","corrupted_logit_diff = get_logit_diff(corrupted_logits, answer_token_indices).item()\n","print(f\"Corrupted logit diff: {corrupted_logit_diff:.4f}\")"]},{"cell_type":"code","execution_count":11,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean Baseline is 1: 1.0000\n","Corrupted Baseline is 0: 0.0000\n"]}],"source":["CLEAN_BASELINE = clean_logit_diff\n","CORRUPTED_BASELINE = corrupted_logit_diff\n","def ioi_metric(logits, answer_token_indices=answer_token_indices):\n","    return (get_logit_diff(logits, answer_token_indices) - CORRUPTED_BASELINE) / (CLEAN_BASELINE  - CORRUPTED_BASELINE)\n","\n","print(f\"Clean Baseline is 1: {ioi_metric(clean_logits).item():.4f}\")\n","print(f\"Corrupted Baseline is 0: {ioi_metric(corrupted_logits).item():.4f}\")"]},{"cell_type":"markdown","metadata":{},"source":[" ## Patching\n"," In the following cells, we use the patching module to call activation patching utilities"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["# Whether to do the runs by head and by position, which are much slower\n","DO_SLOW_RUNS = False"]},{"cell_type":"markdown","metadata":{},"source":[" ### Patching Single Activation Types\n"," We start by patching single types of activation\n"," The general syntax is that the functions are called get_act_patch_... and take in (model, corrupted_tokens, clean_cache, patching_metric)"]},{"cell_type":"markdown","metadata":{},"source":[" We can patch the residual stream at the start of each block over each layer and position\n"," resid_pre -> attn_out, mlp_out, resid_mid all also work"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"3f4b44886e5a47b1b3f3b51ec96e9b85","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"1c8d77d6-ef47-4c62-a969-9532ea08d5fa\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"1c8d77d6-ef47-4c62-a969-9532ea08d5fa\")) {                    Plotly.newPlot(                        \"1c8d77d6-ef47-4c62-a969-9532ea08d5fa\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"x\":[\"<|endoftext|> 0\",\"When 1\",\" John 2\",\" and 3\",\" Mary 4\",\" went 5\",\" to 6\",\" the 7\",\" shops 8\",\", 9\",\" John 10\",\" gave 11\",\" the 12\",\" bag 13\",\" to 14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99999994,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0006502,-0.00024795762,0.000008927548,-0.00036488837,-0.000048866583],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.001051,-0.000026782645,-0.00002094282,-0.00045913106,-0.0005944539],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000266,0.0008683216,0.00051635463,-0.0009931058,-0.0008661065],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9949083,0.005430064,0.0016051463,-0.0006189208,-0.0016321304],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.96756655,0.031341735,0.0028421823,-0.0012302229,-0.000986192],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9675207,0.031001415,0.0017822877,-0.0004862829,-0.0006460726],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9228318,0.051346023,0.00472905,0.0009341035,0.017046316],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65654737,0.02385639,0.002356403,-0.000017116728,0.31869277],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027301382,0.031425238,0.0018207164,0.0007996197,0.9383891],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.026841983,0.020980678,0.0012512665,0.0003241103,1.0048288],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0056879222,0.014263134,0.00048705481,-0.000090651534,0.99142164]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"Position\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0},\"title\":{\"text\":\"resid_pre Activation Patching\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('1c8d77d6-ef47-4c62-a969-9532ea08d5fa');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["resid_pre_act_patch_results = patching.get_act_patch_resid_pre(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(resid_pre_act_patch_results, \n","       yaxis=\"Layer\", \n","       xaxis=\"Position\", \n","       x=[f\"{tok} {i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))],\n","       title=\"resid_pre Activation Patching\")"]},{"cell_type":"markdown","metadata":{},"source":[" We can patch head outputs over each head in each layer, patching across all positions at once\n"," out -> q, k, v, pattern all also work"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a99b6bfcba454ea1b862da644c1f3154","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"3c5828c1-5f79-4630-8146-4fec588a81be\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"3c5828c1-5f79-4630-8146-4fec588a81be\")) {                    Plotly.newPlot(                        \"3c5828c1-5f79-4630-8146-4fec588a81be\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[0.000949475,0.01612389,0.0018556548,0.0034395899,-0.009822988,0.011059554,-0.004064585,-0.0015793706,-0.0012084075,0.0038287437,-0.0042575346,-0.0011430618],[-0.0010770785,-0.00037838038,0.0000025507281,-0.0002605099,-0.00014122979,0.0038325363,-0.0004281867,-0.0014292804,-0.0009218869,0.0006933953,0.0004336238,-0.0035704488],[-0.000496855,0.0008060301,0.00054236536,-0.0005317261,-0.0007156135,-0.0010388176,-0.0009492065,-0.00008638913,0.0002770225,0.0021078077,-0.00019734581,-0.0016404538],[0.11626182,0.00025030697,-0.0014675413,-0.00039700742,0.018962113,-0.00018828402,0.01116984,-0.0013302383,-0.000735281,-0.00030299966,-0.00014552576,-0.00022278597],[-0.0016504889,0.00029246113,-0.0014359257,0.030842163,-0.007431479,-0.00028242602,0.00601747,-0.011006527,-0.0012657653,0.0014903636,-0.00017975921,0.0029445807],[-0.004211588,0.0029607913,0.0020455497,0.0013399713,-0.0012184426,0.34349898,0.00056297256,-0.0001253213,-0.005152605,0.016240286,0.017090013,-0.004174804],[0.03977555,0.015227075,-0.0010225064,0.0008079767,-0.0049347864,-0.002123347,-0.014273841,0.0013753123,0.0014840539,0.13027029,-0.00033568926,0.001291541],[0.00037237274,0.019514346,0.00022292022,0.12424979,-0.00040388768,-0.0076521845,0.0013002673,-0.0011247369,-0.0074497373,0.19224228,-0.0032751013,-0.00050229207],[-0.0010082088,0.00003097792,-0.00085925974,0.012359889,-0.00040469316,-0.0043288204,0.31855473,0.0023298552,0.0021180105,0.00014116267,0.27793574,0.005737863],[0.005889967,-0.0009691425,0.009126103,0.020675799,-0.037004486,0.014263471,-0.048284512,0.05834116,0.00065117405,0.2636095,0.0004923241,-0.0026102003],[0.08374242,0.020676069,-0.0037430592,0.010851469,-0.0010961419,0.00047389843,0.048179664,-0.47991198,0.00018429011,0.011862027,0.06088635,0.00084617053],[0.0053282026,-0.0114937825,-0.11350821,0.0063299676,0.00031608893,-0.0011600779,-0.022669563,0.0040708277,0.0073162266,-0.008345379,-0.27817535,0.0036348547]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0},\"title\":{\"text\":\"attn_head_out Activation Patching (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('3c5828c1-5f79-4630-8146-4fec588a81be');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["attn_head_out_all_pos_act_patch_results = patching.get_act_patch_attn_head_out_all_pos(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(attn_head_out_all_pos_act_patch_results, \n","       yaxis=\"Layer\", \n","       xaxis=\"Head\", \n","       title=\"attn_head_out Activation Patching (All Pos)\")"]},{"cell_type":"markdown","metadata":{},"source":[" We can patch head outputs over each head in each layer, patching on each position in turn\n"," out -> q, k, v, pattern all also work, though note that pattern has output shape [layer, pos, head]\n"," We reshape it to plot nicely"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[],"source":["ALL_HEAD_LABELS = [f\"L{i}H{j}\" for i in range(model.cfg.n_layers) for j in range(model.cfg.n_heads)]\n","if DO_SLOW_RUNS:\n","    attn_head_out_act_patch_results = patching.get_act_patch_attn_head_out_by_pos(model, corrupted_tokens, clean_cache, ioi_metric)\n","    attn_head_out_act_patch_results = einops.rearrange(attn_head_out_act_patch_results, \"layer pos head -> (layer head) pos\")\n","    imshow(attn_head_out_act_patch_results, \n","        yaxis=\"Head Label\", \n","        xaxis=\"Pos\", \n","        x=[f\"{tok} {i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))],\n","        y=ALL_HEAD_LABELS,\n","        title=\"attn_head_out Activation Patching By Pos\")"]},{"cell_type":"markdown","metadata":{},"source":[" ### Patching multiple activation types\n"," Some utilities are provided to patch multiple activations types *in turn*. Note that this is *not* a utility to patch multiple activations at once, it's just a useful scan to get a sense for what's going on in a model\n"," By block: We patch the residual stream at the start of each block, attention output and MLP output over each layer and position"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"273f73067c394332989ab06fb5f15186","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a3ef4fefd564453ba35fee5a836d6657","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c145d75160f741fd98be311cda941ccc","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"bbb2904f-6b80-4410-a47c-034ec495c09e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"bbb2904f-6b80-4410-a47c-034ec495c09e\")) {                    Plotly.newPlot(                        \"bbb2904f-6b80-4410-a47c-034ec495c09e\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"x\":[\"<|endoftext|>_0\",\"When_1\",\" John_2\",\" and_3\",\" Mary_4\",\" went_5\",\" to_6\",\" the_7\",\" shops_8\",\",_9\",\" John_10\",\" gave_11\",\" the_12\",\" bag_13\",\" to_14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99999994,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0006502,-0.00024795762,0.000008927548,-0.00036488837,-0.000048866583],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.001051,-0.000026782645,-0.00002094282,-0.00045913106,-0.0005944539],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000266,0.0008683216,0.00051635463,-0.0009931058,-0.0008661065],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9949083,0.005430064,0.0016051463,-0.0006189208,-0.0016321304],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.96756655,0.031341735,0.0028421823,-0.0012302229,-0.000986192],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9675207,0.031001415,0.0017822877,-0.0004862829,-0.0006460726],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9228318,0.051346023,0.00472905,0.0009341035,0.017046316],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65654737,0.02385639,0.002356403,-0.000017116728,0.31869277],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027301382,0.031425238,0.0018207164,0.0007996197,0.9383891],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.026841983,0.020980678,0.0012512665,0.0003241103,1.0048288],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0056879222,0.014263134,0.00048705481,-0.000090651534,0.99142164]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"x\":[\"<|endoftext|>_0\",\"When_1\",\" John_2\",\" and_3\",\" Mary_4\",\" went_5\",\" to_6\",\" the_7\",\" shops_8\",\",_9\",\" John_10\",\" gave_11\",\" the_12\",\" bag_13\",\" to_14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035457034,-0.00024795762,0.000008927548,-0.00036488837,-0.000048866583],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.002984419,0.00007954244,0.000021077069,0.00008041506,-0.00059626624],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0019119721,0.0006667469,0.00039509436,-0.00070453796,-0.00027272655],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15463124,0.0038024643,0.0005173951,-0.00012028697,-0.0005607575],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0054061003,0.019582007,0.0010079069,-0.00024272192,0.00079287373],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35209733,0.0010533165,0.00022416202,0.00013310774,0.000082730854],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.11985986,0.021244075,0.0027277018,0.0013412467,0.017973103],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013310539,0.011509422,0.00037495705,-0.000040543153,0.29760385],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0015007679,0.017352335,0.00058451947,0.0010121021,0.5697317],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.00012867752,0.0063013053,0.00014149828,0.00031239708,0.27152342],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0009372919,0.00008689257,0.0003319974,9.733042e-7,-0.19297722],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.40617895]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"x\":[\"<|endoftext|>_0\",\"When_1\",\" John_2\",\" and_3\",\" Mary_4\",\" went_5\",\" to_6\",\" the_7\",\" shops_8\",\",_9\",\" John_10\",\" gave_11\",\" the_12\",\" bag_13\",\" to_14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8507897,-0.00027833143,-0.00007276288,-0.00047356283,0.000039939034],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008865123,0.00022137635,0.00015015734,-0.000048598085,0.00030407365],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013549603,0.000058264002,-0.00032958091,-0.00063821906,0.0007726693],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0019467425,0.0004991037,0.00017318102,0.00016834805,0.0004076802],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.019786568,0.004128219,-0.000049067956,-0.00016962342,0.0007913634],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.096524656,-0.0018828737,-0.00048349722,0.00070940447,-0.00018378667],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.015899561,-0.00085006375,0.00012364319,0.000028091572,-0.0072379597],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0103607895,0.003151022,0.0005308871,0.00023580811,0.008497046],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.012533204,0.000022184622,-0.00035351078,0.00008618776,-0.021631954],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.00033427964,0.0008096548,0.000016479047,0.00012914739,0.031623926],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0013593368,-0.00019445946,-0.00009867291,-0.00014169967,0.028763823],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020447912]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.31999999999999995],\"title\":{\"text\":\"Position\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.33999999999999997,0.6599999999999999],\"matches\":\"x\",\"title\":{\"text\":\"Position\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.6799999999999999,0.9999999999999999],\"matches\":\"x\",\"title\":{\"text\":\"Position\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Residual Stream\",\"x\":0.15999999999999998,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Attn Output\",\"x\":0.49999999999999994,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"MLP Output\",\"x\":0.8399999999999999,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-1,\"cmax\":1},\"title\":{\"text\":\"Activation Patching Per Block\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('bbb2904f-6b80-4410-a47c-034ec495c09e');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_block_result = patching.get_act_patch_block_every(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(every_block_result, facet_col=0, facet_labels=[\"Residual Stream\", \"Attn Output\", \"MLP Output\"], title=\"Activation Patching Per Block\", xaxis=\"Position\", yaxis=\"Layer\", zmax=1, zmin=-1, x= [f\"{tok}_{i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))])"]},{"cell_type":"markdown","metadata":{},"source":[" By head: For each head we patch the output, query, key, value or pattern. We do all positions at once so it's not super slow."]},{"cell_type":"code","execution_count":17,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"e6eefdb1a9984a468e01d5b315d51cd3","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"5234ea6d3a0e40cb83ad760f547befe7","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"93372689514d41a4b94e1e5e52492f98","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"acfbbc29dd5640d1a3c4588daef06611","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"841474694153493e9a74050fde490f84","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"4177a921-b03d-40a1-a628-08281ebb379e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"4177a921-b03d-40a1-a628-08281ebb379e\")) {                    Plotly.newPlot(                        \"4177a921-b03d-40a1-a628-08281ebb379e\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[0.000949475,0.01612389,0.0018556548,0.0034395899,-0.009822988,0.011059554,-0.004064585,-0.0015793706,-0.0012084075,0.0038287437,-0.0042575346,-0.0011430618],[-0.0010770785,-0.00037838038,0.0000025507281,-0.0002605099,-0.00014122979,0.0038325363,-0.0004281867,-0.0014292804,-0.0009218869,0.0006933953,0.0004336238,-0.0035704488],[-0.000496855,0.0008060301,0.00054236536,-0.0005317261,-0.0007156135,-0.0010388176,-0.0009492065,-0.00008638913,0.0002770225,0.0021078077,-0.00019734581,-0.0016404538],[0.11626182,0.00025030697,-0.0014675413,-0.00039700742,0.018962113,-0.00018828402,0.01116984,-0.0013302383,-0.000735281,-0.00030299966,-0.00014552576,-0.00022278597],[-0.0016504889,0.00029246113,-0.0014359257,0.030842163,-0.007431479,-0.00028242602,0.00601747,-0.011006527,-0.0012657653,0.0014903636,-0.00017975921,0.0029445807],[-0.004211588,0.0029607913,0.0020455497,0.0013399713,-0.0012184426,0.34349898,0.00056297256,-0.0001253213,-0.005152605,0.016240286,0.017090013,-0.004174804],[0.03977555,0.015227075,-0.0010225064,0.0008079767,-0.0049347864,-0.002123347,-0.014273841,0.0013753123,0.0014840539,0.13027029,-0.00033568926,0.001291541],[0.00037237274,0.019514346,0.00022292022,0.12424979,-0.00040388768,-0.0076521845,0.0013002673,-0.0011247369,-0.0074497373,0.19224228,-0.0032751013,-0.00050229207],[-0.0010082088,0.00003097792,-0.00085925974,0.012359889,-0.00040469316,-0.0043288204,0.31855473,0.0023298552,0.0021180105,0.00014116267,0.27793574,0.005737863],[0.005889967,-0.0009691425,0.009126103,0.020675799,-0.037004486,0.014263471,-0.048284512,0.05834116,0.00065117405,0.2636095,0.0004923241,-0.0026102003],[0.08374242,0.020676069,-0.0037430592,0.010851469,-0.0010961419,0.00047389843,0.048179664,-0.47991198,0.00018429011,0.011862027,0.06088635,0.00084617053],[0.0053282026,-0.0114937825,-0.11350821,0.0063299676,0.00031608893,-0.0011600779,-0.022669563,0.0040708277,0.0073162266,-0.008345379,-0.27817535,0.0036348547]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[0.0004925926,0.010944033,0.0015592668,0.009421383,-0.0048536328,0.010262049,-0.0031708572,-0.00038774425,-0.0011035255,0.00051189086,-0.00082774484,-0.0007998211],[0.0009907901,-0.00029890507,0.00012021985,-0.0003556252,-0.000109547065,0.00700581,-0.00034444898,-0.0014763682,-0.0012057225,0.00025067615,0.00006860116,-0.0019844666],[-0.002012256,0.0025707984,-0.00051769713,0.0012822108,0.00022473258,0.0032741951,0.00019607044,0.00021295223,0.000040543153,0.012865907,0.00028561443,-0.0019235176],[0.12042162,-0.00030823535,0.0008745641,-0.00023906364,0.028209778,0.0020729364,0.0011321206,0.006186791,0.0006772854,0.00004675216,-0.0015923256,-0.0002591674],[-0.0012818415,0.0013887708,-0.0017389253,-0.00039492655,-0.0016222966,-0.0009207457,-0.0022054065,-0.000481752,-0.00074810174,0.0009411516,-0.00026319487,0.0014971432],[-0.004147081,0.0029570658,0.0004330868,0.00060026016,-0.0005218924,0.34353,-0.0004413431,-0.00008531514,-0.0055975057,0.028573323,0.014875175,-0.0022643418],[-0.0011904517,0.0006325806,-0.0011616218,0.00042325305,-0.005044132,0.0027662981,-0.00018143732,0.0012201878,0.0010537192,0.13032328,-0.000016781107,0.0010141159],[0.00036686854,0.020354811,0.0003358235,-0.001465192,-0.00016918711,-0.00059559505,0.00082234136,-0.00054575515,0.000090953596,-0.00041200971,-0.0030538593,0.00014696893],[0.00047104564,0.0005044065,0.0015873584,0.024632582,-0.000052591986,-0.0010191166,0.010948195,-0.0005386064,0.0018448142,0.00073595217,-0.027599381,0.009300224],[0.006474654,-0.0015574545,0.012203254,0.001424246,-0.026727738,0.0025531447,-0.04373378,-0.01309772,0.004638197,0.27937496,0.00020476306,-0.00025453581],[0.09814128,0.026011655,0.013917243,0.015725339,0.00055894506,-0.00028316438,0.052197833,-0.512864,0.0001743557,-0.00403646,0.069167964,0.0025175686],[0.0015773905,0.008838944,-0.08574924,-0.0070981393,-0.0009571943,-0.0030238882,-0.006626993,0.0003132697,-0.00009719617,-0.0061496715,-0.33735374,-0.0022051716]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-0.00019781568,0.0053396807,0.0006521809,0.0035052039,-0.008983262,0.0034816097,-0.0008626831,-0.000033427965,0.0005173615,0.00044201434,-0.0039073126,-0.00018831757],[-0.00043979925,-0.00044634385,-0.00006709086,0.000072225885,-0.000035978694,-0.0019324786,-0.00015780953,0.00001530437,0.0002059713,0.00033730024,0.00035153062,-0.0005672685],[0.0002102337,-0.0007199766,0.00048678633,-0.0005975416,-0.0005923731,-0.00054478185,-0.00022721618,-0.0004804095,0.00020593773,0.0011836386,-0.00035703482,-0.00091007294],[0.0010391868,-0.000120354096,-0.00007783077,-0.0007270246,-0.0013100003,-0.002310926,0.01098753,-0.000051081686,0.00014404902,0.00015049297,-0.00008068356,-0.000020070203],[-0.00053712964,-0.00081274257,-0.00013304061,0.030610517,-0.0071862065,0.00014834497,0.0013340309,-0.011421825,-0.0005332364,0.0005125621,0.00037274192,0.0029560255],[0.000007920682,0.0000062090094,0.0015981655,0.00033924685,-0.0012587173,-0.000054202974,0.00063325185,-0.00027051143,0.00007413893,-0.0067042867,0.0031767976,-0.0017284539],[0.048632182,0.015315109,-0.00046466885,-0.00011605813,-0.00004883302,-0.0039534946,-0.017375292,-0.00015388275,0.0012203221,-0.00017992702,-0.00042707915,0.00012374388],[-0.000028695691,-0.0013851125,-0.000121629455,0.1332166,-0.00024426577,-0.007315354,0.0003331385,-0.0007948874,-0.007938806,0.20841402,-0.00019147243,-0.00020634048],[-0.0020486375,-0.0003768365,-0.0033148054,-0.009665918,-0.00031776703,-0.005140993,0.31717074,0.0028421488,0.0004723546,-0.0011536675,0.27267054,-0.0031753546],[-0.00043909444,0.00005692151,-0.0020628679,0.02006631,-0.007870473,0.0113156345,0.0030571818,0.06856223,-0.0027475369,-0.009278911,0.00050719216,-0.0013157058],[-0.012956424,-0.003045234,-0.017922994,-0.004358456,-0.001152325,0.0005002448,-0.003113298,0.01958523,0.00004322813,0.012969311,-0.0076964195,-0.0009145032],[0.004100564,-0.020459324,-0.035875153,0.01465541,0.0008436869,0.0017800726,-0.018042274,0.0035195013,0.008252545,-0.0017664464,0.044165857,0.0064741843]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[0.00019825199,-0.00049141794,0.00075165933,0.0071840254,0.001131785,-0.00014062568,0.0005671678,-0.0022284638,-0.000011008406,-0.0011850146,-0.003645259,-0.00013888044],[0.00009827016,-0.0006734594,0.00006262709,0.0003996924,0.00013777288,0.001314464,0.00016364935,-0.000053632415,0.000016579734,0.0003300508,0.000043026757,0.0033119528],[-0.0011031899,-0.0004916864,-0.0008953727,0.00097303564,0.0005671007,0.0026970594,0.00017076454,0.00009404132,0.00005490778,0.011643504,-0.00009689411,-0.00022788742],[-0.0014167617,-0.00039103333,0.0007130292,-0.000690811,0.0077598854,-0.00019734581,0.0014261927,0.006242303,0.0005358543,-0.000038462294,0.0000018459217,-0.0007054777],[-0.00015371494,0.00034085783,0.00040905626,-0.0009754186,0.0007706555,0.000046181605,-0.005579852,0.0002847418,0.0016971739,0.00016744187,-0.00008759738,0.00078226806],[0.000010706346,-5.7055763e-7,0.00025708656,-0.00048101362,-0.00018761276,0.000061553095,-0.00022557162,0.00017116728,0.0004125467,0.039515376,0.0017563441,-0.00021130769],[-0.00035455122,-0.0026965896,0.000053296793,-0.0007689774,0.00062335096,-0.0011566881,-0.0072853495,-0.000055243403,-0.00042754904,0.00014428396,-0.00002198325,0.000007987806],[0.000027655264,0.00009524956,-0.000017855096,0.00024047325,-0.00010894294,0.0003022613,0.00011931366,-0.00006816485,0.00010605659,0.0055083646,-0.0000013424885,-0.00050699076],[0.00057941803,-0.00006816485,-0.00020580349,0.0035947815,-0.00010682852,0.000058800997,0.00009135634,0.0002454069,0.0002878631,0.0003180355,-0.025732148,-0.00003456908],[-0.00006145241,0.00045248575,0.00066600856,0.00065969885,0.0005129313,0.0013581957,-0.0022120182,-0.009116974,0.00023345875,0.008354172,-0.00020691103,-0.0003669021],[0.005819419,0.00131584,-0.001080032,0.0012897287,0.0003227678,0.0000333944,0.00028138558,-0.13102688,0.000108875814,-0.0023642564,0.0013522886,-0.00009739754],[-0.00054085505,0.0015879625,-0.0047873813,-0.0037840053,0.00009276596,-0.00017344952,-0.0015512455,-0.00009934415,0.0009413529,-0.0004506734,-0.11361534,-0.00061116787]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[0.00063983,0.005318738,0.0011584669,-0.000059270867,-0.0010662714,0.0050790366,-0.0030815816,-0.0020518594,-0.0014402217,0.003492551,-0.0025671737,-0.0009158457],[-0.0007604526,0.0001687508,0.00012277058,-0.000349047,0.00001577424,0.0050093615,-0.00029742834,-0.0014442492,-0.001099431,0.00047403268,0.000051450872,-0.0034951689],[-0.0007240712,0.0017471481,-0.00015492317,0.000056988636,-0.00009733042,-0.00042362226,-0.0007914641,0.00027369984,0.00010209625,0.00042217906,0.00015190257,-0.00074323517],[0.11458065,0.000211744,-0.0009424941,0.0004295292,0.020043757,0.002104552,0.00007648828,-0.0015437276,-0.00084878836,-0.00058180094,0.0001198171,-0.000019331834],[-0.001126952,0.0012375395,-0.0012315989,-0.00059519225,-0.0007545457,-0.0005840496,0.0048124855,0.00018130307,-0.0005359214,0.00085811864,-0.0002987037,-0.000012149521],[-0.004241257,0.0029521994,0.00052175816,0.00095367024,0.00016260892,0.34350926,-0.0003045771,0.00010380792,-0.0053008157,0.024865739,0.014382985,-0.0023286806],[-0.0023886561,-0.0021729518,-0.00047665054,0.00043382516,-0.0046743774,0.0018583733,-0.002653999,0.0014368319,0.00030266403,0.13043068,0.00008846999,0.001177631],[0.00031910953,0.020570247,0.0003182369,-0.0025125344,-0.000262188,-0.00024681652,0.0005525011,-0.00043167718,0.00025712012,0.008090641,-0.0030692643,-0.00042362226],[0.0009765261,0.0003930135,0.0017531222,0.022596464,-0.00004477199,0.00014143117,0.009584831,-0.00031535054,0.0015271478,0.0011821282,-0.010773404,0.009366207],[0.0063142604,-0.0010944637,0.011661728,0.0013478585,-0.029186305,0.0038335095,-0.044093564,-0.0050316467,0.0048221517,0.27664757,-0.00003138067,-0.00066194753],[0.095388845,0.025068624,0.014238567,0.014754351,0.00009907565,-0.000089577545,0.050828964,-0.50510013,0.00014700249,-0.0016021258,0.06883207,0.0023277074],[0.0013426899,0.00963118,-0.07776365,-0.0077289413,-0.0005727727,-0.002956596,-0.0049480097,0.00045886257,-0.00063385593,-0.0065203323,-0.32048947,-0.0024725283]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-1,\"cmax\":1},\"title\":{\"text\":\"Activation Patching Per Head (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('4177a921-b03d-40a1-a628-08281ebb379e');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=1, zmin=-1)\n","# [markdown]\n","# We can also do by head *and* by position. This is a bit slow, but it can give useful + fine-grained detail"]},{"cell_type":"code","execution_count":18,"metadata":{},"outputs":[],"source":["if DO_SLOW_RUNS:\n","    every_head_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(model, corrupted_tokens, clean_cache, ioi_metric)\n","    every_head_act_patch_result = einops.rearrange(every_head_act_patch_result, \"act_type layer pos head -> act_type (layer head) pos\")\n","    imshow(every_head_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (By Pos)\", xaxis=\"Position\", yaxis=\"Layer & Head\", zmax=1, zmin=-1, x= [f\"{tok}_{i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))], y=ALL_HEAD_LABELS)"]},{"cell_type":"markdown","metadata":{},"source":[" ## Induction Patching\n"," To show how easy it is, lets do that again with induction heads in a 2L Attention Only model\n"," The input will be repeated random tokens eg BOS 1 5 8 9 2 1 5 8 9 2, and we judge the model's ability to predict the second repetition with its induction heads\n"," Lets call A, B and C different (non-repeated) random sequences. We'll start with clean tokens AA and corrupted tokens AB, and see how well the model can predict the second A given the first A"]},{"cell_type":"markdown","metadata":{},"source":[" ### Setup"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Loaded pretrained model attn-only-2l into HookedTransformer\n"]}],"source":["attn_only = HookedTransformer.from_pretrained(\"attn-only-2l\")\n","batch = 4\n","seq_len = 20\n","rand_tokens_A = torch.randint(100, 10000, (batch, seq_len)).to(attn_only.cfg.device)\n","rand_tokens_B = torch.randint(100, 10000, (batch, seq_len)).to(attn_only.cfg.device)\n","rand_tokens_C = torch.randint(100, 10000, (batch, seq_len)).to(attn_only.cfg.device)\n","bos = torch.tensor([attn_only.tokenizer.bos_token_id]*batch)[:, None].to(attn_only.cfg.device)\n","clean_tokens_induction = torch.cat([bos, rand_tokens_A, rand_tokens_A], dim=1).to(attn_only.cfg.device)\n","corrupted_tokens_induction = torch.cat([bos, rand_tokens_A, rand_tokens_B], dim=1).to(attn_only.cfg.device)"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[],"source":["clean_logits_induction, clean_cache_induction = attn_only.run_with_cache(clean_tokens_induction)\n","corrupted_logits_induction, corrupted_cache_induction = attn_only.run_with_cache(corrupted_tokens_induction)"]},{"cell_type":"markdown","metadata":{},"source":[" We define our metric as negative loss on the second half (negative loss so that higher is better)\n"," This time we won't normalise our metric"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean baseline: -2.2928695678710938\n","Corrupted baseline: -13.125859260559082\n"]}],"source":["def induction_loss(logits, answer_token_indices=rand_tokens_A):\n","    seq_len = answer_token_indices.shape[1]\n","\n","    # logits: batch x seq_len x vocab_size\n","    # Take the logits for the answers, cut off the final element to get the predictions for all but the first element of the answers (which can't be predicted)\n","    final_logits = logits[:, -seq_len:-1]\n","    final_log_probs = final_logits.log_softmax(-1)\n","    return final_log_probs.gather(-1, answer_token_indices[:, 1:].unsqueeze(-1)).mean()\n","CLEAN_BASELINE_INDUCTION = induction_loss(clean_logits_induction).item()\n","print(\"Clean baseline:\", CLEAN_BASELINE_INDUCTION)\n","CORRUPTED_BASELINE_INDUCTION = induction_loss(corrupted_logits_induction).item()\n","print(\"Corrupted baseline:\", CORRUPTED_BASELINE_INDUCTION)"]},{"cell_type":"markdown","metadata":{},"source":[" ### Patching"]},{"cell_type":"code","execution_count":22,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"e622aaef492c45a58526ab56f37e3964","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"382edd8deceb4452b5bf8e157b95178f","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a9065aae9b2a46e0bb595ae3e195b728","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"8bda86976c0944b3a603a796dbd8917d","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"50e16757155346a999e1eeb94dcbe49a","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"57fb8a3b-1399-4382-85d6-cf71ccf9e2dd\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"57fb8a3b-1399-4382-85d6-cf71ccf9e2dd\")) {                    Plotly.newPlot(                        \"57fb8a3b-1399-4382-85d6-cf71ccf9e2dd\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-12.630955,-12.988065,-13.006567,-12.529946,-13.078064,-13.068898,-13.280383,-13.24434],[-13.172629,-13.035272,-12.923991,-12.987872,-12.854983,-13.093642,-3.3364303,-11.604531]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-13.107556,-13.046705,-12.993137,-13.010434,-13.149853,-13.181243,-13.078225,-13.225475],[-13.093582,-13.024863,-13.030213,-13.049493,-12.776813,-13.144259,-3.3615336,-11.5658655]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-12.680319,-13.138184,-13.085978,-12.703294,-13.077439,-13.0484495,-13.202711,-13.259588],[-13.217579,-12.96091,-12.895449,-13.014744,-12.951784,-13.057919,-13.044403,-13.454396]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-13.086151,-13.138182,-12.952564,-13.146914,-13.082162,-13.136836,-13.129994,-13.051876],[-13.097097,-12.972845,-13.048372,-13.211313,-12.823811,-13.151044,-13.135399,-13.120738]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-13.086223,-13.054343,-12.982502,-13.03867,-13.105019,-13.114271,-13.120985,-13.165459],[-13.16909,-13.094792,-13.030987,-13.054218,-12.846087,-13.07762,-3.342551,-11.469898]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-13.454396,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('57fb8a3b-1399-4382-85d6-cf71ccf9e2dd');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n","\n","if DO_SLOW_RUNS:\n","    every_head_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\n","    every_head_act_patch_result = einops.rearrange(every_head_act_patch_result, \"act_type layer pos head -> act_type (layer head) pos\")\n","    imshow(every_head_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (By Pos)\", xaxis=\"Position\", yaxis=\"Layer & Head\", zmax=CLEAN_BASELINE_INDUCTION, x= [f\"{tok}_{i}\" for i, tok in enumerate(attn_only.to_str_tokens(clean_tokens[0]))], y=ALL_HEAD_LABELS)"]},{"cell_type":"markdown","metadata":{},"source":[" ### Changing the Corrupted Baseline\n"," We can also change the corrupted baseline easily to check what things look like! We'll keep clean as AA, but rather than corrupted as AB, we'll try out:\n"," * BA - This has a corrupted first half, so we expect both keys *and* values to matter. Head output patching should work, but value and key and pattern won't.\n"," * BB - This is still inductiony but with different tokens. So keys, queries and patterns don't matter, head output patching will work, and value will.\n"," * BC - This is just random tokens, so everything is corrupted! The induction head needs queries, keys *and* values, so only output will work."]},{"cell_type":"code","execution_count":23,"metadata":{},"outputs":[],"source":["corrupted_tokens_induction_BA = torch.cat([bos, rand_tokens_B, rand_tokens_A], dim=1).to(attn_only.cfg.device)\n","corrupted_tokens_induction_BB = torch.cat([bos, rand_tokens_B, rand_tokens_B], dim=1).to(attn_only.cfg.device)\n","corrupted_tokens_induction_BC = torch.cat([bos, rand_tokens_B, rand_tokens_C], dim=1).to(attn_only.cfg.device)"]},{"cell_type":"code","execution_count":24,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"84f0c8fdecfa454591103ae4678260d5","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"36bcd874bc584d279577a14915f39dc3","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c2bb1736cf6a4e43a2263bfe6f1d1309","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"64a9057df33f4d8594101fd7ea74c876","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"39df34a94d184e97add046794dd19cdc","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"ef54b05a-b75e-40a1-8bab-0450783ee71c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"ef54b05a-b75e-40a1-8bab-0450783ee71c\")) {                    Plotly.newPlot(                        \"ef54b05a-b75e-40a1-8bab-0450783ee71c\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-12.921511,-12.877056,-12.984126,-12.905375,-12.970542,-13.053899,-13.2373705,-12.864943],[-13.1692505,-13.0807705,-13.057977,-13.0245495,-12.941997,-13.084174,-3.3831546,-11.245924]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-13.0876255,-13.047407,-13.058392,-13.10271,-13.020917,-13.104961,-13.048052,-12.31762],[-13.123565,-13.093914,-13.052007,-13.041765,-13.061894,-13.077681,-13.115112,-13.048046]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-12.99122,-12.947079,-12.989821,-12.987743,-12.981367,-13.105391,-13.203668,-13.100986],[-13.111007,-13.113584,-13.096868,-13.094455,-13.024431,-13.052156,-12.876199,-13.023597]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-13.083906,-13.11855,-12.93277,-13.115893,-13.106962,-13.075,-13.117452,-12.339193],[-13.105621,-13.049583,-13.076819,-13.103814,-13.00783,-13.147947,-13.327594,-13.197513]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-13.080789,-13.094806,-12.9795475,-13.079694,-13.0589485,-13.059589,-13.09329,-12.983442],[-13.14187,-13.092512,-13.058135,-13.0333805,-13.0066595,-13.124432,-13.322428,-13.160433]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-13.327594,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head on BA (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('ef54b05a-b75e-40a1-8bab-0450783ee71c');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7ce8d65176ac47198138463641003156","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7b0dea8bb0114ac69d08f06f73934b65","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a6af42964f0145f6bcd54e28e9e4adfe","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"3c0989d7873c45cfbcce13fe0289548e","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"f0982b478f43462aaf6501c6fec675ed","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"e38707d1-1a25-4556-bd49-0a85c6bea363\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"e38707d1-1a25-4556-bd49-0a85c6bea363\")) {                    Plotly.newPlot(                        \"e38707d1-1a25-4556-bd49-0a85c6bea363\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-13.805504,-13.825346,-13.645974,-13.384713,-14.021775,-13.943011,-14.001552,-13.868244],[-14.046326,-14.084418,-13.944375,-13.924217,-13.97186,-14.030198,-3.6828191,-12.427961]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-13.989873,-13.964358,-13.889407,-14.020409,-14.04137,-14.084052,-13.9674425,-12.949598],[-14.097729,-14.051899,-14.026574,-14.052086,-13.879718,-14.102751,-13.784435,-13.837878]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-13.914917,-13.987852,-13.7277975,-13.531109,-14.002791,-13.955838,-13.972597,-14.098117],[-14.03552,-14.056699,-13.936212,-13.9273615,-14.01424,-13.986736,-3.7621412,-12.812471]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-14.071397,-14.12512,-13.814937,-14.148074,-14.053765,-14.062568,-14.015211,-12.977612],[-14.065711,-14.079923,-14.047265,-14.121845,-13.929293,-14.081871,-13.881097,-13.952627]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-14.017073,-14.045502,-13.821099,-14.039408,-14.061439,-13.973086,-13.950179,-13.840467],[-14.15493,-14.114534,-14.003726,-14.043842,-13.882369,-14.078369,-13.883451,-14.127655]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-14.15493,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head on BB (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('e38707d1-1a25-4556-bd49-0a85c6bea363');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a7d2ec5fe6714deea0d72a26994a24ef","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c131036c69bd4f1e962670df29c4bfaf","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d2bdafd5cbce446492320082a5f7821e","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c4d1c5cf6c134f68a71c5bee920adac0","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"cf6e5d5937d646c3abf8f9610dc81609","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"2239ccf3-e07f-448b-b577-4134bae4700c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"2239ccf3-e07f-448b-b577-4134bae4700c\")) {                    Plotly.newPlot(                        \"2239ccf3-e07f-448b-b577-4134bae4700c\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-12.822885,-12.811085,-12.8884325,-12.81087,-12.889652,-12.878188,-13.202382,-12.906532],[-13.182879,-12.987521,-12.7554655,-12.92344,-12.589047,-12.970565,-3.6010063,-11.286802]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-12.968789,-12.844384,-12.912667,-12.981978,-12.909443,-12.878584,-12.927259,-12.348582],[-13.075386,-12.829196,-12.817758,-12.933551,-12.723004,-12.950664,-13.066957,-12.905731]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-12.820385,-12.9005785,-12.893993,-12.877544,-12.880168,-12.988029,-13.202785,-13.129946],[-13.170372,-13.053295,-12.780892,-13.032372,-12.68076,-12.986087,-12.676176,-12.962296]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-12.969026,-13.095675,-12.719384,-12.916345,-13.048266,-12.982166,-13.106823,-12.325087],[-13.058377,-12.824394,-12.8636265,-13.143163,-12.616763,-13.001718,-13.132352,-13.121058]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-12.964717,-12.970182,-12.920894,-13.007302,-12.984122,-12.876909,-13.061056,-12.948068],[-13.097929,-13.012596,-13.003614,-13.021066,-12.936759,-12.980106,-13.2769785,-13.026427]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-13.2769785,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head on BC (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('2239ccf3-e07f-448b-b577-4134bae4700c');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction_BA, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head on BA (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n","every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction_BB, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head on BB (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n","every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction_BC, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head on BC (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)"]}],"metadata":{"kernelspec":{"display_name":"base","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.13"},"orig_nbformat":4,"vscode":{"interpreter":{"hash":"d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe"}}},"nbformat":4,"nbformat_minor":2}
+{"cells":[{"cell_type":"markdown","metadata":{},"source":[" # Activation Patching in TransformerLens Demo\n"," This is an accompaniment to [Exploratory Analysis Demo](https://neelnanda.io/exploratory-analysis-demo). That notebook explains some basic techniques for mech interp of networks, including an overview of activation patching ([summary here](https://dynalist.io/d/n2ZWtnoYHrU1s4vnFSAQ519J#z=qeWBvs-R-taFfcCq-S_hgMqx)). This demonstrates how to use the Activation Patching utils in TransformerLens.\n"]},{"cell_type":"markdown","metadata":{},"source":[" <b style=\"color: red\">To use this notebook, go to Runtime > Change Runtime Type and select GPU as the hardware accelerator.</b>\n","\n"," **Tips for reading this Colab:**\n"," * You can run all this code for yourself!\n"," * The graphs are interactive!\n"," * Use the table of contents pane in the sidebar to navigate\n"," * Collapse irrelevant sections with the dropdown arrows\n"," * Search the page using the search in the sidebar, not CTRL+F"]},{"cell_type":"markdown","metadata":{},"source":[" ## Setup (Ignore)"]},{"cell_type":"code","execution_count":1,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Running as a Jupyter notebook - intended for development only!\n"]}],"source":["# Janky code to do different setup when run in a Colab notebook vs VSCode\n","DEBUG_MODE = False\n","try:\n","    import google.colab\n","    IN_COLAB = True\n","    print(\"Running as a Colab notebook\")\n","    %pip install transformer_lens\n","    # Install my janky personal plotting utils\n","    %pip install git+https://github.com/neelnanda-io/neel-plotly.git\n","except:\n","    IN_COLAB = False\n","    print(\"Running as a Jupyter notebook - intended for development only!\")\n","    from IPython import get_ipython\n","\n","    ipython = get_ipython()\n","    # Code to automatically update the HookedTransformer code as its edited without restarting the kernel\n","    ipython.magic(\"load_ext autoreload\")\n","    ipython.magic(\"autoreload 2\")"]},{"cell_type":"code","execution_count":2,"metadata":{},"outputs":[],"source":["# Plotly needs a different renderer for VSCode/Notebooks vs Colab argh\n","import plotly.io as pio\n","\n","if IN_COLAB or not DEBUG_MODE:\n","    # Thanks to annoying rendering issues, Plotly graphics will either show up in colab OR Vscode depending on the renderer - this is bad for developing demos! Thus creating a debug mode.\n","    pio.renderers.default = \"colab\"\n","else:\n","    pio.renderers.default = \"png\""]},{"cell_type":"code","execution_count":3,"metadata":{},"outputs":[],"source":["# Import stuff\n","import torch\n","import torch.nn as nn\n","import torch.nn.functional as F\n","import torch.optim as optim\n","import numpy as np\n","import einops\n","from fancy_einsum import einsum\n","import tqdm.notebook as tqdm\n","import random\n","from pathlib import Path\n","import plotly.express as px\n","from torch.utils.data import DataLoader\n","\n","from torchtyping import TensorType as TT\n","from typing import List, Union, Optional\n","from functools import partial\n","import copy\n","\n","import itertools\n","from transformers import AutoModelForCausalLM, AutoConfig, AutoTokenizer\n","import dataclasses\n","import datasets\n","from IPython.display import HTML"]},{"cell_type":"code","execution_count":4,"metadata":{},"outputs":[],"source":["import transformer_lens\n","import transformer_lens.utils as utils\n","from transformer_lens.hook_points import (\n","    HookedRootModule,\n","    HookPoint,\n",")  # Hooking utilities\n","from transformer_lens import HookedTransformer, HookedTransformerConfig, FactoredMatrix, ActivationCache"]},{"cell_type":"markdown","metadata":{},"source":[" We turn automatic differentiation off, to save GPU memory, as this notebook focuses on model inference not model training."]},{"cell_type":"code","execution_count":5,"metadata":{},"outputs":[{"data":{"text/plain":["<torch.autograd.grad_mode.set_grad_enabled at 0x7f6b307d0990>"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["torch.set_grad_enabled(False)"]},{"cell_type":"markdown","metadata":{},"source":[" Plotting helper functions from a janky personal library of plotting utils. The library is not documented and I recommend against trying to read it, just use your preferred plotting library if you want to do anything non-obvious:"]},{"cell_type":"code","execution_count":6,"metadata":{},"outputs":[],"source":["from neel_plotly import line, imshow, scatter"]},{"cell_type":"code","execution_count":7,"metadata":{},"outputs":[],"source":["import transformer_lens.patching as patching"]},{"cell_type":"markdown","metadata":{},"source":[" ## Activation Patching Setup\n"," This just copies the relevant set up from Exploratory Analysis Demo, and isn't very important."]},{"cell_type":"code","execution_count":8,"metadata":{},"outputs":[{"name":"stderr","output_type":"stream","text":["Using pad_token, but it is not set yet.\n"]},{"name":"stdout","output_type":"stream","text":["Loaded pretrained model gpt2-small into HookedTransformer\n"]}],"source":["model = HookedTransformer.from_pretrained(\"gpt2-small\")"]},{"cell_type":"code","execution_count":9,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean string 0 <|endoftext|>When John and Mary went to the shops, John gave the bag to\n","Corrupted string 0 <|endoftext|>When John and Mary went to the shops, Mary gave the bag to\n","Answer token indices tensor([[ 5335,  1757],\n","        [ 1757,  5335],\n","        [ 4186,  3700],\n","        [ 3700,  4186],\n","        [ 6035, 15686],\n","        [15686,  6035],\n","        [ 5780, 14235],\n","        [14235,  5780]], device='cuda:0')\n"]}],"source":["prompts = ['When John and Mary went to the shops, John gave the bag to', 'When John and Mary went to the shops, Mary gave the bag to', 'When Tom and James went to the park, James gave the ball to', 'When Tom and James went to the park, Tom gave the ball to', 'When Dan and Sid went to the shops, Sid gave an apple to', 'When Dan and Sid went to the shops, Dan gave an apple to', 'After Martin and Amy went to the park, Amy gave a drink to', 'After Martin and Amy went to the park, Martin gave a drink to']\n","answers = [(' Mary', ' John'), (' John', ' Mary'), (' Tom', ' James'), (' James', ' Tom'), (' Dan', ' Sid'), (' Sid', ' Dan'), (' Martin', ' Amy'), (' Amy', ' Martin')]\n","\n","clean_tokens = model.to_tokens(prompts)\n","# Swap each adjacent pair, with a hacky list comprehension\n","corrupted_tokens = clean_tokens[\n","    [(i+1 if i%2==0 else i-1) for i in range(len(clean_tokens)) ]\n","    ]\n","print(\"Clean string 0\", model.to_string(clean_tokens[0]))\n","print(\"Corrupted string 0\", model.to_string(corrupted_tokens[0]))\n","\n","answer_token_indices = torch.tensor([[model.to_single_token(answers[i][j]) for j in range(2)] for i in range(len(answers))], device=model.cfg.device)\n","print(\"Answer token indices\", answer_token_indices)"]},{"cell_type":"code","execution_count":10,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean logit diff: 3.5519\n","Corrupted logit diff: -3.5519\n"]}],"source":["def get_logit_diff(logits, answer_token_indices=answer_token_indices):\n","    if len(logits.shape)==3:\n","        # Get final logits only\n","        logits = logits[:, -1, :]\n","    correct_logits = logits.gather(1, answer_token_indices[:, 0].unsqueeze(1))\n","    incorrect_logits = logits.gather(1, answer_token_indices[:, 1].unsqueeze(1))\n","    return (correct_logits - incorrect_logits).mean()\n","\n","clean_logits, clean_cache = model.run_with_cache(clean_tokens)\n","corrupted_logits, corrupted_cache = model.run_with_cache(corrupted_tokens)\n","\n","clean_logit_diff = get_logit_diff(clean_logits, answer_token_indices).item()\n","print(f\"Clean logit diff: {clean_logit_diff:.4f}\")\n","\n","corrupted_logit_diff = get_logit_diff(corrupted_logits, answer_token_indices).item()\n","print(f\"Corrupted logit diff: {corrupted_logit_diff:.4f}\")"]},{"cell_type":"code","execution_count":11,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean Baseline is 1: 1.0000\n","Corrupted Baseline is 0: 0.0000\n"]}],"source":["CLEAN_BASELINE = clean_logit_diff\n","CORRUPTED_BASELINE = corrupted_logit_diff\n","def ioi_metric(logits, answer_token_indices=answer_token_indices):\n","    return (get_logit_diff(logits, answer_token_indices) - CORRUPTED_BASELINE) / (CLEAN_BASELINE  - CORRUPTED_BASELINE)\n","\n","print(f\"Clean Baseline is 1: {ioi_metric(clean_logits).item():.4f}\")\n","print(f\"Corrupted Baseline is 0: {ioi_metric(corrupted_logits).item():.4f}\")"]},{"cell_type":"markdown","metadata":{},"source":[" ## Patching\n"," In the following cells, we use the patching module to call activation patching utilities"]},{"cell_type":"code","execution_count":12,"metadata":{},"outputs":[],"source":["# Whether to do the runs by head and by position, which are much slower\n","DO_SLOW_RUNS = False"]},{"cell_type":"markdown","metadata":{},"source":[" ### Patching Single Activation Types\n"," We start by patching single types of activation\n"," The general syntax is that the functions are called get_act_patch_... and take in (model, corrupted_tokens, clean_cache, patching_metric)"]},{"cell_type":"markdown","metadata":{},"source":[" We can patch the residual stream at the start of each block over each layer and position\n"," resid_pre -> attn_out, mlp_out, resid_mid all also work"]},{"cell_type":"code","execution_count":13,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"3f4b44886e5a47b1b3f3b51ec96e9b85","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"1c8d77d6-ef47-4c62-a969-9532ea08d5fa\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"1c8d77d6-ef47-4c62-a969-9532ea08d5fa\")) {                    Plotly.newPlot(                        \"1c8d77d6-ef47-4c62-a969-9532ea08d5fa\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"x\":[\"<|endoftext|> 0\",\"When 1\",\" John 2\",\" and 3\",\" Mary 4\",\" went 5\",\" to 6\",\" the 7\",\" shops 8\",\", 9\",\" John 10\",\" gave 11\",\" the 12\",\" bag 13\",\" to 14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99999994,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0006502,-0.00024795762,0.000008927548,-0.00036488837,-0.000048866583],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.001051,-0.000026782645,-0.00002094282,-0.00045913106,-0.0005944539],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000266,0.0008683216,0.00051635463,-0.0009931058,-0.0008661065],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9949083,0.005430064,0.0016051463,-0.0006189208,-0.0016321304],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.96756655,0.031341735,0.0028421823,-0.0012302229,-0.000986192],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9675207,0.031001415,0.0017822877,-0.0004862829,-0.0006460726],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9228318,0.051346023,0.00472905,0.0009341035,0.017046316],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65654737,0.02385639,0.002356403,-0.000017116728,0.31869277],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027301382,0.031425238,0.0018207164,0.0007996197,0.9383891],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.026841983,0.020980678,0.0012512665,0.0003241103,1.0048288],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0056879222,0.014263134,0.00048705481,-0.000090651534,0.99142164]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"Position\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0},\"title\":{\"text\":\"resid_pre Activation Patching\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('1c8d77d6-ef47-4c62-a969-9532ea08d5fa');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["resid_pre_act_patch_results = patching.get_act_patch_resid_pre(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(resid_pre_act_patch_results, \n","       yaxis=\"Layer\", \n","       xaxis=\"Position\", \n","       x=[f\"{tok} {i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))],\n","       title=\"resid_pre Activation Patching\")"]},{"cell_type":"markdown","metadata":{},"source":[" We can patch head outputs over each head in each layer, patching across all positions at once\n"," out -> q, k, v, pattern all also work"]},{"cell_type":"code","execution_count":14,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a99b6bfcba454ea1b862da644c1f3154","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"3c5828c1-5f79-4630-8146-4fec588a81be\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"3c5828c1-5f79-4630-8146-4fec588a81be\")) {                    Plotly.newPlot(                        \"3c5828c1-5f79-4630-8146-4fec588a81be\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[0.000949475,0.01612389,0.0018556548,0.0034395899,-0.009822988,0.011059554,-0.004064585,-0.0015793706,-0.0012084075,0.0038287437,-0.0042575346,-0.0011430618],[-0.0010770785,-0.00037838038,0.0000025507281,-0.0002605099,-0.00014122979,0.0038325363,-0.0004281867,-0.0014292804,-0.0009218869,0.0006933953,0.0004336238,-0.0035704488],[-0.000496855,0.0008060301,0.00054236536,-0.0005317261,-0.0007156135,-0.0010388176,-0.0009492065,-0.00008638913,0.0002770225,0.0021078077,-0.00019734581,-0.0016404538],[0.11626182,0.00025030697,-0.0014675413,-0.00039700742,0.018962113,-0.00018828402,0.01116984,-0.0013302383,-0.000735281,-0.00030299966,-0.00014552576,-0.00022278597],[-0.0016504889,0.00029246113,-0.0014359257,0.030842163,-0.007431479,-0.00028242602,0.00601747,-0.011006527,-0.0012657653,0.0014903636,-0.00017975921,0.0029445807],[-0.004211588,0.0029607913,0.0020455497,0.0013399713,-0.0012184426,0.34349898,0.00056297256,-0.0001253213,-0.005152605,0.016240286,0.017090013,-0.004174804],[0.03977555,0.015227075,-0.0010225064,0.0008079767,-0.0049347864,-0.002123347,-0.014273841,0.0013753123,0.0014840539,0.13027029,-0.00033568926,0.001291541],[0.00037237274,0.019514346,0.00022292022,0.12424979,-0.00040388768,-0.0076521845,0.0013002673,-0.0011247369,-0.0074497373,0.19224228,-0.0032751013,-0.00050229207],[-0.0010082088,0.00003097792,-0.00085925974,0.012359889,-0.00040469316,-0.0043288204,0.31855473,0.0023298552,0.0021180105,0.00014116267,0.27793574,0.005737863],[0.005889967,-0.0009691425,0.009126103,0.020675799,-0.037004486,0.014263471,-0.048284512,0.05834116,0.00065117405,0.2636095,0.0004923241,-0.0026102003],[0.08374242,0.020676069,-0.0037430592,0.010851469,-0.0010961419,0.00047389843,0.048179664,-0.47991198,0.00018429011,0.011862027,0.06088635,0.00084617053],[0.0053282026,-0.0114937825,-0.11350821,0.0063299676,0.00031608893,-0.0011600779,-0.022669563,0.0040708277,0.0073162266,-0.008345379,-0.27817535,0.0036348547]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,1.0],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0},\"title\":{\"text\":\"attn_head_out Activation Patching (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('3c5828c1-5f79-4630-8146-4fec588a81be');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["attn_head_out_all_pos_act_patch_results = patching.get_act_patch_attn_head_out_all_pos(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(attn_head_out_all_pos_act_patch_results, \n","       yaxis=\"Layer\", \n","       xaxis=\"Head\", \n","       title=\"attn_head_out Activation Patching (All Pos)\")"]},{"cell_type":"markdown","metadata":{},"source":[" We can patch head outputs over each head in each layer, patching on each position in turn\n"," out -> q, k, v, pattern all also work, though note that pattern has output shape [layer, pos, head]\n"," We reshape it to plot nicely"]},{"cell_type":"code","execution_count":15,"metadata":{},"outputs":[],"source":["ALL_HEAD_LABELS = [f\"L{i}H{j}\" for i in range(model.cfg.n_layers) for j in range(model.cfg.n_heads)]\n","if DO_SLOW_RUNS:\n","    attn_head_out_act_patch_results = patching.get_act_patch_attn_head_out_by_pos(model, corrupted_tokens, clean_cache, ioi_metric)\n","    attn_head_out_act_patch_results = einops.rearrange(attn_head_out_act_patch_results, \"layer pos head -> (layer head) pos\")\n","    imshow(attn_head_out_act_patch_results, \n","        yaxis=\"Head Label\", \n","        xaxis=\"Pos\", \n","        x=[f\"{tok} {i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))],\n","        y=ALL_HEAD_LABELS,\n","        title=\"attn_head_out Activation Patching By Pos\")"]},{"cell_type":"markdown","metadata":{},"source":[" ### Patching multiple activation types\n"," Some utilities are provided to patch multiple activations types *in turn*. Note that this is *not* a utility to patch multiple activations at once, it's just a useful scan to get a sense for what's going on in a model\n"," By block: We patch the residual stream at the start of each block, attention output and MLP output over each layer and position"]},{"cell_type":"code","execution_count":16,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"273f73067c394332989ab06fb5f15186","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a3ef4fefd564453ba35fee5a836d6657","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c145d75160f741fd98be311cda941ccc","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/180 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"bbb2904f-6b80-4410-a47c-034ec495c09e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"bbb2904f-6b80-4410-a47c-034ec495c09e\")) {                    Plotly.newPlot(                        \"bbb2904f-6b80-4410-a47c-034ec495c09e\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"x\":[\"<|endoftext|>_0\",\"When_1\",\" John_2\",\" and_3\",\" Mary_4\",\" went_5\",\" to_6\",\" the_7\",\" shops_8\",\",_9\",\" John_10\",\" gave_11\",\" the_12\",\" bag_13\",\" to_14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.99999994,0.0,0.0,0.0,0.0],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0006502,-0.00024795762,0.000008927548,-0.00036488837,-0.000048866583],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.001051,-0.000026782645,-0.00002094282,-0.00045913106,-0.0005944539],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000266,0.0008683216,0.00051635463,-0.0009931058,-0.0008661065],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9949083,0.005430064,0.0016051463,-0.0006189208,-0.0016321304],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.96756655,0.031341735,0.0028421823,-0.0012302229,-0.000986192],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9675207,0.031001415,0.0017822877,-0.0004862829,-0.0006460726],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9228318,0.051346023,0.00472905,0.0009341035,0.017046316],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.65654737,0.02385639,0.002356403,-0.000017116728,0.31869277],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027301382,0.031425238,0.0018207164,0.0007996197,0.9383891],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.026841983,0.020980678,0.0012512665,0.0003241103,1.0048288],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0056879222,0.014263134,0.00048705481,-0.000090651534,0.99142164]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"x\":[\"<|endoftext|>_0\",\"When_1\",\" John_2\",\" and_3\",\" Mary_4\",\" went_5\",\" to_6\",\" the_7\",\" shops_8\",\",_9\",\" John_10\",\" gave_11\",\" the_12\",\" bag_13\",\" to_14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.035457034,-0.00024795762,0.000008927548,-0.00036488837,-0.000048866583],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.002984419,0.00007954244,0.000021077069,0.00008041506,-0.00059626624],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0019119721,0.0006667469,0.00039509436,-0.00070453796,-0.00027272655],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.15463124,0.0038024643,0.0005173951,-0.00012028697,-0.0005607575],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0054061003,0.019582007,0.0010079069,-0.00024272192,0.00079287373],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.35209733,0.0010533165,0.00022416202,0.00013310774,0.000082730854],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.11985986,0.021244075,0.0027277018,0.0013412467,0.017973103],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013310539,0.011509422,0.00037495705,-0.000040543153,0.29760385],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0015007679,0.017352335,0.00058451947,0.0010121021,0.5697317],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.00012867752,0.0063013053,0.00014149828,0.00031239708,0.27152342],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.0009372919,0.00008689257,0.0003319974,9.733042e-7,-0.19297722],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.40617895]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"x\":[\"<|endoftext|>_0\",\"When_1\",\" John_2\",\" and_3\",\" Mary_4\",\" went_5\",\" to_6\",\" the_7\",\" shops_8\",\",_9\",\" John_10\",\" gave_11\",\" the_12\",\" bag_13\",\" to_14\"],\"z\":[[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.8507897,-0.00027833143,-0.00007276288,-0.00047356283,0.000039939034],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.008865123,0.00022137635,0.00015015734,-0.000048598085,0.00030407365],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013549603,0.000058264002,-0.00032958091,-0.00063821906,0.0007726693],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0019467425,0.0004991037,0.00017318102,0.00016834805,0.0004076802],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.019786568,0.004128219,-0.000049067956,-0.00016962342,0.0007913634],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.096524656,-0.0018828737,-0.00048349722,0.00070940447,-0.00018378667],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.015899561,-0.00085006375,0.00012364319,0.000028091572,-0.0072379597],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0103607895,0.003151022,0.0005308871,0.00023580811,0.008497046],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.012533204,0.000022184622,-0.00035351078,0.00008618776,-0.021631954],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.00033427964,0.0008096548,0.000016479047,0.00012914739,0.031623926],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0013593368,-0.00019445946,-0.00009867291,-0.00014169967,0.028763823],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020447912]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.31999999999999995],\"title\":{\"text\":\"Position\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.33999999999999997,0.6599999999999999],\"matches\":\"x\",\"title\":{\"text\":\"Position\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.6799999999999999,0.9999999999999999],\"matches\":\"x\",\"title\":{\"text\":\"Position\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Residual Stream\",\"x\":0.15999999999999998,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Attn Output\",\"x\":0.49999999999999994,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"MLP Output\",\"x\":0.8399999999999999,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-1,\"cmax\":1},\"title\":{\"text\":\"Activation Patching Per Block\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('bbb2904f-6b80-4410-a47c-034ec495c09e');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_block_result = patching.get_act_patch_block_every(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(every_block_result, facet_col=0, facet_labels=[\"Residual Stream\", \"Attn Output\", \"MLP Output\"], title=\"Activation Patching Per Block\", xaxis=\"Position\", yaxis=\"Layer\", zmax=1, zmin=-1, x= [f\"{tok}_{i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))])"]},{"cell_type":"markdown","metadata":{},"source":[" By head: For each head we patch the output, query, key, value or pattern. We do all positions at once so it's not super slow."]},{"cell_type":"code","execution_count":17,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"e6eefdb1a9984a468e01d5b315d51cd3","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"5234ea6d3a0e40cb83ad760f547befe7","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"93372689514d41a4b94e1e5e52492f98","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"acfbbc29dd5640d1a3c4588daef06611","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"841474694153493e9a74050fde490f84","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/144 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"4177a921-b03d-40a1-a628-08281ebb379e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"4177a921-b03d-40a1-a628-08281ebb379e\")) {                    Plotly.newPlot(                        \"4177a921-b03d-40a1-a628-08281ebb379e\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[0.000949475,0.01612389,0.0018556548,0.0034395899,-0.009822988,0.011059554,-0.004064585,-0.0015793706,-0.0012084075,0.0038287437,-0.0042575346,-0.0011430618],[-0.0010770785,-0.00037838038,0.0000025507281,-0.0002605099,-0.00014122979,0.0038325363,-0.0004281867,-0.0014292804,-0.0009218869,0.0006933953,0.0004336238,-0.0035704488],[-0.000496855,0.0008060301,0.00054236536,-0.0005317261,-0.0007156135,-0.0010388176,-0.0009492065,-0.00008638913,0.0002770225,0.0021078077,-0.00019734581,-0.0016404538],[0.11626182,0.00025030697,-0.0014675413,-0.00039700742,0.018962113,-0.00018828402,0.01116984,-0.0013302383,-0.000735281,-0.00030299966,-0.00014552576,-0.00022278597],[-0.0016504889,0.00029246113,-0.0014359257,0.030842163,-0.007431479,-0.00028242602,0.00601747,-0.011006527,-0.0012657653,0.0014903636,-0.00017975921,0.0029445807],[-0.004211588,0.0029607913,0.0020455497,0.0013399713,-0.0012184426,0.34349898,0.00056297256,-0.0001253213,-0.005152605,0.016240286,0.017090013,-0.004174804],[0.03977555,0.015227075,-0.0010225064,0.0008079767,-0.0049347864,-0.002123347,-0.014273841,0.0013753123,0.0014840539,0.13027029,-0.00033568926,0.001291541],[0.00037237274,0.019514346,0.00022292022,0.12424979,-0.00040388768,-0.0076521845,0.0013002673,-0.0011247369,-0.0074497373,0.19224228,-0.0032751013,-0.00050229207],[-0.0010082088,0.00003097792,-0.00085925974,0.012359889,-0.00040469316,-0.0043288204,0.31855473,0.0023298552,0.0021180105,0.00014116267,0.27793574,0.005737863],[0.005889967,-0.0009691425,0.009126103,0.020675799,-0.037004486,0.014263471,-0.048284512,0.05834116,0.00065117405,0.2636095,0.0004923241,-0.0026102003],[0.08374242,0.020676069,-0.0037430592,0.010851469,-0.0010961419,0.00047389843,0.048179664,-0.47991198,0.00018429011,0.011862027,0.06088635,0.00084617053],[0.0053282026,-0.0114937825,-0.11350821,0.0063299676,0.00031608893,-0.0011600779,-0.022669563,0.0040708277,0.0073162266,-0.008345379,-0.27817535,0.0036348547]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[0.0004925926,0.010944033,0.0015592668,0.009421383,-0.0048536328,0.010262049,-0.0031708572,-0.00038774425,-0.0011035255,0.00051189086,-0.00082774484,-0.0007998211],[0.0009907901,-0.00029890507,0.00012021985,-0.0003556252,-0.000109547065,0.00700581,-0.00034444898,-0.0014763682,-0.0012057225,0.00025067615,0.00006860116,-0.0019844666],[-0.002012256,0.0025707984,-0.00051769713,0.0012822108,0.00022473258,0.0032741951,0.00019607044,0.00021295223,0.000040543153,0.012865907,0.00028561443,-0.0019235176],[0.12042162,-0.00030823535,0.0008745641,-0.00023906364,0.028209778,0.0020729364,0.0011321206,0.006186791,0.0006772854,0.00004675216,-0.0015923256,-0.0002591674],[-0.0012818415,0.0013887708,-0.0017389253,-0.00039492655,-0.0016222966,-0.0009207457,-0.0022054065,-0.000481752,-0.00074810174,0.0009411516,-0.00026319487,0.0014971432],[-0.004147081,0.0029570658,0.0004330868,0.00060026016,-0.0005218924,0.34353,-0.0004413431,-0.00008531514,-0.0055975057,0.028573323,0.014875175,-0.0022643418],[-0.0011904517,0.0006325806,-0.0011616218,0.00042325305,-0.005044132,0.0027662981,-0.00018143732,0.0012201878,0.0010537192,0.13032328,-0.000016781107,0.0010141159],[0.00036686854,0.020354811,0.0003358235,-0.001465192,-0.00016918711,-0.00059559505,0.00082234136,-0.00054575515,0.000090953596,-0.00041200971,-0.0030538593,0.00014696893],[0.00047104564,0.0005044065,0.0015873584,0.024632582,-0.000052591986,-0.0010191166,0.010948195,-0.0005386064,0.0018448142,0.00073595217,-0.027599381,0.009300224],[0.006474654,-0.0015574545,0.012203254,0.001424246,-0.026727738,0.0025531447,-0.04373378,-0.01309772,0.004638197,0.27937496,0.00020476306,-0.00025453581],[0.09814128,0.026011655,0.013917243,0.015725339,0.00055894506,-0.00028316438,0.052197833,-0.512864,0.0001743557,-0.00403646,0.069167964,0.0025175686],[0.0015773905,0.008838944,-0.08574924,-0.0070981393,-0.0009571943,-0.0030238882,-0.006626993,0.0003132697,-0.00009719617,-0.0061496715,-0.33735374,-0.0022051716]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-0.00019781568,0.0053396807,0.0006521809,0.0035052039,-0.008983262,0.0034816097,-0.0008626831,-0.000033427965,0.0005173615,0.00044201434,-0.0039073126,-0.00018831757],[-0.00043979925,-0.00044634385,-0.00006709086,0.000072225885,-0.000035978694,-0.0019324786,-0.00015780953,0.00001530437,0.0002059713,0.00033730024,0.00035153062,-0.0005672685],[0.0002102337,-0.0007199766,0.00048678633,-0.0005975416,-0.0005923731,-0.00054478185,-0.00022721618,-0.0004804095,0.00020593773,0.0011836386,-0.00035703482,-0.00091007294],[0.0010391868,-0.000120354096,-0.00007783077,-0.0007270246,-0.0013100003,-0.002310926,0.01098753,-0.000051081686,0.00014404902,0.00015049297,-0.00008068356,-0.000020070203],[-0.00053712964,-0.00081274257,-0.00013304061,0.030610517,-0.0071862065,0.00014834497,0.0013340309,-0.011421825,-0.0005332364,0.0005125621,0.00037274192,0.0029560255],[0.000007920682,0.0000062090094,0.0015981655,0.00033924685,-0.0012587173,-0.000054202974,0.00063325185,-0.00027051143,0.00007413893,-0.0067042867,0.0031767976,-0.0017284539],[0.048632182,0.015315109,-0.00046466885,-0.00011605813,-0.00004883302,-0.0039534946,-0.017375292,-0.00015388275,0.0012203221,-0.00017992702,-0.00042707915,0.00012374388],[-0.000028695691,-0.0013851125,-0.000121629455,0.1332166,-0.00024426577,-0.007315354,0.0003331385,-0.0007948874,-0.007938806,0.20841402,-0.00019147243,-0.00020634048],[-0.0020486375,-0.0003768365,-0.0033148054,-0.009665918,-0.00031776703,-0.005140993,0.31717074,0.0028421488,0.0004723546,-0.0011536675,0.27267054,-0.0031753546],[-0.00043909444,0.00005692151,-0.0020628679,0.02006631,-0.007870473,0.0113156345,0.0030571818,0.06856223,-0.0027475369,-0.009278911,0.00050719216,-0.0013157058],[-0.012956424,-0.003045234,-0.017922994,-0.004358456,-0.001152325,0.0005002448,-0.003113298,0.01958523,0.00004322813,0.012969311,-0.0076964195,-0.0009145032],[0.004100564,-0.020459324,-0.035875153,0.01465541,0.0008436869,0.0017800726,-0.018042274,0.0035195013,0.008252545,-0.0017664464,0.044165857,0.0064741843]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[0.00019825199,-0.00049141794,0.00075165933,0.0071840254,0.001131785,-0.00014062568,0.0005671678,-0.0022284638,-0.000011008406,-0.0011850146,-0.003645259,-0.00013888044],[0.00009827016,-0.0006734594,0.00006262709,0.0003996924,0.00013777288,0.001314464,0.00016364935,-0.000053632415,0.000016579734,0.0003300508,0.000043026757,0.0033119528],[-0.0011031899,-0.0004916864,-0.0008953727,0.00097303564,0.0005671007,0.0026970594,0.00017076454,0.00009404132,0.00005490778,0.011643504,-0.00009689411,-0.00022788742],[-0.0014167617,-0.00039103333,0.0007130292,-0.000690811,0.0077598854,-0.00019734581,0.0014261927,0.006242303,0.0005358543,-0.000038462294,0.0000018459217,-0.0007054777],[-0.00015371494,0.00034085783,0.00040905626,-0.0009754186,0.0007706555,0.000046181605,-0.005579852,0.0002847418,0.0016971739,0.00016744187,-0.00008759738,0.00078226806],[0.000010706346,-5.7055763e-7,0.00025708656,-0.00048101362,-0.00018761276,0.000061553095,-0.00022557162,0.00017116728,0.0004125467,0.039515376,0.0017563441,-0.00021130769],[-0.00035455122,-0.0026965896,0.000053296793,-0.0007689774,0.00062335096,-0.0011566881,-0.0072853495,-0.000055243403,-0.00042754904,0.00014428396,-0.00002198325,0.000007987806],[0.000027655264,0.00009524956,-0.000017855096,0.00024047325,-0.00010894294,0.0003022613,0.00011931366,-0.00006816485,0.00010605659,0.0055083646,-0.0000013424885,-0.00050699076],[0.00057941803,-0.00006816485,-0.00020580349,0.0035947815,-0.00010682852,0.000058800997,0.00009135634,0.0002454069,0.0002878631,0.0003180355,-0.025732148,-0.00003456908],[-0.00006145241,0.00045248575,0.00066600856,0.00065969885,0.0005129313,0.0013581957,-0.0022120182,-0.009116974,0.00023345875,0.008354172,-0.00020691103,-0.0003669021],[0.005819419,0.00131584,-0.001080032,0.0012897287,0.0003227678,0.0000333944,0.00028138558,-0.13102688,0.000108875814,-0.0023642564,0.0013522886,-0.00009739754],[-0.00054085505,0.0015879625,-0.0047873813,-0.0037840053,0.00009276596,-0.00017344952,-0.0015512455,-0.00009934415,0.0009413529,-0.0004506734,-0.11361534,-0.00061116787]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[0.00063983,0.005318738,0.0011584669,-0.000059270867,-0.0010662714,0.0050790366,-0.0030815816,-0.0020518594,-0.0014402217,0.003492551,-0.0025671737,-0.0009158457],[-0.0007604526,0.0001687508,0.00012277058,-0.000349047,0.00001577424,0.0050093615,-0.00029742834,-0.0014442492,-0.001099431,0.00047403268,0.000051450872,-0.0034951689],[-0.0007240712,0.0017471481,-0.00015492317,0.000056988636,-0.00009733042,-0.00042362226,-0.0007914641,0.00027369984,0.00010209625,0.00042217906,0.00015190257,-0.00074323517],[0.11458065,0.000211744,-0.0009424941,0.0004295292,0.020043757,0.002104552,0.00007648828,-0.0015437276,-0.00084878836,-0.00058180094,0.0001198171,-0.000019331834],[-0.001126952,0.0012375395,-0.0012315989,-0.00059519225,-0.0007545457,-0.0005840496,0.0048124855,0.00018130307,-0.0005359214,0.00085811864,-0.0002987037,-0.000012149521],[-0.004241257,0.0029521994,0.00052175816,0.00095367024,0.00016260892,0.34350926,-0.0003045771,0.00010380792,-0.0053008157,0.024865739,0.014382985,-0.0023286806],[-0.0023886561,-0.0021729518,-0.00047665054,0.00043382516,-0.0046743774,0.0018583733,-0.002653999,0.0014368319,0.00030266403,0.13043068,0.00008846999,0.001177631],[0.00031910953,0.020570247,0.0003182369,-0.0025125344,-0.000262188,-0.00024681652,0.0005525011,-0.00043167718,0.00025712012,0.008090641,-0.0030692643,-0.00042362226],[0.0009765261,0.0003930135,0.0017531222,0.022596464,-0.00004477199,0.00014143117,0.009584831,-0.00031535054,0.0015271478,0.0011821282,-0.010773404,0.009366207],[0.0063142604,-0.0010944637,0.011661728,0.0013478585,-0.029186305,0.0038335095,-0.044093564,-0.0050316467,0.0048221517,0.27664757,-0.00003138067,-0.00066194753],[0.095388845,0.025068624,0.014238567,0.014754351,0.00009907565,-0.000089577545,0.050828964,-0.50510013,0.00014700249,-0.0016021258,0.06883207,0.0023277074],[0.0013426899,0.00963118,-0.07776365,-0.0077289413,-0.0005727727,-0.002956596,-0.0049480097,0.00045886257,-0.00063385593,-0.0065203323,-0.32048947,-0.0024725283]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-1,\"cmax\":1},\"title\":{\"text\":\"Activation Patching Per Head (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('4177a921-b03d-40a1-a628-08281ebb379e');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(model, corrupted_tokens, clean_cache, ioi_metric)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=1, zmin=-1)\n","# [markdown]\n","# We can also do by head *and* by position. This is a bit slow, but it can give useful + fine-grained detail"]},{"cell_type":"code","execution_count":18,"metadata":{},"outputs":[],"source":["if DO_SLOW_RUNS:\n","    every_head_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(model, corrupted_tokens, clean_cache, ioi_metric)\n","    every_head_act_patch_result = einops.rearrange(every_head_act_patch_result, \"act_type layer pos head -> act_type (layer head) pos\")\n","    imshow(every_head_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (By Pos)\", xaxis=\"Position\", yaxis=\"Layer & Head\", zmax=1, zmin=-1, x= [f\"{tok}_{i}\" for i, tok in enumerate(model.to_str_tokens(clean_tokens[0]))], y=ALL_HEAD_LABELS)"]},{"cell_type":"markdown","metadata":{},"source":[" ## Induction Patching\n"," To show how easy it is, lets do that again with induction heads in a 2L Attention Only model\n"," The input will be repeated random tokens eg BOS 1 5 8 9 2 1 5 8 9 2, and we judge the model's ability to predict the second repetition with its induction heads\n"," Lets call A, B and C different (non-repeated) random sequences. We'll start with clean tokens AA and corrupted tokens AB, and see how well the model can predict the second A given the first A"]},{"cell_type":"markdown","metadata":{},"source":[" ### Setup"]},{"cell_type":"code","execution_count":19,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Loaded pretrained model attn-only-2l into HookedTransformer\n"]}],"source":["attn_only = HookedTransformer.from_pretrained(\"attn-only-2l\")\n","batch = 4\n","seq_len = 20\n","rand_tokens_A = torch.randint(100, 10000, (batch, seq_len)).to(attn_only.cfg.device)\n","rand_tokens_B = torch.randint(100, 10000, (batch, seq_len)).to(attn_only.cfg.device)\n","rand_tokens_C = torch.randint(100, 10000, (batch, seq_len)).to(attn_only.cfg.device)\n","bos = torch.tensor([attn_only.tokenizer.bos_token_id]*batch)[:, None].to(attn_only.cfg.device)\n","clean_tokens_induction = torch.cat([bos, rand_tokens_A, rand_tokens_A], dim=1).to(attn_only.cfg.device)\n","corrupted_tokens_induction = torch.cat([bos, rand_tokens_A, rand_tokens_B], dim=1).to(attn_only.cfg.device)"]},{"cell_type":"code","execution_count":20,"metadata":{},"outputs":[],"source":["clean_logits_induction, clean_cache_induction = attn_only.run_with_cache(clean_tokens_induction)\n","corrupted_logits_induction, corrupted_cache_induction = attn_only.run_with_cache(corrupted_tokens_induction)"]},{"cell_type":"markdown","metadata":{},"source":[" We define our metric as negative loss on the second half (negative loss so that higher is better)\n"," This time we won't normalise our metric"]},{"cell_type":"code","execution_count":21,"metadata":{},"outputs":[{"name":"stdout","output_type":"stream","text":["Clean baseline: -2.2928695678710938\n","Corrupted baseline: -13.125859260559082\n"]}],"source":["def induction_loss(logits, answer_token_indices=rand_tokens_A):\n","    seq_len = answer_token_indices.shape[1]\n","\n","    # logits: batch x seq_len x vocab_size\n","    # Take the logits for the answers, cut off the final element to get the predictions for all but the first element of the answers (which can't be predicted)\n","    final_logits = logits[:, -seq_len:-1]\n","    final_log_probs = final_logits.log_softmax(-1)\n","    return final_log_probs.gather(-1, answer_token_indices[:, 1:].unsqueeze(-1)).mean()\n","CLEAN_BASELINE_INDUCTION = induction_loss(clean_logits_induction).item()\n","print(\"Clean baseline:\", CLEAN_BASELINE_INDUCTION)\n","CORRUPTED_BASELINE_INDUCTION = induction_loss(corrupted_logits_induction).item()\n","print(\"Corrupted baseline:\", CORRUPTED_BASELINE_INDUCTION)"]},{"cell_type":"markdown","metadata":{},"source":[" ### Patching"]},{"cell_type":"code","execution_count":22,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"e622aaef492c45a58526ab56f37e3964","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"382edd8deceb4452b5bf8e157b95178f","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a9065aae9b2a46e0bb595ae3e195b728","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"8bda86976c0944b3a603a796dbd8917d","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"50e16757155346a999e1eeb94dcbe49a","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"57fb8a3b-1399-4382-85d6-cf71ccf9e2dd\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"57fb8a3b-1399-4382-85d6-cf71ccf9e2dd\")) {                    Plotly.newPlot(                        \"57fb8a3b-1399-4382-85d6-cf71ccf9e2dd\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-12.630955,-12.988065,-13.006567,-12.529946,-13.078064,-13.068898,-13.280383,-13.24434],[-13.172629,-13.035272,-12.923991,-12.987872,-12.854983,-13.093642,-3.3364303,-11.604531]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-13.107556,-13.046705,-12.993137,-13.010434,-13.149853,-13.181243,-13.078225,-13.225475],[-13.093582,-13.024863,-13.030213,-13.049493,-12.776813,-13.144259,-3.3615336,-11.5658655]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-12.680319,-13.138184,-13.085978,-12.703294,-13.077439,-13.0484495,-13.202711,-13.259588],[-13.217579,-12.96091,-12.895449,-13.014744,-12.951784,-13.057919,-13.044403,-13.454396]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-13.086151,-13.138182,-12.952564,-13.146914,-13.082162,-13.136836,-13.129994,-13.051876],[-13.097097,-12.972845,-13.048372,-13.211313,-12.823811,-13.151044,-13.135399,-13.120738]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-13.086223,-13.054343,-12.982502,-13.03867,-13.105019,-13.114271,-13.120985,-13.165459],[-13.16909,-13.094792,-13.030987,-13.054218,-12.846087,-13.07762,-3.342551,-11.469898]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-13.454396,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('57fb8a3b-1399-4382-85d6-cf71ccf9e2dd');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n","\n","if DO_SLOW_RUNS:\n","    every_head_act_patch_result = patching.get_act_patch_attn_head_by_pos_every(attn_only, corrupted_tokens_induction, clean_cache_induction, induction_loss)\n","    every_head_act_patch_result = einops.rearrange(every_head_act_patch_result, \"act_type layer pos head -> act_type (layer head) pos\")\n","    imshow(every_head_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head (By Pos)\", xaxis=\"Position\", yaxis=\"Layer & Head\", zmax=CLEAN_BASELINE_INDUCTION, x= [f\"{tok}_{i}\" for i, tok in enumerate(attn_only.to_str_tokens(clean_tokens[0]))], y=ALL_HEAD_LABELS)"]},{"cell_type":"markdown","metadata":{},"source":[" ### Changing the Corrupted Baseline\n"," We can also change the corrupted baseline easily to check what things look like! We'll keep clean as AA, but rather than corrupted as AB, we'll try out:\n"," * BA - This has a corrupted first half, so we expect both keys *and* values to matter. Head output patching should work, but value and key and pattern won't.\n"," * BB - This is still inductiony but with different tokens. So keys, queries and patterns don't matter, head output patching will work, and value will.\n"," * BC - This is just random tokens, so everything is corrupted! The induction head needs queries, keys *and* values, so only output will work."]},{"cell_type":"code","execution_count":23,"metadata":{},"outputs":[],"source":["corrupted_tokens_induction_BA = torch.cat([bos, rand_tokens_B, rand_tokens_A], dim=1).to(attn_only.cfg.device)\n","corrupted_tokens_induction_BB = torch.cat([bos, rand_tokens_B, rand_tokens_B], dim=1).to(attn_only.cfg.device)\n","corrupted_tokens_induction_BC = torch.cat([bos, rand_tokens_B, rand_tokens_C], dim=1).to(attn_only.cfg.device)"]},{"cell_type":"code","execution_count":24,"metadata":{},"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"84f0c8fdecfa454591103ae4678260d5","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"36bcd874bc584d279577a14915f39dc3","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c2bb1736cf6a4e43a2263bfe6f1d1309","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"64a9057df33f4d8594101fd7ea74c876","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"39df34a94d184e97add046794dd19cdc","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"ef54b05a-b75e-40a1-8bab-0450783ee71c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"ef54b05a-b75e-40a1-8bab-0450783ee71c\")) {                    Plotly.newPlot(                        \"ef54b05a-b75e-40a1-8bab-0450783ee71c\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-12.921511,-12.877056,-12.984126,-12.905375,-12.970542,-13.053899,-13.2373705,-12.864943],[-13.1692505,-13.0807705,-13.057977,-13.0245495,-12.941997,-13.084174,-3.3831546,-11.245924]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-13.0876255,-13.047407,-13.058392,-13.10271,-13.020917,-13.104961,-13.048052,-12.31762],[-13.123565,-13.093914,-13.052007,-13.041765,-13.061894,-13.077681,-13.115112,-13.048046]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-12.99122,-12.947079,-12.989821,-12.987743,-12.981367,-13.105391,-13.203668,-13.100986],[-13.111007,-13.113584,-13.096868,-13.094455,-13.024431,-13.052156,-12.876199,-13.023597]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-13.083906,-13.11855,-12.93277,-13.115893,-13.106962,-13.075,-13.117452,-12.339193],[-13.105621,-13.049583,-13.076819,-13.103814,-13.00783,-13.147947,-13.327594,-13.197513]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-13.080789,-13.094806,-12.9795475,-13.079694,-13.0589485,-13.059589,-13.09329,-12.983442],[-13.14187,-13.092512,-13.058135,-13.0333805,-13.0066595,-13.124432,-13.322428,-13.160433]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-13.327594,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head on BA (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('ef54b05a-b75e-40a1-8bab-0450783ee71c');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7ce8d65176ac47198138463641003156","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"7b0dea8bb0114ac69d08f06f73934b65","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a6af42964f0145f6bcd54e28e9e4adfe","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"3c0989d7873c45cfbcce13fe0289548e","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"f0982b478f43462aaf6501c6fec675ed","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"e38707d1-1a25-4556-bd49-0a85c6bea363\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"e38707d1-1a25-4556-bd49-0a85c6bea363\")) {                    Plotly.newPlot(                        \"e38707d1-1a25-4556-bd49-0a85c6bea363\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-13.805504,-13.825346,-13.645974,-13.384713,-14.021775,-13.943011,-14.001552,-13.868244],[-14.046326,-14.084418,-13.944375,-13.924217,-13.97186,-14.030198,-3.6828191,-12.427961]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-13.989873,-13.964358,-13.889407,-14.020409,-14.04137,-14.084052,-13.9674425,-12.949598],[-14.097729,-14.051899,-14.026574,-14.052086,-13.879718,-14.102751,-13.784435,-13.837878]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-13.914917,-13.987852,-13.7277975,-13.531109,-14.002791,-13.955838,-13.972597,-14.098117],[-14.03552,-14.056699,-13.936212,-13.9273615,-14.01424,-13.986736,-3.7621412,-12.812471]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-14.071397,-14.12512,-13.814937,-14.148074,-14.053765,-14.062568,-14.015211,-12.977612],[-14.065711,-14.079923,-14.047265,-14.121845,-13.929293,-14.081871,-13.881097,-13.952627]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-14.017073,-14.045502,-13.821099,-14.039408,-14.061439,-13.973086,-13.950179,-13.840467],[-14.15493,-14.114534,-14.003726,-14.043842,-13.882369,-14.078369,-13.883451,-14.127655]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-14.15493,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head on BB (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('e38707d1-1a25-4556-bd49-0a85c6bea363');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"a7d2ec5fe6714deea0d72a26994a24ef","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c131036c69bd4f1e962670df29c4bfaf","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"d2bdafd5cbce446492320082a5f7821e","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"c4d1c5cf6c134f68a71c5bee920adac0","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"cf6e5d5937d646c3abf8f9610dc81609","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/16 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"data":{"text/html":["<html>\n","<head><meta charset=\"utf-8\" /></head>\n","<body>\n","    <div>            <script src=\"https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js?config=TeX-AMS-MML_SVG\"></script><script type=\"text/javascript\">if (window.MathJax && window.MathJax.Hub && window.MathJax.Hub.Config) {window.MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script>                <script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>\n","        <script src=\"https://cdn.plot.ly/plotly-2.16.1.min.js\"></script>                <div id=\"2239ccf3-e07f-448b-b577-4134bae4700c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div>            <script type=\"text/javascript\">                                    window.PLOTLYENV=window.PLOTLYENV || {};                                    if (document.getElementById(\"2239ccf3-e07f-448b-b577-4134bae4700c\")) {                    Plotly.newPlot(                        \"2239ccf3-e07f-448b-b577-4134bae4700c\",                        [{\"coloraxis\":\"coloraxis\",\"name\":\"0\",\"z\":[[-12.822885,-12.811085,-12.8884325,-12.81087,-12.889652,-12.878188,-13.202382,-12.906532],[-13.182879,-12.987521,-12.7554655,-12.92344,-12.589047,-12.970565,-3.6010063,-11.286802]],\"type\":\"heatmap\",\"xaxis\":\"x\",\"yaxis\":\"y\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"1\",\"z\":[[-12.968789,-12.844384,-12.912667,-12.981978,-12.909443,-12.878584,-12.927259,-12.348582],[-13.075386,-12.829196,-12.817758,-12.933551,-12.723004,-12.950664,-13.066957,-12.905731]],\"type\":\"heatmap\",\"xaxis\":\"x2\",\"yaxis\":\"y2\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"2\",\"z\":[[-12.820385,-12.9005785,-12.893993,-12.877544,-12.880168,-12.988029,-13.202785,-13.129946],[-13.170372,-13.053295,-12.780892,-13.032372,-12.68076,-12.986087,-12.676176,-12.962296]],\"type\":\"heatmap\",\"xaxis\":\"x3\",\"yaxis\":\"y3\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"3\",\"z\":[[-12.969026,-13.095675,-12.719384,-12.916345,-13.048266,-12.982166,-13.106823,-12.325087],[-13.058377,-12.824394,-12.8636265,-13.143163,-12.616763,-13.001718,-13.132352,-13.121058]],\"type\":\"heatmap\",\"xaxis\":\"x4\",\"yaxis\":\"y4\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"},{\"coloraxis\":\"coloraxis\",\"name\":\"4\",\"z\":[[-12.964717,-12.970182,-12.920894,-13.007302,-12.984122,-12.876909,-13.061056,-12.948068],[-13.097929,-13.012596,-13.003614,-13.021066,-12.936759,-12.980106,-13.2769785,-13.026427]],\"type\":\"heatmap\",\"xaxis\":\"x5\",\"yaxis\":\"y5\",\"hovertemplate\":\"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\"}],                        {\"template\":{\"data\":{\"histogram2dcontour\":[{\"type\":\"histogram2dcontour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"choropleth\":[{\"type\":\"choropleth\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"histogram2d\":[{\"type\":\"histogram2d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmap\":[{\"type\":\"heatmap\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"heatmapgl\":[{\"type\":\"heatmapgl\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"contourcarpet\":[{\"type\":\"contourcarpet\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"contour\":[{\"type\":\"contour\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"surface\":[{\"type\":\"surface\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]]}],\"mesh3d\":[{\"type\":\"mesh3d\",\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}],\"scatter\":[{\"fillpattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2},\"type\":\"scatter\"}],\"parcoords\":[{\"type\":\"parcoords\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolargl\":[{\"type\":\"scatterpolargl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"scattergeo\":[{\"type\":\"scattergeo\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterpolar\":[{\"type\":\"scatterpolar\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"scattergl\":[{\"type\":\"scattergl\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatter3d\":[{\"type\":\"scatter3d\",\"line\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattermapbox\":[{\"type\":\"scattermapbox\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scatterternary\":[{\"type\":\"scatterternary\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"scattercarpet\":[{\"type\":\"scattercarpet\",\"marker\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}}}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"table\":[{\"cells\":{\"fill\":{\"color\":\"#EBF0F8\"},\"line\":{\"color\":\"white\"}},\"header\":{\"fill\":{\"color\":\"#C8D4E3\"},\"line\":{\"color\":\"white\"}},\"type\":\"table\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"pie\":[{\"automargin\":true,\"type\":\"pie\"}]},\"layout\":{\"autotypenumbers\":\"strict\",\"colorway\":[\"#636efa\",\"#EF553B\",\"#00cc96\",\"#ab63fa\",\"#FFA15A\",\"#19d3f3\",\"#FF6692\",\"#B6E880\",\"#FF97FF\",\"#FECB52\"],\"font\":{\"color\":\"#2a3f5f\"},\"hovermode\":\"closest\",\"hoverlabel\":{\"align\":\"left\"},\"paper_bgcolor\":\"white\",\"plot_bgcolor\":\"#E5ECF6\",\"polar\":{\"bgcolor\":\"#E5ECF6\",\"angularaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"radialaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"ternary\":{\"bgcolor\":\"#E5ECF6\",\"aaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"baxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"},\"caxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\"}},\"coloraxis\":{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"}},\"colorscale\":{\"sequential\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"sequentialminus\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"diverging\":[[0,\"#8e0152\"],[0.1,\"#c51b7d\"],[0.2,\"#de77ae\"],[0.3,\"#f1b6da\"],[0.4,\"#fde0ef\"],[0.5,\"#f7f7f7\"],[0.6,\"#e6f5d0\"],[0.7,\"#b8e186\"],[0.8,\"#7fbc41\"],[0.9,\"#4d9221\"],[1,\"#276419\"]]},\"xaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"yaxis\":{\"gridcolor\":\"white\",\"linecolor\":\"white\",\"ticks\":\"\",\"title\":{\"standoff\":15},\"zerolinecolor\":\"white\",\"automargin\":true,\"zerolinewidth\":2},\"scene\":{\"xaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"yaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2},\"zaxis\":{\"backgroundcolor\":\"#E5ECF6\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"showbackground\":true,\"ticks\":\"\",\"zerolinecolor\":\"white\",\"gridwidth\":2}},\"shapedefaults\":{\"line\":{\"color\":\"#2a3f5f\"}},\"annotationdefaults\":{\"arrowcolor\":\"#2a3f5f\",\"arrowhead\":0,\"arrowwidth\":1},\"geo\":{\"bgcolor\":\"white\",\"landcolor\":\"#E5ECF6\",\"subunitcolor\":\"white\",\"showland\":true,\"showlakes\":true,\"lakecolor\":\"white\"},\"title\":{\"x\":0.05},\"mapbox\":{\"style\":\"light\"}}},\"xaxis\":{\"anchor\":\"y\",\"domain\":[0.0,0.18400000000000002],\"title\":{\"text\":\"Head\"}},\"yaxis\":{\"anchor\":\"x\",\"domain\":[0.0,1.0],\"autorange\":\"reversed\",\"title\":{\"text\":\"Layer\"}},\"xaxis2\":{\"anchor\":\"y2\",\"domain\":[0.20400000000000001,0.388],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis2\":{\"anchor\":\"x2\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis3\":{\"anchor\":\"y3\",\"domain\":[0.40800000000000003,0.5920000000000001],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis3\":{\"anchor\":\"x3\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis4\":{\"anchor\":\"y4\",\"domain\":[0.6120000000000001,0.7960000000000002],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis4\":{\"anchor\":\"x4\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"xaxis5\":{\"anchor\":\"y5\",\"domain\":[0.8160000000000001,1.0],\"matches\":\"x\",\"title\":{\"text\":\"Head\"}},\"yaxis5\":{\"anchor\":\"x5\",\"domain\":[0.0,1.0],\"matches\":\"y\",\"showticklabels\":false},\"annotations\":[{\"font\":{},\"showarrow\":false,\"text\":\"Output\",\"x\":0.09200000000000001,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Query\",\"x\":0.29600000000000004,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Key\",\"x\":0.5,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Value\",\"x\":0.7040000000000002,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"},{\"font\":{},\"showarrow\":false,\"text\":\"Pattern\",\"x\":0.908,\"xanchor\":\"center\",\"xref\":\"paper\",\"y\":1.0,\"yanchor\":\"bottom\",\"yref\":\"paper\"}],\"coloraxis\":{\"colorscale\":[[0.0,\"rgb(103,0,31)\"],[0.1,\"rgb(178,24,43)\"],[0.2,\"rgb(214,96,77)\"],[0.3,\"rgb(244,165,130)\"],[0.4,\"rgb(253,219,199)\"],[0.5,\"rgb(247,247,247)\"],[0.6,\"rgb(209,229,240)\"],[0.7,\"rgb(146,197,222)\"],[0.8,\"rgb(67,147,195)\"],[0.9,\"rgb(33,102,172)\"],[1.0,\"rgb(5,48,97)\"]],\"cmid\":0.0,\"cmin\":-13.2769785,\"cmax\":-2.2928695678710938},\"title\":{\"text\":\"Activation Patching Per Head on BC (All Pos)\"}},                        {\"responsive\": true}                    ).then(function(){\n","                            \n","var gd = document.getElementById('2239ccf3-e07f-448b-b577-4134bae4700c');\n","var x = new MutationObserver(function (mutations, observer) {{\n","        var display = window.getComputedStyle(gd).display;\n","        if (!display || display === 'none') {{\n","            console.log([gd, 'removed!']);\n","            Plotly.purge(gd);\n","            observer.disconnect();\n","        }}\n","}});\n","\n","// Listen for the removal of the full notebook cells\n","var notebookContainer = gd.closest('#notebook-container');\n","if (notebookContainer) {{\n","    x.observe(notebookContainer, {childList: true});\n","}}\n","\n","// Listen for the clearing of the current output cell\n","var outputEl = gd.closest('.output');\n","if (outputEl) {{\n","    x.observe(outputEl, {childList: true});\n","}}\n","\n","                        })                };                            </script>        </div>\n","</body>\n","</html>"]},"metadata":{},"output_type":"display_data"}],"source":["every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction_BA, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head on BA (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n","every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction_BB, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head on BB (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)\n","every_head_all_pos_act_patch_result = patching.get_act_patch_attn_head_all_pos_every(attn_only, corrupted_tokens_induction_BC, clean_cache_induction, induction_loss)\n","imshow(every_head_all_pos_act_patch_result, facet_col=0, facet_labels=[\"Output\", \"Query\", \"Key\", \"Value\", \"Pattern\"], title=\"Activation Patching Per Head on BC (All Pos)\", xaxis=\"Head\", yaxis=\"Layer\", zmax=CLEAN_BASELINE_INDUCTION)"]}],"metadata":{"kernelspec":{"display_name":"base","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.13"},"orig_nbformat":4,"vscode":{"interpreter":{"hash":"d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe"}}},"nbformat":4,"nbformat_minor":2}