Skip to content

Commit

Permalink
Merge pull request #110 from SumanthRH/update_transformers
Browse files: browse the repository at this point in the history
update requirements and add float16 support
  • Loading branch information
jalammar authored Jan 24, 2024
2 parents bdf19fe + cbe80fe commit 296a396
Show file tree
Hide file tree
Showing 6 changed files with 52 additions and 32 deletions.
22 changes: 11 additions & 11 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
matplotlib~=3.3.1
numpy~=1.19.1
ipython~=7.16.1
scikit-learn~=0.24.2
seaborn~=0.11.0
transformers~=4.6.1
pytest~=6.1.2
setuptools~=49.6.0
torch~=1.9.0
PyYAML==5.4.1
captum==0.4.1
matplotlib==3.8.2
numpy==1.26.2
ipython==8.18.1
scikit-learn==1.3.2
seaborn==0.13.0
transformers==4.36.2
pytest==7.4.3
setuptools==68.2.2
torch==2.1.1
PyYAML==6.0.1
captum==0.6.0
10 changes: 5 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,11 @@ def read(*names, **kwargs):
],
python_requires='!=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*',
install_requires=[
"transformers ~= 4.2",
"seaborn ~= 0.11",
"scikit-learn~=0.23",
"PyYAML~=5.4",
"captum ~= 0.4"
"transformers ~= 4.36",
"seaborn ~= 0.13",
"scikit-learn ~= 1.3",
"PyYAML ~= 6.0",
"captum ~= 0.6"
],
extras_require={
"dev": [
Expand Down
5 changes: 3 additions & 2 deletions src/ecco/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ def from_pretrained(hf_model_id: str,
hidden_states: Optional[bool] = True,
activations_layer_nums: Optional[List[int]] = None,
verbose: Optional[bool] = True,
gpu: Optional[bool] = True
gpu: Optional[bool] = True,
**model_kwargs: Dict[str, Any]
):
"""
Constructs a [LM][ecco.lm.LM] object based on a string identifier from HuggingFace Transformers. This is
Expand Down Expand Up @@ -80,7 +81,7 @@ def from_pretrained(hf_model_id: str,
else:
model_cls = AutoModel

model = model_cls.from_pretrained(hf_model_id, output_hidden_states=hidden_states, output_attentions=attention)
model = model_cls.from_pretrained(hf_model_id, output_hidden_states=hidden_states, output_attentions=attention, **model_kwargs)

lm_kwargs = {
'model_name': hf_model_id,
Expand Down
18 changes: 8 additions & 10 deletions src/ecco/lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,6 @@ def __init__(self,
if torch.cuda.is_available() and gpu:
self.model = model.to('cuda')

self.device = 'cuda' if torch.cuda.is_available() \
and self.model.device.type == 'cuda' \
else 'cpu'

self.tokenizer = tokenizer
self.verbose = verbose
self._path = os.path.dirname(ecco.__file__)
Expand Down Expand Up @@ -104,6 +100,10 @@ def __init__(self,
# we're running it before every d.HTML cell
# d.display(d.HTML(filename=os.path.join(self._path, "html", "setup.html")))

@property
def device(self):
return self.model.device

def _reset(self):
self._all_activations_dict = defaultdict(dict)
self.activations = defaultdict(dict)
Expand All @@ -114,9 +114,7 @@ def _reset(self):
self._hooks = {}

def to(self, tensor: Union[torch.Tensor, BatchEncoding]):
if self.device == 'cuda':
return tensor.to('cuda')
return tensor
return tensor.to(self.device)

def _analyze_token(self,
encoder_input_embeds: torch.Tensor,
Expand All @@ -143,7 +141,7 @@ def _analyze_token(self,
'decoder_inputs_embeds': decoder_input_embeds
},
prediction_id=prediction_id
).cpu().detach().numpy()
).float().cpu().detach().numpy() # cast to float32 before numpy conversion
)

def generate(self, input_str: str,
Expand Down Expand Up @@ -521,7 +519,7 @@ def _get_embeddings(self, input_ids) -> Tuple[torch.FloatTensor, torch.FloatTens

vocab_size = embedding_matrix.shape[0]

one_hot_tensor = self.to(_one_hot_batched(input_ids, vocab_size))
one_hot_tensor = self.to(_one_hot_batched(input_ids, vocab_size)).to(self.model.dtype)
token_ids_tensor_one_hot = one_hot_tensor.clone().requires_grad_(True)

inputs_embeds = torch.matmul(token_ids_tensor_one_hot, embedding_matrix)
Expand Down Expand Up @@ -593,7 +591,7 @@ def _get_activations_hook(self, name: str, input_):
# overwrite the previous step activations. This collects all activations in the last step
# Assuming all input tokens are presented as input, no "past"
# The inputs to c_proj already pass through the gelu activation function
self._all_activations_dict[layer_type][layer_number] = input_[0].detach().cpu().numpy()
self._all_activations_dict[layer_type][layer_number] = input_[0].detach().float().cpu().numpy()

def _inhibit_neurons_hook(self, name: str, input_tensor):
"""
Expand Down
23 changes: 23 additions & 0 deletions src/ecco/model-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -342,3 +342,26 @@ EleutherAI/gpt-neo-2.7B:
- 'mlp\.c_proj'
token_prefix: ' '
partial_token_prefix: ''

# Llama
openlm-research/open_llama_3b:
embedding: "model.embed_tokens"
type: 'causal'
activations:
- 'mlp\.up_proj' #This is a regex
token_prefix: ''
partial_token_prefix: ''
meta-llama/Llama-2-7b:
embedding: "model.embed_tokens"
type: 'causal'
activations:
- 'mlp\.up_proj' #This is a regex
token_prefix: ''
partial_token_prefix: ''
meta-llama/Llama-2-13b:
embedding: "model.embed_tokens"
type: 'causal'
activations:
- 'mlp\.up_proj' #This is a regex
token_prefix: ''
partial_token_prefix: ''
6 changes: 2 additions & 4 deletions src/ecco/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,7 @@ def __str__(self):
return "<LMOutput '{}' # of lm outputs: {}>".format(self.output_text, len(self._get_hidden_states()[1][-1]))

def to(self, tensor: torch.Tensor):
if self.device == 'cuda':
return tensor.to('cuda')
return tensor
return tensor.to(self.device)

def explorable(self, printJson: Optional[bool] = False):

Expand Down Expand Up @@ -394,7 +392,7 @@ def layer_predictions(self, position: int = 1, topk: Optional[int] = 10, layer:

layer_top_tokens = [self.tokenizer.decode(t) for t in sorted_softmax[-k:]][::-1]
top_tokens.append(layer_top_tokens)
layer_probs = softmax[sorted_softmax[-k:]].cpu().detach().numpy()[::-1]
layer_probs = softmax[sorted_softmax[-k:]].float().cpu().detach().numpy()[::-1]
probs.append(layer_probs.tolist())

# Package in output format
Expand Down

0 comments on commit 296a396

Please sign in to comment.