Skip to content

Commit

Permalink
Add embedding plot
Browse files (browse the repository at this point in the history)
Signed-off-by: Evangelos Lamprou <[email protected]>
  • Loading branch information
vagos committed Jan 14, 2025
1 parent 690e0fb commit d90bedf
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions infrastructure/colossal_table.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@
import viz.syntax as stx
import viz.dynamic as dyn
import sys
import ast

from all_scripts import get_all_scripts, benchmark_rename_map
from project_root import get_project_root
Expand Down Expand Up @@ -239,8 +240,11 @@ def main():
.merge(loc_data_script, on='script')\
.merge(syntax_script_all_cmds[['script', 'unique_cmds']], on='script')

perform_pca_and_plot(big_bench)
exit(0)
embedding_df = pd.read_csv(root / 'infrastructure/data/embeddings.csv')
embedding_df['embedding'] = embedding_df['embedding'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
# Each embedding is a list of numbers; expand it into one column per element
embedding_df = pd.concat([embedding_df['benchmark'], embedding_df['embedding'].apply(pd.Series)], axis=1)
perform_pca_and_plot(big_bench, embedding_df, 'dual_analysis')

# Calculate summary statistics
agg_order = ['min', 'max', 'mean']
Expand Down

0 comments on commit d90bedf

Please sign in to comment.