From d90bedf36dc26d37c9d412049d271665c440d06f Mon Sep 17 00:00:00 2001 From: Evangelos Lamprou Date: Tue, 14 Jan 2025 17:30:36 -0500 Subject: [PATCH] Add embedding plot Signed-off-by: Evangelos Lamprou --- infrastructure/colossal_table.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/infrastructure/colossal_table.py b/infrastructure/colossal_table.py index 3637c7ee..7a41bc22 100644 --- a/infrastructure/colossal_table.py +++ b/infrastructure/colossal_table.py @@ -5,6 +5,7 @@ import viz.syntax as stx import viz.dynamic as dyn import sys +import ast from all_scripts import get_all_scripts, benchmark_rename_map from project_root import get_project_root @@ -239,8 +240,11 @@ def main(): .merge(loc_data_script, on='script')\ .merge(syntax_script_all_cmds[['script', 'unique_cmds']], on='script') - perform_pca_and_plot(big_bench) - exit(0) + embedding_df = pd.read_csv(root / 'infrastructure/data/embeddings.csv') + embedding_df['embedding'] = embedding_df['embedding'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x) + # Embedding is a list of numbers, turn them into columns + embedding_df = pd.concat([embedding_df['benchmark'], embedding_df['embedding'].apply(pd.Series)], axis=1) + perform_pca_and_plot(big_bench, embedding_df, 'dual_analysis') # Calculate summary statistics agg_order = ['min', 'max', 'mean']