-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathrun_experiments.sh
executable file
·66 lines (54 loc) · 3.08 KB
/
run_experiments.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
# Configuration for the experiment sweep: where the list of experiments is
# read from, where the results are written, and which scratch files hold the
# timings of a single run.

# Input and output CSV files
input_csv="experiments.csv"

# Fail early with a clear message instead of creating an empty results
# directory and then silently running zero experiments.
if [[ ! -r "$input_csv" ]]; then
  printf 'error: cannot read input CSV: %s\n' "$input_csv" >&2
  exit 1
fi

# Each invocation writes into its own timestamped directory, so results of
# earlier invocations are not overwritten.
time=$(date +"%Y-%m-%d_%H-%M-%S")
dir="experiment_results/$time"
if ! mkdir -p -- "$dir"; then
  printf 'error: cannot create results directory: %s\n' "$dir" >&2
  exit 1
fi
output_csv="$dir/results.csv"

# Scratch files (relative to the current directory) that are read back after
# each experiment run to obtain its timings.
prefill_time_file="prefill_time.txt"
decode_time_file="decode_time.txt"
total_time_file="total_time.txt"
layer_times_file="layer_times.txt" # only referenced by commented-out code

# Number of times each row of the input CSV is run.
num_repeats=1

# Read the header and write it to the output CSV.
# Carriage returns are stripped here as well: the data rows are passed through
# `tr -d '\r'`, so a CRLF input would otherwise yield a results file with a
# CRLF header followed by LF-only rows.
# NOTE(review): the result rows carry three extra fields (prefill, decode and
# total time) appended after the input columns; confirm that the input header
# already names them, otherwise the header and the rows are misaligned.
header=$(head -n 1 "$input_csv" | tr -d '\r')
printf '%s\n' "$header" > "$output_csv"
# Process the CSV file line by line, skipping the header.
#
# For every data row of "$input_csv" the configured experiment is run
# "$num_repeats" times, and one result row per run is appended to
# "$output_csv".

#######################################
# Print the contents of a result file, or nothing if it is missing.
# Arguments: $1 - path of the result file
# Outputs:   file contents (trailing newlines removed) on stdout;
#            a warning on stderr when the file cannot be read
#######################################
read_result_file() {
  local path=$1
  if [[ -r "$path" ]]; then
    printf '%s' "$(<"$path")"
  else
    printf 'warning: result file missing: %s\n' "$path" >&2
  fi
}

#######################################
# Patch the group-size constant into the AVX sparse sources and reinstall.
# In both files the line FOLLOWING the "CHANGE BELOW FOR GROUP SIZE" marker
# comment is replaced: `!b` skips non-marker lines, `n` advances to the next
# line and `c` replaces it (GNU sed syntax, edited in place with -i).
# Arguments: $1 - number of groups
# Returns:   non-zero if an edit or the install step fails
#######################################
rebuild_avx_sparse() {
  local groups=$1
  sed -i '/# CHANGE BELOW FOR GROUP SIZE/!b;n;cNUM_GROUPS = '"$groups" layer/avx_sparse_linear.py \
    && sed -i '/\/\/ CHANGE BELOW FOR GROUP SIZE/!b;n;c#define NUM_COL_GROUPS '"$groups" csrc/avx_sparse_linear.cpp \
    && python setup.py install
}

#######################################
# Run every experiment listed in the input CSV and record its timings.
# Globals:   input_csv, output_csv, num_repeats, prefill_time_file,
#            decode_time_file, total_time_file (all read)
# Outputs:   appends one line per run to "$output_csv"; warnings on stderr
#######################################
run_experiments() {
  local row status i core_vals new_line
  local time_prefill time_decode total_time
  local model_id saved_model_path context_length num_generated_tokens mode
  local num_groups num_threads core batch_size
  local enable_custom_attention use_custom_k use_custom_v
  local k_pruning_percentage v_pruning_percentage
  local -a cmd

  # The rows arrive on file descriptor 3 rather than stdin, so the commands
  # started inside the loop cannot accidentally consume the remaining rows.
  # `|| [[ -n "$row" ]]` keeps a final row that lacks a trailing newline.
  while IFS= read -r -u 3 row || [[ -n "$row" ]]; do
    # Skip blank lines (for example a trailing empty line in the CSV).
    [[ -n "$row" ]] || continue

    IFS=, read -r model_id saved_model_path context_length \
      num_generated_tokens mode num_groups num_threads core batch_size \
      enable_custom_attention use_custom_k use_custom_v \
      k_pruning_percentage v_pruning_percentage <<< "$row"

    # The group size is fixed for the whole row, so rebuild once per row
    # instead of once per repeat.
    if [[ "$mode" == "avx_sparse" ]]; then
      if ! rebuild_avx_sparse "$num_groups"; then
        printf 'warning: rebuild failed for num_groups=%s; skipping row: %s\n' \
          "$num_groups" "$row" >&2
        continue
      fi
    fi

    # construct new variable core_vals = "0-($core-1)"
    core_vals="0-$((core - 1))"

    # Build the command as an array so that every CSV field is passed as
    # exactly one argument, even if it contains spaces or glob characters.
    cmd=(
      numactl --cpunodebind 0 --membind 0 "--physcpubind=$core_vals"
      python llm_pipeline.py
      --model_id "$model_id"
      --saved_model_path "$saved_model_path"
      --context_length "$context_length"
      --num_generated_tokens "$num_generated_tokens"
      --mode "$mode"
      --num_threads "$num_threads"
      --batch_size "$batch_size"
    )
    # Boolean CSV columns become bare flags, present only when "True".
    if [[ "$enable_custom_attention" == "True" ]]; then
      cmd+=(--enable_custom_attention)
    fi
    if [[ "$use_custom_k" == "True" ]]; then
      cmd+=(--use_custom_k)
    fi
    if [[ "$use_custom_v" == "True" ]]; then
      cmd+=(--use_custom_v)
    fi
    cmd+=(--k_pruning "$k_pruning_percentage" --v_pruning "$v_pruning_percentage")

    for ((i = 1; i <= num_repeats; i++)); do
      # Remove ALL result files first, so a run that fails to write them
      # yields empty fields instead of values left over from an earlier run.
      rm -f -- "$prefill_time_file" "$decode_time_file" "$total_time_file"

      # Run the experiment
      "${cmd[@]}"
      status=$?
      if ((status != 0)); then
        printf 'warning: experiment exited with status %d: %s\n' \
          "$status" "$row" >&2
      fi

      # Read the result from the result files (presumably written by
      # llm_pipeline.py; a missing file gives an empty field).
      time_prefill=$(read_result_file "$prefill_time_file")
      time_decode=$(read_result_file "$decode_time_file")
      total_time=$(read_result_file "$total_time_file")
      # layer_times=$(<$layer_times_file)

      # Construct the new CSV line with the result
      new_line="$model_id,$saved_model_path,$context_length,$num_generated_tokens,$mode,$num_groups,$num_threads,$core,$batch_size,$enable_custom_attention,$use_custom_k,$use_custom_v,$k_pruning_percentage,$v_pruning_percentage,$time_prefill,$time_decode,$total_time"
      # new_line="$model_id,$saved_model_path,$context_length,$num_generated_tokens,$mode,$num_groups,$num_threads,$core,$batch_size,$enable_custom_attention,$use_custom_k,$use_custom_v,$k_pruning_percentage,$v_pruning_percentage,$time_prefill,$time_decode,$total_time,$layer_times"
      # new_line="$model_id,$saved_model_path,$context_length,$num_generated_tokens,$mode,$num_groups,$num_threads,$core,$batch_size,$time_prefill,$time_decode"

      # Append the new line to the output CSV
      printf '%s\n' "$new_line" >> "$output_csv"
    done
  done 3< <(tail -n +2 "$input_csv" | tr -d '\r')
}

run_experiments