-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_calculations_SA.py
71 lines (63 loc) · 3.27 KB
/
run_calculations_SA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import pickle

import numpy as np
import sklearn as skl
import skopt
import skopt.space as sks
from tqdm import tqdm

import SVM_scripts as svm
# --- Run configuration ------------------------------------------------------
# Dataset to process; swap the active assignment to target another dataset.
data_name = 'pgp-broccatelli'
# data_name = 'bbb-martins'
# data_name = 'ncats-solubility'
run_name = 'SA'  # tag embedded in the results file name

# Load the dataset from its parquet file through the project helper.
file_name = f'Polaris/{data_name}.parquet'
matrix, matrix_labels, names = svm.read_parquet(file_name)

# Slice sizes under study: 177 divided by 3, 2 and 1, truncated to integers
# (59, 88, 177). The trailing comment lists further sizes that are disabled.
slice_sizes = [177 // divisor for divisor in (3, 2, 1)]  # + [195,230,265,300] + [None]
limit = 34  # forwarded as `limit` to svm.hyperparameter_optimization
N = 18  # exclusive upper bound of the seed loop (seeds run from `start` to N - 1)
# --- Load previous results (if any) so an interrupted study can resume ------
# NOTE: pickle.load can execute arbitrary code; only load result files that
# this script itself produced.
results_path = 'results/slice_study_' + data_name + '_' + run_name + '.pkl'
try:
    with open(results_path, 'rb') as fh:
        slice_study_results = pickle.load(fh)
except FileNotFoundError:
    # No earlier run for this dataset/run name: start from scratch.
    slice_study_results = {}

# Entries stored per slice size: {slice_size: [one item per seed]}.
slice_keys = ['test_values', 'best_params', 'param_tables', 'cv_results']
# Entries stored once per seed: [one item per seed].
keys = ['data_sets', 'param_grids']

# Make sure every expected container exists, including lists for slice sizes
# that were not part of an earlier run.
for key in slice_keys:
    per_slice = slice_study_results.setdefault(key, {})
    for slice_size in slice_sizes:
        per_slice.setdefault(slice_size, [])
for key in keys:
    slice_study_results.setdefault(key, [])

# Number of seeds already recorded; the main loop continues from here.
start = len(slice_study_results['param_grids'])
def _save_checkpoint(results, path):
    """Atomically pickle *results* to *path*, creating its directory if needed.

    The data is written to a temporary sibling file first and then moved over
    the target with os.replace, so an interruption during the dump cannot
    leave a truncated checkpoint behind.
    """
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)
    tmp_path = path + '.tmp'
    with open(tmp_path, 'wb') as out_file:
        pickle.dump(results, out_file)
    os.replace(tmp_path, path)


checkpoint_path = 'results/slice_study_' + data_name + '_' + run_name + '.pkl'

# --- Main study: one train/test split per seed, one optimisation per slice size
for i in tqdm(range(start, N)):
    # The loop index doubles as the seed of the data split.
    vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
        matrix,
        matrix_labels,
        train_percentage=50,
        positive_negative_ratio=None,
        max_train_size=None,
        min_train_size=None,
        seed=i,
        normalize_data=True,
        print_info=True,
    )

    # Reference kernel width 1 / (n_columns * variance); this matches the
    # formula behind scikit-learn's gamma='scale', assuming `vectors` is laid
    # out as (n_samples, n_features) -- TODO confirm.
    default_gamma = 1 / (vectors.shape[1] * vectors.var())
    search_space = {
        'base': sks.Categorical([2, 10]),
        'num_encoding': sks.Integer(1, 4),
        # Searched on a log scale between 0.05x and 2x the reference width.
        'kernel': sks.Real(0.05 * default_gamma, 2 * default_gamma, prior='log-uniform'),
        'penalty': sks.Real(0, 6),
    }
    slice_study_results['param_grids'].append(search_space)
    slice_study_results['data_sets'].append((vectors, labels, test_vectors, test_labels))

    inner_estimator = svm.qSVM_estimator(solver=('SA', 250), adjust_bias=False)
    for slice_size in slice_sizes:
        print('Slice size: ', slice_size)
        estimator = svm.slice_estimator(
            estimator=inner_estimator,
            slice_size=slice_size,
            force_unbiased=(data_name == 'bbb-martins'),
            adjust_outer_bias=True,
            seed=0,
        )
        try:
            decision_function, param_table, opt, optimal_params = svm.hyperparameter_optimization(
                estimator=estimator,
                search_space=search_space,
                vectors=vectors,
                labels=labels,
                folds=4,
                mode='bayes',
                limit=limit,
                filter=None,
                print_info='q',
                seed=0,
            )
            test_values = decision_function(test_vectors)
            cv_results = opt.cv_results_
        except ValueError as err:
            # Keep the per-slice lists aligned with the seed index by storing
            # placeholders, but report why the run failed.
            print('Run failed, putting None')
            print('  reason:', err)
            test_values, optimal_params, param_table, cv_results = [None] * 4

        slice_study_results['test_values'][slice_size].append(test_values)
        slice_study_results['best_params'][slice_size].append(optimal_params)
        slice_study_results['param_tables'][slice_size].append(param_table)
        slice_study_results['cv_results'][slice_size].append(cv_results)
        # Optional per-run dump of the optimiser object (disabled):
        # skopt.dump(opt, 'results/opts/' + data_name + '_' + run_name + '_N' + str(i) + '_slice_size' + str(slice_size) + '.pkl', store_objective=True)

    # Checkpoint once per completed seed. Resuming relies on
    # len(slice_study_results['param_grids']), so the per-seed and per-slice
    # lists must always be persisted in lockstep.
    _save_checkpoint(slice_study_results, checkpoint_path)