-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_calculations_QA2.py
62 lines (55 loc) · 3.16 KB
/
run_calculations_QA2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import numpy as np
import SVM_scripts as svm
import sklearn as skl
import skopt
import skopt.space as sks
from tqdm import tqdm
import pickle
# Resumable "slice study" driver: for N seeded data splits of one data set,
# tune a sliced qSVM estimator at several slice sizes and checkpoint every
# result to a pickle file under results/.

# Data set to study; the other names used with this script are
# 'pgp-broccatelli' and 'ncats-solubility'.
data_name = 'bbb-martins'
run_name = 'QA'
file_name = 'Polaris/' + data_name + '.parquet'
matrix, matrix_labels, names = svm.read_parquet(file_name)

# 177 divided by 4, 3, 2 and 1, rounded down (44, 59, 88, 177).
slice_sizes = [177 // n for n in [4, 3, 2, 1]]
# limits[k] is the `limit` handed to the hyperparameter search for slice_sizes[k].
limits = [34, 26, 18, 12]
# Exclusive upper bound of the seeds to process (seeds 0 .. N-1).
N = 18

results_path = 'results/slice_study_' + data_name + '_' + run_name + '.pkl'

# Load the checkpoint of a previous (possibly interrupted) run, or start a
# fresh study when there is none.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only point
# this at checkpoints written by this script.
try:
    with open(results_path, 'rb') as results_file:
        slice_study_results = pickle.load(results_file)
except FileNotFoundError:
    slice_study_results = {
        'data_sets': [],
        'test_values': {slice_size: [] for slice_size in slice_sizes},
        'best_params': {slice_size: [] for slice_size in slice_sizes},
        'param_tables': {slice_size: [] for slice_size in slice_sizes},
        'cv_results': {slice_size: [] for slice_size in slice_sizes},
        'param_grids': [],
    }

data_sets = slice_study_results['data_sets']
test_values_list = slice_study_results['test_values']
best_params = slice_study_results['best_params']
param_tables = slice_study_results['param_tables']
cv_results = slice_study_results['cv_results']
search_spaces = slice_study_results['param_grids']

# One search space is stored per completed seed, so its count is the index of
# the first seed that still has to be processed.
start = len(search_spaces)

for i in tqdm(range(start, N)):
    vectors, labels, test_vectors, test_labels = svm.prepare_data_sets(
        matrix,
        matrix_labels,
        train_percentage=50,
        positive_negative_ratio=None,
        max_train_size=None,
        min_train_size=None,
        seed=i,
        normalize_data=True,
        print_info=True,
    )

    # Same formula as scikit-learn's gamma='scale': 1 / (n_features * X.var()).
    default_gamma = 1 / (vectors.shape[1] * vectors.var())
    # The 'kernel' range is centred on default_gamma (0.05x .. 2x, log-uniform),
    # so the search space depends on the current training split.
    search_space = {
        'base': sks.Categorical([2, 10]),
        'num_encoding': sks.Integer(1, 4),
        'kernel': sks.Real(0.05 * default_gamma, 2 * default_gamma, prior='log-uniform'),
        'penalty': sks.Real(0, 6),
    }
    search_spaces += [search_space]
    data_sets += [(vectors, labels, test_vectors, test_labels)]

    inner_estimator = svm.qSVM_estimator(solver=('clique', 250), adjust_bias=False)
    for slice_size, limit in zip(slice_sizes, limits):
        print('Slice size: ', slice_size)
        estimator = svm.slice_estimator(
            estimator=inner_estimator,
            slice_size=slice_size,
            force_unbiased=True,
            adjust_outer_bias=True,
            seed=0,
        )
        f, param_table, opt, optimal_params = svm.hyperparameter_optimization(
            estimator=estimator,
            search_space=search_space,
            vectors=vectors,
            labels=labels,
            folds=4,
            mode='bayes',
            limit=limit,
            filter=True,
            print_info='q',
            seed=0,
        )

        # Collect the results of this (seed, slice size) combination.
        test_values = f(test_vectors)
        test_values_list[slice_size] += [test_values]
        param_tables[slice_size] += [param_table]
        best_params[slice_size] += [optimal_params]
        cv_results[slice_size] += [opt.cv_results_]
        # Optionally keep the full optimizer object as well:
        # skopt.dump(opt, 'results/opts/' + data_name + '_' + run_name + '_N' + str(i) + '_slice_size' + str(slice_size) + '.pkl', store_objective=True)

    # Checkpoint once per fully processed seed. Every container then holds the
    # same number of entries, so a resumed run can neither skip nor duplicate
    # a partially processed seed.
    slice_study_results['data_sets'] = data_sets
    slice_study_results['test_values'] = test_values_list
    slice_study_results['best_params'] = best_params
    slice_study_results['param_tables'] = param_tables
    slice_study_results['cv_results'] = cv_results
    slice_study_results['param_grids'] = search_spaces
    # A dedicated handle name keeps the fitted predictor `f` from being shadowed.
    with open(results_path, 'wb') as results_file:
        pickle.dump(slice_study_results, results_file)