extract_classifier_scores.py
"""Extract biased classifier location scores from synthetic refugee data, split
them into train/validation/test sets, save them as CSV files, and evaluate the
capacity-constrained assignment policy induced by the scores on the test split."""
from pathlib import Path

import numpy as np
import pandas as pd

from utils import classifier as clf
from utils import common as cm
from utils import dataset as ds
from utils import problem as pb
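# Example invocation (a sketch, not taken from the repository): the exact flag
# names depend on utils.common.parse_arguments; the ones below are assumed to
# mirror the args attributes read in the main block.
#
#   python extract_classifier_scores.py \
#       --data_dir data/synthetic --save_dir results \
#       --refugee_batch_size 100 --refugee_batch_num 10 --location_num 5 \
#       --beta 0.5 --seed 0 --train_ratio 0.6 --test_ratio 0.2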
if __name__ == "__main__":
    args = cm.parse_arguments()

    # Load the synthetic refugee cohort, location capacities, true employment
    # probabilities per location, and realized employment outcomes.
    refugee_df, capacity_df, location_probs, employments = ds.load_synthetic_data(
        data_dir=args.data_dir,
        refugee_batch_size=args.refugee_batch_size,
        refugee_batch_num=args.refugee_batch_num,
        location_num=args.location_num,
    )

    # Simulate an imperfect classifier by biasing the true location
    # probabilities with strength beta.
    clf_lp = clf.get_biased_location_probs(
        location_probs,
        beta=args.beta,
        seed=args.seed,
    )

    # Split the classifier scores and the matching refugee ids with the same
    # ratios so that rows stay aligned across splits.
    train_clf_lp, valid_clf_lp, test_clf_lp = cm.split_data(
        clf_lp,
        train_ratio=args.train_ratio,
        test_ratio=args.test_ratio,
    )
    ref_id = refugee_df['refugee_id'].values
    train_ref_id, valid_ref_id, test_ref_id = cm.split_data(
        ref_id,
        train_ratio=args.train_ratio,
        test_ratio=args.test_ratio,
    )

    # Convert the per-location score arrays into DataFrames keyed by refugee id.
    train_scores = clf.lp2df(train_clf_lp, train_ref_id)
    valid_scores = clf.lp2df(valid_clf_lp, valid_ref_id)
    test_scores = clf.lp2df(test_clf_lp, test_ref_id)

    # Save the splits under a directory name that encodes the bias level and
    # the problem size.
    location_detail = f'bias_{args.beta}_classifier'
    location_detail += f'_{args.refugee_batch_size}X{args.refugee_batch_num}_{args.location_num}_locations'
    score_path = Path(args.save_dir) / 'scores' / location_detail
    score_path.mkdir(exist_ok=True, parents=True)
    train_scores.to_csv(score_path / 'train_scores.csv', index=False)
    valid_scores.to_csv(score_path / 'valid_scores.csv', index=False)
    test_scores.to_csv(score_path / 'test_scores.csv', index=False)

    # Assign refugees to locations by maximizing the classifier scores subject
    # to the location capacities.
    clf_assignments = pb.make_assignments(
        clf_lp,
        capacity_df,
        refugee_batch_size=args.refugee_batch_size,
        refugee_batch_num=args.refugee_batch_num,
        location_num=args.location_num,
        policy='maximum',
        seed=args.seed,
    )

    # Evaluate on the test split only: look up the true employment probability
    # and the realized employment at each assigned location, then average.
    _, _, test_location_probs = cm.split_data(
        location_probs,
        train_ratio=args.train_ratio,
        test_ratio=args.test_ratio,
    )
    _, _, test_employments = cm.split_data(
        employments,
        train_ratio=args.train_ratio,
        test_ratio=args.test_ratio,
    )
    _, _, test_clf_assignments = cm.split_data(
        clf_assignments.flatten(),
        train_ratio=args.train_ratio,
        test_ratio=args.test_ratio,
    )
    avg_test_clf_probs = np.array(
        [test_location_probs[i][l] for i, l in enumerate(test_clf_assignments)]
    ).mean()
    avg_test_clf_emp = np.array(
        [test_employments[i][l] for i, l in enumerate(test_clf_assignments)]
    ).mean()

    result = (
        f'Optimal Policy with Classifier Score: '
        f'{avg_test_clf_probs:.4f} probability, {avg_test_clf_emp:.4f} utility'
    )
    print(result)

    # Record the evaluation result next to the saved scores.
    with open(score_path / 'eval', 'w') as f:
        print(result, file=f)