-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain_utils.py
116 lines (81 loc) · 3.93 KB
/
train_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import torch
import numpy as np
import torch.nn.functional as F
import time
import csv
from sklearn.metrics import f1_score
import pickle
def output_to_class(output):
    """
    Map every row of a batch of network outputs to its predicted class.

    The predicted class of a row is the position of its largest score; when
    several entries share the largest score the first position is returned.
    The result is a list with one index per row of ``output``.
    """
    def _first_argmax(scores):
        # Position of the first entry equal to the row's maximum.
        values = list(scores)
        return values.index(max(scores))

    return [_first_argmax(scores) for scores in output]
# This function trains the model for one epoch
def train(args, model, device, train_loader, optimizer, epoch, start_time):
    """
    Run one training epoch over ``train_loader`` and log progress.

    Every batch is moved to ``device``, passed through ``model``, scored with
    the negative log-likelihood loss and used for a single optimizer step.
    Each ``args.log_interval``-th batch prints a line with the batch loss, the
    micro-averaged F1 score of that batch and the seconds elapsed since
    ``start_time``.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # Only report on every log_interval-th batch.
        if batch_idx % args.log_interval != 0:
            continue

        samples_seen = batch_idx * len(data)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * batch_idx / len(train_loader)
        loss_value = loss.item()
        batch_f1 = f1_score(
            target.detach().cpu().numpy(),
            output_to_class(output),
            average='micro',
        )
        elapsed = time.time() - start_time
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tF1: {:.4f}\tRuntime: {:.1f}'.format(
            epoch, samples_seen, samples_total, percent_done,
            loss_value, batch_f1, elapsed))
# This function evaluates the model on the test data
def test(args, model, device, test_loader, epoch, trainDataset, testDataset, path_save):
    """
    Predict a class for every test sample and write a submission CSV.

    The model is put in evaluation mode and run over ``test_loader``. Each
    predicted class index is converted to its label through
    ``trainDataset.transformInstrumentsFamilyToString`` and the result is
    written to ``path_save + 'NN-submission-<epoch>.csv'`` with the columns
    ``Id`` and ``Predicted``.

    Args:
        args: Unused; kept for interface compatibility.
        model: Network to evaluate.
        device: Device each batch is moved to before the forward pass.
        test_loader: Iterable yielding ``(samples, indices)`` batches;
            ``indices`` are used as row positions in the prediction list.
        epoch: Value embedded in the output file name.
        trainDataset: Object providing ``transformInstrumentsFamilyToString``.
        testDataset: Only its length is used, to size the prediction list.
        path_save: Prefix of the output path. It is concatenated as-is, so
            it should end with a path separator when it names a directory.
    """
    model.eval()
    with open(path_save + 'NN-submission-' + str(epoch) + '.csv', 'w', newline='') as writeFile:
        # Placeholder per row; every entry is expected to be overwritten by
        # the loop below (a leftover placeholder cannot be indexed later).
        instruments = list(15 * np.ones(len(testDataset)))

        # Inference only: no gradients are needed, so skip autograd tracking.
        with torch.no_grad():
            for samples, indices in test_loader:
                # Move the batch to the target device, as train() does.
                out = model(samples.to(device))
                prediction_batch = output_to_class(out)
                for pred, index in zip(prediction_batch, indices):
                    instruments[int(index)] = trainDataset.transformInstrumentsFamilyToString([pred])

        fieldnames = ['Id', 'Predicted']
        writer = csv.DictWriter(writeFile, fieldnames=fieldnames, delimiter=',',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        for row_id, label in enumerate(instruments):
            # The label helper returns a sequence; its first item is the label.
            writer.writerow({'Id': row_id, 'Predicted': label[0]})
    print('saved predictions')
def save_output(args, model, device, test_loader, which_net, trainDataset, testDataset, path_save):
    """
    Run the model over the test set and pickle its raw outputs.

    The outputs are collected in an array of shape ``(len(testDataset), 10)``
    and written, wrapped in a one-element list, to
    ``path_save + 'output-<which_net>.txt'`` (a binary pickle despite the
    ``.txt`` suffix).

    Args:
        args: Unused; kept for interface compatibility.
        model: Network to evaluate.
        device: Device each batch is moved to before the forward pass.
        test_loader: Iterable yielding ``(samples, indices)`` batches;
            ``indices`` select the row each output is stored in.
        which_net: String embedded in the output file name.
        trainDataset: Unused; kept for interface compatibility.
        testDataset: Only its length is used, to size the output array.
        path_save: Prefix of the output path. It is concatenated as-is, so
            it should end with a path separator when it names a directory.
    """
    model.eval()
    with open(path_save + 'output-' + which_net + '.txt', 'wb') as writeFile:
        # Rows that no batch fills keep the initial value of 1.
        # NOTE: the width of 10 is hard-coded and must match the model's
        # output size.
        outputs = np.ones([len(testDataset), 10])

        # Inference only: no gradients are needed, so skip autograd tracking.
        with torch.no_grad():
            for samples, indices in test_loader:
                # Move the batch to the target device, as train() does.
                out = model(samples.to(device))
                for pred, index in zip(out, indices):
                    outputs[int(index)] = pred.detach().cpu().numpy()

        pickle.dump([outputs], writeFile)
    print('saved outputs')
def save_geometric_mean_predictions(path_1D, path_2D, path_save, trainDataset, testDataset):
    """
    Combine two saved model outputs and write a submission CSV.

    Both input files are pickles holding a one-element list whose item is a
    sequence of per-sample score rows. The rows are treated as
    log-values: the geometric mean of their exponentials,
    ``log(sqrt(exp(a) * exp(b)))``, equals ``(a + b) / 2``, which is computed
    directly so that large-magnitude scores cannot underflow or overflow.
    The class with the highest combined score (first one on ties) is
    converted to its label through
    ``trainDataset.transformInstrumentsFamilyToString`` and written to
    ``path_save + 'NN-submission-combined-model.csv'``.

    Args:
        path_1D: Path of the first pickled output file.
        path_2D: Path of the second pickled output file.
        path_save: Prefix of the output path. It is concatenated as-is, so
            it should end with a path separator when it names a directory.
        trainDataset: Object providing ``transformInstrumentsFamilyToString``.
        testDataset: Unused; kept for interface compatibility.
    """
    # NOTE: pickle.load can execute arbitrary code; only pass files that were
    # produced by trusted code.
    with open(path_1D, 'rb') as readFile:
        out_1D = pickle.load(readFile)[0]
    with open(path_2D, 'rb') as readFile:
        out_2D = pickle.load(readFile)[0]

    instruments = []
    # Rows are paired positionally; extra rows of the longer input are ignored.
    for pred1, pred2 in zip(out_1D, out_2D):
        # Geometric mean in log space: mean of the two log-score rows.
        combined = 0.5 * (np.asarray(pred1, dtype=float) + np.asarray(pred2, dtype=float))
        best_class = int(np.argmax(combined))
        instruments.append(trainDataset.transformInstrumentsFamilyToString([best_class]))

    # write submission file
    with open(path_save + 'NN-submission-combined-model.csv', 'w', newline='') as writeFile:
        fieldnames = ['Id', 'Predicted']
        writer = csv.DictWriter(writeFile, fieldnames=fieldnames, delimiter=',',
                                quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writeheader()
        for row_id, label in enumerate(instruments):
            # The label helper returns a sequence; its first item is the label.
            writer.writerow({'Id': row_id, 'Predicted': label[0]})
    print('saved predictions')