# experiment_script_combined.py
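# Launches the combined lifelong-learning experiments: for every random seed and
# every algorithm variant, check whether results already exist on disk and, if
# not, start lifelong_experiment_combined.py as a background subprocess,
# spreading jobs across the available CUDA devices by estimated GPU load.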
import subprocess
import os
import sys
import torch
from itertools import cycle
import numpy as np
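
# Nine algorithm variants: {ER, EWC, vanilla} x {compositional, joint, no-components}.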
algorithms = ['er_compositional', 'ewc_compositional', 'van_compositional']
algorithms += ['er_joint', 'ewc_joint', 'van_joint']
algorithms += ['er_nocomponents', 'ewc_nocomponents', 'van_nocomponents']
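
# Hyperparameters shared across all runs.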
num_epochs = 100
mini_batch = 32
update_frequency = 100
save_frequency = 1
init_mode = 'random_onehot'
results_root = 'results'
# NOTE: the loop below originally referenced an undefined variable `d` when
# building results_path; assuming it named the dataset subdirectory (the
# num_tasks comment mentions MNIST, Fashion, and CUB), it is defined here.
dataset = 'MNIST'  # assumed placeholder; set to the dataset being run
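
# Per-GPU load accounting used by the simple scheduler in the main loop.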
num_gpus = torch.cuda.device_count()
gpu_use_total = np.zeros(num_gpus)
cuda_device_dict = {}
counter = 0
process_gpu_use = {}
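
# Tallies of run status, printed at the end.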
did_not_start = 0
did_not_finish = 0
finished = 0
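
# Outer loop: one pass per random seed (10 seeds).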
for i in range(10):
    num_tasks = 10        # 10 for MNIST and Fashion, 20 for CUB
    size = 256
    num_layers = 4
    init_tasks = 4
    architecture = 'mlp'
    gpu_use = 20          # estimated GPU load (%) claimed by each job
    num_train = -1        # presumably -1 means "use all training data"
    for a in algorithms:
        ewc_lambda = 1e-3
        # Default device: round-robin over GPUs by launch count.
        cuda_device = counter % num_gpus
        # If adding this job would push every GPU past 100% estimated load,
        # wait for any running job to exit (polling each for 1 s), release its
        # share, and retarget the least-loaded GPU.
        while np.all(gpu_use_total + gpu_use > 100):
            for p in cycle(process_gpu_use):
                try:
                    p.wait(1)
                    gpu_use_remove = process_gpu_use[p]
                    gpu_use_total[cuda_device_dict[p]] -= gpu_use_remove
                    del process_gpu_use[p]
                    del cuda_device_dict[p]
                    break
                except subprocess.TimeoutExpired:
                    pass
            cuda_device = np.argmin(gpu_use_total)
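        # Skip runs whose results directory shows all tasks completed; relaunch
        # runs that never started or stopped early.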
        results_path = os.path.join(results_root, dataset, a, 'seed_{}'.format(i))
        print(results_path + ': ', end='')
        if not os.path.isdir(results_path):
            print('Did not start')
            did_not_start += 1
        else:
            # A run is considered finished when it has one subdirectory per task.
            completed_tasks = len([name for name in os.listdir(results_path)
                                   if os.path.isdir(os.path.join(results_path, name))])
            if completed_tasks != num_tasks:
                print('Did not finish')
                did_not_finish += 1
            else:
                print('Finished')
                finished += 1
                continue  # already finished; nothing to relaunch
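        # Pin the job to the chosen GPU via CUDA_VISIBLE_DEVICES and launch it
        # without blocking.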
        my_env = os.environ.copy()
        my_env['CUDA_VISIBLE_DEVICES'] = str(cuda_device)
        args = ['python', 'lifelong_experiment_combined.py',
                '-T', str(num_tasks),
                '-e', str(num_epochs),
                '-b', str(mini_batch),
                '-f', str(update_frequency),
                '--lambda', str(ewc_lambda),
                '-s', str(size),
                '-l', str(num_layers),
                '-k', str(init_tasks),
                '-i', init_mode,
                '-arc', architecture,
                '-alg', a,
                '-n', str(1),
                '-r', results_root,
                '-sf', str(save_frequency),
                '--initial_seed', str(i),
                '--num_train', str(num_train)]
        p = subprocess.Popen(args, env=my_env)
        process_gpu_use[p] = gpu_use
        gpu_use_total[cuda_device] += gpu_use
        counter += 1
        cuda_device_dict[p] = cuda_device
        print(cuda_device)
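
# Final tally across all seeds and algorithms: never started / incomplete / finished.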
print(did_not_start, did_not_finish, finished)