-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
107 lines (95 loc) · 5.15 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import torch.nn as nn
import torch
from torchrl.collectors import SyncDataCollector
import random
from play import play
from SAC import Agent
def loadModel(path="./models/model.pt"):
model = Agent()
model.load_state_dict(torch.load(path))
return model
class ModelController(object):
def __init__(self) -> None:
pass
def genetic_algorithm(self, num_generations=50, population_size=8, num_best_players=5, num_parents=3):
population = self.initialize_population(population_size)
bset_score = 0
best_generation = 0
best_id = 0
for generation in range(num_generations):
score_list = []
for (chromosome_left, chromosome_right) in population:
score = self.evaluate_fitness(chromosome_left, chromosome_right)
score_list.append(score)
# sort the index by their scores descendingly
score_list, indexs = zip(*sorted(zip(score_list, range(len(score_list))), reverse=True))
population = [population[i] for i in indexs][:num_best_players]
# save models and score
for i in range(num_best_players):
torch.save(population[i][0].state_dict(), "./models/model_left_" + str(generation) + "_" + str(i) + "_" + ".pt")
torch.save(population[i][1].state_dict(), "./models/model_right_" + str(generation) + "_" + str(i) + "_" + ".pt")
with open("./log/scores.txt", "a") as f:
f.write("./models/model_" + str(generation) + "_" + str(i) + ":" + str(score_list[i]) + "\n")
if score_list[i] > bset_score:
bset_score = score_list[i]
best_generation = generation
best_id = i
# make new chromosomes
for _ in range(population_size):
new_population = []
# for both left and right
# create new chromosomes
for _ in range(population_size):
# randomly select parents
parents = self.select_parents(population, num_parents)
child_left = self.create_chromosome()
child_right = self.create_chromosome()
for param in child_left.parameters():
param.requires_grad = False
param.data = torch.zeros_like(param)
for param in child_right.parameters():
param.requires_grad = False
param.data = torch.zeros_like(param)
random_list = [random.random() for _ in range(num_parents)]
# softmax random_list to make it a probability distribution and sum to 1
random_list = torch.softmax(torch.tensor(random_list), dim=-1)
# and the bigger the score, the bigger the probability to be selected///////////
random_list, _ = torch.sort(random_list, descending=True)
# for parent in parents:
# for child_param, parent_param in zip(child.parameters(), parent.parameters()):
# child_param.data = child_param.data + parent_param.data * random_list.pop()
for i in range(num_parents):
for child_param, parent_param in zip(child_left.parameters(), parents[i].parameters()):
child_param.data = child_param.data + parent_param.data * random_list[i]
for child_param, parent_param in zip(child_right.parameters(), parents[i].parameters()):
child_param.data = child_param.data + parent_param.data * random_list[i]
# add some noise
for param in child_left.parameters():
param.requires_grad = False
param.data = param.data * (1 + torch.randn_like(param) * 0.003)
for param in child_right.parameters():
param.requires_grad = False
param.data = param.data * (1 + torch.randn_like(param) * 0.003)
new_population.append((child_left, child_right))
population = new_population
return bset_score, best_generation, best_id
def evaluate_fitness(self, chromosome_left, chromosome_right):
# play the game
score = play(chromosome_left, chromosome_right)
print("final score: ", score)
return score
def initialize_population(self, population_size):
return [(self.create_chromosome(), self.create_chromosome()) for _ in range(population_size)]
def create_chromosome(self):
# create a chromosome with random weights
model = Agent()
for param in model.parameters():
param.requires_grad = False
param.data = torch.randn_like(param) * 0.1
return model
def select_parents(self, population, num_parents):
parents = []
for _ in range(num_parents):
parent = random.choice(population)
parents.append(parent)
return parents