-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathtrainer.py
59 lines (54 loc) · 2.23 KB
/
trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python
# coding=utf-8
'''
Author: John
Email: [email protected]
Date: 2020-09-11 23:03:00
LastEditor: John
LastEditTime: 2022-12-03 14:32:07
Description:
Environment:
'''
class Trainer:
    """Run single training or evaluation episodes of an agent in an environment.

    The environment is expected to offer ``reset(seed=...)``, ``step(action)``
    and (for evaluation with rendering) ``render()``. The agent is expected to
    offer ``sample_action``, ``predict_action``, ``update`` and a ``memory``
    object with a ``push`` method. ``cfg`` supplies ``seed``, ``max_steps``,
    ``new_step_api`` and, for evaluation, ``render``.
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def _step_env(env, action, new_step_api):
        """Step ``env`` once and return a uniform 5-tuple.

        Args:
            env: environment whose ``step`` method is called with ``action``.
            action: action forwarded to ``env.step``.
            new_step_api: if true, ``env.step`` is unpacked as
                ``(next_state, reward, terminated, truncated, info)``;
                otherwise as ``(next_state, reward, terminated, info)``.

        Returns:
            ``(next_state, reward, terminated, truncated, info)``. With the
            4-tuple layout ``truncated`` is always ``False`` because that
            layout carries a single end-of-episode flag.
        """
        if new_step_api:
            next_state, reward, terminated, truncated, info = env.step(action)
        else:
            next_state, reward, terminated, info = env.step(action)
            truncated = False
        return next_state, reward, terminated, truncated, info

    def train_one_episode(self, env, agent, cfg):
        """Collect one episode of experience, updating the agent every step.

        Args:
            env: environment to interact with.
            agent: agent providing ``sample_action``, ``memory.push`` and
                ``update``.
            cfg: configuration providing ``seed``, ``max_steps`` and
                ``new_step_api``.

        Returns:
            ``(agent, res)`` where ``res`` is a dict with the keys
            ``'ep_reward'`` (sum of the rewards that were stored) and
            ``'ep_step'`` (number of environment steps taken).
        """
        ep_reward = 0  # reward accumulated over the episode
        ep_step = 0
        state = env.reset(seed=cfg.seed)  # reset and obtain initial state
        for _ in range(cfg.max_steps):
            ep_step += 1
            action = agent.sample_action(state)
            next_state, reward, terminated, truncated, _info = self._step_env(
                env, action, cfg.new_step_api
            )
            if terminated:
                # The reward of the terminal transition is zeroed before it
                # is stored, so it does not count towards ep_reward either.
                reward = 0
            agent.memory.push((state, action, reward))  # store transition
            agent.update()
            state = next_state
            ep_reward += reward
            # A truncated episode is over as well; stepping further without
            # a reset would feed the agent transitions from a finished episode.
            if terminated or truncated:
                break
        res = {'ep_reward': ep_reward, 'ep_step': ep_step}
        return agent, res

    def test_one_episode(self, env, agent, cfg):
        """Run one evaluation episode without storing data or updating.

        Args:
            env: environment to interact with.
            agent: agent providing ``predict_action``.
            cfg: configuration providing ``seed``, ``max_steps``,
                ``new_step_api`` and ``render``.

        Returns:
            ``(agent, res)`` where ``res`` is a dict with the keys
            ``'ep_reward'`` (sum of rewards) and ``'ep_step'`` (number of
            environment steps taken).
        """
        ep_reward = 0  # reward accumulated over the episode
        ep_step = 0
        state = env.reset(seed=cfg.seed)  # reset and obtain initial state
        for _ in range(cfg.max_steps):
            if cfg.render:
                env.render()
            ep_step += 1
            action = agent.predict_action(state)
            next_state, reward, terminated, truncated, _info = self._step_env(
                env, action, cfg.new_step_api
            )
            state = next_state
            ep_reward += reward
            # Stop on truncation too, not only on termination.
            if terminated or truncated:
                break
        res = {'ep_reward': ep_reward, 'ep_step': ep_step}
        return agent, res