From 67df72fde282adb6dd6e0ef39b8274de09814541 Mon Sep 17 00:00:00 2001
From: Buridi Aditya
Date: Sun, 28 Oct 2018 19:27:01 +0530
Subject: [PATCH 1/7] Support for Madras Env

---
 baselines/ddpg/ddpg.py | 2 +-
 baselines/run.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 8b8659bee6..3b1a65d923 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -51,7 +51,7 @@ def learn(network, env,
 
     rank = MPI.COMM_WORLD.Get_rank()
     nb_actions = env.action_space.shape[-1]
-    assert (np.abs(env.action_space.low) == env.action_space.high).all() # we assume symmetric actions.
+    #assert (np.abs(env.action_space.low) == env.action_space.high).all() # we assume symmetric actions.
 
     memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape)
     critic = Critic(network=network, **network_kwargs)
diff --git a/baselines/run.py b/baselines/run.py
index 4aaf1a7010..5753f30eec 100644
--- a/baselines/run.py
+++ b/baselines/run.py
@@ -49,7 +49,7 @@
     'SpaceInvaders-Snes',
 }
 
-
+_game_envs['madras'] = {'gym-torcs-v0','gym-madras-v0'}
 def train(args, extra_args):
     env_type, env_id = get_env_type(args.env)
     print('env_type: {}'.format(env_type))

From bba5cfc93cd49c423f1148bb2a0ed20ff9d3dadc Mon Sep 17 00:00:00 2001
From: rudrasohan
Date: Fri, 30 Nov 2018 16:14:01 +0530
Subject: [PATCH 2/7] added saving function for ddpg

---
 baselines/ddpg/ddpg.py         | 10 ++++++++++
 baselines/ddpg/ddpg_learner.py |  6 ++++++
 baselines/run.py               |  2 +-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 4bbda692b9..3fb4f72ac6 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -42,6 +42,8 @@ def learn(network, env,
           tau=0.01,
           eval_env=None,
           param_noise_adaption_interval=50,
+          load_path = None,
+          save_path = ''
           **network_kwargs):
 
     set_global_seeds(seed)
@@ -91,6 +93,9 @@ def learn(network, env,
         batch_size=batch_size, action_noise=action_noise, param_noise=param_noise, critic_l2_reg=critic_l2_reg,
         actor_lr=actor_lr, critic_lr=critic_lr, enable_popart=popart, clip_norm=clip_norm,
         reward_scale=reward_scale)
+
+    if load_path is not None:
+        agent.load(load_path)
     logger.info('Using agent with the following configuration:')
     logger.info(str(agent.__dict__.items()))
 
@@ -269,5 +274,10 @@ def as_scalar(x):
                 with open(os.path.join(logdir, 'eval_env_state.pkl'), 'wb') as f:
                     pickle.dump(eval_env.get_state(), f)
 
+    os.mkdirs(logdir,exist_ok=True)
+    savepath = os.path.join(save_path, str(epoch))
+    print('Saving to ',savepath)
+    agent.save(savepath)
+
     return agent
 
diff --git a/baselines/ddpg/ddpg_learner.py b/baselines/ddpg/ddpg_learner.py
index 3fc8a7744e..09c51216e6 100755
--- a/baselines/ddpg/ddpg_learner.py
+++ b/baselines/ddpg/ddpg_learner.py
@@ -1,6 +1,7 @@
 from copy import copy
 from functools import reduce
+import functools
 
 import numpy as np
 import tensorflow as tf
 import tensorflow.contrib as tc
@@ -9,6 +10,7 @@
 from baselines.common.mpi_adam import MpiAdam
 import baselines.common.tf_util as U
 from baselines.common.mpi_running_mean_std import RunningMeanStd
+from baselines.common.tf_util import save_variables, load_variables
 try:
     from mpi4py import MPI
 except ImportError:
@@ -98,6 +100,8 @@ def __init__(self, actor, critic, memory, observation_shape, action_shape, param
         self.batch_size = batch_size
         self.stats_sample = None
         self.critic_l2_reg = critic_l2_reg
+        self.save = None
+        self.load = None
 
         # Observation normalization.
         if self.normalize_observations:
@@ -333,6 +337,8 @@ def train(self):
     def initialize(self, sess):
         self.sess = sess
         self.sess.run(tf.global_variables_initializer())
+        self.save = functools.partial(save_variables, sess=self.sess)
+        self.load = functools.partial(load_variables, sess=self.sess)
         self.actor_optimizer.sync()
         self.critic_optimizer.sync()
         self.sess.run(self.target_init_updates)
diff --git a/baselines/run.py b/baselines/run.py
index caf00e0a1c..451544523e 100644
--- a/baselines/run.py
+++ b/baselines/run.py
@@ -5,7 +5,7 @@
 from collections import defaultdict
 import tensorflow as tf
 import numpy as np
-
+import MADRaS
 from baselines.common.vec_env.vec_video_recorder import VecVideoRecorder
 from baselines.common.vec_env.vec_frame_stack import VecFrameStack
 from baselines.common.cmd_util import common_arg_parser, parse_unknown_args, make_vec_env, make_env

From 3e28aeea358bd10d497c69e4fcecdcdf77e510df Mon Sep 17 00:00:00 2001
From: rudrasohan
Date: Sat, 1 Dec 2018 04:21:43 +0530
Subject: [PATCH 3/7] fixed typos

---
 baselines/ddpg/ddpg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 3fb4f72ac6..5587ac8c22 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -43,7 +43,7 @@ def learn(network, env,
           eval_env=None,
           param_noise_adaption_interval=50,
           load_path = None,
-          save_path = ''
+          save_path = '',
           **network_kwargs):
 
     set_global_seeds(seed)
@@ -274,7 +274,7 @@ def as_scalar(x):
                 with open(os.path.join(logdir, 'eval_env_state.pkl'), 'wb') as f:
                     pickle.dump(eval_env.get_state(), f)
 
-    os.mkdirs(logdir,exist_ok=True)
+    os.makdirs(logdir,exist_ok=True)
     savepath = os.path.join(save_path, str(epoch))
     print('Saving to ',savepath)
     agent.save(savepath)

From f66db73df0bcab1040de042c71702333aa87055c Mon Sep 17 00:00:00 2001
From: rudrasohan
Date: Sat, 1 Dec 2018 04:24:26 +0530
Subject: [PATCH 4/7] fixed typos 1

---
 baselines/ddpg/ddpg.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 5587ac8c22..860f720118 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -274,9 +274,9 @@ def as_scalar(x):
                 with open(os.path.join(logdir, 'eval_env_state.pkl'), 'wb') as f:
                     pickle.dump(eval_env.get_state(), f)
 
-    os.makdirs(logdir,exist_ok=True)
+    os.makedirs(logdir, exist_ok=True)
     savepath = os.path.join(save_path, str(epoch))
-    print('Saving to ',savepath)
+    print('Saving to ', savepath)
     agent.save(savepath)
 
     return agent

From 44e1b53eb840442bda686be39bab21f3bad803f9 Mon Sep 17 00:00:00 2001
From: rudrasohan
Date: Sat, 1 Dec 2018 23:23:05 +0530
Subject: [PATCH 5/7] fixed loader

---
 baselines/ddpg/ddpg.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 860f720118..26c61ab2d1 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -43,9 +43,11 @@ def learn(network, env,
           eval_env=None,
           param_noise_adaption_interval=50,
           load_path = None,
-          save_path = '',
+          save_path = '/home/sohan/check1',
           **network_kwargs):
 
+    print("Save PATH: {}".format(save_path))
+    print("Load PATH: {}".format(load_path))
     set_global_seeds(seed)
 
     if total_timesteps is not None:
@@ -61,7 +63,7 @@ def learn(network, env,
 
     nb_actions = env.action_space.shape[-1]
     #assert (np.abs(env.action_space.low) == env.action_space.high).all() # we assume symmetric actions.
-
+    sess = U.get_session()
     memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape)
     critic = Critic(network=network, **network_kwargs)
     actor = Actor(nb_actions, network=network, **network_kwargs)
@@ -94,16 +96,15 @@ def learn(network, env,
         actor_lr=actor_lr, critic_lr=critic_lr, enable_popart=popart, clip_norm=clip_norm,
         reward_scale=reward_scale)
 
-    if load_path is not None:
-        agent.load(load_path)
     logger.info('Using agent with the following configuration:')
     logger.info(str(agent.__dict__.items()))
 
     eval_episode_rewards_history = deque(maxlen=100)
     episode_rewards_history = deque(maxlen=100)
-    sess = U.get_session()
     # Prepare everything.
     agent.initialize(sess)
+    if load_path is not None:
+        agent.load(load_path)
     sess.graph.finalize()
 
     agent.reset()
@@ -129,6 +130,8 @@ def learn(network, env,
     epoch_actions = []
     epoch_qs = []
     epoch_episodes = 0
+    if load_path is None:
+        os.makedirs(save_path, exist_ok=True)
     for epoch in range(nb_epochs):
         for cycle in range(nb_epoch_cycles):
             # Perform rollouts.
@@ -274,7 +277,6 @@ def as_scalar(x):
                 with open(os.path.join(logdir, 'eval_env_state.pkl'), 'wb') as f:
                     pickle.dump(eval_env.get_state(), f)
 
-    os.makedirs(logdir, exist_ok=True)
     savepath = os.path.join(save_path, str(epoch))
     print('Saving to ', savepath)
     agent.save(savepath)

From a33f0e1f2ad7211f6049b91be8ed24df62131abb Mon Sep 17 00:00:00 2001
From: rudrasohan
Date: Sat, 1 Dec 2018 23:34:51 +0530
Subject: [PATCH 6/7] prevent file overwriting

---
 baselines/ddpg/ddpg.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 26c61ab2d1..3298e96add 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -63,7 +63,6 @@ def learn(network, env,
 
     nb_actions = env.action_space.shape[-1]
     #assert (np.abs(env.action_space.low) == env.action_space.high).all() # we assume symmetric actions.
-    sess = U.get_session()
     memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape)
     critic = Critic(network=network, **network_kwargs)
     actor = Actor(nb_actions, network=network, **network_kwargs)
@@ -102,9 +101,12 @@ def learn(network, env,
     eval_episode_rewards_history = deque(maxlen=100)
     episode_rewards_history = deque(maxlen=100)
     # Prepare everything.
+    sess = U.get_session()
     agent.initialize(sess)
+    checkpoint_num = 0
     if load_path is not None:
         agent.load(load_path)
+        checkpoint_num = int(os.path.split(load_path)[1]) + 1
     sess.graph.finalize()
 
     agent.reset()
@@ -277,7 +279,7 @@ def as_scalar(x):
                 with open(os.path.join(logdir, 'eval_env_state.pkl'), 'wb') as f:
                     pickle.dump(eval_env.get_state(), f)
 
-    savepath = os.path.join(save_path, str(epoch))
+    savepath = os.path.join(save_path, str(epoch+checkpoint_num))
     print('Saving to ', savepath)
     agent.save(savepath)
 

From 0ddec01ef1e1de33affcbb206ca7c7ecd2b4919f Mon Sep 17 00:00:00 2001
From: rudrasohan
Date: Sat, 1 Dec 2018 23:47:20 +0530
Subject: [PATCH 7/7] removed specific path

---
 baselines/ddpg/ddpg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/baselines/ddpg/ddpg.py b/baselines/ddpg/ddpg.py
index 3298e96add..fab6befb6f 100755
--- a/baselines/ddpg/ddpg.py
+++ b/baselines/ddpg/ddpg.py
@@ -43,7 +43,7 @@ def learn(network, env,
           eval_env=None,
           param_noise_adaption_interval=50,
           load_path = None,
-          save_path = '/home/sohan/check1',
+          save_path = '',
           **network_kwargs):
 
     print("Save PATH: {}".format(save_path))
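
A note on driving the patched entry point: with the seven patches above
applied, learn() in baselines/ddpg/ddpg.py accepts save_path and load_path,
writes a single checkpoint named after the final epoch index once training
finishes, and on resumption offsets that name by int(basename(load_path)) + 1
so earlier checkpoints are not overwritten (PATCH 6). The snippet below is a
minimal sketch, not part of the patches: it assumes that importing MADRaS
registers 'gym-madras-v0' with Gym (as the `import MADRaS` line added in
PATCH 2 implies), that the environment runs under baselines' DummyVecEnv
wrapper, and that the paths and timestep budget shown are illustrative only.

    import gym
    import MADRaS  # assumed to register gym-torcs-v0 / gym-madras-v0 on import
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    from baselines.ddpg.ddpg import learn

    env = DummyVecEnv([lambda: gym.make('gym-madras-v0')])

    # Fresh run: with the default epoch sizing (20 cycles x 100 rollout steps),
    # 20000 timesteps correspond to 10 epochs, so the final checkpoint is
    # written to /tmp/madras_ddpg/9.
    agent = learn(network='mlp', env=env, total_timesteps=20000,
                  save_path='/tmp/madras_ddpg', load_path=None)

    # Resumed run: checkpoint_num = int('9') + 1, so the new checkpoint lands
    # at /tmp/madras_ddpg/19 instead of overwriting /tmp/madras_ddpg/9.
    agent = learn(network='mlp', env=env, total_timesteps=20000,
                  save_path='/tmp/madras_ddpg', load_path='/tmp/madras_ddpg/9')

One caveat: PATCH 5 creates the save directory only when load_path is None,
so a resumed run pointed at a fresh save_path requires that directory to
exist beforehand.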