diff --git a/README.md b/README.md index 3b9fe86ca..f9de061c1 100644 --- a/README.md +++ b/README.md @@ -168,3 +168,27 @@ python enjoy.py --load-dir trained_models/ppo --env-name "Reacher-v2" ![QbertNoFrameskip-v4](imgs/acktr_qbert.png) ![beamriderNoFrameskip-v4](imgs/acktr_beamrider.png) + +## Visualization with TensorBoard + +### Requirements + +* [TensorBoard](https://github.com/tensorflow/tensorboard) +* [tensorboardX](https://github.com/lanpa/tensorboardX) + +### Installation of requirements + +```bash +pip install tensorboard +pip install tensorboardX +``` + +### Using TensorBoard to visualize training + +```bash +python main.py --env-name "PongNoFrameskip-v4" --algo ppo --use-gae --lr 2.5e-4 --clip-param 0.1 --value-loss-coef 1 --num-processes 8 --num-steps 128 --num-mini-batch 4 --vis-interval 1 --log-interval 1 --tensorboard-logdir "/tmp/tfboard" +tensorboard --logdir "/tmp/tfboard" +``` + +In a browser, open [localhost:6006](http://localhost:6006). Note that every time training is started, a new folder +named with the current timestamp is created inside the log directory. 
diff --git a/arguments.py b/arguments.py index c0d3eb929..d6ebcc238 100644 --- a/arguments.py +++ b/arguments.py @@ -63,6 +63,8 @@ def get_args(): help='enable visdom visualization') parser.add_argument('--port', type=int, default=8097, help='port to run the server on (default: 8097)') + parser.add_argument('--tensorboard-logdir', default=None, + help='logs to tensorboard in the specified directory') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() diff --git a/main.py b/main.py index ea47a50bd..5eb3c08cf 100755 --- a/main.py +++ b/main.py @@ -58,6 +58,13 @@ def main(): viz = Visdom(port=args.port) win = None + tensorboard_writer = None + if args.tensorboard_logdir is not None: + from tensorboardX import SummaryWriter + import time, os, datetime + ts_str = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H-%M-%S') + tensorboard_writer = SummaryWriter(log_dir=os.path.join(args.tensorboard_logdir, ts_str)) + envs = make_vec_envs(args.env_name, args.seed, args.num_processes, args.gamma, args.log_dir, args.add_timestep, device, False) @@ -150,9 +157,19 @@ def main(): np.mean(episode_rewards), np.median(episode_rewards), np.min(episode_rewards), - np.max(episode_rewards), dist_entropy, + np.max(episode_rewards), + dist_entropy, value_loss, action_loss)) + if tensorboard_writer is not None: + tensorboard_writer.add_scalar("mean_reward", np.mean(episode_rewards), total_num_steps) + tensorboard_writer.add_scalar("median_reward", np.median(episode_rewards), total_num_steps) + tensorboard_writer.add_scalar("min_reward", np.min(episode_rewards), total_num_steps) + tensorboard_writer.add_scalar("max_reward", np.max(episode_rewards), total_num_steps) + tensorboard_writer.add_scalar("dist_entropy", dist_entropy, total_num_steps) + tensorboard_writer.add_scalar("value_loss", value_loss, total_num_steps) + tensorboard_writer.add_scalar("action_loss", action_loss, total_num_steps) + if (args.eval_interval is not None and 
len(episode_rewards) > 1 and j % args.eval_interval == 0):