Update

[ghstack-poisoned]
pytorch · Oct 8, 2024 · 0d1ac6a · 0d1ac6a
1 parent 2315434
commit 0d1ac6a
Show file tree

Hide file tree

Showing 5 changed files with 5 additions and 8 deletions.
diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -16,9 +16,7 @@ sphinx_design
 torchvision
 dm_control
 mujoco
-atari-py
-ale-py
-gym[classic_control,accept-rom-license]
+gym[classic_control,accept-rom-license,ale-py,atari]
 pygame
 tqdm
 ipython

diff --git a/sota-implementations/redq/utils.py b/sota-implementations/redq/utils.py
@@ -1021,7 +1021,6 @@ def make_collector_offpolicy(
         "init_random_frames": cfg.collector.init_random_frames,
         "split_trajs": True,
         # trajectories must be separated if multi-step is used
-        "exploration_type": cfg.collector.exploration_type,
     }
 
     collector = collector_helper(**collector_helper_kwargs)

diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py
@@ -899,7 +899,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj

diff --git a/tutorials/sphinx-tutorials/coding_ppo.py b/tutorials/sphinx-tutorials/coding_ppo.py
@@ -651,7 +651,7 @@
         # number of steps (1000, which is our ``env`` horizon).
         # The ``rollout`` method of the ``env`` can take a policy as argument:
         # it will then execute this policy at each step.
-        with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
+        with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
             # execute a rollout with the trained policy
             eval_rollout = env.rollout(1000, policy_module)
             logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())

diff --git a/tutorials/sphinx-tutorials/getting-started-1.py b/tutorials/sphinx-tutorials/getting-started-1.py
@@ -172,7 +172,7 @@
 
 from torchrl.envs.utils import ExplorationType, set_exploration_type
 
-with set_exploration_type(ExplorationType.MEAN):
+with set_exploration_type(ExplorationType.DETERMINISTIC):
     # takes the deterministic action (e.g. the distribution mean)
     rollout = env.rollout(max_steps=10, policy=policy)
 with set_exploration_type(ExplorationType.RANDOM):
@@ -221,7 +221,7 @@
 
 exploration_policy = TensorDictSequential(policy, exploration_module)
 
-with set_exploration_type(ExplorationType.MEAN):
+with set_exploration_type(ExplorationType.DETERMINISTIC):
     # Turns off exploration
     rollout = env.rollout(max_steps=10, policy=exploration_policy)
 with set_exploration_type(ExplorationType.RANDOM):