diff --git a/sota-implementations/impala/utils.py b/sota-implementations/impala/utils.py index 9fa3d6b399f..30293940377 100644 --- a/sota-implementations/impala/utils.py +++ b/sota-implementations/impala/utils.py @@ -100,7 +100,7 @@ def make_ppo_modules_pixels(proof_environment): out_keys=["common_features"], ) - # Define on head for the policy + # Define one head for the policy policy_net = MLP( in_features=common_mlp_output.shape[-1], out_features=num_outputs, diff --git a/torchrl/envs/common.py b/torchrl/envs/common.py index 2aacf76168b..d4015cdc886 100644 --- a/torchrl/envs/common.py +++ b/torchrl/envs/common.py @@ -2136,9 +2136,9 @@ def reset( self._assert_tensordict_shape(tensordict) tensordict_reset = self._reset(tensordict, **kwargs) - # We assume that this is done properly - # if reset.device != self.device: - # reset = reset.to(self.device, non_blocking=True) + # We assume that this is done properly + # if reset.device != self.device: + # reset = reset.to(self.device, non_blocking=True) if tensordict_reset is tensordict: raise RuntimeError( "EnvBase._reset should return outplace changes to the input " diff --git a/torchrl/envs/libs/gym.py b/torchrl/envs/libs/gym.py index a82286659cb..61960d1a40d 100644 --- a/torchrl/envs/libs/gym.py +++ b/torchrl/envs/libs/gym.py @@ -1281,7 +1281,7 @@ class GymEnv(GymWrapper): pixels_only (bool, optional): if ``True``, only the pixel observations will be returned (by default under the ``"pixels"`` entry in the output tensordict). If ``False``, observations (eg, states) and pixels will be returned - whenever ``from_pixels=True``. Defaults to ``True``. + whenever ``from_pixels=True``. Defaults to ``False``. frame_skip (int, optional): if provided, indicates for how many steps the same action is to be repeated. 
The observation returned will be the last observation of the sequence, whereas the reward will be the sum diff --git a/torchrl/envs/utils.py b/torchrl/envs/utils.py index 0c252c3db3f..a5a98fa2179 100644 --- a/torchrl/envs/utils.py +++ b/torchrl/envs/utils.py @@ -69,13 +69,13 @@ ACTION_MASK_ERROR = RuntimeError( - "An out-of-bounds actions has been provided to an env with an 'action_mask' output." - " If you are using a custom policy, make sure to take the action mask into account when computing the output." - " If you are using a default policy, please add the torchrl.envs.transforms.ActionMask transform to your environment." + "An out-of-bounds action has been provided to an env with an 'action_mask' output. " + "If you are using a custom policy, make sure to take the action mask into account when computing the output. " + "If you are using a default policy, please add the torchrl.envs.transforms.ActionMask transform to your environment. " - "If you are using a ParallelEnv or another batched inventor, " + "If you are using a ParallelEnv or another batched environment, " - "make sure to add the transform to the ParallelEnv (and not to the sub-environments)." - " For more info on using action masks, see the docs at: " - "https://pytorch.org/rl/reference/envs.html#environments-with-masked-actions" + "make sure to add the transform to the ParallelEnv (and not to the sub-environments). " + "For more info on using action masks, see the docs at: " + "https://pytorch.org/rl/main/reference/envs.html#environments-with-masked-actions" ) diff --git a/torchrl/modules/distributions/continuous.py b/torchrl/modules/distributions/continuous.py index 71fee70d5b8..33dfe6aa1df 100644 --- a/torchrl/modules/distributions/continuous.py +++ b/torchrl/modules/distributions/continuous.py @@ -374,8 +374,8 @@ class TanhNormal(FasterTransformedDistribution): .. math:: loc = tanh(loc / upscale) * upscale. - min (torch.Tensor or number, optional): minimum value of the distribution. 
Default is -1.0; - max (torch.Tensor or number, optional): maximum value of the distribution. Default is 1.0; + low (torch.Tensor or number, optional): minimum value of the distribution. Default is -1.0; + high (torch.Tensor or number, optional): maximum value of the distribution. Default is 1.0; event_dims (int, optional): number of dimensions describing the action. Default is 1. Setting ``event_dims`` to ``0`` will result in a log-probability that has the same shape as the input, ``1`` will reduce (sum over) the last dimension, ``2`` the last two etc. diff --git a/torchrl/objectives/value/functional.py b/torchrl/objectives/value/functional.py index d3ad8d93ca4..ddd688610c2 100644 --- a/torchrl/objectives/value/functional.py +++ b/torchrl/objectives/value/functional.py @@ -230,7 +230,7 @@ def _fast_vec_gae( ``[*Batch x TimeSteps x F]``, with ``F`` feature dimensions. """ - # _gen_num_per_traj and _split_and_pad_sequence need + # _get_num_per_traj and _split_and_pad_sequence need # time dimension at last position done = done.transpose(-2, -1) terminated = terminated.transpose(-2, -1) diff --git a/tutorials/sphinx-tutorials/pendulum.py b/tutorials/sphinx-tutorials/pendulum.py index 19f79c37480..94bd8427e30 100644 --- a/tutorials/sphinx-tutorials/pendulum.py +++ b/tutorials/sphinx-tutorials/pendulum.py @@ -128,7 +128,7 @@ # * :meth:`EnvBase._reset`, which codes for the resetting of the simulator # at a (potentially random) initial state; # * :meth:`EnvBase._step` which codes for the state transition dynamic; -# * :meth:`EnvBase._set_seed`` which implements the seeding mechanism; +# * :meth:`EnvBase._set_seed` which implements the seeding mechanism; # * the environment specs. 
# # Let us first describe the problem at hand: we would like to model a simple diff --git a/tutorials/sphinx-tutorials/torchrl_envs.py b/tutorials/sphinx-tutorials/torchrl_envs.py index f2ae0372db2..34189396ee9 100644 --- a/tutorials/sphinx-tutorials/torchrl_envs.py +++ b/tutorials/sphinx-tutorials/torchrl_envs.py @@ -608,7 +608,7 @@ def env_make(env_name): ############################################################################### # Transforming parallel environments # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# There are two equivalent ways of transforming parallen environments: in each +# There are two equivalent ways of transforming parallel environments: in each # process separately, or on the main process. It is even possible to do both. # One can therefore think carefully about the transform design to leverage the # device capabilities (e.g. transforms on cuda devices) and vectorizing