diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index ac8ca1245..2f7dbe241 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -4,9 +4,11 @@ Changelog
 ==========
 
-Release 1.8.0a14 (WIP)
+Release 1.8.0 (2023-04-07)
 --------------------------
 
+**Multi-env HerReplayBuffer, Open RL Benchmark, Improved env checker**
+
 .. warning::
 
   Stable-Baselines3 (SB3) v1.8.0 will be the last one to use Gym as a backend.
@@ -31,15 +33,29 @@ New Features:
 - Added support for dict/tuple observations spaces for ``VecCheckNan``, the check is now active in the ``env_checker()`` (@DavyMorgan)
 - Added multiprocessing support for ``HerReplayBuffer``
 - ``HerReplayBuffer`` now supports all datatypes supported by ``ReplayBuffer``
-- Provide more helpful failure messages when validating the ``observation_space`` of custom gym environments using ``check_env``` (@FieteO)
+- Provide more helpful failure messages when validating the ``observation_space`` of custom gym environments using ``check_env`` (@FieteO)
 - Added ``stats_window_size`` argument to control smoothing in rollout logging (@jonasreiher)
 
 `SB3-Contrib`_
 ^^^^^^^^^^^^^^
+- Added warning about potential crashes caused by ``check_env`` in the ``MaskablePPO`` docs (@AlexPasqua)
+- Fixed ``sb3_contrib/qrdqn/*.py`` type hints
+- Removed shared layers in ``mlp_extractor`` (@AlexPasqua)
 
 `RL Zoo`_
 ^^^^^^^^^
+- `Open RL Benchmark <https://github.com/openrlbenchmark/openrlbenchmark>`_
+- Upgraded to new `HerReplayBuffer` implementation that supports multiple envs
+- Removed `TimeFeatureWrapper` for Panda and Fetch envs, as the new replay buffer should handle timeouts
+- Tuned hyperparameters for RecurrentPPO on Swimmer
+- Documentation is now built using Sphinx and hosted on Read the Docs
+- Removed `use_auth_token` for push to hub util
+- Reverted from v3 to v2 for HumanoidStandup, Reacher, InvertedPendulum and InvertedDoublePendulum since they were not part of the mujoco refactoring (see https://github.com/openai/gym/pull/1304)
+- Fixed `gym-minigrid` policy (from `MlpPolicy` to `MultiInputPolicy`)
+- Replaced deprecated `optuna.suggest_loguniform(...)` by `optuna.suggest_float(..., log=True)`
+- Switched to `ruff` and `pyproject.toml`
+- Removed `online_sampling` and `max_episode_length` arguments when using `HerReplayBuffer`
 
 Bug Fixes:
 ^^^^^^^^^^
@@ -47,7 +63,7 @@ Bug Fixes:
 - Added the argument ``dtype`` (default to ``float32``) to the noise for consistency with gym action (@sidney-tio)
 - Fixed PPO train/n_updates metric not accounting for early stopping (@adamfrly)
 - Fixed loading of normalized image-based environments
-- Fixed `DictRolloutBuffer.add` with multidimensional action space (@younik)
+- Fixed ``DictRolloutBuffer.add`` with multidimensional action space (@younik)
 
 Deprecations:
 ^^^^^^^^^^^^^
diff --git a/setup.py b/setup.py
index 7e0043320..dd7b69637 100644
--- a/setup.py
+++ b/setup.py
@@ -90,7 +90,7 @@
 
 extra_packages = extra_no_roms + [  # noqa: RUF005
     # For atari roms,
-    "autorom[accept-rom-license]~=0.5.5",
+    "autorom[accept-rom-license]~=0.6.0",
 ]
 
@@ -138,7 +138,7 @@
             # For spelling
             "sphinxcontrib.spelling",
             # Type hints support
-            "sphinx-autodoc-typehints==1.21.1",  # TODO: remove version constraint, see #1290
+            "sphinx-autodoc-typehints",
             # Copy button for code snippets
             "sphinx_copybutton",
         ],
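Illustrative usage for the headline feature above (multi-env ``HerReplayBuffer``). This sketch is not part of the diff: the ``BitFlippingEnv`` toy task, the SAC hyperparameters and ``n_envs=4`` are arbitrary choices; the removed ``online_sampling``/``max_episode_length`` keyword arguments are simply no longer passed::

    from stable_baselines3 import SAC, HerReplayBuffer
    from stable_baselines3.common.env_util import make_vec_env
    from stable_baselines3.common.envs import BitFlippingEnv

    # Goal-conditioned toy env (Dict obs: observation/achieved_goal/desired_goal),
    # vectorized over 4 workers; HerReplayBuffer can now sample from n_envs > 1
    vec_env = make_vec_env(lambda: BitFlippingEnv(n_bits=15, continuous=True, max_steps=15), n_envs=4)

    model = SAC(
        "MultiInputPolicy",
        vec_env,
        replay_buffer_class=HerReplayBuffer,
        # no `online_sampling` / `max_episode_length` kwargs anymore in v1.8.0
        replay_buffer_kwargs=dict(n_sampled_goal=4, goal_selection_strategy="future"),
        learning_starts=1_000,
        verbose=1,
    )
    model.learn(total_timesteps=5_000)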
diff --git a/stable_baselines3/common/distributions.py b/stable_baselines3/common/distributions.py
index b1cd439a2..2b942e8f8 100644
--- a/stable_baselines3/common/distributions.py
+++ b/stable_baselines3/common/distributions.py
@@ -617,7 +617,6 @@ class TanhBijector:
     """
     Bijective transformation of a probability distribution
     using a squashing function (tanh)
-    TODO: use Pyro instead (https://pyro.ai/)
 
     :param epsilon: small value to avoid NaN due to numerical imprecision.
     """
diff --git a/stable_baselines3/common/policies.py b/stable_baselines3/common/policies.py
index 45d7930a0..c67c45cf2 100644
--- a/stable_baselines3/common/policies.py
+++ b/stable_baselines3/common/policies.py
@@ -337,11 +337,6 @@ def predict(
         :return: the model's action and the next hidden state
             (used in recurrent policies)
         """
-        # TODO (GH/1): add support for RNN policies
-        # if state is None:
-        #     state = self.initial_state
-        # if episode_start is None:
-        #     episode_start = [False for _ in range(self.n_envs)]
         # Switch to eval mode (this affects batch norm / dropout)
         self.set_training_mode(False)
diff --git a/stable_baselines3/version.txt b/stable_baselines3/version.txt
index ee006ce36..27f9cd322 100644
--- a/stable_baselines3/version.txt
+++ b/stable_baselines3/version.txt
@@ -1 +1 @@
-1.8.0a14
+1.8.0
diff --git a/tests/test_her.py b/tests/test_her.py
index f9794d5e2..57a8f0ff0 100644
--- a/tests/test_her.py
+++ b/tests/test_her.py
@@ -260,7 +260,7 @@ def env_fn():
     del model.replay_buffer
 
     with pytest.raises(AttributeError):
-        model.replay_buffer
+        model.replay_buffer  # noqa: B018
 
     # Check that there is no warning
    assert len(recwarn) == 0
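Two more of the release's additions, the more helpful ``check_env`` failure messages and the new ``stats_window_size`` argument, in a minimal sketch. Again not part of the diff: ``ToyEnv`` is a made-up environment and ``stats_window_size=10`` is an arbitrary value::

    import gym
    import numpy as np
    from gym import spaces

    from stable_baselines3 import PPO
    from stable_baselines3.common.env_checker import check_env


    class ToyEnv(gym.Env):
        """Minimal custom env using the Gym API that SB3 v1.8.0 still targets."""

        observation_space = spaces.Box(low=-1.0, high=1.0, shape=(3,), dtype=np.float32)
        action_space = spaces.Discrete(2)

        def reset(self):
            return np.zeros(3, dtype=np.float32)

        def step(self, action):
            return np.zeros(3, dtype=np.float32), 0.0, True, {}


    env = ToyEnv()
    # check_env now reports clearer failure messages when reset()/step()
    # return observations that do not match `observation_space`
    check_env(env)

    # stats_window_size (new in v1.8.0) sets how many episodes are averaged for
    # rollout/ep_rew_mean and rollout/ep_len_mean (default: 100)
    model = PPO("MlpPolicy", env, stats_window_size=10, verbose=1)
    model.learn(total_timesteps=2_048)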