From 8af640b82fde89e5e1cacf60047412bbb1df6508 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <fabrice.normandin@gmail.com>
Date: Fri, 20 Nov 2020 14:28:38 -0500
Subject: [PATCH 01/18] Bugfix: Allow nesting of Sync/Async VectorEnvs

Signed-off-by: Fabrice Normandin <fabrice.normandin@gmail.com>
---
 gym/vector/async_vector_env.py  | 20 ++++++++
 gym/vector/sync_vector_env.py   | 13 +++--
 tests/vector/test_vector_env.py | 84 ++++++++++++++++++++++++++++++++-
 3 files changed, 113 insertions(+), 4 deletions(-)

diff --git a/gym/vector/async_vector_env.py b/gym/vector/async_vector_env.py
index e9c1c99f4aa..c4844759047 100644
--- a/gym/vector/async_vector_env.py
+++ b/gym/vector/async_vector_env.py
@@ -635,6 +635,16 @@ def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
     assert shared_memory is None
     env = env_fn()
     parent_pipe.close()
+
+    def step_fn(actions):
+        observation, reward, done, info = env.step(actions)
+        # Do nothing if the env is a VectorEnv, since it will automatically
+        # reset the envs that are done if needed in the 'step' method and return
+        # the initial observation instead of the final observation.
+        if not isinstance(env, VectorEnv) and done:
+            observation = env.reset()
+        return observation, reward, done, info
+
     try:
         while True:
             command, data = pipe.recv()
@@ -699,6 +709,16 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
     env = env_fn()
     observation_space = env.observation_space
     parent_pipe.close()
+
+    def step_fn(actions):
+        observation, reward, done, info = env.step(actions)
+        # Do nothing if the env is a VectorEnv, since it will automatically
+        # reset the envs that are done if needed in the 'step' method and return
+        # the initial observation instead of the final observation.
+        if not isinstance(env, VectorEnv) and done:
+            observation = env.reset()
+        return observation, reward, done, info
+
     try:
         while True:
             command, data = pipe.recv()
diff --git a/gym/vector/sync_vector_env.py b/gym/vector/sync_vector_env.py
index 97913499bfd..64c5a3d26b9 100644
--- a/gym/vector/sync_vector_env.py
+++ b/gym/vector/sync_vector_env.py
@@ -71,8 +71,11 @@ def __init__(self, env_fns, observation_space=None, action_space=None, copy=True
         self.observations = create_empty_array(
             self.single_observation_space, n=self.num_envs, fn=np.zeros
         )
-        self._rewards = np.zeros((self.num_envs,), dtype=np.float64)
-        self._dones = np.zeros((self.num_envs,), dtype=np.bool_)
+        shape = (self.num_envs,)
+        if isinstance(self.envs[0].unwrapped, VectorEnv):
+            shape += (self.envs[0].num_envs,)
+        self._rewards = np.zeros(shape, dtype=np.float64)
+        self._dones = np.zeros(shape, dtype=np.bool_)
         self._actions = None
 
     def seed(self, seed=None):
@@ -138,7 +141,11 @@ def step_wait(self):
             observation, self._rewards[i], self._dones[i], info = env.step(action)
             if self._dones[i]:
                 info["terminal_observation"] = observation
-                observation = env.reset()
+                # Do nothing if the env is a VectorEnv, since it will automatically
+                # reset the envs that are done if needed in the 'step' method and
+                # return the initial observation instead of the final observation.
+                if not isinstance(env, VectorEnv):
+                    observation = env.reset()
             observations.append(observation)
             infos.append(info)
         self.observations = concatenate(
diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 82870d79c29..5cd5a5a9374 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,7 +1,11 @@
+from functools import partial
+import pytest
 import numpy as np
 import pytest
 
-from gym.spaces import Tuple
+from gym import spaces
+from gym.spaces import Tuple, Box
+
 from gym.vector.async_vector_env import AsyncVectorEnv
 from gym.vector.sync_vector_env import SyncVectorEnv
 from gym.vector.vector_env import VectorEnv
@@ -58,3 +62,81 @@ def test_custom_space_vector_env():
 
     assert isinstance(env.single_action_space, CustomSpace)
     assert isinstance(env.action_space, Tuple)
+
+
+@pytest.mark.parametrize('base_env', ["CubeCrash-v0", "CartPole-v0"])
+@pytest.mark.parametrize('async_inner', [False, True])
+@pytest.mark.parametrize('async_outer', [False, True])
+@pytest.mark.parametrize('inner_envs', [1, 4, 7])
+@pytest.mark.parametrize('outer_envs', [1, 4, 7])
+def test_nesting_vector_envs(base_env: str,
+                             async_inner: bool,
+                             async_outer: bool,
+                             inner_envs: int,
+                             outer_envs: int):
+    inner_vector_wrapper = AsyncVectorEnv if async_inner else SyncVectorEnv
+    # When nesting AsyncVectorEnvs, only the "innermost" envs can have
+    # `daemon=True`, otherwise the "daemonic processes are not allowed to have
+    # children" AssertionError is raised in `multiprocessing.process`.
+    outer_vector_wrapper = (
+        partial(AsyncVectorEnv, daemon=False) if async_outer
+        else SyncVectorEnv
+    )
+    
+    env = outer_vector_wrapper([  # type: ignore
+        partial(inner_vector_wrapper, [
+            make_env(base_env, inner_envs * i + j) for j in range(inner_envs)
+        ]) for i in range(outer_envs)
+    ])
+    
+    # Create a single test environment.
+    with make_env(base_env, 0)() as temp_single_env:
+        single_observation_space = temp_single_env.observation_space
+        single_action_space = temp_single_env.action_space
+
+    assert isinstance(single_observation_space, Box)
+    assert isinstance(env.observation_space, Box)
+    assert env.observation_space.shape == (outer_envs, inner_envs, *single_observation_space.shape)
+    assert env.observation_space.dtype == single_observation_space.dtype
+    
+    assert isinstance(env.action_space, spaces.Tuple)
+    assert len(env.action_space.spaces) == outer_envs
+    assert all(
+        isinstance(outer_action_space, spaces.Tuple) and
+        len(outer_action_space.spaces) == inner_envs
+        for outer_action_space in env.action_space.spaces
+    )
+    assert all([
+        len(inner_action_space.spaces) == inner_envs
+        for inner_action_space in env.action_space.spaces
+    ])
+    assert all([
+        inner_action_space.spaces[i] == single_action_space
+        for inner_action_space in env.action_space.spaces
+        for i in range(inner_envs)
+    ])
+
+    with env:
+        observations = env.reset()
+        assert observations in env.observation_space
+        
+        actions = env.action_space.sample()
+        assert actions in env.action_space
+        
+        observations, rewards, dones, _ = env.step(actions)
+        assert observations in env.observation_space
+
+    assert isinstance(env.observation_space, Box)
+    assert isinstance(observations, np.ndarray)    
+    assert observations.dtype == env.observation_space.dtype
+    assert observations.shape == (outer_envs, inner_envs) + single_observation_space.shape
+
+    assert isinstance(rewards, np.ndarray)
+    assert isinstance(rewards[0], np.ndarray)
+    assert rewards.ndim == 2
+    assert rewards.shape == (outer_envs, inner_envs)
+
+    assert isinstance(dones, np.ndarray)
+    assert dones.dtype == np.bool_
+    assert dones.ndim == 2
+    assert dones.shape == (outer_envs, inner_envs)

From 20aac892e43da7daf765ae01b6ac8e023f221930 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <fabrice.normandin@gmail.com>
Date: Fri, 20 Nov 2020 15:11:24 -0500
Subject: [PATCH 02/18] Add support for wrapped inner VectorEnvs

Signed-off-by: Fabrice Normandin <fabrice.normandin@gmail.com>
---
 gym/vector/async_vector_env.py | 4 ++--
 gym/vector/sync_vector_env.py  | 6 +++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/gym/vector/async_vector_env.py b/gym/vector/async_vector_env.py
index c4844759047..51484df43fd 100644
--- a/gym/vector/async_vector_env.py
+++ b/gym/vector/async_vector_env.py
@@ -641,7 +641,7 @@ def step_fn(actions):
         # Do nothing if the env is a VectorEnv, since it will automatically
         # reset the envs that are done if needed in the 'step' method and return
         # the initial observation instead of the final observation.
-        if not isinstance(env, VectorEnv) and done:
+        if not isinstance(env.unwrapped, VectorEnv) and done:
             observation = env.reset()
         return observation, reward, done, info
 
@@ -715,7 +715,7 @@ def step_fn(actions):
         # Do nothing if the env is a VectorEnv, since it will automatically
         # reset the envs that are done if needed in the 'step' method and return
         # the initial observation instead of the final observation.
-        if not isinstance(env, VectorEnv) and done:
+        if not isinstance(env.unwrapped, VectorEnv) and done:
             observation = env.reset()
         return observation, reward, done, info
 
diff --git a/gym/vector/sync_vector_env.py b/gym/vector/sync_vector_env.py
index 64c5a3d26b9..db73efa03c9 100644
--- a/gym/vector/sync_vector_env.py
+++ b/gym/vector/sync_vector_env.py
@@ -136,7 +136,7 @@ def step_async(self, actions):
         self._actions = iterate(self.action_space, actions)
 
     def step_wait(self):
-        observations, infos = [], []
+        observations, rewards, dones, infos = [], [], [], []
         for i, (env, action) in enumerate(zip(self.envs, self._actions)):
             observation, self._rewards[i], self._dones[i], info = env.step(action)
             if self._dones[i]:
@@ -147,7 +147,11 @@ def step_wait(self):
                 if not isinstance(env, VectorEnv):
                     observation = env.reset()
             observations.append(observation)
+            rewards.append(reward)
+            dones.append(done)
             infos.append(info)
+        self._rewards = np.stack(rewards)
+        self._dones = np.stack(dones)
         self.observations = concatenate(
             self.single_observation_space, observations, self.observations
         )

From 5f5abcf97cb1cf312a79e01110abec9f4ea590a5 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <fabrice.normandin@gmail.com>
Date: Tue, 3 Aug 2021 11:15:07 -0400
Subject: [PATCH 03/18] Minor change in test for nesting of VectorEnvs

Signed-off-by: Fabrice Normandin <fabrice.normandin@gmail.com>
---
 tests/vector/test_vector_env.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 5cd5a5a9374..30b135c619f 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -85,7 +85,7 @@ def test_nesting_vector_envs(base_env: str,
     
     env = outer_vector_wrapper([  # type: ignore
         partial(inner_vector_wrapper, [
-            make_env(base_env, inner_envs * i + j) for j in range(inner_envs)
+            make_env(base_env, seed=inner_envs * i + j) for j in range(inner_envs)
         ]) for i in range(outer_envs)
     ])
     

From 02af0da753b6d54ba1288f5933d52e5d858a7bb5 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <fabrice.normandin@gmail.com>
Date: Fri, 24 Sep 2021 16:21:39 -0400
Subject: [PATCH 04/18] Make test a bit clearer

Signed-off-by: Fabrice Normandin <fabrice.normandin@gmail.com>
---
 tests/vector/test_vector_env.py | 132 ++++++++++++++++++++++----------
 1 file changed, 90 insertions(+), 42 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 30b135c619f..7650e41935f 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,4 +1,5 @@
 from functools import partial
+from typing import Callable, Type
 import pytest
 import numpy as np
 import pytest
@@ -64,31 +65,67 @@ def test_custom_space_vector_env():
     assert isinstance(env.action_space, Tuple)
 
 
-@pytest.mark.parametrize('base_env', ["CubeCrash-v0", "CartPole-v0"])
-@pytest.mark.parametrize('async_inner', [False, True])
-@pytest.mark.parametrize('async_outer', [False, True])
-@pytest.mark.parametrize('inner_envs', [1, 4, 7])
-@pytest.mark.parametrize('outer_envs', [1, 4, 7])
-def test_nesting_vector_envs(base_env: str,
-                             async_inner: bool,
-                             async_outer: bool,
-                             inner_envs: int,
-                             outer_envs: int):
-    inner_vector_wrapper = AsyncVectorEnv if async_inner else SyncVectorEnv
-    # When nesting AsyncVectorEnvs, only the "innermost" envs can have
+@pytest.mark.parametrize("base_env", ["CubeCrash-v0", "CartPole-v0"])
+@pytest.mark.parametrize("async_inner", [False, True])
+@pytest.mark.parametrize("async_outer", [False, True])
+@pytest.mark.parametrize("n_inner_envs", [1, 4, 7])
+@pytest.mark.parametrize("n_outer_envs", [1, 4, 7])
+def test_nesting_vector_envs(
+    base_env: str,
+    async_inner: bool,
+    async_outer: bool,
+    n_inner_envs: int,
+    n_outer_envs: int,
+):
+    """Tests nesting of vector envs: Using a VectorEnv of VectorEnvs.
+
+    This can be useful for example when running a large number of environments
+    on a machine with few cores, as worker process of an AsyncVectorEnv can themselves
+    run multiple environments sequentially using a SyncVectorEnv (a.k.a. chunking).
+
+    This test uses creates `n_outer_envs` vectorized environments, each of which has
+    `n_inner_envs` inned environments. If `async_outer` is True, then the outermost
+    wrapper is an `AsyncVectorEnv` and a `SyncVectorEnv` when `async_outer` is False.
+    Same goes for the "inner" environments.
+
+    Parameters
+    ----------
+    - base_env : str
+        The base environment id.
+    - async_inner : bool
+        Wether the inner VectorEnv will be async or not.
+    - async_outer : bool
+        Wether the outer VectorEnv will be async or not.
+    - n_inner_envs : int
+        Number of inner environments.
+    - n_outer_envs : int
+        Number of outer environments.
+    """
+
+    inner_vectorenv_type: Type[VectorEnv] = (
+        AsyncVectorEnv if async_inner else SyncVectorEnv
+    )
+    outer_vectorenv_type: Type[VectorEnv] = (
+        partial(AsyncVectorEnv, daemon=False) if async_outer else SyncVectorEnv
+    )
+    # NOTE: When nesting AsyncVectorEnvs, only the "innermost" envs can have
     # `daemon=True`, otherwise the "daemonic processes are not allowed to have
     # children" AssertionError is raised in `multiprocessing.process`.
-    outer_vector_wrapper = (
-        partial(AsyncVectorEnv, daemon=False) if async_outer
-        else SyncVectorEnv
+
+    # Create the VectorEnv of VectorEnvs
+    env = outer_vectorenv_type(
+        [
+            partial(
+                inner_vectorenv_type,
+                env_fns=[
+                    make_env(base_env, seed=n_inner_envs * i + j)
+                    for j in range(n_inner_envs)
+                ],
+            )
+            for i in range(n_outer_envs)
+        ]
     )
-    
-    env = outer_vector_wrapper([  # type: ignore
-        partial(inner_vector_wrapper, [
-            make_env(base_env, seed=inner_envs * i + j) for j in range(inner_envs)
-        ]) for i in range(outer_envs)
-    ])
-    
+
     # Create a single test environment.
     with make_env(base_env, 0)() as temp_single_env:
         single_observation_space = temp_single_env.observation_space
@@ -96,47 +133,58 @@ def test_nesting_vector_envs(base_env: str,
 
     assert isinstance(single_observation_space, Box)
     assert isinstance(env.observation_space, Box)
-    assert env.observation_space.shape == (outer_envs, inner_envs, *single_observation_space.shape)
+    assert env.observation_space.shape == (
+        n_outer_envs,
+        n_inner_envs,
+        *single_observation_space.shape,
+    )
     assert env.observation_space.dtype == single_observation_space.dtype
-    
+
     assert isinstance(env.action_space, spaces.Tuple)
-    assert len(env.action_space.spaces) == outer_envs
+    assert len(env.action_space.spaces) == n_outer_envs
     assert all(
-        isinstance(outer_action_space, spaces.Tuple) and
-        len(outer_action_space.spaces) == inner_envs
+        isinstance(outer_action_space, spaces.Tuple)
+        and len(outer_action_space.spaces) == n_inner_envs
         for outer_action_space in env.action_space.spaces
     )
-    assert all([
-        len(inner_action_space.spaces) == inner_envs
-        for inner_action_space in env.action_space.spaces
-    ])
-    assert all([
-        inner_action_space.spaces[i] == single_action_space
-        for inner_action_space in env.action_space.spaces
-        for i in range(inner_envs)
-    ])
+    assert all(
+        [
+            len(inner_action_space.spaces) == n_inner_envs
+            for inner_action_space in env.action_space.spaces
+        ]
+    )
+    assert all(
+        [
+            inner_action_space.spaces[i] == single_action_space
+            for inner_action_space in env.action_space.spaces
+            for i in range(n_inner_envs)
+        ]
+    )
 
     with env:
         observations = env.reset()
         assert observations in env.observation_space
-        
+
         actions = env.action_space.sample()
         assert actions in env.action_space
-        
+
         observations, rewards, dones, _ = env.step(actions)
         assert observations in env.observation_space
 
     assert isinstance(env.observation_space, Box)
-    assert isinstance(observations, np.ndarray)    
+    assert isinstance(observations, np.ndarray)
     assert observations.dtype == env.observation_space.dtype
-    assert observations.shape == (outer_envs, inner_envs) + single_observation_space.shape
+    assert (
+        observations.shape
+        == (n_outer_envs, n_inner_envs) + single_observation_space.shape
+    )
 
     assert isinstance(rewards, np.ndarray)
     assert isinstance(rewards[0], np.ndarray)
     assert rewards.ndim == 2
-    assert rewards.shape == (outer_envs, inner_envs)
+    assert rewards.shape == (n_outer_envs, n_inner_envs)
 
     assert isinstance(dones, np.ndarray)
     assert dones.dtype == np.bool_
     assert dones.ndim == 2
-    assert dones.shape == (outer_envs, inner_envs)
+    assert dones.shape == (n_outer_envs, n_inner_envs)

From 846d02678ab2b373088812a96717581b23d46b6d Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <fabrice.normandin@gmail.com>
Date: Mon, 4 Apr 2022 23:21:43 -0400
Subject: [PATCH 05/18] Update tests a bit and fix bugs with bool(array)

Signed-off-by: Fabrice Normandin <fabrice.normandin@gmail.com>
---
 gym/vector/async_vector_env.py  |  5 ++++-
 gym/vector/sync_vector_env.py   | 13 +++++--------
 tests/vector/test_vector_env.py | 30 ++++++++----------------------
 3 files changed, 17 insertions(+), 31 deletions(-)

diff --git a/gym/vector/async_vector_env.py b/gym/vector/async_vector_env.py
index 51484df43fd..60bf87fd12f 100644
--- a/gym/vector/async_vector_env.py
+++ b/gym/vector/async_vector_env.py
@@ -737,7 +737,10 @@ def step_fn(actions):
                     pipe.send((None, True))
             elif command == "step":
                 observation, reward, done, info = env.step(data)
-                if done:
+                if isinstance(env, VectorEnv):
+                    # VectorEnvs take care of resetting the envs that are done.
+                    pass
+                elif done:
                     info["terminal_observation"] = observation
                     observation = env.reset()
                 write_to_shared_memory(
diff --git a/gym/vector/sync_vector_env.py b/gym/vector/sync_vector_env.py
index db73efa03c9..eaf3e24ab7d 100644
--- a/gym/vector/sync_vector_env.py
+++ b/gym/vector/sync_vector_env.py
@@ -139,19 +139,16 @@ def step_wait(self):
         observations, rewards, dones, infos = [], [], [], []
         for i, (env, action) in enumerate(zip(self.envs, self._actions)):
             observation, self._rewards[i], self._dones[i], info = env.step(action)
-            if self._dones[i]:
-                info["terminal_observation"] = observation
+            if isinstance(env, VectorEnv):
                 # Do nothing if the env is a VectorEnv, since it will automatically
                 # reset the envs that are done if needed in the 'step' method and
                 # return the initial observation instead of the final observation.
-                if not isinstance(env, VectorEnv):
-                    observation = env.reset()
+                pass
+            elif self._dones[i]:
+                info["terminal_observation"] = observation
+                observation = env.reset()
             observations.append(observation)
-            rewards.append(reward)
-            dones.append(done)
             infos.append(info)
-        self._rewards = np.stack(rewards)
-        self._dones = np.stack(dones)
         self.observations = concatenate(
             self.single_observation_space, observations, self.observations
         )
diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 7650e41935f..d8ada7ea055 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest
 
-from gym import spaces
+from gym import spaces, Space
 from gym.spaces import Tuple, Box
 
 from gym.vector.async_vector_env import AsyncVectorEnv
@@ -65,7 +65,7 @@ def test_custom_space_vector_env():
     assert isinstance(env.action_space, Tuple)
 
 
-@pytest.mark.parametrize("base_env", ["CubeCrash-v0", "CartPole-v0"])
+@pytest.mark.parametrize("base_env", ["Pendulum-v1", "CartPole-v1"])
 @pytest.mark.parametrize("async_inner", [False, True])
 @pytest.mark.parametrize("async_outer", [False, True])
 @pytest.mark.parametrize("n_inner_envs", [1, 4, 7])
@@ -140,26 +140,12 @@ def test_nesting_vector_envs(
     )
     assert env.observation_space.dtype == single_observation_space.dtype
 
-    assert isinstance(env.action_space, spaces.Tuple)
-    assert len(env.action_space.spaces) == n_outer_envs
-    assert all(
-        isinstance(outer_action_space, spaces.Tuple)
-        and len(outer_action_space.spaces) == n_inner_envs
-        for outer_action_space in env.action_space.spaces
-    )
-    assert all(
-        [
-            len(inner_action_space.spaces) == n_inner_envs
-            for inner_action_space in env.action_space.spaces
-        ]
-    )
-    assert all(
-        [
-            inner_action_space.spaces[i] == single_action_space
-            for inner_action_space in env.action_space.spaces
-            for i in range(n_inner_envs)
-        ]
-    )
+    from gym.vector.utils.spaces import iterate
+
+    def batch_size(space: Space) -> int:
+        return len(list(iterate(space, space.sample())))
+
+    assert batch_size(env.action_space) == n_outer_envs
 
     with env:
         observations = env.reset()

From 332d55bf8771927bb3f17559efa23213bd48a929 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <fabrice.normandin@gmail.com>
Date: Wed, 20 Apr 2022 23:13:09 -0400
Subject: [PATCH 06/18] Fix pre-commit issues

Signed-off-by: Fabrice Normandin <fabrice.normandin@gmail.com>
---
 tests/vector/test_vector_env.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index d8ada7ea055..0653f11a8fe 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,12 +1,11 @@
 from functools import partial
 from typing import Callable, Type
-import pytest
+
 import numpy as np
 import pytest
 
-from gym import spaces, Space
-from gym.spaces import Tuple, Box
-
+from gym import Space, spaces
+from gym.spaces import Box, Tuple
 from gym.vector.async_vector_env import AsyncVectorEnv
 from gym.vector.sync_vector_env import SyncVectorEnv
 from gym.vector.vector_env import VectorEnv
@@ -93,9 +92,9 @@ def test_nesting_vector_envs(
     - base_env : str
         The base environment id.
     - async_inner : bool
-        Wether the inner VectorEnv will be async or not.
+        Whether the inner VectorEnv will be async or not.
     - async_outer : bool
-        Wether the outer VectorEnv will be async or not.
+        Whether the outer VectorEnv will be async or not.
     - n_inner_envs : int
         Number of inner environments.
     - n_outer_envs : int

From 49ee20904ac3a4a1dba3020d1ebd11076848f376 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Tue, 26 Apr 2022 16:59:01 -0400
Subject: [PATCH 07/18] Remove unused imports

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/vector/test_vector_env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 0653f11a8fe..e053dff5167 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,10 +1,10 @@
 from functools import partial
-from typing import Callable, Type
+from typing import Type
 
 import numpy as np
 import pytest
 
-from gym import Space, spaces
+from gym import Space
 from gym.spaces import Box, Tuple
 from gym.vector.async_vector_env import AsyncVectorEnv
 from gym.vector.sync_vector_env import SyncVectorEnv

From f47596fc5f28338b41fb5874836e3155bdbde497 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Tue, 26 Apr 2022 17:16:56 -0400
Subject: [PATCH 08/18] Remove with-block from test

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/vector/test_vector_env.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index e053dff5167..bb208a3b6f4 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -146,15 +146,14 @@ def batch_size(space: Space) -> int:
 
     assert batch_size(env.action_space) == n_outer_envs
 
-    with env:
-        observations = env.reset()
-        assert observations in env.observation_space
+    observations = env.reset()
+    assert observations in env.observation_space
 
-        actions = env.action_space.sample()
-        assert actions in env.action_space
+    actions = env.action_space.sample()
+    assert actions in env.action_space
 
-        observations, rewards, dones, _ = env.step(actions)
-        assert observations in env.observation_space
+    observations, rewards, dones, _ = env.step(actions)
+    assert observations in env.observation_space
 
     assert isinstance(env.observation_space, Box)
     assert isinstance(observations, np.ndarray)
@@ -173,3 +172,4 @@ def batch_size(space: Space) -> int:
     assert dones.dtype == np.bool_
     assert dones.ndim == 2
     assert dones.shape == (n_outer_envs, n_inner_envs)
+    env.close()

From 5f2e3398f87aafeda7c27aa77bad3094b43dd1cb Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Tue, 26 Apr 2022 18:22:41 -0400
Subject: [PATCH 09/18] Fix pre-commit flake8 errors

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 gym/vector/sync_vector_env.py   | 6 ++----
 tests/vector/test_vector_env.py | 1 -
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/gym/vector/sync_vector_env.py b/gym/vector/sync_vector_env.py
index eaf3e24ab7d..e92398d09ff 100644
--- a/gym/vector/sync_vector_env.py
+++ b/gym/vector/sync_vector_env.py
@@ -3,8 +3,6 @@
 
 import numpy as np
 
-from gym import logger
-from gym.logger import warn
 from gym.vector.utils import concatenate, create_empty_array, iterate
 from gym.vector.vector_env import VectorEnv
 
@@ -111,7 +109,7 @@ def reset_wait(
                 kwargs["seed"] = single_seed
             if options is not None:
                 kwargs["options"] = options
-            if return_info == True:
+            if return_info is True:
                 kwargs["return_info"] = return_info
 
             if not return_info:
@@ -136,7 +134,7 @@ def step_async(self, actions):
         self._actions = iterate(self.action_space, actions)
 
     def step_wait(self):
-        observations, rewards, dones, infos = [], [], [], []
+        observations, infos = [], []
         for i, (env, action) in enumerate(zip(self.envs, self._actions)):
             observation, self._rewards[i], self._dones[i], info = env.step(action)
             if isinstance(env, VectorEnv):
diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index bb208a3b6f4..fffcf3a7397 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -128,7 +128,6 @@ def test_nesting_vector_envs(
     # Create a single test environment.
     with make_env(base_env, 0)() as temp_single_env:
         single_observation_space = temp_single_env.observation_space
-        single_action_space = temp_single_env.action_space
 
     assert isinstance(single_observation_space, Box)
     assert isinstance(env.observation_space, Box)

From fcebd76b000980bac0d261b5bff1d6ecc48645ab Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Mon, 2 May 2022 13:45:29 -0400
Subject: [PATCH 10/18] Remove unused step_fn in async_vector_env.py

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 gym/vector/async_vector_env.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/gym/vector/async_vector_env.py b/gym/vector/async_vector_env.py
index 60bf87fd12f..c17867d7b98 100644
--- a/gym/vector/async_vector_env.py
+++ b/gym/vector/async_vector_env.py
@@ -710,15 +710,6 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
     observation_space = env.observation_space
     parent_pipe.close()
 
-    def step_fn(actions):
-        observation, reward, done, info = env.step(actions)
-        # Do nothing if the env is a VectorEnv, since it will automatically
-        # reset the envs that are done if needed in the 'step' method and return
-        # the initial observation instead of the final observation.
-        if not isinstance(env.unwrapped, VectorEnv) and done:
-            observation = env.reset()
-        return observation, reward, done, info
-
     try:
         while True:
             command, data = pipe.recv()

From 055087b6ae7711f9565f681b2f6befe563f6ba3e Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Mon, 2 May 2022 13:48:30 -0400
Subject: [PATCH 11/18] Remove if block with pass statement

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 gym/vector/async_vector_env.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/gym/vector/async_vector_env.py b/gym/vector/async_vector_env.py
index c17867d7b98..84acaf8b3fa 100644
--- a/gym/vector/async_vector_env.py
+++ b/gym/vector/async_vector_env.py
@@ -728,10 +728,8 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
                     pipe.send((None, True))
             elif command == "step":
                 observation, reward, done, info = env.step(data)
-                if isinstance(env, VectorEnv):
-                    # VectorEnvs take care of resetting the envs that are done.
-                    pass
-                elif done:
+                # NOTE: VectorEnvs take care of resetting the envs that are done.
+                if not isinstance(env.unwrapped, VectorEnv) and done:
                     info["terminal_observation"] = observation
                     observation = env.reset()
                 write_to_shared_memory(

From 9c0e308cc03aff71ffca12105f9e9f5085451d97 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Mon, 2 May 2022 17:17:41 -0400
Subject: [PATCH 12/18] Parametrize with all envs, make test more robust

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/vector/test_vector_env.py | 250 +++++++++++++++++++++++---------
 1 file changed, 184 insertions(+), 66 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index fffcf3a7397..7b6e79b7eef 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,14 +1,20 @@
 from functools import partial
-from typing import Type
+from typing import Callable, Type
 
 import numpy as np
 import pytest
+from numpy.testing import assert_allclose
 
+import gym
 from gym import Space
+from gym.core import ObsType
 from gym.spaces import Box, Tuple
 from gym.vector.async_vector_env import AsyncVectorEnv
 from gym.vector.sync_vector_env import SyncVectorEnv
+from gym.vector.utils.numpy_utils import concatenate
+from gym.vector.utils.spaces import iterate
 from gym.vector.vector_env import VectorEnv
+from gym.wrappers import AutoResetWrapper
 from tests.vector.utils import CustomSpace, make_env
 
 
@@ -64,13 +70,43 @@ def test_custom_space_vector_env():
     assert isinstance(env.action_space, Tuple)
 
 
-@pytest.mark.parametrize("base_env", ["Pendulum-v1", "CartPole-v1"])
+def _batch_size(space: Space) -> int:
+    return len(list(iterate(space, space.sample())))
+
+
+from gym.envs.registration import EnvSpec, registry
+
+
+def _is_local_env_spec(spec: EnvSpec) -> bool:
+    if not isinstance(spec.entry_point, str):
+        return False
+    # If it is one of the envs
+    return any(
+        spec.entry_point.startswith(f"gym.envs.{package}")
+        for package in ["classic_control", "toy_text"]
+    )
+
+
+# Only use 'local' envs for testing
+# NOTE: we can't instantiate envs from Atari when in the gym repository folder.
+local_env_ids = [spec.id for spec in registry.all() if _is_local_env_spec(spec)]
+
+
+def _make_seeded_env(env_id: str, seed: int) -> gym.Env:
+    env = gym.make(env_id)
+    env.seed(seed)
+    env.action_space.seed(seed)
+    env.observation_space.seed(seed)
+    return env
+
+
+@pytest.mark.parametrize("env_id", local_env_ids)
 @pytest.mark.parametrize("async_inner", [False, True])
 @pytest.mark.parametrize("async_outer", [False, True])
 @pytest.mark.parametrize("n_inner_envs", [1, 4, 7])
 @pytest.mark.parametrize("n_outer_envs", [1, 4, 7])
 def test_nesting_vector_envs(
-    base_env: str,
+    env_id: str,
     async_inner: bool,
     async_outer: bool,
     n_inner_envs: int,
@@ -89,8 +125,8 @@ def test_nesting_vector_envs(
 
     Parameters
     ----------
-    - base_env : str
-        The base environment id.
+    - env_id : str
+        ID of a gym environment to use as the base environment.
     - async_inner : bool
         Whether the inner VectorEnv will be async or not.
     - async_outer : bool
@@ -101,74 +137,156 @@ def test_nesting_vector_envs(
         Number of outer environments.
     """
 
-    inner_vectorenv_type: Type[VectorEnv] = (
-        AsyncVectorEnv if async_inner else SyncVectorEnv
-    )
-    outer_vectorenv_type: Type[VectorEnv] = (
-        partial(AsyncVectorEnv, daemon=False) if async_outer else SyncVectorEnv
-    )
     # NOTE: When nesting AsyncVectorEnvs, only the "innermost" envs can have
     # `daemon=True`, otherwise the "daemonic processes are not allowed to have
     # children" AssertionError is raised in `multiprocessing.process`.
-
-    # Create the VectorEnv of VectorEnvs
-    env = outer_vectorenv_type(
-        [
-            partial(
-                inner_vectorenv_type,
-                env_fns=[
-                    make_env(base_env, seed=n_inner_envs * i + j)
-                    for j in range(n_inner_envs)
-                ],
-            )
-            for i in range(n_outer_envs)
-        ]
-    )
-
-    # Create a single test environment.
-    with make_env(base_env, 0)() as temp_single_env:
-        single_observation_space = temp_single_env.observation_space
-
-    assert isinstance(single_observation_space, Box)
-    assert isinstance(env.observation_space, Box)
-    assert env.observation_space.shape == (
-        n_outer_envs,
-        n_inner_envs,
-        *single_observation_space.shape,
+    inner_vectorenv_type = AsyncVectorEnv if async_inner else SyncVectorEnv
+    outer_vectorenv_type = (
+        partial(AsyncVectorEnv, daemon=False) if async_outer else SyncVectorEnv
     )
-    assert env.observation_space.dtype == single_observation_space.dtype
-
-    from gym.vector.utils.spaces import iterate
-
-    def batch_size(space: Space) -> int:
-        return len(list(iterate(space, space.sample())))
 
-    assert batch_size(env.action_space) == n_outer_envs
+    base_seed = 123
 
-    observations = env.reset()
-    assert observations in env.observation_space
-
-    actions = env.action_space.sample()
-    assert actions in env.action_space
-
-    observations, rewards, dones, _ = env.step(actions)
-    assert observations in env.observation_space
-
-    assert isinstance(env.observation_space, Box)
-    assert isinstance(observations, np.ndarray)
-    assert observations.dtype == env.observation_space.dtype
-    assert (
-        observations.shape
-        == (n_outer_envs, n_inner_envs) + single_observation_space.shape
+    # Create the functions for the envs at each index (i, j)
+    env_fns_grid = [
+        [
+            partial(_make_seeded_env, env_id, seed=base_seed)  # + n_inner_envs * i + j)
+            for j in range(n_inner_envs)
+        ]
+        for i in range(n_outer_envs)
+    ]
+
+    outer_env_fns = [
+        partial(
+            inner_vectorenv_type,
+            env_fns=inner_env_fns,
+        )
+        for inner_env_fns in env_fns_grid
+    ]
+
+    env = outer_vectorenv_type(env_fns=outer_env_fns)
+
+    # IDEA: Note the initial obs, action, next_obs, reward, done, info in all these envs, and then
+    # compare with those of the vectorenv.
+
+    base_obs: list[list] = np.zeros([n_outer_envs, n_inner_envs]).tolist()
+    base_act: list[list] = np.zeros([n_outer_envs, n_inner_envs]).tolist()
+    base_next_obs: list[list] = np.zeros([n_outer_envs, n_inner_envs]).tolist()
+    base_reward = np.zeros(shape=(n_outer_envs, n_inner_envs), dtype=float)
+    base_done = np.zeros(shape=(n_outer_envs, n_inner_envs), dtype=bool)
+    base_info: list[list[dict]] = np.zeros([n_outer_envs, n_inner_envs]).tolist()
+
+    # Create an env temporarily to get the observation and action spaces.
+    with env_fns_grid[0][0]() as temp_env:
+        base_observation_space = temp_env.observation_space
+        base_action_space = temp_env.action_space
+
+    # Go through each index (i, j) and create the env with the seed at that index, getting the
+    # initial state, action, next_obs, reward, done, info, etc.
+    # This will then be compared with the states produced by the VectorEnv equivalent.
+
+    for i in range(n_outer_envs):
+        for j in range(n_inner_envs):
+            # Create a properly seeded environment. Then, reset, and step once.
+            with env_fns_grid[i][j]() as temp_env:
+
+                # Add the AutoResetWrapper to the individual environments to replicate what will
+                # happen in the VectorEnv. (See the note below).
+                temp_env = AutoResetWrapper(temp_env)
+
+                assert temp_env.observation_space == base_observation_space
+                assert temp_env.action_space == base_action_space
+
+                # NOTE: This will change a bit once the AutoResetWrapper is used in the VectorEnvs.
+                base_obs[i][j], base_info[i][j] = temp_env.reset(return_info=True)
+                base_act[i][j] = base_action_space.sample()
+                (
+                    base_next_obs[i][j],
+                    base_reward[i][j],
+                    base_done[i][j],
+                    base_info[i][j],
+                ) = temp_env.step(base_act[i][j])
+
+    obs = env.reset()
+
+    # NOTE: creating these values so they aren't possibly unbound below and type hinters can relax.
+    i = -1
+    j = -1
+
+    for i, obs_i in enumerate(iterate(env.observation_space, obs)):
+        for j, obs_ij in enumerate(iterate(env.single_observation_space, obs_i)):
+            assert obs_ij in base_observation_space
+            # Assert that each observation is what we'd expect (following the single env.)
+            assert_allclose(obs_ij, base_obs[i][j])
+
+        assert j == n_inner_envs - 1
+    assert i == n_outer_envs - 1
+
+    # NOTE: Sampling an action using env.action_space.sample() would give a different value than
+    # if we sampled actions from each env individually and batched them.
+    # In order to check that everything is working correctly, we'll instead create the action by
+    # concatenating the individual actions, and pass it to the vectorenv, to check if that will
+    # recreate the same result for all individual envs.
+    # _ = env.action_space.sample()
+    action = concatenate(
+        env.single_action_space,
+        [
+            concatenate(base_action_space, base_act[i], out=None)
+            for i in range(n_outer_envs)
+        ],
+        out=None,
     )
 
-    assert isinstance(rewards, np.ndarray)
-    assert isinstance(rewards[0], np.ndarray)
-    assert rewards.ndim == 2
-    assert rewards.shape == (n_outer_envs, n_inner_envs)
+    for i, action_i in enumerate(iterate(env.action_space, action)):
+        for j, action_ij in enumerate(iterate(env.single_action_space, action_i)):
+            assert action_ij in base_action_space
+            # Assert that each observation is what we'd expect (following the single env.)
+            # assert_allclose(act_ij, base_act)
+        assert j == n_inner_envs - 1
+    assert i == n_outer_envs - 1
+
+    # Perform a single step:
+
+    next_obs, reward, done, info = env.step(action)
+
+    for i, next_obs_i in enumerate(iterate(env.observation_space, next_obs)):
+        for j, next_obs_ij in enumerate(
+            iterate(env.single_observation_space, next_obs_i)
+        ):
+            assert next_obs_ij in base_observation_space
+            # Assert that each next observation is what we'd expect (following the single env.)
+            assert_allclose(next_obs_ij, base_next_obs[i][j])
+
+    for i, rew_i in enumerate(reward):
+        for j, rew_ij in enumerate(rew_i):
+            # Assert that each reward is what we'd expect (following the single env.)
+            assert_allclose(rew_ij, base_reward[i][j])
+        assert j == n_inner_envs - 1
+    assert i == n_outer_envs - 1
+
+    for i, done_i in enumerate(done):
+        for j, done_ij in enumerate(done_i):
+            assert done_ij == base_done[i][j]
+        assert j == n_inner_envs - 1
+    assert i == n_outer_envs - 1
+
+    for i, info_i in enumerate(info):
+        for j, info_ij in enumerate(info_i):
+            # NOTE: Since the VectorEnvs don't apply an AutoResetWrapper to the individual envs,
+            # the autoreset logic is in the 'worker' code, and this code doesn't add the
+            # 'terminal_info' entry in the 'info' dictionary.
+            # NOTE: This test-case is forward-compatible in case the VectorEnvs do end up adding
+            # the 'terminal_info' entry in the 'info' dictionary.
+            expected_info = base_info[i][j].copy()
+            if (
+                info_ij != base_info[i][j]
+                and ("terminal_info" in expected_info)
+                and ("terminal_info" not in info_ij)
+            ):
+                # Remove the 'terminal_info' key from the expected info dict and compare as before.
+                expected_info.pop("terminal_info")
+            assert info_ij == expected_info
+        assert j == n_inner_envs - 1
+    assert i == n_outer_envs - 1
 
-    assert isinstance(dones, np.ndarray)
-    assert dones.dtype == np.bool_
-    assert dones.ndim == 2
-    assert dones.shape == (n_outer_envs, n_inner_envs)
     env.close()

From c9f4d5059b6020a7d99fee198b3a85fa827416e7 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Mon, 2 May 2022 18:02:07 -0400
Subject: [PATCH 13/18] Fix warnings in test

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/vector/test_vector_env.py | 44 +++++++++++++++++----------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 7b6e79b7eef..25e944bf4bc 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -1,14 +1,13 @@
+import warnings
 from functools import partial
-from typing import Callable, Type
 
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
 
 import gym
-from gym import Space
-from gym.core import ObsType
-from gym.spaces import Box, Tuple
+from gym.envs.registration import EnvSpec, registry
+from gym.spaces import Tuple
 from gym.vector.async_vector_env import AsyncVectorEnv
 from gym.vector.sync_vector_env import SyncVectorEnv
 from gym.vector.utils.numpy_utils import concatenate
@@ -70,17 +69,10 @@ def test_custom_space_vector_env():
     assert isinstance(env.action_space, Tuple)
 
 
-def _batch_size(space: Space) -> int:
-    return len(list(iterate(space, space.sample())))
-
-
-from gym.envs.registration import EnvSpec, registry
-
-
 def _is_local_env_spec(spec: EnvSpec) -> bool:
     if not isinstance(spec.entry_point, str):
         return False
-    # If it is one of the envs
+    # If it is one of the deprecated envs, ignore the warning.
     return any(
         spec.entry_point.startswith(f"gym.envs.{package}")
         for package in ["classic_control", "toy_text"]
@@ -93,11 +85,14 @@ def _is_local_env_spec(spec: EnvSpec) -> bool:
 
 
 def _make_seeded_env(env_id: str, seed: int) -> gym.Env:
-    env = gym.make(env_id)
-    env.seed(seed)
-    env.action_space.seed(seed)
-    env.observation_space.seed(seed)
-    return env
+    # Ignore any depcrecated environment warnings, since we will always need to test those.
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore", category=UserWarning)
+        env = gym.make(env_id)
+        # env.seed(seed)
+        env.action_space.seed(seed)
+        env.observation_space.seed(seed)
+        return env
 
 
 @pytest.mark.parametrize("env_id", local_env_ids)
@@ -148,9 +143,14 @@ def test_nesting_vector_envs(
     base_seed = 123
 
     # Create the functions for the envs at each index (i, j)
+    seeds = [
+        [base_seed + i * n_inner_envs + j for j in range(n_inner_envs)]
+        for i in range(n_outer_envs)
+    ]
+
     env_fns_grid = [
         [
-            partial(_make_seeded_env, env_id, seed=base_seed)  # + n_inner_envs * i + j)
+            partial(_make_seeded_env, env_id, seed=seeds[i][j])
             for j in range(n_inner_envs)
         ]
         for i in range(n_outer_envs)
@@ -166,7 +166,7 @@ def test_nesting_vector_envs(
 
     env = outer_vectorenv_type(env_fns=outer_env_fns)
 
-    # IDEA: Note the initial obs, action, next_obs, reward, done, info in all these envs, and then
+    # Note the initial obs, action, next_obs, reward, done, info in all these envs, and then
     # compare with those of the vectorenv.
 
     base_obs: list[list] = np.zeros([n_outer_envs, n_inner_envs]).tolist()
@@ -198,7 +198,9 @@ def test_nesting_vector_envs(
                 assert temp_env.action_space == base_action_space
 
                 # NOTE: This will change a bit once the AutoResetWrapper is used in the VectorEnvs.
-                base_obs[i][j], base_info[i][j] = temp_env.reset(return_info=True)
+                base_obs[i][j], base_info[i][j] = temp_env.reset(
+                    seed=seeds[i][j], return_info=True
+                )
                 base_act[i][j] = base_action_space.sample()
                 (
                     base_next_obs[i][j],
@@ -207,7 +209,7 @@ def test_nesting_vector_envs(
                     base_info[i][j],
                 ) = temp_env.step(base_act[i][j])
 
-    obs = env.reset()
+    obs = env.reset(seed=seeds)
 
     # NOTE: creating these values so they aren't possibly unbound below and type hinters can relax.
     i = -1

From 15208e3c67c89df95792e2291bd9b62107e838a1 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Mon, 2 May 2022 20:21:14 -0400
Subject: [PATCH 14/18] Remove other unused step_fn

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 gym/vector/async_vector_env.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/gym/vector/async_vector_env.py b/gym/vector/async_vector_env.py
index 84acaf8b3fa..7e3088d01bd 100644
--- a/gym/vector/async_vector_env.py
+++ b/gym/vector/async_vector_env.py
@@ -636,15 +636,6 @@ def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
     env = env_fn()
     parent_pipe.close()
 
-    def step_fn(actions):
-        observation, reward, done, info = env.step(actions)
-        # Do nothing if the env is a VectorEnv, since it will automatically
-        # reset the envs that are done if needed in the 'step' method and return
-        # the initial observation instead of the final observation.
-        if not isinstance(env.unwrapped, VectorEnv) and done:
-            observation = env.reset()
-        return observation, reward, done, info
-
     try:
         while True:
             command, data = pipe.recv()

From c9cfe2e757540cf8dde9aa8508c8452ac8fde295 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Mon, 2 May 2022 20:28:36 -0400
Subject: [PATCH 15/18] Reuse should_skip_env_for_test logic

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/vector/test_vector_env.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 25e944bf4bc..17760cd69d4 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -14,6 +14,7 @@
 from gym.vector.utils.spaces import iterate
 from gym.vector.vector_env import VectorEnv
 from gym.wrappers import AutoResetWrapper
+from tests.envs.spec_list import should_skip_env_spec_for_tests
 from tests.vector.utils import CustomSpace, make_env
 
 
@@ -69,19 +70,12 @@ def test_custom_space_vector_env():
     assert isinstance(env.action_space, Tuple)
 
 
-def _is_local_env_spec(spec: EnvSpec) -> bool:
-    if not isinstance(spec.entry_point, str):
-        return False
-    # If it is one of the deprecated envs, ignore the warning.
-    return any(
-        spec.entry_point.startswith(f"gym.envs.{package}")
-        for package in ["classic_control", "toy_text"]
-    )
-
-
-# Only use 'local' envs for testing
-# NOTE: we can't instantiate envs from Atari when in the gym repository folder.
-local_env_ids = [spec.id for spec in registry.all() if _is_local_env_spec(spec)]
+# Only use 'local' envs for testing.
+# NOTE: this won't work if the atari dependencies are installed, as we can't gym.make() them when
+# inside the git repo folder.
+local_env_ids = [
+    spec.id for spec in registry.all() if not should_skip_env_spec_for_tests(spec)
+]
 
 
 def _make_seeded_env(env_id: str, seed: int) -> gym.Env:

From b08c2b46b43e7ea34ddc22d94061f2c8ca0c0999 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Tue, 3 May 2022 16:38:52 -0400
Subject: [PATCH 16/18] Minor touch-ups

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/vector/test_vector_env.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 17760cd69d4..626d0de484f 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -153,9 +153,9 @@ def test_nesting_vector_envs(
     outer_env_fns = [
         partial(
             inner_vectorenv_type,
-            env_fns=inner_env_fns,
+            env_fns=env_fns_grid[i],
         )
-        for inner_env_fns in env_fns_grid
+        for i in range(n_outer_envs)
     ]
 
     env = outer_vectorenv_type(env_fns=outer_env_fns)

From 5791789f4a50a8bd31c491b00db9b5e4e1d9dcae Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Wed, 18 May 2022 17:07:48 -0400
Subject: [PATCH 17/18] Fix typos and type errors in test util function

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/envs/spec_list.py         | 9 ++++++++-
 tests/vector/test_vector_env.py | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/tests/envs/spec_list.py b/tests/envs/spec_list.py
index 11c816f6bd1..5ed9221d987 100644
--- a/tests/envs/spec_list.py
+++ b/tests/envs/spec_list.py
@@ -1,6 +1,7 @@
 import os
 
 from gym import envs, logger
+from gym.envs.registration import EnvSpec
 
 SKIP_MUJOCO_WARNING_MESSAGE = (
     "Cannot run mujoco test (either license key not found or mujoco not"
@@ -16,10 +17,16 @@
         skip_mujoco = True
 
 
-def should_skip_env_spec_for_tests(spec):
+def should_skip_env_spec_for_tests(spec: EnvSpec) -> bool:
     # We skip tests for envs that require dependencies or are otherwise
     # troublesome to run frequently
     ep = spec.entry_point
+
+    if not isinstance(ep, str):
+        # Never skip entry points that aren't strings.
+        # (Also avoids type checking errors below)
+        return False
+
     # Skip mujoco tests for pull request CI
     if skip_mujoco and ep.startswith("gym.envs.mujoco"):
         return True
diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index 626d0de484f..a29fd3a0661 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -79,7 +79,7 @@ def test_custom_space_vector_env():
 
 
 def _make_seeded_env(env_id: str, seed: int) -> gym.Env:
-    # Ignore any depcrecated environment warnings, since we will always need to test those.
+    # Ignore any deprecated environment warnings, since we will always need to test those.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore", category=UserWarning)
         env = gym.make(env_id)

From c62ccebcfc2d9d8870ad263dbe1461ee8f194516 Mon Sep 17 00:00:00 2001
From: Fabrice Normandin <normandf@mila.quebec>
Date: Wed, 18 May 2022 17:18:54 -0400
Subject: [PATCH 18/18] Fix error in test, make quicker, fix pre-commit

Signed-off-by: Fabrice Normandin <normandf@mila.quebec>
---
 tests/vector/test_vector_env.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/vector/test_vector_env.py b/tests/vector/test_vector_env.py
index a29fd3a0661..f08eef62281 100644
--- a/tests/vector/test_vector_env.py
+++ b/tests/vector/test_vector_env.py
@@ -6,7 +6,7 @@
 from numpy.testing import assert_allclose
 
 import gym
-from gym.envs.registration import EnvSpec, registry
+from gym.envs.registration import registry
 from gym.spaces import Tuple
 from gym.vector.async_vector_env import AsyncVectorEnv
 from gym.vector.sync_vector_env import SyncVectorEnv
@@ -74,7 +74,7 @@ def test_custom_space_vector_env():
 # NOTE: this won't work if the atari dependencies are installed, as we can't gym.make() them when
 # inside the git repo folder.
 local_env_ids = [
-    spec.id for spec in registry.all() if not should_skip_env_spec_for_tests(spec)
+    spec.id for spec in registry.values() if not should_skip_env_spec_for_tests(spec)
 ]
 
 
@@ -92,8 +92,8 @@ def _make_seeded_env(env_id: str, seed: int) -> gym.Env:
 @pytest.mark.parametrize("env_id", local_env_ids)
 @pytest.mark.parametrize("async_inner", [False, True])
 @pytest.mark.parametrize("async_outer", [False, True])
-@pytest.mark.parametrize("n_inner_envs", [1, 4, 7])
-@pytest.mark.parametrize("n_outer_envs", [1, 4, 7])
+@pytest.mark.parametrize("n_inner_envs", [1, 2, 3])
+@pytest.mark.parametrize("n_outer_envs", [1, 2, 3])
 def test_nesting_vector_envs(
     env_id: str,
     async_inner: bool,
@@ -236,8 +236,8 @@ def test_nesting_vector_envs(
     for i, action_i in enumerate(iterate(env.action_space, action)):
         for j, action_ij in enumerate(iterate(env.single_action_space, action_i)):
             assert action_ij in base_action_space
-            # Assert that each observation is what we'd expect (following the single env.)
-            # assert_allclose(act_ij, base_act)
+            # Assert that each action is what we'd expect (following the single env.)
+            assert_allclose(action_ij, base_act[i][j])
         assert j == n_inner_envs - 1
     assert i == n_outer_envs - 1