openai · lebrice · Oct 18, 2020 · Oct 18, 2020 · Nov 2, 2020 · Nov 2, 2020
diff --git a/gym/vector/__init__.py b/gym/vector/__init__.py
@@ -3,11 +3,19 @@
 except ImportError:
     Iterable = (tuple, list)
 
+from gym.vector.batched_vector_env import BatchedVectorEnv
 from gym.vector.async_vector_env import AsyncVectorEnv
 from gym.vector.sync_vector_env import SyncVectorEnv
 from gym.vector.vector_env import VectorEnv, VectorEnvWrapper
 
-__all__ = ["AsyncVectorEnv", "SyncVectorEnv", "VectorEnv", "VectorEnvWrapper", "make"]
+__all__ = [
+    "BatchedVectorEnv",
+    "AsyncVectorEnv",
+    "SyncVectorEnv",
+    "VectorEnv",
+    "VectorEnvWrapper",
+    "make",
+]
 
 
 def make(id, num_envs=1, asynchronous=True, wrappers=None, **kwargs):

diff --git a/gym/vector/async_vector_env.py b/gym/vector/async_vector_env.py
@@ -403,6 +403,16 @@ def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
     assert shared_memory is None
     env = env_fn()
     parent_pipe.close()
+
+    def step_fn(actions):
+        observation, reward, done, info = env.step(actions)
+        # Reset here only for a plain (non-vector) env whose episode is done,
+        # returning its first observation in place of the final one. A VectorEnv
+        # is skipped: it is expected to reset finished sub-envs in its 'step'.
+        if not isinstance(env, VectorEnv) and done:
+            observation = env.reset()
+        return observation, reward, done, info
+
     try:
         while True:
             command, data = pipe.recv()
@@ -440,6 +450,16 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
     env = env_fn()
     observation_space = env.observation_space
     parent_pipe.close()
+
+    def step_fn(actions):
+        observation, reward, done, info = env.step(actions)
+        # Only a plain (non-vector) env that is done gets reset here; a VectorEnv
+        # is expected to reset finished sub-envs itself inside its 'step' method.
+        # NOTE(review): the "step" command below still calls env.step - confirm.
+        if not isinstance(env, VectorEnv) and done:
+            observation = env.reset()
+        return observation, reward, done, info
+
     try:
         while True:
             command, data = pipe.recv()
@@ -451,7 +471,8 @@ def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error
                 pipe.send((None, True))
             elif command == "step":
                 observation, reward, done, info = env.step(data)
-                if done:
+                # BUG: See PR #2104: Currently unable to nest VectorEnvs because of this
+                if (done if isinstance(done, bool) else all(done)):
                     observation = env.reset()
                 write_to_shared_memory(
                     index, observation, shared_memory, observation_space