From b4fc19d9919227c76a95a68b5d4e6106e95fa416 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Thu, 4 Jan 2024 22:23:49 -0800
Subject: [PATCH 01/60] Development dockerfile

---
 docker/dev.dockerfile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/docker/dev.dockerfile b/docker/dev.dockerfile
index 7226c879..3fd936ee 100644
--- a/docker/dev.dockerfile
+++ b/docker/dev.dockerfile
@@ -1,13 +1,13 @@
 # Need docker >= 20.10.9, see https://stackoverflow.com/questions/71941032/why-i-cannot-run-apt-update-inside-a-fresh-ubuntu22-04
 
-FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04
+FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
 
 ARG DEBIAN_FRONTEND=noninteractive
 ARG HOME=/root
 ARG PATH=$PATH:$HOME/go/bin
 
 RUN apt-get update \
-    && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl zsh tmux vim \
+    && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl zsh tmux vim ssh \
     && rm -rf /var/lib/apt/lists/*
 RUN ln -s /usr/bin/python3 /usr/bin/python
 RUN ln -sf /usr/lib/go-1.18/bin/go /usr/bin/go
@@ -20,6 +20,8 @@ RUN echo "set-option -g default-shell /bin/zsh" >> .tmux.conf.local
 RUN echo "set-option -g history-limit 10000" >> .tmux.conf.local
 RUN echo "export PATH=$PATH:$HOME/go/bin" >> .zshrc
 
+ENV USE_BAZEL_VERSION=6.4.0
+
 RUN go install github.com/bazelbuild/bazelisk@latest && ln -sf $HOME/go/bin/bazelisk $HOME/go/bin/bazel
 RUN go install github.com/bazelbuild/buildtools/buildifier@latest
 RUN $HOME/go/bin/bazel version
@@ -31,3 +33,6 @@ RUN apt-get update \
     && rm -rf /var/lib/apt/lists/*
 
 WORKDIR /app
+COPY . .
+
+RUN make bazel-build

From 954d7fbab260b56ee4d6a351353fb131f7465339 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 5 Jan 2024 14:20:28 -0800
Subject: [PATCH 02/60] Dummy Sokoban environment

---
 envpool/entry.py                           |  2 +
 envpool/sokoban/BUILD                      | 53 ++++++++++++++
 envpool/sokoban/__init__.py                | 17 +++++
 envpool/sokoban/registration.py            | 12 +++
 envpool/sokoban/sokoban_envpool.cc         | 12 +++
 envpool/sokoban/sokoban_envpool.h          | 81 +++++++++++++++++++++
 envpool/sokoban/sokoban_py_envpool_test.py | 85 ++++++++++++++++++++++
 7 files changed, 262 insertions(+)
 create mode 100644 envpool/sokoban/BUILD
 create mode 100644 envpool/sokoban/__init__.py
 create mode 100644 envpool/sokoban/registration.py
 create mode 100644 envpool/sokoban/sokoban_envpool.cc
 create mode 100644 envpool/sokoban/sokoban_envpool.h
 create mode 100644 envpool/sokoban/sokoban_py_envpool_test.py

diff --git a/envpool/entry.py b/envpool/entry.py
index eed70a29..18a881a5 100644
--- a/envpool/entry.py
+++ b/envpool/entry.py
@@ -52,3 +52,5 @@
   import envpool.vizdoom.registration  # noqa: F401
 except ImportError:
   pass
+
+import envpool.sokoban.registration  # noqa: F401
diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
new file mode 100644
index 00000000..f1425d59
--- /dev/null
+++ b/envpool/sokoban/BUILD
@@ -0,0 +1,53 @@
+load("@pip_requirements//:requirements.bzl", "requirement")
+load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
+
+py_library(
+    name = "sokoban",
+    srcs = ["__init__.py"],
+    data = [":sokoban_envpool.so"],
+    deps = ["//envpool/python:api"],
+)
+
+cc_library(
+    name = "sokoban_envpool_h",
+    hdrs = ["sokoban_envpool.h"],
+    deps = [
+        "//envpool/core:async_envpool",
+        "//envpool/core:env",
+        "//envpool/core:env_spec",
+    ],
+)
+
+# cc_test(
+#    name = "sokoban_envpool_test",
+#    size = "enormous",
+#    srcs = ["sokoban_envpool_test.cc"],
+#    deps = [
+#        ":sokoban_envpool_h",
+#        "@com_google_googletest//:gtest_main",
+#    ],
+# )
+
+py_test(
+    name = "sokoban_py_envpool_test",
+    srcs = ["sokoban_py_envpool_test.py"],
+    deps = [
+        ":sokoban",
+        requirement("numpy"),
+        requirement("absl-py"),
+    ],
+)
+
+pybind_extension(
+    name = "sokoban_envpool",
+    srcs = [
+        "sokoban_envpool.cc",
+    ],
+    linkopts = [
+        "-ldl",
+    ],
+    deps = [
+        ":sokoban_envpool_h",
+        "//envpool/core:py_envpool",
+    ],
+)
diff --git a/envpool/sokoban/__init__.py b/envpool/sokoban/__init__.py
new file mode 100644
index 00000000..0e785494
--- /dev/null
+++ b/envpool/sokoban/__init__.py
@@ -0,0 +1,17 @@
+from envpool.python.api import py_env
+
+from .sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec
+
+(
+    SokobanEnvSpec,
+    SokobanDMEnvPool,
+    SokobanGymEnvPool,
+    SokobanGymnasiumEnvPool,
+) = py_env(_SokobanEnvSpec, _SokobanEnvPool)
+
+__all__ = [
+    "SokobanEnvSpec",
+    "SokobanDMEnvPool",
+    "SokobanGymEnvPool",
+    "SokobanGymnasiumEnvPool",
+]
diff --git a/envpool/sokoban/registration.py b/envpool/sokoban/registration.py
new file mode 100644
index 00000000..490b1a34
--- /dev/null
+++ b/envpool/sokoban/registration.py
@@ -0,0 +1,12 @@
+from envpool.registration import register
+
+register(
+    task_id="Sokoban-v0",
+    import_path="envpool.sokoban",
+    spec_cls="SokobanEnvSpec",
+    dm_cls="SokobanDMEnvPool",
+    gym_cls="SokobanGymEnvPool",
+    gymnasium_cls="SokobanGymnasiumEnvPool",
+    max_episode_steps=60,
+    reward_step=-0.1,
+)
diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
new file mode 100644
index 00000000..07b433ee
--- /dev/null
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -0,0 +1,12 @@
+#include "envpool/sokoban/sokoban_envpool.h"
+#include "envpool/core/py_envpool.h"
+
+// generate python-side (raw) SokobanEnvSpec
+using SokobanEnvSpec = PyEnvSpec<sokoban::SokobanEnvSpec>;
+// generate python-side (raw) SokobanEnvPool
+using SokobanEnvPool = PyEnvPool<sokoban::SokobanEnvPool>;
+
+// generate sokoban_envpool.so
+PYBIND11_MODULE(sokoban_envpool, m) {
+  REGISTER(m, SokobanEnvSpec, SokobanEnvPool)
+}
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
new file mode 100644
index 00000000..46d33945
--- /dev/null
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -0,0 +1,81 @@
+#ifndef ENVPOOL_SOKOBAN_H_
+#define ENVPOOL_SOKOBAN_H_
+
+#include "envpool/core/async_envpool.h"
+#include "envpool/core/env.h"
+
+namespace sokoban {
+
+// class BaseSokobanEnvConfig(EnvConfig):
+//     tinyworld_obs: bool = False
+//     tinyworld_render: bool = False
+//     max_episode_steps: int = 120  # default value from gym_sokoban
+//     terminate_on_first_box: bool = False
+
+//     reward_finished: float = 10.0  # Reward for completing a level
+//     reward_box: float = 1.0  # Reward for putting a box on target
+//     reward_step: float = -0.1  # Reward for completing a step
+//
+// class BoxobanConfig(BaseSokobanEnvConfig):
+
+    // cache_path: Path = Path(__file__).parent.parent / ".sokoban_cache"
+    // split: Literal["train", "valid", "test", None] = "train"
+    // difficulty: Literal["unfiltered", "medium", "hard"] = "unfiltered"
+
+class SokobanEnvFns {
+ public:
+  static decltype(auto) DefaultConfig() {
+    return MakeDict("reward_finished"_.Bind(10.0f),
+                    "reward_box"_.Bind(1.0f),
+                    "reward_step"_.Bind(-0.1f),
+                    "dim_room"_.Bind(10),
+                    "levels_dir"_.Bind(std::string("None")));
+  }
+  template <typename Config>
+  static decltype(auto) StateSpec(const Config& conf) {
+    int dim_room = conf["dim_room"_];
+    return MakeDict("obs"_.Bind(Spec<uint8_t>({3, dim_room, dim_room})));
+  }
+  template <typename Config>
+  static decltype(auto) ActionSpec(const Config& conf) {
+    return MakeDict("action"_.Bind(Spec<int>({-1}, {0, 8})));
+  }
+};
+
+// this line will concat common config and common state/action spec
+using SokobanEnvSpec = EnvSpec<SokobanEnvFns>;
+
+class SokobanEnv : public Env<SokobanEnvSpec> {
+  public:
+        SokobanEnv(const Spec& spec, int env_id) : Env<SokobanEnvSpec>(spec, env_id), max_episode_steps{spec.config["max_episode_steps"_]},
+            dim_room{static_cast<int>(spec.config["dim_room"_])},
+            reward_finished{static_cast<float>(spec.config["reward_finished"_])},
+            reward_box{static_cast<float>(spec.config["reward_box"_])},
+            reward_step{static_cast<float>(spec.config["reward_step"_])},
+            levels_dir{static_cast<std::string>(spec.config["levels_dir"_])}
+        {}
+
+    bool IsDone () override { return done_; }
+    void Reset() override {
+
+    }
+    void Step(const Action &action) override {
+        static std::vector<uint8_t> zero_state(3*dim_room*dim_room);
+
+        State state = Allocate();
+        state["obs"_].Assign(zero_state.data(), zero_state.size());
+        state["reward"_] = reward_step;
+
+    }
+
+  private:
+    bool done_{true};
+    int max_episode_steps, dim_room;
+    float reward_finished, reward_box, reward_step;
+    std::string levels_dir;
+};
+
+using SokobanEnvPool = AsyncEnvPool<SokobanEnv>;
+}
+
+#endif // ENVPOOL_SOKOBAN_H_
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
new file mode 100644
index 00000000..975ee3ce
--- /dev/null
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -0,0 +1,85 @@
+"""Unit test for dummy envpool and speed benchmark."""
+
+import os
+import time
+
+import numpy as np
+from absl import logging
+from absl.testing import absltest
+from envpool.sokoban.sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec
+
+
+class _SokobanEnvPoolTest(absltest.TestCase):
+    def test_config(self) -> None:
+        ref_config_keys = [
+            "num_envs",
+            "batch_size",
+            "num_threads",
+            "max_num_players",
+            "thread_affinity_offset",
+            "base_path",
+            "seed",
+            "gym_reset_return_info",
+            "state_num",
+            "action_num",
+            "max_episode_steps",
+        ]
+        default_conf = _SokobanEnvSpec._default_config_values
+        self.assertTrue(isinstance(default_conf, tuple))
+        config_keys = _SokobanEnvSpec._config_keys
+        self.assertTrue(isinstance(config_keys, list))
+        self.assertEqual(len(default_conf), len(config_keys))
+        self.assertEqual(sorted(config_keys), sorted(ref_config_keys))
+
+    def test_spec(self) -> None:
+        conf = _SokobanEnvSpec._default_config_values
+        env_spec = _SokobanEnvSpec(conf)
+        state_spec = env_spec._state_spec
+        action_spec = env_spec._action_spec
+        state_keys = env_spec._state_keys
+        action_keys = env_spec._action_keys
+        self.assertTrue(isinstance(state_spec, tuple))
+        self.assertTrue(isinstance(action_spec, tuple))
+        state_spec = dict(zip(state_keys, state_spec))
+        action_spec = dict(zip(action_keys, action_spec))
+        # default value of state_num is 10
+        self.assertEqual(state_spec["obs:raw"][1][-1], 10)
+        self.assertEqual(state_spec["obs:dyn"][1][1][-1], 10)
+        # change conf and see if it can successfully change state_spec
+        # directly send dict or expose config as dict?
+        conf = dict(zip(_SokobanEnvSpec._config_keys, conf))
+        conf["state_num"] = 666
+        env_spec = _SokobanEnvSpec(tuple(conf.values()))
+        state_spec = dict(zip(state_keys, env_spec._state_spec))
+        self.assertEqual(state_spec["obs:raw"][1][-1], 666)
+
+    def test_envpool(self) -> None:
+        conf = dict(
+            zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values)
+        )
+        conf["num_envs"] = num_envs = 100
+        conf["batch_size"] = batch = 31
+        conf["num_threads"] = 10
+        env_spec = _SokobanEnvSpec(tuple(conf.values()))
+        env = _SokobanEnvPool(env_spec)
+        state_keys = env._state_keys
+        total = 1000
+        env._reset(np.arange(num_envs, dtype=np.int32))
+        t = time.time()
+        for _ in range(total):
+            state = dict(zip(state_keys, env._recv()))
+            action = {
+                "env_id": state["info:env_id"],
+                "players.env_id": state["info:players.env_id"],
+                "list_action": np.zeros((batch, 6), dtype=np.float64),
+                "players.id": state["info:players.id"],
+                "players.action": state["info:players.id"],
+            }
+            env._send(tuple(action.values()))
+        duration = time.time() - t
+        fps = total * batch / duration
+        logging.info(f"FPS = {fps:.6f}")
+
+
+if __name__ == "__main__":
+    absltest.main()

From 09eff519f5830fafb5a2291d07b378b87192b421 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 5 Jan 2024 14:33:58 -0800
Subject: [PATCH 03/60] Fixed some of the tests

---
 envpool/sokoban/sokoban_py_envpool_test.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 975ee3ce..bcaecb85 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -13,15 +13,18 @@ class _SokobanEnvPoolTest(absltest.TestCase):
     def test_config(self) -> None:
         ref_config_keys = [
             "num_envs",
+            "base_path",
             "batch_size",
+            "levels_dir",
+            "dim_room",
             "num_threads",
             "max_num_players",
             "thread_affinity_offset",
-            "base_path",
             "seed",
             "gym_reset_return_info",
-            "state_num",
-            "action_num",
+            "reward_box",
+            "reward_step",
+            "reward_finished",
             "max_episode_steps",
         ]
         default_conf = _SokobanEnvSpec._default_config_values
@@ -54,6 +57,7 @@ def test_spec(self) -> None:
         self.assertEqual(state_spec["obs:raw"][1][-1], 666)
 
     def test_envpool(self) -> None:
+        return
         conf = dict(
             zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values)
         )
@@ -80,6 +84,17 @@ def test_envpool(self) -> None:
         fps = total * batch / duration
         logging.info(f"FPS = {fps:.6f}")
 
+    def test_xla(self) -> None:
+        conf = dict(
+            zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values)
+        )
+        conf["num_envs"] = 100
+        conf["batch_size"] = 31
+        conf["num_threads"] = os.cpu_count()
+        env_spec = _SokobanEnvSpec(tuple(conf.values()))
+        env = _SokobanEnvPool(env_spec)
+        _ = env._xla()
+
 
 if __name__ == "__main__":
     absltest.main()

From 58e20b5752edf55213c7e9b9d2d69f6465bf67cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sat, 6 Jan 2024 21:33:32 -0800
Subject: [PATCH 04/60] Fixed more of the tests

---
 envpool/sokoban/sokoban_envpool.h          |  5 +++
 envpool/sokoban/sokoban_py_envpool_test.py | 51 +++++++---------------
 2 files changed, 21 insertions(+), 35 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 46d33945..5d4d4e17 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -57,6 +57,11 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
 
     bool IsDone () override { return done_; }
     void Reset() override {
+        static std::vector<uint8_t> zero_state(3*dim_room*dim_room);
+
+        State state = Allocate();
+        state["obs"_].Assign(zero_state.data(), zero_state.size());
+        state["reward"_] = reward_step;
 
     }
     void Step(const Action &action) override {
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index bcaecb85..de0d6f70 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -12,20 +12,23 @@
 class _SokobanEnvPoolTest(absltest.TestCase):
     def test_config(self) -> None:
         ref_config_keys = [
-            "num_envs",
+            # Default environment keys
             "base_path",
             "batch_size",
-            "levels_dir",
-            "dim_room",
-            "num_threads",
+            "gym_reset_return_info",
             "max_num_players",
-            "thread_affinity_offset",
+            "num_envs",
+            "num_threads",
             "seed",
-            "gym_reset_return_info",
+            "thread_affinity_offset",
+            # Default and also used by sokoban
+            "max_episode_steps",
+            # defined by sokoban
+            "dim_room",
+            "levels_dir",
             "reward_box",
-            "reward_step",
             "reward_finished",
-            "max_episode_steps",
+            "reward_step",
         ]
         default_conf = _SokobanEnvSpec._default_config_values
         self.assertTrue(isinstance(default_conf, tuple))
@@ -34,41 +37,19 @@ def test_config(self) -> None:
         self.assertEqual(len(default_conf), len(config_keys))
         self.assertEqual(sorted(config_keys), sorted(ref_config_keys))
 
-    def test_spec(self) -> None:
-        conf = _SokobanEnvSpec._default_config_values
-        env_spec = _SokobanEnvSpec(conf)
-        state_spec = env_spec._state_spec
-        action_spec = env_spec._action_spec
-        state_keys = env_spec._state_keys
-        action_keys = env_spec._action_keys
-        self.assertTrue(isinstance(state_spec, tuple))
-        self.assertTrue(isinstance(action_spec, tuple))
-        state_spec = dict(zip(state_keys, state_spec))
-        action_spec = dict(zip(action_keys, action_spec))
-        # default value of state_num is 10
-        self.assertEqual(state_spec["obs:raw"][1][-1], 10)
-        self.assertEqual(state_spec["obs:dyn"][1][1][-1], 10)
-        # change conf and see if it can successfully change state_spec
-        # directly send dict or expose config as dict?
-        conf = dict(zip(_SokobanEnvSpec._config_keys, conf))
-        conf["state_num"] = 666
-        env_spec = _SokobanEnvSpec(tuple(conf.values()))
-        state_spec = dict(zip(state_keys, env_spec._state_spec))
-        self.assertEqual(state_spec["obs:raw"][1][-1], 666)
-
     def test_envpool(self) -> None:
-        return
         conf = dict(
             zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values)
         )
-        conf["num_envs"] = num_envs = 100
-        conf["batch_size"] = batch = 31
+        conf["num_envs"] = num_envs = 200
+        conf["batch_size"] = batch = 100
         conf["num_threads"] = 10
         env_spec = _SokobanEnvSpec(tuple(conf.values()))
         env = _SokobanEnvPool(env_spec)
         state_keys = env._state_keys
-        total = 1000
+        total = 1
         env._reset(np.arange(num_envs, dtype=np.int32))
+        raise ValueError("resetted")
         t = time.time()
         for _ in range(total):
             state = dict(zip(state_keys, env._recv()))
@@ -79,7 +60,7 @@ def test_envpool(self) -> None:
                 "players.id": state["info:players.id"],
                 "players.action": state["info:players.id"],
             }
-            env._send(tuple(action.values()))
+            # env._send(tuple(action.values()))
         duration = time.time() - t
         fps = total * batch / duration
         logging.info(f"FPS = {fps:.6f}")

From 57cfac1a2dd717bf0b7b20708ecd8ff5588c42e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sun, 7 Jan 2024 21:25:26 -0800
Subject: [PATCH 05/60] Don't build all the other environments

---
 envpool/BUILD         | 19 ++-----------------
 envpool/sokoban/BUILD | 13 ++++++++++++-
 2 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/envpool/BUILD b/envpool/BUILD
index 93a7fc6d..b1a7a0dc 100644
--- a/envpool/BUILD
+++ b/envpool/BUILD
@@ -30,14 +30,7 @@ py_library(
     name = "entry",
     srcs = ["entry.py"],
     deps = [
-        "//envpool/atari:atari_registration",
-        "//envpool/box2d:box2d_registration",
-        "//envpool/classic_control:classic_control_registration",
-        "//envpool/mujoco:mujoco_dmc_registration",
-        "//envpool/mujoco:mujoco_gym_registration",
-        "//envpool/procgen:procgen_registration",
-        "//envpool/toy_text:toy_text_registration",
-        "//envpool/vizdoom:vizdoom_registration",
+        "//envpool/sokoban:registration",
     ],
 )
 
@@ -47,15 +40,7 @@ py_library(
     deps = [
         ":entry",
         ":registration",
-        "//envpool/atari",
-        "//envpool/box2d",
-        "//envpool/classic_control",
-        "//envpool/mujoco:mujoco_dmc",
-        "//envpool/mujoco:mujoco_gym",
-        "//envpool/procgen",
-        "//envpool/python",
-        "//envpool/toy_text",
-        "//envpool/vizdoom",
+        "//envpool/sokoban",
     ],
 )
 
diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index f1425d59..a5131945 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -1,6 +1,8 @@
 load("@pip_requirements//:requirements.bzl", "requirement")
 load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
 
+package(default_visibility = ["//visibility:public"])
+
 py_library(
     name = "sokoban",
     srcs = ["__init__.py"],
@@ -8,6 +10,14 @@ py_library(
     deps = ["//envpool/python:api"],
 )
 
+py_library(
+    name = "registration",
+    srcs = ["registration.py"],
+    deps = [
+        "//envpool:registration",
+    ],
+)
+
 cc_library(
     name = "sokoban_envpool_h",
     hdrs = ["sokoban_envpool.h"],
@@ -29,7 +39,8 @@ cc_library(
 # )
 
 py_test(
-    name = "sokoban_py_envpool_test",
+    name = "test",
+    main = "sokoban_py_envpool_test.py",
     srcs = ["sokoban_py_envpool_test.py"],
     deps = [
         ":sokoban",

From 4db8a6100db667a8ac35b42cedc40f76bd7656c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Tue, 9 Jan 2024 09:12:56 -0800
Subject: [PATCH 06/60] Basic sokoban

---
 docker/dev.dockerfile              |   8 --
 envpool/sokoban/BUILD              |   3 +-
 envpool/sokoban/level_loader.cc    | 116 +++++++++++++++++++++++++++++
 envpool/sokoban/level_loader.h     |  31 ++++++++
 envpool/sokoban/sokoban_envpool.cc |  41 ++++++++++
 envpool/sokoban/sokoban_envpool.h  |  33 ++++----
 6 files changed, 206 insertions(+), 26 deletions(-)
 create mode 100644 envpool/sokoban/level_loader.cc
 create mode 100644 envpool/sokoban/level_loader.h

diff --git a/docker/dev.dockerfile b/docker/dev.dockerfile
index 3fd936ee..79507580 100644
--- a/docker/dev.dockerfile
+++ b/docker/dev.dockerfile
@@ -26,13 +26,5 @@ RUN go install github.com/bazelbuild/bazelisk@latest && ln -sf $HOME/go/bin/baze
 RUN go install github.com/bazelbuild/buildtools/buildifier@latest
 RUN $HOME/go/bin/bazel version
 
-RUN useradd -ms /bin/zsh github-action
-
-RUN apt-get update \
-    && apt-get install -y clang-format clang-tidy swig qtdeclarative5-dev \
-    && rm -rf /var/lib/apt/lists/*
-
 WORKDIR /app
 COPY . .
-
-RUN make bazel-build
diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index a5131945..f4719c8f 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -20,7 +20,7 @@ py_library(
 
 cc_library(
     name = "sokoban_envpool_h",
-    hdrs = ["sokoban_envpool.h"],
+    hdrs = ["sokoban_envpool.h", "level_loader.h"],
     deps = [
         "//envpool/core:async_envpool",
         "//envpool/core:env",
@@ -53,6 +53,7 @@ pybind_extension(
     name = "sokoban_envpool",
     srcs = [
         "sokoban_envpool.cc",
+        "level_loader.cc",
     ],
     linkopts = [
         "-ldl",
diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
new file mode 100644
index 00000000..af77ff36
--- /dev/null
+++ b/envpool/sokoban/level_loader.cc
@@ -0,0 +1,116 @@
+#include "level_loader.h"
+
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+#include <algorithm>
+
+
+namespace sokoban {
+
+size_t ERROR_SZ = 1024;
+
+LevelLoader::LevelLoader(const std::filesystem::path& base_path)
+    : levels(0), cur_level(levels.begin()), level_file_paths(0) {
+  for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
+    level_file_paths.push_back(entry.path());
+  }
+}
+
+// Append one text row of a level to `level`, one tile code per character; throws std::runtime_error on a malformed row.
+void AddLine(SokobanLevel& level, const std::string& line) {
+  if (line.empty() || (line.front() != '#') || (line.back() != '#')) {
+    std::stringstream msg;
+    msg << "Line '" << line
+        << "' does not start and end with '#', as it should." << std::endl;
+    throw std::runtime_error(msg.str());
+  }
+  for (const char& r : line) {
+    switch (r) {
+      case '#':
+        level.push_back(WALL);
+        break;
+      case '@':
+        level.push_back(PLAYER);
+        break;
+      case '$':
+        level.push_back(BOX);
+        break;
+      case '.':
+        level.push_back(TARGET);
+        break;
+      case ' ':
+        level.push_back(SPACE);
+        break;
+      default:
+        std::stringstream msg;
+        msg << "Line '" << line << "' has character '" << r
+            << "' which is not in the valid set '#@$. '." << std::endl;
+        throw std::runtime_error(msg.str());
+    }
+  }
+}
+
+// Replace `levels` with the shuffled levels parsed from one randomly chosen file; lines not starting with '#' are skipped.
+// Throws std::runtime_error if there are no level files, a level is malformed, or the chosen file yields no levels.
+void LevelLoader::LoadNewFile(std::mt19937& gen) {
+  if (level_file_paths.empty()) {
+    throw std::runtime_error("No level files found in the levels directory.");
+  }
+  std::uniform_int_distribution<size_t> load_file_idx_r(
+      0, level_file_paths.size() - 1);
+  size_t load_file_idx = load_file_idx_r(gen);
+  std::ifstream file(level_file_paths.at(load_file_idx));
+
+  levels.clear();
+  std::string line;
+  while (std::getline(file, line)) {
+    if (!line.empty() && line.front() == '#') {
+      SokobanLevel& level = levels.emplace_back(0);
+      level.reserve(15 * 15);
+
+      // Count the '#' characters of the first row and use that as the room dimension (assumes the top row is all wall).
+      size_t dim_room = static_cast<size_t>(std::count(line.begin(), line.end(), '#'));
+      AddLine(level, line);
+
+      while (std::getline(file, line) && !line.empty() && line.front() == '#') {
+        if (line.length() != dim_room) {
+          std::stringstream msg;
+          msg << "Irregular line '" << line
+              << "' does not match dim_room=" << dim_room << std::endl;
+          throw std::runtime_error(msg.str());
+        }
+        AddLine(level, line);
+      }
+
+      if (level.size() != dim_room * dim_room) {
+        std::stringstream msg;
+        msg << "Room is not square: " << level.size() << " != " << dim_room
+            << "x" << dim_room << std::endl;
+        throw std::runtime_error(msg.str());
+      }
+    }
+  }
+  std::shuffle(levels.begin(), levels.end(), gen);
+  if(levels.empty()) {
+      std::stringstream msg;
+      msg << "No levels loaded from file '" << level_file_paths.at(load_file_idx) << "'" << std::endl;
+      throw std::runtime_error(msg.str());
+  }
+}
+
+const std::vector<SokobanLevel>::iterator LevelLoader::RandomLevel(std::mt19937& gen) {
+  if (cur_level == levels.end()) {
+    LoadNewFile(gen);
+    cur_level = levels.begin();
+    if(cur_level == levels.end()) {
+        throw std::runtime_error("No levels loaded.");
+    }
+  }
+  auto out = cur_level;
+  cur_level++;
+  return out;
+}
+
+}  // namespace sokoban
diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h
new file mode 100644
index 00000000..8bdc8a0a
--- /dev/null
+++ b/envpool/sokoban/level_loader.h
@@ -0,0 +1,31 @@
+#ifndef LEVEL_LOADER_H_
+#define LEVEL_LOADER_H_
+
+#include <filesystem>
+#include <random>
+#include <vector>
+
+namespace sokoban {
+
+using SokobanLevel = std::vector<uint8_t>;
+
+constexpr uint8_t WALL = 0;
+constexpr uint8_t BOX = 4;
+constexpr uint8_t PLAYER = 5;
+constexpr uint8_t TARGET = 2;
+constexpr uint8_t SPACE = 1;
+
+class LevelLoader {
+ protected:
+  std::vector<SokobanLevel> levels;
+  std::vector<SokobanLevel>::iterator cur_level;
+  std::vector<std::filesystem::path> level_file_paths;
+  void LoadNewFile(std::mt19937& gen);
+
+ public:
+  const std::vector<SokobanLevel>::iterator RandomLevel(std::mt19937& gen);
+  LevelLoader(const std::filesystem::path& base_path);
+};
+}  // namespace sokoban
+
+#endif  // LEVEL_LOADER_H_
diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 07b433ee..43b71b06 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -1,6 +1,47 @@
 #include "envpool/sokoban/sokoban_envpool.h"
+
+#include <sstream>
+#include <stdexcept>
+
 #include "envpool/core/py_envpool.h"
 
+namespace sokoban {
+
+void SokobanEnv::Reset() {
+  //
+  internal_state_ = *level_loader.RandomLevel(gen_);
+  State state = Allocate();
+  _reward = 0.0f;
+
+  WriteState();
+}
+void SokobanEnv::Step(const Action& action) {
+  _reward = reward_step;
+  // todo actual state transition
+
+  WriteState();
+}
+
+void SokobanEnv::WriteState() {
+  State state = Allocate();
+  state["reward"_] = _reward;
+  Array& obs = state["obs"_];
+  if (obs.size != 3 * internal_state_.size()) {
+    std::stringstream msg;
+    msg << "Obs size and level size are different: obs_size=" << obs.size
+        << "/3, level_size=" << internal_state_.size()
+        << ", dim_room=" << dim_room << std::endl;
+    throw std::runtime_error(msg.str());
+  }
+
+  // TODO: actually color the image
+  for (int i = 0; i < 3; i++) {
+    obs(i).Assign(internal_state_.data(), internal_state_.size());
+  }
+}
+
+}  // namespace sokoban
+
 // generate python-side (raw) SokobanEnvSpec
 using SokobanEnvSpec = PyEnvSpec<sokoban::SokobanEnvSpec>;
 // generate python-side (raw) SokobanEnvPool
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 5d4d4e17..4082f73b 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -1,8 +1,13 @@
 #ifndef ENVPOOL_SOKOBAN_H_
 #define ENVPOOL_SOKOBAN_H_
 
+#include <filesystem>
+
 #include "envpool/core/async_envpool.h"
 #include "envpool/core/env.h"
+#include "envpool/core/array.h"
+
+#include "level_loader.h"
 
 namespace sokoban {
 
@@ -52,32 +57,26 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
             reward_finished{static_cast<float>(spec.config["reward_finished"_])},
             reward_box{static_cast<float>(spec.config["reward_box"_])},
             reward_step{static_cast<float>(spec.config["reward_step"_])},
-            levels_dir{static_cast<std::string>(spec.config["levels_dir"_])}
+            levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
+            level_loader(levels_dir),
+            internal_state_(WALL, static_cast<std::size_t>(dim_room*dim_room))
         {}
 
     bool IsDone () override { return done_; }
-    void Reset() override {
-        static std::vector<uint8_t> zero_state(3*dim_room*dim_room);
-
-        State state = Allocate();
-        state["obs"_].Assign(zero_state.data(), zero_state.size());
-        state["reward"_] = reward_step;
+    void Reset() override;
+    void Step(const Action &action) override;
 
-    }
-    void Step(const Action &action) override {
-        static std::vector<uint8_t> zero_state(3*dim_room*dim_room);
-
-        State state = Allocate();
-        state["obs"_].Assign(zero_state.data(), zero_state.size());
-        state["reward"_] = reward_step;
-
-    }
+    void WriteState();
 
   private:
     bool done_{true};
     int max_episode_steps, dim_room;
     float reward_finished, reward_box, reward_step;
-    std::string levels_dir;
+    std::filesystem::path levels_dir;
+
+    LevelLoader level_loader;
+    SokobanLevel internal_state_;
+    float _reward;
 };
 
 using SokobanEnvPool = AsyncEnvPool<SokobanEnv>;

From a9db23f852a782bb4c21e4df1efd2b1c7c307e09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Tue, 9 Jan 2024 16:02:20 -0800
Subject: [PATCH 07/60] simpler dev docker with updated CUDA

---
 docker/dev.dockerfile | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/docker/dev.dockerfile b/docker/dev.dockerfile
index 79507580..9f5d33d3 100644
--- a/docker/dev.dockerfile
+++ b/docker/dev.dockerfile
@@ -1,29 +1,22 @@
-# Need docker >= 20.10.9, see https://stackoverflow.com/questions/71941032/why-i-cannot-run-apt-update-inside-a-fresh-ubuntu22-04
+FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
 
-FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
-
-ARG DEBIAN_FRONTEND=noninteractive
 ARG HOME=/root
-ARG PATH=$PATH:$HOME/go/bin
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PATH=$PATH:$HOME/go/bin
 
 RUN apt-get update \
-    && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl zsh tmux vim ssh \
+    && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl tmux vim ssh \
+    && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 RUN ln -s /usr/bin/python3 /usr/bin/python
 RUN ln -sf /usr/lib/go-1.18/bin/go /usr/bin/go
-RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)"
-WORKDIR $HOME
-RUN git clone https://github.com/gpakosz/.tmux.git
-RUN ln -s -f .tmux/.tmux.conf
-RUN cp .tmux/.tmux.conf.local .
-RUN echo "set-option -g default-shell /bin/zsh" >> .tmux.conf.local
-RUN echo "set-option -g history-limit 10000" >> .tmux.conf.local
-RUN echo "export PATH=$PATH:$HOME/go/bin" >> .zshrc
-
-ENV USE_BAZEL_VERSION=6.4.0
 
+# Install Bazel
 RUN go install github.com/bazelbuild/bazelisk@latest && ln -sf $HOME/go/bin/bazelisk $HOME/go/bin/bazel
 RUN go install github.com/bazelbuild/buildtools/buildifier@latest
+
+ARG USE_BAZEL_VERSION=6.4.0
 RUN $HOME/go/bin/bazel version
 
 WORKDIR /app

From 7eb159d49699700d7c4d50295d68d6674d2e5139 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Wed, 10 Jan 2024 15:16:00 -0800
Subject: [PATCH 08/60] Implemented env

---
 envpool/sokoban/level_loader.cc    |   2 +-
 envpool/sokoban/level_loader.h     |   6 +-
 envpool/sokoban/sokoban_envpool.cc | 133 +++++++++++++++++++++++++----
 envpool/sokoban/sokoban_envpool.h  | 107 ++++++++++++-----------
 4 files changed, 179 insertions(+), 69 deletions(-)

diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index af77ff36..29f8aff9 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -40,7 +40,7 @@ void AddLine(SokobanLevel& level, const std::string& line) {
         level.push_back(TARGET);
         break;
       case ' ':
-        level.push_back(SPACE);
+        level.push_back(EMPTY);
         break;
       default:
         std::stringstream msg;
diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h
index 8bdc8a0a..ebe37f78 100644
--- a/envpool/sokoban/level_loader.h
+++ b/envpool/sokoban/level_loader.h
@@ -10,10 +10,12 @@ namespace sokoban {
 using SokobanLevel = std::vector<uint8_t>;
 
 constexpr uint8_t WALL = 0;
+constexpr uint8_t EMPTY = 1;
+constexpr uint8_t TARGET = 2;
+constexpr uint8_t BOX_ON_TARGET = 3;
 constexpr uint8_t BOX = 4;
 constexpr uint8_t PLAYER = 5;
-constexpr uint8_t TARGET = 2;
-constexpr uint8_t SPACE = 1;
+constexpr uint8_t PLAYER_ON_TARGET = 6;
 
 class LevelLoader {
  protected:
diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 43b71b06..5e58d269 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -1,5 +1,6 @@
 #include "envpool/sokoban/sokoban_envpool.h"
 
+#include <array>
 #include <sstream>
 #include <stdexcept>
 
@@ -8,36 +9,136 @@
 namespace sokoban {
 
 void SokobanEnv::Reset() {
-  //
-  internal_state_ = *level_loader.RandomLevel(gen_);
-  State state = Allocate();
-  _reward = 0.0f;
+  world = *level_loader.RandomLevel(gen_);
+  if (world.size() != dim_room * dim_room) {
+    std::stringstream msg;
+    msg << "Loaded level is not dim_room x dim_room. world.size()="
+        << world.size() << ", dim_room=" << dim_room << std::endl;
+    throw std::runtime_error(msg.str());
+  }
+  unmatched_boxes = 0;
+  for (int x = 0; x < dim_room; x++) {
+    for (int y = 0; y < dim_room; y++) {
+      switch (WorldAt(x, y)) {
+        case PLAYER:
+          player_x = x;
+          player_y = y;
+          break;
+        case BOX:
+          unmatched_boxes++;
+          break;
+      }
 
-  WriteState();
+      WriteState(0.0f);
+    }
+  }
 }
+
+constexpr std ::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
+    {-1, 0}, {1, 0}, {0, -1}, {0, 1}};
+
 void SokobanEnv::Step(const Action& action) {
-  _reward = reward_step;
-  // todo actual state transition
+  if (action == ACT_NOOP) {
+    WriteState(reward_step);
+    return;
+  }
+  // From here on, assume the agent will try to move
+
+  const int change_coordinates_idx = (action - 1) % CHANGE_COORDINATES.size();
+  const int delta_x = CHANGE_COORDINATES.at(change_coordinates_idx).at(0);
+  const int delta_y = CHANGE_COORDINATES.at(change_coordinates_idx).at(1);
+
+  const int prev_unmatched_boxes = unmatched_boxes;
 
-  WriteState();
+  // Arena: the things that will change if the agent moves
+  std::array<uint8_t, 3> arena;
+  for (size_t i = 0; i < arena.size(); i++) {
+    arena.at(i) = WorldAt(player_x + delta_x * i, player_y + delta_y * i);
+  }
+
+  // The box moves IFF the action is a push AND arena[1] holds a box AND the
+  // tile behind the box (arena[2]) is free to enter, i.e. EMPTY or TARGET.
+  const bool box_moves =
+      ((action <= ACT_PUSH_RIGHT) &&
+       ((arena.at(1) == BOX) || (arena.at(1) == BOX_ON_TARGET)) &&
+       ((arena.at(2) == EMPTY) || (arena.at(2) == TARGET)));
+
+  // The agent will move if the next arena location is possible to move into, or
+  // if it's a box and the box moves
+  const bool is_a_box_and_the_box_moves = box_moves;
+  const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) ||
+                           is_a_box_and_the_box_moves;
+
+  if (agent_moves) {
+    // `is_target` is boolean but we'll need it as an int later
+    std::array<int, arena.size()> is_target;
+    for (size_t i = 0; i < arena.size(); i++) {
+      uint8_t tile = arena.at(i);
+      is_target.at(i) =
+          (tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET);
+    }
+    // only whatever was on the floor is now at position 0
+    arena.at(0) = is_target.at(0) ? TARGET : EMPTY;
+    // the player now occupies position 1
+    arena.at(1) = is_target.at(1) ? PLAYER_ON_TARGET : PLAYER;
+
+    if (box_moves) {
+      // the box moves for sure. A target at 2 reduces the number of unmatched
+      // boxes (because the box goes there), a target at 1 increases it (the box
+      // leaves from there). Both can be equal to 1 and in that case the number
+      // stays the same.
+      unmatched_boxes += is_target.at(1) - is_target.at(2);
+
+      // A box now occupies position 2
+      arena.at(2) = is_target.at(2) ? BOX_ON_TARGET : BOX;
+    }
+
+    player_x += delta_x;
+    player_y += delta_y;
+    for (size_t i = 0; i < arena.size(); i++) {
+      WorldAssignAt(player_x + delta_x * i, player_y + delta_y * i,
+                    arena.at(i));
+    }
+  }
+
+  const float reward =
+      reward_step +
+      reward_box * static_cast<float>(prev_unmatched_boxes - unmatched_boxes) +
+      (IsDone() ? reward_finished : 0.0f);
+  WriteState(reward);
 }
 
-void SokobanEnv::WriteState() {
+constexpr std::array<std::array<uint8_t, 3>, PLAYER_ON_TARGET + 1> TINY_COLORS =
+    {
+        {0, 0, 0},        // WALL
+        {243, 248, 238},  // EMPTY
+        {254, 126, 125},  // TARGET
+        {254, 95, 56},    // BOX_ON_TARGET
+        {142, 121, 56},   // BOX
+        {160, 212, 56},   // PLAYER
+        {219, 212, 56}    // PLAYER_ON_TARGET
+};
+
+void SokobanEnv::WriteState(float reward) {
   State state = Allocate();
-  state["reward"_] = _reward;
+  state["reward"_] = reward;
   Array& obs = state["obs"_];
-  if (obs.size != 3 * internal_state_.size()) {
+  if (obs.size != 3 * world.size()) {
     std::stringstream msg;
     msg << "Obs size and level size are different: obs_size=" << obs.size
-        << "/3, level_size=" << internal_state_.size()
-        << ", dim_room=" << dim_room << std::endl;
+        << "/3, level_size=" << world.size() << ", dim_room=" << dim_room
+        << std::endl;
     throw std::runtime_error(msg.str());
   }
 
-  // TODO: actually color the image
-  for (int i = 0; i < 3; i++) {
-    obs(i).Assign(internal_state_.data(), internal_state_.size());
+  std::array<uint8_t, 3 * world.size()> out;
+  for (int rgb = 0; rgb < 3; rgb++) {
+    for (size_t i = 0; i < world.size(); i++) {
+      out.at(rgb * (dim_room * dim_room) + i) =
+          TINY_COLORS.at(world.at(i)).at(rgb);
+    }
   }
+  obs.Assign(out.data(), out.size());
 }
 
 }  // namespace sokoban
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 4082f73b..6b392027 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -3,37 +3,29 @@
 
 #include <filesystem>
 
+#include "envpool/core/array.h"
 #include "envpool/core/async_envpool.h"
 #include "envpool/core/env.h"
-#include "envpool/core/array.h"
-
 #include "level_loader.h"
 
 namespace sokoban {
 
-// class BaseSokobanEnvConfig(EnvConfig):
-//     tinyworld_obs: bool = False
-//     tinyworld_render: bool = False
-//     max_episode_steps: int = 120  # default value from gym_sokoban
-//     terminate_on_first_box: bool = False
-
-//     reward_finished: float = 10.0  # Reward for completing a level
-//     reward_box: float = 1.0  # Reward for putting a box on target
-//     reward_step: float = -0.1  # Reward for completing a step
-//
-// class BoxobanConfig(BaseSokobanEnvConfig):
-
-    // cache_path: Path = Path(__file__).parent.parent / ".sokoban_cache"
-    // split: Literal["train", "valid", "test", None] = "train"
-    // difficulty: Literal["unfiltered", "medium", "hard"] = "unfiltered"
+constexpr int ACT_NOOP = 0;
+constexpr int ACT_PUSH_UP = 1;
+constexpr int ACT_PUSH_DOWN = 2;
+constexpr int ACT_PUSH_LEFT = 3;
+constexpr int ACT_PUSH_RIGHT = 4;
+constexpr int ACT_MOVE_UP = 5;
+constexpr int ACT_MOVE_DOWN = 6;
+constexpr int ACT_MOVE_LEFT = 7;
+constexpr int ACT_MOVE_RIGHT = 8;
+constexpr int MAX_ACTION = ACT_MOVE_RIGHT;
 
 class SokobanEnvFns {
  public:
   static decltype(auto) DefaultConfig() {
-    return MakeDict("reward_finished"_.Bind(10.0f),
-                    "reward_box"_.Bind(1.0f),
-                    "reward_step"_.Bind(-0.1f),
-                    "dim_room"_.Bind(10),
+    return MakeDict("reward_finished"_.Bind(10.0f), "reward_box"_.Bind(1.0f),
+                    "reward_step"_.Bind(-0.1f), "dim_room"_.Bind(10),
                     "levels_dir"_.Bind(std::string("None")));
   }
   template <typename Config>
@@ -43,7 +35,7 @@ class SokobanEnvFns {
   }
   template <typename Config>
   static decltype(auto) ActionSpec(const Config& conf) {
-    return MakeDict("action"_.Bind(Spec<int>({-1}, {0, 8})));
+    return MakeDict("action"_.Bind(Spec<int>({-1}, {0, MAX_ACTION})));
   }
 };
 
@@ -51,35 +43,50 @@ class SokobanEnvFns {
 using SokobanEnvSpec = EnvSpec<SokobanEnvFns>;
 
 class SokobanEnv : public Env<SokobanEnvSpec> {
-  public:
-        SokobanEnv(const Spec& spec, int env_id) : Env<SokobanEnvSpec>(spec, env_id), max_episode_steps{spec.config["max_episode_steps"_]},
-            dim_room{static_cast<int>(spec.config["dim_room"_])},
-            reward_finished{static_cast<float>(spec.config["reward_finished"_])},
-            reward_box{static_cast<float>(spec.config["reward_box"_])},
-            reward_step{static_cast<float>(spec.config["reward_step"_])},
-            levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
-            level_loader(levels_dir),
-            internal_state_(WALL, static_cast<std::size_t>(dim_room*dim_room))
-        {}
-
-    bool IsDone () override { return done_; }
-    void Reset() override;
-    void Step(const Action &action) override;
-
-    void WriteState();
-
-  private:
-    bool done_{true};
-    int max_episode_steps, dim_room;
-    float reward_finished, reward_box, reward_step;
-    std::filesystem::path levels_dir;
-
-    LevelLoader level_loader;
-    SokobanLevel internal_state_;
-    float _reward;
+ public:
+  SokobanEnv(const Spec& spec, int env_id)
+      : Env<SokobanEnvSpec>(spec, env_id),
+        max_episode_steps{spec.config["max_episode_steps"_]},
+        dim_room{static_cast<int>(spec.config["dim_room"_])},
+        reward_finished{static_cast<float>(spec.config["reward_finished"_])},
+        reward_box{static_cast<float>(spec.config["reward_box"_])},
+        reward_step{static_cast<float>(spec.config["reward_step"_])},
+        levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
+        level_loader(levels_dir),
+        world(WALL, static_cast<std::size_t>(dim_room * dim_room)) {}
+
+  bool IsDone() override { return unmatched_boxes == 0; }
+  void Reset() override;
+  void Step(const Action& action) override;
+
+  void WriteState(float reward);
+
+ private:
+  int max_episode_steps, dim_room;
+  float reward_finished, reward_box, reward_step;
+  std::filesystem::path levels_dir;
+
+  LevelLoader level_loader;
+  SokobanLevel world;
+
+  int player_x{0}, player_y{0};
+  int unmatched_boxes{0};
+
+  uint8_t WorldAt(int x, int y) {
+    if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) {
+      return WALL;
+    }
+    return world.at(x + y * dim_room);
+  }
+  void WorldAssignAt(int x, int y, uint8_t value) {
+    if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) {
+      return;
+    }
+    world.at(x + y * dim_room) = value;
+  }
 };
 
 using SokobanEnvPool = AsyncEnvPool<SokobanEnv>;
-}
+}  // namespace sokoban
 
-#endif // ENVPOOL_SOKOBAN_H_
+#endif  // ENVPOOL_SOKOBAN_H_

From 621de30aa1bb11084af0ecdebeeba05456b1c414 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Thu, 11 Jan 2024 15:09:25 -0800
Subject: [PATCH 09/60] Debugged env

---
 envpool/sokoban/BUILD                      |  2 +
 envpool/sokoban/level_loader.cc            | 70 ++++++++++++++++------
 envpool/sokoban/level_loader.h             |  7 ++-
 envpool/sokoban/sokoban_envpool.cc         | 31 +++++++---
 envpool/sokoban/sokoban_envpool.h          | 29 ++++-----
 envpool/sokoban/sokoban_py_envpool_test.py | 62 +++++++++----------
 6 files changed, 127 insertions(+), 74 deletions(-)

diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index f4719c8f..eac4b98b 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -44,6 +44,8 @@ py_test(
     srcs = ["sokoban_py_envpool_test.py"],
     deps = [
         ":sokoban",
+        ":registration",
+        "//envpool:envpool",
         requirement("numpy"),
         requirement("absl-py"),
     ],
diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index 29f8aff9..526fdc02 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -1,28 +1,33 @@
 #include "level_loader.h"
 
+#include <algorithm>
 #include <filesystem>
 #include <fstream>
+#include <iostream>
+#include <iterator>
 #include <sstream>
 #include <stdexcept>
-#include <algorithm>
-
 
 namespace sokoban {
 
 size_t ERROR_SZ = 1024;
 
-LevelLoader::LevelLoader(const std::filesystem::path& base_path)
-    : levels(0), cur_level(levels.begin()), level_file_paths(0) {
+LevelLoader::LevelLoader(const std::filesystem::path& base_path, int verbose)
+    : levels(0), cur_level(levels.begin()), level_file_paths(0), verbose(verbose) {
   for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
     level_file_paths.push_back(entry.path());
   }
 }
 
+const std::string PRINT_LEVEL_KEY = "# .a@$s";
+
 void AddLine(SokobanLevel& level, const std::string& line) {
-  if ((line.at(0) != '#') || (*line.rend() != '#')) {
+  auto start = line.at(0);              // first character of the row
+  auto end = line.at(line.size() - 1);  // last character of the row
+  if ((start != '#') || (end != '#')) {
     std::stringstream msg;
-    msg << "Line '" << line
-        << "' does not start and begin with '#', as it should." << std::endl;
+    msg << "Line '" << line << "' does not start (" << start << ") and end ("
+        << end << ") with '#', as it should." << std::endl;
     throw std::runtime_error(msg.str());
   }
   for (const char& r : line) {
@@ -52,18 +57,35 @@ void AddLine(SokobanLevel& level, const std::string& line) {
   }
 }
 
+void PrintLevel(std::ostream& os, SokobanLevel vec) {
+  size_t dim_room = 0;
+  for (; dim_room * dim_room != vec.size() && dim_room <= 100; dim_room++)
+    ;  // take sqrt(vec.size())
+  for (size_t i = 0; i < vec.size(); i++) {
+    os << PRINT_LEVEL_KEY.at(vec.at(i));
+    if ((i + 1) % dim_room == 0) {
+      os << std::endl;
+    }
+  }
+}
+
 void LevelLoader::LoadNewFile(std::mt19937& gen) {
   std::uniform_int_distribution<size_t> load_file_idx_r(
       0, level_file_paths.size() - 1);
-  size_t load_file_idx = load_file_idx_r(gen);
-  std::ifstream file(level_file_paths.at(load_file_idx));
+  const size_t load_file_idx = load_file_idx_r(gen);
+  const std::filesystem::path& file_path = level_file_paths.at(load_file_idx);
+  std::ifstream file(file_path);
 
   levels.clear();
   std::string line;
   while (std::getline(file, line)) {
+    if (line.size() == 0) {
+      continue;
+    }
+
     if (line.at(0) == '#') {
       SokobanLevel& cur_level = levels.emplace_back(0);
-      cur_level.reserve(15 * 15);
+      cur_level.reserve(10 * 10);  // In practice most levels are this size
 
       // Count contiguous '#' characters and use this as the box dimension
       size_t dim_room = 0;
@@ -74,7 +96,7 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) {
       }
       AddLine(cur_level, line);
 
-      while (std::getline(file, line) && line.at(0) == '#') {
+      while (std::getline(file, line) && line.size() > 0 && line.at(0) == '#') {
         if (line.length() != dim_room) {
           std::stringstream msg;
           msg << "Irregular line '" << line
@@ -93,19 +115,31 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) {
     }
   }
   std::shuffle(levels.begin(), levels.end(), gen);
-  if(levels.empty()) {
-      std::stringstream msg;
-      msg << "No levels loaded from file '" << level_file_paths.at(load_file_idx) << std::endl;
-      throw std::runtime_error(msg.str());
+  if (levels.empty()) {
+    std::stringstream msg;
+    msg << "No levels loaded from file '" << file_path << std::endl;
+    throw std::runtime_error(msg.str());
+  }
+
+  if(verbose >= 1) {
+    std::cout << "Loaded " << levels.size() << " levels from " << file_path
+              << std::endl;
+    if(verbose >= 2) {
+      for (size_t i = 0; i < levels.size() && i < 2; i++) {  // at most 2 levels
+        PrintLevel(std::cout, levels.at(i));
+        std::cout << std::endl;
+      }
+    }
   }
 }
 
-const std::vector<SokobanLevel>::iterator LevelLoader::RandomLevel(std::mt19937& gen) {
+const std::vector<SokobanLevel>::iterator LevelLoader::RandomLevel(
+    std::mt19937& gen) {
   if (cur_level == levels.end()) {
     LoadNewFile(gen);
     cur_level = levels.begin();
-    if(cur_level == levels.end()) {
-        throw std::runtime_error("No levels loaded.");
+    if (cur_level == levels.end()) {
+      throw std::runtime_error("No levels loaded.");
     }
   }
   auto out = cur_level;
diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h
index ebe37f78..88326c84 100644
--- a/envpool/sokoban/level_loader.h
+++ b/envpool/sokoban/level_loader.h
@@ -25,9 +25,14 @@ class LevelLoader {
   void LoadNewFile(std::mt19937& gen);
 
  public:
+  int verbose;
+
   const std::vector<SokobanLevel>::iterator RandomLevel(std::mt19937& gen);
-  LevelLoader(const std::filesystem::path& base_path);
+  LevelLoader(const std::filesystem::path& base_path, int verbose=0);
 };
+
+
+void PrintLevel(std::ostream& os, SokobanLevel vec);
 }  // namespace sokoban
 
 #endif  // LEVEL_LOADER_H_
diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 5e58d269..b6467466 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -28,16 +28,29 @@ void SokobanEnv::Reset() {
           unmatched_boxes++;
           break;
       }
-
-      WriteState(0.0f);
     }
   }
+  WriteState(0.0f);
+}
+
+uint8_t SokobanEnv::WorldAt(int x, int y) {
+  if ((x < 0) || (x >= dim_room) || (y < 0) || (y >= dim_room)) {
+    return WALL;
+  }
+  return world.at(x + y * dim_room);
+}
+void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) {
+  if ((x < 0) || (x >= dim_room) || (y < 0) || (y >= dim_room)) {
+    return;
+  }
+  world.at(x + y * dim_room) = value;
 }
 
-constexpr std ::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
-    {-1, 0}, {1, 0}, {0, -1}, {0, 1}};
+constexpr std::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
+    {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}};
 
-void SokobanEnv::Step(const Action& action) {
+void SokobanEnv::Step(const Action& action_) {
+  const int action = action_["action"_];
   if (action == ACT_NOOP) {
     WriteState(reward_step);
     return;
@@ -109,7 +122,7 @@ void SokobanEnv::Step(const Action& action) {
 }
 
 constexpr std::array<std::array<uint8_t, 3>, PLAYER_ON_TARGET + 1> TINY_COLORS =
-    {
+    {{
         {0, 0, 0},        // WALL
         {243, 248, 238},  // EMPTY
         {254, 126, 125},  // TARGET
@@ -117,10 +130,10 @@ constexpr std::array<std::array<uint8_t, 3>, PLAYER_ON_TARGET + 1> TINY_COLORS =
         {142, 121, 56},   // BOX
         {160, 212, 56},   // PLAYER
         {219, 212, 56}    // PLAYER_ON_TARGET
-};
+    }};
 
 void SokobanEnv::WriteState(float reward) {
-  State state = Allocate();
+  auto state = Allocate();
   state["reward"_] = reward;
   Array& obs = state["obs"_];
   if (obs.size != 3 * world.size()) {
@@ -131,7 +144,7 @@ void SokobanEnv::WriteState(float reward) {
     throw std::runtime_error(msg.str());
   }
 
-  std::array<uint8_t, 3 * world.size()> out;
+  std::vector<uint8_t> out(3 * world.size());
   for (int rgb = 0; rgb < 3; rgb++) {
     for (size_t i = 0; i < world.size(); i++) {
       out.at(rgb * (dim_room * dim_room) + i) =
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 6b392027..03728ed8 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -24,9 +24,14 @@ constexpr int MAX_ACTION = ACT_MOVE_RIGHT;
 class SokobanEnvFns {
  public:
   static decltype(auto) DefaultConfig() {
-    return MakeDict("reward_finished"_.Bind(10.0f), "reward_box"_.Bind(1.0f),
-                    "reward_step"_.Bind(-0.1f), "dim_room"_.Bind(10),
-                    "levels_dir"_.Bind(std::string("None")));
+    return MakeDict(
+      "reward_finished"_.Bind(10.0f),
+      "reward_box"_.Bind(1.0f),
+      "reward_step"_.Bind(-0.1f),
+      "dim_room"_.Bind(10),
+      "levels_dir"_.Bind(std::string("")),
+      "verbose"_.Bind(0)
+    );
   }
   template <typename Config>
   static decltype(auto) StateSpec(const Config& conf) {
@@ -53,7 +58,8 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
         reward_step{static_cast<float>(spec.config["reward_step"_])},
         levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
         level_loader(levels_dir),
-        world(WALL, static_cast<std::size_t>(dim_room * dim_room)) {}
+        world(WALL, static_cast<std::size_t>(dim_room * dim_room)),
+        verbose(static_cast<int>(spec.config["verbose"_])) {}
 
   bool IsDone() override { return unmatched_boxes == 0; }
   void Reset() override;
@@ -68,22 +74,13 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
 
   LevelLoader level_loader;
   SokobanLevel world;
+  int verbose;
 
   int player_x{0}, player_y{0};
   int unmatched_boxes{0};
 
-  uint8_t WorldAt(int x, int y) {
-    if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) {
-      return WALL;
-    }
-    return world.at(x + y * dim_room);
-  }
-  void WorldAssignAt(int x, int y, uint8_t value) {
-    if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) {
-      return;
-    }
-    world.at(x + y * dim_room) = value;
-  }
+  uint8_t WorldAt(int x, int y);
+  void WorldAssignAt(int x, int y, uint8_t value);
 };
 
 using SokobanEnvPool = AsyncEnvPool<SokobanEnv>;
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index de0d6f70..d4723b38 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -3,6 +3,8 @@
 import os
 import time
 
+import envpool  # noqa: F401
+import envpool.sokoban.registration
 import numpy as np
 from absl import logging
 from absl.testing import absltest
@@ -29,6 +31,7 @@ def test_config(self) -> None:
             "reward_box",
             "reward_finished",
             "reward_step",
+            "verbose",
         ]
         default_conf = _SokobanEnvSpec._default_config_values
         self.assertTrue(isinstance(default_conf, tuple))
@@ -38,43 +41,42 @@ def test_config(self) -> None:
         self.assertEqual(sorted(config_keys), sorted(ref_config_keys))
 
     def test_envpool(self) -> None:
-        conf = dict(
-            zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values)
+        batch = num_envs = 200
+        env = envpool.make(
+            "Sokoban-v0",
+            env_type="gymnasium",
+            num_envs=num_envs,
+            batch_size=num_envs,
+            seed=2346890,
+            max_episode_steps=60,
+            reward_step=-0.1,
+            dim_room=10,
+            levels_dir="/aa/boxoban-levels-master/unfiltered/train",
         )
-        conf["num_envs"] = num_envs = 200
-        conf["batch_size"] = batch = 100
-        conf["num_threads"] = 10
-        env_spec = _SokobanEnvSpec(tuple(conf.values()))
-        env = _SokobanEnvPool(env_spec)
-        state_keys = env._state_keys
-        total = 1
-        env._reset(np.arange(num_envs, dtype=np.int32))
-        raise ValueError("resetted")
+        total_steps = 1000
+
+        _ = env.reset()
         t = time.time()
-        for _ in range(total):
-            state = dict(zip(state_keys, env._recv()))
-            action = {
-                "env_id": state["info:env_id"],
-                "players.env_id": state["info:players.env_id"],
-                "list_action": np.zeros((batch, 6), dtype=np.float64),
-                "players.id": state["info:players.id"],
-                "players.action": state["info:players.id"],
-            }
-            # env._send(tuple(action.values()))
+        for _ in range(total_steps):
+            _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,)))
         duration = time.time() - t
-        fps = total * batch / duration
+        fps = total_steps * batch / duration
         logging.info(f"FPS = {fps:.6f}")
 
     def test_xla(self) -> None:
-        conf = dict(
-            zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values)
+        num_envs = 10
+        env = envpool.make(
+            "Sokoban-v0",
+            env_type="dm",
+            num_envs=num_envs,
+            batch_size=num_envs,
+            seed=2346890,
+            max_episode_steps=60,
+            reward_step=-0.1,
+            dim_room=10,
+            levels_dir="/aa/boxoban-levels-master/unfiltered/train",
         )
-        conf["num_envs"] = 100
-        conf["batch_size"] = 31
-        conf["num_threads"] = os.cpu_count()
-        env_spec = _SokobanEnvSpec(tuple(conf.values()))
-        env = _SokobanEnvPool(env_spec)
-        _ = env._xla()
+        handle, recv, send, step = env.xla()
 
 
 if __name__ == "__main__":

From eccacf689074238b2967900f9dcd82c9261c89c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Thu, 11 Jan 2024 17:30:40 -0800
Subject: [PATCH 10/60] Format

---
 envpool/sokoban/sokoban_py_envpool_test.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index d4723b38..9eede62d 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -1,6 +1,5 @@
 """Unit test for dummy envpool and speed benchmark."""
 
-import os
 import time
 
 import envpool  # noqa: F401
@@ -8,7 +7,7 @@
 import numpy as np
 from absl import logging
 from absl.testing import absltest
-from envpool.sokoban.sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec
+from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec
 
 
 class _SokobanEnvPoolTest(absltest.TestCase):

From 4a52fa56a284216f1b72e4d6c770ac825b65b20d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 12:18:41 -0800
Subject: [PATCH 11/60] Order bug in moving player

---
 envpool/sokoban/sokoban_envpool.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index b6467466..f514e888 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -106,12 +106,13 @@ void SokobanEnv::Step(const Action& action_) {
       arena.at(2) = is_target.at(2) ? BOX_ON_TARGET : BOX;
     }
 
-    player_x += delta_x;
-    player_y += delta_y;
     for (size_t i = 0; i < arena.size(); i++) {
       WorldAssignAt(player_x + delta_x * i, player_y + delta_y * i,
                     arena.at(i));
     }
+    // After assigning the arena, move player.
+    player_x += delta_x;
+    player_y += delta_y;
   }
 
   const float reward =

From 3fb0c5c951f8cfefea4144d263e20a1cfa90c5f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 18:11:28 -0800
Subject: [PATCH 12/60] Fix directions

---
 envpool/sokoban/sokoban_envpool.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index f514e888..eca11551 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -47,7 +47,7 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) {
 }
 
 constexpr std::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
-    {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}};
+    {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}};
 
 void SokobanEnv::Step(const Action& action_) {
   const int action = action_["action"_];

From ac948ca445fbd497e831cd9dcaa6afa9003606f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 18:11:36 -0800
Subject: [PATCH 13/60] Rewards are doubles

---
 envpool/sokoban/sokoban_envpool.cc |  8 ++++----
 envpool/sokoban/sokoban_envpool.h  | 14 +++++++-------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index eca11551..a922e4be 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -52,7 +52,7 @@ constexpr std::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
 void SokobanEnv::Step(const Action& action_) {
   const int action = action_["action"_];
   if (action == ACT_NOOP) {
-    WriteState(reward_step);
+    WriteState(static_cast<float>(reward_step));
     return;
   }
   // From here on, assume the agent will try to move
@@ -115,11 +115,11 @@ void SokobanEnv::Step(const Action& action_) {
     player_y += delta_y;
   }
 
-  const float reward =
+  const double reward =
       reward_step +
-      reward_box * static_cast<float>(prev_unmatched_boxes - unmatched_boxes) +
+      reward_box * static_cast<double>(prev_unmatched_boxes - unmatched_boxes) +
       (IsDone() ? reward_finished : 0.0f);
-  WriteState(reward);
+  WriteState(static_cast<float>(reward));
 }
 
 constexpr std::array<std::array<uint8_t, 3>, PLAYER_ON_TARGET + 1> TINY_COLORS =
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 03728ed8..5b533fcd 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -25,9 +25,9 @@ class SokobanEnvFns {
  public:
   static decltype(auto) DefaultConfig() {
     return MakeDict(
-      "reward_finished"_.Bind(10.0f),
-      "reward_box"_.Bind(1.0f),
-      "reward_step"_.Bind(-0.1f),
+      "reward_finished"_.Bind(10.0),
+      "reward_box"_.Bind(1.0),
+      "reward_step"_.Bind(-0.1),
       "dim_room"_.Bind(10),
       "levels_dir"_.Bind(std::string("")),
       "verbose"_.Bind(0)
@@ -53,9 +53,9 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
       : Env<SokobanEnvSpec>(spec, env_id),
         max_episode_steps{spec.config["max_episode_steps"_]},
         dim_room{static_cast<int>(spec.config["dim_room"_])},
-        reward_finished{static_cast<float>(spec.config["reward_finished"_])},
-        reward_box{static_cast<float>(spec.config["reward_box"_])},
-        reward_step{static_cast<float>(spec.config["reward_step"_])},
+        reward_finished{static_cast<double>(spec.config["reward_finished"_])},
+        reward_box{static_cast<double>(spec.config["reward_box"_])},
+        reward_step{static_cast<double>(spec.config["reward_step"_])},
         levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
         level_loader(levels_dir),
         world(WALL, static_cast<std::size_t>(dim_room * dim_room)),
@@ -69,7 +69,7 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
 
  private:
   int max_episode_steps, dim_room;
-  float reward_finished, reward_box, reward_step;
+  double reward_finished, reward_box, reward_step;
   std::filesystem::path levels_dir;
 
   LevelLoader level_loader;

From 2dc167db0b21c14af2fc3f5f257cab524c3d9f72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 18:59:14 -0800
Subject: [PATCH 14/60] Reverse left/right

---
 envpool/sokoban/sokoban_envpool.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index a922e4be..a991405f 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -47,7 +47,7 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) {
 }
 
 constexpr std::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
-    {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}};
+    {{0, -1}, {0, 1}, {1, 0}, {-1, 0}}};
 
 void SokobanEnv::Step(const Action& action_) {
   const int action = action_["action"_];

From 0c4b1f2d6ac26f0163254e0518dcb753ff75d584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 19:25:17 -0800
Subject: [PATCH 15/60] Reverse again, debugging code.

---
 envpool/sokoban/sokoban_envpool.cc | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index a991405f..13460cec 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -3,6 +3,7 @@
 #include <array>
 #include <sstream>
 #include <stdexcept>
+#include <iostream>
 
 #include "envpool/core/py_envpool.h"
 
@@ -47,7 +48,7 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) {
 }
 
 constexpr std::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
-    {{0, -1}, {0, 1}, {1, 0}, {-1, 0}}};
+    {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}};
 
 void SokobanEnv::Step(const Action& action_) {
   const int action = action_["action"_];
@@ -82,6 +83,24 @@ void SokobanEnv::Step(const Action& action_) {
   const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) ||
                            is_a_box_and_the_box_moves;
 
+  std::cout << "arena.at(0) " << arena.at(0) << std::endl;
+  std::cout << "arena.at(1) " << arena.at(1) << std::endl;
+  std::cout << "arena.at(2) " << arena.at(2) << std::endl;
+
+  std::cout << "box_moves " << box_moves << std::endl;
+  std::cout << "  (action <= ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << std::endl;
+  std::cout << "  (arena.at(1) == BOX) = " << (arena.at(1) == BOX) << std::endl;
+  std::cout << "  (arena.at(1) == BOX_ON_TARGET) = " << (arena.at(1) == BOX_ON_TARGET) << std::endl;
+  std::cout << "  (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl;
+  std::cout << "  (arena.at(2) == TARGET) = " << (arena.at(2) == TARGET) << std::endl;
+
+  std::cout << "is_a_box_and_the_box_moves " << is_a_box_and_the_box_moves << std::endl;
+
+  std::cout << "agent_moves " << agent_moves << std::endl;
+  std::cout << "  (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl;
+  std::cout << "  (arena.at(1) == TARGET) = " << (arena.at(1) == TARGET) << std::endl;
+  std::cout << "  is_a_box_and_the_box_moves = " << is_a_box_and_the_box_moves << std::endl;
+
   if (agent_moves) {
     // `is_target` is boolean but we'll need it as an int later
     std::array<int, arena.size()> is_target;

From 96c5636601e7caaebabbb3c809c5edf285fd68a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 20:48:40 -0800
Subject: [PATCH 16/60] Print action names

---
 envpool/sokoban/sokoban_envpool.cc | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 13460cec..9674caf5 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -50,6 +50,18 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) {
 constexpr std::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
     {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}};
 
+constexpr std::array<const char *, MAX_ACTION+1> action_names = {
+  "ACT_NOOP",
+  "ACT_PUSH_UP",
+  "ACT_PUSH_DOWN",
+  "ACT_PUSH_LEFT",
+  "ACT_PUSH_RIGHT",
+  "ACT_MOVE_UP",
+  "ACT_MOVE_DOWN",
+  "ACT_MOVE_LEFT",
+  "ACT_MOVE_RIGHT",
+};
+
 void SokobanEnv::Step(const Action& action_) {
   const int action = action_["action"_];
   if (action == ACT_NOOP) {
@@ -83,9 +95,9 @@ void SokobanEnv::Step(const Action& action_) {
   const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) ||
                            is_a_box_and_the_box_moves;
 
-  std::cout << "arena.at(0) " << arena.at(0) << std::endl;
-  std::cout << "arena.at(1) " << arena.at(1) << std::endl;
-  std::cout << "arena.at(2) " << arena.at(2) << std::endl;
+  std::cout << "arena.at(0) " << action_names.at(arena.at(0)) << std::endl;
+  std::cout << "arena.at(1) " << action_names.at(arena.at(1)) << std::endl;
+  std::cout << "arena.at(2) " << action_names.at(arena.at(2)) << std::endl;
 
   std::cout << "box_moves " << box_moves << std::endl;
   std::cout << "  (action <= ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << std::endl;

From c4e0638fc56b61d81d22492994bb2953d6bf70ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 23:00:26 -0800
Subject: [PATCH 17/60] Print arena things

---
 envpool/sokoban/sokoban_envpool.cc | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 9674caf5..a1522d81 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -62,6 +62,18 @@ constexpr std::array<const char *, MAX_ACTION+1> action_names = {
   "ACT_MOVE_RIGHT",
 };
 
+
+constexpr std::array<const char *, 7> arena_names = {
+  "WALL",
+  "EMPTY",
+  "TARGET",
+  "BOX_ON_TARGET",
+  "BOX",
+  "PLAYER",
+  "PLAYER_ON_TARGET",
+};
+
+
 void SokobanEnv::Step(const Action& action_) {
   const int action = action_["action"_];
   if (action == ACT_NOOP) {
@@ -95,9 +107,9 @@ void SokobanEnv::Step(const Action& action_) {
   const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) ||
                            is_a_box_and_the_box_moves;
 
-  std::cout << "arena.at(0) " << action_names.at(arena.at(0)) << std::endl;
-  std::cout << "arena.at(1) " << action_names.at(arena.at(1)) << std::endl;
-  std::cout << "arena.at(2) " << action_names.at(arena.at(2)) << std::endl;
+  std::cout << "arena.at(0) " << arena_names.at(arena.at(0)) << std::endl;
+  std::cout << "arena.at(1) " << arena_names.at(arena.at(1)) << std::endl;
+  std::cout << "arena.at(2) " << arena_names.at(arena.at(2)) << std::endl;
 
   std::cout << "box_moves " << box_moves << std::endl;
   std::cout << "  (action <= ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << std::endl;

From 57302ea0fbd283196de642b0de6bb6b10043fbb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 23:04:50 -0800
Subject: [PATCH 18/60] Typo in where in arena to look

---
 envpool/sokoban/sokoban_envpool.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index a1522d81..11cfaa76 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -99,7 +99,7 @@ void SokobanEnv::Step(const Action& action_) {
   const bool box_moves =
       ((action <= ACT_PUSH_RIGHT) &&
        ((arena.at(1) == BOX) || (arena.at(1) == BOX_ON_TARGET)) &&
-       ((arena.at(1) == EMPTY) || (arena.at(2) == TARGET)));
+       ((arena.at(2) == EMPTY) || (arena.at(2) == TARGET)));
 
   // The agent will move if the next arena location is possible to move into, or
   // if it's a box and the box moves

From 0a2f05b875ab750d1193cc19081ab864da1c6733 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Jan 2024 23:11:04 -0800
Subject: [PATCH 19/60] Don't print actions anymore

---
 envpool/sokoban/sokoban_envpool.cc | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 11cfaa76..6629d873 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -3,7 +3,6 @@
 #include <array>
 #include <sstream>
 #include <stdexcept>
-#include <iostream>
 
 #include "envpool/core/py_envpool.h"
 
@@ -107,24 +106,6 @@ void SokobanEnv::Step(const Action& action_) {
   const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) ||
                            is_a_box_and_the_box_moves;
 
-  std::cout << "arena.at(0) " << arena_names.at(arena.at(0)) << std::endl;
-  std::cout << "arena.at(1) " << arena_names.at(arena.at(1)) << std::endl;
-  std::cout << "arena.at(2) " << arena_names.at(arena.at(2)) << std::endl;
-
-  std::cout << "box_moves " << box_moves << std::endl;
-  std::cout << "  (action <= ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << std::endl;
-  std::cout << "  (arena.at(1) == BOX) = " << (arena.at(1) == BOX) << std::endl;
-  std::cout << "  (arena.at(1) == BOX_ON_TARGET) = " << (arena.at(1) == BOX_ON_TARGET) << std::endl;
-  std::cout << "  (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl;
-  std::cout << "  (arena.at(2) == TARGET) = " << (arena.at(2) == TARGET) << std::endl;
-
-  std::cout << "is_a_box_and_the_box_moves " << is_a_box_and_the_box_moves << std::endl;
-
-  std::cout << "agent_moves " << agent_moves << std::endl;
-  std::cout << "  (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl;
-  std::cout << "  (arena.at(1) == TARGET) = " << (arena.at(1) == TARGET) << std::endl;
-  std::cout << "  is_a_box_and_the_box_moves = " << is_a_box_and_the_box_moves << std::endl;
-
   if (agent_moves) {
     // `is_target` is boolean but we'll need it as an int later
     std::array<int, arena.size()> is_target;

From bd50d738eab5f4138c98a2ba892b8bbf018c2022 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 19 Jan 2024 22:12:21 -0800
Subject: [PATCH 20/60] Working with bug

---
 envpool/sokoban/sokoban_envpool.h | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 5b533fcd..3347f6fd 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -2,6 +2,8 @@
 #define ENVPOOL_SOKOBAN_H_
 
 #include <filesystem>
+#include <sstream>
+#include <stdexcept>
 
 #include "envpool/core/array.h"
 #include "envpool/core/async_envpool.h"
@@ -24,14 +26,9 @@ constexpr int MAX_ACTION = ACT_MOVE_RIGHT;
 class SokobanEnvFns {
  public:
   static decltype(auto) DefaultConfig() {
-    return MakeDict(
-      "reward_finished"_.Bind(10.0),
-      "reward_box"_.Bind(1.0),
-      "reward_step"_.Bind(-0.1),
-      "dim_room"_.Bind(10),
-      "levels_dir"_.Bind(std::string("")),
-      "verbose"_.Bind(0)
-    );
+    return MakeDict("reward_finished"_.Bind(10.0), "reward_box"_.Bind(1.0),
+                    "reward_step"_.Bind(-0.1), "dim_room"_.Bind(10),
+                    "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0));
   }
   template <typename Config>
   static decltype(auto) StateSpec(const Config& conf) {
@@ -59,9 +56,23 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
         levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
         level_loader(levels_dir),
         world(WALL, static_cast<std::size_t>(dim_room * dim_room)),
-        verbose(static_cast<int>(spec.config["verbose"_])) {}
+        verbose(static_cast<int>(spec.config["verbose"_])) {
+    if (max_num_players_ != spec_.config["max_num_players"_]) {
+      std::stringstream msg;
+      msg << "max_num_players_ != spec_['max_num_players'] " << max_num_players_
+          << " != " << spec_.config["max_num_players"_] << std::endl;
+      throw std::runtime_error(msg.str());
+    }
 
-  bool IsDone() override { return unmatched_boxes == 0; }
+    if (max_num_players_ != spec.config["max_num_players"_]) {
+      std::stringstream msg;
+      msg << "max_num_players_ != spec['max_num_players'] " << max_num_players_
+          << " != " << spec.config["max_num_players"_] << std::endl;
+      throw std::runtime_error(msg.str());
+    }
+  }
+
+  bool IsDone() override { return (unmatched_boxes == 0) || (); }
   void Reset() override;
   void Step(const Action& action) override;
 

From a85d4fda25eb20d6e2f980cefaa1cb71b0465b7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 19 Jan 2024 22:16:50 -0800
Subject: [PATCH 21/60] solve type error

---
 envpool/sokoban/sokoban_envpool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 3347f6fd..3e3b2abe 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -72,7 +72,7 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
     }
   }
 
-  bool IsDone() override { return (unmatched_boxes == 0) || (); }
+  bool IsDone() override { return unmatched_boxes == 0; }
   void Reset() override;
   void Step(const Action& action) override;
 

From 0853a48ce09986e544fee330b2a5303d3d44151e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 19 Jan 2024 23:12:05 -0800
Subject: [PATCH 22/60] Make sure env does indeed truncate at the correct
 number of steps

---
 envpool/sokoban/sample_levels/000.txt      | 35 ++++++++++++++++++++++
 envpool/sokoban/sokoban_envpool.cc         |  3 ++
 envpool/sokoban/sokoban_envpool.h          |  8 +++--
 envpool/sokoban/sokoban_py_envpool_test.py | 23 ++++++++++++--
 4 files changed, 64 insertions(+), 5 deletions(-)
 create mode 100644 envpool/sokoban/sample_levels/000.txt

diff --git a/envpool/sokoban/sample_levels/000.txt b/envpool/sokoban/sample_levels/000.txt
new file mode 100644
index 00000000..e0dbf4cb
--- /dev/null
+++ b/envpool/sokoban/sample_levels/000.txt
@@ -0,0 +1,35 @@
+; 0
+##########
+#@ #######
+#.$#######
+#   ######
+#.$ ######
+#  #######
+# $ ######
+#$ . #####
+# .  #####
+##########
+
+; 1
+##########
+##########
+# ###@####
+#    $   #
+#   $    #
+# ##  ####
+#.##  ####
+# ###$$.##
+#  .   . #
+##########
+
+; 2
+##########
+#####   ##
+#####   ##
+####.    #
+# .  $@ ##
+# $ $ $ ##
+#   .   ##
+#####.   #
+######   #
+##########
diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 6629d873..66656293 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -30,6 +30,7 @@ void SokobanEnv::Reset() {
       }
     }
   }
+  current_step_ = 0;
   WriteState(0.0f);
 }
 
@@ -74,6 +75,8 @@ constexpr std::array<const char *, 7> arena_names = {
 
 
 void SokobanEnv::Step(const Action& action_) {
+  current_step_++;
+
   const int action = action_["action"_];
   if (action == ACT_NOOP) {
     WriteState(static_cast<float>(reward_step));
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 3e3b2abe..6ed247be 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -48,7 +48,6 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
  public:
   SokobanEnv(const Spec& spec, int env_id)
       : Env<SokobanEnvSpec>(spec, env_id),
-        max_episode_steps{spec.config["max_episode_steps"_]},
         dim_room{static_cast<int>(spec.config["dim_room"_])},
         reward_finished{static_cast<double>(spec.config["reward_finished"_])},
         reward_box{static_cast<double>(spec.config["reward_box"_])},
@@ -72,14 +71,16 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
     }
   }
 
-  bool IsDone() override { return unmatched_boxes == 0; }
+  bool IsDone() override {
+    const int max_episode_steps = spec_.config["max_episode_steps"_];
+    return (unmatched_boxes == 0) || (current_step_ >= max_episode_steps); }
   void Reset() override;
   void Step(const Action& action) override;
 
   void WriteState(float reward);
 
  private:
-  int max_episode_steps, dim_room;
+  int dim_room;
   double reward_finished, reward_box, reward_step;
   std::filesystem::path levels_dir;
 
@@ -87,6 +88,7 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
   SokobanLevel world;
   int verbose;
 
+  int current_step_{0};
   int player_x{0}, player_y{0};
   int unmatched_boxes{0};
 
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 9eede62d..fb1cba94 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -50,7 +50,7 @@ def test_envpool(self) -> None:
             max_episode_steps=60,
             reward_step=-0.1,
             dim_room=10,
-            levels_dir="/aa/boxoban-levels-master/unfiltered/train",
+            levels_dir="/app/envpool/sokoban/sample_levels",
         )
         total_steps = 1000
 
@@ -62,6 +62,25 @@ def test_envpool(self) -> None:
         fps = total_steps * batch / duration
         logging.info(f"FPS = {fps:.6f}")
 
+    def test_envpool_max_episode_steps(self) -> None:
+        for max_episode_steps in [2, 5, 10]:
+            env = envpool.make(
+                "Sokoban-v0",
+                env_type="gymnasium",
+                num_envs=1,
+                batch_size=1,
+                max_episode_steps=max_episode_steps,
+                levels_dir="/app/envpool/sokoban/sample_levels",
+            )
+            env.reset()
+            for _ in range(max_episode_steps - 1):
+                _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
+                assert not np.any(terminated | truncated)
+
+            _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
+            assert not np.any(terminated)
+            assert np.all(truncated)
+
     def test_xla(self) -> None:
         num_envs = 10
         env = envpool.make(
@@ -73,7 +92,7 @@ def test_xla(self) -> None:
             max_episode_steps=60,
             reward_step=-0.1,
             dim_room=10,
-            levels_dir="/aa/boxoban-levels-master/unfiltered/train",
+            levels_dir="/app/envpool/sokoban/sample_levels",
         )
         handle, recv, send, step = env.xla()
 

From af25a1fa4ed3164dd9d81d368925dde6a524941f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sun, 21 Jan 2024 08:45:05 -0800
Subject: [PATCH 23/60] Only give a reward if the number of boxes left is 0

---
 envpool/sokoban/sokoban_envpool.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 66656293..7faa4e84 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -145,7 +145,7 @@ void SokobanEnv::Step(const Action& action_) {
   const double reward =
       reward_step +
       reward_box * static_cast<double>(prev_unmatched_boxes - unmatched_boxes) +
-      (IsDone() ? reward_finished : 0.0f);
+      ((unmatched_boxes == 0) ? reward_finished : 0.0f);
   WriteState(static_cast<float>(reward));
 }
 

From a0bba8e08358f7e81ddecc9cdca96777d52a6584 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sun, 21 Jan 2024 10:51:34 -0800
Subject: [PATCH 24/60] Display the unmatched boxes info

---
 envpool/sokoban/sokoban_envpool.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 7faa4e84..a61c0fa7 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -114,8 +114,12 @@ void SokobanEnv::Step(const Action& action_) {
     std::array<int, arena.size()> is_target;
     for (size_t i = 0; i < arena.size(); i++) {
       uint8_t tile = arena.at(i);
+      // We explicitly set them to 0 or 1 because false/true are not guaranteed
+      // to be 0/1.
       is_target.at(i) =
-          (tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET);
+          ((tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET)
+               ? 1
+               : 0);
     }
     // only whatever was on the floor is now at position 0
     arena.at(0) = is_target.at(0) ? TARGET : EMPTY;
@@ -163,6 +167,7 @@ constexpr std::array<std::array<uint8_t, 3>, PLAYER_ON_TARGET + 1> TINY_COLORS =
 void SokobanEnv::WriteState(float reward) {
   auto state = Allocate();
   state["reward"_] = reward;
+  state["info:unmatched_boxes"_] = unmatched_boxes;
   Array& obs = state["obs"_];
   if (obs.size != 3 * world.size()) {
     std::stringstream msg;

From 9de2c16e639fa547d7e281507514a4e23e6e9018 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sun, 21 Jan 2024 11:06:23 -0800
Subject: [PATCH 25/60] Add unmatched boxes to spec

---
 envpool/sokoban/sokoban_envpool.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 6ed247be..a2b30ff2 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -33,7 +33,8 @@ class SokobanEnvFns {
   template <typename Config>
   static decltype(auto) StateSpec(const Config& conf) {
     int dim_room = conf["dim_room"_];
-    return MakeDict("obs"_.Bind(Spec<uint8_t>({3, dim_room, dim_room})));
+    return MakeDict("obs"_.Bind(Spec<uint8_t>({3, dim_room, dim_room})),
+                    "info:unmatched_boxes"_.Bind(Spec<int>({})));
   }
   template <typename Config>
   static decltype(auto) ActionSpec(const Config& conf) {

From ae6a2d7f481367de6a63cc990dd7f9f50729f1c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sun, 21 Jan 2024 11:50:48 -0800
Subject: [PATCH 26/60] Print reward boxes

---
 envpool/sokoban/sokoban_envpool.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index a61c0fa7..56fa3aac 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -3,6 +3,7 @@
 #include <array>
 #include <sstream>
 #include <stdexcept>
+#include <iostream>
 
 #include "envpool/core/py_envpool.h"
 
@@ -149,7 +150,10 @@ void SokobanEnv::Step(const Action& action_) {
   const double reward =
       reward_step +
       reward_box * static_cast<double>(prev_unmatched_boxes - unmatched_boxes) +
-      ((unmatched_boxes == 0) ? reward_finished : 0.0f);
+      ((unmatched_boxes == 0) ? reward_finished : 0.0);
+  std::cout << "prev_unmatched_boxes=" << prev_unmatched_boxes
+            << ", unmatched_boxes=" << unmatched_boxes
+            << ", so reward=" << reward << "\n";
   WriteState(static_cast<float>(reward));
 }
 

From 9d3d3882a747e7484063a93fca5b1d98df1b9a7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sun, 21 Jan 2024 12:14:50 -0800
Subject: [PATCH 27/60] Revert "Print reward boxes"

This reverts commit ae6a2d7f481367de6a63cc990dd7f9f50729f1c4.
---
 envpool/sokoban/sokoban_envpool.cc | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 56fa3aac..a61c0fa7 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -3,7 +3,6 @@
 #include <array>
 #include <sstream>
 #include <stdexcept>
-#include <iostream>
 
 #include "envpool/core/py_envpool.h"
 
@@ -150,10 +149,7 @@ void SokobanEnv::Step(const Action& action_) {
   const double reward =
       reward_step +
       reward_box * static_cast<double>(prev_unmatched_boxes - unmatched_boxes) +
-      ((unmatched_boxes == 0) ? reward_finished : 0.0);
-  std::cout << "prev_unmatched_boxes=" << prev_unmatched_boxes
-            << ", unmatched_boxes=" << unmatched_boxes
-            << ", so reward=" << reward << "\n";
+      ((unmatched_boxes == 0) ? reward_finished : 0.0f);
   WriteState(static_cast<float>(reward));
 }
 

From 448be932b310bff7a6c1fdd4705ae7ed596566b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Sun, 21 Jan 2024 12:15:40 -0800
Subject: [PATCH 28/60] Revert "Add unmatched boxes to spec"

This reverts commit 9de2c16e639fa547d7e281507514a4e23e6e9018.
---
 envpool/sokoban/sokoban_envpool.cc | 1 -
 envpool/sokoban/sokoban_envpool.h  | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index a61c0fa7..e6cbdd39 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -167,7 +167,6 @@ constexpr std::array<std::array<uint8_t, 3>, PLAYER_ON_TARGET + 1> TINY_COLORS =
 void SokobanEnv::WriteState(float reward) {
   auto state = Allocate();
   state["reward"_] = reward;
-  state["info:unmatched_boxes"_] = unmatched_boxes;
   Array& obs = state["obs"_];
   if (obs.size != 3 * world.size()) {
     std::stringstream msg;
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index a2b30ff2..6ed247be 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -33,8 +33,7 @@ class SokobanEnvFns {
   template <typename Config>
   static decltype(auto) StateSpec(const Config& conf) {
     int dim_room = conf["dim_room"_];
-    return MakeDict("obs"_.Bind(Spec<uint8_t>({3, dim_room, dim_room})),
-                    "info:unmatched_boxes"_.Bind(Spec<int>({})));
+    return MakeDict("obs"_.Bind(Spec<uint8_t>({3, dim_room, dim_room})));
   }
   template <typename Config>
   static decltype(auto) ActionSpec(const Config& conf) {

From 653dab6b8efed79292e0ee7edd6394bda0dc3f37 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Wed, 7 Feb 2024 22:41:54 -0800
Subject: [PATCH 29/60] Random episode length every time

---
 envpool/sokoban/sokoban_envpool.cc         |  5 +++++
 envpool/sokoban/sokoban_envpool.h          | 10 ++++++----
 envpool/sokoban/sokoban_py_envpool_test.py |  1 +
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index e6cbdd39..dff613c7 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -9,6 +9,11 @@
 namespace sokoban {
 
 void SokobanEnv::Reset() {
+  const int max_episode_steps = spec_.config["max_episode_steps"_];
+  const int min_episode_steps = spec_.config["min_episode_steps"_];
+  std::uniform_int_distribution<int> episode_length_rand(min_episode_steps, max_episode_steps);
+  current_max_episode_steps_ = episode_length_rand(gen_);
+
   world = *level_loader.RandomLevel(gen_);
   if (world.size() != dim_room * dim_room) {
     std::stringstream msg;
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 6ed247be..40ff47c1 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -28,7 +28,8 @@ class SokobanEnvFns {
   static decltype(auto) DefaultConfig() {
     return MakeDict("reward_finished"_.Bind(10.0), "reward_box"_.Bind(1.0),
                     "reward_step"_.Bind(-0.1), "dim_room"_.Bind(10),
-                    "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0));
+                    "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0),
+                    "min_episode_steps"_.Bind(0));
   }
   template <typename Config>
   static decltype(auto) StateSpec(const Config& conf) {
@@ -55,7 +56,8 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
         levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
         level_loader(levels_dir),
         world(WALL, static_cast<std::size_t>(dim_room * dim_room)),
-        verbose(static_cast<int>(spec.config["verbose"_])) {
+        verbose(static_cast<int>(spec.config["verbose"_])),
+        current_max_episode_steps_(static_cast<int>(spec.config["max_episode_steps"_])) {
     if (max_num_players_ != spec_.config["max_num_players"_]) {
       std::stringstream msg;
       msg << "max_num_players_ != spec_['max_num_players'] " << max_num_players_
@@ -72,8 +74,7 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
   }
 
   bool IsDone() override {
-    const int max_episode_steps = spec_.config["max_episode_steps"_];
-    return (unmatched_boxes == 0) || (current_step_ >= max_episode_steps); }
+    return (unmatched_boxes == 0) || (current_step_ >= current_max_episode_steps_); }
   void Reset() override;
   void Step(const Action& action) override;
 
@@ -88,6 +89,7 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
   SokobanLevel world;
   int verbose;
 
+  int current_max_episode_steps_;
   int current_step_{0};
   int player_x{0}, player_y{0};
   int unmatched_boxes{0};
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index fb1cba94..c0f3973f 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -22,6 +22,7 @@ def test_config(self) -> None:
             "num_threads",
             "seed",
             "thread_affinity_offset",
+            "min_episode_steps",
             # Default and also used by sokoban
             "max_episode_steps",
             # defined by sokoban

From 4a6e7686af7ed300a02539b80ce29c50955a19bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Thu, 7 Mar 2024 00:50:47 -0800
Subject: [PATCH 30/60] CircleCI now runs linters and tests (#2)

* Clang-format now passes

* cpplint now passes

* Install only the requirements needed for Sokoban even when testing

* Satisfy the linters

* Ran `make format`

* Ability to run python tests

* Devtools added

* Run every linter and test

* Fix schema

* Run on CircleCI

* Fix lint complaint about top-level std::string const.
---
 .circleci/config.yml                          |  80 ++++++++
 .clang-tidy                                   |   5 +
 .dir-locals.el                                |   4 +
 CPPLINT.cfg                                   |   4 +-
 Makefile                                      |  18 +-
 envpool/classic_control/pendulum.h            |  11 +-
 envpool/core/env_spec.h                       |   4 +-
 envpool/sokoban/BUILD                         |  27 ++-
 envpool/sokoban/__init__.py                   |  30 ++-
 envpool/sokoban/level_loader.cc               |  86 +++++----
 envpool/sokoban/level_loader.h                |  50 +++--
 envpool/sokoban/registration.py               |  30 ++-
 envpool/sokoban/sokoban_envpool.cc            | 173 ++++++++---------
 envpool/sokoban/sokoban_envpool.h             |  88 +++++----
 envpool/sokoban/sokoban_py_envpool_test.py    | 182 ++++++++++--------
 envpool/workspace0.bzl                        |   1 +
 .../requirements-devtools.txt                 |   5 +
 .../pip_requirements/requirements-sokoban.txt |   1 +
 18 files changed, 505 insertions(+), 294 deletions(-)
 create mode 100644 .circleci/config.yml
 create mode 100644 .dir-locals.el
 create mode 100644 third_party/pip_requirements/requirements-devtools.txt
 create mode 120000 third_party/pip_requirements/requirements-sokoban.txt

diff --git a/.circleci/config.yml b/.circleci/config.yml
new file mode 100644
index 00000000..9d5399e2
--- /dev/null
+++ b/.circleci/config.yml
@@ -0,0 +1,80 @@
+version: 2.1
+
+parameters:
+  action:
+    type: enum
+    enum: [oncommit, docker]
+    default: oncommit
+
+  docker_img_version:
+    # Docker image version for running tests.
+    type: string
+    default: "8f41d1e-envpool-ci"
+
+workflows:
+  test-jobs:
+    when:
+      equal: [oncommit, << pipeline.parameters.action >>]
+    jobs:
+      - lint:
+          context:
+            - ghcr-auth
+      - tests:
+          context:
+            - ghcr-auth
+
+jobs:
+  lint:
+    docker:
+      - image: ghcr.io/alignmentresearch/learned-planners:<< pipeline.parameters.docker_img_version >>
+        auth:
+          username: "$GHCR_DOCKER_USER"
+          password: "$GHCR_DOCKER_TOKEN"
+    resource_class: medium
+    working_directory: /app
+    steps:
+      - checkout
+      # Copied from .github/workflows/lint.yml
+      - run:
+          name: flake8
+          command: |
+            make flake8
+      - run:
+          name: isort and yapf
+          command: |
+            make py-format
+      - run:
+          name: cpplint
+          command: |
+            make cpplint
+      - run:
+          name: clang-format
+          command: |
+            make clang-format
+      - run:
+          name: clang-tidy
+          command: |
+            make clang-tidy
+      - run:
+          name: buildifier
+          command: |
+            make buildifier
+      - run:
+          name: addlicense
+          command: |
+            make addlicense
+      # Skip mypy, docstyle and spelling
+
+  tests:
+    docker:
+      - image: ghcr.io/alignmentresearch/learned-planners:<< pipeline.parameters.docker_img_version >>
+        auth:
+          username: "$GHCR_DOCKER_USER"
+          password: "$GHCR_DOCKER_TOKEN"
+    resource_class: medium
+    working_directory: /app
+    steps:
+      - checkout
+      - run:
+          name: Run tests
+          command: make bazel-test
diff --git a/.clang-tidy b/.clang-tidy
index d62bd5b9..fed4549b 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# Disable google-runtime-references, because passing by reference is less
+# error-prone than passing by pointer. Google actually removed it from their style
+# guide (https://www.mail-archive.com/cfe-commits@lists.llvm.org/msg203119.html)
 ---
 Checks:     '
             bugprone-*,
@@ -29,6 +33,7 @@ Checks:     '
             -readability-magic-numbers,
             -readability-static-accessed-through-instance,
             -readability-uppercase-literal-suffix,
+            -google-runtime-references,
             '
 CheckOptions:
   - { key: readability-identifier-naming.ClassCase,             value: CamelCase  }
diff --git a/.dir-locals.el b/.dir-locals.el
new file mode 100644
index 00000000..4fa827c3
--- /dev/null
+++ b/.dir-locals.el
@@ -0,0 +1,4 @@
+;; Don't format Python in this directory with the Emacs formatter, it conflicts
+;; with `make format`.
+((python-mode . nil)
+ (js-json-mode . nil))
diff --git a/CPPLINT.cfg b/CPPLINT.cfg
index aa112bc3..ced4a13f 100644
--- a/CPPLINT.cfg
+++ b/CPPLINT.cfg
@@ -1 +1,3 @@
-filter=-build/c++11,+build/c++17,-build/include_subdir
+# Disable runtime/references: the rule is no longer in the Google style guide,
+# and passing by reference is less error-prone than by pointer. See .clang-tidy
+filter=-build/c++11,+build/c++17,-build/include_subdir,-runtime/references
diff --git a/Makefile b/Makefile
index 0a024181..822be03c 100644
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,8 @@ PYTHON_FILES   = $(shell find . -type f -name "*.py")
 CPP_FILES      = $(shell find $(PROJECT_NAME) -type f -name "*.h" -o -name "*.cc")
 BAZEL_FILES    = $(shell find . -type f -name "*BUILD" -o -name "*.bzl")
 COMMIT_HASH    = $(shell git log -1 --format=%h)
-COPYRIGHT      = "Garena Online Private Limited"
+COPYRIGHT      = "FAR AI"
+COPYRIGHT_YEAR = "2023-2024"
 BAZELOPT       =
 DATE           = $(shell date "+%Y-%m-%d")
 DOCKER_TAG     = $(DATE)-$(COMMIT_HASH)
@@ -79,6 +80,9 @@ cpplint: cpplint-install
 clang-format: clang-format-install
 	clang-format --style=file -i $(CPP_FILES) -n --Werror
 
+clang-format-fix: clang-format-install
+	clang-format --style=file -i $(CPP_FILES) --Werror
+
 # bazel file linter
 
 buildifier: buildifier-install
@@ -87,13 +91,15 @@ buildifier: buildifier-install
 # bazel build/test
 
 bazel-pip-requirement-dev:
-	cd third_party/pip_requirements && (cmp requirements.txt requirements-dev.txt || ln -sf requirements-dev.txt requirements.txt)
+	# Modified to only install dependencies relevant to testing Sokoban (which is the same as release)
+	cd third_party/pip_requirements && (cmp requirements.txt requirements-sokoban.txt || ln -sf requirements-sokoban.txt requirements.txt)
 
 bazel-pip-requirement-release:
 	cd third_party/pip_requirements && (cmp requirements.txt requirements-release.txt || ln -sf requirements-release.txt requirements.txt)
 
 clang-tidy: clang-tidy-install bazel-pip-requirement-dev
-	bazel build $(BAZELOPT) //... --config=clang-tidy --config=test
+	# Only lint the things we actually build
+	bazel build $(BAZELOPT) //envpool/core/... //envpool/sokoban/... --config=clang-tidy --config=test
 
 bazel-debug: bazel-install bazel-pip-requirement-dev
 	bazel run $(BAZELOPT) //:setup --config=debug -- bdist_wheel
@@ -111,7 +117,7 @@ bazel-release: bazel-install bazel-pip-requirement-release
 	cp bazel-bin/setup.runfiles/$(PROJECT_NAME)/dist/*.whl ./dist
 
 bazel-test: bazel-install bazel-pip-requirement-dev
-	bazel test --test_output=all $(BAZELOPT) //... --config=test --spawn_strategy=local --color=yes
+	bazel test --test_output=all $(BAZELOPT) //envpool/core/... //envpool/sokoban/... --config=test --spawn_strategy=local --color=yes
 
 bazel-clean: bazel-install
 	bazel clean --expunge
@@ -119,7 +125,7 @@ bazel-clean: bazel-install
 # documentation
 
 addlicense: addlicense-install
-	addlicense -c $(COPYRIGHT) -l apache -y 2023 -check $(PROJECT_FOLDER)
+	addlicense -c $(COPYRIGHT) -l apache -y "$(COPYRIGHT_YEAR)" -check $(PROJECT_FOLDER)
 
 docstyle: doc-install
 	pydocstyle $(PROJECT_NAME) && doc8 docs && cd docs && make html SPHINXOPTS="-W"
@@ -144,7 +150,7 @@ format: py-format-install clang-format-install buildifier-install addlicense-ins
 	yapf -ir $(PYTHON_FILES)
 	clang-format -style=file -i $(CPP_FILES)
 	buildifier -r -lint=fix $(BAZEL_FILES)
-	addlicense -c $(COPYRIGHT) -l apache -y 2023 $(PROJECT_FOLDER)
+	addlicense -c $(COPYRIGHT) -l apache -y "$(COPYRIGHT_YEAR)" $(PROJECT_FOLDER)
 
 # Build docker images
 
diff --git a/envpool/classic_control/pendulum.h b/envpool/classic_control/pendulum.h
index f2a594ad..85e91c1a 100644
--- a/envpool/classic_control/pendulum.h
+++ b/envpool/classic_control/pendulum.h
@@ -77,9 +77,8 @@ class PendulumEnv : public Env<PendulumEnvSpec> {
   void Step(const Action& action) override {
     done_ = (++elapsed_step_ >= max_episode_steps_);
     float act = action["action"_];
-    double u = act < -kMaxTorque  ? -kMaxTorque
-               : act > kMaxTorque ? kMaxTorque
-                                  : act;
+    double u =
+        act < -kMaxTorque ? -kMaxTorque : act > kMaxTorque ? kMaxTorque : act;
     double cost =
         theta_ * theta_ + 0.1 * theta_dot_ * theta_dot_ + 0.001 * u * u;
     double new_theta_dot =
@@ -87,9 +86,9 @@ class PendulumEnv : public Env<PendulumEnvSpec> {
     if (version_ == 0) {
       theta_ += new_theta_dot * kDt;
     }
-    theta_dot_ = new_theta_dot < -kMaxSpeed  ? -kMaxSpeed
-                 : new_theta_dot > kMaxSpeed ? kMaxSpeed
-                                             : new_theta_dot;
+    theta_dot_ = new_theta_dot < -kMaxSpeed
+                     ? -kMaxSpeed
+                     : new_theta_dot > kMaxSpeed ? kMaxSpeed : new_theta_dot;
     if (version_ == 1) {
       theta_ += new_theta_dot * kDt;
     }
diff --git a/envpool/core/env_spec.h b/envpool/core/env_spec.h
index f59e1fb2..c3cc7f69 100644
--- a/envpool/core/env_spec.h
+++ b/envpool/core/env_spec.h
@@ -52,8 +52,8 @@ class EnvSpec {
   using Config = decltype(ConcatDict(common_config, EnvFns::DefaultConfig()));
   using ConfigKeys = typename Config::Keys;
   using ConfigValues = typename Config::Values;
-  using StateSpec = decltype(ConcatDict(
-      common_state_spec, EnvFns::StateSpec(std::declval<Config>())));
+  using StateSpec = decltype(
+      ConcatDict(common_state_spec, EnvFns::StateSpec(std::declval<Config>())));
   using ActionSpec = decltype(ConcatDict(
       common_action_spec, EnvFns::ActionSpec(std::declval<Config>())));
   using StateKeys = typename StateSpec::Keys;
diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index eac4b98b..d231954e 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -1,3 +1,17 @@
+# Copyright 2023-2024 FAR AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 load("@pip_requirements//:requirements.bzl", "requirement")
 load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
 
@@ -20,7 +34,10 @@ py_library(
 
 cc_library(
     name = "sokoban_envpool_h",
-    hdrs = ["sokoban_envpool.h", "level_loader.h"],
+    hdrs = [
+        "level_loader.h",
+        "sokoban_envpool.h",
+    ],
     deps = [
         "//envpool/core:async_envpool",
         "//envpool/core:env",
@@ -40,12 +57,12 @@ cc_library(
 
 py_test(
     name = "test",
-    main = "sokoban_py_envpool_test.py",
     srcs = ["sokoban_py_envpool_test.py"],
+    main = "sokoban_py_envpool_test.py",
     deps = [
-        ":sokoban",
         ":registration",
-        "//envpool:envpool",
+        ":sokoban",
+        "//envpool",
         requirement("numpy"),
         requirement("absl-py"),
     ],
@@ -54,8 +71,8 @@ py_test(
 pybind_extension(
     name = "sokoban_envpool",
     srcs = [
-        "sokoban_envpool.cc",
         "level_loader.cc",
+        "sokoban_envpool.cc",
     ],
     linkopts = [
         "-ldl",
diff --git a/envpool/sokoban/__init__.py b/envpool/sokoban/__init__.py
index 0e785494..e284b1ca 100644
--- a/envpool/sokoban/__init__.py
+++ b/envpool/sokoban/__init__.py
@@ -1,17 +1,31 @@
+# Copyright 2023-2024 FAR AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from envpool.python.api import py_env
 
 from .sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec
 
 (
-    SokobanEnvSpec,
-    SokobanDMEnvPool,
-    SokobanGymEnvPool,
-    SokobanGymnasiumEnvPool,
+  SokobanEnvSpec,
+  SokobanDMEnvPool,
+  SokobanGymEnvPool,
+  SokobanGymnasiumEnvPool,
 ) = py_env(_SokobanEnvSpec, _SokobanEnvPool)
 
 __all__ = [
-    "SokobanEnvSpec",
-    "SokobanDMEnvPool",
-    "SokobanGymEnvPool",
-    "SokobanGymnasiumEnvPool",
+  "SokobanEnvSpec",
+  "SokobanDMEnvPool",
+  "SokobanGymEnvPool",
+  "SokobanGymnasiumEnvPool",
 ]
diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index 526fdc02..b252896c 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -1,3 +1,17 @@
+// Copyright 2023-2024 FAR AI
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include "level_loader.h"
 
 #include <algorithm>
@@ -7,19 +21,22 @@
 #include <iterator>
 #include <sstream>
 #include <stdexcept>
+#include <string>
 
 namespace sokoban {
 
-size_t ERROR_SZ = 1024;
-
 LevelLoader::LevelLoader(const std::filesystem::path& base_path, int verbose)
-    : levels(0), cur_level(levels.begin()), level_file_paths(0), verbose(verbose) {
+    : levels_(0),
+      cur_level_(levels_.begin()),
+      level_file_paths_(0),
+      verbose(verbose) {
   for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
-    level_file_paths.push_back(entry.path());
+    level_file_paths_.push_back(entry.path());
   }
 }
 
-const std::string PRINT_LEVEL_KEY = "# .a@$s";
+static const std::array<char, kMaxLevelObject + 1> kPrintLevelKey{
+    '#', ' ', '.', 'a', '@', '$', 's'};
 
 void AddLine(SokobanLevel& level, const std::string& line) {
   auto start = line.at(0);
@@ -33,19 +50,19 @@ void AddLine(SokobanLevel& level, const std::string& line) {
   for (const char& r : line) {
     switch (r) {
       case '#':
-        level.push_back(WALL);
+        level.push_back(kWall);
         break;
       case '@':
-        level.push_back(PLAYER);
+        level.push_back(kPlayer);
         break;
       case '$':
-        level.push_back(BOX);
+        level.push_back(kBox);
         break;
       case '.':
-        level.push_back(TARGET);
+        level.push_back(kTarget);
         break;
       case ' ':
-        level.push_back(EMPTY);
+        level.push_back(kEmpty);
         break;
       default:
         std::stringstream msg;
@@ -57,12 +74,15 @@ void AddLine(SokobanLevel& level, const std::string& line) {
   }
 }
 
-void PrintLevel(std::ostream& os, SokobanLevel vec) {
+void PrintLevel(std::ostream& os, const SokobanLevel& vec) {
   size_t dim_room = 0;
-  for (; dim_room * dim_room != vec.size() && dim_room <= 100; dim_room++)
-    ;  // take sqrt(vec.size())
+  for (; dim_room * dim_room != vec.size() && dim_room <= 100; dim_room++) {
+  }  // take sqrt(vec.size())
+  if (dim_room == 0) {
+    throw std::runtime_error("dim_room cannot be zero.");
+  }
   for (size_t i = 0; i < vec.size(); i++) {
-    os << PRINT_LEVEL_KEY.at(vec.at(i));
+    os << kPrintLevelKey.at(vec.at(i));
     if ((i + 1) % dim_room == 0) {
       os << std::endl;
     }
@@ -71,20 +91,20 @@ void PrintLevel(std::ostream& os, SokobanLevel vec) {
 
 void LevelLoader::LoadNewFile(std::mt19937& gen) {
   std::uniform_int_distribution<size_t> load_file_idx_r(
-      0, level_file_paths.size() - 1);
+      0, level_file_paths_.size() - 1);
   const size_t load_file_idx = load_file_idx_r(gen);
-  const std::filesystem::path& file_path = level_file_paths.at(load_file_idx);
+  const std::filesystem::path& file_path = level_file_paths_.at(load_file_idx);
   std::ifstream file(file_path);
 
-  levels.clear();
+  levels_.clear();
   std::string line;
   while (std::getline(file, line)) {
-    if (line.size() == 0) {
+    if (line.empty()) {
       continue;
     }
 
     if (line.at(0) == '#') {
-      SokobanLevel& cur_level = levels.emplace_back(0);
+      SokobanLevel& cur_level = levels_.emplace_back(0);
       cur_level.reserve(10 * 10);  // In practice most levels are this size
 
       // Count contiguous '#' characters and use this as the box dimension
@@ -96,7 +116,7 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) {
       }
       AddLine(cur_level, line);
 
-      while (std::getline(file, line) && line.size() > 0 && line.at(0) == '#') {
+      while (std::getline(file, line) && !line.empty() && line.at(0) == '#') {
         if (line.length() != dim_room) {
           std::stringstream msg;
           msg << "Irregular line '" << line
@@ -114,36 +134,36 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) {
       }
     }
   }
-  std::shuffle(levels.begin(), levels.end(), gen);
-  if (levels.empty()) {
+  std::shuffle(levels_.begin(), levels_.end(), gen);
+  if (levels_.empty()) {
     std::stringstream msg;
     msg << "No levels loaded from file '" << file_path << std::endl;
     throw std::runtime_error(msg.str());
   }
 
-  if(verbose >= 1) {
-    std::cout << "Loaded " << levels.size() << " levels from " << file_path
+  if (verbose >= 1) {
+    std::cout << "Loaded " << levels_.size() << " levels from " << file_path
               << std::endl;
-    if(verbose >= 2) {
-      PrintLevel(std::cout, levels.at(0));
+    if (verbose >= 2) {
+      PrintLevel(std::cout, levels_.at(0));
       std::cout << std::endl;
-      PrintLevel(std::cout, levels.at(1));
+      PrintLevel(std::cout, levels_.at(1));
       std::cout << std::endl;
     }
   }
 }
 
-const std::vector<SokobanLevel>::iterator LevelLoader::RandomLevel(
+std::vector<SokobanLevel>::iterator LevelLoader::RandomLevel(
     std::mt19937& gen) {
-  if (cur_level == levels.end()) {
+  if (cur_level_ == levels_.end()) {
     LoadNewFile(gen);
-    cur_level = levels.begin();
-    if (cur_level == levels.end()) {
+    cur_level_ = levels_.begin();
+    if (cur_level_ == levels_.end()) {
       throw std::runtime_error("No levels loaded.");
     }
   }
-  auto out = cur_level;
-  cur_level++;
+  auto out = cur_level_;
+  cur_level_++;
   return out;
 }
 
diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h
index 88326c84..f85a2a67 100644
--- a/envpool/sokoban/level_loader.h
+++ b/envpool/sokoban/level_loader.h
@@ -1,5 +1,21 @@
-#ifndef LEVEL_LOADER_H_
-#define LEVEL_LOADER_H_
+/*
+ * Copyright 2023-2024 FAR AI
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENVPOOL_SOKOBAN_LEVEL_LOADER_H_
+#define ENVPOOL_SOKOBAN_LEVEL_LOADER_H_
 
 #include <filesystem>
 #include <random>
@@ -9,30 +25,30 @@ namespace sokoban {
 
 using SokobanLevel = std::vector<uint8_t>;
 
-constexpr uint8_t WALL = 0;
-constexpr uint8_t EMPTY = 1;
-constexpr uint8_t TARGET = 2;
-constexpr uint8_t BOX_ON_TARGET = 3;
-constexpr uint8_t BOX = 4;
-constexpr uint8_t PLAYER = 5;
-constexpr uint8_t PLAYER_ON_TARGET = 6;
+constexpr uint8_t kWall = 0;
+constexpr uint8_t kEmpty = 1;
+constexpr uint8_t kTarget = 2;
+constexpr uint8_t kBoxOnTarget = 3;
+constexpr uint8_t kBox = 4;
+constexpr uint8_t kPlayer = 5;
+constexpr uint8_t kPlayerOnTarget = 6;
+constexpr uint8_t kMaxLevelObject = kPlayerOnTarget;
 
 class LevelLoader {
  protected:
-  std::vector<SokobanLevel> levels;
-  std::vector<SokobanLevel>::iterator cur_level;
-  std::vector<std::filesystem::path> level_file_paths;
+  std::vector<SokobanLevel> levels_;
+  std::vector<SokobanLevel>::iterator cur_level_;
+  std::vector<std::filesystem::path> level_file_paths_;
   void LoadNewFile(std::mt19937& gen);
 
  public:
   int verbose;
 
-  const std::vector<SokobanLevel>::iterator RandomLevel(std::mt19937& gen);
-  LevelLoader(const std::filesystem::path& base_path, int verbose=0);
+  std::vector<SokobanLevel>::iterator RandomLevel(std::mt19937& gen);
+  explicit LevelLoader(const std::filesystem::path& base_path, int verbose = 0);
 };
 
-
-void PrintLevel(std::ostream& os, SokobanLevel vec);
+void PrintLevel(std::ostream& os, const SokobanLevel& vec);
 }  // namespace sokoban
 
-#endif  // LEVEL_LOADER_H_
+#endif  // ENVPOOL_SOKOBAN_LEVEL_LOADER_H_
diff --git a/envpool/sokoban/registration.py b/envpool/sokoban/registration.py
index 490b1a34..e79dc31c 100644
--- a/envpool/sokoban/registration.py
+++ b/envpool/sokoban/registration.py
@@ -1,12 +1,26 @@
+# Copyright 2023-2024 FAR AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from envpool.registration import register
 
 register(
-    task_id="Sokoban-v0",
-    import_path="envpool.sokoban",
-    spec_cls="SokobanEnvSpec",
-    dm_cls="SokobanDMEnvPool",
-    gym_cls="SokobanGymEnvPool",
-    gymnasium_cls="SokobanGymnasiumEnvPool",
-    max_episode_steps=60,
-    reward_step=-0.1,
+  task_id="Sokoban-v0",
+  import_path="envpool.sokoban",
+  spec_cls="SokobanEnvSpec",
+  dm_cls="SokobanDMEnvPool",
+  gym_cls="SokobanGymEnvPool",
+  gymnasium_cls="SokobanGymnasiumEnvPool",
+  max_episode_steps=60,
+  reward_step=-0.1,
 )
diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index dff613c7..a29b87da 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -1,8 +1,23 @@
+// Copyright 2023-2024 FAR AI
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include "envpool/sokoban/sokoban_envpool.h"
 
 #include <array>
 #include <sstream>
 #include <stdexcept>
+#include <vector>
 
 #include "envpool/core/py_envpool.h"
 
@@ -11,26 +26,27 @@ namespace sokoban {
 void SokobanEnv::Reset() {
   const int max_episode_steps = spec_.config["max_episode_steps"_];
   const int min_episode_steps = spec_.config["min_episode_steps"_];
-  std::uniform_int_distribution<int> episode_length_rand(min_episode_steps, max_episode_steps);
+  std::uniform_int_distribution<int> episode_length_rand(min_episode_steps,
+                                                         max_episode_steps);
   current_max_episode_steps_ = episode_length_rand(gen_);
 
-  world = *level_loader.RandomLevel(gen_);
-  if (world.size() != dim_room * dim_room) {
+  world_ = *(level_loader_.RandomLevel(gen_));
+  if (world_.size() != dim_room_ * dim_room_) {
     std::stringstream msg;
-    msg << "Loaded level is not dim_room x dim_room. world.size()="
-        << world.size() << ", dim_room=" << dim_room << std::endl;
+    msg << "Loaded level is not dim_room x dim_room. world_.size()="
+        << world_.size() << ", dim_room_=" << dim_room_ << std::endl;
     throw std::runtime_error(msg.str());
   }
-  unmatched_boxes = 0;
-  for (int x = 0; x < dim_room; x++) {
-    for (int y = 0; y < dim_room; y++) {
+  unmatched_boxes_ = 0;
+  for (int x = 0; x < dim_room_; x++) {
+    for (int y = 0; y < dim_room_; y++) {
       switch (WorldAt(x, y)) {
-        case PLAYER:
-          player_x = x;
-          player_y = y;
+        case kPlayer:
+          player_x_ = x;
+          player_y_ = y;
           break;
-        case BOX:
-          unmatched_boxes++;
+        case kBox:
+          unmatched_boxes_++;
           break;
       }
     }
@@ -39,153 +55,128 @@ void SokobanEnv::Reset() {
   WriteState(0.0f);
 }
 
-uint8_t SokobanEnv::WorldAt(int x, int y) {
-  if ((x < 0) || (x >= dim_room) || (y < 0) || (y >= dim_room)) {
-    return WALL;
+[[nodiscard]] uint8_t SokobanEnv::WorldAt(int x, int y) const {
+  if ((x < 0) || (x >= dim_room_) || (y < 0) || (y >= dim_room_)) {
+    return kWall;
   }
-  return world.at(x + y * dim_room);
+  return world_.at(x + y * dim_room_);
 }
 void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) {
-  if ((x < 0) || (x >= dim_room) || (y < 0) || (y >= dim_room)) {
+  if ((x < 0) || (x >= dim_room_) || (y < 0) || (y >= dim_room_)) {
     return;
   }
-  world.at(x + y * dim_room) = value;
+  world_.at(x + y * dim_room_) = value;
 }
 
-constexpr std::array<std::array<int, 2>, 4> CHANGE_COORDINATES = {
+constexpr std::array<std::array<int, 2>, 4> kChangeCoordinates = {
     {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}};
 
-constexpr std::array<const char *, MAX_ACTION+1> action_names = {
-  "ACT_NOOP",
-  "ACT_PUSH_UP",
-  "ACT_PUSH_DOWN",
-  "ACT_PUSH_LEFT",
-  "ACT_PUSH_RIGHT",
-  "ACT_MOVE_UP",
-  "ACT_MOVE_DOWN",
-  "ACT_MOVE_LEFT",
-  "ACT_MOVE_RIGHT",
-};
-
-
-constexpr std::array<const char *, 7> arena_names = {
-  "WALL",
-  "EMPTY",
-  "TARGET",
-  "BOX_ON_TARGET",
-  "BOX",
-  "PLAYER",
-  "PLAYER_ON_TARGET",
-};
-
-
-void SokobanEnv::Step(const Action& action_) {
+void SokobanEnv::Step(const Action& action_dict) {
   current_step_++;
 
-  const int action = action_["action"_];
-  if (action == ACT_NOOP) {
-    WriteState(static_cast<float>(reward_step));
+  const int action = action_dict["action"_];
+  if (action == kActNoop) {
+    WriteState(static_cast<float>(reward_step_));
     return;
   }
   // From here on, assume the agent will try to move
 
-  const int change_coordinates_idx = (action - 1) % CHANGE_COORDINATES.size();
-  const int delta_x = CHANGE_COORDINATES.at(change_coordinates_idx).at(0);
-  const int delta_y = CHANGE_COORDINATES.at(change_coordinates_idx).at(1);
+  const int change_coordinates_idx = (action - 1) % kChangeCoordinates.size();
+  const int delta_x = kChangeCoordinates.at(change_coordinates_idx).at(0);
+  const int delta_y = kChangeCoordinates.at(change_coordinates_idx).at(1);
 
-  const int prev_unmatched_boxes = unmatched_boxes;
+  const int prev_unmatched_boxes = unmatched_boxes_;
 
   // Arena: the things that will change if the agent moves
   std::array<uint8_t, 3> arena;
   for (size_t i = 0; i < arena.size(); i++) {
-    arena.at(i) = WorldAt(player_x + delta_x * i, player_y + delta_y * i);
+    arena.at(i) = WorldAt(player_x_ + delta_x * i, player_y_ + delta_y * i);
   }
 
   // The box will move IFF action is a pushing action AND there's a box AND it
   // has space to move
   const bool box_moves =
-      ((action <= ACT_PUSH_RIGHT) &&
-       ((arena.at(1) == BOX) || (arena.at(1) == BOX_ON_TARGET)) &&
-       ((arena.at(2) == EMPTY) || (arena.at(2) == TARGET)));
+      ((action <= kActPushRight) &&
+       ((arena.at(1) == kBox) || (arena.at(1) == kBoxOnTarget)) &&
+       ((arena.at(2) == kEmpty) || (arena.at(2) == kTarget)));
 
   // The agent will move if the next arena location is possible to move into, or
   // if it's a box and the box moves
   const bool is_a_box_and_the_box_moves = box_moves;
-  const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) ||
+  const bool agent_moves = (arena.at(1) == kEmpty) ||
+                           (arena.at(1) == kTarget) ||
                            is_a_box_and_the_box_moves;
 
   if (agent_moves) {
-    // `is_target` is boolean but we'll need it as an int later
-    std::array<int, arena.size()> is_target;
+    std::array<bool, arena.size()> is_target;
     for (size_t i = 0; i < arena.size(); i++) {
       uint8_t tile = arena.at(i);
-      // We explicitly set them to 0 or 1 because false/true are not guaranteed
-      // to be 0/1.
       is_target.at(i) =
-          ((tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET)
-               ? 1
-               : 0);
+          (tile == kBoxOnTarget || tile == kTarget || tile == kPlayerOnTarget);
     }
     // only whatever was on the floor is now at position 0
-    arena.at(0) = is_target.at(0) ? TARGET : EMPTY;
+    arena.at(0) = is_target.at(0) ? kTarget : kEmpty;
     // the player now occupies position 1
-    arena.at(1) = is_target.at(1) ? PLAYER_ON_TARGET : PLAYER;
+    arena.at(1) = is_target.at(1) ? kPlayerOnTarget : kPlayer;
 
     if (box_moves) {
       // the box moves for sure. A target at 2 reduces the nubmer of unmatched
       // boxes (because the box goes there), a target at 1 increases it (the box
       // leaves from there). Both can be equal to 1 and in that case the number
       // stays the same.
-      unmatched_boxes += is_target.at(1) - is_target.at(2);
+      //
+      // Implicit conversion from bool to int is always 0/1.
+      // https://en.cppreference.com/w/cpp/language/implicit_conversion
+      unmatched_boxes_ +=
+          static_cast<int>(is_target.at(1)) - static_cast<int>(is_target.at(2));
 
       // A box now occupies position 2
-      arena.at(2) = is_target.at(2) ? BOX_ON_TARGET : BOX;
+      arena.at(2) = is_target.at(2) ? kBoxOnTarget : kBox;
     }
 
     for (size_t i = 0; i < arena.size(); i++) {
-      WorldAssignAt(player_x + delta_x * i, player_y + delta_y * i,
+      WorldAssignAt(player_x_ + delta_x * i, player_y_ + delta_y * i,
                     arena.at(i));
     }
     // After assigning the arena, move player.
-    player_x += delta_x;
-    player_y += delta_y;
+    player_x_ += delta_x;
+    player_y_ += delta_y;
   }
 
-  const double reward =
-      reward_step +
-      reward_box * static_cast<double>(prev_unmatched_boxes - unmatched_boxes) +
-      ((unmatched_boxes == 0) ? reward_finished : 0.0f);
+  const double reward = reward_step_ +
+                        reward_box_ * static_cast<double>(prev_unmatched_boxes -
+                                                          unmatched_boxes_) +
+                        ((unmatched_boxes_ == 0) ? reward_finished_ : 0.0f);
   WriteState(static_cast<float>(reward));
 }
 
-constexpr std::array<std::array<uint8_t, 3>, PLAYER_ON_TARGET + 1> TINY_COLORS =
-    {{
-        {0, 0, 0},        // WALL
-        {243, 248, 238},  // EMPTY
-        {254, 126, 125},  // TARGET
-        {254, 95, 56},    // BOX_ON_TARGET
-        {142, 121, 56},   // BOX
-        {160, 212, 56},   // PLAYER
-        {219, 212, 56}    // PLAYER_ON_TARGET
-    }};
+constexpr std::array<std::array<uint8_t, 3>, kPlayerOnTarget + 1> kTinyColors{{
+    {0, 0, 0},        // WALL
+    {243, 248, 238},  // EMPTY
+    {254, 126, 125},  // TARGET
+    {254, 95, 56},    // BOX_ON_TARGET
+    {142, 121, 56},   // BOX
+    {160, 212, 56},   // PLAYER
+    {219, 212, 56}    // PLAYER_ON_TARGET
+}};
 
 void SokobanEnv::WriteState(float reward) {
   auto state = Allocate();
   state["reward"_] = reward;
   Array& obs = state["obs"_];
-  if (obs.size != 3 * world.size()) {
+  if (obs.size != 3 * world_.size()) {
     std::stringstream msg;
     msg << "Obs size and level size are different: obs_size=" << obs.size
-        << "/3, level_size=" << world.size() << ", dim_room=" << dim_room
+        << "/3, level_size=" << world_.size() << ", dim_room=" << dim_room_
         << std::endl;
     throw std::runtime_error(msg.str());
   }
 
-  std::vector<uint8_t> out(3 * world.size());
+  std::vector<uint8_t> out(3 * world_.size());
   for (int rgb = 0; rgb < 3; rgb++) {
-    for (size_t i = 0; i < world.size(); i++) {
-      out.at(rgb * (dim_room * dim_room) + i) =
-          TINY_COLORS.at(world.at(i)).at(rgb);
+    for (size_t i = 0; i < world_.size(); i++) {
+      out.at(rgb * (dim_room_ * dim_room_) + i) =
+          kTinyColors.at(world_.at(i)).at(rgb);
     }
   }
   obs.Assign(out.data(), out.size());
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 40ff47c1..c788040f 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -1,9 +1,26 @@
-#ifndef ENVPOOL_SOKOBAN_H_
-#define ENVPOOL_SOKOBAN_H_
+/*
+ * Copyright 2023-2024 FAR AI
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENVPOOL_SOKOBAN_SOKOBAN_ENVPOOL_H_
+#define ENVPOOL_SOKOBAN_SOKOBAN_ENVPOOL_H_
 
 #include <filesystem>
 #include <sstream>
 #include <stdexcept>
+#include <string>
 
 #include "envpool/core/array.h"
 #include "envpool/core/async_envpool.h"
@@ -12,16 +29,16 @@
 
 namespace sokoban {
 
-constexpr int ACT_NOOP = 0;
-constexpr int ACT_PUSH_UP = 1;
-constexpr int ACT_PUSH_DOWN = 2;
-constexpr int ACT_PUSH_LEFT = 3;
-constexpr int ACT_PUSH_RIGHT = 4;
-constexpr int ACT_MOVE_UP = 5;
-constexpr int ACT_MOVE_DOWN = 6;
-constexpr int ACT_MOVE_LEFT = 7;
-constexpr int ACT_MOVE_RIGHT = 8;
-constexpr int MAX_ACTION = ACT_MOVE_RIGHT;
+constexpr int kActNoop = 0;
+constexpr int kActPushUp = 1;
+constexpr int kActPushDown = 2;
+constexpr int kActPushLeft = 3;
+constexpr int kActPushRight = 4;
+constexpr int kActMoveUp = 5;
+constexpr int kActMoveDown = 6;
+constexpr int kActMoveLeft = 7;
+constexpr int kActMoveRight = 8;
+constexpr int kMaxAction = kActMoveRight;
 
 class SokobanEnvFns {
  public:
@@ -38,7 +55,7 @@ class SokobanEnvFns {
   }
   template <typename Config>
   static decltype(auto) ActionSpec(const Config& conf) {
-    return MakeDict("action"_.Bind(Spec<int>({-1}, {0, MAX_ACTION})));
+    return MakeDict("action"_.Bind(Spec<int>({-1}, {0, kMaxAction})));
   }
 };
 
@@ -49,15 +66,16 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
  public:
   SokobanEnv(const Spec& spec, int env_id)
       : Env<SokobanEnvSpec>(spec, env_id),
-        dim_room{static_cast<int>(spec.config["dim_room"_])},
-        reward_finished{static_cast<double>(spec.config["reward_finished"_])},
-        reward_box{static_cast<double>(spec.config["reward_box"_])},
-        reward_step{static_cast<double>(spec.config["reward_step"_])},
-        levels_dir{static_cast<std::string>(spec.config["levels_dir"_])},
-        level_loader(levels_dir),
-        world(WALL, static_cast<std::size_t>(dim_room * dim_room)),
-        verbose(static_cast<int>(spec.config["verbose"_])),
-        current_max_episode_steps_(static_cast<int>(spec.config["max_episode_steps"_])) {
+        dim_room_{static_cast<int>(spec.config["dim_room"_])},
+        reward_finished_{static_cast<double>(spec.config["reward_finished"_])},
+        reward_box_{static_cast<double>(spec.config["reward_box"_])},
+        reward_step_{static_cast<double>(spec.config["reward_step"_])},
+        levels_dir_{static_cast<std::string>(spec.config["levels_dir"_])},
+        level_loader_(levels_dir_),
+        world_(kWall, static_cast<std::size_t>(dim_room_ * dim_room_)),
+        verbose_(static_cast<int>(spec.config["verbose"_])),
+        current_max_episode_steps_(
+            static_cast<int>(spec.config["max_episode_steps"_])) {
     if (max_num_players_ != spec_.config["max_num_players"_]) {
       std::stringstream msg;
       msg << "max_num_players_ != spec_['max_num_players'] " << max_num_players_
@@ -74,31 +92,33 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
   }
 
   bool IsDone() override {
-    return (unmatched_boxes == 0) || (current_step_ >= current_max_episode_steps_); }
+    return (unmatched_boxes_ == 0) ||
+           (current_step_ >= current_max_episode_steps_);
+  }
   void Reset() override;
-  void Step(const Action& action) override;
+  void Step(const Action& action_dict) override;
 
   void WriteState(float reward);
 
  private:
-  int dim_room;
-  double reward_finished, reward_box, reward_step;
-  std::filesystem::path levels_dir;
+  int dim_room_;
+  double reward_finished_, reward_box_, reward_step_;
+  std::filesystem::path levels_dir_;
 
-  LevelLoader level_loader;
-  SokobanLevel world;
-  int verbose;
+  LevelLoader level_loader_;
+  SokobanLevel world_;
+  int verbose_;
 
   int current_max_episode_steps_;
   int current_step_{0};
-  int player_x{0}, player_y{0};
-  int unmatched_boxes{0};
+  int player_x_{0}, player_y_{0};
+  int unmatched_boxes_{0};
 
-  uint8_t WorldAt(int x, int y);
+  [[nodiscard]] uint8_t WorldAt(int x, int y) const;
   void WorldAssignAt(int x, int y, uint8_t value);
 };
 
 using SokobanEnvPool = AsyncEnvPool<SokobanEnv>;
 }  // namespace sokoban
 
-#endif  // ENVPOOL_SOKOBAN_H_
+#endif  // ENVPOOL_SOKOBAN_SOKOBAN_ENVPOOL_H_
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index c0f3973f..bed17588 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -1,102 +1,118 @@
+# Copyright 2023-2024 FAR AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Unit test for dummy envpool and speed benchmark."""
 
 import time
 
-import envpool  # noqa: F401
-import envpool.sokoban.registration
 import numpy as np
 from absl import logging
 from absl.testing import absltest
+
+import envpool  # noqa: F401
+import envpool.sokoban.registration
 from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec
 
 
 class _SokobanEnvPoolTest(absltest.TestCase):
-    def test_config(self) -> None:
-        ref_config_keys = [
-            # Default environment keys
-            "base_path",
-            "batch_size",
-            "gym_reset_return_info",
-            "max_num_players",
-            "num_envs",
-            "num_threads",
-            "seed",
-            "thread_affinity_offset",
-            "min_episode_steps",
-            # Default and also used by sokoban
-            "max_episode_steps",
-            # defined by sokoban
-            "dim_room",
-            "levels_dir",
-            "reward_box",
-            "reward_finished",
-            "reward_step",
-            "verbose",
-        ]
-        default_conf = _SokobanEnvSpec._default_config_values
-        self.assertTrue(isinstance(default_conf, tuple))
-        config_keys = _SokobanEnvSpec._config_keys
-        self.assertTrue(isinstance(config_keys, list))
-        self.assertEqual(len(default_conf), len(config_keys))
-        self.assertEqual(sorted(config_keys), sorted(ref_config_keys))
 
-    def test_envpool(self) -> None:
-        batch = num_envs = 200
-        env = envpool.make(
-            "Sokoban-v0",
-            env_type="gymnasium",
-            num_envs=num_envs,
-            batch_size=num_envs,
-            seed=2346890,
-            max_episode_steps=60,
-            reward_step=-0.1,
-            dim_room=10,
-            levels_dir="/app/envpool/sokoban/sample_levels",
-        )
-        total_steps = 1000
+  def test_config(self) -> None:
+    ref_config_keys = [
+      # Default environment keys
+      "base_path",
+      "batch_size",
+      "gym_reset_return_info",
+      "max_num_players",
+      "num_envs",
+      "num_threads",
+      "seed",
+      "thread_affinity_offset",
+      "min_episode_steps",
+      # Default and also used by sokoban
+      "max_episode_steps",
+      # defined by sokoban
+      "dim_room",
+      "levels_dir",
+      "reward_box",
+      "reward_finished",
+      "reward_step",
+      "verbose",
+    ]
+    default_conf = _SokobanEnvSpec._default_config_values
+    self.assertTrue(isinstance(default_conf, tuple))
+    config_keys = _SokobanEnvSpec._config_keys
+    self.assertTrue(isinstance(config_keys, list))
+    self.assertEqual(len(default_conf), len(config_keys))
+    self.assertEqual(sorted(config_keys), sorted(ref_config_keys))
+
+  def test_envpool(self) -> None:
+    batch = num_envs = 200
+    env = envpool.make(
+      "Sokoban-v0",
+      env_type="gymnasium",
+      num_envs=num_envs,
+      batch_size=num_envs,
+      seed=2346890,
+      max_episode_steps=60,
+      reward_step=-0.1,
+      dim_room=10,
+      levels_dir="/app/envpool/sokoban/sample_levels",
+    )
+    total_steps = 1000
 
-        _ = env.reset()
-        t = time.time()
-        for _ in range(total_steps):
-            _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,)))
-        duration = time.time() - t
-        fps = total_steps * batch / duration
-        logging.info(f"FPS = {fps:.6f}")
+    _ = env.reset()
+    t = time.time()
+    for _ in range(total_steps):
+      _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,)))
+    duration = time.time() - t
+    fps = total_steps * batch / duration
+    logging.info(f"FPS = {fps:.6f}")
 
-    def test_envpool_max_episode_steps(self) -> None:
-        for max_episode_steps in [2, 5, 10]:
-            env = envpool.make(
-                "Sokoban-v0",
-                env_type="gymnasium",
-                num_envs=1,
-                batch_size=1,
-                max_episode_steps=max_episode_steps,
-                levels_dir="/app/envpool/sokoban/sample_levels",
-            )
-            env.reset()
-            for _ in range(max_episode_steps - 1):
-                _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
-                assert not np.any(terminated | truncated)
+  def test_envpool_max_episode_steps(self) -> None:
+    for max_episode_steps in [2, 5, 10]:
+      env = envpool.make(
+        "Sokoban-v0",
+        env_type="gymnasium",
+        num_envs=1,
+        batch_size=1,
+        min_episode_steps=max_episode_steps,
+        max_episode_steps=max_episode_steps,
+        levels_dir="/app/envpool/sokoban/sample_levels",
+      )
+      env.reset()
+      for _ in range(max_episode_steps - 1):
+        _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
+        assert not np.any(terminated | truncated)
 
-            _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
-            assert not np.any(terminated)
-            assert np.all(truncated)
+      _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
+      assert not np.any(terminated)
+      assert np.all(truncated)
 
-    def test_xla(self) -> None:
-        num_envs = 10
-        env = envpool.make(
-            "Sokoban-v0",
-            env_type="dm",
-            num_envs=num_envs,
-            batch_size=num_envs,
-            seed=2346890,
-            max_episode_steps=60,
-            reward_step=-0.1,
-            dim_room=10,
-            levels_dir="/app/envpool/sokoban/sample_levels",
-        )
-        handle, recv, send, step = env.xla()
+  def test_xla(self) -> None:
+    num_envs = 10
+    env = envpool.make(
+      "Sokoban-v0",
+      env_type="dm",
+      num_envs=num_envs,
+      batch_size=num_envs,
+      seed=2346890,
+      max_episode_steps=60,
+      reward_step=-0.1,
+      dim_room=10,
+      levels_dir="/app/envpool/sokoban/sample_levels",
+    )
+    handle, recv, send, step = env.xla()
 
 
 if __name__ == "__main__":
-    absltest.main()
+  absltest.main()
diff --git a/envpool/workspace0.bzl b/envpool/workspace0.bzl
index 2618ca92..98c44cde 100644
--- a/envpool/workspace0.bzl
+++ b/envpool/workspace0.bzl
@@ -20,6 +20,7 @@ load("//third_party/cuda:cuda.bzl", "cuda_configure")
 
 def workspace():
     """Load requested packages."""
+
     # we cannot upgrade rules_python because it requires requirements_lock.txt after 0.13.0
     maybe(
         http_archive,
diff --git a/third_party/pip_requirements/requirements-devtools.txt b/third_party/pip_requirements/requirements-devtools.txt
new file mode 100644
index 00000000..f7f556e5
--- /dev/null
+++ b/third_party/pip_requirements/requirements-devtools.txt
@@ -0,0 +1,5 @@
+flake8==7.0.0
+flake8-bugbear==24.2.6
+isort==5.13.2
+yapf==0.40.2
+cpplint==1.6.1
diff --git a/third_party/pip_requirements/requirements-sokoban.txt b/third_party/pip_requirements/requirements-sokoban.txt
new file mode 120000
index 00000000..6829e68e
--- /dev/null
+++ b/third_party/pip_requirements/requirements-sokoban.txt
@@ -0,0 +1 @@
+requirements-release.txt
\ No newline at end of file

From 89fff4c25fd3c6c806f39138770d13e449f39db1 Mon Sep 17 00:00:00 2001
From: Mohammad Taufeeque <9taufeeque9@gmail.com>
Date: Fri, 8 Mar 2024 05:42:49 +0530
Subject: [PATCH 31/60] Add option to load a fixed number of levels
 sequentially (#1)

* add option to load sokoban sequentially

* fix issues with original tests

* add test case

* fix seg fault

* add safe_uniform_int function

* use pytest and fix error

* fix BUILD

* Add pytest to requirements-dev.txt

* fix lint

* fix link and docker image

* add pytest to release and fix lint

* fix lint

* revert image

* fix clang-tidy lint
---
 envpool/sokoban/BUILD                         | 41 ++++++-----
 envpool/sokoban/level_loader.cc               | 47 ++++++++----
 envpool/sokoban/level_loader.h                | 12 ++-
 envpool/sokoban/registration.py               |  1 +
 envpool/sokoban/sample_levels/001.txt         | 35 +++++++++
 envpool/sokoban/sokoban_envpool.cc            |  8 +-
 envpool/sokoban/sokoban_envpool.h             |  8 +-
 envpool/sokoban/sokoban_py_envpool_test.py    | 73 ++++++++++++++++---
 envpool/sokoban/utils.h                       | 39 ++++++++++
 .../pip_requirements/requirements-dev.txt     |  1 +
 .../pip_requirements/requirements-release.txt |  1 +
 11 files changed, 215 insertions(+), 51 deletions(-)
 create mode 100644 envpool/sokoban/sample_levels/001.txt
 create mode 100644 envpool/sokoban/utils.h

diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index d231954e..ed684f96 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -15,30 +15,31 @@
 load("@pip_requirements//:requirements.bzl", "requirement")
 load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
 
-package(default_visibility = ["//visibility:public"])
+package(default_visibility=["//visibility:public"])
 
 py_library(
-    name = "sokoban",
-    srcs = ["__init__.py"],
-    data = [":sokoban_envpool.so"],
-    deps = ["//envpool/python:api"],
+    name="sokoban",
+    srcs=["__init__.py"],
+    data=[":sokoban_envpool.so"],
+    deps=["//envpool/python:api"],
 )
 
 py_library(
-    name = "registration",
-    srcs = ["registration.py"],
-    deps = [
+    name="registration",
+    srcs=["registration.py"],
+    deps=[
         "//envpool:registration",
     ],
 )
 
 cc_library(
-    name = "sokoban_envpool_h",
-    hdrs = [
+    name="sokoban_envpool_h",
+    hdrs=[
         "level_loader.h",
         "sokoban_envpool.h",
+        "utils.h",
     ],
-    deps = [
+    deps=[
         "//envpool/core:async_envpool",
         "//envpool/core:env",
         "//envpool/core:env_spec",
@@ -56,28 +57,28 @@ cc_library(
 # )
 
 py_test(
-    name = "test",
-    srcs = ["sokoban_py_envpool_test.py"],
-    main = "sokoban_py_envpool_test.py",
-    deps = [
+    name="test",
+    srcs=["sokoban_py_envpool_test.py"],
+    main="sokoban_py_envpool_test.py",
+    deps=[
         ":registration",
         ":sokoban",
         "//envpool",
         requirement("numpy"),
-        requirement("absl-py"),
+        requirement("pytest"),
     ],
 )
 
 pybind_extension(
-    name = "sokoban_envpool",
-    srcs = [
+    name="sokoban_envpool",
+    srcs=[
         "level_loader.cc",
         "sokoban_envpool.cc",
     ],
-    linkopts = [
+    linkopts=[
         "-ldl",
     ],
-    deps = [
+    deps=[
         ":sokoban_envpool_h",
         "//envpool/core:py_envpool",
     ],
diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index b252896c..314f4175 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -23,20 +23,28 @@
 #include <stdexcept>
 #include <string>
 
+#include "envpool/sokoban/utils.h"
+
 namespace sokoban {
 
-LevelLoader::LevelLoader(const std::filesystem::path& base_path, int verbose)
-    : levels_(0),
+LevelLoader::LevelLoader(const std::filesystem::path& base_path,
+                         bool load_sequentially, int n_levels_to_load,
+                         int verbose)
+    : load_sequentially_(load_sequentially),
+      n_levels_to_load_(n_levels_to_load),
+      levels_loaded_(0),
+      levels_(0),
       cur_level_(levels_.begin()),
       level_file_paths_(0),
       verbose(verbose) {
   for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
     level_file_paths_.push_back(entry.path());
   }
+  cur_file_ = level_file_paths_.begin();
 }
 
 static const std::array<char, kMaxLevelObject + 1> kPrintLevelKey{
-    '#', ' ', '.', 'a', '@', '$', 's'};
+    '#', ' ', '.', 'a', '$', '@', 's'};
 
 void AddLine(SokobanLevel& level, const std::string& line) {
   auto start = line.at(0);
@@ -89,11 +97,19 @@ void PrintLevel(std::ostream& os, const SokobanLevel& vec) {
   }
 }
 
-void LevelLoader::LoadNewFile(std::mt19937& gen) {
-  std::uniform_int_distribution<size_t> load_file_idx_r(
-      0, level_file_paths_.size() - 1);
-  const size_t load_file_idx = load_file_idx_r(gen);
-  const std::filesystem::path& file_path = level_file_paths_.at(load_file_idx);
+void LevelLoader::LoadFile(std::mt19937& gen) {
+  std::filesystem::path file_path;
+  if (load_sequentially_) {
+    if (cur_file_ == level_file_paths_.end()) {
+      throw std::runtime_error("No more files to load.");
+    }
+    file_path = *cur_file_;
+    cur_file_++;
+  } else {
+    const size_t load_file_idx = SafeUniformInt(
+        static_cast<size_t>(0), level_file_paths_.size() - 1, gen);
+    file_path = level_file_paths_.at(load_file_idx);
+  }
   std::ifstream file(file_path);
 
   levels_.clear();
@@ -134,7 +150,9 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) {
       }
     }
   }
-  std::shuffle(levels_.begin(), levels_.end(), gen);
+  if (!load_sequentially_) {
+    std::shuffle(levels_.begin(), levels_.end(), gen);
+  }
   if (levels_.empty()) {
     std::stringstream msg;
     msg << "No levels loaded from file '" << file_path << std::endl;
@@ -142,7 +160,7 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) {
   }
 
   if (verbose >= 1) {
-    std::cout << "Loaded " << levels_.size() << " levels from " << file_path
+    std::cout << "***Loaded " << levels_.size() << " levels from " << file_path
               << std::endl;
     if (verbose >= 2) {
       PrintLevel(std::cout, levels_.at(0));
@@ -153,10 +171,12 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) {
   }
 }
 
-std::vector<SokobanLevel>::iterator LevelLoader::RandomLevel(
-    std::mt19937& gen) {
+std::vector<SokobanLevel>::iterator LevelLoader::GetLevel(std::mt19937& gen) {
+  if (n_levels_to_load_ > 0 && levels_loaded_ >= n_levels_to_load_) {
+    throw std::runtime_error("Loaded all requested levels.");
+  }
   if (cur_level_ == levels_.end()) {
-    LoadNewFile(gen);
+    LoadFile(gen);
     cur_level_ = levels_.begin();
     if (cur_level_ == levels_.end()) {
       throw std::runtime_error("No levels loaded.");
@@ -164,6 +184,7 @@ std::vector<SokobanLevel>::iterator LevelLoader::RandomLevel(
   }
   auto out = cur_level_;
   cur_level_++;
+  levels_loaded_++;
   return out;
 }
 
diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h
index f85a2a67..9879d8df 100644
--- a/envpool/sokoban/level_loader.h
+++ b/envpool/sokoban/level_loader.h
@@ -36,16 +36,22 @@ constexpr uint8_t kMaxLevelObject = kPlayerOnTarget;
 
 class LevelLoader {
  protected:
+  bool load_sequentially_;
+  int n_levels_to_load_;
+  int levels_loaded_;
   std::vector<SokobanLevel> levels_;
   std::vector<SokobanLevel>::iterator cur_level_;
   std::vector<std::filesystem::path> level_file_paths_;
-  void LoadNewFile(std::mt19937& gen);
+  std::vector<std::filesystem::path>::iterator cur_file_;
+  void LoadFile(std::mt19937& gen);
 
  public:
   int verbose;
 
-  std::vector<SokobanLevel>::iterator RandomLevel(std::mt19937& gen);
-  explicit LevelLoader(const std::filesystem::path& base_path, int verbose = 0);
+  std::vector<SokobanLevel>::iterator GetLevel(std::mt19937& gen);
+  explicit LevelLoader(const std::filesystem::path& base_path,
+                       bool load_sequentially, int n_levels_to_load,
+                       int verbose = 0);
 };
 
 void PrintLevel(std::ostream& os, const SokobanLevel& vec);
diff --git a/envpool/sokoban/registration.py b/envpool/sokoban/registration.py
index e79dc31c..026098e4 100644
--- a/envpool/sokoban/registration.py
+++ b/envpool/sokoban/registration.py
@@ -23,4 +23,5 @@
   gymnasium_cls="SokobanGymnasiumEnvPool",
   max_episode_steps=60,
   reward_step=-0.1,
+  max_num_players=1,
 )
diff --git a/envpool/sokoban/sample_levels/001.txt b/envpool/sokoban/sample_levels/001.txt
new file mode 100644
index 00000000..e5b2b185
--- /dev/null
+++ b/envpool/sokoban/sample_levels/001.txt
@@ -0,0 +1,35 @@
+; 0
+##########
+##########
+##########
+##### # ##
+#####    #
+#####  $ #
+# .    ..#
+# $$$ #  #
+#@ .     #
+##########
+
+; 1
+##########
+##########
+####     #
+#  $  .  #
+# #      #
+#@### .$ #
+###### $ #
+###  $.  #
+###     .#
+##########
+
+; 2
+##########
+#####   @#
+####    ##
+####.   ##
+#.   . $ #
+# $  $.  #
+# ###    #
+# $ ######
+#   ######
+##########
diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index a29b87da..2d139b08 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -20,17 +20,17 @@
 #include <vector>
 
 #include "envpool/core/py_envpool.h"
+#include "envpool/sokoban/utils.h"
 
 namespace sokoban {
 
 void SokobanEnv::Reset() {
   const int max_episode_steps = spec_.config["max_episode_steps"_];
   const int min_episode_steps = spec_.config["min_episode_steps"_];
-  std::uniform_int_distribution<int> episode_length_rand(min_episode_steps,
-                                                         max_episode_steps);
-  current_max_episode_steps_ = episode_length_rand(gen_);
+  current_max_episode_steps_ =
+      SafeUniformInt(min_episode_steps, max_episode_steps, gen_);
 
-  world_ = *(level_loader_.RandomLevel(gen_));
+  world_ = *(level_loader_.GetLevel(gen_));
   if (world_.size() != dim_room_ * dim_room_) {
     std::stringstream msg;
     msg << "Loaded level is not dim_room x dim_room. world_.size()="
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index c788040f..77bee609 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -46,7 +46,9 @@ class SokobanEnvFns {
     return MakeDict("reward_finished"_.Bind(10.0), "reward_box"_.Bind(1.0),
                     "reward_step"_.Bind(-0.1), "dim_room"_.Bind(10),
                     "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0),
-                    "min_episode_steps"_.Bind(0));
+                    "min_episode_steps"_.Bind(0),
+                    "load_sequentially"_.Bind(false),
+                    "n_levels_to_load"_.Bind(-1));
   }
   template <typename Config>
   static decltype(auto) StateSpec(const Config& conf) {
@@ -71,7 +73,9 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
         reward_box_{static_cast<double>(spec.config["reward_box"_])},
         reward_step_{static_cast<double>(spec.config["reward_step"_])},
         levels_dir_{static_cast<std::string>(spec.config["levels_dir"_])},
-        level_loader_(levels_dir_),
+        level_loader_(levels_dir_, spec.config["load_sequentially"_],
+                      static_cast<int>(spec.config["n_levels_to_load"_]),
+                      static_cast<int>(spec.config["verbose"_])),
         world_(kWall, static_cast<std::size_t>(dim_room_ * dim_room_)),
         verbose_(static_cast<int>(spec.config["verbose"_])),
         current_max_episode_steps_(
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index bed17588..d5a7a2e4 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -13,18 +13,19 @@
 # limitations under the License.
 """Unit test for dummy envpool and speed benchmark."""
 
+import glob
+import re
 import time
 
 import numpy as np
-from absl import logging
-from absl.testing import absltest
+import pytest
 
 import envpool  # noqa: F401
 import envpool.sokoban.registration
 from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec
 
 
-class _SokobanEnvPoolTest(absltest.TestCase):
+class TestSokobanEnvPool:
 
   def test_config(self) -> None:
     ref_config_keys = [
@@ -47,13 +48,15 @@ def test_config(self) -> None:
       "reward_finished",
       "reward_step",
       "verbose",
+      "load_sequentially",
+      "n_levels_to_load",
     ]
     default_conf = _SokobanEnvSpec._default_config_values
-    self.assertTrue(isinstance(default_conf, tuple))
+    assert isinstance(default_conf, tuple)
     config_keys = _SokobanEnvSpec._config_keys
-    self.assertTrue(isinstance(config_keys, list))
-    self.assertEqual(len(default_conf), len(config_keys))
-    self.assertEqual(sorted(config_keys), sorted(ref_config_keys))
+    assert isinstance(config_keys, list)
+    assert len(default_conf) == len(config_keys)
+    assert sorted(config_keys) == sorted(ref_config_keys)
 
   def test_envpool(self) -> None:
     batch = num_envs = 200
@@ -76,7 +79,7 @@ def test_envpool(self) -> None:
       _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,)))
     duration = time.time() - t
     fps = total_steps * batch / duration
-    logging.info(f"FPS = {fps:.6f}")
+    print(f"FPS = {fps:.6f}")
 
   def test_envpool_max_episode_steps(self) -> None:
     for max_episode_steps in [2, 5, 10]:
@@ -98,6 +101,58 @@ def test_envpool_max_episode_steps(self) -> None:
       assert not np.any(terminated)
       assert np.all(truncated)
 
+    def test_envpool_load_sequentially(self, capfd) -> None:
+      levels_dir = "/app/envpool/sokoban/sample_levels"
+      files = glob.glob(f"{levels_dir}/*.txt")
+      levels_by_files = []
+      for file in files:
+        with open(file, "r") as f:
+          text = f.read()
+        levels = text.split("\n;")
+        levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels]
+        levels_by_files.append((file, levels))
+      assert len(levels_by_files) > 1
+      assert all(len(levels) > 1 for levels in levels_by_files)
+      total_levels = sum(len(levels) for levels in levels_by_files)
+      for n_levels_to_load in range(1, total_levels + 1):
+        env = envpool.make(
+          "Sokoban-v0",
+          env_type="gymnasium",
+          num_envs=1,
+          batch_size=1,
+          max_episode_steps=60,
+          min_episode_steps=60,
+          levels_dir=levels_dir,
+          load_sequentially=True,
+          n_levels_to_load=n_levels_to_load,
+          verbose=2,
+        )
+        dim_room = env.spec.config.dim_room
+        obs, _ = env.reset()
+        assert obs.shape == (
+          1,
+          3,
+          dim_room,
+          dim_room,
+        ), f"obs shape: {obs.shape}"
+        if n_levels_to_load == -1:
+          n_levels_to_load = total_levels
+        for _ in range(n_levels_to_load - 1):
+          env.reset()
+        out, _ = capfd.readouterr()
+        files_output = out.split("***")[1:]
+        for i, file_output in enumerate(files_output):
+          first_line, out = file_output.strip().split("\n", 1)
+          result = re.search(
+            r'Loaded (\d+) levels from "(.*\.txt)"', first_line
+          )
+          n_levels, file_name = int(result.group(1)), result.group(2)
+          lev1, lev2 = out.strip().split("\n\n")
+          assert file_name == levels_by_files[i][0]
+          assert n_levels == len(levels_by_files[i][1])
+          assert lev1 == levels_by_files[i][1][0]
+          assert lev2 == levels_by_files[i][1][1]
+
   def test_xla(self) -> None:
     num_envs = 10
     env = envpool.make(
@@ -115,4 +170,4 @@ def test_xla(self) -> None:
 
 
 if __name__ == "__main__":
-  absltest.main()
+  pytest.main(["-v", __file__])
diff --git a/envpool/sokoban/utils.h b/envpool/sokoban/utils.h
new file mode 100644
index 00000000..9d680b8d
--- /dev/null
+++ b/envpool/sokoban/utils.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2023-2024 FAR AI
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENVPOOL_SOKOBAN_UTILS_H_
+#define ENVPOOL_SOKOBAN_UTILS_H_
+
+#include <cstdint>
+#include <random>
+#include <stdexcept>
+#include <type_traits>
+
+namespace sokoban {
+
+// Draws an integer uniformly at random from the closed interval [low, high].
+//
+// The bounds are validated before std::uniform_int_distribution is
+// constructed, because that class requires low <= high as a precondition.
+//
+// Args:
+//   low: smallest value that may be returned (inclusive).
+//   high: largest value that may be returned (inclusive).
+//   gen: random engine to draw from; drawing advances its state.
+//
+// Returns a value v such that low <= v <= high.
+// Throws std::invalid_argument if low > high.
+template <typename T>
+T SafeUniformInt(T low, T high, std::mt19937& gen) {
+  static_assert(std::is_same_v<T, uint32_t> || std::is_same_v<T, uint64_t> ||
+                    std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t>,
+                "SafeUniformInt only supports 32- and 64-bit integers");
+  // low == high is a valid single-value range; only low > high is rejected.
+  if (low > high) {
+    throw std::invalid_argument("low should be less than or equal to high");
+  }
+  std::uniform_int_distribution<T> dist(low, high);
+  return dist(gen);
+}
+
+}  // namespace sokoban
+
+#endif  // ENVPOOL_SOKOBAN_UTILS_H_
diff --git a/third_party/pip_requirements/requirements-dev.txt b/third_party/pip_requirements/requirements-dev.txt
index 7096668a..a72c0607 100644
--- a/third_party/pip_requirements/requirements-dev.txt
+++ b/third_party/pip_requirements/requirements-dev.txt
@@ -7,6 +7,7 @@ gymnasium>=0.26,!=0.27.0
 optree>=0.6.0
 jax[cpu]
 absl-py
+pytest
 packaging
 tqdm
 protobuf<=4.20.0
diff --git a/third_party/pip_requirements/requirements-release.txt b/third_party/pip_requirements/requirements-release.txt
index a5cdfddb..5b8ceffc 100644
--- a/third_party/pip_requirements/requirements-release.txt
+++ b/third_party/pip_requirements/requirements-release.txt
@@ -7,3 +7,4 @@ gymnasium>=0.26,!=0.27.0
 optree>=0.6.0
 jax[cpu]
 packaging
+pytest

From 4a12f55e236cddcbfee53bf4a5f214e4a2a10fa9 Mon Sep 17 00:00:00 2001
From: Mohammad Taufeeque <9taufeeque9@gmail.com>
Date: Sat, 9 Mar 2024 19:22:46 +0530
Subject: [PATCH 32/60] A* implementation for Sokoban (#4)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add a-star files for sokoban

* fix compilation & linking errors

* fix astar namespace

* optimizations

* fix cpplint local

* fix lint and compilation error

* fix errors

* add logging file

* fix test indent

* fix errors

* fix error

* fix bugs

* fix bugs

* fix bug

* final fixes

* Make linting less noisy by moving third_party files elsewhere,

run `make format`

* incorporate review comments

* fix lint errors

* fix lint errors

* fix path

* revert excluding fsa.h and astar.h from Makefile

* take filename instead of index

* add test for astar_log

* fix lint

---------

Co-authored-by: Adrià Garriga-Alonso <adria@far.ai>
---
 Makefile                                   |   2 +-
 envpool/sokoban/BUILD                      |  83 ++-
 envpool/sokoban/astar_log.cc               | 138 ++++
 envpool/sokoban/level_loader.cc            |  10 +-
 envpool/sokoban/sokoban_astar_test.cc      | 144 ++++
 envpool/sokoban/sokoban_node.cc            | 187 ++++++
 envpool/sokoban/sokoban_node.h             | 122 ++++
 envpool/sokoban/sokoban_py_envpool_test.py | 118 ++--
 third_party/astar_stl/BUILD                |  23 +
 third_party/astar_stl/astar.h              | 747 +++++++++++++++++++++
 third_party/astar_stl/fsa.h                | 211 ++++++
 11 files changed, 1702 insertions(+), 83 deletions(-)
 create mode 100644 envpool/sokoban/astar_log.cc
 create mode 100644 envpool/sokoban/sokoban_astar_test.cc
 create mode 100644 envpool/sokoban/sokoban_node.cc
 create mode 100644 envpool/sokoban/sokoban_node.h
 create mode 100644 third_party/astar_stl/BUILD
 create mode 100644 third_party/astar_stl/astar.h
 create mode 100644 third_party/astar_stl/fsa.h

diff --git a/Makefile b/Makefile
index 822be03c..bcce0fad 100644
--- a/Makefile
+++ b/Makefile
@@ -75,7 +75,7 @@ mypy: mypy-install
 # c++ linter
 
 cpplint: cpplint-install
-	cpplint $(CPP_FILES)
+	cpplint --root . $(CPP_FILES)
 
 clang-format: clang-format-install
 	clang-format --style=file -i $(CPP_FILES) -n --Werror
diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index ed684f96..8938ad53 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -15,52 +15,79 @@
 load("@pip_requirements//:requirements.bzl", "requirement")
 load("@pybind11_bazel//:build_defs.bzl", "pybind_extension")
 
-package(default_visibility=["//visibility:public"])
+package(default_visibility = ["//visibility:public"])
 
 py_library(
-    name="sokoban",
-    srcs=["__init__.py"],
-    data=[":sokoban_envpool.so"],
-    deps=["//envpool/python:api"],
+    name = "sokoban",
+    srcs = ["__init__.py"],
+    data = [":sokoban_envpool.so"],
+    deps = ["//envpool/python:api"],
 )
 
 py_library(
-    name="registration",
-    srcs=["registration.py"],
-    deps=[
+    name = "registration",
+    srcs = ["registration.py"],
+    deps = [
         "//envpool:registration",
     ],
 )
 
 cc_library(
-    name="sokoban_envpool_h",
-    hdrs=[
+    name = "sokoban_envpool_h",
+    hdrs = [
         "level_loader.h",
         "sokoban_envpool.h",
         "utils.h",
     ],
-    deps=[
+    deps = [
         "//envpool/core:async_envpool",
         "//envpool/core:env",
         "//envpool/core:env_spec",
     ],
 )
 
-# cc_test(
-#    name = "sokoban_envpool_test",
-#    size = "enormous",
-#    srcs = ["sokoban_envpool_test.cc"],
-#    deps = [
-#        ":sokoban_envpool_h",
-#        "@com_google_googletest//:gtest_main",
-#    ],
-# )
+cc_library(
+    name = "sokoban_node_h",  # headers needed to compile SokobanNode
+    hdrs = [
+        "level_loader.h",
+        "sokoban_node.h",
+        "utils.h",
+    ],
+    deps = ["//third_party/astar_stl:astar_stl_h"],
+)
+
+cc_binary(
+    name = "astar_log",  # CLI: runs A* on a level file and logs the results
+    srcs = [
+        "astar_log.cc",
+        "level_loader.cc",
+        "sokoban_node.cc",
+    ],
+    deps = [
+        ":sokoban_node_h",
+    ],
+)
+
+cc_test(
+    name = "sokoban_astar_test",  # gtest for the Sokoban A* search
+    size = "enormous",
+    srcs = [
+        "level_loader.cc",
+        "sokoban_astar_test.cc",
+        "sokoban_node.cc",
+    ],
+    deps = [
+        ":sokoban_node_h",
+        "@com_github_google_glog//:glog",
+        "@com_google_googletest//:gtest_main",
+    ],
+)
 
 py_test(
-    name="test",
-    srcs=["sokoban_py_envpool_test.py"],
-    main="sokoban_py_envpool_test.py",
-    deps=[
+    name = "test",
+    srcs = ["sokoban_py_envpool_test.py"],
+    main = "sokoban_py_envpool_test.py",
+    deps = [
         ":registration",
         ":sokoban",
         "//envpool",
@@ -70,15 +97,15 @@ py_test(
 )
 
 pybind_extension(
-    name="sokoban_envpool",
-    srcs=[
+    name = "sokoban_envpool",
+    srcs = [
         "level_loader.cc",
         "sokoban_envpool.cc",
     ],
-    linkopts=[
+    linkopts = [
         "-ldl",
     ],
-    deps=[
+    deps = [
         ":sokoban_envpool_h",
         "//envpool/core:py_envpool",
     ],
diff --git a/envpool/sokoban/astar_log.cc b/envpool/sokoban/astar_log.cc
new file mode 100644
index 00000000..55d93c28
--- /dev/null
+++ b/envpool/sokoban/astar_log.cc
@@ -0,0 +1,138 @@
+// Copyright 2023-2024 FAR AI
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fstream>
+#include <sstream>
+
+#include "envpool/sokoban/sokoban_node.h"
+
+namespace sokoban {
+
+// Label written to the log for a search that ended in a non-success state.
+static const char* SearchStateName(unsigned int search_state) {
+  using Search = std::AStarSearch<SokobanNode>;
+  switch (search_state) {
+    case Search::SEARCH_STATE_FAILED:
+      return "SEARCH_STATE_FAILED";
+    case Search::SEARCH_STATE_NOT_INITIALISED:
+      return "SEARCH_STATE_NOT_INITIALISED";
+    case Search::SEARCH_STATE_SEARCHING:
+      return "SEARCH_STATE_SEARCHING";
+    case Search::SEARCH_STATE_OUT_OF_MEMORY:
+      return "SEARCH_STATE_OUT_OF_MEMORY";
+    case Search::SEARCH_STATE_INVALID:
+      return "SEARCH_STATE_INVALID";
+    default:
+      return "UNKNOWN";
+  }
+}
+
+// Runs A* on levels drawn from `level_file_name` and appends one row per
+// level to `log_file_name`. Rows already in the log are counted and that many
+// levels are skipped, so a run resumes until `total_levels_to_run` is reached.
+void RunAStar(const std::string& level_file_name,
+              const std::string& log_file_name, int total_levels_to_run = 1000,
+              int fsa_limit = 1000000) {
+  using Search = std::AStarSearch<SokobanNode>;
+  std::cout << "Running A* on file " << level_file_name << " and logging to "
+            << log_file_name << " with fsa_limit " << fsa_limit << std::endl;
+  const int dim_room = 10;
+  int level_idx = 0;
+  LevelLoader level_loader(level_file_name, true, -1);
+  std::mt19937 gen(42);
+
+  std::ofstream log_file_out(log_file_name, std::ios_base::app);
+  if (!log_file_out.is_open()) {  // otherwise every result is silently lost
+    std::cerr << "Could not open log file " << log_file_name << std::endl;
+    return;
+  }
+  std::ifstream log_file_in(log_file_name);
+  if (log_file_in.peek() == std::ifstream::traits_type::eof()) {  // empty log
+    log_file_out << "Level, Actions, Steps, SearchSteps" << std::endl;
+  } else {  // skip levels that have already been run
+    std::string line;
+    std::getline(log_file_in, line);  // skip header
+    while (std::getline(log_file_in, line)) {
+      SokobanLevel level = *level_loader.GetLevel(gen);
+      level_idx++;
+    }
+  }
+  log_file_in.close();
+
+  while (level_idx < total_levels_to_run) {
+    Search astarsearch(fsa_limit);
+    std::cout << "Running level " << level_idx << std::endl;
+    SokobanLevel level = *level_loader.GetLevel(gen);
+    SokobanNode node_start(dim_room, level, false);
+    SokobanNode node_end(dim_room, level, true);
+    astarsearch.SetStartAndGoalStates(node_start, node_end);
+    unsigned int search_state;
+    unsigned int search_steps = 0;
+    std::cout << "Starting search" << std::endl;
+    do {
+      search_state = astarsearch.SearchStep();
+      search_steps++;
+    } while (search_state == Search::SEARCH_STATE_SEARCHING);
+    if (search_state == Search::SEARCH_STATE_SUCCEEDED) {
+      std::stringstream loglinestream;
+      loglinestream << level_idx << ", ";
+      astarsearch.GetSolutionStart();
+      int steps = 0;
+      for (;;) {
+        SokobanNode* node = astarsearch.GetSolutionNext();
+        if (node == nullptr) {
+          break;
+        }
+        int action = node->action_from_parent;
+        assert(action >= 0 && action < 4);
+        loglinestream << action;
+        steps++;
+      }
+      loglinestream << ", " << steps << ", " << search_steps << std::endl;
+      log_file_out << loglinestream.str();
+      astarsearch.FreeSolutionNodes();
+      astarsearch.EnsureMemoryFreed();
+    } else {
+      log_file_out << level_idx << ", " << SearchStateName(search_state)
+                   << ", -1, " << search_steps << std::endl;
+    }
+    log_file_out.flush();
+    level_idx++;
+  }
+}
+}  // namespace sokoban
+
+// Command-line entry point; the usage string below lists the arguments.
+// Returns 1 when a required argument is missing or an optional numeric
+// argument cannot be parsed (std::stoi throws in that case), and 0 once
+// RunAStar has returned.
+int main(int argc, char** argv) {
+  const std::string usage =
+      " level_file_name log_file_name [total_levels_to_run] [fsa_limit]";
+  if (argc < 3) {
+    std::cout << "Usage: " << argv[0] << usage << std::endl;
+    return 1;
+  }
+  int total_levels_to_run = 1000;
+  int fsa_limit = 1000000;
+  try {
+    total_levels_to_run = argc > 3 ? std::stoi(argv[3]) : total_levels_to_run;
+    fsa_limit = argc > 4 ? std::stoi(argv[4]) : fsa_limit;
+  } catch (...) {  // only std::stoi can throw here: bad or out-of-range text
+    std::cout << "Usage: " << argv[0] << usage << std::endl;
+    return 1;
+  }
+  sokoban::RunAStar(argv[1], argv[2], total_levels_to_run, fsa_limit);
+  return 0;
+}
diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index 314f4175..9d0a9863 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -37,8 +37,12 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path,
       cur_level_(levels_.begin()),
       level_file_paths_(0),
       verbose(verbose) {
-  for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
-    level_file_paths_.push_back(entry.path());
+  if (std::filesystem::is_regular_file(base_path)) {
+    level_file_paths_.push_back(base_path);
+  } else {
+    for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
+      level_file_paths_.push_back(entry.path());
+    }
   }
   cur_file_ = level_file_paths_.begin();
 }
@@ -49,7 +53,7 @@ static const std::array<char, kMaxLevelObject + 1> kPrintLevelKey{
 void AddLine(SokobanLevel& level, const std::string& line) {
   auto start = line.at(0);
   auto end = line.at(line.size() - 1);
-  if ((start != '#') || (start != '#')) {
+  if ((start != '#') || (end != '#')) {
     std::stringstream msg;
     msg << "Line '" << line << "' does not start (" << start << ") and end ("
         << end << ") with '#', as it should." << std::endl;
diff --git a/envpool/sokoban/sokoban_astar_test.cc b/envpool/sokoban/sokoban_astar_test.cc
new file mode 100644
index 00000000..2b0bafd6
--- /dev/null
+++ b/envpool/sokoban/sokoban_astar_test.cc
@@ -0,0 +1,144 @@
+// Copyright 2023-2024 FAR AI
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "envpool/sokoban/sokoban_node.h"
+
+#define DEBUG_LISTS 0
+#define DEBUG_LIST_LENGTHS_ONLY 0
+
+namespace sokoban {
+TEST(SokobanAStarTest, Basic) {
+  std::cout << "STL A* Search implementation\n(C)2001 Justin Heyes-Jones\n";
+
+  // Create the search; verify_* hold the expected results for each level.
+  std::AStarSearch<SokobanNode> astarsearch(1000000);
+  std::vector<int> verify_steps = {38, 19};  // solution length per level
+  std::vector<int> verify_search_steps = {63408, 24991};  // search iterations
+
+  unsigned int search_count = 0;
+  const unsigned int num_searches = 2;
+  const std::string level_file = "/app/envpool/sokoban/sample_levels/";
+  const int dim_room = 10;
+  LevelLoader level_loader(level_file, false, 2);
+  std::mt19937 gen(42);  // fixed seed
+
+  while (search_count < num_searches) {
+    // Load the next level and build its start and goal nodes
+    SokobanLevel level = *level_loader.GetLevel(gen);
+
+    SokobanNode node_start(dim_room, level, false);
+    SokobanNode node_end(dim_room, level, true);
+    std::vector<std::pair<int, int>>* goals = &node_end.boxes;
+    node_start.PrintNodeInfo(goals);
+    astarsearch.SetStartAndGoalStates(node_start, node_end);
+
+    unsigned int search_state;
+    unsigned int search_steps = 0;
+
+    do {
+      search_state = astarsearch.SearchStep();
+
+      search_steps++;
+
+#if DEBUG_LISTS
+
+      std::cout << "Steps:" << search_steps << "\n";
+
+      int len = 0;
+
+      std::cout << "Open:\n";
+      SokobanNode* p = astarsearch.GetOpenListStart();
+      while (p) {
+        len++;
+#if !DEBUG_LIST_LENGTHS_ONLY
+        ((SokobanNode*)p)->PrintNodeInfo(goals);
+#endif
+        p = astarsearch.GetOpenListNext();
+      }
+
+      std::cout << "Open list has " << len << " nodes\n";
+
+      len = 0;
+
+      std::cout << "Closed:\n";
+      p = astarsearch.GetClosedListStart();
+      while (p) {
+        len++;
+#if !DEBUG_LIST_LENGTHS_ONLY
+        p->PrintNodeInfo(goals);
+#endif
+        p = astarsearch.GetClosedListNext();
+      }
+
+      std::cout << "Closed list has " << len << " nodes\n";
+#endif
+    } while (search_state ==
+             std::AStarSearch<SokobanNode>::SEARCH_STATE_SEARCHING);
+
+    if (search_state == std::AStarSearch<SokobanNode>::SEARCH_STATE_SUCCEEDED) {
+      std::cout << "Search found goal state\n";
+
+      SokobanNode* node = astarsearch.GetSolutionStart();
+
+      int steps = 0;
+
+      node->PrintNodeInfo(goals);
+      for (;;) {
+        node = astarsearch.GetSolutionNext();
+
+        if (node == nullptr) {
+          break;
+        }
+        std::cout << "Step " << steps << std::endl;
+        node->PrintNodeInfo(goals);
+        steps++;
+      }
+      std::cout << "Solution steps " << steps << std::endl;
+      EXPECT_EQ(steps, verify_steps.at(search_count));
+
+      // Once you're done with the solution you can free the nodes up
+      astarsearch.FreeSolutionNodes();
+
+    } else if (search_state ==
+               std::AStarSearch<SokobanNode>::SEARCH_STATE_FAILED) {
+      std::cout << "Search terminated. Did not find goal state\n";
+    } else if (search_state ==
+               std::AStarSearch<SokobanNode>::SEARCH_STATE_NOT_INITIALISED) {
+      std::cout << "SEARCH_STATE_NOT_INITIALISED\n";
+    } else if (search_state ==
+               std::AStarSearch<SokobanNode>::SEARCH_STATE_SEARCHING) {
+      std::cout << "SEARCH_STATE_SEARCHING\n";
+    } else if (search_state ==
+               std::AStarSearch<SokobanNode>::SEARCH_STATE_OUT_OF_MEMORY) {
+      std::cout << "SEARCH_STATE_OUT_OF_MEMORY\n";
+    } else if (search_state ==
+               std::AStarSearch<SokobanNode>::SEARCH_STATE_INVALID) {
+      std::cout << "SEARCH_STATE_INVALID\n";
+    }
+
+    // Report the iteration count, then check the outcome against verify_*
+    std::cout << "search_steps : " << search_steps << "\n";
+    EXPECT_EQ(search_state,
+              std::AStarSearch<SokobanNode>::SEARCH_STATE_SUCCEEDED);
+    EXPECT_EQ(search_steps, verify_search_steps.at(search_count));
+
+    search_count++;
+
+    astarsearch.EnsureMemoryFreed();
+  }
+}
+}  // namespace sokoban
diff --git a/envpool/sokoban/sokoban_node.cc b/envpool/sokoban/sokoban_node.cc
new file mode 100644
index 00000000..f22065ca
--- /dev/null
+++ b/envpool/sokoban/sokoban_node.cc
@@ -0,0 +1,187 @@
+// Copyright 2023-2024 FAR AI
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "envpool/sokoban/sokoban_node.h"
+
+#include <algorithm>
+#include <limits>
+
+namespace sokoban {
+
+// Same state means same player position and same box list (order matters).
+bool SokobanNode::IsSameState(SokobanNode& rhs) const {
+  const bool same_player =
+      player_x == rhs.player_x && player_y == rhs.player_y;
+  return same_player && boxes == rhs.boxes;
+}
+
+// Prints `action_from_parent`, then the room as dim_room rows of text.
+// Legend: '#' wall, '@' player ('a' when on a goal), '$' box ('s' when on a
+// goal), '.' unoccupied goal, ' ' anything else. `goals` lists the goal
+// positions; when it is nullptr no cell is drawn as a goal.
+void SokobanNode::PrintNodeInfo(std::vector<std::pair<int, int>>* goals) {
+  // True when `cells` holds the position (x, y).
+  const auto contains = [](const std::vector<std::pair<int, int>>& cells,
+                           int x, int y) {
+    return std::find(cells.begin(), cells.end(), std::make_pair(x, y)) !=
+           cells.end();
+  };
+  std::cout << "Action: " << action_from_parent << std::endl;
+  for (int row = 0; row < dim_room; row++) {
+    for (int col = 0; col < dim_room; col++) {
+      // `walls` is a flat row-major mask over the dim_room x dim_room grid.
+      const bool wall_here = walls->at(col + row * dim_room);
+      const bool player_here = (col == player_x && row == player_y);
+      const bool box_here = contains(boxes, col, row);
+      const bool goal_here = (goals != nullptr) && contains(*goals, col, row);
+
+      // When several things share a cell the wall wins, then the player,
+      // then a box, then a bare goal.
+      const char* symbol = " ";
+      if (wall_here) {
+        symbol = "#";
+      } else if (player_here) {
+        if (goal_here) {
+          symbol = "a";
+        } else {
+          symbol = "@";
+        }
+      } else if (box_here) {
+        if (goal_here) {
+          symbol = "s";
+        } else {
+          symbol = "$";
+        }
+      } else if (goal_here) {
+        symbol = ".";
+      }
+      std::cout << symbol;
+    }
+    std::cout << std::endl;
+  }
+}
+
+std::unique_ptr<SokobanNode> SokobanNode::GetChildNode(int action_idx) {
+  int delta_x = kDelta.at(action_idx).at(0);
+  int delta_y = kDelta.at(action_idx).at(1);
+  int new_player_x = player_x + delta_x;
+  int new_player_y = player_y + delta_y;
+  // the player cannot step into a wall or outside the room: no child node
+  if (CheckWall(new_player_x, new_player_y)) {
+    return nullptr;
+  }
+  // if the player steps onto a box, that box is pushed one cell further in
+  // the same direction; otherwise only the player position changes
+  std::vector<std::pair<int, int>> new_boxes = boxes;
+  for (size_t i = 0; i < boxes.size(); i++) {
+    if (boxes.at(i).first == new_player_x &&
+        boxes.at(i).second == new_player_y) {
+      int new_box_x = boxes.at(i).first + delta_x;
+      int new_box_y = boxes.at(i).second + delta_y;
+      // a box cannot be pushed into a wall or outside the room
+      if (CheckWall(new_box_x, new_box_y)) {
+        return nullptr;
+      }
+      // check if the box is blocked by another box
+      for (const auto& orig_box : boxes) {
+        if (orig_box.first == new_box_x && orig_box.second == new_box_y) {
+          return nullptr;
+        }
+      }
+      // update the box position in the copy; `boxes` itself is not modified
+      new_boxes.at(i).first = new_box_x;
+      new_boxes.at(i).second = new_box_y;
+      if (delta_y != 0) {  // vertical push: re-sort the boxes by (y, x)
+        std::sort(
+            new_boxes.begin(), new_boxes.end(),
+            [](const std::pair<int, int>& a, const std::pair<int, int>& b) {
+              if (a.second != b.second) {
+                return a.second < b.second;
+              }
+              return a.first < b.first;
+            });
+      }
+      break;
+    }
+  }
+  return std::make_unique<SokobanNode>(dim_room, new_player_x, new_player_y,
+                                       new_boxes, walls, this, action_idx);
+}
+
+// Returns true when (x, y) lies outside the room or on a wall cell.
+bool SokobanNode::CheckWall(int x, int y) const {
+  const bool inside = x >= 0 && x < dim_room && y >= 0 && y < dim_room;
+  // Anything outside the dim_room x dim_room grid is treated as a wall.
+  return !inside || walls->at(x + y * dim_room);
+}
+
+// Order-sensitive hash of the player position followed by every box.
+size_t SokobanNode::Hash() const {
+  const std::hash<int> int_hash{};
+  size_t acc = (int_hash(player_x) * 397) ^ int_hash(player_y);
+  for (const auto& [box_x, box_y] : boxes) {
+    acc = (acc * 397) ^ int_hash(box_x);
+    acc = (acc * 397) ^ int_hash(box_y);
+  }
+  return acc;
+}
+
+// Returns true when every box of this node lies on one of the positions in
+// `goal_node.boxes`. The player position plays no part in the check.
+bool SokobanNode::IsGoal(SokobanNode& goal_node) {
+  const std::vector<std::pair<int, int>>& targets = goal_node.boxes;
+  // True when `box` coincides with some target position.
+  const auto on_target = [&targets](const std::pair<int, int>& box) {
+    return std::find(targets.begin(), targets.end(), box) != targets.end();
+  };
+  for (const auto& box : boxes) {
+    if (!on_target(box)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// A* heuristic: sum over boxes of the Manhattan distance to the nearest goal.
+float SokobanNode::GoalDistanceEstimate(SokobanNode& goal_node) {
+  float total = 0;
+  for (const auto& [box_x, box_y] : boxes) {
+    float nearest = std::numeric_limits<float>::max();
+    for (const auto& [goal_x, goal_y] : goal_node.boxes) {
+      const float manhattan = abs(box_x - goal_x) + abs(box_y - goal_y);
+      nearest = std::min(nearest, manhattan);
+    }
+    total += nearest;
+  }
+  return total;
+}
+
+float SokobanNode::GetCost(SokobanNode& successor) { return 1; }  // unit cost
+
+// Hands every legal child of this node to `astarsearch`; always returns true.
+bool SokobanNode::GetSuccessors(std::AStarSearch<SokobanNode>* astarsearch,
+                                SokobanNode* parent_node) {
+  for (size_t action = 0; action < kDelta.size(); action++) {
+    const std::unique_ptr<SokobanNode> child = GetChildNode(action);
+    // Drop blocked moves (nullptr) and moves leading back to the parent state.
+    if (child == nullptr ||
+        (parent_node != nullptr && child->IsSameState(*parent_node))) {
+      continue;
+    }
+    astarsearch->AddSuccessor(*child);
+  }
+  return true;
+}
+
+}  // namespace sokoban
diff --git a/envpool/sokoban/sokoban_node.h b/envpool/sokoban/sokoban_node.h
new file mode 100644
index 00000000..3402c7c1
--- /dev/null
+++ b/envpool/sokoban/sokoban_node.h
@@ -0,0 +1,122 @@
+// Copyright 2023-2024 FAR AI
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ENVPOOL_SOKOBAN_SOKOBAN_NODE_H_
+#define ENVPOOL_SOKOBAN_SOKOBAN_NODE_H_
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "envpool/sokoban/level_loader.h"
+#include "third_party/astar_stl/astar.h"
+
+namespace sokoban {
+
+// One Sokoban search state: the player position plus the box positions.
+// Used as the UserState of std::AStarSearch<SokobanNode>.
+class SokobanNode {
+ public:
+  static constexpr std::array<std::array<int, 2>, 4> kDelta = {
+      {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}  // Up, Down, Left, Right
+  };
+  int dim_room{0};  // the room is a dim_room x dim_room grid
+  int player_x{0}, player_y{0};
+  // (x, y) of every box; a goal node stores the target positions here.
+  std::vector<std::pair<int, int>> boxes;
+  unsigned int total_boxes{0};  // kept equal to boxes.size()
+  // Row-major wall mask (index x + y * dim_room), shared with child nodes.
+  std::shared_ptr<std::vector<bool>> walls;
+  SokobanNode* parent_node{nullptr};
+  int action_from_parent{-1};  // -1 is for when node is root
+  bool is_goal_node{false};
+
+  SokobanNode() = default;
+
+  // Builds a node from `world`, a row-major grid of dim_room * dim_room
+  // cells. A start node (is_goal_node == false) records where the boxes are;
+  // a goal node records where the targets are, i.e. where boxes must end up.
+  SokobanNode(int dim_room, const SokobanLevel& world, bool is_goal_node)
+      : dim_room(dim_room),
+        walls(std::make_shared<std::vector<bool>>(dim_room * dim_room, false)),
+        is_goal_node(is_goal_node) {
+    for (int y = 0; y < dim_room; y++) {
+      for (int x = 0; x < dim_room; x++) {
+        const auto cell = world.at(x + y * dim_room);
+        if (cell == kWall) {
+          walls->at(x + y * dim_room) = true;
+        }
+        if (cell == kPlayer || cell == kPlayerOnTarget) {
+          player_x = x;
+          player_y = y;
+        }
+        const bool has_box = (cell == kBox || cell == kBoxOnTarget);
+        // A target under the player is still a target. Leaving it out of the
+        // goal node would make the level unsolvable, because IsGoal() needs
+        // every box to sit on one of the goal node's positions.
+        const bool has_target = (cell == kTarget || cell == kBoxOnTarget ||
+                                 cell == kPlayerOnTarget);
+        if (is_goal_node ? has_target : has_box) {
+          boxes.emplace_back(x, y);
+        }
+      }
+    }
+    total_boxes = boxes.size();
+  }
+
+  // Builds a node directly from its fields. GetChildNode() uses this to
+  // create successors that share the parent's `walls`.
+  SokobanNode(int dim_room, int player_x, int player_y,
+              const std::vector<std::pair<int, int>>& boxes,
+              std::shared_ptr<std::vector<bool>> walls,
+              SokobanNode* parent_node = nullptr, int action_from_parent = -1)
+      : dim_room(dim_room),
+        player_x(player_x),
+        player_y(player_y),
+        boxes(boxes),
+        total_boxes(boxes.size()),
+        walls(std::move(walls)),
+        parent_node(parent_node),
+        action_from_parent(action_from_parent) {}
+
+  // Copies the player position and the parent link of `goal_node` into this
+  // node, which must itself be a goal node. Box positions are left untouched.
+  void UpdateGoalNode(SokobanNode goal_node) {
+    assert(is_goal_node);
+    player_x = goal_node.player_x;
+    player_y = goal_node.player_y;
+    parent_node = goal_node.parent_node;
+    action_from_parent = goal_node.action_from_parent;
+  }
+
+  // True when (x, y) is a wall or lies outside the room.
+  [[nodiscard]] bool CheckWall(int x, int y) const;
+
+  // Node reached by action `action_idx` (an index into kDelta), or nullptr
+  // when that move is blocked.
+  std::unique_ptr<SokobanNode> GetChildNode(int action_idx);
+
+  float GoalDistanceEstimate(SokobanNode& goal_node);
+  bool IsGoal(SokobanNode& goal_node);
+  bool GetSuccessors(std::AStarSearch<SokobanNode>* astarsearch,
+                     SokobanNode* parent_node);
+  static float GetCost(SokobanNode& successor);
+  bool IsSameState(SokobanNode& rhs) const;
+  [[nodiscard]] size_t Hash() const;
+
+  void PrintNodeInfo(std::vector<std::pair<int, int>>* goals = nullptr);
+};
+}  // namespace sokoban
+
+#endif  // ENVPOOL_SOKOBAN_SOKOBAN_NODE_H_
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index d5a7a2e4..4bb423f6 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -15,6 +15,8 @@
 
 import glob
 import re
+import subprocess
+import tempfile
 import time
 
 import numpy as np
@@ -101,57 +103,55 @@ def test_envpool_max_episode_steps(self) -> None:
       assert not np.any(terminated)
       assert np.all(truncated)
 
-    def test_envpool_load_sequentially(self, capfd) -> None:
-      levels_dir = "/app/envpool/sokoban/sample_levels"
-      files = glob.glob(f"{levels_dir}/*.txt")
-      levels_by_files = []
-      for file in files:
-        with open(file, "r") as f:
-          text = f.read()
-        levels = text.split("\n;")
-        levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels]
-        levels_by_files.append((file, levels))
-      assert len(levels_by_files) > 1
-      assert all(len(levels) > 1 for levels in levels_by_files)
-      total_levels = sum(len(levels) for levels in levels_by_files)
-      for n_levels_to_load in range(1, total_levels + 1):
-        env = envpool.make(
-          "Sokoban-v0",
-          env_type="gymnasium",
-          num_envs=1,
-          batch_size=1,
-          max_episode_steps=60,
-          min_episode_steps=60,
-          levels_dir=levels_dir,
-          load_sequentially=True,
-          n_levels_to_load=n_levels_to_load,
-          verbose=2,
-        )
-        dim_room = env.spec.config.dim_room
-        obs, _ = env.reset()
-        assert obs.shape == (
-          1,
-          3,
-          dim_room,
-          dim_room,
-        ), f"obs shape: {obs.shape}"
-        if n_levels_to_load == -1:
-          n_levels_to_load = total_levels
-        for _ in range(n_levels_to_load - 1):
-          env.reset()
-        out, _ = capfd.readouterr()
-        files_output = out.split("***")[1:]
-        for i, file_output in enumerate(files_output):
-          first_line, out = file_output.strip().split("\n", 1)
-          result = re.search(
-            r'Loaded (\d+) levels from "(.*\.txt)"', first_line
-          )
-          n_levels, file_name = int(result.group(1)), result.group(2)
-          lev1, lev2 = out.strip().split("\n\n")
-          assert file_name == levels_by_files[i][0]
-          assert n_levels == len(levels_by_files[i][1])
-          assert lev1 == levels_by_files[i][1][0]
-          assert lev2 == levels_by_files[i][1][1]
+  def test_envpool_load_sequentially(self, capfd) -> None:
+    levels_dir = "/app/envpool/sokoban/sample_levels"
+    files = glob.glob(f"{levels_dir}/*.txt")
+    levels_by_files = []
+    for file in files:
+      with open(file, "r") as f:
+        text = f.read()
+      levels = text.split("\n;")  # chunks start after a "\n;" marker
+      levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels]
+      levels_by_files.append((file, levels))  # (path, level texts) tuples
+    assert len(levels_by_files) > 1  # NOTE(review): `levels` below is a tuple
+    assert all(len(levels) > 1 for levels in levels_by_files)
+    total_levels = sum(len(levels) for levels in levels_by_files)
+    for n_levels_to_load in range(1, total_levels + 1):
+      env = envpool.make(
+        "Sokoban-v0",
+        env_type="gymnasium",
+        num_envs=1,
+        batch_size=1,
+        max_episode_steps=60,
+        min_episode_steps=60,
+        levels_dir=levels_dir,
+        load_sequentially=True,
+        n_levels_to_load=n_levels_to_load,
+        verbose=2,
+      )
+      dim_room = env.spec.config.dim_room
+      obs, _ = env.reset()
+      assert obs.shape == (
+        1,
+        3,
+        dim_room,
+        dim_room,
+      ), f"obs shape: {obs.shape}"
+      if n_levels_to_load == -1:  # NOTE(review): never true for this range
+        n_levels_to_load = total_levels
+      for _ in range(n_levels_to_load - 1):
+        env.reset()
+      out, _ = capfd.readouterr()
+      files_output = out.split("***")[1:]  # presumably one chunk per file
+      for i, file_output in enumerate(files_output):
+        first_line, out = file_output.strip().split("\n", 1)
+        result = re.search(r'Loaded (\d+) levels from "(.*\.txt)"', first_line)
+        n_levels, file_name = int(result.group(1)), result.group(2)
+        lev1, lev2 = out.strip().split("\n\n")
+        assert file_name == levels_by_files[i][0]
+        assert n_levels == len(levels_by_files[i][1])
+        assert lev1 == levels_by_files[i][1][0]
+        assert lev2 == levels_by_files[i][1][1]
 
   def test_xla(self) -> None:
     num_envs = 10
@@ -169,5 +169,21 @@ def test_xla(self) -> None:
     handle, recv, send, step = env.xla()
 
 
+def test_astar_log() -> None:
+  """Run the astar_log binary on one level and check the logged solution."""
+  level_file_name = "/app/envpool/sokoban/sample_levels/001.txt"
+  with tempfile.NamedTemporaryFile() as tmp:
+    subprocess.run(
+      [
+        "bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name,
+        tmp.name, "1"  # every argv entry must be a string, hence "1" not 1
+      ],
+      check=True,
+    )
+    with open(tmp.name, "r") as log_file:
+      log = log_file.read()
+    assert "0, 301333002213130203303031, 24, 40611" == log.split("\n")[1]
+
+
 if __name__ == "__main__":
   pytest.main(["-v", __file__])
diff --git a/third_party/astar_stl/BUILD b/third_party/astar_stl/BUILD
new file mode 100644
index 00000000..03de4363
--- /dev/null
+++ b/third_party/astar_stl/BUILD
@@ -0,0 +1,23 @@
+# Copyright 2023-2024 FAR AI
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cc_library(
+    name = "astar_stl_h",  # header-only: no srcs
+    hdrs = [
+        "astar.h",
+        "fsa.h",
+    ],
+    visibility = ["//visibility:public"],
+    deps = [],
+)
diff --git a/third_party/astar_stl/astar.h b/third_party/astar_stl/astar.h
new file mode 100644
index 00000000..61a99202
--- /dev/null
+++ b/third_party/astar_stl/astar.h
@@ -0,0 +1,747 @@
+/*
+A* Algorithm Implementation using STL is
+Copyright (C)2001-2005 Justin Heyes-Jones
+
+Permission is given by the author to freely redistribute and
+include this code in any program as long as this credit is
+given where due.
+
+  COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
+  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
+  INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE
+  IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE
+  OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND
+  PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED
+  CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL
+  DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY
+  NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF
+  WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE
+  OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+  THIS DISCLAIMER.
+
+  Use at your own risk!
+
+*/
+
+#ifndef STLASTAR_H
+#define STLASTAR_H
+// used for text debugging
+#include <stdio.h>
+
+#include <iostream>
+// #include <conio.h>
+#include <assert.h>
+
+// stl includes
+#include <algorithm>
+#include <cfloat>
+#include <unordered_set>
+#include <vector>
+
+// fast fixed size memory allocator, used for fast node memory management
+#include "fsa.h"
+
+// Fixed size memory allocator can be disabled to compare performance
+// Uses std new and delete instead if you turn it off
+#define USE_FSA_MEMORY 1
+
+// disable warning that debugging information has lines that are truncated
+// occurs in stl headers
+#if defined(WIN32) && defined(_WINDOWS)
+#pragma warning(disable : 4786)
+#endif
+
+namespace std {
+
+template <class T>
+class AStarState;
+
+// The AStar search class. UserState is the users state space type
+template <class UserState>
+class AStarSearch {
+ public:  // data
+  enum {
+    SEARCH_STATE_NOT_INITIALISED,
+    SEARCH_STATE_SEARCHING,
+    SEARCH_STATE_SUCCEEDED,
+    SEARCH_STATE_FAILED,
+    SEARCH_STATE_OUT_OF_MEMORY,
+    SEARCH_STATE_INVALID
+  };
+
+  // A node represents a possible state in the search
+  // The user provided state type is included inside this type
+
+ public:
+  class Node {
+   public:
+    Node* parent;  // used during the search to record the parent of successor
+                   // nodes
+    Node* child;   // used after the search for the application to view the
+                   // search in reverse
+
+    float g;  // cost of this node + its predecessors
+    float h;  // heuristic estimate of distance to goal
+    float f;  // sum of cumulative cost of predecessors and self and heuristic
+
+    Node() : parent(0), child(0), g(0.0f), h(0.0f), f(0.0f) {}
+
+    bool operator==(const Node& otherNode) const {
+      return this->m_UserState.IsSameState(otherNode->m_UserState);
+    }
+
+    UserState m_UserState;
+  };
+
+  // For sorting the heap the STL needs compare function that lets us compare
+  // the f value of two nodes
+
+  class HeapCompare_f {
+   public:
+    bool operator()(const Node* x, const Node* y) const { return x->f > y->f; }
+  };
+
+ public:  // methods
+  // Default constructor. Start/goal pointers and the step counter are zeroed
+  // so the null checks in GetSolutionStart/End/Cost work before a search.
+  AStarSearch()
+      : m_State(SEARCH_STATE_NOT_INITIALISED),
+        m_Steps(0), m_Start(NULL), m_Goal(NULL),
+        m_CurrentSolutionNode(NULL),
+#if USE_FSA_MEMORY
+        m_FixedSizeAllocator(1000),
+#endif
+        m_AllocateNodeCount(0), m_CancelRequest(false) {}
+
+  // MaxNodes sizes the fixed node pool (only used when USE_FSA_MEMORY is on).
+  AStarSearch(int MaxNodes)
+      : m_State(SEARCH_STATE_NOT_INITIALISED),
+        m_Steps(0), m_Start(NULL), m_Goal(NULL),
+        m_CurrentSolutionNode(NULL),
+#if USE_FSA_MEMORY
+        m_FixedSizeAllocator(MaxNodes),
+#endif
+        m_AllocateNodeCount(0), m_CancelRequest(false) {}
+
+  // call at any time to cancel the search and free up all the memory
+  void CancelSearch() { m_CancelRequest = true; }
+
+  // Set Start and goal states
+  void SetStartAndGoalStates(UserState& Start, UserState& Goal) {
+    m_CancelRequest = false;
+
+    m_Start = AllocateNode();
+    m_Goal = AllocateNode();
+
+    assert((m_Start != NULL && m_Goal != NULL));
+
+    m_Start->m_UserState = Start;
+    m_Goal->m_UserState = Goal;
+
+    m_State = SEARCH_STATE_SEARCHING;
+
+    // Initialise the AStar specific parts of the Start Node
+    // The user only needs fill out the state information
+
+    m_Start->g = 0;
+    m_Start->h = m_Start->m_UserState.GoalDistanceEstimate(m_Goal->m_UserState);
+    m_Start->f = m_Start->g + m_Start->h;
+    m_Start->parent = 0;
+
+    // Push the start node on the Open list
+
+    m_OpenList.push_back(m_Start);  // heap now unsorted
+
+    // Sort back element into heap
+    push_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f());
+
+    // Initialise counter for search steps
+    m_Steps = 0;
+  }
+
+  // Advances search one step
+  unsigned int SearchStep() {
+    // Firstly break if the user has not initialised the search
+    assert((m_State > SEARCH_STATE_NOT_INITIALISED) &&
+           (m_State < SEARCH_STATE_INVALID));
+
+    // Next I want it to be safe to do a searchstep once the search has
+    // succeeded...
+    if ((m_State == SEARCH_STATE_SUCCEEDED) ||
+        (m_State == SEARCH_STATE_FAILED)) {
+      return m_State;
+    }
+
+    // Failure is defined as emptying the open list as there is nothing left to
+    // search...
+    // New: Allow user abort
+    if (m_OpenList.empty() || m_CancelRequest) {
+      FreeAllNodes();
+      m_State = SEARCH_STATE_FAILED;
+      return m_State;
+    }
+
+    // Increment step count
+    m_Steps++;
+
+    // Pop the best node (the one with the lowest f)
+    Node* n = m_OpenList.front();  // get pointer to the node
+    pop_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f());
+    m_OpenList.pop_back();
+
+    // Check for the goal, once we pop that we're done
+    if (n->m_UserState.IsGoal(m_Goal->m_UserState)) {
+      // The user is going to use the Goal Node he passed in
+      // so copy the parent pointer of n
+      m_Goal->parent = n->parent;
+      m_Goal->g = n->g;
+      m_Goal->m_UserState.UpdateGoalNode(n->m_UserState);
+
+      // A special case is that the goal was passed in as the start state
+      // so handle that here
+      if (false == n->m_UserState.IsSameState(m_Start->m_UserState)) {
+        FreeNode(n);
+
+        // set the child pointers in each node (except Goal which has no child)
+        Node* nodeChild = m_Goal;
+        Node* nodeParent = m_Goal->parent;
+
+        do {
+          nodeParent->child = nodeChild;
+
+          nodeChild = nodeParent;
+          nodeParent = nodeParent->parent;
+
+        } while (nodeChild !=
+                 m_Start);  // Start is always the first node by definition
+      }
+
+      // delete nodes that aren't needed for the solution
+      FreeUnusedNodes();
+
+      m_State = SEARCH_STATE_SUCCEEDED;
+
+      return m_State;
+    } else  // not goal
+    {
+      // We now need to generate the successors of this node
+      // The user helps us to do this, and we keep the new nodes in
+      // m_Successors ...
+
+      m_Successors.clear();  // empty vector of successor nodes to n
+
+      // User provides this functions and uses AddSuccessor to add each
+      // successor of node 'n' to m_Successors
+      bool ret = n->m_UserState.GetSuccessors(
+          this, n->parent ? &n->parent->m_UserState : NULL);
+
+      if (!ret) {
+        typename vector<Node*>::iterator successor;
+
+        // free the nodes that may previously have been added
+        for (successor = m_Successors.begin(); successor != m_Successors.end();
+             successor++) {
+          FreeNode((*successor));
+        }
+
+        m_Successors.clear();  // empty vector of successor nodes to n
+
+        // free up everything else we allocated
+        FreeNode((n));
+        FreeAllNodes();
+
+        m_State = SEARCH_STATE_OUT_OF_MEMORY;
+        return m_State;
+      }
+
+      // Now handle each successor to the current node ...
+      for (typename vector<Node*>::iterator successor = m_Successors.begin();
+           successor != m_Successors.end(); successor++) {
+        // The g value for this successor ...
+        float newg = n->g + n->m_UserState.GetCost((*successor)->m_UserState);
+
+        // Now we need to find whether the node is on the open or closed lists
+        // If it is but the node that is already on them is better (lower g)
+        // then we can forget about this successor
+
+        // First linear search of open list to find node
+
+        typename vector<Node*>::iterator openlist_result;
+
+        for (openlist_result = m_OpenList.begin();
+             openlist_result != m_OpenList.end(); openlist_result++) {
+          if ((*openlist_result)
+                  ->m_UserState.IsSameState((*successor)->m_UserState)) {
+            break;
+          }
+        }
+
+        if (openlist_result != m_OpenList.end()) {
+          // we found this state on open
+
+          if ((*openlist_result)->g <= newg) {
+            FreeNode((*successor));
+
+            // the one on Open is cheaper than this one
+            continue;
+          }
+        }
+        typename unordered_set<Node*, NodeHash, NodeEqual>::iterator
+            closedlist_result;
+
+        closedlist_result = m_ClosedList.find(*successor);
+
+        if (closedlist_result != m_ClosedList.end()) {
+          // we found this state on closed
+
+          if ((*closedlist_result)->g <= newg) {
+            // the one on Closed is cheaper than this one
+            FreeNode((*successor));
+
+            continue;
+          }
+        }
+
+        // This node is the best node so far with this particular state
+        // so lets keep it and set up its AStar specific data ...
+
+        (*successor)->parent = n;
+        (*successor)->g = newg;
+        (*successor)->h =
+            (*successor)->m_UserState.GoalDistanceEstimate(m_Goal->m_UserState);
+        (*successor)->f = (*successor)->g + (*successor)->h;
+
+        // Successor in closed list
+        // 1 - Update old version of this node in closed list
+        // 2 - Move it from closed to open list
+        // 3 - Sort heap again in open list
+
+        if (closedlist_result != m_ClosedList.end()) {
+          // Update closed node with successor node AStar data
+          //*(*closedlist_result) = *(*successor);
+          (*closedlist_result)->parent = (*successor)->parent;
+          (*closedlist_result)->g = (*successor)->g;
+          (*closedlist_result)->h = (*successor)->h;
+          (*closedlist_result)->f = (*successor)->f;
+
+          // Free successor node
+          FreeNode((*successor));
+
+          // Push closed node into open list
+          m_OpenList.push_back((*closedlist_result));
+
+          // Remove closed node from closed list
+          m_ClosedList.erase(closedlist_result);
+
+          // Sort back element into heap
+          push_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f());
+
+          // Fix thanks to ...
+          // Greg Douglas <gregdouglasmail@gmail.com>
+          // who noticed that this code path was incorrect
+          // Here we have found a new state which is already CLOSED
+
+        }
+
+        // Successor in open list
+        // 1 - Update old version of this node in open list
+        // 2 - sort heap again in open list
+
+        else if (openlist_result != m_OpenList.end()) {
+          // Update open node with successor node AStar data
+          //*(*openlist_result) = *(*successor);
+          (*openlist_result)->parent = (*successor)->parent;
+          (*openlist_result)->g = (*successor)->g;
+          (*openlist_result)->h = (*successor)->h;
+          (*openlist_result)->f = (*successor)->f;
+
+          // Free successor node
+          FreeNode((*successor));
+
+          // re-make the heap
+          // make_heap rather than sort_heap is an essential bug fix
+          // thanks to Mike Ryynanen for pointing this out and then explaining
+          // it in detail. sort_heap called on an invalid heap does not work
+          make_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f());
+        }
+
+        // New successor
+        // 1 - Move it from successors to open list
+        // 2 - sort heap again in open list
+
+        else {
+          // Push successor node into open list
+          m_OpenList.push_back((*successor));
+
+          // Sort back element into heap
+          push_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f());
+        }
+      }
+
+      // push n onto Closed, as we have expanded it now
+
+      m_ClosedList.insert(n);
+
+    }  // end else (not goal so expand)
+
+    return m_State;  // Succeeded bool is false at this point.
+  }
+
+  // User calls this to add a successor to a list of successors
+  // when expanding the search frontier
+  bool AddSuccessor(UserState& State) {
+    Node* node = AllocateNode();
+
+    if (node) {
+      node->m_UserState = State;
+
+      m_Successors.push_back(node);
+
+      return true;
+    }
+
+    return false;
+  }
+
+  // Free the solution nodes
+  // This is done to clean up all used Node memory when you are done with the
+  // search
+  void FreeSolutionNodes() {
+    Node* n = m_Start;
+
+    if (m_Start->child) {
+      do {
+        Node* del = n;
+        n = n->child;
+        FreeNode(del);
+
+        del = NULL;
+
+      } while (n != m_Goal);
+
+      FreeNode(n);  // Delete the goal
+
+    } else {
+      // if the start node is the solution we need to just delete the start and
+      // goal nodes
+      FreeNode(m_Start);
+      FreeNode(m_Goal);
+    }
+  }
+
+  // Functions for traversing the solution
+
+  // Get start node
+  UserState* GetSolutionStart() {
+    m_CurrentSolutionNode = m_Start;
+    if (m_Start) {
+      return &m_Start->m_UserState;
+    } else {
+      return NULL;
+    }
+  }
+
+  // Get next node
+  UserState* GetSolutionNext() {
+    if (m_CurrentSolutionNode) {
+      if (m_CurrentSolutionNode->child) {
+        Node* child = m_CurrentSolutionNode->child;
+
+        m_CurrentSolutionNode = m_CurrentSolutionNode->child;
+
+        return &child->m_UserState;
+      }
+    }
+
+    return NULL;
+  }
+
+  // Get end node
+  UserState* GetSolutionEnd() {
+    m_CurrentSolutionNode = m_Goal;
+    if (m_Goal) {
+      return &m_Goal->m_UserState;
+    } else {
+      return NULL;
+    }
+  }
+
+  // Step solution iterator backwards
+  UserState* GetSolutionPrev() {
+    if (m_CurrentSolutionNode) {
+      if (m_CurrentSolutionNode->parent) {
+        Node* parent = m_CurrentSolutionNode->parent;
+
+        m_CurrentSolutionNode = m_CurrentSolutionNode->parent;
+
+        return &parent->m_UserState;
+      }
+    }
+
+    return NULL;
+  }
+
+  // Get final cost of solution
+  // Returns FLT_MAX if goal is not defined or there is no solution
+  float GetSolutionCost() {
+    if (m_Goal && m_State == SEARCH_STATE_SUCCEEDED) {
+      return m_Goal->g;
+    } else {
+      return FLT_MAX;
+    }
+  }
+
+  // For educational use and debugging it is useful to be able to view
+  // the open and closed list at each step, here are two functions to allow
+  // that.
+
+  UserState* GetOpenListStart() {
+    float f, g, h;
+    return GetOpenListStart(f, g, h);
+  }
+
+  UserState* GetOpenListStart(float& f, float& g, float& h) {
+    iterDbgOpen = m_OpenList.begin();
+    if (iterDbgOpen != m_OpenList.end()) {
+      f = (*iterDbgOpen)->f;
+      g = (*iterDbgOpen)->g;
+      h = (*iterDbgOpen)->h;
+      return &(*iterDbgOpen)->m_UserState;
+    }
+
+    return NULL;
+  }
+
+  UserState* GetOpenListNext() {
+    float f, g, h;
+    return GetOpenListNext(f, g, h);
+  }
+
+  UserState* GetOpenListNext(float& f, float& g, float& h) {
+    iterDbgOpen++;
+    if (iterDbgOpen != m_OpenList.end()) {
+      f = (*iterDbgOpen)->f;
+      g = (*iterDbgOpen)->g;
+      h = (*iterDbgOpen)->h;
+      return &(*iterDbgOpen)->m_UserState;
+    }
+
+    return NULL;
+  }
+
+  UserState* GetClosedListStart() {
+    float f, g, h;
+    return GetClosedListStart(f, g, h);
+  }
+
+  UserState* GetClosedListStart(float& f, float& g, float& h) {
+    iterDbgClosed = m_ClosedList.begin();
+    if (iterDbgClosed != m_ClosedList.end()) {
+      f = (*iterDbgClosed)->f;
+      g = (*iterDbgClosed)->g;
+      h = (*iterDbgClosed)->h;
+
+      return &(*iterDbgClosed)->m_UserState;
+    }
+
+    return NULL;
+  }
+
+  UserState* GetClosedListNext() {
+    float f, g, h;
+    return GetClosedListNext(f, g, h);
+  }
+
+  UserState* GetClosedListNext(float& f, float& g, float& h) {
+    iterDbgClosed++;
+    if (iterDbgClosed != m_ClosedList.end()) {
+      f = (*iterDbgClosed)->f;
+      g = (*iterDbgClosed)->g;
+      h = (*iterDbgClosed)->h;
+
+      return &(*iterDbgClosed)->m_UserState;
+    }
+
+    return NULL;
+  }
+
+  // Get the number of steps
+
+  int GetStepCount() { return m_Steps; }
+
+  void EnsureMemoryFreed() {
+#if USE_FSA_MEMORY
+    assert(m_AllocateNodeCount == 0);
+#endif
+  }
+
+ private:  // methods
+  // This is called when a search fails or is cancelled to free all used
+  // memory
+  void FreeAllNodes() {
+    // iterate open list and delete all nodes
+    typename vector<Node*>::iterator iterOpen = m_OpenList.begin();
+
+    while (iterOpen != m_OpenList.end()) {
+      Node* n = (*iterOpen);
+      FreeNode(n);
+
+      iterOpen++;
+    }
+
+    m_OpenList.clear();
+
+    // iterate closed list and delete unused nodes
+    typename unordered_set<Node*, NodeHash, NodeEqual>::iterator iterClosed;
+
+    for (iterClosed = m_ClosedList.begin(); iterClosed != m_ClosedList.end();
+         iterClosed++) {
+      Node* n = (*iterClosed);
+      FreeNode(n);
+    }
+
+    m_ClosedList.clear();
+
+    // delete the goal
+
+    FreeNode(m_Goal);
+  }
+
+  // This call is made by the search class when the search ends. A lot of nodes
+  // may be created that are still present when the search ends. They will be
+  // deleted by this routine once the search ends
+  void FreeUnusedNodes() {
+    // iterate open list and delete unused nodes
+    typename vector<Node*>::iterator iterOpen = m_OpenList.begin();
+
+    while (iterOpen != m_OpenList.end()) {
+      Node* n = (*iterOpen);
+
+      if (!n->child) {
+        FreeNode(n);
+
+        n = NULL;
+      }
+
+      iterOpen++;
+    }
+
+    m_OpenList.clear();
+
+    // iterate closed list and delete unused nodes
+    typename unordered_set<Node*, NodeHash, NodeEqual>::iterator iterClosed;
+
+    for (iterClosed = m_ClosedList.begin(); iterClosed != m_ClosedList.end();
+         iterClosed++) {
+      Node* n = (*iterClosed);
+
+      if (!n->child) {
+        FreeNode(n);
+        n = NULL;
+      }
+    }
+
+    m_ClosedList.clear();
+  }
+
+  // Node memory management
+  Node* AllocateNode() {
+#if !USE_FSA_MEMORY
+    m_AllocateNodeCount++;
+    Node* p = new Node;
+    return p;
+#else
+    Node* address = m_FixedSizeAllocator.alloc();
+
+    if (!address) {
+      return NULL;
+    }
+    m_AllocateNodeCount++;
+    Node* p = new (address) Node;
+    return p;
+#endif
+  }
+
+  void FreeNode(Node* node) {
+    m_AllocateNodeCount--;
+
+#if !USE_FSA_MEMORY
+    delete node;
+#else
+    node->~Node();
+    m_FixedSizeAllocator.free(node);
+#endif
+  }
+
+ private:  // data
+  // Heap (simple vector but used as a heap, cf. Steve Rabin's game gems
+  // article)
+  vector<Node*> m_OpenList;
+
+  // Closed is an unordered_set
+  struct NodeHash {
+    size_t operator()(Node* const& n) const { return n->m_UserState.Hash(); }
+  };
+  struct NodeEqual {
+    bool operator()(Node* a, Node* b) const {
+      return a->m_UserState.IsSameState(b->m_UserState);
+    }
+  };
+  unordered_set<Node*, NodeHash, NodeEqual> m_ClosedList;
+
+  // Successors is a vector filled out by the user each time successors to a
+  // node are generated
+  vector<Node*> m_Successors;
+
+  // State
+  unsigned int m_State;
+
+  // Counts steps
+  int m_Steps;
+
+  // Start and goal state pointers
+  Node* m_Start;
+  Node* m_Goal;
+
+  Node* m_CurrentSolutionNode;
+
+#if USE_FSA_MEMORY
+  // Memory
+  FixedSizeAllocator<Node> m_FixedSizeAllocator;
+#endif
+
+  // Debug : cursors kept between calls of the GetOpenList/GetClosedList
+  //  Start/Next functions; each uses the iterator type of its container
+  typename vector<Node*>::iterator iterDbgOpen;
+  typename unordered_set<Node*, NodeHash, NodeEqual>::iterator iterDbgClosed;
+
+  // debugging : count memory allocation and free's
+  int m_AllocateNodeCount;
+
+  bool m_CancelRequest;
+};
+
+template <class T>
+class AStarState {
+ public:
+  virtual ~AStarState() {}
+  virtual float GoalDistanceEstimate(
+      T& nodeGoal) = 0;  // Heuristic function which computes the estimated cost
+                         // to the goal node
+  virtual bool IsGoal(
+      T& nodeGoal) = 0;  // Returns true if this node is the goal node
+  virtual bool GetSuccessors(
+      AStarSearch<T>* astarsearch,
+      T* parent_node) = 0;  // Retrieves all successors to this node and adds
+                            // them via astarsearch.addSuccessor()
+  virtual float GetCost(
+      T& successor) = 0;  // Computes the cost of travelling from this node to
+                          // the successor node
+  virtual bool IsSameState(
+      T& rhs) = 0;  // Returns true if this node is the same as the rhs node
+  virtual size_t Hash() = 0;  // Returns a hash for the state
+};
+
+}  // namespace std
+
+#endif
diff --git a/third_party/astar_stl/fsa.h b/third_party/astar_stl/fsa.h
new file mode 100644
index 00000000..5803f88e
--- /dev/null
+++ b/third_party/astar_stl/fsa.h
@@ -0,0 +1,211 @@
+/*
+
+A* Algorithm Implementation using STL is
+Copyright (C)2001-2005 Justin Heyes-Jones
+
+Permission is given by the author to freely redistribute and
+include this code in any program as long as this credit is
+given where due.
+
+  COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS,
+  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED,
+  INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE
+  IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE
+  OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND
+  PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED
+  CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL
+  DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY
+  NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF
+  WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE
+  OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER
+  THIS DISCLAIMER.
+
+  Use at your own risk!
+
+
+
+  FixedSizeAllocator class
+  Copyright 2001 Justin Heyes-Jones
+
+  This class is a constant time O(1) memory manager for objects of
+  a specified type. The type is specified using a template class.
+
+  Memory is allocated from a fixed size buffer which you can specify in the
+  class constructor or use the default.
+
+  Using GetFirst and GetNext it is possible to iterate through the elements
+  one by one, and this would be the most common use for the class.
+
+  I would suggest using this class when you want O(1) add and delete
+  and you don't do much searching, which would be O(n). Structures such as
+binary trees can be used instead to get O(logn) access time.
+
+*/
+
+#ifndef FSA_H
+#define FSA_H
+
+#include <stdio.h>
+#include <string.h>
+
+template <class USER_TYPE>
+class FixedSizeAllocator {
+ public:
+  // Constants
+  enum { FSA_DEFAULT_SIZE = 100 };
+
+  // This class enables us to transparently manage the extra data
+  // needed to enable the user class to form part of the double-linked
+  // list class
+  struct FSA_ELEMENT {
+    USER_TYPE UserType;
+
+    FSA_ELEMENT* pPrev;
+    FSA_ELEMENT* pNext;
+  };
+
+ public:  // methods
+  FixedSizeAllocator(unsigned int MaxElements = FSA_DEFAULT_SIZE)
+      : m_pFirstUsed(NULL), m_MaxElements(MaxElements) {
+    // Allocate enough raw memory for the maximum number of elements
+    char* pMem = new char[m_MaxElements * sizeof(FSA_ELEMENT)];
+    m_pMemory = (FSA_ELEMENT*)pMem;
+
+    // A zero-capacity pool has no element to link. Leave the free list empty
+    // so alloc() returns NULL rather than memory outside the buffer.
+    if (m_MaxElements == 0) {
+      m_pFirstFree = NULL;
+      return;
+    }
+
+    // Set the free list first pointer
+    m_pFirstFree = m_pMemory;
+    // Clear the memory
+    memset(m_pMemory, 0, sizeof(FSA_ELEMENT) * m_MaxElements);
+
+    // Chain all elements into the double linked free list
+    FSA_ELEMENT* pElement = m_pFirstFree;
+    for (unsigned int i = 0; i < m_MaxElements; i++) {
+      pElement->pPrev = pElement - 1;
+      pElement->pNext = pElement + 1;
+      pElement++;
+    }
+
+    // first element gets a null prev, last element a null next
+    m_pFirstFree->pPrev = NULL;
+    (pElement - 1)->pNext = NULL;
+  }
+
+  ~FixedSizeAllocator() {
+    // Free up the memory
+    delete[] (char*)m_pMemory;
+  }
+
+  // Allocate a new USER_TYPE and return a pointer to it
+  USER_TYPE* alloc() {
+    FSA_ELEMENT* pNewNode = NULL;
+
+    if (!m_pFirstFree) {
+      return NULL;
+    } else {
+      pNewNode = m_pFirstFree;
+      m_pFirstFree = pNewNode->pNext;
+
+      // if the new node points to another free node then
+      // change that nodes prev free pointer...
+      if (pNewNode->pNext) {
+        pNewNode->pNext->pPrev = NULL;
+      }
+
+      // node is now on the used list
+
+      pNewNode->pPrev = NULL;  // the allocated node is always first in the list
+
+      if (m_pFirstUsed == NULL) {
+        pNewNode->pNext = NULL;  // no other nodes
+      } else {
+        m_pFirstUsed->pPrev =
+            pNewNode;  // insert this at the head of the used list
+        pNewNode->pNext = m_pFirstUsed;
+      }
+
+      m_pFirstUsed = pNewNode;
+    }
+
+    return reinterpret_cast<USER_TYPE*>(pNewNode);
+  }
+
+  // Free the given user type
+  // For efficiency I don't check whether the user_data is a valid
+  // pointer that was allocated. I may add some debug only checking
+  // (To add the debug check you'd need to make sure the pointer is in
+  // the m_pMemory area and is pointing at the start of a node)
+  void free(USER_TYPE* user_data) {
+    FSA_ELEMENT* pNode = reinterpret_cast<FSA_ELEMENT*>(user_data);
+
+    // manage used list, remove this node from it
+    if (pNode->pPrev) {
+      pNode->pPrev->pNext = pNode->pNext;
+    } else {
+      // this handles the case that we delete the first node in the used list
+      m_pFirstUsed = pNode->pNext;
+    }
+
+    if (pNode->pNext) {
+      pNode->pNext->pPrev = pNode->pPrev;
+    }
+
+    // add to free list
+    if (m_pFirstFree == NULL) {
+      // free list was empty
+      m_pFirstFree = pNode;
+      pNode->pPrev = NULL;
+      pNode->pNext = NULL;
+    } else {
+      // Add this node at the start of the free list
+      m_pFirstFree->pPrev = pNode;
+      pNode->pNext = m_pFirstFree;
+      m_pFirstFree = pNode;
+    }
+  }
+
+  // For debugging this displays both lists (using the prev/next list pointers)
+  void Debug() {
+    printf("free list ");
+    // Pointers are printed with %p (which takes void*); %x expects unsigned int
+    FSA_ELEMENT* p = m_pFirstFree;
+    while (p) {
+      printf("%p!%p ", (void*)p->pPrev, (void*)p->pNext);
+      p = p->pNext;
+    }
+    printf("\n");
+
+    printf("used list ");
+
+    p = m_pFirstUsed;
+    while (p) {
+      printf("%p!%p ", (void*)p->pPrev, (void*)p->pNext);
+      p = p->pNext;
+    }
+    printf("\n");
+  }
+
+  // Iterators
+
+  USER_TYPE* GetFirst() { return reinterpret_cast<USER_TYPE*>(m_pFirstUsed); }
+
+  USER_TYPE* GetNext(USER_TYPE* node) {
+    return reinterpret_cast<USER_TYPE*>(
+        (reinterpret_cast<FSA_ELEMENT*>(node))->pNext);
+  }
+
+ public:   // data
+ private:  // methods
+ private:  // data
+  FSA_ELEMENT* m_pFirstFree;
+  FSA_ELEMENT* m_pFirstUsed;
+  unsigned int m_MaxElements;
+  FSA_ELEMENT* m_pMemory;
+};
+
+#endif  // defined FSA_H
\ No newline at end of file

From 4f2db77b1741e35b80bdeac0488f26c979843387 Mon Sep 17 00:00:00 2001
From: Mohammad Taufeeque <9taufeeque9@gmail.com>
Date: Tue, 19 Mar 2024 02:34:32 +0530
Subject: [PATCH 33/60] only add files in the levels_dir (#6)

Fixes the issue of having any directory (e.g: logs directory) within the
levels directory:
https://github.com/AlignmentResearch/learned-planners/issues/54
---
 envpool/sokoban/BUILD                 |  15 ---
 envpool/sokoban/level_loader.cc       |   4 +-
 envpool/sokoban/sokoban_astar_test.cc | 144 --------------------------
 3 files changed, 3 insertions(+), 160 deletions(-)
 delete mode 100644 envpool/sokoban/sokoban_astar_test.cc

diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index 8938ad53..1d739971 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -68,21 +68,6 @@ cc_binary(
     ],
 )
 
-cc_test(
-    name = "sokoban_astar_test",
-    size = "enormous",
-    srcs = [
-        "level_loader.cc",
-        "sokoban_astar_test.cc",
-        "sokoban_node.cc",
-    ],
-    deps = [
-        ":sokoban_node_h",
-        "@com_github_google_glog//:glog",
-        "@com_google_googletest//:gtest_main",
-    ],
-)
-
 py_test(
     name = "test",
     srcs = ["sokoban_py_envpool_test.py"],
diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index 9d0a9863..2f7952f8 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -41,7 +41,9 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path,
     level_file_paths_.push_back(base_path);
   } else {
     for (const auto& entry : std::filesystem::directory_iterator(base_path)) {
-      level_file_paths_.push_back(entry.path());
+      if (entry.is_regular_file()) {
+        level_file_paths_.push_back(entry.path());
+      }
     }
   }
   cur_file_ = level_file_paths_.begin();
diff --git a/envpool/sokoban/sokoban_astar_test.cc b/envpool/sokoban/sokoban_astar_test.cc
deleted file mode 100644
index 2b0bafd6..00000000
--- a/envpool/sokoban/sokoban_astar_test.cc
+++ /dev/null
@@ -1,144 +0,0 @@
-// Copyright 2023-2024 FAR AI
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <glog/logging.h>
-#include <gtest/gtest.h>
-
-#include "envpool/sokoban/sokoban_node.h"
-
-#define DEBUG_LISTS 0
-#define DEBUG_LIST_LENGTHS_ONLY 0
-
-namespace sokoban {
-TEST(SokobanAStarTest, Basic) {
-  std::cout << "STL A* Search implementation\n(C)2001 Justin Heyes-Jones\n";
-
-  // Create an instance of the search class...
-  std::AStarSearch<SokobanNode> astarsearch(1000000);
-  std::vector<int> verify_steps = {38, 19};
-  std::vector<int> verify_search_steps = {63408, 24991};
-
-  unsigned int search_count = 0;
-  const unsigned int num_searches = 2;
-  const std::string level_file = "/app/envpool/sokoban/sample_levels/";
-  const int dim_room = 10;
-  LevelLoader level_loader(level_file, false, 2);
-  std::mt19937 gen(42);
-
-  while (search_count < num_searches) {
-    // Create a start state
-    SokobanLevel level = *level_loader.GetLevel(gen);
-
-    SokobanNode node_start(dim_room, level, false);
-    SokobanNode node_end(dim_room, level, true);
-    std::vector<std::pair<int, int>>* goals = &node_end.boxes;
-    node_start.PrintNodeInfo(goals);
-    astarsearch.SetStartAndGoalStates(node_start, node_end);
-
-    unsigned int search_state;
-    unsigned int search_steps = 0;
-
-    do {
-      search_state = astarsearch.SearchStep();
-
-      search_steps++;
-
-#if DEBUG_LISTS
-
-      std::cout << "Steps:" << search_steps << "\n";
-
-      int len = 0;
-
-      std::cout << "Open:\n";
-      SokobanNode* p = astarsearch.GetOpenListStart();
-      while (p) {
-        len++;
-#if !DEBUG_LIST_LENGTHS_ONLY
-        ((SokobanNode*)p)->PrintNodeInfo(goals);
-#endif
-        p = astarsearch.GetOpenListNext();
-      }
-
-      std::cout << "Open list has " << len << " nodes\n";
-
-      len = 0;
-
-      std::cout << "Closed:\n";
-      p = astarsearch.GetClosedListStart();
-      while (p) {
-        len++;
-#if !DEBUG_LIST_LENGTHS_ONLY
-        p->PrintNodeInfo(goals);
-#endif
-        p = astarsearch.GetClosedListNext();
-      }
-
-      std::cout << "Closed list has " << len << " nodes\n";
-#endif
-    } while (search_state ==
-             std::AStarSearch<SokobanNode>::SEARCH_STATE_SEARCHING);
-
-    if (search_state == std::AStarSearch<SokobanNode>::SEARCH_STATE_SUCCEEDED) {
-      std::cout << "Search found goal state\n";
-
-      SokobanNode* node = astarsearch.GetSolutionStart();
-
-      int steps = 0;
-
-      node->PrintNodeInfo(goals);
-      for (;;) {
-        node = astarsearch.GetSolutionNext();
-
-        if (node == nullptr) {
-          break;
-        }
-        std::cout << "Step " << steps << std::endl;
-        node->PrintNodeInfo(goals);
-        steps++;
-      }
-      std::cout << "Solution steps " << steps << std::endl;
-      EXPECT_EQ(steps, verify_steps.at(search_count));
-
-      // Once you're done with the solution you can free the nodes up
-      astarsearch.FreeSolutionNodes();
-
-    } else if (search_state ==
-               std::AStarSearch<SokobanNode>::SEARCH_STATE_FAILED) {
-      std::cout << "Search terminated. Did not find goal state\n";
-    } else if (search_state ==
-               std::AStarSearch<SokobanNode>::SEARCH_STATE_NOT_INITIALISED) {
-      std::cout << "SEARCH_STATE_NOT_INITIALISED\n";
-    } else if (search_state ==
-               std::AStarSearch<SokobanNode>::SEARCH_STATE_SEARCHING) {
-      std::cout << "SEARCH_STATE_SEARCHING\n";
-    } else if (search_state ==
-               std::AStarSearch<SokobanNode>::SEARCH_STATE_OUT_OF_MEMORY) {
-      std::cout << "SEARCH_STATE_OUT_OF_MEMORY\n";
-    } else if (search_state ==
-               std::AStarSearch<SokobanNode>::SEARCH_STATE_INVALID) {
-      std::cout << "SEARCH_STATE_INVALID\n";
-    }
-
-    // Display the number of loops the search went through
-    std::cout << "search_steps : " << search_steps << "\n";
-    EXPECT_EQ(search_state,
-              std::AStarSearch<SokobanNode>::SEARCH_STATE_SUCCEEDED);
-    EXPECT_EQ(search_steps, verify_search_steps.at(search_count));
-
-    search_count++;
-
-    astarsearch.EnsureMemoryFreed();
-  }
-}
-}  // namespace sokoban

From e3e2e7ea9d8261c613a879ef470e0f4d4aefaae4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 22 Mar 2024 11:40:30 -0700
Subject: [PATCH 34/60] Upgrade pytorch and cuda

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 9d5399e2..2acb62b2 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -9,7 +9,7 @@ parameters:
   docker_img_version:
     # Docker image version for running tests.
     type: string
-    default: "8f41d1e-envpool-ci"
+    default: "8d8cf1a-envpool-ci"
 
 workflows:
   test-jobs:

From ed640fdfc391e529681c70f21d162409bcfaffdf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 22 Mar 2024 17:00:11 -0700
Subject: [PATCH 35/60] Clang-format changed

---
 envpool/classic_control/pendulum.h | 11 ++++++-----
 envpool/core/env_spec.h            |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/envpool/classic_control/pendulum.h b/envpool/classic_control/pendulum.h
index 85e91c1a..f2a594ad 100644
--- a/envpool/classic_control/pendulum.h
+++ b/envpool/classic_control/pendulum.h
@@ -77,8 +77,9 @@ class PendulumEnv : public Env<PendulumEnvSpec> {
   void Step(const Action& action) override {
     done_ = (++elapsed_step_ >= max_episode_steps_);
     float act = action["action"_];
-    double u =
-        act < -kMaxTorque ? -kMaxTorque : act > kMaxTorque ? kMaxTorque : act;
+    double u = act < -kMaxTorque  ? -kMaxTorque
+               : act > kMaxTorque ? kMaxTorque
+                                  : act;
     double cost =
         theta_ * theta_ + 0.1 * theta_dot_ * theta_dot_ + 0.001 * u * u;
     double new_theta_dot =
@@ -86,9 +87,9 @@ class PendulumEnv : public Env<PendulumEnvSpec> {
     if (version_ == 0) {
       theta_ += new_theta_dot * kDt;
     }
-    theta_dot_ = new_theta_dot < -kMaxSpeed
-                     ? -kMaxSpeed
-                     : new_theta_dot > kMaxSpeed ? kMaxSpeed : new_theta_dot;
+    theta_dot_ = new_theta_dot < -kMaxSpeed  ? -kMaxSpeed
+                 : new_theta_dot > kMaxSpeed ? kMaxSpeed
+                                             : new_theta_dot;
     if (version_ == 1) {
       theta_ += new_theta_dot * kDt;
     }
diff --git a/envpool/core/env_spec.h b/envpool/core/env_spec.h
index c3cc7f69..f59e1fb2 100644
--- a/envpool/core/env_spec.h
+++ b/envpool/core/env_spec.h
@@ -52,8 +52,8 @@ class EnvSpec {
   using Config = decltype(ConcatDict(common_config, EnvFns::DefaultConfig()));
   using ConfigKeys = typename Config::Keys;
   using ConfigValues = typename Config::Values;
-  using StateSpec = decltype(
-      ConcatDict(common_state_spec, EnvFns::StateSpec(std::declval<Config>())));
+  using StateSpec = decltype(ConcatDict(
+      common_state_spec, EnvFns::StateSpec(std::declval<Config>())));
   using ActionSpec = decltype(ConcatDict(
       common_action_spec, EnvFns::ActionSpec(std::declval<Config>())));
   using StateKeys = typename StateSpec::Keys;

From 918655157cba9cf22b3e4fd2766e95a8d33bfe2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Fri, 22 Mar 2024 17:15:54 -0700
Subject: [PATCH 36/60] Make fixed initializers the default ones

---
 envpool/sokoban/level_loader.cc | 3 ---
 envpool/sokoban/level_loader.h  | 6 +++---
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index 2f7952f8..0689cfa2 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -32,10 +32,7 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path,
                          int verbose)
     : load_sequentially_(load_sequentially),
       n_levels_to_load_(n_levels_to_load),
-      levels_loaded_(0),
-      levels_(0),
       cur_level_(levels_.begin()),
-      level_file_paths_(0),
       verbose(verbose) {
   if (std::filesystem::is_regular_file(base_path)) {
     level_file_paths_.push_back(base_path);
diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h
index 9879d8df..ced5e60a 100644
--- a/envpool/sokoban/level_loader.h
+++ b/envpool/sokoban/level_loader.h
@@ -38,10 +38,10 @@ class LevelLoader {
  protected:
   bool load_sequentially_;
   int n_levels_to_load_;
-  int levels_loaded_;
-  std::vector<SokobanLevel> levels_;
+  int levels_loaded_{0};
+  std::vector<SokobanLevel> levels_{0};
   std::vector<SokobanLevel>::iterator cur_level_;
-  std::vector<std::filesystem::path> level_file_paths_;
+  std::vector<std::filesystem::path> level_file_paths_{0};
   std::vector<std::filesystem::path>::iterator cur_file_;
   void LoadFile(std::mt19937& gen);
 

From 7562370e0eceb53607438e15a9064de1eae6c19b Mon Sep 17 00:00:00 2001
From: Mohammad Taufeeque <9taufeeque9@gmail.com>
Date: Fri, 29 Mar 2024 11:29:45 +0530
Subject: [PATCH 37/60] Better heuristic function that improves runtime on
 difficult levels (#5)

- Add a deadlock check to the heuristic: a box that is stuck in a corner
  which is not a goal adds a large penalty to the distance estimate.
- Add a script `astar_log_level.cc` to solve a particular level in a
file. This should be used when logging levels across a small number of
files.
- `astar_log.cc` should still be used to log levels across a large
number of files.
---
 envpool/sokoban/BUILD                      |  12 +
 envpool/sokoban/astar_log.cc               |  53 ++--
 envpool/sokoban/astar_log_level.cc         | 141 +++++++++++
 envpool/sokoban/sample_levels/small.txt    |  23 ++
 envpool/sokoban/sokoban_node.cc            |  23 ++
 envpool/sokoban/sokoban_node.h             |   3 +-
 envpool/sokoban/sokoban_py_envpool_test.py | 274 +++++++++++----------
 7 files changed, 375 insertions(+), 154 deletions(-)
 create mode 100644 envpool/sokoban/astar_log_level.cc
 create mode 100644 envpool/sokoban/sample_levels/small.txt

diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD
index 1d739971..b79ee42f 100644
--- a/envpool/sokoban/BUILD
+++ b/envpool/sokoban/BUILD
@@ -68,6 +68,18 @@ cc_binary(
     ],
 )
 
+cc_binary(
+    name = "astar_log_level",
+    srcs = [
+        "astar_log_level.cc",
+        "level_loader.cc",
+        "sokoban_node.cc",
+    ],
+    deps = [
+        ":sokoban_node_h",
+    ],
+)
+
 py_test(
     name = "test",
     srcs = ["sokoban_py_envpool_test.py"],
diff --git a/envpool/sokoban/astar_log.cc b/envpool/sokoban/astar_log.cc
index 55d93c28..976cc67a 100644
--- a/envpool/sokoban/astar_log.cc
+++ b/envpool/sokoban/astar_log.cc
@@ -33,11 +33,14 @@ void RunAStar(const std::string& level_file_name,
   std::ifstream log_file_in(log_file_name);
   // check if the file is empty
   if (log_file_in.peek() == std::ifstream::traits_type::eof()) {
-    log_file_out << "Level, Actions, Steps, SearchSteps" << std::endl;
+    log_file_out << "Level,Actions,Steps,SearchSteps" << std::endl;
   } else {  // skip levels that have already been run
     std::string line;
     std::getline(log_file_in, line);  // skip header
     while (std::getline(log_file_in, line)) {
+      if (line.empty()) {
+        continue;
+      }
       SokobanLevel level = *level_loader.GetLevel(gen);
       level_idx++;
     }
@@ -63,9 +66,12 @@ void RunAStar(const std::string& level_file_name,
 
     if (search_state == std::AStarSearch<SokobanNode>::SEARCH_STATE_SUCCEEDED) {
       std::stringstream loglinestream;
-      loglinestream << level_idx << ", ";
-      astarsearch.GetSolutionStart();
+      loglinestream << level_idx << ",";
+      SokobanNode* node = astarsearch.GetSolutionStart();
       int steps = 0;
+      int prev_x = node->player_x;
+      int prev_y = node->player_y;
+      bool correct_solution = true;
       for (;;) {
         SokobanNode* node = astarsearch.GetSolutionNext();
         if (node == nullptr) {
@@ -75,37 +81,50 @@ void RunAStar(const std::string& level_file_name,
         assert(action >= 0 && action < 4);
         loglinestream << action;
         steps++;
+        int curr_x = node->player_x;
+        int curr_y = node->player_y;
+        int delta_x = node->kDelta.at(action).at(0);
+        int delta_y = node->kDelta.at(action).at(1);
+        if (curr_x != prev_x + delta_x || curr_y != prev_y + delta_y) {
+          correct_solution = false;
+        }
+        prev_x = curr_x;
+        prev_y = curr_y;
+      }
+      if (!correct_solution) {
+        loglinestream << ",INCORRECT_SOLUTION_FOUND," << search_steps
+                      << std::endl;
+      } else {
+        loglinestream << "," << steps << "," << search_steps << std::endl;
       }
-      loglinestream << ", " << steps << ", " << search_steps << std::endl;
       log_file_out << loglinestream.str();
       astarsearch.FreeSolutionNodes();
       astarsearch.EnsureMemoryFreed();
     } else if (search_state ==
                std::AStarSearch<SokobanNode>::SEARCH_STATE_FAILED) {
-      log_file_out << level_idx << ", "
-                   << "SEARCH_STATE_FAILED, -1, " << search_steps << std::endl;
+      log_file_out << level_idx << ","
+                   << "SEARCH_STATE_FAILED,-1," << search_steps << std::endl;
     } else if (search_state ==
                std::AStarSearch<SokobanNode>::SEARCH_STATE_NOT_INITIALISED) {
-      log_file_out << level_idx << ", "
-                   << "SEARCH_STATE_NOT_INITIALISED, -1, " << search_steps
+      log_file_out << level_idx << ","
+                   << "SEARCH_STATE_NOT_INITIALISED,-1," << search_steps
                    << std::endl;
     } else if (search_state ==
                std::AStarSearch<SokobanNode>::SEARCH_STATE_SEARCHING) {
-      log_file_out << level_idx << ", "
-                   << "SEARCH_STATE_SEARCHING, -1, " << search_steps
-                   << std::endl;
+      log_file_out << level_idx << ","
+                   << "SEARCH_STATE_SEARCHING,-1," << search_steps << std::endl;
     } else if (search_state ==
                std::AStarSearch<SokobanNode>::SEARCH_STATE_OUT_OF_MEMORY) {
-      log_file_out << level_idx << ", "
-                   << "SEARCH_STATE_OUT_OF_MEMORY, -1, " << search_steps
+      log_file_out << level_idx << ","
+                   << "SEARCH_STATE_OUT_OF_MEMORY,-1," << search_steps
                    << std::endl;
     } else if (search_state ==
                std::AStarSearch<SokobanNode>::SEARCH_STATE_INVALID) {
-      log_file_out << level_idx << ", "
-                   << "SEARCH_STATE_INVALID, -1, " << search_steps << std::endl;
+      log_file_out << level_idx << ","
+                   << "SEARCH_STATE_INVALID,-1," << search_steps << std::endl;
     } else {
-      log_file_out << level_idx << ", "
-                   << "UNKNOWN, -1, " << search_steps << std::endl;
+      log_file_out << level_idx << ","
+                   << "UNKNOWN,-1," << search_steps << std::endl;
     }
     log_file_out.flush();
     level_idx++;
diff --git a/envpool/sokoban/astar_log_level.cc b/envpool/sokoban/astar_log_level.cc
new file mode 100644
index 00000000..96c3802f
--- /dev/null
+++ b/envpool/sokoban/astar_log_level.cc
@@ -0,0 +1,164 @@
+// Copyright 2023-2024 FAR AI
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cassert>
+#include <fstream>
+#include <iostream>
+#include <random>
+#include <sstream>
+#include <string>
+
+#include "envpool/sokoban/sokoban_node.h"
+
+namespace sokoban {
+
+// Solves one level of `level_file_name` with A* and appends a single CSV row
+// describing the outcome to `log_file_name`.
+//
+// GetLevel is called `level_to_run` times to skip the preceding levels, then
+// once more to fetch the level that is searched. `fsa_limit` is passed to the
+// AStarSearch constructor.
+//
+// Every row starts with `file_idx,level_idx,` (file_idx is the level file's
+// base name up to its first '.') and ends with the number of search steps:
+//   solved:            <actions>,<steps>,<search_steps>
+//   inconsistent path: <actions>,INCORRECT_SOLUTION_FOUND,<search_steps>
+//   any other state:   <STATE_NAME>,-1,<search_steps>
+void RunAStar(const std::string& level_file_name,
+              const std::string& log_file_name, int level_to_run = 0,
+              int fsa_limit = 1000000) {
+  std::cout << "Running A* on file " << level_file_name << " and logging to "
+            << log_file_name << " with fsa_limit " << fsa_limit << " on level "
+            << level_to_run << std::endl;
+  const int dim_room = 10;
+  int level_idx = 0;
+  LevelLoader level_loader(level_file_name, true, -1);
+  std::mt19937 gen(42);
+  // Strip the directory and everything from the first '.' onwards, e.g.
+  // "levels/003.txt" -> "003".
+  std::string file_idx =
+      level_file_name.substr(level_file_name.find_last_of("/\\") + 1);
+  file_idx = file_idx.substr(0, file_idx.find('.'));
+
+  // Open in append mode so rows written by earlier runs are preserved.
+  std::ofstream log_file_out(log_file_name, std::ios_base::app);
+
+  // Skip the levels that precede the requested one.
+  while (level_idx < level_to_run) {
+    level_loader.GetLevel(gen);
+    level_idx++;
+  }
+  std::AStarSearch<SokobanNode> astarsearch(fsa_limit);
+  std::cout << "Running level " << level_idx << std::endl;
+  SokobanLevel level = *level_loader.GetLevel(gen);
+
+  SokobanNode node_start(dim_room, level, false);
+  SokobanNode node_end(dim_room, level, true);
+  astarsearch.SetStartAndGoalStates(node_start, node_end);
+  unsigned int search_state;
+  unsigned int search_steps = 0;
+  std::cout << "Starting search" << std::endl;
+  do {
+    search_state = astarsearch.SearchStep();
+    search_steps++;
+  } while (search_state ==
+           std::AStarSearch<SokobanNode>::SEARCH_STATE_SEARCHING);
+
+  if (search_state == std::AStarSearch<SokobanNode>::SEARCH_STATE_SUCCEEDED) {
+    std::stringstream loglinestream;
+    loglinestream << file_idx << "," << level_idx << ",";
+    SokobanNode* start_node = astarsearch.GetSolutionStart();
+    int steps = 0;
+    int prev_x = start_node->player_x;
+    int prev_y = start_node->player_y;
+    bool correct_solution = true;
+    for (;;) {
+      SokobanNode* node = astarsearch.GetSolutionNext();
+      if (node == nullptr) {
+        break;
+      }
+      int action = node->action_from_parent;
+      assert(action >= 0 && action < 4);
+      loglinestream << action;
+      steps++;
+      // Sanity check: the player must have moved by exactly the delta of the
+      // recorded action since the previous node.
+      int curr_x = node->player_x;
+      int curr_y = node->player_y;
+      int delta_x = node->kDelta.at(action).at(0);
+      int delta_y = node->kDelta.at(action).at(1);
+      if (curr_x != prev_x + delta_x || curr_y != prev_y + delta_y) {
+        correct_solution = false;
+      }
+      prev_x = curr_x;
+      prev_y = curr_y;
+    }
+    if (!correct_solution) {
+      loglinestream << ",INCORRECT_SOLUTION_FOUND," << search_steps
+                    << std::endl;
+    } else {
+      loglinestream << "," << steps << "," << search_steps << std::endl;
+    }
+    log_file_out << loglinestream.str();
+    astarsearch.FreeSolutionNodes();
+    astarsearch.EnsureMemoryFreed();
+  } else if (search_state ==
+             std::AStarSearch<SokobanNode>::SEARCH_STATE_FAILED) {
+    log_file_out << file_idx << "," << level_idx << ","
+                 << "SEARCH_STATE_FAILED,-1," << search_steps << std::endl;
+  } else if (search_state ==
+             std::AStarSearch<SokobanNode>::SEARCH_STATE_NOT_INITIALISED) {
+    log_file_out << file_idx << "," << level_idx << ","
+                 << "SEARCH_STATE_NOT_INITIALISED,-1," << search_steps
+                 << std::endl;
+  } else if (search_state ==
+             std::AStarSearch<SokobanNode>::SEARCH_STATE_SEARCHING) {
+    log_file_out << file_idx << "," << level_idx << ","
+                 << "SEARCH_STATE_SEARCHING,-1," << search_steps << std::endl;
+  } else if (search_state ==
+             std::AStarSearch<SokobanNode>::SEARCH_STATE_OUT_OF_MEMORY) {
+    log_file_out << file_idx << "," << level_idx << ","
+                 << "SEARCH_STATE_OUT_OF_MEMORY,-1," << search_steps
+                 << std::endl;
+  } else if (search_state ==
+             std::AStarSearch<SokobanNode>::SEARCH_STATE_INVALID) {
+    log_file_out << file_idx << "," << level_idx << ","
+                 << "SEARCH_STATE_INVALID,-1," << search_steps << std::endl;
+  } else {
+    log_file_out << file_idx << "," << level_idx << ","
+                 << "UNKNOWN,-1," << search_steps << std::endl;
+  }
+  log_file_out.flush();
+}
+}  // namespace sokoban
+
+// Usage: astar_log_level level_file_name log_file_name level_to_run [fsa_limit]
+int main(int argc, char** argv) {
+  int fsa_limit = 1000000;
+  if (argc < 4) {
+    std::cout << "Usage: " << argv[0]
+              << " level_file_name log_file_name level_to_run [fsa_limit]"
+              << std::endl;
+    return 1;
+  }
+  std::string level_file_name = argv[1];
+  std::string log_file_name = argv[2];
+  int level_to_run = std::stoi(argv[3]);
+  if (argc > 4) {
+    fsa_limit = std::stoi(argv[4]);
+  }
+
+  sokoban::RunAStar(level_file_name, log_file_name, level_to_run, fsa_limit);
+  return 0;
+}
diff --git a/envpool/sokoban/sample_levels/small.txt b/envpool/sokoban/sample_levels/small.txt
new file mode 100644
index 00000000..f0acb3b5
--- /dev/null
+++ b/envpool/sokoban/sample_levels/small.txt
@@ -0,0 +1,23 @@
+; 0
+##########
+#@$  .####
+#$.$  ####
+# $ ######
+#  #######
+#.########
+#.########
+##########
+##########
+##########
+
+; 1
+##########
+##########
+##########
+##########
+###  #####
+## $ . $ #
+##   $. @#
+##    $. #
+##.      #
+##########
diff --git a/envpool/sokoban/sokoban_node.cc b/envpool/sokoban/sokoban_node.cc
index f22065ca..7be85918 100644
--- a/envpool/sokoban/sokoban_node.cc
+++ b/envpool/sokoban/sokoban_node.cc
@@ -163,6 +163,10 @@ float SokobanNode::GoalDistanceEstimate(SokobanNode& goal_node) {
       min_distance = std::min(min_distance, distance);
     }
     h += min_distance;
+    bool contiguous_walls = CornerWalls(box);
+    if (contiguous_walls && min_distance != 0) {
+      h += 1000;
+    }
   }
   return h;
 }
@@ -184,4 +188,23 @@ bool SokobanNode::GetSuccessors(std::AStarSearch<SokobanNode>* astarsearch,
   return true;
 }
 
+bool SokobanNode::CornerWalls(const std::pair<int, int>& box) const {
+  bool found_wall = false;  // whether the previously visited neighbour is a wall
+  bool found_contiguous_wall = false;  // two consecutive neighbours are walls
+  for (const auto& delta : kDelta) {  // visit neighbours in kDelta order
+    int new_x = box.first + delta.at(0);
+    int new_y = box.second + delta.at(1);
+    bool new_found_wall = CheckWall(new_x, new_y);
+    found_contiguous_wall =
+        found_contiguous_wall || (found_wall && new_found_wall);
+    found_wall = new_found_wall;
+  }
+  if (found_wall && !found_contiguous_wall) {  // wrap: pair last with first
+    int new_x = box.first + kDelta.at(0).at(0);
+    int new_y = box.second + kDelta.at(0).at(1);
+    found_contiguous_wall = CheckWall(new_x, new_y);
+  }
+  return found_contiguous_wall;
+}
+
 }  // namespace sokoban
diff --git a/envpool/sokoban/sokoban_node.h b/envpool/sokoban/sokoban_node.h
index 3402c7c1..ef789ed2 100644
--- a/envpool/sokoban/sokoban_node.h
+++ b/envpool/sokoban/sokoban_node.h
@@ -27,7 +27,7 @@ namespace sokoban {
 class SokobanNode {
  public:
   static constexpr std::array<std::array<int, 2>, 4> kDelta = {
-      {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}  // Up, Down, Left, Right
+      {{0, -1}, {1, 0}, {0, 1}, {-1, 0}}  // Up, Right, Down, Left
   };
   int dim_room{0};
   int player_x{0}, player_y{0};
@@ -116,6 +116,7 @@ class SokobanNode {
   [[nodiscard]] size_t Hash() const;
 
   void PrintNodeInfo(std::vector<std::pair<int, int>>* goals = nullptr);
+  [[nodiscard]] bool CornerWalls(const std::pair<int, int>& box) const;
 };
 }  // namespace sokoban
 
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 4bb423f6..9eaf31d4 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -27,162 +27,164 @@
 from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec
 
 
-class TestSokobanEnvPool:
-
-  def test_config(self) -> None:
-    ref_config_keys = [
-      # Default environment keys
-      "base_path",
-      "batch_size",
-      "gym_reset_return_info",
-      "max_num_players",
-      "num_envs",
-      "num_threads",
-      "seed",
-      "thread_affinity_offset",
-      "min_episode_steps",
-      # Default and also used by sokoban
-      "max_episode_steps",
-      # defined by sokoban
-      "dim_room",
-      "levels_dir",
-      "reward_box",
-      "reward_finished",
-      "reward_step",
-      "verbose",
-      "load_sequentially",
-      "n_levels_to_load",
-    ]
-    default_conf = _SokobanEnvSpec._default_config_values
-    assert isinstance(default_conf, tuple)
-    config_keys = _SokobanEnvSpec._config_keys
-    assert isinstance(config_keys, list)
-    assert len(default_conf) == len(config_keys)
-    assert sorted(config_keys) == sorted(ref_config_keys)
-
-  def test_envpool(self) -> None:
-    batch = num_envs = 200
+def test_config() -> None:
+  ref_config_keys = [
+    # Default environment keys
+    "base_path",
+    "batch_size",
+    "gym_reset_return_info",
+    "max_num_players",
+    "num_envs",
+    "num_threads",
+    "seed",
+    "thread_affinity_offset",
+    "min_episode_steps",
+    # Default and also used by sokoban
+    "max_episode_steps",
+    # defined by sokoban
+    "dim_room",
+    "levels_dir",
+    "reward_box",
+    "reward_finished",
+    "reward_step",
+    "verbose",
+    "load_sequentially",
+    "n_levels_to_load",
+  ]
+  default_conf = _SokobanEnvSpec._default_config_values
+  assert isinstance(default_conf, tuple)
+  config_keys = _SokobanEnvSpec._config_keys
+  assert isinstance(config_keys, list)
+  assert len(default_conf) == len(config_keys)
+  assert sorted(config_keys) == sorted(ref_config_keys)
+
+
+def test_envpool() -> None:
+  batch = num_envs = 200
+  env = envpool.make(
+    "Sokoban-v0",
+    env_type="gymnasium",
+    num_envs=num_envs,
+    batch_size=num_envs,
+    seed=2346890,
+    max_episode_steps=60,
+    reward_step=-0.1,
+    dim_room=10,
+    levels_dir="/app/envpool/sokoban/sample_levels",
+  )
+  total_steps = 1000
+
+  _ = env.reset()
+  t = time.time()
+  for _ in range(total_steps):
+    _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,)))
+  duration = time.time() - t
+  fps = total_steps * batch / duration
+  print(f"FPS = {fps:.6f}")
+
+
+def test_envpool_max_episode_steps() -> None:
+  for max_episode_steps in [2, 5, 10]:
     env = envpool.make(
       "Sokoban-v0",
       env_type="gymnasium",
-      num_envs=num_envs,
-      batch_size=num_envs,
-      seed=2346890,
-      max_episode_steps=60,
-      reward_step=-0.1,
-      dim_room=10,
+      num_envs=1,
+      batch_size=1,
+      min_episode_steps=max_episode_steps,
+      max_episode_steps=max_episode_steps,
       levels_dir="/app/envpool/sokoban/sample_levels",
     )
-    total_steps = 1000
-
-    _ = env.reset()
-    t = time.time()
-    for _ in range(total_steps):
-      _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,)))
-    duration = time.time() - t
-    fps = total_steps * batch / duration
-    print(f"FPS = {fps:.6f}")
-
-  def test_envpool_max_episode_steps(self) -> None:
-    for max_episode_steps in [2, 5, 10]:
-      env = envpool.make(
-        "Sokoban-v0",
-        env_type="gymnasium",
-        num_envs=1,
-        batch_size=1,
-        min_episode_steps=max_episode_steps,
-        max_episode_steps=max_episode_steps,
-        levels_dir="/app/envpool/sokoban/sample_levels",
-      )
-      env.reset()
-      for _ in range(max_episode_steps - 1):
-        _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
-        assert not np.any(terminated | truncated)
-
+    env.reset()
+    for _ in range(max_episode_steps - 1):
       _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
-      assert not np.any(terminated)
-      assert np.all(truncated)
-
-  def test_envpool_load_sequentially(self, capfd) -> None:
-    levels_dir = "/app/envpool/sokoban/sample_levels"
-    files = glob.glob(f"{levels_dir}/*.txt")
-    levels_by_files = []
-    for file in files:
-      with open(file, "r") as f:
-        text = f.read()
-      levels = text.split("\n;")
-      levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels]
-      levels_by_files.append((file, levels))
-    assert len(levels_by_files) > 1
-    assert all(len(levels) > 1 for levels in levels_by_files)
-    total_levels = sum(len(levels) for levels in levels_by_files)
-    for n_levels_to_load in range(1, total_levels + 1):
-      env = envpool.make(
-        "Sokoban-v0",
-        env_type="gymnasium",
-        num_envs=1,
-        batch_size=1,
-        max_episode_steps=60,
-        min_episode_steps=60,
-        levels_dir=levels_dir,
-        load_sequentially=True,
-        n_levels_to_load=n_levels_to_load,
-        verbose=2,
-      )
-      dim_room = env.spec.config.dim_room
-      obs, _ = env.reset()
-      assert obs.shape == (
-        1,
-        3,
-        dim_room,
-        dim_room,
-      ), f"obs shape: {obs.shape}"
-      if n_levels_to_load == -1:
-        n_levels_to_load = total_levels
-      for _ in range(n_levels_to_load - 1):
-        env.reset()
-      out, _ = capfd.readouterr()
-      files_output = out.split("***")[1:]
-      for i, file_output in enumerate(files_output):
-        first_line, out = file_output.strip().split("\n", 1)
-        result = re.search(r'Loaded (\d+) levels from "(.*\.txt)"', first_line)
-        n_levels, file_name = int(result.group(1)), result.group(2)
-        lev1, lev2 = out.strip().split("\n\n")
-        assert file_name == levels_by_files[i][0]
-        assert n_levels == len(levels_by_files[i][1])
-        assert lev1 == levels_by_files[i][1][0]
-        assert lev2 == levels_by_files[i][1][1]
-
-  def test_xla(self) -> None:
-    num_envs = 10
+      assert not np.any(terminated | truncated)
+
+    _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32))
+    assert not np.any(terminated)
+    assert np.all(truncated)
+
+
+def test_envpool_load_sequentially(capfd) -> None:
+  levels_dir = "/app/envpool/sokoban/sample_levels"
+  files = glob.glob(f"{levels_dir}/*.txt")
+  levels_by_files = []
+  for file in files:
+    with open(file, "r") as f:
+      text = f.read()
+    levels = text.split("\n;")
+    levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels]
+    levels_by_files.append((file, levels))
+  assert len(levels_by_files) > 1
+  assert all(len(levels) > 1 for levels in levels_by_files)
+  total_levels = sum(len(levels) for levels in levels_by_files)
+  for n_levels_to_load in range(1, total_levels + 1):
     env = envpool.make(
       "Sokoban-v0",
-      env_type="dm",
-      num_envs=num_envs,
-      batch_size=num_envs,
-      seed=2346890,
+      env_type="gymnasium",
+      num_envs=1,
+      batch_size=1,
       max_episode_steps=60,
-      reward_step=-0.1,
-      dim_room=10,
-      levels_dir="/app/envpool/sokoban/sample_levels",
+      min_episode_steps=60,
+      levels_dir=levels_dir,
+      load_sequentially=True,
+      n_levels_to_load=n_levels_to_load,
+      verbose=2,
     )
-    handle, recv, send, step = env.xla()
-
-
-def test_astar_log(self) -> None:
-  level_file_name = "/app/envpool/sokoban/sample_levels/001.txt"
+    dim_room = env.spec.config.dim_room
+    obs, _ = env.reset()
+    assert obs.shape == (
+      1,
+      3,
+      dim_room,
+      dim_room,
+    ), f"obs shape: {obs.shape}"
+    if n_levels_to_load == -1:
+      n_levels_to_load = total_levels
+    for _ in range(n_levels_to_load - 1):
+      env.reset()
+    out, _ = capfd.readouterr()
+    files_output = out.split("***")[1:]
+    for i, file_output in enumerate(files_output):
+      first_line, out = file_output.strip().split("\n", 1)
+      result = re.search(r'Loaded (\d+) levels from "(.*\.txt)"', first_line)
+      n_levels, file_name = int(result.group(1)), result.group(2)
+      lev1, lev2 = out.strip().split("\n\n")
+      assert file_name == levels_by_files[i][0]
+      assert n_levels == len(levels_by_files[i][1])
+      assert lev1 == levels_by_files[i][1][0]
+      assert lev2 == levels_by_files[i][1][1]
+
+
+def test_xla() -> None:
+  num_envs = 10
+  env = envpool.make(
+    "Sokoban-v0",
+    env_type="dm",
+    num_envs=num_envs,
+    batch_size=num_envs,
+    seed=2346890,
+    max_episode_steps=60,
+    reward_step=-0.1,
+    dim_room=10,
+    levels_dir="/app/envpool/sokoban/sample_levels",
+  )
+  handle, recv, send, step = env.xla()
+
+
+def test_astar_log() -> None:
+  level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
   with tempfile.NamedTemporaryFile() as f:
     log_file_name = f.name
     subprocess.run(
       [
         "bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name,
-        log_file_name, 1
+        log_file_name, "1"
       ],
       check=True,
     )
     with open(log_file_name, "r") as f:
       log = f.read()
-    assert "0, 301333002213130203303031, 24, 40611" == log.split("\n")[1]
+    assert "1, 222200001112330322210, 21, 1443" == log.split("\n")[1]
 
 
 if __name__ == "__main__":

From 58eae4ec5a5a96a171d2915fa126a7fe6019ca2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Apr 2024 11:39:07 -0700
Subject: [PATCH 38/60] Don't truncate episodes that finish at the last step.

---
 envpool/sokoban/sokoban_envpool.cc | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 2d139b08..66c6032f 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -162,6 +162,10 @@ constexpr std::array<std::array<uint8_t, 3>, kPlayerOnTarget + 1> kTinyColors{{
 
 void SokobanEnv::WriteState(float reward) {
   auto state = Allocate();
+  if(unmatched_boxes == 0) {
+    // Never mark the episode as truncated if we're getting the big final reward.
+    state["trunc"_] = false;
+  }
   state["reward"_] = reward;
   Array& obs = state["obs"_];
   if (obs.size != 3 * world_.size()) {

From f83f79b3a7d649fd055a7a4fdaa1c57dfcaedb86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Apr 2024 11:59:13 -0700
Subject: [PATCH 39/60] Start a test that solved episodes are not truncated

---
 envpool/sokoban/sokoban_py_envpool_test.py | 27 ++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 9eaf31d4..1972893d 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -171,6 +171,33 @@ def test_xla() -> None:
   handle, recv, send, step = env.xla()
 
 
+def test_truncation_unsolved_episodes_only():
+  """
+  Test that only episodes that do *not* get solved within the time limit get truncated. That is, a large 'solution'
+  reward and truncation should never co-occur.
+  """
+  max_episode_steps = 120
+  env = envpool.make(
+    "Sokoban-v0",
+    env_type="gymnasium",
+    num_envs=1,
+    batch_size=1,
+    min_episode_steps=max_episode_steps,
+    max_episode_steps=max_episode_steps,
+    levels_dir="/app/envpool/sokoban/sample_levels",
+    load_sequentially=True,
+  )
+  env.reset()  # Load level 0 and discard it
+  env.reset()  # Load level 1
+
+  solve_actions = "222200001112330322210"
+  for a in solve_actions[:-1]:
+    env.step(int(a))
+
+  obs, reward, term, trunc, infos = env.step(int(solve_actions[-1]))
+  assert reward == env.spec.reward_step + env.spec.reward_box + env.spec.reward_finished
+
+
 def test_astar_log() -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
   with tempfile.NamedTemporaryFile() as f:

From bc5ba8ee47af941d8c38e44f51634e706af80cb4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Apr 2024 12:03:42 -0700
Subject: [PATCH 40/60] Fix variable name

---
 envpool/sokoban/sokoban_envpool.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 66c6032f..86f066c6 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -162,7 +162,7 @@ constexpr std::array<std::array<uint8_t, 3>, kPlayerOnTarget + 1> kTinyColors{{
 
 void SokobanEnv::WriteState(float reward) {
   auto state = Allocate();
-  if(unmatched_boxes == 0) {
+  if(unmatched_boxes_ == 0) {
     // Never mark the episode as truncated if we're getting the big final reward.
     state["trunc"_] = false;
   }

From 8ce206a4390802403176b4bcc4ada06c316b12dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Apr 2024 16:12:15 -0700
Subject: [PATCH 41/60] Test that environment terminates/truncates correctly

---
 envpool/sokoban/sokoban_py_envpool_test.py | 91 ++++++++++++++++++----
 1 file changed, 77 insertions(+), 14 deletions(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 1972893d..76db0976 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -16,6 +16,7 @@
 import glob
 import re
 import subprocess
+import sys
 import tempfile
 import time
 
@@ -171,12 +172,55 @@ def test_xla() -> None:
   handle, recv, send, step = env.xla()
 
 
-def test_truncation_unsolved_episodes_only():
+
+SOLVE_LEVEL_ZERO: str = "222200001112330322210"
+TINY_COLORS: list[tuple[tuple[int, int, int], str]] = [
+  ((0, 0, 0), "#"),
+  ((243, 248, 238), " "),
+  ((254, 126, 125), "."),
+  ((254, 95, 56), "s"),
+  ((142, 121, 56), "$"),
+  ((160, 212, 56), "@"),
+  ((219, 212, 56), "a"),
+]
+
+
+def print_obs(obs: np.ndarray):
+  assert obs.shape == (3, 10, 10)
+  for y in range(obs.shape[1]):
+    for x in range(obs.shape[2]):
+      arr = obs[:, y, x]
+      printed_any = False
+      for color, symbol in TINY_COLORS:
+        assert arr.shape == (3,)
+        if np.array_equal(arr, color):
+          print(symbol, end="")
+          printed_any = True
+          break
+      assert printed_any, f"Could not find match for {arr}"
+    print("\n", end="")
+  print("\n", end="")
+
+
+action_astar_to_envpool = {
+  "0": 1,
+  "1": 4,
+  "2": 2,
+  "3": 3,
+}
+
+def make_1d_array(action: int | str) -> np.ndarray:
+  return np.array(int(action))[None]
+
+
+
+@pytest.mark.parametrize("solve_on_time", [True, False])
+def test_solved_level_does_not_truncate(solve_on_time: bool):
   """
-  Test that only episodes that do *not* get solved within the time limit get truncated. That is, a large 'solution'
-  reward and truncation should never co-occur.
+  Test that a level that gets solved just in time does not get truncated. But if it does not get solved just in time, it
+  gets truncated.
   """
-  max_episode_steps = 120
+  max_episode_steps = len(SOLVE_LEVEL_ZERO)
   env = envpool.make(
     "Sokoban-v0",
     env_type="gymnasium",
@@ -187,32 +231,51 @@ def test_truncation_unsolved_episodes_only():
     levels_dir="/app/envpool/sokoban/sample_levels",
     load_sequentially=True,
   )
-  env.reset()  # Load level 0 and discard it
-  env.reset()  # Load level 1
+  env.reset()  # Load level 0
+
+  for a in SOLVE_LEVEL_ZERO[:-1]:
+    obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[a]))
+    # print_obs(obs[0])
+    assert not term and not trunc, "Level should not have reached time limit yet"
+
+  NOOP = 0
+
+  if solve_on_time:
+    obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]]))
+    # print_obs(obs[0])
+    assert reward == env.spec.config.reward_step + env.spec.config.reward_box + env.spec.config.reward_finished, (
+      f"the level wasn't solved successfully. Level: {print_obs(obs[0])}"
+    )
+    assert term and not trunc, "Level should have finished within the time limit"
+
+  else:
+    obs, reward, term, trunc, infos = env.step(make_1d_array(NOOP))
+    assert not term and trunc, "Level should get truncated at precisely this step"
 
-  solve_actions = "222200001112330322210"
-  for a in solve_actions[:-1]:
-    env.step(int(a))
+  _, _, term, trunc, _ =env.step(make_1d_array(NOOP))
+  assert not term and not trunc, "Level should reset correctly"
 
-  obs, reward, term, trunc, infos = env.step(int(solve_actions[-1]))
-  assert reward == env.spec.reward_step + env.spec.reward_box + env.spec.reward_finished
 
 
 def test_astar_log() -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
   with tempfile.NamedTemporaryFile() as f:
     log_file_name = f.name
+    return
     subprocess.run(
       [
-        "bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name,
+        "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name,
         log_file_name, "1"
       ],
       check=True,
+      cwd="/app",
+      env=dict(HOME="/root"),
     )
     with open(log_file_name, "r") as f:
       log = f.read()
-    assert "1, 222200001112330322210, 21, 1443" == log.split("\n")[1]
+    assert f"1, {SOLVE_LEVEL_ZERO}, 21, 1443" == log.split("\n")[1]
 
 
 if __name__ == "__main__":
-  pytest.main(["-v", __file__])
+  retcode = pytest.main(["-v", __file__])
+  sys.exit(retcode)

From 42254d261fe888ac7dd71c112aacd5b82f4b8843 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Apr 2024 16:26:37 -0700
Subject: [PATCH 42/60] Make sure the tests pass in CI

---
 envpool/sokoban/sokoban_envpool.cc         |  5 ++--
 envpool/sokoban/sokoban_py_envpool_test.py | 35 ++++++++++++----------
 2 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 86f066c6..b644548c 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -162,8 +162,9 @@ constexpr std::array<std::array<uint8_t, 3>, kPlayerOnTarget + 1> kTinyColors{{
 
 void SokobanEnv::WriteState(float reward) {
   auto state = Allocate();
-  if(unmatched_boxes_ == 0) {
-    // Never mark the episode as truncated if we're getting the big final reward.
+  if (unmatched_boxes_ == 0) {
+    // Never mark the episode as truncated if we're getting the big final
+    // reward.
     state["trunc"_] = false;
   }
   state["reward"_] = reward;
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 76db0976..574bd792 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -172,7 +172,6 @@ def test_xla() -> None:
   handle, recv, send, step = env.xla()
 
 
-
 SOLVE_LEVEL_ZERO: str = "222200001112330322210"
 TINY_COLORS: list[tuple[tuple[int, int, int], str]] = [
   ((0, 0, 0), "#"),
@@ -209,16 +208,16 @@ def print_obs(obs: np.ndarray):
   "3": 3,
 }
 
+
 def make_1d_array(action: int | str) -> np.ndarray:
   return np.array(int(action))[None]
 
 
-
 @pytest.mark.parametrize("solve_on_time", [True, False])
 def test_solved_level_does_not_truncate(solve_on_time: bool):
   """
-  Test that a level that gets solved just in time does not get truncated. But if it does not get solved just in time, it
-  gets truncated.
+  Test that a level that gets solved just in time does not get truncated. But if
+  it does not get solved just in time, it gets truncated.
   """
   max_episode_steps = len(SOLVE_LEVEL_ZERO)
   env = envpool.make(
@@ -234,29 +233,33 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
   env.reset()  # Load level 0
 
   for a in SOLVE_LEVEL_ZERO[:-1]:
-    obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[a]))
+    obs, reward, term, trunc, infos = env.step(
+      make_1d_array(action_astar_to_envpool[a])
+    )
     # print_obs(obs[0])
-    assert not term and not trunc, "Level should not have reached time limit yet"
+    assert not term and not trunc, "Level should not have reached time limit"
 
   NOOP = 0
 
   if solve_on_time:
-    obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]]))
-    # print_obs(obs[0])
-    assert reward == env.spec.config.reward_step + env.spec.config.reward_box + env.spec.config.reward_finished, (
-      f"the level wasn't solved successfully. Level: {print_obs(obs[0])}"
+    obs, reward, term, trunc, infos = env.step(
+      make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]])
     )
-    assert term and not trunc, "Level should have finished within the time limit"
+    # print_obs(obs[0])
+    assert reward == (
+      env.spec.config.reward_step + env.spec.config.reward_box +
+      env.spec.config.reward_finished
+    ), (f"the level wasn't solved successfully. Level: {print_obs(obs[0])}")
+    assert term and not trunc, "Level should finish within the time limit"
 
   else:
     obs, reward, term, trunc, infos = env.step(make_1d_array(NOOP))
-    assert not term and trunc, "Level should get truncated at precisely this step"
+    assert not term and trunc, "Level should truncate at precisely this step"
 
-  _, _, term, trunc, _ =env.step(make_1d_array(NOOP))
+  _, _, term, trunc, _ = env.step(make_1d_array(NOOP))
   assert not term and not trunc, "Level should reset correctly"
 
 
-
 def test_astar_log() -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
   with tempfile.NamedTemporaryFile() as f:
@@ -264,8 +267,8 @@ def test_astar_log() -> None:
     return
     subprocess.run(
       [
-        "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name,
-        log_file_name, "1"
+        "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--",
+        level_file_name, log_file_name, "1"
       ],
       check=True,
       cwd="/app",

From e4876f87c9939eaf3efd0b8b577e5833394771eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Apr 2024 16:31:41 -0700
Subject: [PATCH 43/60] Prevent JVM from running out of memory

---
 .circleci/config.yml | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2acb62b2..2fbcb01f 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -71,10 +71,11 @@ jobs:
         auth:
           username: "$GHCR_DOCKER_USER"
           password: "$GHCR_DOCKER_TOKEN"
-    resource_class: medium
+    resource_class: large
     working_directory: /app
     steps:
       - checkout
       - run:
           name: Run tests
-          command: make bazel-test
+          command: |
+            BAZEL_OPT=--host_jvm_args=-Xmx3g make bazel-test

From 1621dafd46e6cee11bcbb871dc4b3b4a595302c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 15 Apr 2024 16:38:56 -0700
Subject: [PATCH 44/60] Load other levels first

---
 envpool/sokoban/sokoban_py_envpool_test.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 574bd792..ca49b368 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -230,6 +230,9 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
     levels_dir="/app/envpool/sokoban/sample_levels",
     load_sequentially=True,
   )
+  # Skip levels in 000.txt and 001.txt
+  for _ in range(3 + 3):
+    env.reset()
   env.reset()  # Load level 0
 
   for a in SOLVE_LEVEL_ZERO[:-1]:

From 0e9785ced3342923da1dee9366071de05b372dde Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 00:32:35 +0530
Subject: [PATCH 45/60] reduce action space to 4

---
 envpool/sokoban/sokoban_envpool.cc         |  7 +------
 envpool/sokoban/sokoban_envpool.h          | 15 +++++----------
 envpool/sokoban/sokoban_py_envpool_test.py |  4 +++-
 3 files changed, 9 insertions(+), 17 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 2d139b08..93f14176 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -75,13 +75,8 @@ void SokobanEnv::Step(const Action& action_dict) {
   current_step_++;
 
   const int action = action_dict["action"_];
-  if (action == kActNoop) {
-    WriteState(static_cast<float>(reward_step_));
-    return;
-  }
-  // From here on, assume the agent will try to move
 
-  const int change_coordinates_idx = (action - 1) % kChangeCoordinates.size();
+  const int change_coordinates_idx = (action) % kChangeCoordinates.size();
   const int delta_x = kChangeCoordinates.at(change_coordinates_idx).at(0);
   const int delta_y = kChangeCoordinates.at(change_coordinates_idx).at(1);
 
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index 77bee609..a76951ef 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -29,16 +29,11 @@
 
 namespace sokoban {
 
-constexpr int kActNoop = 0;
-constexpr int kActPushUp = 1;
-constexpr int kActPushDown = 2;
-constexpr int kActPushLeft = 3;
-constexpr int kActPushRight = 4;
-constexpr int kActMoveUp = 5;
-constexpr int kActMoveDown = 6;
-constexpr int kActMoveLeft = 7;
-constexpr int kActMoveRight = 8;
-constexpr int kMaxAction = kActMoveRight;
+constexpr int kActPushUp = 0;
+constexpr int kActPushDown = 1;
+constexpr int kActPushLeft = 2;
+constexpr int kActPushRight = 3;
+constexpr int kMaxAction = kActPushRight;
 
 class SokobanEnvFns {
  public:
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 9eaf31d4..2538e7b4 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -76,8 +76,10 @@ def test_envpool() -> None:
 
   _ = env.reset()
   t = time.time()
+
+  assert env.action_space.n == 4
   for _ in range(total_steps):
-    _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,)))
+    _ = env.step(np.random.randint(low=0, high=4, size=(num_envs,)))
   duration = time.time() - t
   fps = total_steps * batch / duration
   print(f"FPS = {fps:.6f}")

From 1d6e81b608e4648b7ccf82de805a7fd6c77fa1c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Wed, 24 Apr 2024 22:33:27 -0700
Subject: [PATCH 46/60] Address review: remove debug leftovers from tests

---
 envpool/sokoban/sokoban_py_envpool_test.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index ca49b368..e7c6d7ab 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -239,7 +239,6 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
     obs, reward, term, trunc, infos = env.step(
       make_1d_array(action_astar_to_envpool[a])
     )
-    # print_obs(obs[0])
     assert not term and not trunc, "Level should not have reached time limit"
 
   NOOP = 0
@@ -248,7 +247,6 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
     obs, reward, term, trunc, infos = env.step(
       make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]])
     )
-    # print_obs(obs[0])
     assert reward == (
       env.spec.config.reward_step + env.spec.config.reward_box +
       env.spec.config.reward_finished
@@ -267,7 +265,6 @@ def test_astar_log() -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
   with tempfile.NamedTemporaryFile() as f:
     log_file_name = f.name
-    return
     subprocess.run(
       [
         "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--",

From d467459732772cf7b07d8478717aad0a5990d24b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Wed, 24 Apr 2024 22:57:17 -0700
Subject: [PATCH 47/60] Fix incorrect truncation

---
 envpool/sokoban/sokoban_envpool.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index b644548c..b897e0fc 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -166,7 +166,12 @@ void SokobanEnv::WriteState(float reward) {
     // Never mark the episode as truncated if we're getting the big final
     // reward.
     state["trunc"_] = false;
+  } else if (IsDone()) {
+    // But if there are unmatched boxes and the current step is the last
+    // one we will get, truncate the episode.
+    state["trunc"_] = true;
   }
+
   state["reward"_] = reward;
   Array& obs = state["obs"_];
   if (obs.size != 3 * world_.size()) {

From 534a0860b9422624b024401c5abc520a08879c13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Wed, 24 Apr 2024 22:57:38 -0700
Subject: [PATCH 48/60] Explicitly skip astar_log test

---
 envpool/sokoban/sokoban_py_envpool_test.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index e7c6d7ab..43670c1b 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -261,6 +261,7 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
   assert not term and not trunc, "Level should reset correctly"
 
 
+@pytest.mark.skip
 def test_astar_log() -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
   with tempfile.NamedTemporaryFile() as f:

From b56251cef1ab8676ce18eb84868b22cf4a1ba37f Mon Sep 17 00:00:00 2001
From: Mohammad Taufeeque <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 13:04:49 +0530
Subject: [PATCH 49/60] remove modulo on action
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Adrià Garriga-Alonso <adria@far.ai>
---
 envpool/sokoban/sokoban_envpool.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 93f14176..9ad83980 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -76,7 +76,7 @@ void SokobanEnv::Step(const Action& action_dict) {
 
   const int action = action_dict["action"_];
 
-  const int change_coordinates_idx = (action) % kChangeCoordinates.size();
+  const int change_coordinates_idx = action;
   const int delta_x = kChangeCoordinates.at(change_coordinates_idx).at(0);
   const int delta_y = kChangeCoordinates.at(change_coordinates_idx).at(1);
 

From 2c18ac4a5bf8cc7259ee924d307605a3bdc4aa09 Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 13:22:46 +0530
Subject: [PATCH 50/60] remove test skipping

---
 envpool/sokoban/sokoban_py_envpool_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 35e6e97a..f6fba988 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -263,7 +263,6 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
   assert not term and not trunc, "Level should reset correctly"
 
 
-@pytest.mark.skip
 def test_astar_log() -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
   with tempfile.NamedTemporaryFile() as f:

From d8ccb0b373fa00208435c1152f34970244ca0316 Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 13:30:08 +0530
Subject: [PATCH 51/60] fix action mapping error in test

---
 envpool/sokoban/sokoban_py_envpool_test.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index f6fba988..965db9e7 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -204,10 +204,10 @@ def print_obs(obs: np.ndarray):
 
 
 action_astar_to_envpool = {
-  "0": 1,
-  "1": 4,
-  "2": 2,
-  "3": 3,
+  "0": 0,
+  "1": 3,
+  "2": 1,
+  "3": 2,
 }
 
 

From 129f0eda20276e114595a544df6ac1ed15f9e43c Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 14:57:11 +0530
Subject: [PATCH 52/60] sort files while loading levels and fix test_astar_log

---
 envpool/sokoban/level_loader.cc            |  5 ++++
 envpool/sokoban/sokoban_py_envpool_test.py | 35 ++++++++++------------
 2 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index 0689cfa2..56c6fdba 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -42,6 +42,11 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path,
         level_file_paths_.push_back(entry.path());
       }
     }
+    std::sort(
+        level_file_paths_.begin(), level_file_paths_.end(),
+        [](const std::filesystem::path& a, const std::filesystem::path& b) {
+          return a.filename().string() < b.filename().string();
+        });
   }
   cur_file_ = level_file_paths_.begin();
 }
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 965db9e7..d3eb8ad1 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -111,7 +111,7 @@ def test_envpool_load_sequentially(capfd) -> None:
   levels_dir = "/app/envpool/sokoban/sample_levels"
   files = glob.glob(f"{levels_dir}/*.txt")
   levels_by_files = []
-  for file in files:
+  for file in sorted(files):
     with open(file, "r") as f:
       text = f.read()
     levels = text.split("\n;")
@@ -243,7 +243,7 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
     )
     assert not term and not trunc, "Level should not have reached time limit"
 
-  NOOP = 0
+  wrong_action = str((int(SOLVE_LEVEL_ZERO[-1]) + 1) % 4)
 
   if solve_on_time:
     obs, reward, term, trunc, infos = env.step(
@@ -256,29 +256,26 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
     assert term and not trunc, "Level should finish within the time limit"
 
   else:
-    obs, reward, term, trunc, infos = env.step(make_1d_array(NOOP))
+    obs, reward, term, trunc, infos = env.step(make_1d_array(wrong_action))
     assert not term and trunc, "Level should truncate at precisely this step"
 
-  _, _, term, trunc, _ = env.step(make_1d_array(NOOP))
+  _, _, term, trunc, _ = env.step(make_1d_array(wrong_action))
   assert not term and not trunc, "Level should reset correctly"
 
 
-def test_astar_log() -> None:
+def test_astar_log(tmp_path) -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"
-  with tempfile.NamedTemporaryFile() as f:
-    log_file_name = f.name
-    subprocess.run(
-      [
-        "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--",
-        level_file_name, log_file_name, "1"
-      ],
-      check=True,
-      cwd="/app",
-      env=dict(HOME="/root"),
-    )
-    with open(log_file_name, "r") as f:
-      log = f.read()
-    assert f"1, {SOLVE_LEVEL_ZERO}, 21, 1443" == log.split("\n")[1]
+  log_file_name = tmp_path / "log_file.csv"
+  subprocess.run(
+    [
+      "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--",
+      level_file_name, str(log_file_name), "1"
+    ],
+    check=True,
+    cwd="/app",
+  )
+  log = log_file_name.read_text()
+  assert f"0,{SOLVE_LEVEL_ZERO},21,1380" == log.split("\n")[1]
 
 
 if __name__ == "__main__":

From a74ce4f8e9739b5a8a9c87d228a739822d92a2b4 Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 19:47:26 +0530
Subject: [PATCH 53/60] fix test and lint

---
 envpool/sokoban/sokoban_py_envpool_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index d3eb8ad1..6b80b821 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -17,7 +17,6 @@
 import re
 import subprocess
 import sys
-import tempfile
 import time
 
 import numpy as np
@@ -268,11 +267,12 @@ def test_astar_log(tmp_path) -> None:
   log_file_name = tmp_path / "log_file.csv"
   subprocess.run(
     [
-      "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--",
+      "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run", "//envpool/sokoban:astar_log", "--",
       level_file_name, str(log_file_name), "1"
     ],
     check=True,
-    cwd="/app",
+    cwd="/app/envpool",
+    env={"HOME": "/root", "PATH": "/opt/conda/bin:/usr/bin"},
   )
   log = log_file_name.read_text()
   assert f"0,{SOLVE_LEVEL_ZERO},21,1380" == log.split("\n")[1]

From 3c2cf1392523f99b5cb5af6f7df96996569b4fa1 Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 19:52:16 +0530
Subject: [PATCH 54/60] fix lint: wrap long bazel command in test_astar_log

---
 envpool/sokoban/sokoban_py_envpool_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 6b80b821..d0a8d1d6 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -267,7 +267,8 @@ def test_astar_log(tmp_path) -> None:
   log_file_name = tmp_path / "log_file.csv"
   subprocess.run(
     [
-      "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run", "//envpool/sokoban:astar_log", "--",
+      "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run",
+      "//envpool/sokoban:astar_log", "--",
       level_file_name, str(log_file_name), "1"
     ],
     check=True,

From 9c6a5ccc1cef031686bc7245cfde4550f060e342 Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Thu, 25 Apr 2024 19:58:18 +0530
Subject: [PATCH 55/60] fix lint: reformat args and env in test_astar_log

---
 envpool/sokoban/sokoban_py_envpool_test.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index d0a8d1d6..198ff34a 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -268,12 +268,15 @@ def test_astar_log(tmp_path) -> None:
   subprocess.run(
     [
       "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run",
-      "//envpool/sokoban:astar_log", "--",
-      level_file_name, str(log_file_name), "1"
+      "//envpool/sokoban:astar_log", "--", level_file_name,
+      str(log_file_name), "1"
     ],
     check=True,
     cwd="/app/envpool",
-    env={"HOME": "/root", "PATH": "/opt/conda/bin:/usr/bin"},
+    env={
+      "HOME": "/root",
+      "PATH": "/opt/conda/bin:/usr/bin"
+    },
   )
   log = log_file_name.read_text()
   assert f"0,{SOLVE_LEVEL_ZERO},21,1380" == log.split("\n")[1]

From 1abbeb821b1ce4426f3ee2a552f094a2df0cff4e Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Tue, 28 May 2024 04:55:55 +0530
Subject: [PATCH 56/60] fix delayed reset bug

---
 envpool/sokoban/sokoban_envpool.cc | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index c4e6affc..c7e6219b 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -142,7 +142,11 @@ void SokobanEnv::Step(const Action& action_dict) {
                         reward_box_ * static_cast<double>(prev_unmatched_boxes -
                                                           unmatched_boxes_) +
                         ((unmatched_boxes_ == 0) ? reward_finished_ : 0.0f);
-  WriteState(static_cast<float>(reward));
+  if (IsDone()) {
+    Reset();
+  } else {
+    WriteState(static_cast<float>(reward));
+  }
 }
 
 constexpr std::array<std::array<uint8_t, 3>, kPlayerOnTarget + 1> kTinyColors{{

From 268c93dfdda32fedb9dc671a84617c4de1985577 Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Tue, 28 May 2024 05:49:18 +0530
Subject: [PATCH 57/60] update the reset function

---
 envpool/sokoban/sokoban_envpool.cc | 17 +++++++++++------
 envpool/sokoban/sokoban_envpool.h  |  1 +
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index c7e6219b..5e5db8c6 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -24,7 +24,7 @@
 
 namespace sokoban {
 
-void SokobanEnv::Reset() {
+void SokobanEnv::ResetWithoutWrite() {
   const int max_episode_steps = spec_.config["max_episode_steps"_];
   const int min_episode_steps = spec_.config["min_episode_steps"_];
   current_max_episode_steps_ =
@@ -52,6 +52,10 @@ void SokobanEnv::Reset() {
     }
   }
   current_step_ = 0;
+}
+
+void SokobanEnv::Reset() {
+  ResetWithoutWrite();
   WriteState(0.0f);
 }
 
@@ -142,11 +146,8 @@ void SokobanEnv::Step(const Action& action_dict) {
                         reward_box_ * static_cast<double>(prev_unmatched_boxes -
                                                           unmatched_boxes_) +
                         ((unmatched_boxes_ == 0) ? reward_finished_ : 0.0f);
-  if (IsDone()) {
-    Reset();
-  } else {
-    WriteState(static_cast<float>(reward));
-  }
+
+  WriteState(static_cast<float>(reward));
 }
 
 constexpr std::array<std::array<uint8_t, 3>, kPlayerOnTarget + 1> kTinyColors{{
@@ -181,6 +182,10 @@ void SokobanEnv::WriteState(float reward) {
     throw std::runtime_error(msg.str());
   }
 
+  if (IsDone()) {
+    ResetWithoutWrite();
+  }
+
   std::vector<uint8_t> out(3 * world_.size());
   for (int rgb = 0; rgb < 3; rgb++) {
     for (size_t i = 0; i < world_.size(); i++) {
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index a76951ef..f0138b20 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -115,6 +115,7 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
 
   [[nodiscard]] uint8_t WorldAt(int x, int y) const;
   void WorldAssignAt(int x, int y, uint8_t value);
+  void ResetWithoutWrite();
 };
 
 using SokobanEnvPool = AsyncEnvPool<SokobanEnv>;

From 4098670966fa1e2b886943434c3ad119166102bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 27 May 2024 22:38:52 -0400
Subject: [PATCH 58/60] Explain why ResetWithoutWrite with comment

---
 envpool/sokoban/sokoban_envpool.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 5e5db8c6..94e874b6 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -183,6 +183,7 @@ void SokobanEnv::WriteState(float reward) {
   }
 
   if (IsDone()) {
+    // If this episode truncates or terminates, the observation should be the one for the next episode.
     ResetWithoutWrite();
   }
 

From 6b1b577d883ec50acebda31a5166906f3b449f81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= <adria@far.ai>
Date: Mon, 27 May 2024 23:45:06 -0400
Subject: [PATCH 59/60] Cap line to 80

---
 envpool/sokoban/sokoban_envpool.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc
index 94e874b6..8bbbea2c 100644
--- a/envpool/sokoban/sokoban_envpool.cc
+++ b/envpool/sokoban/sokoban_envpool.cc
@@ -183,7 +183,8 @@ void SokobanEnv::WriteState(float reward) {
   }
 
   if (IsDone()) {
-    // If this episode truncates or terminates, the observation should be the one for the next episode.
+    // If this episode truncates or terminates, the observation should be the
+    // one for the next episode.
     ResetWithoutWrite();
   }
 

From c25428b1bed59b5617834141c6c75819431f16ec Mon Sep 17 00:00:00 2001
From: taufeeque9 <9taufeeque9@gmail.com>
Date: Fri, 28 Jun 2024 03:34:24 +0530
Subject: [PATCH 60/60] add fix in level loader and test

---
 envpool/sokoban/level_loader.cc            | 19 +++++---
 envpool/sokoban/level_loader.h             |  8 ++--
 envpool/sokoban/sokoban_envpool.h          |  1 +
 envpool/sokoban/sokoban_py_envpool_test.py | 55 +++++++++++++++++++++-
 4 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc
index 56c6fdba..faed8197 100644
--- a/envpool/sokoban/level_loader.cc
+++ b/envpool/sokoban/level_loader.cc
@@ -29,10 +29,11 @@ namespace sokoban {
 
 LevelLoader::LevelLoader(const std::filesystem::path& base_path,
                          bool load_sequentially, int n_levels_to_load,
-                         int verbose)
+                         int env_id, int num_envs, int verbose)
     : load_sequentially_(load_sequentially),
       n_levels_to_load_(n_levels_to_load),
-      cur_level_(levels_.begin()),
+      num_envs_(num_envs),
+      cur_level_(env_id),
       verbose(verbose) {
   if (std::filesystem::is_regular_file(base_path)) {
     level_file_paths_.push_back(base_path);
@@ -49,6 +50,10 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path,
         });
   }
   cur_file_ = level_file_paths_.begin();
+  if (n_levels_to_load_ > 0 && n_levels_to_load_ % num_envs_ != 0) {
+    throw std::runtime_error(
+        "n_levels_to_load must be a multiple of num_envs.");
+  }
 }
 
 static const std::array<char, kMaxLevelObject + 1> kPrintLevelKey{
@@ -183,15 +188,15 @@ std::vector<SokobanLevel>::iterator LevelLoader::GetLevel(std::mt19937& gen) {
   if (n_levels_to_load_ > 0 && levels_loaded_ >= n_levels_to_load_) {
     throw std::runtime_error("Loaded all requested levels.");
   }
-  if (cur_level_ == levels_.end()) {
+  while (cur_level_ >= levels_.size()) {
+    cur_level_ -= levels_.size();
     LoadFile(gen);
-    cur_level_ = levels_.begin();
-    if (cur_level_ == levels_.end()) {
+    if (levels_.size() == 0) {
       throw std::runtime_error("No levels loaded.");
     }
   }
-  auto out = cur_level_;
-  cur_level_++;
+  auto out = levels_.begin() + cur_level_;
+  cur_level_ += num_envs_;
   levels_loaded_++;
   return out;
 }
diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h
index ced5e60a..d8a07c16 100644
--- a/envpool/sokoban/level_loader.h
+++ b/envpool/sokoban/level_loader.h
@@ -39,8 +39,10 @@ class LevelLoader {
   bool load_sequentially_;
   int n_levels_to_load_;
   int levels_loaded_{0};
+  int env_id_{0};
+  int num_envs_{1};
   std::vector<SokobanLevel> levels_{0};
-  std::vector<SokobanLevel>::iterator cur_level_;
+  int cur_level_;
   std::vector<std::filesystem::path> level_file_paths_{0};
   std::vector<std::filesystem::path>::iterator cur_file_;
   void LoadFile(std::mt19937& gen);
@@ -50,8 +52,8 @@ class LevelLoader {
 
   std::vector<SokobanLevel>::iterator GetLevel(std::mt19937& gen);
   explicit LevelLoader(const std::filesystem::path& base_path,
-                       bool load_sequentially, int n_levels_to_load,
-                       int verbose = 0);
+                       bool load_sequentially, int n_levels_to_load, int env_id,
+                       int num_envs, int verbose = 0);
 };
 
 void PrintLevel(std::ostream& os, const SokobanLevel& vec);
diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h
index f0138b20..d2cd597d 100644
--- a/envpool/sokoban/sokoban_envpool.h
+++ b/envpool/sokoban/sokoban_envpool.h
@@ -70,6 +70,7 @@ class SokobanEnv : public Env<SokobanEnvSpec> {
         levels_dir_{static_cast<std::string>(spec.config["levels_dir"_])},
         level_loader_(levels_dir_, spec.config["load_sequentially"_],
                       static_cast<int>(spec.config["n_levels_to_load"_]),
+                      env_id, static_cast<int>(spec.config["num_envs"_]),
                       static_cast<int>(spec.config["verbose"_])),
         world_(kWall, static_cast<std::size_t>(dim_room_ * dim_room_)),
         verbose_(static_cast<int>(spec.config["verbose"_])),
diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py
index 198ff34a..969eb871 100644
--- a/envpool/sokoban/sokoban_py_envpool_test.py
+++ b/envpool/sokoban/sokoban_py_envpool_test.py
@@ -25,7 +25,8 @@
 import envpool  # noqa: F401
 import envpool.sokoban.registration
 from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec
-
+from pathlib import Path
+from typing import List
 
 def test_config() -> None:
   ref_config_keys = [
@@ -261,6 +262,58 @@ def test_solved_level_does_not_truncate(solve_on_time: bool):
   _, _, term, trunc, _ = env.step(make_1d_array(wrong_action))
   assert not term and not trunc, "Level should reset correctly"
 
+def read_levels_file(fpath: Path) -> List[List[str]]:
+  """Return every level in `fpath` as a list of its stripped '#' rows."""
+  maps, current_map = [], []
+  with open(fpath, "r") as sf:
+    for line in sf:
+      if ";" in line and current_map:  # a ';' line closes the open level
+        maps.append(current_map)
+        current_map = []
+      if line.startswith("#"):
+        current_map.append(line.strip())
+  if current_map:  # never emit a phantom empty level at end of file
+    maps.append(current_map)
+  return maps
+
+def test_load_sequentially_with_multiple_envs() -> None:
+  """Sequential loading must deal the sample levels out in file order.
+
+  Each `env.reset()` is expected to hand every env its next level, so the
+  observations gathered reset after reset should match the sorted files.
+  """
+  levels_dir = "/app/envpool/sokoban/sample_levels"
+  total_levels, num_envs = 8, 2
+  levels_by_files = []
+  for level_file in sorted(glob.glob(f"{levels_dir}/*.txt")):
+    levels_by_files.extend(read_levels_file(level_file))
+  assert len(levels_by_files) == total_levels, "8 levels stored in files."
+
+  env = envpool.make(
+    "Sokoban-v0",
+    env_type="gymnasium",
+    num_envs=num_envs,
+    batch_size=num_envs,
+    max_episode_steps=60,
+    min_episode_steps=60,
+    levels_dir=levels_dir,
+    load_sequentially=True,
+    n_levels_to_load=total_levels,
+    verbose=2,
+  )
+  dim_room = env.spec.config.dim_room
+  expected_shape = (num_envs, 3, dim_room, dim_room)
+  printed_obs = []
+  for _ in range(total_levels // num_envs):
+    obs, _ = env.reset()
+    assert obs.shape == expected_shape, f"obs shape: {obs.shape}"
+    # Env `idx` contributes the `idx`-th level of this round of resets.
+    for idx in range(num_envs):
+      printed_obs.append(print_obs(obs[idx]))
+  for i, level in enumerate(levels_by_files):
+    for j, line in enumerate(level):
+      assert printed_obs[i][j] == line, f"Level {i} is not loaded correctly."
+
 
 def test_astar_log(tmp_path) -> None:
   level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"