From b4fc19d9919227c76a95a68b5d4e6106e95fa416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Thu, 4 Jan 2024 22:23:49 -0800 Subject: [PATCH 01/60] Development dockerfile --- docker/dev.dockerfile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/dev.dockerfile b/docker/dev.dockerfile index 7226c879..3fd936ee 100644 --- a/docker/dev.dockerfile +++ b/docker/dev.dockerfile @@ -1,13 +1,13 @@ # Need docker >= 20.10.9, see https://stackoverflow.com/questions/71941032/why-i-cannot-run-apt-update-inside-a-fresh-ubuntu22-04 -FROM nvidia/cuda:12.2.2-cudnn8-devel-ubuntu22.04 +FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 ARG DEBIAN_FRONTEND=noninteractive ARG HOME=/root ARG PATH=$PATH:$HOME/go/bin RUN apt-get update \ - && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl zsh tmux vim \ + && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl zsh tmux vim ssh \ && rm -rf /var/lib/apt/lists/* RUN ln -s /usr/bin/python3 /usr/bin/python RUN ln -sf /usr/lib/go-1.18/bin/go /usr/bin/go @@ -20,6 +20,8 @@ RUN echo "set-option -g default-shell /bin/zsh" >> .tmux.conf.local RUN echo "set-option -g history-limit 10000" >> .tmux.conf.local RUN echo "export PATH=$PATH:$HOME/go/bin" >> .zshrc +ENV USE_BAZEL_VERSION=6.4.0 + RUN go install github.com/bazelbuild/bazelisk@latest && ln -sf $HOME/go/bin/bazelisk $HOME/go/bin/bazel RUN go install github.com/bazelbuild/buildtools/buildifier@latest RUN $HOME/go/bin/bazel version @@ -31,3 +33,6 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* WORKDIR /app +COPY . . 
+ +RUN make bazel-build From 954d7fbab260b56ee4d6a351353fb131f7465339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 5 Jan 2024 14:20:28 -0800 Subject: [PATCH 02/60] Dummy Sokoban environment --- envpool/entry.py | 2 + envpool/sokoban/BUILD | 53 ++++++++++++++ envpool/sokoban/__init__.py | 17 +++++ envpool/sokoban/registration.py | 12 +++ envpool/sokoban/sokoban_envpool.cc | 12 +++ envpool/sokoban/sokoban_envpool.h | 81 +++++++++++++++++++++ envpool/sokoban/sokoban_py_envpool_test.py | 85 ++++++++++++++++++++++ 7 files changed, 262 insertions(+) create mode 100644 envpool/sokoban/BUILD create mode 100644 envpool/sokoban/__init__.py create mode 100644 envpool/sokoban/registration.py create mode 100644 envpool/sokoban/sokoban_envpool.cc create mode 100644 envpool/sokoban/sokoban_envpool.h create mode 100644 envpool/sokoban/sokoban_py_envpool_test.py diff --git a/envpool/entry.py b/envpool/entry.py index eed70a29..18a881a5 100644 --- a/envpool/entry.py +++ b/envpool/entry.py @@ -52,3 +52,5 @@ import envpool.vizdoom.registration # noqa: F401 except ImportError: pass + +import envpool.sokoban.registration # noqa: F401 diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD new file mode 100644 index 00000000..f1425d59 --- /dev/null +++ b/envpool/sokoban/BUILD @@ -0,0 +1,53 @@ +load("@pip_requirements//:requirements.bzl", "requirement") +load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") + +py_library( + name = "sokoban", + srcs = ["__init__.py"], + data = [":sokoban_envpool.so"], + deps = ["//envpool/python:api"], +) + +cc_library( + name = "sokoban_envpool_h", + hdrs = ["sokoban_envpool.h"], + deps = [ + "//envpool/core:async_envpool", + "//envpool/core:env", + "//envpool/core:env_spec", + ], +) + +# cc_test( +# name = "sokoban_envpool_test", +# size = "enormous", +# srcs = ["sokoban_envpool_test.cc"], +# deps = [ +# ":sokoban_envpool_h", +# "@com_google_googletest//:gtest_main", +# ], +# ) + +py_test( + name = 
"sokoban_py_envpool_test", + srcs = ["sokoban_py_envpool_test.py"], + deps = [ + ":sokoban", + requirement("numpy"), + requirement("absl-py"), + ], +) + +pybind_extension( + name = "sokoban_envpool", + srcs = [ + "sokoban_envpool.cc", + ], + linkopts = [ + "-ldl", + ], + deps = [ + ":sokoban_envpool_h", + "//envpool/core:py_envpool", + ], +) diff --git a/envpool/sokoban/__init__.py b/envpool/sokoban/__init__.py new file mode 100644 index 00000000..0e785494 --- /dev/null +++ b/envpool/sokoban/__init__.py @@ -0,0 +1,17 @@ +from envpool.python.api import py_env + +from .sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec + +( + SokobanEnvSpec, + SokobanDMEnvPool, + SokobanGymEnvPool, + SokobanGymnasiumEnvPool, +) = py_env(_SokobanEnvSpec, _SokobanEnvPool) + +__all__ = [ + "SokobanEnvSpec", + "SokobanDMEnvPool", + "SokobanGymEnvPool", + "SokobanGymnasiumEnvPool", +] diff --git a/envpool/sokoban/registration.py b/envpool/sokoban/registration.py new file mode 100644 index 00000000..490b1a34 --- /dev/null +++ b/envpool/sokoban/registration.py @@ -0,0 +1,12 @@ +from envpool.registration import register + +register( + task_id="Sokoban-v0", + import_path="envpool.sokoban", + spec_cls="SokobanEnvSpec", + dm_cls="SokobanDMEnvPool", + gym_cls="SokobanGymEnvPool", + gymnasium_cls="SokobanGymnasiumEnvPool", + max_episode_steps=60, + reward_step=-0.1, +) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc new file mode 100644 index 00000000..07b433ee --- /dev/null +++ b/envpool/sokoban/sokoban_envpool.cc @@ -0,0 +1,12 @@ +#include "envpool/sokoban/sokoban_envpool.h" +#include "envpool/core/py_envpool.h" + +// generate python-side (raw) SokobanEnvSpec +using SokobanEnvSpec = PyEnvSpec; +// generate python-side (raw) SokobanEnvPool +using SokobanEnvPool = PyEnvPool; + +// generate sokoban_envpool.so +PYBIND11_MODULE(sokoban_envpool, m) { + REGISTER(m, SokobanEnvSpec, SokobanEnvPool) +} diff --git a/envpool/sokoban/sokoban_envpool.h 
b/envpool/sokoban/sokoban_envpool.h new file mode 100644 index 00000000..46d33945 --- /dev/null +++ b/envpool/sokoban/sokoban_envpool.h @@ -0,0 +1,81 @@ +#ifndef ENVPOOL_SOKOBAN_H_ +#define ENVPOOL_SOKOBAN_H_ + +#include "envpool/core/async_envpool.h" +#include "envpool/core/env.h" + +namespace sokoban { + +// class BaseSokobanEnvConfig(EnvConfig): +// tinyworld_obs: bool = False +// tinyworld_render: bool = False +// max_episode_steps: int = 120 # default value from gym_sokoban +// terminate_on_first_box: bool = False + +// reward_finished: float = 10.0 # Reward for completing a level +// reward_box: float = 1.0 # Reward for putting a box on target +// reward_step: float = -0.1 # Reward for completing a step +// +// class BoxobanConfig(BaseSokobanEnvConfig): + + // cache_path: Path = Path(__file__).parent.parent / ".sokoban_cache" + // split: Literal["train", "valid", "test", None] = "train" + // difficulty: Literal["unfiltered", "medium", "hard"] = "unfiltered" + +class SokobanEnvFns { + public: + static decltype(auto) DefaultConfig() { + return MakeDict("reward_finished"_.Bind(10.0f), + "reward_box"_.Bind(1.0f), + "reward_step"_.Bind(-0.1f), + "dim_room"_.Bind(10), + "levels_dir"_.Bind(std::string("None"))); + } + template + static decltype(auto) StateSpec(const Config& conf) { + int dim_room = conf["dim_room"_]; + return MakeDict("obs"_.Bind(Spec({3, dim_room, dim_room}))); + } + template + static decltype(auto) ActionSpec(const Config& conf) { + return MakeDict("action"_.Bind(Spec({-1}, {0, 8}))); + } +}; + +// this line will concat common config and common state/action spec +using SokobanEnvSpec = EnvSpec; + +class SokobanEnv : public Env { + public: + SokobanEnv(const Spec& spec, int env_id) : Env(spec, env_id), max_episode_steps{spec.config["max_episode_steps"_]}, + dim_room{static_cast(spec.config["dim_room"_])}, + reward_finished{static_cast(spec.config["reward_finished"_])}, + reward_box{static_cast(spec.config["reward_box"_])}, + 
reward_step{static_cast(spec.config["reward_step"_])}, + levels_dir{static_cast(spec.config["levels_dir"_])} + {} + + bool IsDone () override { return done_; } + void Reset() override { + + } + void Step(const Action &action) override { + static std::vector zero_state(3*dim_room*dim_room); + + State state = Allocate(); + state["obs"_].Assign(zero_state.data(), zero_state.size()); + state["reward"_] = reward_step; + + } + + private: + bool done_{true}; + int max_episode_steps, dim_room; + float reward_finished, reward_box, reward_step; + std::string levels_dir; +}; + +using SokobanEnvPool = AsyncEnvPool; +} + +#endif // ENVPOOL_SOKOBAN_H_ diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py new file mode 100644 index 00000000..975ee3ce --- /dev/null +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -0,0 +1,85 @@ +"""Unit test for dummy envpool and speed benchmark.""" + +import os +import time + +import numpy as np +from absl import logging +from absl.testing import absltest +from envpool.sokoban.sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec + + +class _SokobanEnvPoolTest(absltest.TestCase): + def test_config(self) -> None: + ref_config_keys = [ + "num_envs", + "batch_size", + "num_threads", + "max_num_players", + "thread_affinity_offset", + "base_path", + "seed", + "gym_reset_return_info", + "state_num", + "action_num", + "max_episode_steps", + ] + default_conf = _SokobanEnvSpec._default_config_values + self.assertTrue(isinstance(default_conf, tuple)) + config_keys = _SokobanEnvSpec._config_keys + self.assertTrue(isinstance(config_keys, list)) + self.assertEqual(len(default_conf), len(config_keys)) + self.assertEqual(sorted(config_keys), sorted(ref_config_keys)) + + def test_spec(self) -> None: + conf = _SokobanEnvSpec._default_config_values + env_spec = _SokobanEnvSpec(conf) + state_spec = env_spec._state_spec + action_spec = env_spec._action_spec + state_keys = env_spec._state_keys + action_keys = 
env_spec._action_keys + self.assertTrue(isinstance(state_spec, tuple)) + self.assertTrue(isinstance(action_spec, tuple)) + state_spec = dict(zip(state_keys, state_spec)) + action_spec = dict(zip(action_keys, action_spec)) + # default value of state_num is 10 + self.assertEqual(state_spec["obs:raw"][1][-1], 10) + self.assertEqual(state_spec["obs:dyn"][1][1][-1], 10) + # change conf and see if it can successfully change state_spec + # directly send dict or expose config as dict? + conf = dict(zip(_SokobanEnvSpec._config_keys, conf)) + conf["state_num"] = 666 + env_spec = _SokobanEnvSpec(tuple(conf.values())) + state_spec = dict(zip(state_keys, env_spec._state_spec)) + self.assertEqual(state_spec["obs:raw"][1][-1], 666) + + def test_envpool(self) -> None: + conf = dict( + zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values) + ) + conf["num_envs"] = num_envs = 100 + conf["batch_size"] = batch = 31 + conf["num_threads"] = 10 + env_spec = _SokobanEnvSpec(tuple(conf.values())) + env = _SokobanEnvPool(env_spec) + state_keys = env._state_keys + total = 1000 + env._reset(np.arange(num_envs, dtype=np.int32)) + t = time.time() + for _ in range(total): + state = dict(zip(state_keys, env._recv())) + action = { + "env_id": state["info:env_id"], + "players.env_id": state["info:players.env_id"], + "list_action": np.zeros((batch, 6), dtype=np.float64), + "players.id": state["info:players.id"], + "players.action": state["info:players.id"], + } + env._send(tuple(action.values())) + duration = time.time() - t + fps = total * batch / duration + logging.info(f"FPS = {fps:.6f}") + + +if __name__ == "__main__": + absltest.main() From 09eff519f5830fafb5a2291d07b378b87192b421 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 5 Jan 2024 14:33:58 -0800 Subject: [PATCH 03/60] Fixed some of the tests --- envpool/sokoban/sokoban_py_envpool_test.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git 
a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 975ee3ce..bcaecb85 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -13,15 +13,18 @@ class _SokobanEnvPoolTest(absltest.TestCase): def test_config(self) -> None: ref_config_keys = [ "num_envs", + "base_path", "batch_size", + "levels_dir", + "dim_room", "num_threads", "max_num_players", "thread_affinity_offset", - "base_path", "seed", "gym_reset_return_info", - "state_num", - "action_num", + "reward_box", + "reward_step", + "reward_finished", "max_episode_steps", ] default_conf = _SokobanEnvSpec._default_config_values @@ -54,6 +57,7 @@ def test_spec(self) -> None: self.assertEqual(state_spec["obs:raw"][1][-1], 666) def test_envpool(self) -> None: + return conf = dict( zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values) ) @@ -80,6 +84,17 @@ def test_envpool(self) -> None: fps = total * batch / duration logging.info(f"FPS = {fps:.6f}") + def test_xla(self) -> None: + conf = dict( + zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values) + ) + conf["num_envs"] = 100 + conf["batch_size"] = 31 + conf["num_threads"] = os.cpu_count() + env_spec = _SokobanEnvSpec(tuple(conf.values())) + env = _SokobanEnvPool(env_spec) + _ = env._xla() + if __name__ == "__main__": absltest.main() From 58e20b5752edf55213c7e9b9d2d69f6465bf67cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sat, 6 Jan 2024 21:33:32 -0800 Subject: [PATCH 04/60] Fixed more of the tests --- envpool/sokoban/sokoban_envpool.h | 5 +++ envpool/sokoban/sokoban_py_envpool_test.py | 51 +++++++--------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 46d33945..5d4d4e17 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -57,6 +57,11 @@ class SokobanEnv : public Env { 
bool IsDone () override { return done_; } void Reset() override { + static std::vector zero_state(3*dim_room*dim_room); + + State state = Allocate(); + state["obs"_].Assign(zero_state.data(), zero_state.size()); + state["reward"_] = reward_step; } void Step(const Action &action) override { diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index bcaecb85..de0d6f70 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -12,20 +12,23 @@ class _SokobanEnvPoolTest(absltest.TestCase): def test_config(self) -> None: ref_config_keys = [ - "num_envs", + # Default environment keys "base_path", "batch_size", - "levels_dir", - "dim_room", - "num_threads", + "gym_reset_return_info", "max_num_players", - "thread_affinity_offset", + "num_envs", + "num_threads", "seed", - "gym_reset_return_info", + "thread_affinity_offset", + # Default and also used by sokoban + "max_episode_steps", + # defined by sokoban + "dim_room", + "levels_dir", "reward_box", - "reward_step", "reward_finished", - "max_episode_steps", + "reward_step", ] default_conf = _SokobanEnvSpec._default_config_values self.assertTrue(isinstance(default_conf, tuple)) @@ -34,41 +37,19 @@ def test_config(self) -> None: self.assertEqual(len(default_conf), len(config_keys)) self.assertEqual(sorted(config_keys), sorted(ref_config_keys)) - def test_spec(self) -> None: - conf = _SokobanEnvSpec._default_config_values - env_spec = _SokobanEnvSpec(conf) - state_spec = env_spec._state_spec - action_spec = env_spec._action_spec - state_keys = env_spec._state_keys - action_keys = env_spec._action_keys - self.assertTrue(isinstance(state_spec, tuple)) - self.assertTrue(isinstance(action_spec, tuple)) - state_spec = dict(zip(state_keys, state_spec)) - action_spec = dict(zip(action_keys, action_spec)) - # default value of state_num is 10 - self.assertEqual(state_spec["obs:raw"][1][-1], 10) - 
self.assertEqual(state_spec["obs:dyn"][1][1][-1], 10) - # change conf and see if it can successfully change state_spec - # directly send dict or expose config as dict? - conf = dict(zip(_SokobanEnvSpec._config_keys, conf)) - conf["state_num"] = 666 - env_spec = _SokobanEnvSpec(tuple(conf.values())) - state_spec = dict(zip(state_keys, env_spec._state_spec)) - self.assertEqual(state_spec["obs:raw"][1][-1], 666) - def test_envpool(self) -> None: - return conf = dict( zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values) ) - conf["num_envs"] = num_envs = 100 - conf["batch_size"] = batch = 31 + conf["num_envs"] = num_envs = 200 + conf["batch_size"] = batch = 100 conf["num_threads"] = 10 env_spec = _SokobanEnvSpec(tuple(conf.values())) env = _SokobanEnvPool(env_spec) state_keys = env._state_keys - total = 1000 + total = 1 env._reset(np.arange(num_envs, dtype=np.int32)) + raise ValueError("resetted") t = time.time() for _ in range(total): state = dict(zip(state_keys, env._recv())) @@ -79,7 +60,7 @@ def test_envpool(self) -> None: "players.id": state["info:players.id"], "players.action": state["info:players.id"], } - env._send(tuple(action.values())) + # env._send(tuple(action.values())) duration = time.time() - t fps = total * batch / duration logging.info(f"FPS = {fps:.6f}") From 57cfac1a2dd717bf0b7b20708ecd8ff5588c42e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sun, 7 Jan 2024 21:25:26 -0800 Subject: [PATCH 05/60] Don't build all the other environments --- envpool/BUILD | 19 ++----------------- envpool/sokoban/BUILD | 13 ++++++++++++- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/envpool/BUILD b/envpool/BUILD index 93a7fc6d..b1a7a0dc 100644 --- a/envpool/BUILD +++ b/envpool/BUILD @@ -30,14 +30,7 @@ py_library( name = "entry", srcs = ["entry.py"], deps = [ - "//envpool/atari:atari_registration", - "//envpool/box2d:box2d_registration", - "//envpool/classic_control:classic_control_registration", - 
"//envpool/mujoco:mujoco_dmc_registration", - "//envpool/mujoco:mujoco_gym_registration", - "//envpool/procgen:procgen_registration", - "//envpool/toy_text:toy_text_registration", - "//envpool/vizdoom:vizdoom_registration", + "//envpool/sokoban:registration", ], ) @@ -47,15 +40,7 @@ py_library( deps = [ ":entry", ":registration", - "//envpool/atari", - "//envpool/box2d", - "//envpool/classic_control", - "//envpool/mujoco:mujoco_dmc", - "//envpool/mujoco:mujoco_gym", - "//envpool/procgen", - "//envpool/python", - "//envpool/toy_text", - "//envpool/vizdoom", + "//envpool/sokoban", ], ) diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index f1425d59..a5131945 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -1,6 +1,8 @@ load("@pip_requirements//:requirements.bzl", "requirement") load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") +package(default_visibility = ["//visibility:public"]) + py_library( name = "sokoban", srcs = ["__init__.py"], @@ -8,6 +10,14 @@ py_library( deps = ["//envpool/python:api"], ) +py_library( + name = "registration", + srcs = ["registration.py"], + deps = [ + "//envpool:registration", + ], +) + cc_library( name = "sokoban_envpool_h", hdrs = ["sokoban_envpool.h"], @@ -29,7 +39,8 @@ cc_library( # ) py_test( - name = "sokoban_py_envpool_test", + name = "test", + main = "sokoban_py_envpool_test.py", srcs = ["sokoban_py_envpool_test.py"], deps = [ ":sokoban", From 4db8a6100db667a8ac35b42cedc40f76bd7656c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Tue, 9 Jan 2024 09:12:56 -0800 Subject: [PATCH 06/60] Basic sokoban --- docker/dev.dockerfile | 8 -- envpool/sokoban/BUILD | 3 +- envpool/sokoban/level_loader.cc | 116 +++++++++++++++++++++++++++++ envpool/sokoban/level_loader.h | 31 ++++++++ envpool/sokoban/sokoban_envpool.cc | 41 ++++++++++ envpool/sokoban/sokoban_envpool.h | 33 ++++---- 6 files changed, 206 insertions(+), 26 deletions(-) create mode 100644 
envpool/sokoban/level_loader.cc create mode 100644 envpool/sokoban/level_loader.h diff --git a/docker/dev.dockerfile b/docker/dev.dockerfile index 3fd936ee..79507580 100644 --- a/docker/dev.dockerfile +++ b/docker/dev.dockerfile @@ -26,13 +26,5 @@ RUN go install github.com/bazelbuild/bazelisk@latest && ln -sf $HOME/go/bin/baze RUN go install github.com/bazelbuild/buildtools/buildifier@latest RUN $HOME/go/bin/bazel version -RUN useradd -ms /bin/zsh github-action - -RUN apt-get update \ - && apt-get install -y clang-format clang-tidy swig qtdeclarative5-dev \ - && rm -rf /var/lib/apt/lists/* - WORKDIR /app COPY . . - -RUN make bazel-build diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index a5131945..f4719c8f 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -20,7 +20,7 @@ py_library( cc_library( name = "sokoban_envpool_h", - hdrs = ["sokoban_envpool.h"], + hdrs = ["sokoban_envpool.h", "level_loader.h"], deps = [ "//envpool/core:async_envpool", "//envpool/core:env", @@ -53,6 +53,7 @@ pybind_extension( name = "sokoban_envpool", srcs = [ "sokoban_envpool.cc", + "level_loader.cc", ], linkopts = [ "-ldl", diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc new file mode 100644 index 00000000..af77ff36 --- /dev/null +++ b/envpool/sokoban/level_loader.cc @@ -0,0 +1,116 @@ +#include "level_loader.h" + +#include +#include +#include +#include +#include + + +namespace sokoban { + +size_t ERROR_SZ = 1024; + +LevelLoader::LevelLoader(const std::filesystem::path& base_path) + : levels(0), cur_level(levels.begin()), level_file_paths(0) { + for (const auto& entry : std::filesystem::directory_iterator(base_path)) { + level_file_paths.push_back(entry.path()); + } +} + +void AddLine(SokobanLevel& level, const std::string& line) { + if ((line.at(0) != '#') || (*line.rend() != '#')) { + std::stringstream msg; + msg << "Line '" << line + << "' does not start and begin with '#', as it should." 
<< std::endl; + throw std::runtime_error(msg.str()); + } + for (const char& r : line) { + switch (r) { + case '#': + level.push_back(WALL); + break; + case '@': + level.push_back(PLAYER); + break; + case '$': + level.push_back(BOX); + break; + case '.': + level.push_back(TARGET); + break; + case ' ': + level.push_back(SPACE); + break; + default: + std::stringstream msg; + msg << "Line '" << line << "'has character '" << r + << "' which is not in the valid set '#@$. '." << std::endl; + throw std::runtime_error(msg.str()); + break; + } + } +} + +void LevelLoader::LoadNewFile(std::mt19937& gen) { + std::uniform_int_distribution load_file_idx_r( + 0, level_file_paths.size() - 1); + size_t load_file_idx = load_file_idx_r(gen); + std::ifstream file(level_file_paths.at(load_file_idx)); + + levels.clear(); + std::string line; + while (std::getline(file, line)) { + if (line.at(0) == '#') { + SokobanLevel& cur_level = levels.emplace_back(0); + cur_level.reserve(15 * 15); + + // Count contiguous '#' characters and use this as the box dimension + size_t dim_room = 0; + for (const char& r : line) { + if (r == '#') { + dim_room++; + } + } + AddLine(cur_level, line); + + while (std::getline(file, line) && line.at(0) == '#') { + if (line.length() != dim_room) { + std::stringstream msg; + msg << "Irregular line '" << line + << "' does not match dim_room=" << dim_room << std::endl; + throw std::runtime_error(msg.str()); + } + AddLine(cur_level, line); + } + + if (cur_level.size() != dim_room * dim_room) { + std::stringstream msg; + msg << "Room is not square: " << cur_level.size() << " != " << dim_room + << "x" << dim_room << std::endl; + throw std::runtime_error(msg.str()); + } + } + } + std::shuffle(levels.begin(), levels.end(), gen); + if(levels.empty()) { + std::stringstream msg; + msg << "No levels loaded from file '" << level_file_paths.at(load_file_idx) << std::endl; + throw std::runtime_error(msg.str()); + } +} + +const std::vector::iterator 
LevelLoader::RandomLevel(std::mt19937& gen) { + if (cur_level == levels.end()) { + LoadNewFile(gen); + cur_level = levels.begin(); + if(cur_level == levels.end()) { + throw std::runtime_error("No levels loaded."); + } + } + auto out = cur_level; + cur_level++; + return out; +} + +} // namespace sokoban diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h new file mode 100644 index 00000000..8bdc8a0a --- /dev/null +++ b/envpool/sokoban/level_loader.h @@ -0,0 +1,31 @@ +#ifndef LEVEL_LOADER_H_ +#define LEVEL_LOADER_H_ + +#include +#include +#include + +namespace sokoban { + +using SokobanLevel = std::vector; + +constexpr uint8_t WALL = 0; +constexpr uint8_t BOX = 4; +constexpr uint8_t PLAYER = 5; +constexpr uint8_t TARGET = 2; +constexpr uint8_t SPACE = 1; + +class LevelLoader { + protected: + std::vector levels; + std::vector::iterator cur_level; + std::vector level_file_paths; + void LoadNewFile(std::mt19937& gen); + + public: + const std::vector::iterator RandomLevel(std::mt19937& gen); + LevelLoader(const std::filesystem::path& base_path); +}; +} // namespace sokoban + +#endif // LEVEL_LOADER_H_ diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 07b433ee..43b71b06 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -1,6 +1,47 @@ #include "envpool/sokoban/sokoban_envpool.h" + +#include +#include + #include "envpool/core/py_envpool.h" +namespace sokoban { + +void SokobanEnv::Reset() { + // + internal_state_ = *level_loader.RandomLevel(gen_); + State state = Allocate(); + _reward = 0.0f; + + WriteState(); +} +void SokobanEnv::Step(const Action& action) { + _reward = reward_step; + // todo actual state transition + + WriteState(); +} + +void SokobanEnv::WriteState() { + State state = Allocate(); + state["reward"_] = _reward; + Array& obs = state["obs"_]; + if (obs.size != 3 * internal_state_.size()) { + std::stringstream msg; + msg << "Obs size and level size are 
different: obs_size=" << obs.size + << "/3, level_size=" << internal_state_.size() + << ", dim_room=" << dim_room << std::endl; + throw std::runtime_error(msg.str()); + } + + // TODO: actually color the image + for (int i = 0; i < 3; i++) { + obs(i).Assign(internal_state_.data(), internal_state_.size()); + } +} + +} // namespace sokoban + // generate python-side (raw) SokobanEnvSpec using SokobanEnvSpec = PyEnvSpec; // generate python-side (raw) SokobanEnvPool diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 5d4d4e17..4082f73b 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -1,8 +1,13 @@ #ifndef ENVPOOL_SOKOBAN_H_ #define ENVPOOL_SOKOBAN_H_ +#include + #include "envpool/core/async_envpool.h" #include "envpool/core/env.h" +#include "envpool/core/array.h" + +#include "level_loader.h" namespace sokoban { @@ -52,32 +57,26 @@ class SokobanEnv : public Env { reward_finished{static_cast(spec.config["reward_finished"_])}, reward_box{static_cast(spec.config["reward_box"_])}, reward_step{static_cast(spec.config["reward_step"_])}, - levels_dir{static_cast(spec.config["levels_dir"_])} + levels_dir{static_cast(spec.config["levels_dir"_])}, + level_loader(levels_dir), + internal_state_(WALL, static_cast(dim_room*dim_room)) {} bool IsDone () override { return done_; } - void Reset() override { - static std::vector zero_state(3*dim_room*dim_room); - - State state = Allocate(); - state["obs"_].Assign(zero_state.data(), zero_state.size()); - state["reward"_] = reward_step; + void Reset() override; + void Step(const Action &action) override; - } - void Step(const Action &action) override { - static std::vector zero_state(3*dim_room*dim_room); - - State state = Allocate(); - state["obs"_].Assign(zero_state.data(), zero_state.size()); - state["reward"_] = reward_step; - - } + void WriteState(); private: bool done_{true}; int max_episode_steps, dim_room; float reward_finished, reward_box, reward_step; - 
std::string levels_dir; + std::filesystem::path levels_dir; + + LevelLoader level_loader; + SokobanLevel internal_state_; + float _reward; }; using SokobanEnvPool = AsyncEnvPool; From a9db23f852a782bb4c21e4df1efd2b1c7c307e09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Tue, 9 Jan 2024 16:02:20 -0800 Subject: [PATCH 07/60] simpler dev docker with updated CUDA --- docker/dev.dockerfile | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/docker/dev.dockerfile b/docker/dev.dockerfile index 79507580..9f5d33d3 100644 --- a/docker/dev.dockerfile +++ b/docker/dev.dockerfile @@ -1,29 +1,22 @@ -# Need docker >= 20.10.9, see https://stackoverflow.com/questions/71941032/why-i-cannot-run-apt-update-inside-a-fresh-ubuntu22-04 +FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 -FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04 - -ARG DEBIAN_FRONTEND=noninteractive ARG HOME=/root -ARG PATH=$PATH:$HOME/go/bin + +ENV DEBIAN_FRONTEND=noninteractive +ENV PATH=$PATH:$HOME/go/bin RUN apt-get update \ - && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl zsh tmux vim ssh \ + && apt-get install -y python3-pip python3-dev golang-1.18 git wget curl tmux vim ssh \ + && apt-get clean \ && rm -rf /var/lib/apt/lists/* RUN ln -s /usr/bin/python3 /usr/bin/python RUN ln -sf /usr/lib/go-1.18/bin/go /usr/bin/go -RUN sh -c "$(curl -fsSL https://raw.githubusercontent.com/ohmyzsh/ohmyzsh/master/tools/install.sh)" -WORKDIR $HOME -RUN git clone https://github.com/gpakosz/.tmux.git -RUN ln -s -f .tmux/.tmux.conf -RUN cp .tmux/.tmux.conf.local . 
-RUN echo "set-option -g default-shell /bin/zsh" >> .tmux.conf.local -RUN echo "set-option -g history-limit 10000" >> .tmux.conf.local -RUN echo "export PATH=$PATH:$HOME/go/bin" >> .zshrc - -ENV USE_BAZEL_VERSION=6.4.0 +# Install Bazel RUN go install github.com/bazelbuild/bazelisk@latest && ln -sf $HOME/go/bin/bazelisk $HOME/go/bin/bazel RUN go install github.com/bazelbuild/buildtools/buildifier@latest + +ARG USE_BAZEL_VERSION=6.4.0 RUN $HOME/go/bin/bazel version WORKDIR /app From 7eb159d49699700d7c4d50295d68d6674d2e5139 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Wed, 10 Jan 2024 15:16:00 -0800 Subject: [PATCH 08/60] Implemented env --- envpool/sokoban/level_loader.cc | 2 +- envpool/sokoban/level_loader.h | 6 +- envpool/sokoban/sokoban_envpool.cc | 133 +++++++++++++++++++++++++---- envpool/sokoban/sokoban_envpool.h | 107 ++++++++++++----------- 4 files changed, 179 insertions(+), 69 deletions(-) diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index af77ff36..29f8aff9 100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -40,7 +40,7 @@ void AddLine(SokobanLevel& level, const std::string& line) { level.push_back(TARGET); break; case ' ': - level.push_back(SPACE); + level.push_back(EMPTY); break; default: std::stringstream msg; diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h index 8bdc8a0a..ebe37f78 100644 --- a/envpool/sokoban/level_loader.h +++ b/envpool/sokoban/level_loader.h @@ -10,10 +10,12 @@ namespace sokoban { using SokobanLevel = std::vector; constexpr uint8_t WALL = 0; +constexpr uint8_t EMPTY = 1; +constexpr uint8_t TARGET = 2; +constexpr uint8_t BOX_ON_TARGET = 3; constexpr uint8_t BOX = 4; constexpr uint8_t PLAYER = 5; -constexpr uint8_t TARGET = 2; -constexpr uint8_t SPACE = 1; +constexpr uint8_t PLAYER_ON_TARGET = 6; class LevelLoader { protected: diff --git a/envpool/sokoban/sokoban_envpool.cc 
b/envpool/sokoban/sokoban_envpool.cc index 43b71b06..5e58d269 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -1,5 +1,6 @@ #include "envpool/sokoban/sokoban_envpool.h" +#include #include #include @@ -8,36 +9,136 @@ namespace sokoban { void SokobanEnv::Reset() { - // - internal_state_ = *level_loader.RandomLevel(gen_); - State state = Allocate(); - _reward = 0.0f; + world = *level_loader.RandomLevel(gen_); + if (world.size() != dim_room * dim_room) { + std::stringstream msg; + msg << "Loaded level is not dim_room x dim_room. world.size()=" + << world.size() << ", dim_room=" << dim_room << std::endl; + throw std::runtime_error(msg.str()); + } + unmatched_boxes = 0; + for (int x = 0; x < dim_room; x++) { + for (int y = 0; y < dim_room; y++) { + switch (WorldAt(x, y)) { + case PLAYER: + player_x = x; + player_y = y; + break; + case BOX: + unmatched_boxes++; + break; + } - WriteState(); + WriteState(0.0f); + } + } } + +constexpr std ::array, 4> CHANGE_COORDINATES = { + {-1, 0}, {1, 0}, {0, -1}, {0, 1}}; + void SokobanEnv::Step(const Action& action) { - _reward = reward_step; - // todo actual state transition + if (action == ACT_NOOP) { + WriteState(reward_step); + return; + } + // From here on, assume the agent will try to move + + const int change_coordinates_idx = (action - 1) % CHANGE_COORDINATES.size(); + const int delta_x = CHANGE_COORDINATES.at(change_coordinates_idx).at(0); + const int delta_y = CHANGE_COORDINATES.at(change_coordinates_idx).at(1); + + const int prev_unmatched_boxes = unmatched_boxes; - WriteState(); + // Arena: the things that will change if the agent moves + std::array arena; + for (size_t i = 0; i < arena.size(); i++) { + arena.at(i) = WorldAt(player_x + delta_x * i, player_y + delta_y * i); + } + + // The box will move IFF action is a pushing action AND there's a box AND it + // has space to move + const bool box_moves = + ((action <= ACT_PUSH_RIGHT) && + ((arena.at(1) == BOX) || (arena.at(1) == 
BOX_ON_TARGET)) && + ((arena.at(1) == EMPTY) || (arena.at(2) == TARGET))); + + // The agent will move if the next arena location is possible to move into, or + // if it's a box and the box moves + const bool is_a_box_and_the_box_moves = box_moves; + const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) || + is_a_box_and_the_box_moves; + + if (agent_moves) { + // `is_target` is boolean but we'll need it as an int later + std::array is_target; + for (size_t i = 0; i < arena.size(); i++) { + uint8_t tile = arena.at(i); + is_target.at(i) = + (tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET); + } + // only whatever was on the floor is now at position 0 + arena.at(0) = is_target.at(0) ? TARGET : EMPTY; + // the player now occupies position 1 + arena.at(1) = is_target.at(1) ? PLAYER_ON_TARGET : PLAYER; + + if (box_moves) { + // the box moves for sure. A target at 2 reduces the nubmer of unmatched + // boxes (because the box goes there), a target at 1 increases it (the box + // leaves from there). Both can be equal to 1 and in that case the number + // stays the same. + unmatched_boxes += is_target.at(1) - is_target.at(2); + + // A box now occupies position 2 + arena.at(2) = is_target.at(2) ? BOX_ON_TARGET : BOX; + } + + player_x += delta_x; + player_y += delta_y; + for (size_t i = 0; i < arena.size(); i++) { + WorldAssignAt(player_x + delta_x * i, player_y + delta_y * i, + arena.at(i)); + } + } + + const float reward = + reward_step + + reward_box * static_cast(prev_unmatched_boxes - unmatched_boxes) + + (IsDone() ? 
reward_finished : 0.0f); + WriteState(reward); } -void SokobanEnv::WriteState() { +constexpr std::array, PLAYER_ON_TARGET + 1> TINY_COLORS = + { + {0, 0, 0}, // WALL + {243, 248, 238}, // EMPTY + {254, 126, 125}, // TARGET + {254, 95, 56}, // BOX_ON_TARGET + {142, 121, 56}, // BOX + {160, 212, 56}, // PLAYER + {219, 212, 56} // PLAYER_ON_TARGET +}; + +void SokobanEnv::WriteState(float reward) { State state = Allocate(); - state["reward"_] = _reward; + state["reward"_] = reward; Array& obs = state["obs"_]; - if (obs.size != 3 * internal_state_.size()) { + if (obs.size != 3 * world.size()) { std::stringstream msg; msg << "Obs size and level size are different: obs_size=" << obs.size - << "/3, level_size=" << internal_state_.size() - << ", dim_room=" << dim_room << std::endl; + << "/3, level_size=" << world.size() << ", dim_room=" << dim_room + << std::endl; throw std::runtime_error(msg.str()); } - // TODO: actually color the image - for (int i = 0; i < 3; i++) { - obs(i).Assign(internal_state_.data(), internal_state_.size()); + std::array out; + for (int rgb = 0; rgb < 3; rgb++) { + for (size_t i = 0; i < world.size(); i++) { + out.at(rgb * (dim_room * dim_room) + i) = + TINY_COLORS.at(world.at(i)).at(rgb); + } } + obs.Assign(out.data(), out.size()); } } // namespace sokoban diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 4082f73b..6b392027 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -3,37 +3,29 @@ #include +#include "envpool/core/array.h" #include "envpool/core/async_envpool.h" #include "envpool/core/env.h" -#include "envpool/core/array.h" - #include "level_loader.h" namespace sokoban { -// class BaseSokobanEnvConfig(EnvConfig): -// tinyworld_obs: bool = False -// tinyworld_render: bool = False -// max_episode_steps: int = 120 # default value from gym_sokoban -// terminate_on_first_box: bool = False - -// reward_finished: float = 10.0 # Reward for completing a level -// 
reward_box: float = 1.0 # Reward for putting a box on target -// reward_step: float = -0.1 # Reward for completing a step -// -// class BoxobanConfig(BaseSokobanEnvConfig): - - // cache_path: Path = Path(__file__).parent.parent / ".sokoban_cache" - // split: Literal["train", "valid", "test", None] = "train" - // difficulty: Literal["unfiltered", "medium", "hard"] = "unfiltered" +constexpr int ACT_NOOP = 0; +constexpr int ACT_PUSH_UP = 1; +constexpr int ACT_PUSH_DOWN = 2; +constexpr int ACT_PUSH_LEFT = 3; +constexpr int ACT_PUSH_RIGHT = 4; +constexpr int ACT_MOVE_UP = 5; +constexpr int ACT_MOVE_DOWN = 6; +constexpr int ACT_MOVE_LEFT = 7; +constexpr int ACT_MOVE_RIGHT = 8; +constexpr int MAX_ACTION = ACT_MOVE_RIGHT; class SokobanEnvFns { public: static decltype(auto) DefaultConfig() { - return MakeDict("reward_finished"_.Bind(10.0f), - "reward_box"_.Bind(1.0f), - "reward_step"_.Bind(-0.1f), - "dim_room"_.Bind(10), + return MakeDict("reward_finished"_.Bind(10.0f), "reward_box"_.Bind(1.0f), + "reward_step"_.Bind(-0.1f), "dim_room"_.Bind(10), "levels_dir"_.Bind(std::string("None"))); } template @@ -43,7 +35,7 @@ class SokobanEnvFns { } template static decltype(auto) ActionSpec(const Config& conf) { - return MakeDict("action"_.Bind(Spec({-1}, {0, 8}))); + return MakeDict("action"_.Bind(Spec({-1}, {0, MAX_ACTION}))); } }; @@ -51,35 +43,50 @@ class SokobanEnvFns { using SokobanEnvSpec = EnvSpec; class SokobanEnv : public Env { - public: - SokobanEnv(const Spec& spec, int env_id) : Env(spec, env_id), max_episode_steps{spec.config["max_episode_steps"_]}, - dim_room{static_cast(spec.config["dim_room"_])}, - reward_finished{static_cast(spec.config["reward_finished"_])}, - reward_box{static_cast(spec.config["reward_box"_])}, - reward_step{static_cast(spec.config["reward_step"_])}, - levels_dir{static_cast(spec.config["levels_dir"_])}, - level_loader(levels_dir), - internal_state_(WALL, static_cast(dim_room*dim_room)) - {} - - bool IsDone () override { return done_; } - void 
Reset() override; - void Step(const Action &action) override; - - void WriteState(); - - private: - bool done_{true}; - int max_episode_steps, dim_room; - float reward_finished, reward_box, reward_step; - std::filesystem::path levels_dir; - - LevelLoader level_loader; - SokobanLevel internal_state_; - float _reward; + public: + SokobanEnv(const Spec& spec, int env_id) + : Env(spec, env_id), + max_episode_steps{spec.config["max_episode_steps"_]}, + dim_room{static_cast(spec.config["dim_room"_])}, + reward_finished{static_cast(spec.config["reward_finished"_])}, + reward_box{static_cast(spec.config["reward_box"_])}, + reward_step{static_cast(spec.config["reward_step"_])}, + levels_dir{static_cast(spec.config["levels_dir"_])}, + level_loader(levels_dir), + world(WALL, static_cast(dim_room * dim_room)) {} + + bool IsDone() override { return unmatched_boxes == 0; } + void Reset() override; + void Step(const Action& action) override; + + void WriteState(float reward); + + private: + int max_episode_steps, dim_room; + float reward_finished, reward_box, reward_step; + std::filesystem::path levels_dir; + + LevelLoader level_loader; + SokobanLevel world; + + int player_x{0}, player_y{0}; + int unmatched_boxes{0}; + + uint8_t WorldAt(int x, int y) { + if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) { + return WALL; + } + return world.at(x + y * dim_room); + } + void WorldAssignAt(int x, int y, uint8_t value) { + if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) { + return; + } + world.at(x + y * dim_room) = value; + } }; using SokobanEnvPool = AsyncEnvPool; -} +} // namespace sokoban -#endif // ENVPOOL_SOKOBAN_H_ +#endif // ENVPOOL_SOKOBAN_H_ From 621de30aa1bb11084af0ecdebeeba05456b1c414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Thu, 11 Jan 2024 15:09:25 -0800 Subject: [PATCH 09/60] Debugged env --- envpool/sokoban/BUILD | 2 + envpool/sokoban/level_loader.cc | 70 ++++++++++++++++------ envpool/sokoban/level_loader.h | 7 
++- envpool/sokoban/sokoban_envpool.cc | 31 +++++++--- envpool/sokoban/sokoban_envpool.h | 29 ++++----- envpool/sokoban/sokoban_py_envpool_test.py | 62 +++++++++---------- 6 files changed, 127 insertions(+), 74 deletions(-) diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index f4719c8f..eac4b98b 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -44,6 +44,8 @@ py_test( srcs = ["sokoban_py_envpool_test.py"], deps = [ ":sokoban", + ":registration", + "//envpool:envpool", requirement("numpy"), requirement("absl-py"), ], diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index 29f8aff9..526fdc02 100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -1,28 +1,33 @@ #include "level_loader.h" +#include #include #include +#include +#include #include #include -#include - namespace sokoban { size_t ERROR_SZ = 1024; -LevelLoader::LevelLoader(const std::filesystem::path& base_path) - : levels(0), cur_level(levels.begin()), level_file_paths(0) { +LevelLoader::LevelLoader(const std::filesystem::path& base_path, int verbose) + : levels(0), cur_level(levels.begin()), level_file_paths(0), verbose(verbose) { for (const auto& entry : std::filesystem::directory_iterator(base_path)) { level_file_paths.push_back(entry.path()); } } +const std::string PRINT_LEVEL_KEY = "# .a@$s"; + void AddLine(SokobanLevel& level, const std::string& line) { - if ((line.at(0) != '#') || (*line.rend() != '#')) { + auto start = line.at(0); + auto end = line.at(line.size() - 1); + if ((start != '#') || (start != '#')) { std::stringstream msg; - msg << "Line '" << line - << "' does not start and begin with '#', as it should." << std::endl; + msg << "Line '" << line << "' does not start (" << start << ") and end (" + << end << ") with '#', as it should." 
<< std::endl; throw std::runtime_error(msg.str()); } for (const char& r : line) { @@ -52,18 +57,35 @@ void AddLine(SokobanLevel& level, const std::string& line) { } } +void PrintLevel(std::ostream& os, SokobanLevel vec) { + size_t dim_room = 0; + for (; dim_room * dim_room != vec.size() && dim_room <= 100; dim_room++) + ; // take sqrt(vec.size()) + for (size_t i = 0; i < vec.size(); i++) { + os << PRINT_LEVEL_KEY.at(vec.at(i)); + if ((i + 1) % dim_room == 0) { + os << std::endl; + } + } +} + void LevelLoader::LoadNewFile(std::mt19937& gen) { std::uniform_int_distribution load_file_idx_r( 0, level_file_paths.size() - 1); - size_t load_file_idx = load_file_idx_r(gen); - std::ifstream file(level_file_paths.at(load_file_idx)); + const size_t load_file_idx = load_file_idx_r(gen); + const std::filesystem::path& file_path = level_file_paths.at(load_file_idx); + std::ifstream file(file_path); levels.clear(); std::string line; while (std::getline(file, line)) { + if (line.size() == 0) { + continue; + } + if (line.at(0) == '#') { SokobanLevel& cur_level = levels.emplace_back(0); - cur_level.reserve(15 * 15); + cur_level.reserve(10 * 10); // In practice most levels are this size // Count contiguous '#' characters and use this as the box dimension size_t dim_room = 0; @@ -74,7 +96,7 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) { } AddLine(cur_level, line); - while (std::getline(file, line) && line.at(0) == '#') { + while (std::getline(file, line) && line.size() > 0 && line.at(0) == '#') { if (line.length() != dim_room) { std::stringstream msg; msg << "Irregular line '" << line @@ -93,19 +115,31 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) { } } std::shuffle(levels.begin(), levels.end(), gen); - if(levels.empty()) { - std::stringstream msg; - msg << "No levels loaded from file '" << level_file_paths.at(load_file_idx) << std::endl; - throw std::runtime_error(msg.str()); + if (levels.empty()) { + std::stringstream msg; + msg << "No levels loaded from file '" << 
file_path << std::endl; + throw std::runtime_error(msg.str()); + } + + if(verbose >= 1) { + std::cout << "Loaded " << levels.size() << " levels from " << file_path + << std::endl; + if(verbose >= 2) { + PrintLevel(std::cout, levels.at(0)); + std::cout << std::endl; + PrintLevel(std::cout, levels.at(1)); + std::cout << std::endl; + } } } -const std::vector::iterator LevelLoader::RandomLevel(std::mt19937& gen) { +const std::vector::iterator LevelLoader::RandomLevel( + std::mt19937& gen) { if (cur_level == levels.end()) { LoadNewFile(gen); cur_level = levels.begin(); - if(cur_level == levels.end()) { - throw std::runtime_error("No levels loaded."); + if (cur_level == levels.end()) { + throw std::runtime_error("No levels loaded."); } } auto out = cur_level; diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h index ebe37f78..88326c84 100644 --- a/envpool/sokoban/level_loader.h +++ b/envpool/sokoban/level_loader.h @@ -25,9 +25,14 @@ class LevelLoader { void LoadNewFile(std::mt19937& gen); public: + int verbose; + const std::vector::iterator RandomLevel(std::mt19937& gen); - LevelLoader(const std::filesystem::path& base_path); + LevelLoader(const std::filesystem::path& base_path, int verbose=0); }; + + +void PrintLevel(std::ostream& os, SokobanLevel vec); } // namespace sokoban #endif // LEVEL_LOADER_H_ diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 5e58d269..b6467466 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -28,16 +28,29 @@ void SokobanEnv::Reset() { unmatched_boxes++; break; } - - WriteState(0.0f); } } + WriteState(0.0f); +} + +uint8_t SokobanEnv::WorldAt(int x, int y) { + if ((x < 0) || (x >= dim_room) || (y < 0) || (y >= dim_room)) { + return WALL; + } + return world.at(x + y * dim_room); +} +void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) { + if ((x < 0) || (x >= dim_room) || (y < 0) || (y >= dim_room)) { + return; + } + world.at(x + y * 
dim_room) = value; } -constexpr std ::array, 4> CHANGE_COORDINATES = { - {-1, 0}, {1, 0}, {0, -1}, {0, 1}}; +constexpr std::array, 4> CHANGE_COORDINATES = { + {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}}; -void SokobanEnv::Step(const Action& action) { +void SokobanEnv::Step(const Action& action_) { + const int action = action_["action"_]; if (action == ACT_NOOP) { WriteState(reward_step); return; @@ -109,7 +122,7 @@ void SokobanEnv::Step(const Action& action) { } constexpr std::array, PLAYER_ON_TARGET + 1> TINY_COLORS = - { + {{ {0, 0, 0}, // WALL {243, 248, 238}, // EMPTY {254, 126, 125}, // TARGET @@ -117,10 +130,10 @@ constexpr std::array, PLAYER_ON_TARGET + 1> TINY_COLORS = {142, 121, 56}, // BOX {160, 212, 56}, // PLAYER {219, 212, 56} // PLAYER_ON_TARGET -}; + }}; void SokobanEnv::WriteState(float reward) { - State state = Allocate(); + auto state = Allocate(); state["reward"_] = reward; Array& obs = state["obs"_]; if (obs.size != 3 * world.size()) { @@ -131,7 +144,7 @@ void SokobanEnv::WriteState(float reward) { throw std::runtime_error(msg.str()); } - std::array out; + std::vector out(3 * world.size()); for (int rgb = 0; rgb < 3; rgb++) { for (size_t i = 0; i < world.size(); i++) { out.at(rgb * (dim_room * dim_room) + i) = diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 6b392027..03728ed8 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -24,9 +24,14 @@ constexpr int MAX_ACTION = ACT_MOVE_RIGHT; class SokobanEnvFns { public: static decltype(auto) DefaultConfig() { - return MakeDict("reward_finished"_.Bind(10.0f), "reward_box"_.Bind(1.0f), - "reward_step"_.Bind(-0.1f), "dim_room"_.Bind(10), - "levels_dir"_.Bind(std::string("None"))); + return MakeDict( + "reward_finished"_.Bind(10.0f), + "reward_box"_.Bind(1.0f), + "reward_step"_.Bind(-0.1f), + "dim_room"_.Bind(10), + "levels_dir"_.Bind(std::string("")), + "verbose"_.Bind(0) + ); } template static decltype(auto) StateSpec(const Config& 
conf) { @@ -53,7 +58,8 @@ class SokobanEnv : public Env { reward_step{static_cast(spec.config["reward_step"_])}, levels_dir{static_cast(spec.config["levels_dir"_])}, level_loader(levels_dir), - world(WALL, static_cast(dim_room * dim_room)) {} + world(WALL, static_cast(dim_room * dim_room)), + verbose(static_cast(spec.config["verbose"_])) {} bool IsDone() override { return unmatched_boxes == 0; } void Reset() override; @@ -68,22 +74,13 @@ class SokobanEnv : public Env { LevelLoader level_loader; SokobanLevel world; + int verbose; int player_x{0}, player_y{0}; int unmatched_boxes{0}; - uint8_t WorldAt(int x, int y) { - if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) { - return WALL; - } - return world.at(x + y * dim_room); - } - void WorldAssignAt(int x, int y, uint8_t value) { - if ((x < 0) || (x > dim_room) || (y < 0) || (y > dim_room)) { - return; - } - world.at(x + y * dim_room) = value; - } + uint8_t WorldAt(int x, int y); + void WorldAssignAt(int x, int y, uint8_t value); }; using SokobanEnvPool = AsyncEnvPool; diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index de0d6f70..d4723b38 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -3,6 +3,8 @@ import os import time +import envpool # noqa: F401 +import envpool.sokoban.registration import numpy as np from absl import logging from absl.testing import absltest @@ -29,6 +31,7 @@ def test_config(self) -> None: "reward_box", "reward_finished", "reward_step", + "verbose", ] default_conf = _SokobanEnvSpec._default_config_values self.assertTrue(isinstance(default_conf, tuple)) @@ -38,43 +41,42 @@ def test_config(self) -> None: self.assertEqual(sorted(config_keys), sorted(ref_config_keys)) def test_envpool(self) -> None: - conf = dict( - zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values) + batch = num_envs = 200 + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + 
num_envs=num_envs, + batch_size=num_envs, + seed=2346890, + max_episode_steps=60, + reward_step=-0.1, + dim_room=10, + levels_dir="/aa/boxoban-levels-master/unfiltered/train", ) - conf["num_envs"] = num_envs = 200 - conf["batch_size"] = batch = 100 - conf["num_threads"] = 10 - env_spec = _SokobanEnvSpec(tuple(conf.values())) - env = _SokobanEnvPool(env_spec) - state_keys = env._state_keys - total = 1 - env._reset(np.arange(num_envs, dtype=np.int32)) - raise ValueError("resetted") + total_steps = 1000 + + _ = env.reset() t = time.time() - for _ in range(total): - state = dict(zip(state_keys, env._recv())) - action = { - "env_id": state["info:env_id"], - "players.env_id": state["info:players.env_id"], - "list_action": np.zeros((batch, 6), dtype=np.float64), - "players.id": state["info:players.id"], - "players.action": state["info:players.id"], - } - # env._send(tuple(action.values())) + for _ in range(total_steps): + _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,))) duration = time.time() - t - fps = total * batch / duration + fps = total_steps * batch / duration logging.info(f"FPS = {fps:.6f}") def test_xla(self) -> None: - conf = dict( - zip(_SokobanEnvSpec._config_keys, _SokobanEnvSpec._default_config_values) + num_envs = 10 + env = envpool.make( + "Sokoban-v0", + env_type="dm", + num_envs=num_envs, + batch_size=num_envs, + seed=2346890, + max_episode_steps=60, + reward_step=-0.1, + dim_room=10, + levels_dir="/aa/boxoban-levels-master/unfiltered/train", ) - conf["num_envs"] = 100 - conf["batch_size"] = 31 - conf["num_threads"] = os.cpu_count() - env_spec = _SokobanEnvSpec(tuple(conf.values())) - env = _SokobanEnvPool(env_spec) - _ = env._xla() + handle, recv, send, step = env.xla() if __name__ == "__main__": From eccacf689074238b2967900f9dcd82c9261c89c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Thu, 11 Jan 2024 17:30:40 -0800 Subject: [PATCH 10/60] Format --- envpool/sokoban/sokoban_py_envpool_test.py | 3 +-- 1 
file changed, 1 insertion(+), 2 deletions(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index d4723b38..9eede62d 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -1,6 +1,5 @@ """Unit test for dummy envpool and speed benchmark.""" -import os import time import envpool # noqa: F401 @@ -8,7 +7,7 @@ import numpy as np from absl import logging from absl.testing import absltest -from envpool.sokoban.sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec +from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec class _SokobanEnvPoolTest(absltest.TestCase): From 4a52fa56a284216f1b72e4d6c770ac825b65b20d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 12:18:41 -0800 Subject: [PATCH 11/60] Order bug in moving player --- envpool/sokoban/sokoban_envpool.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index b6467466..f514e888 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -106,12 +106,13 @@ void SokobanEnv::Step(const Action& action_) { arena.at(2) = is_target.at(2) ? BOX_ON_TARGET : BOX; } - player_x += delta_x; - player_y += delta_y; for (size_t i = 0; i < arena.size(); i++) { WorldAssignAt(player_x + delta_x * i, player_y + delta_y * i, arena.at(i)); } + // After assigning the arena, move player. 
+ player_x += delta_x; + player_y += delta_y; } const float reward = From 3fb0c5c951f8cfefea4144d263e20a1cfa90c5f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 18:11:28 -0800 Subject: [PATCH 12/60] Fix directions --- envpool/sokoban/sokoban_envpool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index f514e888..eca11551 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -47,7 +47,7 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) { } constexpr std::array, 4> CHANGE_COORDINATES = { - {{-1, 0}, {1, 0}, {0, -1}, {0, 1}}}; + {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}}; void SokobanEnv::Step(const Action& action_) { const int action = action_["action"_]; From ac948ca445fbd497e831cd9dcaa6afa9003606f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 18:11:36 -0800 Subject: [PATCH 13/60] Rewards are doubles --- envpool/sokoban/sokoban_envpool.cc | 8 ++++---- envpool/sokoban/sokoban_envpool.h | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index eca11551..a922e4be 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -52,7 +52,7 @@ constexpr std::array, 4> CHANGE_COORDINATES = { void SokobanEnv::Step(const Action& action_) { const int action = action_["action"_]; if (action == ACT_NOOP) { - WriteState(reward_step); + WriteState(static_cast(reward_step)); return; } // From here on, assume the agent will try to move @@ -115,11 +115,11 @@ void SokobanEnv::Step(const Action& action_) { player_y += delta_y; } - const float reward = + const double reward = reward_step + - reward_box * static_cast(prev_unmatched_boxes - unmatched_boxes) + + reward_box * static_cast(prev_unmatched_boxes - unmatched_boxes) + 
(IsDone() ? reward_finished : 0.0f); - WriteState(reward); + WriteState(static_cast(reward)); } constexpr std::array, PLAYER_ON_TARGET + 1> TINY_COLORS = diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 03728ed8..5b533fcd 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -25,9 +25,9 @@ class SokobanEnvFns { public: static decltype(auto) DefaultConfig() { return MakeDict( - "reward_finished"_.Bind(10.0f), - "reward_box"_.Bind(1.0f), - "reward_step"_.Bind(-0.1f), + "reward_finished"_.Bind(10.0), + "reward_box"_.Bind(1.0), + "reward_step"_.Bind(-0.1), "dim_room"_.Bind(10), "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0) @@ -53,9 +53,9 @@ class SokobanEnv : public Env { : Env(spec, env_id), max_episode_steps{spec.config["max_episode_steps"_]}, dim_room{static_cast(spec.config["dim_room"_])}, - reward_finished{static_cast(spec.config["reward_finished"_])}, - reward_box{static_cast(spec.config["reward_box"_])}, - reward_step{static_cast(spec.config["reward_step"_])}, + reward_finished{static_cast(spec.config["reward_finished"_])}, + reward_box{static_cast(spec.config["reward_box"_])}, + reward_step{static_cast(spec.config["reward_step"_])}, levels_dir{static_cast(spec.config["levels_dir"_])}, level_loader(levels_dir), world(WALL, static_cast(dim_room * dim_room)), @@ -69,7 +69,7 @@ class SokobanEnv : public Env { private: int max_episode_steps, dim_room; - float reward_finished, reward_box, reward_step; + double reward_finished, reward_box, reward_step; std::filesystem::path levels_dir; LevelLoader level_loader; From 2dc167db0b21c14af2fc3f5f257cab524c3d9f72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 18:59:14 -0800 Subject: [PATCH 14/60] Reverse left/right --- envpool/sokoban/sokoban_envpool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 
a922e4be..a991405f 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -47,7 +47,7 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) { } constexpr std::array, 4> CHANGE_COORDINATES = { - {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}}; + {{0, -1}, {0, 1}, {1, 0}, {-1, 0}}}; void SokobanEnv::Step(const Action& action_) { const int action = action_["action"_]; From 0c4b1f2d6ac26f0163254e0518dcb753ff75d584 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 19:25:17 -0800 Subject: [PATCH 15/60] Reverse again, debugging code. --- envpool/sokoban/sokoban_envpool.cc | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index a991405f..13460cec 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include "envpool/core/py_envpool.h" @@ -47,7 +48,7 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) { } constexpr std::array, 4> CHANGE_COORDINATES = { - {{0, -1}, {0, 1}, {1, 0}, {-1, 0}}}; + {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}}; void SokobanEnv::Step(const Action& action_) { const int action = action_["action"_]; @@ -82,6 +83,24 @@ void SokobanEnv::Step(const Action& action_) { const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) || is_a_box_and_the_box_moves; + std::cout << "arena.at(0) " << arena.at(0) << std::endl; + std::cout << "arena.at(1) " << arena.at(1) << std::endl; + std::cout << "arena.at(2) " << arena.at(2) << std::endl; + + std::cout << "box_moves " << box_moves << std::endl; + std::cout << " (action <= ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << std::endl; + std::cout << " (arena.at(1) == BOX) = " << (arena.at(1) == BOX) << std::endl; + std::cout << " (arena.at(1) == BOX_ON_TARGET) = " << (arena.at(1) == BOX_ON_TARGET) << std::endl; + std::cout 
<< " (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl; + std::cout << " (arena.at(2) == TARGET) = " << (arena.at(2) == TARGET) << std::endl; + + std::cout << "is_a_box_and_the_box_moves " << is_a_box_and_the_box_moves << std::endl; + + std::cout << "agent_moves " << agent_moves << std::endl; + std::cout << " (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl; + std::cout << " (arena.at(1) == TARGET) = " << (arena.at(1) == TARGET) << std::endl; + std::cout << " is_a_box_and_the_box_moves = " << is_a_box_and_the_box_moves << std::endl; + if (agent_moves) { // `is_target` is boolean but we'll need it as an int later std::array is_target; From 96c5636601e7caaebabbb3c809c5edf285fd68a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 20:48:40 -0800 Subject: [PATCH 16/60] Print action names --- envpool/sokoban/sokoban_envpool.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 13460cec..9674caf5 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -50,6 +50,18 @@ void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) { constexpr std::array, 4> CHANGE_COORDINATES = { {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}}; +constexpr std::array action_names = { + "ACT_NOOP", + "ACT_PUSH_UP", + "ACT_PUSH_DOWN", + "ACT_PUSH_LEFT", + "ACT_PUSH_RIGHT", + "ACT_MOVE_UP", + "ACT_MOVE_DOWN", + "ACT_MOVE_LEFT", + "ACT_MOVE_RIGHT", +}; + void SokobanEnv::Step(const Action& action_) { const int action = action_["action"_]; if (action == ACT_NOOP) { @@ -83,9 +95,9 @@ void SokobanEnv::Step(const Action& action_) { const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) || is_a_box_and_the_box_moves; - std::cout << "arena.at(0) " << arena.at(0) << std::endl; - std::cout << "arena.at(1) " << arena.at(1) << std::endl; - std::cout << "arena.at(2) " << 
arena.at(2) << std::endl; + std::cout << "arena.at(0) " << action_names.at(arena.at(0)) << std::endl; + std::cout << "arena.at(1) " << action_names.at(arena.at(1)) << std::endl; + std::cout << "arena.at(2) " << action_names.at(arena.at(2)) << std::endl; std::cout << "box_moves " << box_moves << std::endl; std::cout << " (action <= ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << std::endl; From c4e0638fc56b61d81d22492994bb2953d6bf70ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 23:00:26 -0800 Subject: [PATCH 17/60] Print arena thihngs --- envpool/sokoban/sokoban_envpool.cc | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 9674caf5..a1522d81 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -62,6 +62,18 @@ constexpr std::array action_names = { "ACT_MOVE_RIGHT", }; + +constexpr std::array arena_names = { + "WALL", + "EMPTY", + "TARGET", + "BOX_ON_TARGET", + "BOX", + "PLAYER", + "PLAYER_ON_TARGET", +}; + + void SokobanEnv::Step(const Action& action_) { const int action = action_["action"_]; if (action == ACT_NOOP) { @@ -95,9 +107,9 @@ void SokobanEnv::Step(const Action& action_) { const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) || is_a_box_and_the_box_moves; - std::cout << "arena.at(0) " << action_names.at(arena.at(0)) << std::endl; - std::cout << "arena.at(1) " << action_names.at(arena.at(1)) << std::endl; - std::cout << "arena.at(2) " << action_names.at(arena.at(2)) << std::endl; + std::cout << "arena.at(0) " << arena_names.at(arena.at(0)) << std::endl; + std::cout << "arena.at(1) " << arena_names.at(arena.at(1)) << std::endl; + std::cout << "arena.at(2) " << arena_names.at(arena.at(2)) << std::endl; std::cout << "box_moves " << box_moves << std::endl; std::cout << " (action <= ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << 
std::endl; From 57302ea0fbd283196de642b0de6bb6b10043fbb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 23:04:50 -0800 Subject: [PATCH 18/60] Typo in where in arena to look --- envpool/sokoban/sokoban_envpool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index a1522d81..11cfaa76 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -99,7 +99,7 @@ void SokobanEnv::Step(const Action& action_) { const bool box_moves = ((action <= ACT_PUSH_RIGHT) && ((arena.at(1) == BOX) || (arena.at(1) == BOX_ON_TARGET)) && - ((arena.at(1) == EMPTY) || (arena.at(2) == TARGET))); + ((arena.at(2) == EMPTY) || (arena.at(2) == TARGET))); // The agent will move if the next arena location is possible to move into, or // if it's a box and the box moves From 0a2f05b875ab750d1193cc19081ab864da1c6733 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Jan 2024 23:11:04 -0800 Subject: [PATCH 19/60] Don't print actions anymore --- envpool/sokoban/sokoban_envpool.cc | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 11cfaa76..6629d873 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -3,7 +3,6 @@ #include #include #include -#include #include "envpool/core/py_envpool.h" @@ -107,24 +106,6 @@ void SokobanEnv::Step(const Action& action_) { const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) || is_a_box_and_the_box_moves; - std::cout << "arena.at(0) " << arena_names.at(arena.at(0)) << std::endl; - std::cout << "arena.at(1) " << arena_names.at(arena.at(1)) << std::endl; - std::cout << "arena.at(2) " << arena_names.at(arena.at(2)) << std::endl; - - std::cout << "box_moves " << box_moves << std::endl; - std::cout << " (action <= 
ACT_PUSH_RIGHT) = " << (action <= ACT_PUSH_RIGHT) << std::endl; - std::cout << " (arena.at(1) == BOX) = " << (arena.at(1) == BOX) << std::endl; - std::cout << " (arena.at(1) == BOX_ON_TARGET) = " << (arena.at(1) == BOX_ON_TARGET) << std::endl; - std::cout << " (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl; - std::cout << " (arena.at(2) == TARGET) = " << (arena.at(2) == TARGET) << std::endl; - - std::cout << "is_a_box_and_the_box_moves " << is_a_box_and_the_box_moves << std::endl; - - std::cout << "agent_moves " << agent_moves << std::endl; - std::cout << " (arena.at(1) == EMPTY) = " << (arena.at(1) == EMPTY) << std::endl; - std::cout << " (arena.at(1) == TARGET) = " << (arena.at(1) == TARGET) << std::endl; - std::cout << " is_a_box_and_the_box_moves = " << is_a_box_and_the_box_moves << std::endl; - if (agent_moves) { // `is_target` is boolean but we'll need it as an int later std::array is_target; From bd50d738eab5f4138c98a2ba892b8bbf018c2022 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 19 Jan 2024 22:12:21 -0800 Subject: [PATCH 20/60] Working with bug --- envpool/sokoban/sokoban_envpool.h | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 5b533fcd..3347f6fd 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -2,6 +2,8 @@ #define ENVPOOL_SOKOBAN_H_ #include +#include +#include #include "envpool/core/array.h" #include "envpool/core/async_envpool.h" @@ -24,14 +26,9 @@ constexpr int MAX_ACTION = ACT_MOVE_RIGHT; class SokobanEnvFns { public: static decltype(auto) DefaultConfig() { - return MakeDict( - "reward_finished"_.Bind(10.0), - "reward_box"_.Bind(1.0), - "reward_step"_.Bind(-0.1), - "dim_room"_.Bind(10), - "levels_dir"_.Bind(std::string("")), - "verbose"_.Bind(0) - ); + return MakeDict("reward_finished"_.Bind(10.0), "reward_box"_.Bind(1.0), + 
"reward_step"_.Bind(-0.1), "dim_room"_.Bind(10), + "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0)); } template static decltype(auto) StateSpec(const Config& conf) { @@ -59,9 +56,23 @@ class SokobanEnv : public Env { levels_dir{static_cast(spec.config["levels_dir"_])}, level_loader(levels_dir), world(WALL, static_cast(dim_room * dim_room)), - verbose(static_cast(spec.config["verbose"_])) {} + verbose(static_cast(spec.config["verbose"_])) { + if (max_num_players_ != spec_.config["max_num_players"_]) { + std::stringstream msg; + msg << "max_num_players_ != spec_['max_num_players'] " << max_num_players_ + << " != " << spec_.config["max_num_players"_] << std::endl; + throw std::runtime_error(msg.str()); + } - bool IsDone() override { return unmatched_boxes == 0; } + if (max_num_players_ != spec.config["max_num_players"_]) { + std::stringstream msg; + msg << "max_num_players_ != spec['max_num_players'] " << max_num_players_ + << " != " << spec.config["max_num_players"_] << std::endl; + throw std::runtime_error(msg.str()); + } + } + + bool IsDone() override { return (unmatched_boxes == 0) || (); } void Reset() override; void Step(const Action& action) override; From a85d4fda25eb20d6e2f980cefaa1cb71b0465b7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 19 Jan 2024 22:16:50 -0800 Subject: [PATCH 21/60] solve type error --- envpool/sokoban/sokoban_envpool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 3347f6fd..3e3b2abe 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -72,7 +72,7 @@ class SokobanEnv : public Env { } } - bool IsDone() override { return (unmatched_boxes == 0) || (); } + bool IsDone() override { return unmatched_boxes == 0; } void Reset() override; void Step(const Action& action) override; From 0853a48ce09986e544fee330b2a5303d3d44151e Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 19 Jan 2024 23:12:05 -0800 Subject: [PATCH 22/60] Make sure env does indeed truncate at the correct number of steps --- envpool/sokoban/sample_levels/000.txt | 35 ++++++++++++++++++++++ envpool/sokoban/sokoban_envpool.cc | 3 ++ envpool/sokoban/sokoban_envpool.h | 8 +++-- envpool/sokoban/sokoban_py_envpool_test.py | 23 ++++++++++++-- 4 files changed, 64 insertions(+), 5 deletions(-) create mode 100644 envpool/sokoban/sample_levels/000.txt diff --git a/envpool/sokoban/sample_levels/000.txt b/envpool/sokoban/sample_levels/000.txt new file mode 100644 index 00000000..e0dbf4cb --- /dev/null +++ b/envpool/sokoban/sample_levels/000.txt @@ -0,0 +1,35 @@ +; 0 +########## +#@ ####### +#.$####### +# ###### +#.$ ###### +# ####### +# $ ###### +#$ . ##### +# . ##### +########## + +; 1 +########## +########## +# ###@#### +# $ # +# $ # +# ## #### +#.## #### +# ###$$.## +# . . # +########## + +; 2 +########## +##### ## +##### ## +####. # +# . $@ ## +# $ $ $ ## +# . ## +#####. 
# +###### # +########## diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 6629d873..66656293 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -30,6 +30,7 @@ void SokobanEnv::Reset() { } } } + current_step_ = 0; WriteState(0.0f); } @@ -74,6 +75,8 @@ constexpr std::array arena_names = { void SokobanEnv::Step(const Action& action_) { + current_step_++; + const int action = action_["action"_]; if (action == ACT_NOOP) { WriteState(static_cast(reward_step)); diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 3e3b2abe..6ed247be 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -48,7 +48,6 @@ class SokobanEnv : public Env { public: SokobanEnv(const Spec& spec, int env_id) : Env(spec, env_id), - max_episode_steps{spec.config["max_episode_steps"_]}, dim_room{static_cast(spec.config["dim_room"_])}, reward_finished{static_cast(spec.config["reward_finished"_])}, reward_box{static_cast(spec.config["reward_box"_])}, @@ -72,14 +71,16 @@ class SokobanEnv : public Env { } } - bool IsDone() override { return unmatched_boxes == 0; } + bool IsDone() override { + const int max_episode_steps = spec_.config["max_episode_steps"_]; + return (unmatched_boxes == 0) || (current_step_ >= max_episode_steps); } void Reset() override; void Step(const Action& action) override; void WriteState(float reward); private: - int max_episode_steps, dim_room; + int dim_room; double reward_finished, reward_box, reward_step; std::filesystem::path levels_dir; @@ -87,6 +88,7 @@ class SokobanEnv : public Env { SokobanLevel world; int verbose; + int current_step_{0}; int player_x{0}, player_y{0}; int unmatched_boxes{0}; diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 9eede62d..fb1cba94 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ 
-50,7 +50,7 @@ def test_envpool(self) -> None: max_episode_steps=60, reward_step=-0.1, dim_room=10, - levels_dir="/aa/boxoban-levels-master/unfiltered/train", + levels_dir="/app/envpool/sokoban/sample_levels", ) total_steps = 1000 @@ -62,6 +62,25 @@ def test_envpool(self) -> None: fps = total_steps * batch / duration logging.info(f"FPS = {fps:.6f}") + def test_envpool_max_episode_steps(self) -> None: + for max_episode_steps in [2, 5, 10]: + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=1, + batch_size=1, + max_episode_steps=max_episode_steps, + levels_dir="/app/envpool/sokoban/sample_levels", + ) + env.reset() + for _ in range(max_episode_steps - 1): + _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) + assert not np.any(terminated | truncated) + + _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) + assert not np.any(terminated) + assert np.all(truncated) + def test_xla(self) -> None: num_envs = 10 env = envpool.make( @@ -73,7 +92,7 @@ def test_xla(self) -> None: max_episode_steps=60, reward_step=-0.1, dim_room=10, - levels_dir="/aa/boxoban-levels-master/unfiltered/train", + levels_dir="/app/envpool/sokoban/sample_levels", ) handle, recv, send, step = env.xla() From af25a1fa4ed3164dd9d81d368925dde6a524941f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sun, 21 Jan 2024 08:45:05 -0800 Subject: [PATCH 23/60] Only give a reward if the number of boxes left is 0 --- envpool/sokoban/sokoban_envpool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 66656293..7faa4e84 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -145,7 +145,7 @@ void SokobanEnv::Step(const Action& action_) { const double reward = reward_step + reward_box * static_cast(prev_unmatched_boxes - unmatched_boxes) + - (IsDone() ? 
reward_finished : 0.0f); + ((unmatched_boxes == 0) ? reward_finished : 0.0f); WriteState(static_cast(reward)); } From a0bba8e08358f7e81ddecc9cdca96777d52a6584 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sun, 21 Jan 2024 10:51:34 -0800 Subject: [PATCH 24/60] Display the unmatched boxes info --- envpool/sokoban/sokoban_envpool.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 7faa4e84..a61c0fa7 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -114,8 +114,12 @@ void SokobanEnv::Step(const Action& action_) { std::array is_target; for (size_t i = 0; i < arena.size(); i++) { uint8_t tile = arena.at(i); + // We explicitly set them to 0 or 1 because false/true are not guaranteed + // to be 0/1. is_target.at(i) = - (tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET); + ((tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET) + ? 1 + : 0); } // only whatever was on the floor is now at position 0 arena.at(0) = is_target.at(0) ? 
TARGET : EMPTY; @@ -163,6 +167,7 @@ constexpr std::array, PLAYER_ON_TARGET + 1> TINY_COLORS = void SokobanEnv::WriteState(float reward) { auto state = Allocate(); state["reward"_] = reward; + state["info:unmatched_boxes"_] = unmatched_boxes; Array& obs = state["obs"_]; if (obs.size != 3 * world.size()) { std::stringstream msg; From 9de2c16e639fa547d7e281507514a4e23e6e9018 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sun, 21 Jan 2024 11:06:23 -0800 Subject: [PATCH 25/60] Add unmatched boxes to spec --- envpool/sokoban/sokoban_envpool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 6ed247be..a2b30ff2 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -33,7 +33,8 @@ class SokobanEnvFns { template static decltype(auto) StateSpec(const Config& conf) { int dim_room = conf["dim_room"_]; - return MakeDict("obs"_.Bind(Spec({3, dim_room, dim_room}))); + return MakeDict("obs"_.Bind(Spec({3, dim_room, dim_room})), + "info:unmatched_boxes"_.Bind(Spec({}))); } template static decltype(auto) ActionSpec(const Config& conf) { From ae6a2d7f481367de6a63cc990dd7f9f50729f1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sun, 21 Jan 2024 11:50:48 -0800 Subject: [PATCH 26/60] Print reward boxes --- envpool/sokoban/sokoban_envpool.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index a61c0fa7..56fa3aac 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -3,6 +3,7 @@ #include #include #include +#include #include "envpool/core/py_envpool.h" @@ -149,7 +150,10 @@ void SokobanEnv::Step(const Action& action_) { const double reward = reward_step + reward_box * static_cast(prev_unmatched_boxes - unmatched_boxes) + - ((unmatched_boxes == 0) ? 
reward_finished : 0.0f); + ((unmatched_boxes == 0) ? reward_finished : 0.0); + std::cout << "prev_unmatched_boxes=" << prev_unmatched_boxes + << ", unmatched_boxes=" << unmatched_boxes + << ", so reward=" << reward << "\n"; WriteState(static_cast(reward)); } From 9d3d3882a747e7484063a93fca5b1d98df1b9a7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sun, 21 Jan 2024 12:14:50 -0800 Subject: [PATCH 27/60] Revert "Print reward boxes" This reverts commit ae6a2d7f481367de6a63cc990dd7f9f50729f1c4. --- envpool/sokoban/sokoban_envpool.cc | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 56fa3aac..a61c0fa7 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -3,7 +3,6 @@ #include #include #include -#include #include "envpool/core/py_envpool.h" @@ -150,10 +149,7 @@ void SokobanEnv::Step(const Action& action_) { const double reward = reward_step + reward_box * static_cast(prev_unmatched_boxes - unmatched_boxes) + - ((unmatched_boxes == 0) ? reward_finished : 0.0); - std::cout << "prev_unmatched_boxes=" << prev_unmatched_boxes - << ", unmatched_boxes=" << unmatched_boxes - << ", so reward=" << reward << "\n"; + ((unmatched_boxes == 0) ? reward_finished : 0.0f); WriteState(static_cast(reward)); } From 448be932b310bff7a6c1fdd4705ae7ed596566b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Sun, 21 Jan 2024 12:15:40 -0800 Subject: [PATCH 28/60] Revert "Add unmatched boxes to spec" This reverts commit 9de2c16e639fa547d7e281507514a4e23e6e9018. 
--- envpool/sokoban/sokoban_envpool.cc | 1 - envpool/sokoban/sokoban_envpool.h | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index a61c0fa7..e6cbdd39 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -167,7 +167,6 @@ constexpr std::array, PLAYER_ON_TARGET + 1> TINY_COLORS = void SokobanEnv::WriteState(float reward) { auto state = Allocate(); state["reward"_] = reward; - state["info:unmatched_boxes"_] = unmatched_boxes; Array& obs = state["obs"_]; if (obs.size != 3 * world.size()) { std::stringstream msg; diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index a2b30ff2..6ed247be 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -33,8 +33,7 @@ class SokobanEnvFns { template static decltype(auto) StateSpec(const Config& conf) { int dim_room = conf["dim_room"_]; - return MakeDict("obs"_.Bind(Spec({3, dim_room, dim_room})), - "info:unmatched_boxes"_.Bind(Spec({}))); + return MakeDict("obs"_.Bind(Spec({3, dim_room, dim_room}))); } template static decltype(auto) ActionSpec(const Config& conf) { From 653dab6b8efed79292e0ee7edd6394bda0dc3f37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Wed, 7 Feb 2024 22:41:54 -0800 Subject: [PATCH 29/60] Random episode length every time --- envpool/sokoban/sokoban_envpool.cc | 5 +++++ envpool/sokoban/sokoban_envpool.h | 10 ++++++---- envpool/sokoban/sokoban_py_envpool_test.py | 1 + 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index e6cbdd39..dff613c7 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -9,6 +9,11 @@ namespace sokoban { void SokobanEnv::Reset() { + const int max_episode_steps = spec_.config["max_episode_steps"_]; + const int min_episode_steps = 
spec_.config["min_episode_steps"_]; + std::uniform_int_distribution episode_length_rand(min_episode_steps, max_episode_steps); + current_max_episode_steps_ = episode_length_rand(gen_); + world = *level_loader.RandomLevel(gen_); if (world.size() != dim_room * dim_room) { std::stringstream msg; diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 6ed247be..40ff47c1 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -28,7 +28,8 @@ class SokobanEnvFns { static decltype(auto) DefaultConfig() { return MakeDict("reward_finished"_.Bind(10.0), "reward_box"_.Bind(1.0), "reward_step"_.Bind(-0.1), "dim_room"_.Bind(10), - "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0)); + "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0), + "min_episode_steps"_.Bind(0)); } template static decltype(auto) StateSpec(const Config& conf) { @@ -55,7 +56,8 @@ class SokobanEnv : public Env { levels_dir{static_cast(spec.config["levels_dir"_])}, level_loader(levels_dir), world(WALL, static_cast(dim_room * dim_room)), - verbose(static_cast(spec.config["verbose"_])) { + verbose(static_cast(spec.config["verbose"_])), + current_max_episode_steps_(static_cast(spec.config["max_episode_steps"_])) { if (max_num_players_ != spec_.config["max_num_players"_]) { std::stringstream msg; msg << "max_num_players_ != spec_['max_num_players'] " << max_num_players_ @@ -72,8 +74,7 @@ class SokobanEnv : public Env { } bool IsDone() override { - const int max_episode_steps = spec_.config["max_episode_steps"_]; - return (unmatched_boxes == 0) || (current_step_ >= max_episode_steps); } + return (unmatched_boxes == 0) || (current_step_ >= current_max_episode_steps_); } void Reset() override; void Step(const Action& action) override; @@ -88,6 +89,7 @@ class SokobanEnv : public Env { SokobanLevel world; int verbose; + int current_max_episode_steps_; int current_step_{0}; int player_x{0}, player_y{0}; int unmatched_boxes{0}; diff --git 
a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index fb1cba94..c0f3973f 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -22,6 +22,7 @@ def test_config(self) -> None: "num_threads", "seed", "thread_affinity_offset", + "min_episode_steps", # Default and also used by sokoban "max_episode_steps", # defined by sokoban From 4a6e7686af7ed300a02539b80ce29c50955a19bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Thu, 7 Mar 2024 00:50:47 -0800 Subject: [PATCH 30/60] CircleCI now runs linters and tests (#2) * Clang-format now passes * cpplint now passes * Install only the requirements needed for Sokoban even when testing * Satisfy the linters * Ran `make format` * Ability to run python tests * Devtools added * Run every linter and test * Fix schema * Run on CircleCI * Fix lint complaint about top-level std::string const. --- .circleci/config.yml | 80 ++++++++ .clang-tidy | 5 + .dir-locals.el | 4 + CPPLINT.cfg | 4 +- Makefile | 18 +- envpool/classic_control/pendulum.h | 11 +- envpool/core/env_spec.h | 4 +- envpool/sokoban/BUILD | 27 ++- envpool/sokoban/__init__.py | 30 ++- envpool/sokoban/level_loader.cc | 86 +++++---- envpool/sokoban/level_loader.h | 50 +++-- envpool/sokoban/registration.py | 30 ++- envpool/sokoban/sokoban_envpool.cc | 173 ++++++++--------- envpool/sokoban/sokoban_envpool.h | 88 +++++---- envpool/sokoban/sokoban_py_envpool_test.py | 182 ++++++++++-------- envpool/workspace0.bzl | 1 + .../requirements-devtools.txt | 5 + .../pip_requirements/requirements-sokoban.txt | 1 + 18 files changed, 505 insertions(+), 294 deletions(-) create mode 100644 .circleci/config.yml create mode 100644 .dir-locals.el create mode 100644 third_party/pip_requirements/requirements-devtools.txt create mode 120000 third_party/pip_requirements/requirements-sokoban.txt diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 
00000000..9d5399e2 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,80 @@ +version: 2.1 + +parameters: + action: + type: enum + enum: [oncommit, docker] + default: oncommit + + docker_img_version: + # Docker image version for running tests. + type: string + default: "8f41d1e-envpool-ci" + +workflows: + test-jobs: + when: + equal: [oncommit, << pipeline.parameters.action >>] + jobs: + - lint: + context: + - ghcr-auth + - tests: + context: + - ghcr-auth + +jobs: + lint: + docker: + - image: ghcr.io/alignmentresearch/learned-planners:<< pipeline.parameters.docker_img_version >> + auth: + username: "$GHCR_DOCKER_USER" + password: "$GHCR_DOCKER_TOKEN" + resource_class: medium + working_directory: /app + steps: + - checkout + # Copied from .github/workflows/lint.yml + - run: + name: flake8 + command: | + make flake8 + - run: + name: isort and yapf + command: | + make py-format + - run: + name: cpplint + command: | + make cpplint + - run: + name: clang-format + command: | + make clang-format + - run: + name: clang-tidy + command: | + make clang-tidy + - run: + name: buildifier + command: | + make buildifier + - run: + name: addlicense + command: | + make addlicense + # Skip mypy, docstyle and spelling + + tests: + docker: + - image: ghcr.io/alignmentresearch/learned-planners:<< pipeline.parameters.docker_img_version >> + auth: + username: "$GHCR_DOCKER_USER" + password: "$GHCR_DOCKER_TOKEN" + resource_class: medium + working_directory: /app + steps: + - checkout + - run: + name: Run tests + command: make bazel-test diff --git a/.clang-tidy b/.clang-tidy index d62bd5b9..fed4549b 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -11,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +# Disable google-runtime-references, because passing by reference is less +# error-prone than passing by pointer. 
They actually removed it from their style +# guide (https://www.mail-archive.com/cfe-commits@lists.llvm.org/msg203119.html) --- Checks: ' bugprone-*, @@ -29,6 +33,7 @@ Checks: ' -readability-magic-numbers, -readability-static-accessed-through-instance, -readability-uppercase-literal-suffix, + -google-runtime-references, ' CheckOptions: - { key: readability-identifier-naming.ClassCase, value: CamelCase } diff --git a/.dir-locals.el b/.dir-locals.el new file mode 100644 index 00000000..4fa827c3 --- /dev/null +++ b/.dir-locals.el @@ -0,0 +1,4 @@ +;; Don't format Python in this directory with the Emacs formatter, it conflicts +;; with `make format`. +((python-mode . nil) + (js-json-mode . nil)) diff --git a/CPPLINT.cfg b/CPPLINT.cfg index aa112bc3..ced4a13f 100644 --- a/CPPLINT.cfg +++ b/CPPLINT.cfg @@ -1 +1,3 @@ -filter=-build/c++11,+build/c++17,-build/include_subdir +# Disable runtime-references, it's not in the Google style guide anymore +# and is less error prone. See .clang-tidy +filter=-build/c++11,+build/c++17,-build/include_subdir,-runtime/references diff --git a/Makefile b/Makefile index 0a024181..822be03c 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,8 @@ PYTHON_FILES = $(shell find . -type f -name "*.py") CPP_FILES = $(shell find $(PROJECT_NAME) -type f -name "*.h" -o -name "*.cc") BAZEL_FILES = $(shell find . 
-type f -name "*BUILD" -o -name "*.bzl") COMMIT_HASH = $(shell git log -1 --format=%h) -COPYRIGHT = "Garena Online Private Limited" +COPYRIGHT = "FAR AI" +COPYRIGHT_YEAR = "2023-2024" BAZELOPT = DATE = $(shell date "+%Y-%m-%d") DOCKER_TAG = $(DATE)-$(COMMIT_HASH) @@ -79,6 +80,9 @@ cpplint: cpplint-install clang-format: clang-format-install clang-format --style=file -i $(CPP_FILES) -n --Werror +clang-format-fix: clang-format-install + clang-format --style=file -i $(CPP_FILES) --Werror + # bazel file linter buildifier: buildifier-install @@ -87,13 +91,15 @@ buildifier: buildifier-install # bazel build/test bazel-pip-requirement-dev: - cd third_party/pip_requirements && (cmp requirements.txt requirements-dev.txt || ln -sf requirements-dev.txt requirements.txt) + # Modified to only install dependencies relevant to testing Sokoban (which is the same as release) + cd third_party/pip_requirements && (cmp requirements.txt requirements-sokoban.txt || ln -sf requirements-sokoban.txt requirements.txt) bazel-pip-requirement-release: cd third_party/pip_requirements && (cmp requirements.txt requirements-release.txt || ln -sf requirements-release.txt requirements.txt) clang-tidy: clang-tidy-install bazel-pip-requirement-dev - bazel build $(BAZELOPT) //... --config=clang-tidy --config=test + # Only lint the things we actually build + bazel build $(BAZELOPT) //envpool/core/... //envpool/sokoban/... --config=clang-tidy --config=test bazel-debug: bazel-install bazel-pip-requirement-dev bazel run $(BAZELOPT) //:setup --config=debug -- bdist_wheel @@ -111,7 +117,7 @@ bazel-release: bazel-install bazel-pip-requirement-release cp bazel-bin/setup.runfiles/$(PROJECT_NAME)/dist/*.whl ./dist bazel-test: bazel-install bazel-pip-requirement-dev - bazel test --test_output=all $(BAZELOPT) //... --config=test --spawn_strategy=local --color=yes + bazel test --test_output=all $(BAZELOPT) //envpool/core/... //envpool/sokoban/... 
--config=test --spawn_strategy=local --color=yes bazel-clean: bazel-install bazel clean --expunge @@ -119,7 +125,7 @@ bazel-clean: bazel-install # documentation addlicense: addlicense-install - addlicense -c $(COPYRIGHT) -l apache -y 2023 -check $(PROJECT_FOLDER) + addlicense -c $(COPYRIGHT) -l apache -y "$(COPYRIGHT_YEAR)" -check $(PROJECT_FOLDER) docstyle: doc-install pydocstyle $(PROJECT_NAME) && doc8 docs && cd docs && make html SPHINXOPTS="-W" @@ -144,7 +150,7 @@ format: py-format-install clang-format-install buildifier-install addlicense-ins yapf -ir $(PYTHON_FILES) clang-format -style=file -i $(CPP_FILES) buildifier -r -lint=fix $(BAZEL_FILES) - addlicense -c $(COPYRIGHT) -l apache -y 2023 $(PROJECT_FOLDER) + addlicense -c $(COPYRIGHT) -l apache -y "$(COPYRIGHT_YEAR)" $(PROJECT_FOLDER) # Build docker images diff --git a/envpool/classic_control/pendulum.h b/envpool/classic_control/pendulum.h index f2a594ad..85e91c1a 100644 --- a/envpool/classic_control/pendulum.h +++ b/envpool/classic_control/pendulum.h @@ -77,9 +77,8 @@ class PendulumEnv : public Env { void Step(const Action& action) override { done_ = (++elapsed_step_ >= max_episode_steps_); float act = action["action"_]; - double u = act < -kMaxTorque ? -kMaxTorque - : act > kMaxTorque ? kMaxTorque - : act; + double u = + act < -kMaxTorque ? -kMaxTorque : act > kMaxTorque ? kMaxTorque : act; double cost = theta_ * theta_ + 0.1 * theta_dot_ * theta_dot_ + 0.001 * u * u; double new_theta_dot = @@ -87,9 +86,9 @@ class PendulumEnv : public Env { if (version_ == 0) { theta_ += new_theta_dot * kDt; } - theta_dot_ = new_theta_dot < -kMaxSpeed ? -kMaxSpeed - : new_theta_dot > kMaxSpeed ? kMaxSpeed - : new_theta_dot; + theta_dot_ = new_theta_dot < -kMaxSpeed + ? -kMaxSpeed + : new_theta_dot > kMaxSpeed ? 
kMaxSpeed : new_theta_dot; if (version_ == 1) { theta_ += new_theta_dot * kDt; } diff --git a/envpool/core/env_spec.h b/envpool/core/env_spec.h index f59e1fb2..c3cc7f69 100644 --- a/envpool/core/env_spec.h +++ b/envpool/core/env_spec.h @@ -52,8 +52,8 @@ class EnvSpec { using Config = decltype(ConcatDict(common_config, EnvFns::DefaultConfig())); using ConfigKeys = typename Config::Keys; using ConfigValues = typename Config::Values; - using StateSpec = decltype(ConcatDict( - common_state_spec, EnvFns::StateSpec(std::declval()))); + using StateSpec = decltype( + ConcatDict(common_state_spec, EnvFns::StateSpec(std::declval()))); using ActionSpec = decltype(ConcatDict( common_action_spec, EnvFns::ActionSpec(std::declval()))); using StateKeys = typename StateSpec::Keys; diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index eac4b98b..d231954e 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -1,3 +1,17 @@ +# Copyright 2023-2024 FAR AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ load("@pip_requirements//:requirements.bzl", "requirement") load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") @@ -20,7 +34,10 @@ py_library( cc_library( name = "sokoban_envpool_h", - hdrs = ["sokoban_envpool.h", "level_loader.h"], + hdrs = [ + "level_loader.h", + "sokoban_envpool.h", + ], deps = [ "//envpool/core:async_envpool", "//envpool/core:env", @@ -40,12 +57,12 @@ cc_library( py_test( name = "test", - main = "sokoban_py_envpool_test.py", srcs = ["sokoban_py_envpool_test.py"], + main = "sokoban_py_envpool_test.py", deps = [ - ":sokoban", ":registration", - "//envpool:envpool", + ":sokoban", + "//envpool", requirement("numpy"), requirement("absl-py"), ], @@ -54,8 +71,8 @@ py_test( pybind_extension( name = "sokoban_envpool", srcs = [ - "sokoban_envpool.cc", "level_loader.cc", + "sokoban_envpool.cc", ], linkopts = [ "-ldl", diff --git a/envpool/sokoban/__init__.py b/envpool/sokoban/__init__.py index 0e785494..e284b1ca 100644 --- a/envpool/sokoban/__init__.py +++ b/envpool/sokoban/__init__.py @@ -1,17 +1,31 @@ +# Copyright 2023-2024 FAR AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from envpool.python.api import py_env from .sokoban_envpool import _SokobanEnvPool, _SokobanEnvSpec ( - SokobanEnvSpec, - SokobanDMEnvPool, - SokobanGymEnvPool, - SokobanGymnasiumEnvPool, + SokobanEnvSpec, + SokobanDMEnvPool, + SokobanGymEnvPool, + SokobanGymnasiumEnvPool, ) = py_env(_SokobanEnvSpec, _SokobanEnvPool) __all__ = [ - "SokobanEnvSpec", - "SokobanDMEnvPool", - "SokobanGymEnvPool", - "SokobanGymnasiumEnvPool", + "SokobanEnvSpec", + "SokobanDMEnvPool", + "SokobanGymEnvPool", + "SokobanGymnasiumEnvPool", ] diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index 526fdc02..b252896c 100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -1,3 +1,17 @@ +// Copyright 2023-2024 FAR AI +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include "level_loader.h" #include @@ -7,19 +21,22 @@ #include #include #include +#include namespace sokoban { -size_t ERROR_SZ = 1024; - LevelLoader::LevelLoader(const std::filesystem::path& base_path, int verbose) - : levels(0), cur_level(levels.begin()), level_file_paths(0), verbose(verbose) { + : levels_(0), + cur_level_(levels_.begin()), + level_file_paths_(0), + verbose(verbose) { for (const auto& entry : std::filesystem::directory_iterator(base_path)) { - level_file_paths.push_back(entry.path()); + level_file_paths_.push_back(entry.path()); } } -const std::string PRINT_LEVEL_KEY = "# .a@$s"; +static const std::array kPrintLevelKey{ + '#', ' ', '.', 'a', '@', '$', 's'}; void AddLine(SokobanLevel& level, const std::string& line) { auto start = line.at(0); @@ -33,19 +50,19 @@ void AddLine(SokobanLevel& level, const std::string& line) { for (const char& r : line) { switch (r) { case '#': - level.push_back(WALL); + level.push_back(kWall); break; case '@': - level.push_back(PLAYER); + level.push_back(kPlayer); break; case '$': - level.push_back(BOX); + level.push_back(kBox); break; case '.': - level.push_back(TARGET); + level.push_back(kTarget); break; case ' ': - level.push_back(EMPTY); + level.push_back(kEmpty); break; default: std::stringstream msg; @@ -57,12 +74,15 @@ void AddLine(SokobanLevel& level, const std::string& line) { } } -void PrintLevel(std::ostream& os, SokobanLevel vec) { +void PrintLevel(std::ostream& os, const SokobanLevel& vec) { size_t dim_room = 0; - for (; dim_room * dim_room != vec.size() && dim_room <= 100; dim_room++) - ; // take sqrt(vec.size()) + for (; dim_room * dim_room != vec.size() && dim_room <= 100; dim_room++) { + } // take sqrt(vec.size()) + if (dim_room == 0) { + throw std::runtime_error("dim_room cannot be zero."); + } for (size_t i = 0; i < vec.size(); i++) { - os << PRINT_LEVEL_KEY.at(vec.at(i)); + os << kPrintLevelKey.at(vec.at(i)); if ((i + 1) % dim_room == 0) { os << std::endl; } @@ -71,20 +91,20 @@ void 
PrintLevel(std::ostream& os, SokobanLevel vec) { void LevelLoader::LoadNewFile(std::mt19937& gen) { std::uniform_int_distribution load_file_idx_r( - 0, level_file_paths.size() - 1); + 0, level_file_paths_.size() - 1); const size_t load_file_idx = load_file_idx_r(gen); - const std::filesystem::path& file_path = level_file_paths.at(load_file_idx); + const std::filesystem::path& file_path = level_file_paths_.at(load_file_idx); std::ifstream file(file_path); - levels.clear(); + levels_.clear(); std::string line; while (std::getline(file, line)) { - if (line.size() == 0) { + if (line.empty()) { continue; } if (line.at(0) == '#') { - SokobanLevel& cur_level = levels.emplace_back(0); + SokobanLevel& cur_level = levels_.emplace_back(0); cur_level.reserve(10 * 10); // In practice most levels are this size // Count contiguous '#' characters and use this as the box dimension @@ -96,7 +116,7 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) { } AddLine(cur_level, line); - while (std::getline(file, line) && line.size() > 0 && line.at(0) == '#') { + while (std::getline(file, line) && !line.empty() && line.at(0) == '#') { if (line.length() != dim_room) { std::stringstream msg; msg << "Irregular line '" << line @@ -114,36 +134,36 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) { } } } - std::shuffle(levels.begin(), levels.end(), gen); - if (levels.empty()) { + std::shuffle(levels_.begin(), levels_.end(), gen); + if (levels_.empty()) { std::stringstream msg; msg << "No levels loaded from file '" << file_path << std::endl; throw std::runtime_error(msg.str()); } - if(verbose >= 1) { - std::cout << "Loaded " << levels.size() << " levels from " << file_path + if (verbose >= 1) { + std::cout << "Loaded " << levels_.size() << " levels from " << file_path << std::endl; - if(verbose >= 2) { - PrintLevel(std::cout, levels.at(0)); + if (verbose >= 2) { + PrintLevel(std::cout, levels_.at(0)); std::cout << std::endl; - PrintLevel(std::cout, levels.at(1)); + PrintLevel(std::cout, 
levels_.at(1)); std::cout << std::endl; } } } -const std::vector::iterator LevelLoader::RandomLevel( +std::vector::iterator LevelLoader::RandomLevel( std::mt19937& gen) { - if (cur_level == levels.end()) { + if (cur_level_ == levels_.end()) { LoadNewFile(gen); - cur_level = levels.begin(); - if (cur_level == levels.end()) { + cur_level_ = levels_.begin(); + if (cur_level_ == levels_.end()) { throw std::runtime_error("No levels loaded."); } } - auto out = cur_level; - cur_level++; + auto out = cur_level_; + cur_level_++; return out; } diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h index 88326c84..f85a2a67 100644 --- a/envpool/sokoban/level_loader.h +++ b/envpool/sokoban/level_loader.h @@ -1,5 +1,21 @@ -#ifndef LEVEL_LOADER_H_ -#define LEVEL_LOADER_H_ +/* + * Copyright 2023-2024 FAR AI + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ENVPOOL_SOKOBAN_LEVEL_LOADER_H_ +#define ENVPOOL_SOKOBAN_LEVEL_LOADER_H_ #include #include @@ -9,30 +25,30 @@ namespace sokoban { using SokobanLevel = std::vector; -constexpr uint8_t WALL = 0; -constexpr uint8_t EMPTY = 1; -constexpr uint8_t TARGET = 2; -constexpr uint8_t BOX_ON_TARGET = 3; -constexpr uint8_t BOX = 4; -constexpr uint8_t PLAYER = 5; -constexpr uint8_t PLAYER_ON_TARGET = 6; +constexpr uint8_t kWall = 0; +constexpr uint8_t kEmpty = 1; +constexpr uint8_t kTarget = 2; +constexpr uint8_t kBoxOnTarget = 3; +constexpr uint8_t kBox = 4; +constexpr uint8_t kPlayer = 5; +constexpr uint8_t kPlayerOnTarget = 6; +constexpr uint8_t kMaxLevelObject = kPlayerOnTarget; class LevelLoader { protected: - std::vector levels; - std::vector::iterator cur_level; - std::vector level_file_paths; + std::vector levels_; + std::vector::iterator cur_level_; + std::vector level_file_paths_; void LoadNewFile(std::mt19937& gen); public: int verbose; - const std::vector::iterator RandomLevel(std::mt19937& gen); - LevelLoader(const std::filesystem::path& base_path, int verbose=0); + std::vector::iterator RandomLevel(std::mt19937& gen); + explicit LevelLoader(const std::filesystem::path& base_path, int verbose = 0); }; - -void PrintLevel(std::ostream& os, SokobanLevel vec); +void PrintLevel(std::ostream& os, const SokobanLevel& vec); } // namespace sokoban -#endif // LEVEL_LOADER_H_ +#endif // ENVPOOL_SOKOBAN_LEVEL_LOADER_H_ diff --git a/envpool/sokoban/registration.py b/envpool/sokoban/registration.py index 490b1a34..e79dc31c 100644 --- a/envpool/sokoban/registration.py +++ b/envpool/sokoban/registration.py @@ -1,12 +1,26 @@ +# Copyright 2023-2024 FAR AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from envpool.registration import register register( - task_id="Sokoban-v0", - import_path="envpool.sokoban", - spec_cls="SokobanEnvSpec", - dm_cls="SokobanDMEnvPool", - gym_cls="SokobanGymEnvPool", - gymnasium_cls="SokobanGymnasiumEnvPool", - max_episode_steps=60, - reward_step=-0.1, + task_id="Sokoban-v0", + import_path="envpool.sokoban", + spec_cls="SokobanEnvSpec", + dm_cls="SokobanDMEnvPool", + gym_cls="SokobanGymEnvPool", + gymnasium_cls="SokobanGymnasiumEnvPool", + max_episode_steps=60, + reward_step=-0.1, ) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index dff613c7..a29b87da 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -1,8 +1,23 @@ +// Copyright 2023-2024 FAR AI +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include "envpool/sokoban/sokoban_envpool.h" #include #include #include +#include #include "envpool/core/py_envpool.h" @@ -11,26 +26,27 @@ namespace sokoban { void SokobanEnv::Reset() { const int max_episode_steps = spec_.config["max_episode_steps"_]; const int min_episode_steps = spec_.config["min_episode_steps"_]; - std::uniform_int_distribution episode_length_rand(min_episode_steps, max_episode_steps); + std::uniform_int_distribution episode_length_rand(min_episode_steps, + max_episode_steps); current_max_episode_steps_ = episode_length_rand(gen_); - world = *level_loader.RandomLevel(gen_); - if (world.size() != dim_room * dim_room) { + world_ = *(level_loader_.RandomLevel(gen_)); + if (world_.size() != dim_room_ * dim_room_) { std::stringstream msg; - msg << "Loaded level is not dim_room x dim_room. world.size()=" - << world.size() << ", dim_room=" << dim_room << std::endl; + msg << "Loaded level is not dim_room x dim_room. world_.size()=" + << world_.size() << ", dim_room_=" << dim_room_ << std::endl; throw std::runtime_error(msg.str()); } - unmatched_boxes = 0; - for (int x = 0; x < dim_room; x++) { - for (int y = 0; y < dim_room; y++) { + unmatched_boxes_ = 0; + for (int x = 0; x < dim_room_; x++) { + for (int y = 0; y < dim_room_; y++) { switch (WorldAt(x, y)) { - case PLAYER: - player_x = x; - player_y = y; + case kPlayer: + player_x_ = x; + player_y_ = y; break; - case BOX: - unmatched_boxes++; + case kBox: + unmatched_boxes_++; break; } } @@ -39,153 +55,128 @@ void SokobanEnv::Reset() { WriteState(0.0f); } -uint8_t SokobanEnv::WorldAt(int x, int y) { - if ((x < 0) || (x >= dim_room) || (y < 0) || (y >= dim_room)) { - return WALL; +[[nodiscard]] uint8_t SokobanEnv::WorldAt(int x, int y) const { + if ((x < 0) || (x >= dim_room_) || (y < 0) || (y >= dim_room_)) { + return kWall; } - return world.at(x + y * dim_room); + return world_.at(x + y * dim_room_); } void SokobanEnv::WorldAssignAt(int x, int y, uint8_t value) { - if ((x < 0) || (x >= dim_room) || 
(y < 0) || (y >= dim_room)) { + if ((x < 0) || (x >= dim_room_) || (y < 0) || (y >= dim_room_)) { return; } - world.at(x + y * dim_room) = value; + world_.at(x + y * dim_room_) = value; } -constexpr std::array, 4> CHANGE_COORDINATES = { +constexpr std::array, 4> kChangeCoordinates = { {{0, -1}, {0, 1}, {-1, 0}, {1, 0}}}; -constexpr std::array action_names = { - "ACT_NOOP", - "ACT_PUSH_UP", - "ACT_PUSH_DOWN", - "ACT_PUSH_LEFT", - "ACT_PUSH_RIGHT", - "ACT_MOVE_UP", - "ACT_MOVE_DOWN", - "ACT_MOVE_LEFT", - "ACT_MOVE_RIGHT", -}; - - -constexpr std::array arena_names = { - "WALL", - "EMPTY", - "TARGET", - "BOX_ON_TARGET", - "BOX", - "PLAYER", - "PLAYER_ON_TARGET", -}; - - -void SokobanEnv::Step(const Action& action_) { +void SokobanEnv::Step(const Action& action_dict) { current_step_++; - const int action = action_["action"_]; - if (action == ACT_NOOP) { - WriteState(static_cast(reward_step)); + const int action = action_dict["action"_]; + if (action == kActNoop) { + WriteState(static_cast(reward_step_)); return; } // From here on, assume the agent will try to move - const int change_coordinates_idx = (action - 1) % CHANGE_COORDINATES.size(); - const int delta_x = CHANGE_COORDINATES.at(change_coordinates_idx).at(0); - const int delta_y = CHANGE_COORDINATES.at(change_coordinates_idx).at(1); + const int change_coordinates_idx = (action - 1) % kChangeCoordinates.size(); + const int delta_x = kChangeCoordinates.at(change_coordinates_idx).at(0); + const int delta_y = kChangeCoordinates.at(change_coordinates_idx).at(1); - const int prev_unmatched_boxes = unmatched_boxes; + const int prev_unmatched_boxes = unmatched_boxes_; // Arena: the things that will change if the agent moves std::array arena; for (size_t i = 0; i < arena.size(); i++) { - arena.at(i) = WorldAt(player_x + delta_x * i, player_y + delta_y * i); + arena.at(i) = WorldAt(player_x_ + delta_x * i, player_y_ + delta_y * i); } // The box will move IFF action is a pushing action AND there's a box AND it // has space 
to move const bool box_moves = - ((action <= ACT_PUSH_RIGHT) && - ((arena.at(1) == BOX) || (arena.at(1) == BOX_ON_TARGET)) && - ((arena.at(2) == EMPTY) || (arena.at(2) == TARGET))); + ((action <= kActPushRight) && + ((arena.at(1) == kBox) || (arena.at(1) == kBoxOnTarget)) && + ((arena.at(2) == kEmpty) || (arena.at(2) == kTarget))); // The agent will move if the next arena location is possible to move into, or // if it's a box and the box moves const bool is_a_box_and_the_box_moves = box_moves; - const bool agent_moves = (arena.at(1) == EMPTY) || (arena.at(1) == TARGET) || + const bool agent_moves = (arena.at(1) == kEmpty) || + (arena.at(1) == kTarget) || is_a_box_and_the_box_moves; if (agent_moves) { - // `is_target` is boolean but we'll need it as an int later - std::array is_target; + std::array is_target; for (size_t i = 0; i < arena.size(); i++) { uint8_t tile = arena.at(i); - // We explicitly set them to 0 or 1 because false/true are not guaranteed - // to be 0/1. is_target.at(i) = - ((tile == BOX_ON_TARGET || tile == TARGET || tile == PLAYER_ON_TARGET) - ? 1 - : 0); + (tile == kBoxOnTarget || tile == kTarget || tile == kPlayerOnTarget); } // only whatever was on the floor is now at position 0 - arena.at(0) = is_target.at(0) ? TARGET : EMPTY; + arena.at(0) = is_target.at(0) ? kTarget : kEmpty; // the player now occupies position 1 - arena.at(1) = is_target.at(1) ? PLAYER_ON_TARGET : PLAYER; + arena.at(1) = is_target.at(1) ? kPlayerOnTarget : kPlayer; if (box_moves) { // the box moves for sure. A target at 2 reduces the nubmer of unmatched // boxes (because the box goes there), a target at 1 increases it (the box // leaves from there). Both can be equal to 1 and in that case the number // stays the same. - unmatched_boxes += is_target.at(1) - is_target.at(2); + // + // Implicit conversion from bool to int is always 0/1. 
+ // https://en.cppreference.com/w/cpp/language/implicit_conversion + unmatched_boxes_ += + static_cast(is_target.at(1)) - static_cast(is_target.at(2)); // A box now occupies position 2 - arena.at(2) = is_target.at(2) ? BOX_ON_TARGET : BOX; + arena.at(2) = is_target.at(2) ? kBoxOnTarget : kBox; } for (size_t i = 0; i < arena.size(); i++) { - WorldAssignAt(player_x + delta_x * i, player_y + delta_y * i, + WorldAssignAt(player_x_ + delta_x * i, player_y_ + delta_y * i, arena.at(i)); } // After assigning the arena, move player. - player_x += delta_x; - player_y += delta_y; + player_x_ += delta_x; + player_y_ += delta_y; } - const double reward = - reward_step + - reward_box * static_cast(prev_unmatched_boxes - unmatched_boxes) + - ((unmatched_boxes == 0) ? reward_finished : 0.0f); + const double reward = reward_step_ + + reward_box_ * static_cast(prev_unmatched_boxes - + unmatched_boxes_) + + ((unmatched_boxes_ == 0) ? reward_finished_ : 0.0f); WriteState(static_cast(reward)); } -constexpr std::array, PLAYER_ON_TARGET + 1> TINY_COLORS = - {{ - {0, 0, 0}, // WALL - {243, 248, 238}, // EMPTY - {254, 126, 125}, // TARGET - {254, 95, 56}, // BOX_ON_TARGET - {142, 121, 56}, // BOX - {160, 212, 56}, // PLAYER - {219, 212, 56} // PLAYER_ON_TARGET - }}; +constexpr std::array, kPlayerOnTarget + 1> kTinyColors{{ + {0, 0, 0}, // WALL + {243, 248, 238}, // EMPTY + {254, 126, 125}, // TARGET + {254, 95, 56}, // BOX_ON_TARGET + {142, 121, 56}, // BOX + {160, 212, 56}, // PLAYER + {219, 212, 56} // PLAYER_ON_TARGET +}}; void SokobanEnv::WriteState(float reward) { auto state = Allocate(); state["reward"_] = reward; Array& obs = state["obs"_]; - if (obs.size != 3 * world.size()) { + if (obs.size != 3 * world_.size()) { std::stringstream msg; msg << "Obs size and level size are different: obs_size=" << obs.size - << "/3, level_size=" << world.size() << ", dim_room=" << dim_room + << "/3, level_size=" << world_.size() << ", dim_room=" << dim_room_ << std::endl; throw 
std::runtime_error(msg.str()); } - std::vector out(3 * world.size()); + std::vector out(3 * world_.size()); for (int rgb = 0; rgb < 3; rgb++) { - for (size_t i = 0; i < world.size(); i++) { - out.at(rgb * (dim_room * dim_room) + i) = - TINY_COLORS.at(world.at(i)).at(rgb); + for (size_t i = 0; i < world_.size(); i++) { + out.at(rgb * (dim_room_ * dim_room_) + i) = + kTinyColors.at(world_.at(i)).at(rgb); } } obs.Assign(out.data(), out.size()); diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 40ff47c1..c788040f 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -1,9 +1,26 @@ -#ifndef ENVPOOL_SOKOBAN_H_ -#define ENVPOOL_SOKOBAN_H_ +/* + * Copyright 2023-2024 FAR AI + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ENVPOOL_SOKOBAN_SOKOBAN_ENVPOOL_H_ +#define ENVPOOL_SOKOBAN_SOKOBAN_ENVPOOL_H_ #include #include #include +#include #include "envpool/core/array.h" #include "envpool/core/async_envpool.h" @@ -12,16 +29,16 @@ namespace sokoban { -constexpr int ACT_NOOP = 0; -constexpr int ACT_PUSH_UP = 1; -constexpr int ACT_PUSH_DOWN = 2; -constexpr int ACT_PUSH_LEFT = 3; -constexpr int ACT_PUSH_RIGHT = 4; -constexpr int ACT_MOVE_UP = 5; -constexpr int ACT_MOVE_DOWN = 6; -constexpr int ACT_MOVE_LEFT = 7; -constexpr int ACT_MOVE_RIGHT = 8; -constexpr int MAX_ACTION = ACT_MOVE_RIGHT; +constexpr int kActNoop = 0; +constexpr int kActPushUp = 1; +constexpr int kActPushDown = 2; +constexpr int kActPushLeft = 3; +constexpr int kActPushRight = 4; +constexpr int kActMoveUp = 5; +constexpr int kActMoveDown = 6; +constexpr int kActMoveLeft = 7; +constexpr int kActMoveRight = 8; +constexpr int kMaxAction = kActMoveRight; class SokobanEnvFns { public: @@ -38,7 +55,7 @@ class SokobanEnvFns { } template static decltype(auto) ActionSpec(const Config& conf) { - return MakeDict("action"_.Bind(Spec({-1}, {0, MAX_ACTION}))); + return MakeDict("action"_.Bind(Spec({-1}, {0, kMaxAction}))); } }; @@ -49,15 +66,16 @@ class SokobanEnv : public Env { public: SokobanEnv(const Spec& spec, int env_id) : Env(spec, env_id), - dim_room{static_cast(spec.config["dim_room"_])}, - reward_finished{static_cast(spec.config["reward_finished"_])}, - reward_box{static_cast(spec.config["reward_box"_])}, - reward_step{static_cast(spec.config["reward_step"_])}, - levels_dir{static_cast(spec.config["levels_dir"_])}, - level_loader(levels_dir), - world(WALL, static_cast(dim_room * dim_room)), - verbose(static_cast(spec.config["verbose"_])), - current_max_episode_steps_(static_cast(spec.config["max_episode_steps"_])) { + dim_room_{static_cast(spec.config["dim_room"_])}, + reward_finished_{static_cast(spec.config["reward_finished"_])}, + reward_box_{static_cast(spec.config["reward_box"_])}, + 
reward_step_{static_cast(spec.config["reward_step"_])}, + levels_dir_{static_cast(spec.config["levels_dir"_])}, + level_loader_(levels_dir_), + world_(kWall, static_cast(dim_room_ * dim_room_)), + verbose_(static_cast(spec.config["verbose"_])), + current_max_episode_steps_( + static_cast(spec.config["max_episode_steps"_])) { if (max_num_players_ != spec_.config["max_num_players"_]) { std::stringstream msg; msg << "max_num_players_ != spec_['max_num_players'] " << max_num_players_ @@ -74,31 +92,33 @@ class SokobanEnv : public Env { } bool IsDone() override { - return (unmatched_boxes == 0) || (current_step_ >= current_max_episode_steps_); } + return (unmatched_boxes_ == 0) || + (current_step_ >= current_max_episode_steps_); + } void Reset() override; - void Step(const Action& action) override; + void Step(const Action& action_dict) override; void WriteState(float reward); private: - int dim_room; - double reward_finished, reward_box, reward_step; - std::filesystem::path levels_dir; + int dim_room_; + double reward_finished_, reward_box_, reward_step_; + std::filesystem::path levels_dir_; - LevelLoader level_loader; - SokobanLevel world; - int verbose; + LevelLoader level_loader_; + SokobanLevel world_; + int verbose_; int current_max_episode_steps_; int current_step_{0}; - int player_x{0}, player_y{0}; - int unmatched_boxes{0}; + int player_x_{0}, player_y_{0}; + int unmatched_boxes_{0}; - uint8_t WorldAt(int x, int y); + [[nodiscard]] uint8_t WorldAt(int x, int y) const; void WorldAssignAt(int x, int y, uint8_t value); }; using SokobanEnvPool = AsyncEnvPool; } // namespace sokoban -#endif // ENVPOOL_SOKOBAN_H_ +#endif // ENVPOOL_SOKOBAN_SOKOBAN_ENVPOOL_H_ diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index c0f3973f..bed17588 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -1,102 +1,118 @@ +# Copyright 2023-2024 FAR AI +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. """Unit test for dummy envpool and speed benchmark.""" import time -import envpool # noqa: F401 -import envpool.sokoban.registration import numpy as np from absl import logging from absl.testing import absltest + +import envpool # noqa: F401 +import envpool.sokoban.registration from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec class _SokobanEnvPoolTest(absltest.TestCase): - def test_config(self) -> None: - ref_config_keys = [ - # Default environment keys - "base_path", - "batch_size", - "gym_reset_return_info", - "max_num_players", - "num_envs", - "num_threads", - "seed", - "thread_affinity_offset", - "min_episode_steps", - # Default and also used by sokoban - "max_episode_steps", - # defined by sokoban - "dim_room", - "levels_dir", - "reward_box", - "reward_finished", - "reward_step", - "verbose", - ] - default_conf = _SokobanEnvSpec._default_config_values - self.assertTrue(isinstance(default_conf, tuple)) - config_keys = _SokobanEnvSpec._config_keys - self.assertTrue(isinstance(config_keys, list)) - self.assertEqual(len(default_conf), len(config_keys)) - self.assertEqual(sorted(config_keys), sorted(ref_config_keys)) - def test_envpool(self) -> None: - batch = num_envs = 200 - env = envpool.make( - "Sokoban-v0", - env_type="gymnasium", - num_envs=num_envs, - batch_size=num_envs, - seed=2346890, - max_episode_steps=60, - reward_step=-0.1, - dim_room=10, - levels_dir="/app/envpool/sokoban/sample_levels", - ) - total_steps = 1000 + def 
test_config(self) -> None: + ref_config_keys = [ + # Default environment keys + "base_path", + "batch_size", + "gym_reset_return_info", + "max_num_players", + "num_envs", + "num_threads", + "seed", + "thread_affinity_offset", + "min_episode_steps", + # Default and also used by sokoban + "max_episode_steps", + # defined by sokoban + "dim_room", + "levels_dir", + "reward_box", + "reward_finished", + "reward_step", + "verbose", + ] + default_conf = _SokobanEnvSpec._default_config_values + self.assertTrue(isinstance(default_conf, tuple)) + config_keys = _SokobanEnvSpec._config_keys + self.assertTrue(isinstance(config_keys, list)) + self.assertEqual(len(default_conf), len(config_keys)) + self.assertEqual(sorted(config_keys), sorted(ref_config_keys)) + + def test_envpool(self) -> None: + batch = num_envs = 200 + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=num_envs, + batch_size=num_envs, + seed=2346890, + max_episode_steps=60, + reward_step=-0.1, + dim_room=10, + levels_dir="/app/envpool/sokoban/sample_levels", + ) + total_steps = 1000 - _ = env.reset() - t = time.time() - for _ in range(total_steps): - _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,))) - duration = time.time() - t - fps = total_steps * batch / duration - logging.info(f"FPS = {fps:.6f}") + _ = env.reset() + t = time.time() + for _ in range(total_steps): + _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,))) + duration = time.time() - t + fps = total_steps * batch / duration + logging.info(f"FPS = {fps:.6f}") - def test_envpool_max_episode_steps(self) -> None: - for max_episode_steps in [2, 5, 10]: - env = envpool.make( - "Sokoban-v0", - env_type="gymnasium", - num_envs=1, - batch_size=1, - max_episode_steps=max_episode_steps, - levels_dir="/app/envpool/sokoban/sample_levels", - ) - env.reset() - for _ in range(max_episode_steps - 1): - _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) - assert not np.any(terminated | truncated) + 
def test_envpool_max_episode_steps(self) -> None: + for max_episode_steps in [2, 5, 10]: + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=1, + batch_size=1, + min_episode_steps=max_episode_steps, + max_episode_steps=max_episode_steps, + levels_dir="/app/envpool/sokoban/sample_levels", + ) + env.reset() + for _ in range(max_episode_steps - 1): + _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) + assert not np.any(terminated | truncated) - _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) - assert not np.any(terminated) - assert np.all(truncated) + _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) + assert not np.any(terminated) + assert np.all(truncated) - def test_xla(self) -> None: - num_envs = 10 - env = envpool.make( - "Sokoban-v0", - env_type="dm", - num_envs=num_envs, - batch_size=num_envs, - seed=2346890, - max_episode_steps=60, - reward_step=-0.1, - dim_room=10, - levels_dir="/app/envpool/sokoban/sample_levels", - ) - handle, recv, send, step = env.xla() + def test_xla(self) -> None: + num_envs = 10 + env = envpool.make( + "Sokoban-v0", + env_type="dm", + num_envs=num_envs, + batch_size=num_envs, + seed=2346890, + max_episode_steps=60, + reward_step=-0.1, + dim_room=10, + levels_dir="/app/envpool/sokoban/sample_levels", + ) + handle, recv, send, step = env.xla() if __name__ == "__main__": - absltest.main() + absltest.main() diff --git a/envpool/workspace0.bzl b/envpool/workspace0.bzl index 2618ca92..98c44cde 100644 --- a/envpool/workspace0.bzl +++ b/envpool/workspace0.bzl @@ -20,6 +20,7 @@ load("//third_party/cuda:cuda.bzl", "cuda_configure") def workspace(): """Load requested packages.""" + # we cannot upgrade rules_python because it requires requirements_lock.txt after 0.13.0 maybe( http_archive, diff --git a/third_party/pip_requirements/requirements-devtools.txt b/third_party/pip_requirements/requirements-devtools.txt new file mode 100644 index 00000000..f7f556e5 
--- /dev/null +++ b/third_party/pip_requirements/requirements-devtools.txt @@ -0,0 +1,5 @@ +flake8==7.0.0 +flake8-bugbear==24.2.6 +isort==5.13.2 +yapf==0.40.2 +cpplint==1.6.1 diff --git a/third_party/pip_requirements/requirements-sokoban.txt b/third_party/pip_requirements/requirements-sokoban.txt new file mode 120000 index 00000000..6829e68e --- /dev/null +++ b/third_party/pip_requirements/requirements-sokoban.txt @@ -0,0 +1 @@ +requirements-release.txt \ No newline at end of file From 89fff4c25fd3c6c806f39138770d13e449f39db1 Mon Sep 17 00:00:00 2001 From: Mohammad Taufeeque <9taufeeque9@gmail.com> Date: Fri, 8 Mar 2024 05:42:49 +0530 Subject: [PATCH 31/60] Add option to load a fixed number of levels sequentially (#1) * add option to load sokoban sequentially * fix issues with original tests * add test case * fix seg fault * add safe_uniform_int function * use pytest and fix error * fix BUILD * Add pytest to requirements-dev.txt * fix lint * fix link and docker image * add pytest to release and fix lint * fix lint * revert image * fix clang-tidy lint --- envpool/sokoban/BUILD | 41 ++++++----- envpool/sokoban/level_loader.cc | 47 ++++++++---- envpool/sokoban/level_loader.h | 12 ++- envpool/sokoban/registration.py | 1 + envpool/sokoban/sample_levels/001.txt | 35 +++++++++ envpool/sokoban/sokoban_envpool.cc | 8 +- envpool/sokoban/sokoban_envpool.h | 8 +- envpool/sokoban/sokoban_py_envpool_test.py | 73 ++++++++++++++++--- envpool/sokoban/utils.h | 39 ++++++++++ .../pip_requirements/requirements-dev.txt | 1 + .../pip_requirements/requirements-release.txt | 1 + 11 files changed, 215 insertions(+), 51 deletions(-) create mode 100644 envpool/sokoban/sample_levels/001.txt create mode 100644 envpool/sokoban/utils.h diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index d231954e..ed684f96 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -15,30 +15,31 @@ load("@pip_requirements//:requirements.bzl", "requirement") 
load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") -package(default_visibility = ["//visibility:public"]) +package(default_visibility=["//visibility:public"]) py_library( - name = "sokoban", - srcs = ["__init__.py"], - data = [":sokoban_envpool.so"], - deps = ["//envpool/python:api"], + name="sokoban", + srcs=["__init__.py"], + data=[":sokoban_envpool.so"], + deps=["//envpool/python:api"], ) py_library( - name = "registration", - srcs = ["registration.py"], - deps = [ + name="registration", + srcs=["registration.py"], + deps=[ "//envpool:registration", ], ) cc_library( - name = "sokoban_envpool_h", - hdrs = [ + name="sokoban_envpool_h", + hdrs=[ "level_loader.h", "sokoban_envpool.h", + "utils.h", ], - deps = [ + deps=[ "//envpool/core:async_envpool", "//envpool/core:env", "//envpool/core:env_spec", @@ -56,28 +57,28 @@ cc_library( # ) py_test( - name = "test", - srcs = ["sokoban_py_envpool_test.py"], - main = "sokoban_py_envpool_test.py", - deps = [ + name="test", + srcs=["sokoban_py_envpool_test.py"], + main="sokoban_py_envpool_test.py", + deps=[ ":registration", ":sokoban", "//envpool", requirement("numpy"), - requirement("absl-py"), + requirement("pytest"), ], ) pybind_extension( - name = "sokoban_envpool", - srcs = [ + name="sokoban_envpool", + srcs=[ "level_loader.cc", "sokoban_envpool.cc", ], - linkopts = [ + linkopts=[ "-ldl", ], - deps = [ + deps=[ ":sokoban_envpool_h", "//envpool/core:py_envpool", ], diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index b252896c..314f4175 100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -23,20 +23,28 @@ #include #include +#include "envpool/sokoban/utils.h" + namespace sokoban { -LevelLoader::LevelLoader(const std::filesystem::path& base_path, int verbose) - : levels_(0), +LevelLoader::LevelLoader(const std::filesystem::path& base_path, + bool load_sequentially, int n_levels_to_load, + int verbose) + : load_sequentially_(load_sequentially), + 
n_levels_to_load_(n_levels_to_load), + levels_loaded_(0), + levels_(0), cur_level_(levels_.begin()), level_file_paths_(0), verbose(verbose) { for (const auto& entry : std::filesystem::directory_iterator(base_path)) { level_file_paths_.push_back(entry.path()); } + cur_file_ = level_file_paths_.begin(); } static const std::array kPrintLevelKey{ - '#', ' ', '.', 'a', '@', '$', 's'}; + '#', ' ', '.', 'a', '$', '@', 's'}; void AddLine(SokobanLevel& level, const std::string& line) { auto start = line.at(0); @@ -89,11 +97,19 @@ void PrintLevel(std::ostream& os, const SokobanLevel& vec) { } } -void LevelLoader::LoadNewFile(std::mt19937& gen) { - std::uniform_int_distribution load_file_idx_r( - 0, level_file_paths_.size() - 1); - const size_t load_file_idx = load_file_idx_r(gen); - const std::filesystem::path& file_path = level_file_paths_.at(load_file_idx); +void LevelLoader::LoadFile(std::mt19937& gen) { + std::filesystem::path file_path; + if (load_sequentially_) { + if (cur_file_ == level_file_paths_.end()) { + throw std::runtime_error("No more files to load."); + } + file_path = *cur_file_; + cur_file_++; + } else { + const size_t load_file_idx = SafeUniformInt( + static_cast(0), level_file_paths_.size() - 1, gen); + file_path = level_file_paths_.at(load_file_idx); + } std::ifstream file(file_path); levels_.clear(); @@ -134,7 +150,9 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) { } } } - std::shuffle(levels_.begin(), levels_.end(), gen); + if (!load_sequentially_) { + std::shuffle(levels_.begin(), levels_.end(), gen); + } if (levels_.empty()) { std::stringstream msg; msg << "No levels loaded from file '" << file_path << std::endl; @@ -142,7 +160,7 @@ void LevelLoader::LoadNewFile(std::mt19937& gen) { } if (verbose >= 1) { - std::cout << "Loaded " << levels_.size() << " levels from " << file_path + std::cout << "***Loaded " << levels_.size() << " levels from " << file_path << std::endl; if (verbose >= 2) { PrintLevel(std::cout, levels_.at(0)); @@ -153,10 +171,12 
@@ void LevelLoader::LoadNewFile(std::mt19937& gen) { } } -std::vector::iterator LevelLoader::RandomLevel( - std::mt19937& gen) { +std::vector::iterator LevelLoader::GetLevel(std::mt19937& gen) { + if (n_levels_to_load_ > 0 && levels_loaded_ >= n_levels_to_load_) { + throw std::runtime_error("Loaded all requested levels."); + } if (cur_level_ == levels_.end()) { - LoadNewFile(gen); + LoadFile(gen); cur_level_ = levels_.begin(); if (cur_level_ == levels_.end()) { throw std::runtime_error("No levels loaded."); @@ -164,6 +184,7 @@ std::vector::iterator LevelLoader::RandomLevel( } auto out = cur_level_; cur_level_++; + levels_loaded_++; return out; } diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h index f85a2a67..9879d8df 100644 --- a/envpool/sokoban/level_loader.h +++ b/envpool/sokoban/level_loader.h @@ -36,16 +36,22 @@ constexpr uint8_t kMaxLevelObject = kPlayerOnTarget; class LevelLoader { protected: + bool load_sequentially_; + int n_levels_to_load_; + int levels_loaded_; std::vector levels_; std::vector::iterator cur_level_; std::vector level_file_paths_; - void LoadNewFile(std::mt19937& gen); + std::vector::iterator cur_file_; + void LoadFile(std::mt19937& gen); public: int verbose; - std::vector::iterator RandomLevel(std::mt19937& gen); - explicit LevelLoader(const std::filesystem::path& base_path, int verbose = 0); + std::vector::iterator GetLevel(std::mt19937& gen); + explicit LevelLoader(const std::filesystem::path& base_path, + bool load_sequentially, int n_levels_to_load, + int verbose = 0); }; void PrintLevel(std::ostream& os, const SokobanLevel& vec); diff --git a/envpool/sokoban/registration.py b/envpool/sokoban/registration.py index e79dc31c..026098e4 100644 --- a/envpool/sokoban/registration.py +++ b/envpool/sokoban/registration.py @@ -23,4 +23,5 @@ gymnasium_cls="SokobanGymnasiumEnvPool", max_episode_steps=60, reward_step=-0.1, + max_num_players=1, ) diff --git a/envpool/sokoban/sample_levels/001.txt 
b/envpool/sokoban/sample_levels/001.txt new file mode 100644 index 00000000..e5b2b185 --- /dev/null +++ b/envpool/sokoban/sample_levels/001.txt @@ -0,0 +1,35 @@ +; 0 +########## +########## +########## +##### # ## +##### # +##### $ # +# . ..# +# $$$ # # +#@ . # +########## + +; 1 +########## +########## +#### # +# $ . # +# # # +#@### .$ # +###### $ # +### $. # +### .# +########## + +; 2 +########## +##### @# +#### ## +####. ## +#. . $ # +# $ $. # +# ### # +# $ ###### +# ###### +########## diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index a29b87da..2d139b08 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -20,17 +20,17 @@ #include #include "envpool/core/py_envpool.h" +#include "envpool/sokoban/utils.h" namespace sokoban { void SokobanEnv::Reset() { const int max_episode_steps = spec_.config["max_episode_steps"_]; const int min_episode_steps = spec_.config["min_episode_steps"_]; - std::uniform_int_distribution episode_length_rand(min_episode_steps, - max_episode_steps); - current_max_episode_steps_ = episode_length_rand(gen_); + current_max_episode_steps_ = + SafeUniformInt(min_episode_steps, max_episode_steps, gen_); - world_ = *(level_loader_.RandomLevel(gen_)); + world_ = *(level_loader_.GetLevel(gen_)); if (world_.size() != dim_room_ * dim_room_) { std::stringstream msg; msg << "Loaded level is not dim_room x dim_room. 
world_.size()=" diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index c788040f..77bee609 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -46,7 +46,9 @@ class SokobanEnvFns { return MakeDict("reward_finished"_.Bind(10.0), "reward_box"_.Bind(1.0), "reward_step"_.Bind(-0.1), "dim_room"_.Bind(10), "levels_dir"_.Bind(std::string("")), "verbose"_.Bind(0), - "min_episode_steps"_.Bind(0)); + "min_episode_steps"_.Bind(0), + "load_sequentially"_.Bind(false), + "n_levels_to_load"_.Bind(-1)); } template static decltype(auto) StateSpec(const Config& conf) { @@ -71,7 +73,9 @@ class SokobanEnv : public Env { reward_box_{static_cast(spec.config["reward_box"_])}, reward_step_{static_cast(spec.config["reward_step"_])}, levels_dir_{static_cast(spec.config["levels_dir"_])}, - level_loader_(levels_dir_), + level_loader_(levels_dir_, spec.config["load_sequentially"_], + static_cast(spec.config["n_levels_to_load"_]), + static_cast(spec.config["verbose"_])), world_(kWall, static_cast(dim_room_ * dim_room_)), verbose_(static_cast(spec.config["verbose"_])), current_max_episode_steps_( diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index bed17588..d5a7a2e4 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -13,18 +13,19 @@ # limitations under the License. 
"""Unit test for dummy envpool and speed benchmark.""" +import glob +import re import time import numpy as np -from absl import logging -from absl.testing import absltest +import pytest import envpool # noqa: F401 import envpool.sokoban.registration from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec -class _SokobanEnvPoolTest(absltest.TestCase): +class TestSokobanEnvPool: def test_config(self) -> None: ref_config_keys = [ @@ -47,13 +48,15 @@ def test_config(self) -> None: "reward_finished", "reward_step", "verbose", + "load_sequentially", + "n_levels_to_load", ] default_conf = _SokobanEnvSpec._default_config_values - self.assertTrue(isinstance(default_conf, tuple)) + assert isinstance(default_conf, tuple) config_keys = _SokobanEnvSpec._config_keys - self.assertTrue(isinstance(config_keys, list)) - self.assertEqual(len(default_conf), len(config_keys)) - self.assertEqual(sorted(config_keys), sorted(ref_config_keys)) + assert isinstance(config_keys, list) + assert len(default_conf) == len(config_keys) + assert sorted(config_keys) == sorted(ref_config_keys) def test_envpool(self) -> None: batch = num_envs = 200 @@ -76,7 +79,7 @@ def test_envpool(self) -> None: _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,))) duration = time.time() - t fps = total_steps * batch / duration - logging.info(f"FPS = {fps:.6f}") + print(f"FPS = {fps:.6f}") def test_envpool_max_episode_steps(self) -> None: for max_episode_steps in [2, 5, 10]: @@ -98,6 +101,58 @@ def test_envpool_max_episode_steps(self) -> None: assert not np.any(terminated) assert np.all(truncated) + def test_envpool_load_sequentially(self, capfd) -> None: + levels_dir = "/app/envpool/sokoban/sample_levels" + files = glob.glob(f"{levels_dir}/*.txt") + levels_by_files = [] + for file in files: + with open(file, "r") as f: + text = f.read() + levels = text.split("\n;") + levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels] + levels_by_files.append((file, levels)) + assert 
len(levels_by_files) > 1 + assert all(len(levels) > 1 for levels in levels_by_files) + total_levels = sum(len(levels) for levels in levels_by_files) + for n_levels_to_load in range(1, total_levels + 1): + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=1, + batch_size=1, + max_episode_steps=60, + min_episode_steps=60, + levels_dir=levels_dir, + load_sequentially=True, + n_levels_to_load=n_levels_to_load, + verbose=2, + ) + dim_room = env.spec.config.dim_room + obs, _ = env.reset() + assert obs.shape == ( + 1, + 3, + dim_room, + dim_room, + ), f"obs shape: {obs.shape}" + if n_levels_to_load == -1: + n_levels_to_load = total_levels + for _ in range(n_levels_to_load - 1): + env.reset() + out, _ = capfd.readouterr() + files_output = out.split("***")[1:] + for i, file_output in enumerate(files_output): + first_line, out = file_output.strip().split("\n", 1) + result = re.search( + r'Loaded (\d+) levels from "(.*\.txt)"', first_line + ) + n_levels, file_name = int(result.group(1)), result.group(2) + lev1, lev2 = out.strip().split("\n\n") + assert file_name == levels_by_files[i][0] + assert n_levels == len(levels_by_files[i][1]) + assert lev1 == levels_by_files[i][1][0] + assert lev2 == levels_by_files[i][1][1] + def test_xla(self) -> None: num_envs = 10 env = envpool.make( @@ -115,4 +170,4 @@ def test_xla(self) -> None: if __name__ == "__main__": - absltest.main() + pytest.main(["-v", __file__]) diff --git a/envpool/sokoban/utils.h b/envpool/sokoban/utils.h new file mode 100644 index 00000000..9d680b8d --- /dev/null +++ b/envpool/sokoban/utils.h @@ -0,0 +1,39 @@ +/* + * Copyright 2023-2024 FAR AI + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ENVPOOL_SOKOBAN_UTILS_H_ +#define ENVPOOL_SOKOBAN_UTILS_H_ + +#include + +namespace sokoban { + +template +T SafeUniformInt(T low, T high, std::mt19937& gen) { + // check if low is greater than high + if (low > high) { + throw std::invalid_argument("low should be less than high"); + } + static_assert(std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v, + "SafeUniformInt only supports int, long, and long long"); + std::uniform_int_distribution dist(low, high); + return dist(gen); +} + +} // namespace sokoban + +#endif // ENVPOOL_SOKOBAN_UTILS_H_ diff --git a/third_party/pip_requirements/requirements-dev.txt b/third_party/pip_requirements/requirements-dev.txt index 7096668a..a72c0607 100644 --- a/third_party/pip_requirements/requirements-dev.txt +++ b/third_party/pip_requirements/requirements-dev.txt @@ -7,6 +7,7 @@ gymnasium>=0.26,!=0.27.0 optree>=0.6.0 jax[cpu] absl-py +pytest packaging tqdm protobuf<=4.20.0 diff --git a/third_party/pip_requirements/requirements-release.txt b/third_party/pip_requirements/requirements-release.txt index a5cdfddb..5b8ceffc 100644 --- a/third_party/pip_requirements/requirements-release.txt +++ b/third_party/pip_requirements/requirements-release.txt @@ -7,3 +7,4 @@ gymnasium>=0.26,!=0.27.0 optree>=0.6.0 jax[cpu] packaging +pytest From 4a12f55e236cddcbfee53bf4a5f214e4a2a10fa9 Mon Sep 17 00:00:00 2001 From: Mohammad Taufeeque <9taufeeque9@gmail.com> Date: Sat, 9 Mar 2024 19:22:46 +0530 Subject: [PATCH 32/60] A* implementation for Sokoban (#4) MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit * add a-star files for sokoban * fix compilation & linking errors * fix astar namespace * optimizations * fix cpplint local * fix lint and compilation error * fix errors * add logging file * fix test indent * fix errors * fix error * fix bugs * fix bugs * fix bug * final fixes * Make linting less noisy by moving third_party files elsewhere, run `make format` * incorporate review comments * fix lint errors * fix lint errors * fix path * revert excluding fsa.h and astar.h from Makefile * take filename instead of index * add test for astar_log * fix lint --------- Co-authored-by: Adrià Garriga-Alonso --- Makefile | 2 +- envpool/sokoban/BUILD | 83 ++- envpool/sokoban/astar_log.cc | 138 ++++ envpool/sokoban/level_loader.cc | 10 +- envpool/sokoban/sokoban_astar_test.cc | 144 ++++ envpool/sokoban/sokoban_node.cc | 187 ++++++ envpool/sokoban/sokoban_node.h | 122 ++++ envpool/sokoban/sokoban_py_envpool_test.py | 118 ++-- third_party/astar_stl/BUILD | 23 + third_party/astar_stl/astar.h | 747 +++++++++++++++++++++ third_party/astar_stl/fsa.h | 211 ++++++ 11 files changed, 1702 insertions(+), 83 deletions(-) create mode 100644 envpool/sokoban/astar_log.cc create mode 100644 envpool/sokoban/sokoban_astar_test.cc create mode 100644 envpool/sokoban/sokoban_node.cc create mode 100644 envpool/sokoban/sokoban_node.h create mode 100644 third_party/astar_stl/BUILD create mode 100644 third_party/astar_stl/astar.h create mode 100644 third_party/astar_stl/fsa.h diff --git a/Makefile b/Makefile index 822be03c..bcce0fad 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ mypy: mypy-install # c++ linter cpplint: cpplint-install - cpplint $(CPP_FILES) + cpplint --root . 
$(CPP_FILES) clang-format: clang-format-install clang-format --style=file -i $(CPP_FILES) -n --Werror diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index ed684f96..8938ad53 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -15,52 +15,79 @@ load("@pip_requirements//:requirements.bzl", "requirement") load("@pybind11_bazel//:build_defs.bzl", "pybind_extension") -package(default_visibility=["//visibility:public"]) +package(default_visibility = ["//visibility:public"]) py_library( - name="sokoban", - srcs=["__init__.py"], - data=[":sokoban_envpool.so"], - deps=["//envpool/python:api"], + name = "sokoban", + srcs = ["__init__.py"], + data = [":sokoban_envpool.so"], + deps = ["//envpool/python:api"], ) py_library( - name="registration", - srcs=["registration.py"], - deps=[ + name = "registration", + srcs = ["registration.py"], + deps = [ "//envpool:registration", ], ) cc_library( - name="sokoban_envpool_h", - hdrs=[ + name = "sokoban_envpool_h", + hdrs = [ "level_loader.h", "sokoban_envpool.h", "utils.h", ], - deps=[ + deps = [ "//envpool/core:async_envpool", "//envpool/core:env", "//envpool/core:env_spec", ], ) -# cc_test( -# name = "sokoban_envpool_test", -# size = "enormous", -# srcs = ["sokoban_envpool_test.cc"], -# deps = [ -# ":sokoban_envpool_h", -# "@com_google_googletest//:gtest_main", -# ], -# ) +cc_library( + name = "sokoban_node_h", + hdrs = [ + "level_loader.h", + "sokoban_node.h", + "utils.h", + ], + deps = ["//third_party/astar_stl:astar_stl_h"], +) + +cc_binary( + name = "astar_log", + srcs = [ + "astar_log.cc", + "level_loader.cc", + "sokoban_node.cc", + ], + deps = [ + ":sokoban_node_h", + ], +) + +cc_test( + name = "sokoban_astar_test", + size = "enormous", + srcs = [ + "level_loader.cc", + "sokoban_astar_test.cc", + "sokoban_node.cc", + ], + deps = [ + ":sokoban_node_h", + "@com_github_google_glog//:glog", + "@com_google_googletest//:gtest_main", + ], +) py_test( - name="test", - srcs=["sokoban_py_envpool_test.py"], - 
main="sokoban_py_envpool_test.py", - deps=[ + name = "test", + srcs = ["sokoban_py_envpool_test.py"], + main = "sokoban_py_envpool_test.py", + deps = [ ":registration", ":sokoban", "//envpool", @@ -70,15 +97,15 @@ py_test( ) pybind_extension( - name="sokoban_envpool", - srcs=[ + name = "sokoban_envpool", + srcs = [ "level_loader.cc", "sokoban_envpool.cc", ], - linkopts=[ + linkopts = [ "-ldl", ], - deps=[ + deps = [ ":sokoban_envpool_h", "//envpool/core:py_envpool", ], diff --git a/envpool/sokoban/astar_log.cc b/envpool/sokoban/astar_log.cc new file mode 100644 index 00000000..55d93c28 --- /dev/null +++ b/envpool/sokoban/astar_log.cc @@ -0,0 +1,138 @@ +// Copyright 2023-2024 FAR AI +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "envpool/sokoban/sokoban_node.h" + +namespace sokoban { + +void RunAStar(const std::string& level_file_name, + const std::string& log_file_name, int total_levels_to_run = 1000, + int fsa_limit = 1000000) { + std::cout << "Running A* on file " << level_file_name << " and logging to " + << log_file_name << " with fsa_limit " << fsa_limit << std::endl; + const int dim_room = 10; + int level_idx = 0; + LevelLoader level_loader(level_file_name, true, -1); + std::mt19937 gen(42); + + std::ofstream log_file_out(log_file_name, std::ios_base::app); + std::ifstream log_file_in(log_file_name); + // check if the file is empty + if (log_file_in.peek() == std::ifstream::traits_type::eof()) { + log_file_out << "Level, Actions, Steps, SearchSteps" << std::endl; + } else { // skip levels that have already been run + std::string line; + std::getline(log_file_in, line); // skip header + while (std::getline(log_file_in, line)) { + SokobanLevel level = *level_loader.GetLevel(gen); + level_idx++; + } + } + log_file_in.close(); + + while (level_idx < total_levels_to_run) { + std::AStarSearch astarsearch(fsa_limit); + std::cout << "Running level " << level_idx << std::endl; + SokobanLevel level = *level_loader.GetLevel(gen); + + SokobanNode node_start(dim_room, level, false); + SokobanNode node_end(dim_room, level, true); + astarsearch.SetStartAndGoalStates(node_start, node_end); + unsigned int search_state; + unsigned int search_steps = 0; + std::cout << "Starting search" << std::endl; + do { + search_state = astarsearch.SearchStep(); + search_steps++; + } while (search_state == + std::AStarSearch::SEARCH_STATE_SEARCHING); + + if (search_state == std::AStarSearch::SEARCH_STATE_SUCCEEDED) { + std::stringstream loglinestream; + loglinestream << level_idx << ", "; + astarsearch.GetSolutionStart(); + int steps = 0; + for (;;) { + SokobanNode* node = astarsearch.GetSolutionNext(); + if (node == nullptr) { + break; + } + int action = node->action_from_parent; 
+ assert(action >= 0 && action < 4); + loglinestream << action; + steps++; + } + loglinestream << ", " << steps << ", " << search_steps << std::endl; + log_file_out << loglinestream.str(); + astarsearch.FreeSolutionNodes(); + astarsearch.EnsureMemoryFreed(); + } else if (search_state == + std::AStarSearch::SEARCH_STATE_FAILED) { + log_file_out << level_idx << ", " + << "SEARCH_STATE_FAILED, -1, " << search_steps << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_NOT_INITIALISED) { + log_file_out << level_idx << ", " + << "SEARCH_STATE_NOT_INITIALISED, -1, " << search_steps + << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_SEARCHING) { + log_file_out << level_idx << ", " + << "SEARCH_STATE_SEARCHING, -1, " << search_steps + << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_OUT_OF_MEMORY) { + log_file_out << level_idx << ", " + << "SEARCH_STATE_OUT_OF_MEMORY, -1, " << search_steps + << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_INVALID) { + log_file_out << level_idx << ", " + << "SEARCH_STATE_INVALID, -1, " << search_steps << std::endl; + } else { + log_file_out << level_idx << ", " + << "UNKNOWN, -1, " << search_steps << std::endl; + } + log_file_out.flush(); + level_idx++; + } +} +} // namespace sokoban + +int main(int argc, char** argv) { + int total_levels_to_run = 1000; + int fsa_limit = 1000000; + if (argc < 3) { + std::cout + << "Usage: " << argv[0] + << " level_file_name log_file_name [total_levels_to_run] [fsa_limit]" + << std::endl; + return 1; + } + std::string level_file_name = argv[1]; + std::string log_file_name = argv[2]; + if (argc > 3) { + total_levels_to_run = std::stoi(argv[3]); + } + if (argc > 4) { + fsa_limit = std::stoi(argv[4]); + } + + sokoban::RunAStar(level_file_name, log_file_name, total_levels_to_run, + fsa_limit); + return 0; +} diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index 314f4175..9d0a9863 
100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -37,8 +37,12 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path, cur_level_(levels_.begin()), level_file_paths_(0), verbose(verbose) { - for (const auto& entry : std::filesystem::directory_iterator(base_path)) { - level_file_paths_.push_back(entry.path()); + if (std::filesystem::is_regular_file(base_path)) { + level_file_paths_.push_back(base_path); + } else { + for (const auto& entry : std::filesystem::directory_iterator(base_path)) { + level_file_paths_.push_back(entry.path()); + } } cur_file_ = level_file_paths_.begin(); } @@ -49,7 +53,7 @@ static const std::array kPrintLevelKey{ void AddLine(SokobanLevel& level, const std::string& line) { auto start = line.at(0); auto end = line.at(line.size() - 1); - if ((start != '#') || (start != '#')) { + if ((start != '#') || (end != '#')) { std::stringstream msg; msg << "Line '" << line << "' does not start (" << start << ") and end (" << end << ") with '#', as it should." << std::endl; diff --git a/envpool/sokoban/sokoban_astar_test.cc b/envpool/sokoban/sokoban_astar_test.cc new file mode 100644 index 00000000..2b0bafd6 --- /dev/null +++ b/envpool/sokoban/sokoban_astar_test.cc @@ -0,0 +1,144 @@ +// Copyright 2023-2024 FAR AI +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +#include "envpool/sokoban/sokoban_node.h" + +#define DEBUG_LISTS 0 +#define DEBUG_LIST_LENGTHS_ONLY 0 + +namespace sokoban { +TEST(SokobanAStarTest, Basic) { + std::cout << "STL A* Search implementation\n(C)2001 Justin Heyes-Jones\n"; + + // Create an instance of the search class... + std::AStarSearch astarsearch(1000000); + std::vector verify_steps = {38, 19}; + std::vector verify_search_steps = {63408, 24991}; + + unsigned int search_count = 0; + const unsigned int num_searches = 2; + const std::string level_file = "/app/envpool/sokoban/sample_levels/"; + const int dim_room = 10; + LevelLoader level_loader(level_file, false, 2); + std::mt19937 gen(42); + + while (search_count < num_searches) { + // Create a start state + SokobanLevel level = *level_loader.GetLevel(gen); + + SokobanNode node_start(dim_room, level, false); + SokobanNode node_end(dim_room, level, true); + std::vector>* goals = &node_end.boxes; + node_start.PrintNodeInfo(goals); + astarsearch.SetStartAndGoalStates(node_start, node_end); + + unsigned int search_state; + unsigned int search_steps = 0; + + do { + search_state = astarsearch.SearchStep(); + + search_steps++; + +#if DEBUG_LISTS + + std::cout << "Steps:" << search_steps << "\n"; + + int len = 0; + + std::cout << "Open:\n"; + SokobanNode* p = astarsearch.GetOpenListStart(); + while (p) { + len++; +#if !DEBUG_LIST_LENGTHS_ONLY + ((SokobanNode*)p)->PrintNodeInfo(goals); +#endif + p = astarsearch.GetOpenListNext(); + } + + std::cout << "Open list has " << len << " nodes\n"; + + len = 0; + + std::cout << "Closed:\n"; + p = astarsearch.GetClosedListStart(); + while (p) { + len++; +#if !DEBUG_LIST_LENGTHS_ONLY + p->PrintNodeInfo(goals); +#endif + p = astarsearch.GetClosedListNext(); + } + + std::cout << "Closed list has " << len << " nodes\n"; +#endif + } while (search_state == + std::AStarSearch::SEARCH_STATE_SEARCHING); + + if (search_state == std::AStarSearch::SEARCH_STATE_SUCCEEDED) { + std::cout << "Search found goal 
state\n"; + + SokobanNode* node = astarsearch.GetSolutionStart(); + + int steps = 0; + + node->PrintNodeInfo(goals); + for (;;) { + node = astarsearch.GetSolutionNext(); + + if (node == nullptr) { + break; + } + std::cout << "Step " << steps << std::endl; + node->PrintNodeInfo(goals); + steps++; + } + std::cout << "Solution steps " << steps << std::endl; + EXPECT_EQ(steps, verify_steps.at(search_count)); + + // Once you're done with the solution you can free the nodes up + astarsearch.FreeSolutionNodes(); + + } else if (search_state == + std::AStarSearch::SEARCH_STATE_FAILED) { + std::cout << "Search terminated. Did not find goal state\n"; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_NOT_INITIALISED) { + std::cout << "SEARCH_STATE_NOT_INITIALISED\n"; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_SEARCHING) { + std::cout << "SEARCH_STATE_SEARCHING\n"; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_OUT_OF_MEMORY) { + std::cout << "SEARCH_STATE_OUT_OF_MEMORY\n"; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_INVALID) { + std::cout << "SEARCH_STATE_INVALID\n"; + } + + // Display the number of loops the search went through + std::cout << "search_steps : " << search_steps << "\n"; + EXPECT_EQ(search_state, + std::AStarSearch::SEARCH_STATE_SUCCEEDED); + EXPECT_EQ(search_steps, verify_search_steps.at(search_count)); + + search_count++; + + astarsearch.EnsureMemoryFreed(); + } +} +} // namespace sokoban diff --git a/envpool/sokoban/sokoban_node.cc b/envpool/sokoban/sokoban_node.cc new file mode 100644 index 00000000..f22065ca --- /dev/null +++ b/envpool/sokoban/sokoban_node.cc @@ -0,0 +1,187 @@ +// Copyright 2023-2024 FAR AI +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "envpool/sokoban/sokoban_node.h" + +#include +#include + +namespace sokoban { + +bool SokobanNode::IsSameState(SokobanNode& rhs) const { + if (player_x != rhs.player_x || player_y != rhs.player_y) { + return false; + } + return boxes == rhs.boxes; +} + +void SokobanNode::PrintNodeInfo(std::vector>* goals) { + std::cout << "Action: " << action_from_parent << std::endl; + for (int y = 0; y < dim_room; y++) { + for (int x = 0; x < dim_room; x++) { + bool is_wall = walls->at(x + y * dim_room); + bool is_player = (x == player_x && y == player_y); + bool is_box = false; + bool is_goal = false; + for (const auto& box : boxes) { + if (box.first == x && box.second == y) { + is_box = true; + break; + } + } + if (goals != nullptr) { + for (const auto& goal : *goals) { + if (goal.first == x && goal.second == y) { + is_goal = true; + break; + } + } + } + if (is_wall) { + std::cout << "#"; + } else if (is_player) { + if (is_goal) { + std::cout << "a"; + } else { + std::cout << "@"; + } + } else if (is_box) { + if (is_goal) { + std::cout << "s"; + } else { + std::cout << "$"; + } + } else if (is_goal) { + std::cout << "."; + } else { + std::cout << " "; + } + } + std::cout << std::endl; + } +} + +std::unique_ptr SokobanNode::GetChildNode(int action_idx) { + int delta_x = kDelta.at(action_idx).at(0); + int delta_y = kDelta.at(action_idx).at(1); + int new_player_x = player_x + delta_x; + int new_player_y = player_y + delta_y; + // check if the move is valid + if (CheckWall(new_player_x, new_player_y)) { + return nullptr; + } + // check if (new_player_x, 
new_player_y) is a box, if it is not, return a + // new SokobanNode with the new player position + std::vector> new_boxes = boxes; + for (size_t i = 0; i < boxes.size(); i++) { + if (boxes.at(i).first == new_player_x && + boxes.at(i).second == new_player_y) { + int new_box_x = boxes.at(i).first + delta_x; + int new_box_y = boxes.at(i).second + delta_y; + // check if the box can move + if (CheckWall(new_box_x, new_box_y)) { + return nullptr; + } + // check if the box is blocked by another box + for (const auto& orig_box : boxes) { + if (orig_box.first == new_box_x && orig_box.second == new_box_y) { + return nullptr; + } + } + // update the box position + new_boxes.at(i).first = new_box_x; + new_boxes.at(i).second = new_box_y; + if (delta_y != 0) { + std::sort( + new_boxes.begin(), new_boxes.end(), + [](const std::pair& a, const std::pair& b) { + if (a.second != b.second) { + return a.second < b.second; + } + return a.first < b.first; + }); + } + break; + } + } + return std::make_unique(dim_room, new_player_x, new_player_y, + new_boxes, walls, this, action_idx); +} + +bool SokobanNode::CheckWall(int x, int y) const { + if (x < 0 || x >= dim_room || y < 0 || y >= dim_room) { + return true; + } + return walls->at(x + y * dim_room); +} + +size_t SokobanNode::Hash() const { + size_t hash = 0; + hash = (hash * 397) ^ std::hash{}(player_x); + hash = (hash * 397) ^ std::hash{}(player_y); + for (const auto& box : boxes) { + hash = (hash * 397) ^ std::hash{}(box.first); + hash = (hash * 397) ^ std::hash{}(box.second); + } + return hash; +} + +bool SokobanNode::IsGoal(SokobanNode& goal_node) { + for (const auto& box : boxes) { + bool matched = false; + for (const auto& goal_box : goal_node.boxes) { + if (box == goal_box) { + matched = true; + break; + } + } + if (!matched) { + return false; + } + } + return true; +} + +float SokobanNode::GoalDistanceEstimate(SokobanNode& goal_node) { + float h = 0; + for (const auto& box : boxes) { + float min_distance = 
std::numeric_limits::max(); + for (const auto& goal_box : goal_node.boxes) { + float distance = + abs(box.first - goal_box.first) + abs(box.second - goal_box.second); + min_distance = std::min(min_distance, distance); + } + h += min_distance; + } + return h; +} + +float SokobanNode::GetCost(SokobanNode& successor) { return 1; } + +bool SokobanNode::GetSuccessors(std::AStarSearch* astarsearch, + SokobanNode* parent_node) { + for (size_t i = 0; i < kDelta.size(); i++) { + std::unique_ptr new_node_ptr = GetChildNode(i); + if (new_node_ptr == nullptr) { + continue; + } + if (parent_node != nullptr && new_node_ptr->IsSameState(*parent_node)) { + continue; + } + astarsearch->AddSuccessor(*new_node_ptr); + } + return true; +} + +} // namespace sokoban diff --git a/envpool/sokoban/sokoban_node.h b/envpool/sokoban/sokoban_node.h new file mode 100644 index 00000000..3402c7c1 --- /dev/null +++ b/envpool/sokoban/sokoban_node.h @@ -0,0 +1,122 @@ +// Copyright 2023-2024 FAR AI +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef ENVPOOL_SOKOBAN_SOKOBAN_NODE_H_ +#define ENVPOOL_SOKOBAN_SOKOBAN_NODE_H_ + +#include +#include +#include + +#include "envpool/sokoban/level_loader.h" +#include "third_party/astar_stl/astar.h" + +namespace sokoban { + +class SokobanNode { + public: + static constexpr std::array, 4> kDelta = { + {{0, -1}, {0, 1}, {-1, 0}, {1, 0}} // Up, Down, Left, Right + }; + int dim_room{0}; + int player_x{0}, player_y{0}; + std::vector> boxes; + unsigned int total_boxes{0}; + std::shared_ptr> walls; + SokobanNode* parent_node{nullptr}; + int action_from_parent{-1}; // -1 is for when node is root + bool is_goal_node{false}; + + SokobanNode() = default; + + SokobanNode(int dim_room, const SokobanLevel& world, bool is_goal_node) + : dim_room(dim_room), + walls(std::make_shared>(dim_room * dim_room, false)), + is_goal_node(is_goal_node) { + for (int y = 0; y < dim_room; y++) { + for (int x = 0; x < dim_room; x++) { + switch (world.at(x + y * dim_room)) { + case kPlayer: + player_x = x; + player_y = y; + break; + case kBox: + if (!is_goal_node) { + total_boxes++; + boxes.emplace_back(std::make_pair(x, y)); + } + break; + case kTarget: + if (is_goal_node) { + total_boxes++; + boxes.emplace_back(std::make_pair(x, y)); + } + break; + case kBoxOnTarget: + total_boxes++; + boxes.emplace_back(std::make_pair(x, y)); + break; + case kPlayerOnTarget: + player_x = x; + player_y = y; + break; + } + + if (world.at(x + y * dim_room) == kWall) { + walls->at(x + y * dim_room) = true; + } + } + } + assert(total_boxes == boxes.size()); + } + + SokobanNode(int dim_room, int player_x, int player_y, + const std::vector>& boxes, + std::shared_ptr> walls, + SokobanNode* parent_node = nullptr, int action_from_parent = -1) + : dim_room(dim_room), + player_x(player_x), + player_y(player_y), + boxes(boxes), + total_boxes(boxes.size()), + walls(std::move(walls)), + parent_node(parent_node), + action_from_parent(action_from_parent) {} + + void UpdateGoalNode(SokobanNode goal_node) { + 
assert(is_goal_node); + goal_node.is_goal_node = true; + player_x = goal_node.player_x; + player_y = goal_node.player_y; + parent_node = goal_node.parent_node; + action_from_parent = goal_node.action_from_parent; + } + + [[nodiscard]] bool CheckWall(int x, int y) const; + + std::unique_ptr GetChildNode(int action_idx); + + float GoalDistanceEstimate(SokobanNode& goal_node); + bool IsGoal(SokobanNode& goal_node); + bool GetSuccessors(std::AStarSearch* astarsearch, + SokobanNode* parent_node); + static float GetCost(SokobanNode& successor); + bool IsSameState(SokobanNode& rhs) const; + [[nodiscard]] size_t Hash() const; + + void PrintNodeInfo(std::vector>* goals = nullptr); +}; +} // namespace sokoban + +#endif // ENVPOOL_SOKOBAN_SOKOBAN_NODE_H_ diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index d5a7a2e4..4bb423f6 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -15,6 +15,8 @@ import glob import re +import subprocess +import tempfile import time import numpy as np @@ -101,57 +103,55 @@ def test_envpool_max_episode_steps(self) -> None: assert not np.any(terminated) assert np.all(truncated) - def test_envpool_load_sequentially(self, capfd) -> None: - levels_dir = "/app/envpool/sokoban/sample_levels" - files = glob.glob(f"{levels_dir}/*.txt") - levels_by_files = [] - for file in files: - with open(file, "r") as f: - text = f.read() - levels = text.split("\n;") - levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels] - levels_by_files.append((file, levels)) - assert len(levels_by_files) > 1 - assert all(len(levels) > 1 for levels in levels_by_files) - total_levels = sum(len(levels) for levels in levels_by_files) - for n_levels_to_load in range(1, total_levels + 1): - env = envpool.make( - "Sokoban-v0", - env_type="gymnasium", - num_envs=1, - batch_size=1, - max_episode_steps=60, - min_episode_steps=60, - levels_dir=levels_dir, - 
load_sequentially=True, - n_levels_to_load=n_levels_to_load, - verbose=2, - ) - dim_room = env.spec.config.dim_room - obs, _ = env.reset() - assert obs.shape == ( - 1, - 3, - dim_room, - dim_room, - ), f"obs shape: {obs.shape}" - if n_levels_to_load == -1: - n_levels_to_load = total_levels - for _ in range(n_levels_to_load - 1): - env.reset() - out, _ = capfd.readouterr() - files_output = out.split("***")[1:] - for i, file_output in enumerate(files_output): - first_line, out = file_output.strip().split("\n", 1) - result = re.search( - r'Loaded (\d+) levels from "(.*\.txt)"', first_line - ) - n_levels, file_name = int(result.group(1)), result.group(2) - lev1, lev2 = out.strip().split("\n\n") - assert file_name == levels_by_files[i][0] - assert n_levels == len(levels_by_files[i][1]) - assert lev1 == levels_by_files[i][1][0] - assert lev2 == levels_by_files[i][1][1] + def test_envpool_load_sequentially(self, capfd) -> None: + levels_dir = "/app/envpool/sokoban/sample_levels" + files = glob.glob(f"{levels_dir}/*.txt") + levels_by_files = [] + for file in files: + with open(file, "r") as f: + text = f.read() + levels = text.split("\n;") + levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels] + levels_by_files.append((file, levels)) + assert len(levels_by_files) > 1 + assert all(len(levels) > 1 for levels in levels_by_files) + total_levels = sum(len(levels) for levels in levels_by_files) + for n_levels_to_load in range(1, total_levels + 1): + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=1, + batch_size=1, + max_episode_steps=60, + min_episode_steps=60, + levels_dir=levels_dir, + load_sequentially=True, + n_levels_to_load=n_levels_to_load, + verbose=2, + ) + dim_room = env.spec.config.dim_room + obs, _ = env.reset() + assert obs.shape == ( + 1, + 3, + dim_room, + dim_room, + ), f"obs shape: {obs.shape}" + if n_levels_to_load == -1: + n_levels_to_load = total_levels + for _ in range(n_levels_to_load - 1): + env.reset() + out, _ = 
capfd.readouterr() + files_output = out.split("***")[1:] + for i, file_output in enumerate(files_output): + first_line, out = file_output.strip().split("\n", 1) + result = re.search(r'Loaded (\d+) levels from "(.*\.txt)"', first_line) + n_levels, file_name = int(result.group(1)), result.group(2) + lev1, lev2 = out.strip().split("\n\n") + assert file_name == levels_by_files[i][0] + assert n_levels == len(levels_by_files[i][1]) + assert lev1 == levels_by_files[i][1][0] + assert lev2 == levels_by_files[i][1][1] def test_xla(self) -> None: num_envs = 10 @@ -169,5 +169,21 @@ def test_xla(self) -> None: handle, recv, send, step = env.xla() +def test_astar_log(self) -> None: + level_file_name = "/app/envpool/sokoban/sample_levels/001.txt" + with tempfile.NamedTemporaryFile() as f: + log_file_name = f.name + subprocess.run( + [ + "bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name, + log_file_name, 1 + ], + check=True, + ) + with open(log_file_name, "r") as f: + log = f.read() + assert "0, 301333002213130203303031, 24, 40611" == log.split("\n")[1] + + if __name__ == "__main__": pytest.main(["-v", __file__]) diff --git a/third_party/astar_stl/BUILD b/third_party/astar_stl/BUILD new file mode 100644 index 00000000..03de4363 --- /dev/null +++ b/third_party/astar_stl/BUILD @@ -0,0 +1,23 @@ +# Copyright 2023-2024 FAR AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +cc_library( + name = "astar_stl_h", + hdrs = [ + "astar.h", + "fsa.h", + ], + visibility = ["//visibility:public"], + deps = [], +) diff --git a/third_party/astar_stl/astar.h b/third_party/astar_stl/astar.h new file mode 100644 index 00000000..61a99202 --- /dev/null +++ b/third_party/astar_stl/astar.h @@ -0,0 +1,747 @@ +/* +A* Algorithm Implementation using STL is +Copyright (C)2001-2005 Justin Heyes-Jones + +Permission is given by the author to freely redistribute and +include this code in any program as long as this credit is +given where due. + + COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE + IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE + OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND + PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED + CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL + DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY + NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF + WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE + OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER + THIS DISCLAIMER. + + Use at your own risk! + +*/ + +#ifndef STLASTAR_H +#define STLASTAR_H +// used for text debugging +#include + +#include +// #include +#include + +// stl includes +#include +#include +#include +#include + +// fast fixed size memory allocator, used for fast node memory management +#include "fsa.h" + +// Fixed size memory allocator can be disabled to compare performance +// Uses std new and delete instead if you turn it off +#define USE_FSA_MEMORY 1 + +// disable warning that debugging information has lines that are truncated +// occurs in stl headers +#if defined(WIN32) && defined(_WINDOWS) +#pragma warning(disable : 4786) +#endif + +namespace std { + +template +class AStarState; + +// The AStar search class. 
UserState is the users state space type +template +class AStarSearch { + public: // data + enum { + SEARCH_STATE_NOT_INITIALISED, + SEARCH_STATE_SEARCHING, + SEARCH_STATE_SUCCEEDED, + SEARCH_STATE_FAILED, + SEARCH_STATE_OUT_OF_MEMORY, + SEARCH_STATE_INVALID + }; + + // A node represents a possible state in the search + // The user provided state type is included inside this type + + public: + class Node { + public: + Node* parent; // used during the search to record the parent of successor + // nodes + Node* child; // used after the search for the application to view the + // search in reverse + + float g; // cost of this node + its predecessors + float h; // heuristic estimate of distance to goal + float f; // sum of cumulative cost of predecessors and self and heuristic + + Node() : parent(0), child(0), g(0.0f), h(0.0f), f(0.0f) {} + + bool operator==(const Node& otherNode) const { + return this->m_UserState.IsSameState(otherNode->m_UserState); + } + + UserState m_UserState; + }; + + // For sorting the heap the STL needs compare function that lets us compare + // the f value of two nodes + + class HeapCompare_f { + public: + bool operator()(const Node* x, const Node* y) const { return x->f > y->f; } + }; + + public: // methods + // constructor just initialises private data + AStarSearch() + : m_State(SEARCH_STATE_NOT_INITIALISED), + m_CurrentSolutionNode(NULL), +#if USE_FSA_MEMORY + m_FixedSizeAllocator(1000), +#endif + m_AllocateNodeCount(0), + m_CancelRequest(false) { + } + + AStarSearch(int MaxNodes) + : m_State(SEARCH_STATE_NOT_INITIALISED), + m_CurrentSolutionNode(NULL), +#if USE_FSA_MEMORY + m_FixedSizeAllocator(MaxNodes), +#endif + m_AllocateNodeCount(0), + m_CancelRequest(false) { + } + + // call at any time to cancel the search and free up all the memory + void CancelSearch() { m_CancelRequest = true; } + + // Set Start and goal states + void SetStartAndGoalStates(UserState& Start, UserState& Goal) { + m_CancelRequest = false; + + m_Start = 
AllocateNode(); + m_Goal = AllocateNode(); + + assert((m_Start != NULL && m_Goal != NULL)); + + m_Start->m_UserState = Start; + m_Goal->m_UserState = Goal; + + m_State = SEARCH_STATE_SEARCHING; + + // Initialise the AStar specific parts of the Start Node + // The user only needs fill out the state information + + m_Start->g = 0; + m_Start->h = m_Start->m_UserState.GoalDistanceEstimate(m_Goal->m_UserState); + m_Start->f = m_Start->g + m_Start->h; + m_Start->parent = 0; + + // Push the start node on the Open list + + m_OpenList.push_back(m_Start); // heap now unsorted + + // Sort back element into heap + push_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f()); + + // Initialise counter for search steps + m_Steps = 0; + } + + // Advances search one step + unsigned int SearchStep() { + // Firstly break if the user has not initialised the search + assert((m_State > SEARCH_STATE_NOT_INITIALISED) && + (m_State < SEARCH_STATE_INVALID)); + + // Next I want it to be safe to do a searchstep once the search has + // succeeded... + if ((m_State == SEARCH_STATE_SUCCEEDED) || + (m_State == SEARCH_STATE_FAILED)) { + return m_State; + } + + // Failure is defined as emptying the open list as there is nothing left to + // search... 
+ // New: Allow user abort + if (m_OpenList.empty() || m_CancelRequest) { + FreeAllNodes(); + m_State = SEARCH_STATE_FAILED; + return m_State; + } + + // Incremement step count + m_Steps++; + + // Pop the best node (the one with the lowest f) + Node* n = m_OpenList.front(); // get pointer to the node + pop_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f()); + m_OpenList.pop_back(); + + // Check for the goal, once we pop that we're done + if (n->m_UserState.IsGoal(m_Goal->m_UserState)) { + // The user is going to use the Goal Node he passed in + // so copy the parent pointer of n + m_Goal->parent = n->parent; + m_Goal->g = n->g; + m_Goal->m_UserState.UpdateGoalNode(n->m_UserState); + + // A special case is that the goal was passed in as the start state + // so handle that here + if (false == n->m_UserState.IsSameState(m_Start->m_UserState)) { + FreeNode(n); + + // set the child pointers in each node (except Goal which has no child) + Node* nodeChild = m_Goal; + Node* nodeParent = m_Goal->parent; + + do { + nodeParent->child = nodeChild; + + nodeChild = nodeParent; + nodeParent = nodeParent->parent; + + } while (nodeChild != + m_Start); // Start is always the first node by definition + } + + // delete nodes that aren't needed for the solution + FreeUnusedNodes(); + + m_State = SEARCH_STATE_SUCCEEDED; + + return m_State; + } else // not goal + { + // We now need to generate the successors of this node + // The user helps us to do this, and we keep the new nodes in + // m_Successors ... + + m_Successors.clear(); // empty vector of successor nodes to n + + // User provides this functions and uses AddSuccessor to add each + // successor of node 'n' to m_Successors + bool ret = n->m_UserState.GetSuccessors( + this, n->parent ? 
&n->parent->m_UserState : NULL); + + if (!ret) { + typename vector::iterator successor; + + // free the nodes that may previously have been added + for (successor = m_Successors.begin(); successor != m_Successors.end(); + successor++) { + FreeNode((*successor)); + } + + m_Successors.clear(); // empty vector of successor nodes to n + + // free up everything else we allocated + FreeNode((n)); + FreeAllNodes(); + + m_State = SEARCH_STATE_OUT_OF_MEMORY; + return m_State; + } + + // Now handle each successor to the current node ... + for (typename vector::iterator successor = m_Successors.begin(); + successor != m_Successors.end(); successor++) { + // The g value for this successor ... + float newg = n->g + n->m_UserState.GetCost((*successor)->m_UserState); + + // Now we need to find whether the node is on the open or closed lists + // If it is but the node that is already on them is better (lower g) + // then we can forget about this successor + + // First linear search of open list to find node + + typename vector::iterator openlist_result; + + for (openlist_result = m_OpenList.begin(); + openlist_result != m_OpenList.end(); openlist_result++) { + if ((*openlist_result) + ->m_UserState.IsSameState((*successor)->m_UserState)) { + break; + } + } + + if (openlist_result != m_OpenList.end()) { + // we found this state on open + + if ((*openlist_result)->g <= newg) { + FreeNode((*successor)); + + // the one on Open is cheaper than this one + continue; + } + } + typename unordered_set::iterator + closedlist_result; + + closedlist_result = m_ClosedList.find(*successor); + + if (closedlist_result != m_ClosedList.end()) { + // we found this state on closed + + if ((*closedlist_result)->g <= newg) { + // the one on Closed is cheaper than this one + FreeNode((*successor)); + + continue; + } + } + + // This node is the best node so far with this particular state + // so lets keep it and set up its AStar specific data ... 
+ + (*successor)->parent = n; + (*successor)->g = newg; + (*successor)->h = + (*successor)->m_UserState.GoalDistanceEstimate(m_Goal->m_UserState); + (*successor)->f = (*successor)->g + (*successor)->h; + + // Successor in closed list + // 1 - Update old version of this node in closed list + // 2 - Move it from closed to open list + // 3 - Sort heap again in open list + + if (closedlist_result != m_ClosedList.end()) { + // Update closed node with successor node AStar data + //*(*closedlist_result) = *(*successor); + (*closedlist_result)->parent = (*successor)->parent; + (*closedlist_result)->g = (*successor)->g; + (*closedlist_result)->h = (*successor)->h; + (*closedlist_result)->f = (*successor)->f; + + // Free successor node + FreeNode((*successor)); + + // Push closed node into open list + m_OpenList.push_back((*closedlist_result)); + + // Remove closed node from closed list + m_ClosedList.erase(closedlist_result); + + // Sort back element into heap + push_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f()); + + // Fix thanks to ... + // Greg Douglas + // who noticed that this code path was incorrect + // Here we have found a new state which is already CLOSED + + } + + // Successor in open list + // 1 - Update old version of this node in open list + // 2 - sort heap again in open list + + else if (openlist_result != m_OpenList.end()) { + // Update open node with successor node AStar data + //*(*openlist_result) = *(*successor); + (*openlist_result)->parent = (*successor)->parent; + (*openlist_result)->g = (*successor)->g; + (*openlist_result)->h = (*successor)->h; + (*openlist_result)->f = (*successor)->f; + + // Free successor node + FreeNode((*successor)); + + // re-make the heap + // make_heap rather than sort_heap is an essential bug fix + // thanks to Mike Ryynanen for pointing this out and then explaining + // it in detail. 
sort_heap called on an invalid heap does not work + make_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f()); + } + + // New successor + // 1 - Move it from successors to open list + // 2 - sort heap again in open list + + else { + // Push successor node into open list + m_OpenList.push_back((*successor)); + + // Sort back element into heap + push_heap(m_OpenList.begin(), m_OpenList.end(), HeapCompare_f()); + } + } + + // push n onto Closed, as we have expanded it now + + m_ClosedList.insert(n); + + } // end else (not goal so expand) + + return m_State; // Succeeded bool is false at this point. + } + + // User calls this to add a successor to a list of successors + // when expanding the search frontier + bool AddSuccessor(UserState& State) { + Node* node = AllocateNode(); + + if (node) { + node->m_UserState = State; + + m_Successors.push_back(node); + + return true; + } + + return false; + } + + // Free the solution nodes + // This is done to clean up all used Node memory when you are done with the + // search + void FreeSolutionNodes() { + Node* n = m_Start; + + if (m_Start->child) { + do { + Node* del = n; + n = n->child; + FreeNode(del); + + del = NULL; + + } while (n != m_Goal); + + FreeNode(n); // Delete the goal + + } else { + // if the start node is the solution we need to just delete the start and + // goal nodes + FreeNode(m_Start); + FreeNode(m_Goal); + } + } + + // Functions for traversing the solution + + // Get start node + UserState* GetSolutionStart() { + m_CurrentSolutionNode = m_Start; + if (m_Start) { + return &m_Start->m_UserState; + } else { + return NULL; + } + } + + // Get next node + UserState* GetSolutionNext() { + if (m_CurrentSolutionNode) { + if (m_CurrentSolutionNode->child) { + Node* child = m_CurrentSolutionNode->child; + + m_CurrentSolutionNode = m_CurrentSolutionNode->child; + + return &child->m_UserState; + } + } + + return NULL; + } + + // Get end node + UserState* GetSolutionEnd() { + m_CurrentSolutionNode = m_Goal; + if 
(m_Goal) { + return &m_Goal->m_UserState; + } else { + return NULL; + } + } + + // Step solution iterator backwards + UserState* GetSolutionPrev() { + if (m_CurrentSolutionNode) { + if (m_CurrentSolutionNode->parent) { + Node* parent = m_CurrentSolutionNode->parent; + + m_CurrentSolutionNode = m_CurrentSolutionNode->parent; + + return &parent->m_UserState; + } + } + + return NULL; + } + + // Get final cost of solution + // Returns FLT_MAX if goal is not defined or there is no solution + float GetSolutionCost() { + if (m_Goal && m_State == SEARCH_STATE_SUCCEEDED) { + return m_Goal->g; + } else { + return FLT_MAX; + } + } + + // For educational use and debugging it is useful to be able to view + // the open and closed list at each step, here are two functions to allow + // that. + + UserState* GetOpenListStart() { + float f, g, h; + return GetOpenListStart(f, g, h); + } + + UserState* GetOpenListStart(float& f, float& g, float& h) { + iterDbgOpen = m_OpenList.begin(); + if (iterDbgOpen != m_OpenList.end()) { + f = (*iterDbgOpen)->f; + g = (*iterDbgOpen)->g; + h = (*iterDbgOpen)->h; + return &(*iterDbgOpen)->m_UserState; + } + + return NULL; + } + + UserState* GetOpenListNext() { + float f, g, h; + return GetOpenListNext(f, g, h); + } + + UserState* GetOpenListNext(float& f, float& g, float& h) { + iterDbgOpen++; + if (iterDbgOpen != m_OpenList.end()) { + f = (*iterDbgOpen)->f; + g = (*iterDbgOpen)->g; + h = (*iterDbgOpen)->h; + return &(*iterDbgOpen)->m_UserState; + } + + return NULL; + } + + UserState* GetClosedListStart() { + float f, g, h; + return GetClosedListStart(f, g, h); + } + + UserState* GetClosedListStart(float& f, float& g, float& h) { + iterDbgClosed = m_ClosedList.begin(); + if (iterDbgClosed != m_ClosedList.end()) { + f = (*iterDbgClosed)->f; + g = (*iterDbgClosed)->g; + h = (*iterDbgClosed)->h; + + return &(*iterDbgClosed)->m_UserState; + } + + return NULL; + } + + UserState* GetClosedListNext() { + float f, g, h; + return GetClosedListNext(f, g, h); 
+ } + + UserState* GetClosedListNext(float& f, float& g, float& h) { + iterDbgClosed++; + if (iterDbgClosed != m_ClosedList.end()) { + f = (*iterDbgClosed)->f; + g = (*iterDbgClosed)->g; + h = (*iterDbgClosed)->h; + + return &(*iterDbgClosed)->m_UserState; + } + + return NULL; + } + + // Get the number of steps + + int GetStepCount() { return m_Steps; } + + void EnsureMemoryFreed() { +#if USE_FSA_MEMORY + assert(m_AllocateNodeCount == 0); +#endif + } + + private: // methods + // This is called when a search fails or is cancelled to free all used + // memory + void FreeAllNodes() { + // iterate open list and delete all nodes + typename vector::iterator iterOpen = m_OpenList.begin(); + + while (iterOpen != m_OpenList.end()) { + Node* n = (*iterOpen); + FreeNode(n); + + iterOpen++; + } + + m_OpenList.clear(); + + // iterate closed list and delete unused nodes + typename unordered_set::iterator iterClosed; + + for (iterClosed = m_ClosedList.begin(); iterClosed != m_ClosedList.end(); + iterClosed++) { + Node* n = (*iterClosed); + FreeNode(n); + } + + m_ClosedList.clear(); + + // delete the goal + + FreeNode(m_Goal); + } + + // This call is made by the search class when the search ends. A lot of nodes + // may be created that are still present when the search ends. 
They will be + // deleted by this routine once the search ends + void FreeUnusedNodes() { + // iterate open list and delete unused nodes + typename vector::iterator iterOpen = m_OpenList.begin(); + + while (iterOpen != m_OpenList.end()) { + Node* n = (*iterOpen); + + if (!n->child) { + FreeNode(n); + + n = NULL; + } + + iterOpen++; + } + + m_OpenList.clear(); + + // iterate closed list and delete unused nodes + typename unordered_set::iterator iterClosed; + + for (iterClosed = m_ClosedList.begin(); iterClosed != m_ClosedList.end(); + iterClosed++) { + Node* n = (*iterClosed); + + if (!n->child) { + FreeNode(n); + n = NULL; + } + } + + m_ClosedList.clear(); + } + + // Node memory management + Node* AllocateNode() { +#if !USE_FSA_MEMORY + m_AllocateNodeCount++; + Node* p = new Node; + return p; +#else + Node* address = m_FixedSizeAllocator.alloc(); + + if (!address) { + return NULL; + } + m_AllocateNodeCount++; + Node* p = new (address) Node; + return p; +#endif + } + + void FreeNode(Node* node) { + m_AllocateNodeCount--; + +#if !USE_FSA_MEMORY + delete node; +#else + node->~Node(); + m_FixedSizeAllocator.free(node); +#endif + } + + private: // data + // Heap (simple vector but used as a heap, cf. 
Steve Rabin's game gems + // article) + vector m_OpenList; + + // Closed is an unordered_set + struct NodeHash { + size_t operator()(Node* const& n) const { return n->m_UserState.Hash(); } + }; + struct NodeEqual { + bool operator()(Node* a, Node* b) const { + return a->m_UserState.IsSameState(b->m_UserState); + } + }; + unordered_set m_ClosedList; + + // Successors is a vector filled out by the user each type successors to a + // node are generated + vector m_Successors; + + // State + unsigned int m_State; + + // Counts steps + int m_Steps; + + // Start and goal state pointers + Node* m_Start; + Node* m_Goal; + + Node* m_CurrentSolutionNode; + +#if USE_FSA_MEMORY + // Memory + FixedSizeAllocator m_FixedSizeAllocator; +#endif + + // Debug : need to keep these two iterators around + // for the user Dbg functions + typename vector::iterator iterDbgOpen; + typename vector::iterator iterDbgClosed; + + // debugging : count memory allocation and free's + int m_AllocateNodeCount; + + bool m_CancelRequest; +}; + +template +class AStarState { + public: + virtual ~AStarState() {} + virtual float GoalDistanceEstimate( + T& nodeGoal) = 0; // Heuristic function which computes the estimated cost + // to the goal node + virtual bool IsGoal( + T& nodeGoal) = 0; // Returns true if this node is the goal node + virtual bool GetSuccessors( + AStarSearch* astarsearch, + T* parent_node) = 0; // Retrieves all successors to this node and adds + // them via astarsearch.addSuccessor() + virtual float GetCost( + T& successor) = 0; // Computes the cost of travelling from this node to + // the successor node + virtual bool IsSameState( + T& rhs) = 0; // Returns true if this node is the same as the rhs node + virtual size_t Hash() = 0; // Returns a hash for the state +}; + +} // namespace std + +#endif diff --git a/third_party/astar_stl/fsa.h b/third_party/astar_stl/fsa.h new file mode 100644 index 00000000..5803f88e --- /dev/null +++ b/third_party/astar_stl/fsa.h @@ -0,0 +1,211 @@ +/* + +A* 
Algorithm Implementation using STL is +Copyright (C)2001-2005 Justin Heyes-Jones + +Permission is given by the author to freely redistribute and +include this code in any program as long as this credit is +given where due. + + COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" BASIS, + WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE + IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE + OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND + PERFORMANCE OF THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED + CODE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE INITIAL + DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY + NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF + WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE + OF ANY COVERED CODE IS AUTHORIZED HEREUNDER EXCEPT UNDER + THIS DISCLAIMER. + + Use at your own risk! + + + + FixedSizeAllocator class + Copyright 2001 Justin Heyes-Jones + + This class is a constant time O(1) memory manager for objects of + a specified type. The type is specified using a template class. + + Memory is allocated from a fixed size buffer which you can specify in the + class constructor or use the default. + + Using GetFirst and GetNext it is possible to iterate through the elements + one by one, and this would be the most common use for the class. + + I would suggest using this class when you want O(1) add and delete + and you don't do much searching, which would be O(n). Structures such as +binary trees can be used instead to get O(logn) access time. 
+ +*/ + +#ifndef FSA_H +#define FSA_H + +#include +#include + +template +class FixedSizeAllocator { + public: + // Constants + enum { FSA_DEFAULT_SIZE = 100 }; + + // This class enables us to transparently manage the extra data + // needed to enable the user class to form part of the double-linked + // list class + struct FSA_ELEMENT { + USER_TYPE UserType; + + FSA_ELEMENT* pPrev; + FSA_ELEMENT* pNext; + }; + + public: // methods + FixedSizeAllocator(unsigned int MaxElements = FSA_DEFAULT_SIZE) + : m_pFirstUsed(NULL), m_MaxElements(MaxElements) { + // Allocate enough memory for the maximum number of elements + + char* pMem = new char[m_MaxElements * sizeof(FSA_ELEMENT)]; + + m_pMemory = (FSA_ELEMENT*)pMem; + + // Set the free list first pointer + m_pFirstFree = m_pMemory; + + // Clear the memory + memset(m_pMemory, 0, sizeof(FSA_ELEMENT) * m_MaxElements); + + // Point at first element + FSA_ELEMENT* pElement = m_pFirstFree; + + // Set the double linked free list + for (unsigned int i = 0; i < m_MaxElements; i++) { + pElement->pPrev = pElement - 1; + pElement->pNext = pElement + 1; + + pElement++; + } + + // first element should have a null prev + m_pFirstFree->pPrev = NULL; + // last element should have a null next + (pElement - 1)->pNext = NULL; + } + + ~FixedSizeAllocator() { + // Free up the memory + delete[] (char*)m_pMemory; + } + + // Allocate a new USER_TYPE and return a pointer to it + USER_TYPE* alloc() { + FSA_ELEMENT* pNewNode = NULL; + + if (!m_pFirstFree) { + return NULL; + } else { + pNewNode = m_pFirstFree; + m_pFirstFree = pNewNode->pNext; + + // if the new node points to another free node then + // change that nodes prev free pointer... 
+ if (pNewNode->pNext) { + pNewNode->pNext->pPrev = NULL; + } + + // node is now on the used list + + pNewNode->pPrev = NULL; // the allocated node is always first in the list + + if (m_pFirstUsed == NULL) { + pNewNode->pNext = NULL; // no other nodes + } else { + m_pFirstUsed->pPrev = + pNewNode; // insert this at the head of the used list + pNewNode->pNext = m_pFirstUsed; + } + + m_pFirstUsed = pNewNode; + } + + return reinterpret_cast(pNewNode); + } + + // Free the given user type + // For efficiency I don't check whether the user_data is a valid + // pointer that was allocated. I may add some debug only checking + // (To add the debug check you'd need to make sure the pointer is in + // the m_pMemory area and is pointing at the start of a node) + void free(USER_TYPE* user_data) { + FSA_ELEMENT* pNode = reinterpret_cast(user_data); + + // manage used list, remove this node from it + if (pNode->pPrev) { + pNode->pPrev->pNext = pNode->pNext; + } else { + // this handles the case that we delete the first node in the used list + m_pFirstUsed = pNode->pNext; + } + + if (pNode->pNext) { + pNode->pNext->pPrev = pNode->pPrev; + } + + // add to free list + if (m_pFirstFree == NULL) { + // free list was empty + m_pFirstFree = pNode; + pNode->pPrev = NULL; + pNode->pNext = NULL; + } else { + // Add this node at the start of the free list + m_pFirstFree->pPrev = pNode; + pNode->pNext = m_pFirstFree; + m_pFirstFree = pNode; + } + } + + // For debugging this displays both lists (using the prev/next list pointers) + void Debug() { + printf("free list "); + + FSA_ELEMENT* p = m_pFirstFree; + while (p) { + printf("%x!%x ", p->pPrev, p->pNext); + p = p->pNext; + } + printf("\n"); + + printf("used list "); + + p = m_pFirstUsed; + while (p) { + printf("%x!%x ", p->pPrev, p->pNext); + p = p->pNext; + } + printf("\n"); + } + + // Iterators + + USER_TYPE* GetFirst() { return reinterpret_cast(m_pFirstUsed); } + + USER_TYPE* GetNext(USER_TYPE* node) { + return reinterpret_cast( + 
(reinterpret_cast(node))->pNext); + } + + public: // data + private: // methods + private: // data + FSA_ELEMENT* m_pFirstFree; + FSA_ELEMENT* m_pFirstUsed; + unsigned int m_MaxElements; + FSA_ELEMENT* m_pMemory; +}; + +#endif // defined FSA_H \ No newline at end of file From 4f2db77b1741e35b80bdeac0488f26c979843387 Mon Sep 17 00:00:00 2001 From: Mohammad Taufeeque <9taufeeque9@gmail.com> Date: Tue, 19 Mar 2024 02:34:32 +0530 Subject: [PATCH 33/60] only add files in the levels_dir (#6) Fixes the issue of having any directory (e.g: logs directory) within the levels directory: https://github.com/AlignmentResearch/learned-planners/issues/54 --- envpool/sokoban/BUILD | 15 --- envpool/sokoban/level_loader.cc | 4 +- envpool/sokoban/sokoban_astar_test.cc | 144 -------------------------- 3 files changed, 3 insertions(+), 160 deletions(-) delete mode 100644 envpool/sokoban/sokoban_astar_test.cc diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index 8938ad53..1d739971 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -68,21 +68,6 @@ cc_binary( ], ) -cc_test( - name = "sokoban_astar_test", - size = "enormous", - srcs = [ - "level_loader.cc", - "sokoban_astar_test.cc", - "sokoban_node.cc", - ], - deps = [ - ":sokoban_node_h", - "@com_github_google_glog//:glog", - "@com_google_googletest//:gtest_main", - ], -) - py_test( name = "test", srcs = ["sokoban_py_envpool_test.py"], diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index 9d0a9863..2f7952f8 100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -41,7 +41,9 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path, level_file_paths_.push_back(base_path); } else { for (const auto& entry : std::filesystem::directory_iterator(base_path)) { - level_file_paths_.push_back(entry.path()); + if (entry.is_regular_file()) { + level_file_paths_.push_back(entry.path()); + } } } cur_file_ = level_file_paths_.begin(); diff --git 
a/envpool/sokoban/sokoban_astar_test.cc b/envpool/sokoban/sokoban_astar_test.cc deleted file mode 100644 index 2b0bafd6..00000000 --- a/envpool/sokoban/sokoban_astar_test.cc +++ /dev/null @@ -1,144 +0,0 @@ -// Copyright 2023-2024 FAR AI -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include "envpool/sokoban/sokoban_node.h" - -#define DEBUG_LISTS 0 -#define DEBUG_LIST_LENGTHS_ONLY 0 - -namespace sokoban { -TEST(SokobanAStarTest, Basic) { - std::cout << "STL A* Search implementation\n(C)2001 Justin Heyes-Jones\n"; - - // Create an instance of the search class... 
- std::AStarSearch astarsearch(1000000); - std::vector verify_steps = {38, 19}; - std::vector verify_search_steps = {63408, 24991}; - - unsigned int search_count = 0; - const unsigned int num_searches = 2; - const std::string level_file = "/app/envpool/sokoban/sample_levels/"; - const int dim_room = 10; - LevelLoader level_loader(level_file, false, 2); - std::mt19937 gen(42); - - while (search_count < num_searches) { - // Create a start state - SokobanLevel level = *level_loader.GetLevel(gen); - - SokobanNode node_start(dim_room, level, false); - SokobanNode node_end(dim_room, level, true); - std::vector>* goals = &node_end.boxes; - node_start.PrintNodeInfo(goals); - astarsearch.SetStartAndGoalStates(node_start, node_end); - - unsigned int search_state; - unsigned int search_steps = 0; - - do { - search_state = astarsearch.SearchStep(); - - search_steps++; - -#if DEBUG_LISTS - - std::cout << "Steps:" << search_steps << "\n"; - - int len = 0; - - std::cout << "Open:\n"; - SokobanNode* p = astarsearch.GetOpenListStart(); - while (p) { - len++; -#if !DEBUG_LIST_LENGTHS_ONLY - ((SokobanNode*)p)->PrintNodeInfo(goals); -#endif - p = astarsearch.GetOpenListNext(); - } - - std::cout << "Open list has " << len << " nodes\n"; - - len = 0; - - std::cout << "Closed:\n"; - p = astarsearch.GetClosedListStart(); - while (p) { - len++; -#if !DEBUG_LIST_LENGTHS_ONLY - p->PrintNodeInfo(goals); -#endif - p = astarsearch.GetClosedListNext(); - } - - std::cout << "Closed list has " << len << " nodes\n"; -#endif - } while (search_state == - std::AStarSearch::SEARCH_STATE_SEARCHING); - - if (search_state == std::AStarSearch::SEARCH_STATE_SUCCEEDED) { - std::cout << "Search found goal state\n"; - - SokobanNode* node = astarsearch.GetSolutionStart(); - - int steps = 0; - - node->PrintNodeInfo(goals); - for (;;) { - node = astarsearch.GetSolutionNext(); - - if (node == nullptr) { - break; - } - std::cout << "Step " << steps << std::endl; - node->PrintNodeInfo(goals); - steps++; - } - 
std::cout << "Solution steps " << steps << std::endl; - EXPECT_EQ(steps, verify_steps.at(search_count)); - - // Once you're done with the solution you can free the nodes up - astarsearch.FreeSolutionNodes(); - - } else if (search_state == - std::AStarSearch::SEARCH_STATE_FAILED) { - std::cout << "Search terminated. Did not find goal state\n"; - } else if (search_state == - std::AStarSearch::SEARCH_STATE_NOT_INITIALISED) { - std::cout << "SEARCH_STATE_NOT_INITIALISED\n"; - } else if (search_state == - std::AStarSearch::SEARCH_STATE_SEARCHING) { - std::cout << "SEARCH_STATE_SEARCHING\n"; - } else if (search_state == - std::AStarSearch::SEARCH_STATE_OUT_OF_MEMORY) { - std::cout << "SEARCH_STATE_OUT_OF_MEMORY\n"; - } else if (search_state == - std::AStarSearch::SEARCH_STATE_INVALID) { - std::cout << "SEARCH_STATE_INVALID\n"; - } - - // Display the number of loops the search went through - std::cout << "search_steps : " << search_steps << "\n"; - EXPECT_EQ(search_state, - std::AStarSearch::SEARCH_STATE_SUCCEEDED); - EXPECT_EQ(search_steps, verify_search_steps.at(search_count)); - - search_count++; - - astarsearch.EnsureMemoryFreed(); - } -} -} // namespace sokoban From e3e2e7ea9d8261c613a879ef470e0f4d4aefaae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 22 Mar 2024 11:40:30 -0700 Subject: [PATCH 34/60] Upgrade pytorch and cuda --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9d5399e2..2acb62b2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,7 +9,7 @@ parameters: docker_img_version: # Docker image version for running tests. 
type: string - default: "8f41d1e-envpool-ci" + default: "8d8cf1a-envpool-ci" workflows: test-jobs: From ed640fdfc391e529681c70f21d162409bcfaffdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 22 Mar 2024 17:00:11 -0700 Subject: [PATCH 35/60] Clang-format changed --- envpool/classic_control/pendulum.h | 11 ++++++----- envpool/core/env_spec.h | 4 ++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/envpool/classic_control/pendulum.h b/envpool/classic_control/pendulum.h index 85e91c1a..f2a594ad 100644 --- a/envpool/classic_control/pendulum.h +++ b/envpool/classic_control/pendulum.h @@ -77,8 +77,9 @@ class PendulumEnv : public Env { void Step(const Action& action) override { done_ = (++elapsed_step_ >= max_episode_steps_); float act = action["action"_]; - double u = - act < -kMaxTorque ? -kMaxTorque : act > kMaxTorque ? kMaxTorque : act; + double u = act < -kMaxTorque ? -kMaxTorque + : act > kMaxTorque ? kMaxTorque + : act; double cost = theta_ * theta_ + 0.1 * theta_dot_ * theta_dot_ + 0.001 * u * u; double new_theta_dot = @@ -86,9 +87,9 @@ class PendulumEnv : public Env { if (version_ == 0) { theta_ += new_theta_dot * kDt; } - theta_dot_ = new_theta_dot < -kMaxSpeed - ? -kMaxSpeed - : new_theta_dot > kMaxSpeed ? kMaxSpeed : new_theta_dot; + theta_dot_ = new_theta_dot < -kMaxSpeed ? -kMaxSpeed + : new_theta_dot > kMaxSpeed ? 
kMaxSpeed + : new_theta_dot; if (version_ == 1) { theta_ += new_theta_dot * kDt; } diff --git a/envpool/core/env_spec.h b/envpool/core/env_spec.h index c3cc7f69..f59e1fb2 100644 --- a/envpool/core/env_spec.h +++ b/envpool/core/env_spec.h @@ -52,8 +52,8 @@ class EnvSpec { using Config = decltype(ConcatDict(common_config, EnvFns::DefaultConfig())); using ConfigKeys = typename Config::Keys; using ConfigValues = typename Config::Values; - using StateSpec = decltype( - ConcatDict(common_state_spec, EnvFns::StateSpec(std::declval()))); + using StateSpec = decltype(ConcatDict( + common_state_spec, EnvFns::StateSpec(std::declval()))); using ActionSpec = decltype(ConcatDict( common_action_spec, EnvFns::ActionSpec(std::declval()))); using StateKeys = typename StateSpec::Keys; From 918655157cba9cf22b3e4fd2766e95a8d33bfe2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Fri, 22 Mar 2024 17:15:54 -0700 Subject: [PATCH 36/60] Make fixed initializers the default ones --- envpool/sokoban/level_loader.cc | 3 --- envpool/sokoban/level_loader.h | 6 +++--- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index 2f7952f8..0689cfa2 100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -32,10 +32,7 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path, int verbose) : load_sequentially_(load_sequentially), n_levels_to_load_(n_levels_to_load), - levels_loaded_(0), - levels_(0), cur_level_(levels_.begin()), - level_file_paths_(0), verbose(verbose) { if (std::filesystem::is_regular_file(base_path)) { level_file_paths_.push_back(base_path); diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h index 9879d8df..ced5e60a 100644 --- a/envpool/sokoban/level_loader.h +++ b/envpool/sokoban/level_loader.h @@ -38,10 +38,10 @@ class LevelLoader { protected: bool load_sequentially_; int n_levels_to_load_; - int levels_loaded_; - 
std::vector levels_; + int levels_loaded_{0}; + std::vector levels_{0}; std::vector::iterator cur_level_; - std::vector level_file_paths_; + std::vector level_file_paths_{0}; std::vector::iterator cur_file_; void LoadFile(std::mt19937& gen); From 7562370e0eceb53607438e15a9064de1eae6c19b Mon Sep 17 00:00:00 2001 From: Mohammad Taufeeque <9taufeeque9@gmail.com> Date: Fri, 29 Mar 2024 11:29:45 +0530 Subject: [PATCH 37/60] Better heuristic function that improves runtime on difficult levels (#5) - Check deadlock condition for when the box is stuck in a corner that's not a goal - Add a script `astar_log_level.cc` to solve a particular level in a file. This should be used when logging levels across a small number of files. - `astar_log.cc` should still be used to log levels across a large number of files. --- envpool/sokoban/BUILD | 12 + envpool/sokoban/astar_log.cc | 53 ++-- envpool/sokoban/astar_log_level.cc | 141 +++++++++++ envpool/sokoban/sample_levels/small.txt | 23 ++ envpool/sokoban/sokoban_node.cc | 23 ++ envpool/sokoban/sokoban_node.h | 3 +- envpool/sokoban/sokoban_py_envpool_test.py | 274 +++++++++++---------- 7 files changed, 375 insertions(+), 154 deletions(-) create mode 100644 envpool/sokoban/astar_log_level.cc create mode 100644 envpool/sokoban/sample_levels/small.txt diff --git a/envpool/sokoban/BUILD b/envpool/sokoban/BUILD index 1d739971..b79ee42f 100644 --- a/envpool/sokoban/BUILD +++ b/envpool/sokoban/BUILD @@ -68,6 +68,18 @@ cc_binary( ], ) +cc_binary( + name = "astar_log_level", + srcs = [ + "astar_log_level.cc", + "level_loader.cc", + "sokoban_node.cc", + ], + deps = [ + ":sokoban_node_h", + ], +) + py_test( name = "test", srcs = ["sokoban_py_envpool_test.py"], diff --git a/envpool/sokoban/astar_log.cc b/envpool/sokoban/astar_log.cc index 55d93c28..976cc67a 100644 --- a/envpool/sokoban/astar_log.cc +++ b/envpool/sokoban/astar_log.cc @@ -33,11 +33,14 @@ void RunAStar(const std::string& level_file_name, std::ifstream log_file_in(log_file_name); // 
check if the file is empty if (log_file_in.peek() == std::ifstream::traits_type::eof()) { - log_file_out << "Level, Actions, Steps, SearchSteps" << std::endl; + log_file_out << "Level,Actions,Steps,SearchSteps" << std::endl; } else { // skip levels that have already been run std::string line; std::getline(log_file_in, line); // skip header while (std::getline(log_file_in, line)) { + if (line.empty()) { + continue; + } SokobanLevel level = *level_loader.GetLevel(gen); level_idx++; } @@ -63,9 +66,12 @@ void RunAStar(const std::string& level_file_name, if (search_state == std::AStarSearch::SEARCH_STATE_SUCCEEDED) { std::stringstream loglinestream; - loglinestream << level_idx << ", "; - astarsearch.GetSolutionStart(); + loglinestream << level_idx << ","; + SokobanNode* node = astarsearch.GetSolutionStart(); int steps = 0; + int prev_x = node->player_x; + int prev_y = node->player_y; + bool correct_solution = true; for (;;) { SokobanNode* node = astarsearch.GetSolutionNext(); if (node == nullptr) { @@ -75,37 +81,50 @@ void RunAStar(const std::string& level_file_name, assert(action >= 0 && action < 4); loglinestream << action; steps++; + int curr_x = node->player_x; + int curr_y = node->player_y; + int delta_x = node->kDelta.at(action).at(0); + int delta_y = node->kDelta.at(action).at(1); + if (curr_x != prev_x + delta_x || curr_y != prev_y + delta_y) { + correct_solution = false; + } + prev_x = curr_x; + prev_y = curr_y; + } + if (!correct_solution) { + loglinestream << ",INCORRECT_SOLUTION_FOUND," << search_steps + << std::endl; + } else { + loglinestream << "," << steps << "," << search_steps << std::endl; } - loglinestream << ", " << steps << ", " << search_steps << std::endl; log_file_out << loglinestream.str(); astarsearch.FreeSolutionNodes(); astarsearch.EnsureMemoryFreed(); } else if (search_state == std::AStarSearch::SEARCH_STATE_FAILED) { - log_file_out << level_idx << ", " - << "SEARCH_STATE_FAILED, -1, " << search_steps << std::endl; + log_file_out << 
level_idx << "," + << "SEARCH_STATE_FAILED,-1," << search_steps << std::endl; } else if (search_state == std::AStarSearch::SEARCH_STATE_NOT_INITIALISED) { - log_file_out << level_idx << ", " - << "SEARCH_STATE_NOT_INITIALISED, -1, " << search_steps + log_file_out << level_idx << "," + << "SEARCH_STATE_NOT_INITIALISED,-1," << search_steps << std::endl; } else if (search_state == std::AStarSearch::SEARCH_STATE_SEARCHING) { - log_file_out << level_idx << ", " - << "SEARCH_STATE_SEARCHING, -1, " << search_steps - << std::endl; + log_file_out << level_idx << "," + << "SEARCH_STATE_SEARCHING,-1," << search_steps << std::endl; } else if (search_state == std::AStarSearch::SEARCH_STATE_OUT_OF_MEMORY) { - log_file_out << level_idx << ", " - << "SEARCH_STATE_OUT_OF_MEMORY, -1, " << search_steps + log_file_out << level_idx << "," + << "SEARCH_STATE_OUT_OF_MEMORY,-1," << search_steps << std::endl; } else if (search_state == std::AStarSearch::SEARCH_STATE_INVALID) { - log_file_out << level_idx << ", " - << "SEARCH_STATE_INVALID, -1, " << search_steps << std::endl; + log_file_out << level_idx << "," + << "SEARCH_STATE_INVALID,-1," << search_steps << std::endl; } else { - log_file_out << level_idx << ", " - << "UNKNOWN, -1, " << search_steps << std::endl; + log_file_out << level_idx << "," + << "UNKNOWN,-1," << search_steps << std::endl; } log_file_out.flush(); level_idx++; diff --git a/envpool/sokoban/astar_log_level.cc b/envpool/sokoban/astar_log_level.cc new file mode 100644 index 00000000..96c3802f --- /dev/null +++ b/envpool/sokoban/astar_log_level.cc @@ -0,0 +1,141 @@ +// Copyright 2023-2024 FAR AI +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include "envpool/sokoban/sokoban_node.h" + +namespace sokoban { + +void RunAStar(const std::string& level_file_name, + const std::string& log_file_name, int level_to_run = 0, + int fsa_limit = 1000000) { + std::cout << "Running A* on file " << level_file_name << " and logging to " + << log_file_name << " with fsa_limit " << fsa_limit << "on level " + << level_to_run << std::endl; + const int dim_room = 10; + int level_idx = 0; + LevelLoader level_loader(level_file_name, true, -1); + std::mt19937 gen(42); + std::string file_idx = + level_file_name.substr(level_file_name.find_last_of("/\\") + 1); + file_idx = file_idx.substr(0, file_idx.find('.')); + + std::ofstream log_file_out(log_file_name, std::ios_base::app); + + while (level_idx < level_to_run) { + level_loader.GetLevel(gen); + level_idx++; + } + std::AStarSearch astarsearch(fsa_limit); + std::cout << "Running level " << level_idx << std::endl; + SokobanLevel level = *level_loader.GetLevel(gen); + + SokobanNode node_start(dim_room, level, false); + SokobanNode node_end(dim_room, level, true); + astarsearch.SetStartAndGoalStates(node_start, node_end); + unsigned int search_state; + unsigned int search_steps = 0; + std::cout << "Starting search" << std::endl; + do { + search_state = astarsearch.SearchStep(); + search_steps++; + } while (search_state == + std::AStarSearch::SEARCH_STATE_SEARCHING); + + if (search_state == std::AStarSearch::SEARCH_STATE_SUCCEEDED) { + std::stringstream loglinestream; + loglinestream << file_idx << "," << level_idx << ","; + SokobanNode* 
node = astarsearch.GetSolutionStart(); + int steps = 0; + int prev_x = node->player_x; + int prev_y = node->player_y; + bool correct_solution = true; + for (;;) { + SokobanNode* node = astarsearch.GetSolutionNext(); + if (node == nullptr) { + break; + } + int action = node->action_from_parent; + assert(action >= 0 && action < 4); + loglinestream << action; + steps++; + int curr_x = node->player_x; + int curr_y = node->player_y; + int delta_x = node->kDelta.at(action).at(0); + int delta_y = node->kDelta.at(action).at(1); + if (curr_x != prev_x + delta_x || curr_y != prev_y + delta_y) { + correct_solution = false; + } + prev_x = curr_x; + prev_y = curr_y; + } + if (!correct_solution) { + loglinestream << ",INCORRECT_SOLUTION_FOUND," << search_steps + << std::endl; + } else { + loglinestream << "," << steps << "," << search_steps << std::endl; + } + log_file_out << loglinestream.str(); + astarsearch.FreeSolutionNodes(); + astarsearch.EnsureMemoryFreed(); + } else if (search_state == + std::AStarSearch::SEARCH_STATE_FAILED) { + log_file_out << level_idx << "," + << "SEARCH_STATE_FAILED,-1," << search_steps << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_NOT_INITIALISED) { + log_file_out << level_idx << "," + << "SEARCH_STATE_NOT_INITIALISED,-1," << search_steps + << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_SEARCHING) { + log_file_out << level_idx << "," + << "SEARCH_STATE_SEARCHING,-1," << search_steps << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_OUT_OF_MEMORY) { + log_file_out << level_idx << "," + << "SEARCH_STATE_OUT_OF_MEMORY,-1," << search_steps + << std::endl; + } else if (search_state == + std::AStarSearch::SEARCH_STATE_INVALID) { + log_file_out << level_idx << "," + << "SEARCH_STATE_INVALID,-1," << search_steps << std::endl; + } else { + log_file_out << level_idx << "," + << "UNKNOWN,-1," << search_steps << std::endl; + } + log_file_out.flush(); +} +} // namespace sokoban 
+ +int main(int argc, char** argv) { + int fsa_limit = 1000000; + if (argc < 4) { + std::cout << "Usage: " << argv[0] + << " level_file_name log_file_name level_to_run [fsa_limit]" + << std::endl; + return 1; + } + std::string level_file_name = argv[1]; + std::string log_file_name = argv[2]; + int level_to_run = std::stoi(argv[3]); + if (argc > 4) { + fsa_limit = std::stoi(argv[4]); + } + + sokoban::RunAStar(level_file_name, log_file_name, level_to_run, fsa_limit); + return 0; +} diff --git a/envpool/sokoban/sample_levels/small.txt b/envpool/sokoban/sample_levels/small.txt new file mode 100644 index 00000000..f0acb3b5 --- /dev/null +++ b/envpool/sokoban/sample_levels/small.txt @@ -0,0 +1,23 @@ +; 0 +########## +#@$ .#### +#$.$ #### +# $ ###### +# ####### +#.######## +#.######## +########## +########## +########## + +; 1 +########## +########## +########## +########## +### ##### +## $ . $ # +## $. @# +## $. # +##. # +########## diff --git a/envpool/sokoban/sokoban_node.cc b/envpool/sokoban/sokoban_node.cc index f22065ca..7be85918 100644 --- a/envpool/sokoban/sokoban_node.cc +++ b/envpool/sokoban/sokoban_node.cc @@ -163,6 +163,10 @@ float SokobanNode::GoalDistanceEstimate(SokobanNode& goal_node) { min_distance = std::min(min_distance, distance); } h += min_distance; + bool contiguous_walls = CornerWalls(box); + if (contiguous_walls && min_distance != 0) { + h += 1000; + } } return h; } @@ -184,4 +188,23 @@ bool SokobanNode::GetSuccessors(std::AStarSearch* astarsearch, return true; } +bool SokobanNode::CornerWalls(const std::pair& box) const { + bool found_wall = false; + bool found_contiguous_wall = false; + for (const auto& delta : kDelta) { + int new_x = box.first + delta.at(0); + int new_y = box.second + delta.at(1); + bool new_found_wall = CheckWall(new_x, new_y); + found_contiguous_wall = + found_contiguous_wall || (found_wall && new_found_wall); + found_wall = new_found_wall; + } + if (found_wall && !found_contiguous_wall) { + int new_x = box.first + 
kDelta.at(0).at(0); + int new_y = box.second + kDelta.at(0).at(1); + found_contiguous_wall = CheckWall(new_x, new_y); + } + return found_contiguous_wall; +} + } // namespace sokoban diff --git a/envpool/sokoban/sokoban_node.h b/envpool/sokoban/sokoban_node.h index 3402c7c1..ef789ed2 100644 --- a/envpool/sokoban/sokoban_node.h +++ b/envpool/sokoban/sokoban_node.h @@ -27,7 +27,7 @@ namespace sokoban { class SokobanNode { public: static constexpr std::array, 4> kDelta = { - {{0, -1}, {0, 1}, {-1, 0}, {1, 0}} // Up, Down, Left, Right + {{0, -1}, {1, 0}, {0, 1}, {-1, 0}} // Up, Right, Down, Left }; int dim_room{0}; int player_x{0}, player_y{0}; @@ -116,6 +116,7 @@ class SokobanNode { [[nodiscard]] size_t Hash() const; void PrintNodeInfo(std::vector>* goals = nullptr); + [[nodiscard]] bool CornerWalls(const std::pair& box) const; }; } // namespace sokoban diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 4bb423f6..9eaf31d4 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -27,162 +27,164 @@ from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec -class TestSokobanEnvPool: - - def test_config(self) -> None: - ref_config_keys = [ - # Default environment keys - "base_path", - "batch_size", - "gym_reset_return_info", - "max_num_players", - "num_envs", - "num_threads", - "seed", - "thread_affinity_offset", - "min_episode_steps", - # Default and also used by sokoban - "max_episode_steps", - # defined by sokoban - "dim_room", - "levels_dir", - "reward_box", - "reward_finished", - "reward_step", - "verbose", - "load_sequentially", - "n_levels_to_load", - ] - default_conf = _SokobanEnvSpec._default_config_values - assert isinstance(default_conf, tuple) - config_keys = _SokobanEnvSpec._config_keys - assert isinstance(config_keys, list) - assert len(default_conf) == len(config_keys) - assert sorted(config_keys) == sorted(ref_config_keys) - - def test_envpool(self) 
-> None: - batch = num_envs = 200 +def test_config() -> None: + ref_config_keys = [ + # Default environment keys + "base_path", + "batch_size", + "gym_reset_return_info", + "max_num_players", + "num_envs", + "num_threads", + "seed", + "thread_affinity_offset", + "min_episode_steps", + # Default and also used by sokoban + "max_episode_steps", + # defined by sokoban + "dim_room", + "levels_dir", + "reward_box", + "reward_finished", + "reward_step", + "verbose", + "load_sequentially", + "n_levels_to_load", + ] + default_conf = _SokobanEnvSpec._default_config_values + assert isinstance(default_conf, tuple) + config_keys = _SokobanEnvSpec._config_keys + assert isinstance(config_keys, list) + assert len(default_conf) == len(config_keys) + assert sorted(config_keys) == sorted(ref_config_keys) + + +def test_envpool() -> None: + batch = num_envs = 200 + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=num_envs, + batch_size=num_envs, + seed=2346890, + max_episode_steps=60, + reward_step=-0.1, + dim_room=10, + levels_dir="/app/envpool/sokoban/sample_levels", + ) + total_steps = 1000 + + _ = env.reset() + t = time.time() + for _ in range(total_steps): + _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,))) + duration = time.time() - t + fps = total_steps * batch / duration + print(f"FPS = {fps:.6f}") + + +def test_envpool_max_episode_steps() -> None: + for max_episode_steps in [2, 5, 10]: env = envpool.make( "Sokoban-v0", env_type="gymnasium", - num_envs=num_envs, - batch_size=num_envs, - seed=2346890, - max_episode_steps=60, - reward_step=-0.1, - dim_room=10, + num_envs=1, + batch_size=1, + min_episode_steps=max_episode_steps, + max_episode_steps=max_episode_steps, levels_dir="/app/envpool/sokoban/sample_levels", ) - total_steps = 1000 - - _ = env.reset() - t = time.time() - for _ in range(total_steps): - _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,))) - duration = time.time() - t - fps = total_steps * batch / duration - 
print(f"FPS = {fps:.6f}") - - def test_envpool_max_episode_steps(self) -> None: - for max_episode_steps in [2, 5, 10]: - env = envpool.make( - "Sokoban-v0", - env_type="gymnasium", - num_envs=1, - batch_size=1, - min_episode_steps=max_episode_steps, - max_episode_steps=max_episode_steps, - levels_dir="/app/envpool/sokoban/sample_levels", - ) - env.reset() - for _ in range(max_episode_steps - 1): - _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) - assert not np.any(terminated | truncated) - + env.reset() + for _ in range(max_episode_steps - 1): _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) - assert not np.any(terminated) - assert np.all(truncated) - - def test_envpool_load_sequentially(self, capfd) -> None: - levels_dir = "/app/envpool/sokoban/sample_levels" - files = glob.glob(f"{levels_dir}/*.txt") - levels_by_files = [] - for file in files: - with open(file, "r") as f: - text = f.read() - levels = text.split("\n;") - levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels] - levels_by_files.append((file, levels)) - assert len(levels_by_files) > 1 - assert all(len(levels) > 1 for levels in levels_by_files) - total_levels = sum(len(levels) for levels in levels_by_files) - for n_levels_to_load in range(1, total_levels + 1): - env = envpool.make( - "Sokoban-v0", - env_type="gymnasium", - num_envs=1, - batch_size=1, - max_episode_steps=60, - min_episode_steps=60, - levels_dir=levels_dir, - load_sequentially=True, - n_levels_to_load=n_levels_to_load, - verbose=2, - ) - dim_room = env.spec.config.dim_room - obs, _ = env.reset() - assert obs.shape == ( - 1, - 3, - dim_room, - dim_room, - ), f"obs shape: {obs.shape}" - if n_levels_to_load == -1: - n_levels_to_load = total_levels - for _ in range(n_levels_to_load - 1): - env.reset() - out, _ = capfd.readouterr() - files_output = out.split("***")[1:] - for i, file_output in enumerate(files_output): - first_line, out = file_output.strip().split("\n", 1) - 
result = re.search(r'Loaded (\d+) levels from "(.*\.txt)"', first_line) - n_levels, file_name = int(result.group(1)), result.group(2) - lev1, lev2 = out.strip().split("\n\n") - assert file_name == levels_by_files[i][0] - assert n_levels == len(levels_by_files[i][1]) - assert lev1 == levels_by_files[i][1][0] - assert lev2 == levels_by_files[i][1][1] - - def test_xla(self) -> None: - num_envs = 10 + assert not np.any(terminated | truncated) + + _, _, terminated, truncated, _ = env.step(np.zeros([1], dtype=np.int32)) + assert not np.any(terminated) + assert np.all(truncated) + + +def test_envpool_load_sequentially(capfd) -> None: + levels_dir = "/app/envpool/sokoban/sample_levels" + files = glob.glob(f"{levels_dir}/*.txt") + levels_by_files = [] + for file in files: + with open(file, "r") as f: + text = f.read() + levels = text.split("\n;") + levels = ["\n".join(level.split("\n")[1:]).strip() for level in levels] + levels_by_files.append((file, levels)) + assert len(levels_by_files) > 1 + assert all(len(levels) > 1 for levels in levels_by_files) + total_levels = sum(len(levels) for levels in levels_by_files) + for n_levels_to_load in range(1, total_levels + 1): env = envpool.make( "Sokoban-v0", - env_type="dm", - num_envs=num_envs, - batch_size=num_envs, - seed=2346890, + env_type="gymnasium", + num_envs=1, + batch_size=1, max_episode_steps=60, - reward_step=-0.1, - dim_room=10, - levels_dir="/app/envpool/sokoban/sample_levels", + min_episode_steps=60, + levels_dir=levels_dir, + load_sequentially=True, + n_levels_to_load=n_levels_to_load, + verbose=2, ) - handle, recv, send, step = env.xla() - - -def test_astar_log(self) -> None: - level_file_name = "/app/envpool/sokoban/sample_levels/001.txt" + dim_room = env.spec.config.dim_room + obs, _ = env.reset() + assert obs.shape == ( + 1, + 3, + dim_room, + dim_room, + ), f"obs shape: {obs.shape}" + if n_levels_to_load == -1: + n_levels_to_load = total_levels + for _ in range(n_levels_to_load - 1): + env.reset() + out, _ = 
capfd.readouterr() + files_output = out.split("***")[1:] + for i, file_output in enumerate(files_output): + first_line, out = file_output.strip().split("\n", 1) + result = re.search(r'Loaded (\d+) levels from "(.*\.txt)"', first_line) + n_levels, file_name = int(result.group(1)), result.group(2) + lev1, lev2 = out.strip().split("\n\n") + assert file_name == levels_by_files[i][0] + assert n_levels == len(levels_by_files[i][1]) + assert lev1 == levels_by_files[i][1][0] + assert lev2 == levels_by_files[i][1][1] + + +def test_xla() -> None: + num_envs = 10 + env = envpool.make( + "Sokoban-v0", + env_type="dm", + num_envs=num_envs, + batch_size=num_envs, + seed=2346890, + max_episode_steps=60, + reward_step=-0.1, + dim_room=10, + levels_dir="/app/envpool/sokoban/sample_levels", + ) + handle, recv, send, step = env.xla() + + +def test_astar_log() -> None: + level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" with tempfile.NamedTemporaryFile() as f: log_file_name = f.name subprocess.run( [ "bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name, - log_file_name, 1 + log_file_name, "1" ], check=True, ) with open(log_file_name, "r") as f: log = f.read() - assert "0, 301333002213130203303031, 24, 40611" == log.split("\n")[1] + assert "1, 222200001112330322210, 21, 1443" == log.split("\n")[1] if __name__ == "__main__": From 58eae4ec5a5a96a171d2915fa126a7fe6019ca2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Apr 2024 11:39:07 -0700 Subject: [PATCH 38/60] Don't truncate episodes that finish at the last step. 
--- envpool/sokoban/sokoban_envpool.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 2d139b08..66c6032f 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -162,6 +162,10 @@ constexpr std::array, kPlayerOnTarget + 1> kTinyColors{{ void SokobanEnv::WriteState(float reward) { auto state = Allocate(); + if(unmatched_boxes == 0) { + // Never mark the episode as truncated if we're getting the big final reward. + state["trunc"_] = false; + } state["reward"_] = reward; Array& obs = state["obs"_]; if (obs.size != 3 * world_.size()) { From f83f79b3a7d649fd055a7a4fdaa1c57dfcaedb86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Apr 2024 11:59:13 -0700 Subject: [PATCH 39/60] The start of a test --- envpool/sokoban/sokoban_py_envpool_test.py | 27 ++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 9eaf31d4..1972893d 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -171,6 +171,33 @@ def test_xla() -> None: handle, recv, send, step = env.xla() +def test_truncation_unsolved_episodes_only(): + """ + Test that only episodes that do *not* get solved within the time limit get truncated. That is, a large 'solution' + reward and truncation should never co-occur. 
+ """ + max_episode_steps = 120 + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=1, + batch_size=1, + min_episode_steps=max_episode_steps, + max_episode_steps=max_episode_steps, + levels_dir="/app/envpool/sokoban/sample_levels", + load_sequentially=True, + ) + env.reset() # Load level 0 and discard it + env.reset() # Load level 1 + + solve_actions = "222200001112330322210" + for a in solve_actions[:-1]: + env.step(int(a)) + + obs, reward, term, trunc, infos = env.step(int(solve_actions[-1])) + assert reward == env.spec.reward_step + env.spec.reward_box + env.spec.reward_finished + + def test_astar_log() -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" with tempfile.NamedTemporaryFile() as f: From bc5ba8ee47af941d8c38e44f51634e706af80cb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Apr 2024 12:03:42 -0700 Subject: [PATCH 40/60] Fix variable name --- envpool/sokoban/sokoban_envpool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 66c6032f..86f066c6 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -162,7 +162,7 @@ constexpr std::array, kPlayerOnTarget + 1> kTinyColors{{ void SokobanEnv::WriteState(float reward) { auto state = Allocate(); - if(unmatched_boxes == 0) { + if(unmatched_boxes_ == 0) { // Never mark the episode as truncated if we're getting the big final reward. 
state["trunc"_] = false; } From 8ce206a4390802403176b4bcc4ada06c316b12dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Apr 2024 16:12:15 -0700 Subject: [PATCH 41/60] Test that environment terminates/truncates correctly --- envpool/sokoban/sokoban_py_envpool_test.py | 91 ++++++++++++++++++---- 1 file changed, 77 insertions(+), 14 deletions(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 1972893d..76db0976 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -16,6 +16,7 @@ import glob import re import subprocess +import sys import tempfile import time @@ -171,12 +172,55 @@ def test_xla() -> None: handle, recv, send, step = env.xla() -def test_truncation_unsolved_episodes_only(): + +SOLVE_LEVEL_ZERO: str = "222200001112330322210" +TINY_COLORS: list[tuple[tuple[int, int, int], str]] = [ + ((0, 0, 0), "#"), + ((243, 248, 238), " "), + ((254, 126, 125), "."), + ((254, 95, 56), "s"), + ((142, 121, 56), "$"), + ((160, 212, 56), "@"), + ((219, 212, 56), "a"), +] + + +def print_obs(obs: np.ndarray): + assert obs.shape == (3, 10, 10) + for y in range(obs.shape[1]): + for x in range(obs.shape[2]): + arr = obs[:, y, x] + printed_any = False + for color, symbol in TINY_COLORS: + assert arr.shape == (3,) + if np.array_equal(arr, color): + print(symbol, end="") + printed_any = True + break + assert printed_any, f"Could not find match for {arr}" + print("\n", end="") + print("\n", end="") + + +action_astar_to_envpool = { + "0": 1, + "1": 4, + "2": 2, + "3": 3, +} + +def make_1d_array(action: int | str) -> np.ndarray: + return np.array(int(action))[None] + + + +@pytest.mark.parametrize("solve_on_time", [True, False]) +def test_solved_level_does_not_truncate(solve_on_time: bool): """ - Test that only episodes that do *not* get solved within the time limit get truncated. 
That is, a large 'solution' - reward and truncation should never co-occur. + Test that a level that gets solved just in time does not get truncated. But if it does not get solved just in time, it + gets truncated. """ - max_episode_steps = 120 + max_episode_steps = len(SOLVE_LEVEL_ZERO) env = envpool.make( "Sokoban-v0", env_type="gymnasium", @@ -187,32 +231,51 @@ def test_truncation_unsolved_episodes_only(): levels_dir="/app/envpool/sokoban/sample_levels", load_sequentially=True, ) - env.reset() # Load level 0 and discard it - env.reset() # Load level 1 + env.reset() # Load level 0 + + for a in SOLVE_LEVEL_ZERO[:-1]: + obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[a])) + # print_obs(obs[0]) + assert not term and not trunc, "Level should not have reached time limit yet" + + NOOP = 0 + + if solve_on_time: + obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]])) + # print_obs(obs[0]) + assert reward == env.spec.config.reward_step + env.spec.config.reward_box + env.spec.config.reward_finished, ( + f"the level wasn't solved successfully. 
Level: {print_obs(obs[0])}" + ) + assert term and not trunc, "Level should have finished within the time limit" + + else: + obs, reward, term, trunc, infos = env.step(make_1d_array(NOOP)) + assert not term and trunc, "Level should get truncated at precisely this step" - solve_actions = "222200001112330322210" - for a in solve_actions[:-1]: - env.step(int(a)) + _, _, term, trunc, _ =env.step(make_1d_array(NOOP)) + assert not term and not trunc, "Level should reset correctly" - obs, reward, term, trunc, infos = env.step(int(solve_actions[-1])) - assert reward == env.spec.reward_step + env.spec.reward_box + env.spec.reward_finished def test_astar_log() -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" with tempfile.NamedTemporaryFile() as f: log_file_name = f.name + return subprocess.run( [ - "bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name, + "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name, log_file_name, "1" ], check=True, + cwd="/app", + env=dict(HOME="/root"), ) with open(log_file_name, "r") as f: log = f.read() - assert "1, 222200001112330322210, 21, 1443" == log.split("\n")[1] + assert f"1, {SOLVE_LEVEL_ZERO}, 21, 1443" == log.split("\n")[1] if __name__ == "__main__": - pytest.main(["-v", __file__]) + retcode = pytest.main(["-v", __file__]) + sys.exit(retcode) From 42254d261fe888ac7dd71c112aacd5b82f4b8843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Apr 2024 16:26:37 -0700 Subject: [PATCH 42/60] Make sure the tests pass in CI --- envpool/sokoban/sokoban_envpool.cc | 5 ++-- envpool/sokoban/sokoban_py_envpool_test.py | 35 ++++++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 86f066c6..b644548c 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -162,8 +162,9 @@ constexpr std::array, kPlayerOnTarget + 
1> kTinyColors{{ void SokobanEnv::WriteState(float reward) { auto state = Allocate(); - if(unmatched_boxes_ == 0) { - // Never mark the episode as truncated if we're getting the big final reward. + if (unmatched_boxes_ == 0) { + // Never mark the episode as truncated if we're getting the big final + // reward. state["trunc"_] = false; } state["reward"_] = reward; diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 76db0976..574bd792 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -172,7 +172,6 @@ def test_xla() -> None: handle, recv, send, step = env.xla() - SOLVE_LEVEL_ZERO: str = "222200001112330322210" TINY_COLORS: list[tuple[tuple[int, int, int], str]] = [ ((0, 0, 0), "#"), @@ -209,16 +208,16 @@ def print_obs(obs: np.ndarray): "3": 3, } + def make_1d_array(action: int | str) -> np.ndarray: return np.array(int(action))[None] - @pytest.mark.parametrize("solve_on_time", [True, False]) def test_solved_level_does_not_truncate(solve_on_time: bool): """ - Test that a level that gets solved just in time does not get truncated. But if it does not get solved just in time, it - gets truncated. + Test that a level that gets solved just in time does not get truncated. But if + it does not get solved just in time, it gets truncated. 
""" max_episode_steps = len(SOLVE_LEVEL_ZERO) env = envpool.make( @@ -234,29 +233,33 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): env.reset() # Load level 0 for a in SOLVE_LEVEL_ZERO[:-1]: - obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[a])) + obs, reward, term, trunc, infos = env.step( + make_1d_array(action_astar_to_envpool[a]) + ) # print_obs(obs[0]) - assert not term and not trunc, "Level should not have reached time limit yet" + assert not term and not trunc, "Level should not have reached time limit" NOOP = 0 if solve_on_time: - obs, reward, term, trunc, infos = env.step(make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]])) - # print_obs(obs[0]) - assert reward == env.spec.config.reward_step + env.spec.config.reward_box + env.spec.config.reward_finished, ( - f"the level wasn't solved successfully. Level: {print_obs(obs[0])}" + obs, reward, term, trunc, infos = env.step( + make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]]) ) - assert term and not trunc, "Level should have finished within the time limit" + # print_obs(obs[0]) + assert reward == ( + env.spec.config.reward_step + env.spec.config.reward_box + + env.spec.config.reward_finished + ), (f"the level wasn't solved successfully. 
Level: {print_obs(obs[0])}") + assert term and not trunc, "Level should finish within the time limit" else: obs, reward, term, trunc, infos = env.step(make_1d_array(NOOP)) - assert not term and trunc, "Level should get truncated at precisely this step" + assert not term and trunc, "Level should truncate at precisely this step" - _, _, term, trunc, _ =env.step(make_1d_array(NOOP)) + _, _, term, trunc, _ = env.step(make_1d_array(NOOP)) assert not term and not trunc, "Level should reset correctly" - def test_astar_log() -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" with tempfile.NamedTemporaryFile() as f: @@ -264,8 +267,8 @@ def test_astar_log() -> None: return subprocess.run( [ - "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", level_file_name, - log_file_name, "1" + "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", + level_file_name, log_file_name, "1" ], check=True, cwd="/app", From e4876f87c9939eaf3efd0b8b577e5833394771eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Apr 2024 16:31:41 -0700 Subject: [PATCH 43/60] Prevent JVM from running out of memory --- .circleci/config.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2acb62b2..2fbcb01f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -71,10 +71,11 @@ jobs: auth: username: "$GHCR_DOCKER_USER" password: "$GHCR_DOCKER_TOKEN" - resource_class: medium + resource_class: large working_directory: /app steps: - checkout - run: name: Run tests - command: make bazel-test + command: | + BAZEL_OPT=--host_jvm_args=-Xmx3g make bazel-test From 1621dafd46e6cee11bcbb871dc4b3b4a595302c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 15 Apr 2024 16:38:56 -0700 Subject: [PATCH 44/60] Load other levels first --- envpool/sokoban/sokoban_py_envpool_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 574bd792..ca49b368 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -230,6 +230,9 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): levels_dir="/app/envpool/sokoban/sample_levels", load_sequentially=True, ) + # Skip levels in 000.txt and 001.txt + for _ in range(3 + 3): + env.reset() env.reset() # Load level 0 for a in SOLVE_LEVEL_ZERO[:-1]: From 0e9785ced3342923da1dee9366071de05b372dde Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 00:32:35 +0530 Subject: [PATCH 45/60] reduce action space to 4 --- envpool/sokoban/sokoban_envpool.cc | 7 +------ envpool/sokoban/sokoban_envpool.h | 15 +++++---------- envpool/sokoban/sokoban_py_envpool_test.py | 4 +++- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 2d139b08..93f14176 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -75,13 +75,8 @@ void SokobanEnv::Step(const Action& action_dict) { current_step_++; const int action = action_dict["action"_]; - if (action == kActNoop) { - WriteState(static_cast(reward_step_)); - return; - } - // From here on, assume the agent will try to move - const int change_coordinates_idx = (action - 1) % kChangeCoordinates.size(); + const int change_coordinates_idx = (action) % kChangeCoordinates.size(); const int delta_x = kChangeCoordinates.at(change_coordinates_idx).at(0); const int delta_y = kChangeCoordinates.at(change_coordinates_idx).at(1); diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index 77bee609..a76951ef 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -29,16 +29,11 @@ namespace sokoban { -constexpr int kActNoop = 0; -constexpr int kActPushUp = 1; -constexpr int 
kActPushDown = 2; -constexpr int kActPushLeft = 3; -constexpr int kActPushRight = 4; -constexpr int kActMoveUp = 5; -constexpr int kActMoveDown = 6; -constexpr int kActMoveLeft = 7; -constexpr int kActMoveRight = 8; -constexpr int kMaxAction = kActMoveRight; +constexpr int kActPushUp = 0; +constexpr int kActPushDown = 1; +constexpr int kActPushLeft = 2; +constexpr int kActPushRight = 3; +constexpr int kMaxAction = kActPushRight; class SokobanEnvFns { public: diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 9eaf31d4..2538e7b4 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -76,8 +76,10 @@ def test_envpool() -> None: _ = env.reset() t = time.time() + + assert env.action_space.n == 4 for _ in range(total_steps): - _ = env.step(np.random.randint(low=0, high=9, size=(num_envs,))) + _ = env.step(np.random.randint(low=0, high=4, size=(num_envs,))) duration = time.time() - t fps = total_steps * batch / duration print(f"FPS = {fps:.6f}") From 1d6e81b608e4648b7ccf82de805a7fd6c77fa1c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Wed, 24 Apr 2024 22:33:27 -0700 Subject: [PATCH 46/60] Fix review --- envpool/sokoban/sokoban_py_envpool_test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index ca49b368..e7c6d7ab 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -239,7 +239,6 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): obs, reward, term, trunc, infos = env.step( make_1d_array(action_astar_to_envpool[a]) ) - # print_obs(obs[0]) assert not term and not trunc, "Level should not have reached time limit" NOOP = 0 @@ -248,7 +247,6 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): obs, reward, term, trunc, infos = env.step( 
make_1d_array(action_astar_to_envpool[SOLVE_LEVEL_ZERO[-1]]) ) - # print_obs(obs[0]) assert reward == ( env.spec.config.reward_step + env.spec.config.reward_box + env.spec.config.reward_finished @@ -267,7 +265,6 @@ def test_astar_log() -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" with tempfile.NamedTemporaryFile() as f: log_file_name = f.name - return subprocess.run( [ "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", From d467459732772cf7b07d8478717aad0a5990d24b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Wed, 24 Apr 2024 22:57:17 -0700 Subject: [PATCH 47/60] Fix incorrect truncation --- envpool/sokoban/sokoban_envpool.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index b644548c..b897e0fc 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -166,7 +166,12 @@ void SokobanEnv::WriteState(float reward) { // Never mark the episode as truncated if we're getting the big final // reward. state["trunc"_] = false; + } else if (IsDone()) { + // But if there are unmatched boxes and the current step is the last + // one we will get, truncate the episode. 
+ state["trunc"_] = true; } + state["reward"_] = reward; Array& obs = state["obs"_]; if (obs.size != 3 * world_.size()) { From 534a0860b9422624b024401c5abc520a08879c13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Wed, 24 Apr 2024 22:57:38 -0700 Subject: [PATCH 48/60] Explicitly skip astar_log test --- envpool/sokoban/sokoban_py_envpool_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index e7c6d7ab..43670c1b 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -261,6 +261,7 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): assert not term and not trunc, "Level should reset correctly" +@pytest.mark.skip def test_astar_log() -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" with tempfile.NamedTemporaryFile() as f: From b56251cef1ab8676ce18eb84868b22cf4a1ba37f Mon Sep 17 00:00:00 2001 From: Mohammad Taufeeque <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 13:04:49 +0530 Subject: [PATCH 49/60] remove modulo on action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adrià Garriga-Alonso --- envpool/sokoban/sokoban_envpool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 93f14176..9ad83980 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -76,7 +76,7 @@ void SokobanEnv::Step(const Action& action_dict) { const int action = action_dict["action"_]; - const int change_coordinates_idx = (action) % kChangeCoordinates.size(); + const int change_coordinates_idx = action; const int delta_x = kChangeCoordinates.at(change_coordinates_idx).at(0); const int delta_y = kChangeCoordinates.at(change_coordinates_idx).at(1); From 2c18ac4a5bf8cc7259ee924d307605a3bdc4aa09 Mon
Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 13:22:46 +0530 Subject: [PATCH 50/60] remove test skipping --- envpool/sokoban/sokoban_py_envpool_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 35e6e97a..f6fba988 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -263,7 +263,6 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): assert not term and not trunc, "Level should reset correctly" -@pytest.mark.skip def test_astar_log() -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" with tempfile.NamedTemporaryFile() as f: From d8ccb0b373fa00208435c1152f34970244ca0316 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 13:30:08 +0530 Subject: [PATCH 51/60] fix action mapping error in test --- envpool/sokoban/sokoban_py_envpool_test.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index f6fba988..965db9e7 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -204,10 +204,10 @@ def print_obs(obs: np.ndarray): action_astar_to_envpool = { - "0": 1, - "1": 4, - "2": 2, - "3": 3, + "0": 0, + "1": 3, + "2": 1, + "3": 2, } From 129f0eda20276e114595a544df6ac1ed15f9e43c Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 14:57:11 +0530 Subject: [PATCH 52/60] sort files while loading levels and fix test_astar_log --- envpool/sokoban/level_loader.cc | 5 ++++ envpool/sokoban/sokoban_py_envpool_test.py | 35 ++++++++++------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index 0689cfa2..56c6fdba 100644 --- 
a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -42,6 +42,11 @@ LevelLoader::LevelLoader(const std::filesystem::path& base_path, level_file_paths_.push_back(entry.path()); } } + std::sort( + level_file_paths_.begin(), level_file_paths_.end(), + [](const std::filesystem::path& a, const std::filesystem::path& b) { + return a.filename().string() < b.filename().string(); + }); } cur_file_ = level_file_paths_.begin(); } diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 965db9e7..d3eb8ad1 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -111,7 +111,7 @@ def test_envpool_load_sequentially(capfd) -> None: levels_dir = "/app/envpool/sokoban/sample_levels" files = glob.glob(f"{levels_dir}/*.txt") levels_by_files = [] - for file in files: + for file in sorted(files): with open(file, "r") as f: text = f.read() levels = text.split("\n;") @@ -243,7 +243,7 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): ) assert not term and not trunc, "Level should not have reached time limit" - NOOP = 0 + wrong_action = str((int(SOLVE_LEVEL_ZERO[-1]) + 1) % 4) if solve_on_time: obs, reward, term, trunc, infos = env.step( @@ -256,29 +256,26 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): assert term and not trunc, "Level should finish within the time limit" else: - obs, reward, term, trunc, infos = env.step(make_1d_array(NOOP)) + obs, reward, term, trunc, infos = env.step(make_1d_array(wrong_action)) assert not term and trunc, "Level should truncate at precisely this step" - _, _, term, trunc, _ = env.step(make_1d_array(NOOP)) + _, _, term, trunc, _ = env.step(make_1d_array(wrong_action)) assert not term and not trunc, "Level should reset correctly" -def test_astar_log() -> None: +def test_astar_log(tmp_path) -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt" - with tempfile.NamedTemporaryFile() as 
f: - log_file_name = f.name - subprocess.run( - [ - "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", - level_file_name, log_file_name, "1" - ], - check=True, - cwd="/app", - env=dict(HOME="/root"), - ) - with open(log_file_name, "r") as f: - log = f.read() - assert f"1, {SOLVE_LEVEL_ZERO}, 21, 1443" == log.split("\n")[1] + log_file_name = tmp_path / "log_file.csv" + subprocess.run( + [ + "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", + level_file_name, str(log_file_name), "1" + ], + check=True, + cwd="/app", + ) + log = log_file_name.read_text() + assert f"0,{SOLVE_LEVEL_ZERO},21,1380" == log.split("\n")[1] if __name__ == "__main__": From a74ce4f8e9739b5a8a9c87d228a739822d92a2b4 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 19:47:26 +0530 Subject: [PATCH 53/60] fix test and lint --- envpool/sokoban/sokoban_py_envpool_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index d3eb8ad1..6b80b821 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -17,7 +17,6 @@ import re import subprocess import sys -import tempfile import time import numpy as np @@ -268,11 +267,12 @@ def test_astar_log(tmp_path) -> None: log_file_name = tmp_path / "log_file.csv" subprocess.run( [ - "/root/go/bin/bazel", "run", "//envpool/sokoban:astar_log", "--", + "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run", "//envpool/sokoban:astar_log", "--", level_file_name, str(log_file_name), "1" ], check=True, - cwd="/app", + cwd="/app/envpool", + env={"HOME": "/root", "PATH": "/opt/conda/bin:/usr/bin"}, ) log = log_file_name.read_text() assert f"0,{SOLVE_LEVEL_ZERO},21,1380" == log.split("\n")[1] From 3c2cf1392523f99b5cb5af6f7df96996569b4fa1 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 19:52:16 +0530 
Subject: [PATCH 54/60] fix lint --- envpool/sokoban/sokoban_py_envpool_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 6b80b821..d0a8d1d6 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -267,7 +267,8 @@ def test_astar_log(tmp_path) -> None: log_file_name = tmp_path / "log_file.csv" subprocess.run( [ - "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run", "//envpool/sokoban:astar_log", "--", + "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run", + "//envpool/sokoban:astar_log", "--", level_file_name, str(log_file_name), "1" ], check=True, From 9c6a5ccc1cef031686bc7245cfde4550f060e342 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Thu, 25 Apr 2024 19:58:18 +0530 Subject: [PATCH 55/60] fix lint --- envpool/sokoban/sokoban_py_envpool_test.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index d0a8d1d6..198ff34a 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -268,12 +268,15 @@ def test_astar_log(tmp_path) -> None: subprocess.run( [ "/root/go/bin/bazel", f"--output_base={str(tmp_path)}", "run", - "//envpool/sokoban:astar_log", "--", - level_file_name, str(log_file_name), "1" + "//envpool/sokoban:astar_log", "--", level_file_name, + str(log_file_name), "1" ], check=True, cwd="/app/envpool", - env={"HOME": "/root", "PATH": "/opt/conda/bin:/usr/bin"}, + env={ + "HOME": "/root", + "PATH": "/opt/conda/bin:/usr/bin" + }, ) log = log_file_name.read_text() assert f"0,{SOLVE_LEVEL_ZERO},21,1380" == log.split("\n")[1] From 1abbeb821b1ce4426f3ee2a552f094a2df0cff4e Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 28 May 2024 04:55:55 +0530 Subject: [PATCH 
56/60] fix delayed reset bug --- envpool/sokoban/sokoban_envpool.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index c4e6affc..c7e6219b 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -142,7 +142,11 @@ void SokobanEnv::Step(const Action& action_dict) { reward_box_ * static_cast(prev_unmatched_boxes - unmatched_boxes_) + ((unmatched_boxes_ == 0) ? reward_finished_ : 0.0f); - WriteState(static_cast(reward)); + if (IsDone()) { + Reset(); + } else { + WriteState(static_cast(reward)); + } } constexpr std::array, kPlayerOnTarget + 1> kTinyColors{{ From 268c93dfdda32fedb9dc671a84617c4de1985577 Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Tue, 28 May 2024 05:49:18 +0530 Subject: [PATCH 57/60] update the reset function --- envpool/sokoban/sokoban_envpool.cc | 17 +++++++++++------ envpool/sokoban/sokoban_envpool.h | 1 + 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index c7e6219b..5e5db8c6 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -24,7 +24,7 @@ namespace sokoban { -void SokobanEnv::Reset() { +void SokobanEnv::ResetWithoutWrite() { const int max_episode_steps = spec_.config["max_episode_steps"_]; const int min_episode_steps = spec_.config["min_episode_steps"_]; current_max_episode_steps_ = @@ -52,6 +52,10 @@ void SokobanEnv::Reset() { } } current_step_ = 0; +} + +void SokobanEnv::Reset() { + ResetWithoutWrite(); WriteState(0.0f); } @@ -142,11 +146,8 @@ void SokobanEnv::Step(const Action& action_dict) { reward_box_ * static_cast(prev_unmatched_boxes - unmatched_boxes_) + ((unmatched_boxes_ == 0) ? 
reward_finished_ : 0.0f); - if (IsDone()) { - Reset(); - } else { - WriteState(static_cast(reward)); - } + + WriteState(static_cast(reward)); } constexpr std::array, kPlayerOnTarget + 1> kTinyColors{{ @@ -181,6 +182,10 @@ void SokobanEnv::WriteState(float reward) { throw std::runtime_error(msg.str()); } + if (IsDone()) { + ResetWithoutWrite(); + } + std::vector out(3 * world_.size()); for (int rgb = 0; rgb < 3; rgb++) { for (size_t i = 0; i < world_.size(); i++) { diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index a76951ef..f0138b20 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -115,6 +115,7 @@ class SokobanEnv : public Env { [[nodiscard]] uint8_t WorldAt(int x, int y) const; void WorldAssignAt(int x, int y, uint8_t value); + void ResetWithoutWrite(); }; using SokobanEnvPool = AsyncEnvPool; From 4098670966fa1e2b886943434c3ad119166102bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 27 May 2024 22:38:52 -0400 Subject: [PATCH 58/60] Explain why ResetWithoutWrite with comment --- envpool/sokoban/sokoban_envpool.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 5e5db8c6..94e874b6 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -183,6 +183,7 @@ void SokobanEnv::WriteState(float reward) { } if (IsDone()) { + // If this episode truncates or terminates, the observation should be the one for the next episode. 
ResetWithoutWrite(); } From 6b1b577d883ec50acebda31a5166906f3b449f81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A0=20Garriga-Alonso?= Date: Mon, 27 May 2024 23:45:06 -0400 Subject: [PATCH 59/60] Cap line to 80 --- envpool/sokoban/sokoban_envpool.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/envpool/sokoban/sokoban_envpool.cc b/envpool/sokoban/sokoban_envpool.cc index 94e874b6..8bbbea2c 100644 --- a/envpool/sokoban/sokoban_envpool.cc +++ b/envpool/sokoban/sokoban_envpool.cc @@ -183,7 +183,8 @@ void SokobanEnv::WriteState(float reward) { } if (IsDone()) { - // If this episode truncates or terminates, the observation should be the one for the next episode. + // If this episode truncates or terminates, the observation should be the + // one for the next episode. ResetWithoutWrite(); } From c25428b1bed59b5617834141c6c75819431f16ec Mon Sep 17 00:00:00 2001 From: taufeeque9 <9taufeeque9@gmail.com> Date: Fri, 28 Jun 2024 03:34:24 +0530 Subject: [PATCH 60/60] add fix in level loader and test --- envpool/sokoban/level_loader.cc | 19 +++++--- envpool/sokoban/level_loader.h | 8 ++-- envpool/sokoban/sokoban_envpool.h | 1 + envpool/sokoban/sokoban_py_envpool_test.py | 55 +++++++++++++++++++++- 4 files changed, 72 insertions(+), 11 deletions(-) diff --git a/envpool/sokoban/level_loader.cc b/envpool/sokoban/level_loader.cc index 56c6fdba..faed8197 100644 --- a/envpool/sokoban/level_loader.cc +++ b/envpool/sokoban/level_loader.cc @@ -29,10 +29,11 @@ namespace sokoban { LevelLoader::LevelLoader(const std::filesystem::path& base_path, bool load_sequentially, int n_levels_to_load, - int verbose) + int env_id, int num_envs, int verbose) : load_sequentially_(load_sequentially), n_levels_to_load_(n_levels_to_load), - cur_level_(levels_.begin()), + num_envs_(num_envs), + cur_level_(env_id), verbose(verbose) { if (std::filesystem::is_regular_file(base_path)) { level_file_paths_.push_back(base_path); @@ -49,6 +50,10 @@ LevelLoader::LevelLoader(const 
std::filesystem::path& base_path, }); } cur_file_ = level_file_paths_.begin(); + if (n_levels_to_load_ > 0 && n_levels_to_load_ % num_envs_ != 0) { + throw std::runtime_error( + "n_levels_to_load must be a multiple of num_envs."); + } } static const std::array kPrintLevelKey{ @@ -183,15 +188,15 @@ std::vector::iterator LevelLoader::GetLevel(std::mt19937& gen) { if (n_levels_to_load_ > 0 && levels_loaded_ >= n_levels_to_load_) { throw std::runtime_error("Loaded all requested levels."); } - if (cur_level_ == levels_.end()) { + while (cur_level_ >= levels_.size()) { + cur_level_ -= levels_.size(); LoadFile(gen); - cur_level_ = levels_.begin(); - if (cur_level_ == levels_.end()) { + if (levels_.size() == 0) { throw std::runtime_error("No levels loaded."); } } - auto out = cur_level_; - cur_level_++; + auto out = levels_.begin() + cur_level_; + cur_level_ += num_envs_; levels_loaded_++; return out; } diff --git a/envpool/sokoban/level_loader.h b/envpool/sokoban/level_loader.h index ced5e60a..d8a07c16 100644 --- a/envpool/sokoban/level_loader.h +++ b/envpool/sokoban/level_loader.h @@ -39,8 +39,10 @@ class LevelLoader { bool load_sequentially_; int n_levels_to_load_; int levels_loaded_{0}; + int env_id_{0}; + int num_envs_{1}; std::vector levels_{0}; - std::vector::iterator cur_level_; + int cur_level_; std::vector level_file_paths_{0}; std::vector::iterator cur_file_; void LoadFile(std::mt19937& gen); @@ -50,8 +52,8 @@ class LevelLoader { std::vector::iterator GetLevel(std::mt19937& gen); explicit LevelLoader(const std::filesystem::path& base_path, - bool load_sequentially, int n_levels_to_load, - int verbose = 0); + bool load_sequentially, int n_levels_to_load, int env_id, + int num_envs, int verbose = 0); }; void PrintLevel(std::ostream& os, const SokobanLevel& vec); diff --git a/envpool/sokoban/sokoban_envpool.h b/envpool/sokoban/sokoban_envpool.h index f0138b20..d2cd597d 100644 --- a/envpool/sokoban/sokoban_envpool.h +++ b/envpool/sokoban/sokoban_envpool.h @@ -70,6 
+70,7 @@ class SokobanEnv : public Env { levels_dir_{static_cast(spec.config["levels_dir"_])}, level_loader_(levels_dir_, spec.config["load_sequentially"_], static_cast(spec.config["n_levels_to_load"_]), + env_id, static_cast(spec.config["num_envs"_]), static_cast(spec.config["verbose"_])), world_(kWall, static_cast(dim_room_ * dim_room_)), verbose_(static_cast(spec.config["verbose"_])), diff --git a/envpool/sokoban/sokoban_py_envpool_test.py b/envpool/sokoban/sokoban_py_envpool_test.py index 198ff34a..969eb871 100644 --- a/envpool/sokoban/sokoban_py_envpool_test.py +++ b/envpool/sokoban/sokoban_py_envpool_test.py @@ -25,7 +25,8 @@ import envpool # noqa: F401 import envpool.sokoban.registration from envpool.sokoban.sokoban_envpool import _SokobanEnvSpec - +from pathlib import Path +from typing import List def test_config() -> None: ref_config_keys = [ @@ -261,6 +262,58 @@ def test_solved_level_does_not_truncate(solve_on_time: bool): _, _, term, trunc, _ = env.step(make_1d_array(wrong_action)) assert not term and not trunc, "Level should reset correctly" +def read_levels_file(fpath: Path) -> List[List[str]]: + maps = [] + current_map = [] + with open(fpath, "r") as sf: + for line in sf.readlines(): + if ";" in line and current_map: + maps.append(current_map) + current_map = [] + if "#" == line[0]: + current_map.append(line.strip()) + + maps.append(current_map) + return maps + +def test_load_sequentially_with_multiple_envs() -> None: + levels_dir = "/app/envpool/sokoban/sample_levels" + files = glob.glob(f"{levels_dir}/*.txt") + levels_by_files = [] + total_levels, num_envs = 8, 2 + for file in sorted(files): + levels = read_levels_file(file) + levels_by_files.extend(levels) + assert len(levels_by_files) == total_levels, "8 levels stored in files." 
+ + env = envpool.make( + "Sokoban-v0", + env_type="gymnasium", + num_envs=num_envs, + batch_size=num_envs, + max_episode_steps=60, + min_episode_steps=60, + levels_dir=levels_dir, + load_sequentially=True, + n_levels_to_load=total_levels, + verbose=2, + ) + dim_room = env.spec.config.dim_room + printed_obs = [] + for _ in range(total_levels // num_envs): + obs, _ = env.reset() + assert obs.shape == ( + num_envs, + 3, + dim_room, + dim_room, + ), f"obs shape: {obs.shape}" + for idx in range(num_envs): + printed_obs.append(print_obs(obs[idx])) + for i, level in enumerate(levels_by_files): + for j, line in enumerate(level): + assert printed_obs[i][j] == line, f"Level {i} is not loaded correctly." + def test_astar_log(tmp_path) -> None: level_file_name = "/app/envpool/sokoban/sample_levels/small.txt"