From 700f646626c46b34292ae72b7007305afc85e8c0 Mon Sep 17 00:00:00 2001 From: John Agapiou Date: Fri, 25 Nov 2022 17:52:58 -0800 Subject: [PATCH] Release version 2.0.0 https://github.com/deepmind/meltingpot/issues/81 PiperOrigin-RevId: 490966899 Change-Id: I7600da4f421ae3385829cc96403ce4824140dcce --- CHANGELOG.md | 20 + install.sh | 4 +- .../allelopathic_harvest/components.lua | 5 +- .../lua/levels/allelopathic_harvest/init.lua | 6 +- .../lua/levels/boat_race/components.lua | 887 +++ meltingpot/lua/levels/boat_race/init.lua | 65 + meltingpot/lua/levels/clean_up/components.lua | 137 +- meltingpot/lua/levels/clean_up/init.lua | 6 +- meltingpot/lua/levels/coins/components.lua | 341 ++ meltingpot/lua/levels/coins/init.lua | 37 + .../collaborative_cooking/components.lua | 36 +- .../lua/levels/collaborative_cooking/init.lua | 4 +- .../lua/levels/commons_harvest/components.lua | 13 +- .../lua/levels/commons_harvest/init.lua | 4 +- .../lua/levels/coop_mining/components.lua | 293 + meltingpot/lua/levels/coop_mining/init.lua | 37 + meltingpot/lua/levels/daycare/components.lua | 741 +++ meltingpot/lua/levels/daycare/init.lua | 37 + .../externality_mushrooms/components.lua | 387 ++ .../lua/levels/externality_mushrooms/init.lua | 37 + .../factory_of_the_commons/components.lua | 838 +++ .../levels/factory_of_the_commons/init.lua | 79 + .../levels/gift_refinements/components.lua | 405 ++ .../lua/levels/gift_refinements/init.lua | 37 + .../lua/levels/grid_land/components.lua | 134 +- meltingpot/lua/levels/grid_land/init.lua | 4 +- .../levels/paintball/shared_components.lua | 70 +- .../components.lua | 21 +- .../init.lua | 7 +- .../components.lua | 119 +- .../init.lua | 7 +- .../lua/levels/predator_prey/components.lua | 842 +++ meltingpot/lua/levels/predator_prey/init.lua | 61 + .../lua/levels/stamina/shared_components.lua | 444 ++ .../lua/levels/territory/components.lua | 128 +- meltingpot/lua/levels/territory/init.lua | 24 +- .../lua/levels/the_matrix/components.lua | 657 ++- 
meltingpot/lua/levels/the_matrix/init.lua | 2 +- meltingpot/lua/levels/trade/components.lua | 1052 ++++ meltingpot/lua/levels/trade/init.lua | 43 + meltingpot/lua/modules/base_simulation.lua | 3 - meltingpot/lua/modules/base_simulation_v2.lua | 624 -- meltingpot/python/bot.py | 72 +- meltingpot/python/bot_test.py | 27 +- meltingpot/python/configs/bots/__init__.py | 3750 ++++++++++-- .../python/configs/bots/bot_configs_test.py | 4 +- .../python/configs/scenarios/__init__.py | 5188 +++++++++++++---- .../scenarios/scenario_configs_test.py | 26 +- .../python/configs/substrates/__init__.py | 110 +- .../substrates/allelopathic_harvest.py | 394 +- .../substrates/allelopathic_harvest__open.py | 84 + ...ach_or_stravinsky_in_the_matrix__arena.py} | 180 +- ...h_or_stravinsky_in_the_matrix__repeated.py | 565 ++ .../python/configs/substrates/boat_race.py | 900 +++ .../boat_race__eight_races.py} | 21 +- .../chemistry__three_metabolic_cycles.py | 502 ++ ...bolic_cycles_with_plentiful_distractors.py | 516 ++ ....py => chemistry__two_metabolic_cycles.py} | 225 +- ...__two_metabolic_cycles_with_distractors.py | 446 ++ .../chemistry_branched_chain_reaction.py | 317 - ...rix.py => chicken_in_the_matrix__arena.py} | 172 +- .../chicken_in_the_matrix__repeated.py | 605 ++ .../python/configs/substrates/clean_up.py | 429 +- meltingpot/python/configs/substrates/coins.py | 526 ++ .../substrates/collaborative_cooking.py | 226 +- .../collaborative_cooking__asymmetric.py | 76 + .../collaborative_cooking__circuit.py | 76 + .../collaborative_cooking__cramped.py | 72 + .../collaborative_cooking__crowded.py | 81 + .../collaborative_cooking__figure_eight.py | 78 + .../collaborative_cooking__forced.py | 77 + .../substrates/collaborative_cooking__ring.py | 74 + .../collaborative_cooking_impassable.py | 39 - .../collaborative_cooking_passable.py | 35 - ...t_closed.py => commons_harvest__closed.py} | 299 +- ...rvest_open.py => commons_harvest__open.py} | 286 +- ...hip.py => 
commons_harvest__partnership.py} | 300 +- .../python/configs/substrates/coop_mining.py | 502 ++ .../python/configs/substrates/daycare.py | 937 +++ .../substrates/externality_mushrooms.py | 1077 ++++ .../externality_mushrooms__dense.py | 91 + .../configs/substrates/factory_commons.py | 2407 ++++++++ .../substrates/factory_commons__either_or.py | 108 + .../python/configs/substrates/fruit_market.py | 1229 ++++ .../fruit_market__concentric_rivers.py | 116 + .../configs/substrates/gift_refinements.py | 498 ++ ...flag.py => paintball__capture_the_flag.py} | 88 +- ...hill.py => paintball__king_of_the_hill.py} | 205 +- .../configs/substrates/predator_prey.py | 1718 ++++++ .../substrates/predator_prey__alley_hunt.py | 104 + .../configs/substrates/predator_prey__open.py | 113 + .../substrates/predator_prey__orchard.py | 113 + .../predator_prey__random_forest.py | 120 + ...prisoners_dilemma_in_the_matrix__arena.py} | 152 +- ...isoners_dilemma_in_the_matrix__repeated.py | 605 ++ ...pure_coordination_in_the_matrix__arena.py} | 157 +- ...re_coordination_in_the_matrix__repeated.py | 610 ++ ...able_coordination_in_the_matrix__arena.py} | 157 +- ...le_coordination_in_the_matrix__repeated.py | 610 ++ .../substrates/reaction_graph_utils.py | 613 ++ ...ing_with_scissors_in_the_matrix__arena.py} | 202 +- ..._with_scissors_in_the_matrix__one_shot.py} | 179 +- ...g_with_scissors_in_the_matrix__repeated.py | 644 ++ ...x.py => stag_hunt_in_the_matrix__arena.py} | 190 +- .../stag_hunt_in_the_matrix__repeated.py | 604 ++ .../python/configs/substrates/territory.py | 893 +++ .../substrates/territory__inside_out.py | 111 + .../configs/substrates/territory__open.py | 121 + .../configs/substrates/territory__rooms.py | 106 + .../configs/substrates/territory_open.py | 537 -- .../configs/substrates/territory_rooms.py | 524 -- .../python/configs/substrates/the_matrix.py | 164 + .../play_allelopathic_harvest.py | 22 +- .../play_anything_in_the_matrix.py | 98 +- 
.../python/human_players/play_boat_race.py | 96 + .../python/human_players/play_chemistry.py | 90 + .../python/human_players/play_clean_up.py | 38 +- meltingpot/python/human_players/play_coins.py | 83 + .../play_collaborative_cooking.py | 60 +- .../human_players/play_commons_harvest.py | 29 +- ..._paintball_game.py => play_coop_mining.py} | 27 +- .../python/human_players/play_daycare.py | 82 + .../play_externality_mushrooms.py | 99 + .../human_players/play_factory_commons.py | 88 + .../python/human_players/play_fruit_market.py | 130 + ..._grid_land.py => play_gift_refinements.py} | 28 +- .../python/human_players/play_level_test.py | 190 +- .../python/human_players/play_paintball.py | 86 + .../human_players/play_predator_and_prey.py | 94 + .../python/human_players/play_territory.py | 26 +- meltingpot/python/scenario.py | 136 +- meltingpot/python/scenario_test.py | 24 +- meltingpot/python/substrate.py | 62 +- meltingpot/python/substrate_test.py | 19 +- .../utils/policies/saved_model_policy.py | 136 +- .../utils/policies/saved_model_policy_v2.py | 210 - .../alternator.py | 0 .../alternator_test.py | 4 +- .../python/utils/puppeteers/clean_up.py | 197 +- .../clean_up_test.py | 4 +- .../{puppeteers_v2 => puppeteers}/coins.py | 0 .../coins_test.py | 4 +- .../coordination_in_the_matrix.py | 2 +- .../coordination_in_the_matrix_test.py | 6 +- .../fixed_goal.py | 0 .../fixed_goal_test.py | 4 +- .../gift_refinements.py | 0 .../gift_refinements_test.py | 4 +- .../python/utils/puppeteers/in_the_matrix.py | 621 +- .../in_the_matrix_test.py | 4 +- .../running_with_scissors_in_the_matrix.py | 2 +- ...unning_with_scissors_in_the_matrix_test.py | 6 +- .../testutils.py | 0 .../python/utils/puppeteers_v2/clean_up.py | 141 - .../utils/puppeteers_v2/in_the_matrix.py | 590 -- .../utils/scenarios/substrate_transforms.py | 86 - .../python/utils/substrates/builder_test.py | 15 +- setup.py | 2 +- 158 files changed, 38489 insertions(+), 7237 deletions(-) create mode 100644 
meltingpot/lua/levels/boat_race/components.lua create mode 100644 meltingpot/lua/levels/boat_race/init.lua create mode 100644 meltingpot/lua/levels/coins/components.lua create mode 100644 meltingpot/lua/levels/coins/init.lua create mode 100644 meltingpot/lua/levels/coop_mining/components.lua create mode 100644 meltingpot/lua/levels/coop_mining/init.lua create mode 100644 meltingpot/lua/levels/daycare/components.lua create mode 100644 meltingpot/lua/levels/daycare/init.lua create mode 100644 meltingpot/lua/levels/externality_mushrooms/components.lua create mode 100644 meltingpot/lua/levels/externality_mushrooms/init.lua create mode 100644 meltingpot/lua/levels/factory_of_the_commons/components.lua create mode 100644 meltingpot/lua/levels/factory_of_the_commons/init.lua create mode 100644 meltingpot/lua/levels/gift_refinements/components.lua create mode 100644 meltingpot/lua/levels/gift_refinements/init.lua rename meltingpot/lua/levels/{paintball_capture_the_flag => paintball__capture_the_flag}/components.lua (95%) rename meltingpot/lua/levels/{paintball_capture_the_flag => paintball__capture_the_flag}/init.lua (93%) rename meltingpot/lua/levels/{paintball_king_of_the_hill => paintball__king_of_the_hill}/components.lua (77%) rename meltingpot/lua/levels/{paintball_king_of_the_hill => paintball__king_of_the_hill}/init.lua (93%) create mode 100644 meltingpot/lua/levels/predator_prey/components.lua create mode 100644 meltingpot/lua/levels/predator_prey/init.lua create mode 100644 meltingpot/lua/levels/stamina/shared_components.lua create mode 100644 meltingpot/lua/levels/trade/components.lua create mode 100644 meltingpot/lua/levels/trade/init.lua delete mode 100644 meltingpot/lua/modules/base_simulation_v2.lua create mode 100644 meltingpot/python/configs/substrates/allelopathic_harvest__open.py rename meltingpot/python/configs/substrates/{bach_or_stravinsky_in_the_matrix.py => bach_or_stravinsky_in_the_matrix__arena.py} (79%) create mode 100644 
meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix__repeated.py create mode 100644 meltingpot/python/configs/substrates/boat_race.py rename meltingpot/python/configs/{__init__.py => substrates/boat_race__eight_races.py} (51%) create mode 100644 meltingpot/python/configs/substrates/chemistry__three_metabolic_cycles.py create mode 100644 meltingpot/python/configs/substrates/chemistry__three_metabolic_cycles_with_plentiful_distractors.py rename meltingpot/python/configs/substrates/{chemistry_metabolic_cycles.py => chemistry__two_metabolic_cycles.py} (70%) create mode 100644 meltingpot/python/configs/substrates/chemistry__two_metabolic_cycles_with_distractors.py delete mode 100644 meltingpot/python/configs/substrates/chemistry_branched_chain_reaction.py rename meltingpot/python/configs/substrates/{chicken_in_the_matrix.py => chicken_in_the_matrix__arena.py} (80%) create mode 100644 meltingpot/python/configs/substrates/chicken_in_the_matrix__repeated.py create mode 100644 meltingpot/python/configs/substrates/coins.py create mode 100644 meltingpot/python/configs/substrates/collaborative_cooking__asymmetric.py create mode 100644 meltingpot/python/configs/substrates/collaborative_cooking__circuit.py create mode 100644 meltingpot/python/configs/substrates/collaborative_cooking__cramped.py create mode 100644 meltingpot/python/configs/substrates/collaborative_cooking__crowded.py create mode 100644 meltingpot/python/configs/substrates/collaborative_cooking__figure_eight.py create mode 100644 meltingpot/python/configs/substrates/collaborative_cooking__forced.py create mode 100644 meltingpot/python/configs/substrates/collaborative_cooking__ring.py delete mode 100644 meltingpot/python/configs/substrates/collaborative_cooking_impassable.py delete mode 100644 meltingpot/python/configs/substrates/collaborative_cooking_passable.py rename meltingpot/python/configs/substrates/{commons_harvest_closed.py => commons_harvest__closed.py} (67%) rename 
meltingpot/python/configs/substrates/{commons_harvest_open.py => commons_harvest__open.py} (70%) rename meltingpot/python/configs/substrates/{commons_harvest_partnership.py => commons_harvest__partnership.py} (69%) create mode 100644 meltingpot/python/configs/substrates/coop_mining.py create mode 100644 meltingpot/python/configs/substrates/daycare.py create mode 100644 meltingpot/python/configs/substrates/externality_mushrooms.py create mode 100644 meltingpot/python/configs/substrates/externality_mushrooms__dense.py create mode 100644 meltingpot/python/configs/substrates/factory_commons.py create mode 100644 meltingpot/python/configs/substrates/factory_commons__either_or.py create mode 100644 meltingpot/python/configs/substrates/fruit_market.py create mode 100644 meltingpot/python/configs/substrates/fruit_market__concentric_rivers.py create mode 100644 meltingpot/python/configs/substrates/gift_refinements.py rename meltingpot/python/configs/substrates/{capture_the_flag.py => paintball__capture_the_flag.py} (93%) rename meltingpot/python/configs/substrates/{king_of_the_hill.py => paintball__king_of_the_hill.py} (81%) create mode 100644 meltingpot/python/configs/substrates/predator_prey.py create mode 100644 meltingpot/python/configs/substrates/predator_prey__alley_hunt.py create mode 100644 meltingpot/python/configs/substrates/predator_prey__open.py create mode 100644 meltingpot/python/configs/substrates/predator_prey__orchard.py create mode 100644 meltingpot/python/configs/substrates/predator_prey__random_forest.py rename meltingpot/python/configs/substrates/{prisoners_dilemma_in_the_matrix.py => prisoners_dilemma_in_the_matrix__arena.py} (82%) create mode 100644 meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix__repeated.py rename meltingpot/python/configs/substrates/{pure_coordination_in_the_matrix.py => pure_coordination_in_the_matrix__arena.py} (84%) create mode 100644 
meltingpot/python/configs/substrates/pure_coordination_in_the_matrix__repeated.py rename meltingpot/python/configs/substrates/{rationalizable_coordination_in_the_matrix.py => rationalizable_coordination_in_the_matrix__arena.py} (84%) create mode 100644 meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix__repeated.py create mode 100644 meltingpot/python/configs/substrates/reaction_graph_utils.py rename meltingpot/python/configs/substrates/{arena_running_with_scissors_in_the_matrix.py => running_with_scissors_in_the_matrix__arena.py} (76%) rename meltingpot/python/configs/substrates/{running_with_scissors_in_the_matrix.py => running_with_scissors_in_the_matrix__one_shot.py} (82%) create mode 100644 meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__repeated.py rename meltingpot/python/configs/substrates/{stag_hunt_in_the_matrix.py => stag_hunt_in_the_matrix__arena.py} (78%) create mode 100644 meltingpot/python/configs/substrates/stag_hunt_in_the_matrix__repeated.py create mode 100644 meltingpot/python/configs/substrates/territory.py create mode 100644 meltingpot/python/configs/substrates/territory__inside_out.py create mode 100644 meltingpot/python/configs/substrates/territory__open.py create mode 100644 meltingpot/python/configs/substrates/territory__rooms.py delete mode 100644 meltingpot/python/configs/substrates/territory_open.py delete mode 100644 meltingpot/python/configs/substrates/territory_rooms.py create mode 100644 meltingpot/python/configs/substrates/the_matrix.py create mode 100644 meltingpot/python/human_players/play_boat_race.py create mode 100644 meltingpot/python/human_players/play_chemistry.py create mode 100644 meltingpot/python/human_players/play_coins.py rename meltingpot/python/human_players/{play_any_paintball_game.py => play_coop_mining.py} (71%) create mode 100644 meltingpot/python/human_players/play_daycare.py create mode 100644 meltingpot/python/human_players/play_externality_mushrooms.py 
create mode 100644 meltingpot/python/human_players/play_factory_commons.py create mode 100644 meltingpot/python/human_players/play_fruit_market.py rename meltingpot/python/human_players/{play_grid_land.py => play_gift_refinements.py} (68%) create mode 100644 meltingpot/python/human_players/play_paintball.py create mode 100644 meltingpot/python/human_players/play_predator_and_prey.py delete mode 100644 meltingpot/python/utils/policies/saved_model_policy_v2.py rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/alternator.py (100%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/alternator_test.py (94%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/clean_up_test.py (98%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/coins.py (100%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/coins_test.py (98%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/coordination_in_the_matrix.py (96%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/coordination_in_the_matrix_test.py (93%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/fixed_goal.py (100%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/fixed_goal_test.py (90%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/gift_refinements.py (100%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/gift_refinements_test.py (94%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/in_the_matrix_test.py (99%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/running_with_scissors_in_the_matrix.py (96%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/running_with_scissors_in_the_matrix_test.py (93%) rename meltingpot/python/utils/{puppeteers_v2 => puppeteers}/testutils.py (100%) delete mode 100644 meltingpot/python/utils/puppeteers_v2/clean_up.py delete mode 100644 meltingpot/python/utils/puppeteers_v2/in_the_matrix.py delete mode 100644 
meltingpot/python/utils/scenarios/substrate_transforms.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 7771d6ef..3ba360fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,26 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [2.0.0] - 2022-11-25 + +Melting Pot Version 2.0.0 release. See +[Melting Pot 2.0 Tech Report](https://storage.googleapis.com/dm-meltingpot/meltingpot-tech-report-2.0.0.pdf) +for detailed information on the new substrates, bots, and scenarios. + +### Changed + +- Removed all v1 scenarios, bots, and substrates and replaced with new versions. +- Scenarios now support heterogeneous roles, which must be specified at build + time. +- Various improvements to `examples` and their documentation. + +### Added + +- New puppeteers and policies to implement new bots. +- New utils to handle sprites, colors, and maps. +- Mocks for use in testing. + + ## [1.0.4] - 2022-08-22 ### Changed diff --git a/install.sh b/install.sh index fad052bf..94d547b1 100755 --- a/install.sh +++ b/install.sh @@ -87,7 +87,7 @@ function test_dmlab2d() { function install_meltingpot() { echo -e "\nDownloading assets..." - curl -L https://storage.googleapis.com/dm-meltingpot/meltingpot-assets-1.0.0.tar.gz \ + curl -L https://storage.googleapis.com/dm-meltingpot/meltingpot-assets-2.0.0.tar.gz \ | tar -xz --directory=meltingpot echo -e "\nInstalling meltingpot..." @@ -99,7 +99,7 @@ function install_meltingpot() { function test_meltingpot() { echo -e "\nTesting meltingpot..." 
pip install pytest-xdist - pytest -n auto -ra --durations=10 meltingpot + pytest -n auto -rax --durations=10 meltingpot } diff --git a/meltingpot/lua/levels/allelopathic_harvest/components.lua b/meltingpot/lua/levels/allelopathic_harvest/components.lua index 3f57ea1f..86d3c74e 100644 --- a/meltingpot/lua/levels/allelopathic_harvest/components.lua +++ b/meltingpot/lua/levels/allelopathic_harvest/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -226,6 +226,7 @@ function Regrowth:__init__(kwargs) self._config.baseRate = kwargs.baseRate self._config.cubicRate = kwargs.cubicRate self._config.linearGrowth = kwargs.linearGrowth + self:resetCountdown() end function Regrowth:onStateChange(previousState) @@ -552,7 +553,7 @@ function ColorZapper:addHits(worldConfig) layer = self._config.layerNames[idx], sprite = self._config.spriteNames[idx], } - table.insert(worldConfig.renderOrder, self._config.layerNames[idx]) + component.insertIfNotPresent(worldConfig.renderOrder, self._config.layerNames[idx]) end end diff --git a/meltingpot/lua/levels/allelopathic_harvest/init.lua b/meltingpot/lua/levels/allelopathic_harvest/init.lua index a3953d91..fdc7b90a 100644 --- a/meltingpot/lua/levels/allelopathic_harvest/init.lua +++ b/meltingpot/lua/levels/allelopathic_harvest/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the allelopathic_harvest level. +-- Entry point lua file for the allelopathic_harvest substrate. 
local meltingpot = 'meltingpot.lua.modules.' local api_factory = require(meltingpot .. 'api_factory') local simulation = require(meltingpot .. 'base_simulation') --- Required to be able to use the components in the level +-- Required to be able to use the components in the substrate local component_library = require(meltingpot .. 'component_library') local avatar_library = require(meltingpot .. 'avatar_library') local components = require 'components' diff --git a/meltingpot/lua/levels/boat_race/components.lua b/meltingpot/lua/levels/boat_race/components.lua new file mode 100644 index 00000000..fda4acf8 --- /dev/null +++ b/meltingpot/lua/levels/boat_race/components.lua @@ -0,0 +1,887 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +local args = require 'common.args' +local class = require 'common.class' +local helpers = require 'common.helpers' +local log = require 'common.log' +local random = require 'system.random' +local tensor = require 'system.tensor' +local events = require 'system.events' + +local meltingpot = 'meltingpot.lua.modules.' +local component = require(meltingpot .. 'component') +local component_registry = require(meltingpot .. 'component_registry') + + +-- The BoatManager allows handling the objects comprising the boat as a single +-- entity. This component is added to the left-seat, and connects all the needed +-- objects so they can move as a unit. It also manages the effects of rowing. 
+local BoatManager = class.Class(component.Component) + +function BoatManager:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('BoatManager')}, + {'flailEffectiveness', args.default(0.1), args.numberType}, + {'mismatchStrokePenalty', args.default(-0.5), args.numberType}, + {'mismatchRolePseudoreward', args.default(-5.0), args.numberType}, + {'matchRolePseudoreward', args.default(2.0), args.numberType}, + }) + BoatManager.Base.__init__(self, kwargs) + + self._otherSide = {L = 'R', R = 'L'} + self._config.flailEffectiveness = kwargs.flailEffectiveness + self._config.mismatchStrokePenalty = kwargs.mismatchStrokePenalty + self._config.mismatchRolePseudoreward = kwargs.mismatchRolePseudoreward + self._config.matchRolePseudoreward = kwargs.matchRolePseudoreward +end + +function BoatManager:reset() + self._rowers = {} + self._strokes = {} + self._seats = {} +end + +function BoatManager:postStart() + -- /\ Front of boat + -- p;:q Oars and seats + -- LJ Back of boat + local transform = self.gameObject:getComponent('Transform') + local position = transform:getPosition() + -- Store references to both seats on the boat manager + table.insert(self._seats, self.gameObject) + table.insert(self._seats, transform:queryPosition('lowerPhysical', + {position[1] + 1, position[2]})) + -- Calculate the key coordinates of the boat + local upperLeft = {position[1] - 1, position[2] - 1} + local lowerRight = {position[1] + 2, position[2] + 1} + -- Oars are queried from the 'overlay' layer + local oarsBoat = transform:queryRectangle('overlay', upperLeft, lowerRight) + -- Hull is on lowerPhysical + self._lowerBoat = transform:queryRectangle( + 'lowerPhysical', upperLeft, lowerRight) + for _, go in pairs(oarsBoat) do + self.gameObject:connect(go) + end + for _, go in pairs(self._lowerBoat) do + self.gameObject:connect(go) + end + local scene = self.gameObject.simulation:getSceneObject() + if scene:hasComponent("RaceManager") then + self._raceManager = scene:getComponent("RaceManager") + 
end +end + +function BoatManager:reportRower(seatSide, avatar) + self._rowers[seatSide] = avatar + log.v(1, "Added rower to side", seatSide, "to BoatManager.") + events:add('player_added_to_side', 'dict', + 'player_index', avatar:getComponent("Avatar"):getIndex(), + 'seatSide', seatSide) +end + +function BoatManager:_reportAndClearStrokes() + local globalStrokes = self.gameObject.simulation:getSceneObject():getComponent( + "GlobalRaceTracker") + for side, stroke in pairs(self._strokes) do + local index = self._rowers[side]:getComponent('Avatar'):getIndex() + local avatarStrokes = self._rowers[side]:getComponent( + "StrokesTracker") + globalStrokes:countStroke(index, stroke) + avatarStrokes:countStroke(stroke) + end + self._strokes = {} -- Empty strokes +end + +function BoatManager:registerUpdaters(updaterRegistry) + updaterRegistry:registerUpdater{ + updateFn = function() + -- Apply agent role pseudo-rewards + for side, stroke in pairs(self._strokes) do + local role = self._rowers[side]:getComponent('Rowing'):getRole() + log.v(1, "Processing role pseudo-rewards: ", side, stroke, role) + if role ~= 'none' and role ~= stroke then + self._rowers[side]:getComponent('Avatar'):addReward( + self._config.mismatchRolePseudoreward) + elseif role == stroke then + self._rowers[side]:getComponent('Avatar'):addReward( + self._config.matchRolePseudoreward) + end + end + if self._strokes.L == 'row' and self._strokes.R == 'row' then + -- Both rowers rowing efficiently causes deterministic movement. + self.gameObject:moveAbs(self._raceManager:getRaceDirection()) + elseif self._strokes.L == 'flail' or self._strokes.R == 'flail' then + -- There are 2 independent things that happen if either player flails: + -- 1. The boat might move forward (stochastically) + if random:uniformReal(0, 1) < self._config.flailEffectiveness then + self.gameObject:moveAbs(self._raceManager:getRaceDirection()) + end + -- 2. 
Another player rowing gets penalised for mismatched stroke + for side, stroke in pairs(self._strokes) do + if stroke == 'row' then + self._rowers[side]:getComponent('Avatar'):addReward( + self._config.mismatchStrokePenalty) + end + end + end + self:_reportAndClearStrokes() + end, + } +end + +function BoatManager:isFull() + return self._rowers["L"] and self._rowers["R"] +end + +function BoatManager:_registerPair(pairsTensor) + if self._rowers["L"] == nil or self._rowers["R"] == nil then + return + end + local numPlayers = self.gameObject.simulation:getNumPlayers() + local posX = self._rowers["L"]:getPosition()[1] + -- Find out which boat this pair is in. + -- TODO(b/260154779): Is there a more elegant way to do this? We'd have to + -- know too much from the ASCII map to do it programmatically. :/ + local boat = 1 + if posX >= 10 and posX < 14 then + boat = 2 + elseif posX >= 14 then + boat = 3 + end + events:add('pair_started_race', 'dict', + 'race_number', self._raceManager:getRaceNumber(), + 'boat_index', boat, + 'player_L_index', self._rowers["L"]:getComponent("Avatar"):getIndex(), + 'player_R_index', self._rowers["R"]:getComponent("Avatar"):getIndex()) + -- Register the pair on the appropriate boat spot in the tensor. + pairsTensor(boat, 1):val( + self._rowers["L"]:getComponent("Avatar"):getIndex()) + pairsTensor(boat, 2):val( + self._rowers["R"]:getComponent("Avatar"):getIndex()) +end + +function BoatManager:setBoatStateFull() + for _, boat_part in pairs(self._lowerBoat) do + if boat_part:getState() == "boat" then + boat_part:setState("boatFull") + end + end + -- Register the pair starting the race. + local raceStart = self.gameObject.simulation:getSceneObject():getComponent( + "GlobalRaceTracker").raceStart + -- TODO(b/260154977): Sometimes this can happen twice in a single race. + -- I suspect it is due to other agents entering the seat once it's taken. + -- Should be fixed by adding a blocking goal object. 
+ self:_registerPair(raceStart) +end + +function BoatManager:setBoatStateNormal() + for _, boat_part in pairs(self._lowerBoat) do + if boat_part:getState() == "boatFull" then + boat_part:setState("boat") + end + end +end + +function BoatManager:oarAction(seatSide, style) + self._strokes[seatSide] = style +end + +function BoatManager:disembarkRowers(targetY) + for side, rower in pairs(self._rowers) do + log.v(1, "Disembark rower: ", side) + rower:getComponent("Rowing"):setSeat(nil) + if rower:hasComponent("Crown") then + local crown = rower:getComponent("Crown"):getCrownOverlay() + crown:disconnect() + rower:disconnect() + rower:connect(crown) + else + rower:disconnect() + end + local targetPosition = {} + targetPosition[1] = rower:getPosition()[1] + targetPosition[2] = targetY + rower:teleport(targetPosition, rower:getOrientation()) + end +end + +-- The race manager lives in the Scene object, and manages the start of a race +-- by coordinating the barriers and semaphores. +local RaceManager = class.Class(component.Component) + +function RaceManager:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('RaceManager')}, + -- Total time of race = raceStartTime + raceDuration. + -- Time for the barriers (and semaphore) to lift and start the race. + {'raceStartTime', args.default(75), args.numberType}, + -- Time that a light change in the semaphore should remain. + {'semaphoreTimer', args.default(5), args.numberType}, + -- The duration of the race once the race starts. + {'raceDuration', args.default(225), args.numberType}, + -- The initial direction of the race. 
+ {'raceInitialDirection', args.default('N'), args.stringType}, + }) + RaceManager.Base.__init__(self, kwargs) + + self._otherSide = {N = 'S', S = 'N'} + self._config.raceStartTime = kwargs.raceStartTime + self._config.semaphoreTimer = kwargs.semaphoreTimer + self._config.raceDuration = kwargs.raceDuration + self._config.raceInitialDirection = kwargs.raceInitialDirection + self._race_number = 0 +end + +function RaceManager:reset() + self._raceDirection = self._config.raceInitialDirection +end + +function RaceManager:getRaceNumber() + return self._race_number +end + +function RaceManager:registerUpdaters(updaterRegistry) + local barriersToggle = function(state) + local barriers = self.gameObject.simulation:getGameObjectsByName("barrier") + for _, barrier in pairs(barriers) do + if barrier:getState() ~= "on" then + barrier:setState("on") + else + barrier:setState("off") + end + end + end + + local semaphoreChange = function(color) + local semaphores = self.gameObject.simulation:getGameObjectsByName( + "semaphore") + for _, semaphore in pairs(semaphores) do + semaphore:setState(color) + end + self.gameObject:setState("semaphore_" .. 
color) + end + + local disconnectComponentSafely = function(component, newState) + component:disconnect() + component:setState(newState) + component:teleport({0, 0}, "N") + end + + local playerClean = function() + local players = self.gameObject.simulation:getGameObjectsByName("avatar") + for i, player in pairs(players) do + if player:getState() ~= "landed" then + log.v(1, "Disqualifying player:", i, 'with state:', player:getState()) + events:add('player_disqualified', 'dict', + 'race_number', self._race_number, + 'player_index', i) + if player:hasComponent("Crown") then + local crown = player:getComponent("Crown"):getCrownOverlay() + disconnectComponentSafely(crown, "crownWait") + end + disconnectComponentSafely(player, "playerWait") + player:getComponent("Avatar"):disallowMovement() + else + events:add('player_ended_race', 'dict', + 'race_number', self._race_number, + 'player_index', i) + player:setState("player") + end + end + end + + local boatReset = function() + local l_seats = self.gameObject.simulation:getGameObjectsByName("seat_L") + local r_seats = self.gameObject.simulation:getGameObjectsByName("seat_R") + for _, seat in pairs(l_seats) do + seat:setState("seat") + local boat_manager = seat:getComponent("BoatManager") + assert(boat_manager ~= nil, "The boat manager is not set!") + boat_manager:reset() + end + for _, seat in pairs(r_seats) do + seat:setState("seat") + end + end + + local appleSpawnBankFlip = function() + -- We need to modify both the current apple state as well as its state if + -- the Edible onEnter is triggered because the apple will spawn if there + -- is a player standing on apple when its state gets updated. 
+ local apples = self.gameObject.simulation:getGameObjectsByName("apple") + for _, apple in pairs(apples) do + if apple:getState() ~= "applePause" then + apple:setState("applePause") + apple:getComponent("Edible"):setWaitState("applePause") + else + apple:setState("apple") + apple:getComponent("Edible"):setWaitState("appleWait") + end + end + local single_apples = self.gameObject.simulation:getGameObjectsByName( + "single_apple") + for _, single_apple in pairs(single_apples) do + single_apple:setState("apple") + end + end + + local goalReset = function() + local goals = self.gameObject.simulation:getGameObjectsByName("water_goal") + for _, goal in pairs(goals) do + local transform = goal:getComponent('Transform') + local position = transform:getPosition() + local boat = transform:queryPosition('lowerPhysical', position) + local water_goal = goal:getComponent('WaterGoal') + if boat ~= nil and + self:getRaceDirection() ~= water_goal._config.bank_side then + log.v(1, "Found boat object with state at start: ", boat:getState()) + goal:setState("goalNonBlocking") + else + log.v(1, "Not found boat or goal at start, block entrance...") + goal:setState("goalBlocking") + end + end + end + + local raceReady = function() semaphoreChange("yellow") end + + local raceStart = function() + semaphoreChange("green") + barriersToggle() + self.gameObject:setState("boatRace") + -- Sentinel value to signal start of race + self.gameObject:getComponent("GlobalRaceTracker").raceStart:fill(-1) + -- Increase the number of races + self._race_number = self._race_number + 1 + events:add('race_start', 'dict', 'race_number', self._race_number) + end + + local raceEnd = function() + semaphoreChange("red") + playerClean() + end + + local raceReset = function() + self._raceDirection = self._otherSide[self._raceDirection] + boatReset() + goalReset() + appleSpawnBankFlip() + self.gameObject:setState("partnerChoice") + end + + local forceEmbark = function() + local players = {} + for _, player in pairs( + 
self.gameObject.simulation:getAvatarGameObjects("avatar")) do + table.insert(players, player) + end + random:shuffleInPlace(players) + local l_seats = self.gameObject.simulation:getGameObjectsByName("seat_L") + for i, l_seat in pairs(l_seats) do + local boat_manager = l_seat:getComponent("BoatManager") + local seat = random:choice(boat_manager._seats) + local transform = seat:getComponent('Transform') + local position = transform:getPosition() + players[i]:teleport({position[1], position[2]}, "N") + end + self.gameObject:setState("partnerChoice") + end + + updaterRegistry:registerUpdater{ + state = "ForceEmbark", + startFrame = 1, + updateFn = forceEmbark, + } + updaterRegistry:registerUpdater{ + state = "partnerChoice", + startFrame = self._config.raceStartTime - 2 * self._config.semaphoreTimer, + updateFn = raceReady, + } + updaterRegistry:registerUpdater{ + state = "semaphore_yellow", + startFrame = self._config.semaphoreTimer, + updateFn = raceStart, + } + updaterRegistry:registerUpdater{ + state = "boatRace", + startFrame = self._config.raceDuration, + updateFn = raceEnd, + priority = 200, + } + updaterRegistry:registerUpdater{ + state = "semaphore_red", + updateFn = raceReset, + priority = 50, + } +end + +function RaceManager:getRaceDirection() + return self._raceDirection +end + + +-- The `EpisodeManager` will periodically monitor whether all players have +-- been disqualified and terminate the episode early if this is the case. +-- Episode lengths will always be a multiple of the `checkInterval` +-- parameter. 
+local EpisodeManager = class.Class(component.Component) + +function EpisodeManager:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('EpisodeManager')}, + {'checkInterval', args.numberType}, + }) + EpisodeManager.Base.__init__(self, kwargs) + self._config.checkInterval = kwargs.checkInterval + self._step = 0 +end + +function EpisodeManager:registerUpdaters(updaterRegistry) + local earlyExit = function() + if self._step % self._config.checkInterval == 0 then + local players = self.gameObject.simulation:getGameObjectsByName("avatar") + local allDisqualified = true + for i, player in pairs(players) do + if player:getState() ~= "playerWait" then + allDisqualified = false + break + end + end + if allDisqualified then + log.v(1, "All players disqualified, ending episode.") + self.gameObject.simulation:endEpisode() + end + end + + self._step = self._step + 1 + + end + + updaterRegistry:registerUpdater{ + updateFn = earlyExit, + priority = 100, + } + +end + + +--[[ The `Crown` component keeps track of the number of times an avatar has +rowed and flailed. If the ratio of rowing to flailing is within the config +thresholds, a crown overlay is displayed on top of the avatar. The crown +sprite can be configured to be invisible. 
+]] +local Crown = class.Class(component.Component) +function Crown:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Crown')}, + -- Min rowing to flailing ratio to turn on crown + {'turnOnThreshold', args.numberType}, + -- Max rowing to flailing ratio to turn off crown + {'turnOffThreshold', args.numberType}, + -- Weight of the newest action in the exponential moving average + {'alpha', args.numberType}, + -- Per-update decay rate applied to the moving average + {'beta', args.numberType}, + }) + Crown.Base.__init__(self, kwargs) + assert(kwargs.turnOnThreshold > kwargs.turnOffThreshold, + "Crown turnOnThreshold should be strictly larger than turnOffThreshold") + self._config.turnOnThreshold = kwargs.turnOnThreshold + self._config.turnOffThreshold = kwargs.turnOffThreshold + self._config.alpha = kwargs.alpha + self._config.beta = kwargs.beta + self._num_rows = 0 + self._num_flails = 0 + self._num_actions = 0 + self._mean = 0 +end + +function Crown:start() + self._avatarComponent = self.gameObject:getComponent('Avatar') +end + +function Crown:recordAction(action) + local action_value = 0 + if action == 'flail' then + self._num_flails = self._num_flails + 1 + elseif action == 'row' then + self._num_rows = self._num_rows + 1 + action_value = 1 + else + return + end + self._mean = self._config.alpha * action_value + + (1 - self._config.alpha) * self._mean + self._num_actions = self._num_actions + 1 +end + +function Crown:getCrownOverlay() + local overlayObject = self._avatarComponent + :getAllConnectedObjectsWithNamedComponent('AvatarConnector') + if #overlayObject == 1 then + return overlayObject[1] + end +end + +function Crown:update() + if self._crown == nil then + self._crown = self:getCrownOverlay() + end + self._mean = self._mean * (1 - self._config.beta) + if self._mean > self._config.turnOnThreshold and + self._crown:getState() == "crownOff" then + events:add("turn_on_crown", "avg paddle", self._mean, + "num_rows", self._num_rows, "num_flails", 
self._num_flails, + "player_index", self._avatarComponent:getIndex()) + self._crown:setState("crownOn") + elseif self._mean < self._config.turnOffThreshold and + self._crown:getState() == "crownOn" then + events:add("turn_off_crown", "avg paddle", self._mean, + "num_rows", self._num_rows, "num_flails", self._num_flails, + "player_index", self._avatarComponent:getIndex()) + self._crown:setState("crownOff") + end +end + +--[[ The `Rowing` component endows an avatar with the ability to row the boat. +There are two actions, 'row' and 'flail'. Flailing means a secure progress but +without much efficiency. Rowing means that if both players perform it, movement +is much faster. Mismatches result in the agent rowing potentially incurring a +negative reward. +]] +local Rowing = class.Class(component.Component) + +function Rowing:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Rowing')}, + -- Cooldown for the rowing action. + {'cooldownTime', args.numberType}, + {'playerRowingState', args.default('rowing'), args.stringType}, + {'playerRole', args.default('none'), args.stringType}, + }) + Rowing.Base.__init__(self, kwargs) + + self._config.cooldownTime = kwargs.cooldownTime + self._config.playerRowingState = kwargs.playerRowingState + self._config.playerRole = kwargs.playerRole +end + +function Rowing:postStart() + local scene = self.gameObject.simulation:getSceneObject() + self._raceManager = scene:getComponent("RaceManager") +end + +function Rowing:getRole() + return self._config.playerRole +end + +function Rowing:registerUpdaters(updaterRegistry) + local flailing = function() + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + -- Execute the beam if applicable. 
+ if self.gameObject:getState() == self._config.playerRowingState then + if actions['flail'] == 1 and self._seat ~= nil then + local moved = self._seat:moveOar('flail') + if moved then + events:add('player_flailed', 'dict', + 'race_number', self._raceManager:getRaceNumber(), + 'player_index', self.gameObject:getComponent("Avatar"):getIndex()) + if self.gameObject:hasComponent('Crown') then + local crown = self.gameObject:getComponent('Crown') + crown:recordAction('flail') + end + end + end + end + end + + local rowing = function() + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + -- Execute the beam if applicable. + if self.gameObject:getState() == self._config.playerRowingState then + if self._coolingTimer > 0 then + self._coolingTimer = self._coolingTimer - 1 + if self._coolingTimer == 0 and self._seat ~= nil then + self._seat:moveOar(nil) + end + end + if self._coolingTimer == 0 and actions['row'] == 1 then + self._coolingTimer = self._config.cooldownTime + if self._seat ~= nil then + local moved = self._seat:moveOar('row') + if moved then + events:add('player_rowed', 'dict', + 'race_number', self._raceManager:getRaceNumber(), + 'player_index', self.gameObject:getComponent("Avatar"):getIndex()) + if self.gameObject:hasComponent('Crown') then + local crown = self.gameObject:getComponent('Crown') + crown:recordAction('row') + end + end + end + end + end + end + + updaterRegistry:registerUpdater{ + updateFn = rowing, + priority = 140, + } + updaterRegistry:registerUpdater{ + updateFn = flailing, + priority = 130, + } +end + +function Rowing:start() + -- Set the beam cooldown timer to its `ready` state (i.e. coolingTimer = 0). 
+ self._coolingTimer = 0 +end + +function Rowing:setSeat(seat) + self._seat = seat +end + + +local Seat = class.Class(component.Component) + +function Seat:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Seat')}, + {'rowingState', args.default('rowing'), args.stringType}, + }) + Seat.Base.__init__(self, kwargs) + + self._config.rowingState = kwargs.rowingState +end + +function Seat:postStart() + local transform = self.gameObject:getComponent('Transform') + -- /\ Front of boat + -- p;:q Oars and seats + -- LJ Back of boat + -- Oars are on overlay, rest of boat is lowerPhysical. + local oar = transform:queryDiamond('overlay', 1) + assert(#oar == 1, + "Exactly one 'overlay' object expected around the seat, " .. #oar + .. " found instead. There should only be one Oar!") + self._oar = oar[1] + if self.gameObject:hasComponent("BoatManager") then + self._manager = self.gameObject:getComponent("BoatManager") + else + -- The manager is on the adjacent seat at x - 1 (the seat to the left) + local position = transform:getPosition() + position[1] = position[1] - 1 + self._manager = transform:queryPosition( + 'lowerPhysical', position):getComponent("BoatManager") + end + self._seatSide = string.gsub(self.gameObject.name, "seat_", "") +end + +function Seat:update() + if self._oar:getState() == "oarUp_row" or + self._oar:getState() == "oarUp_flail" then + self._oar:setState("oarDown") + end +end + +function Seat:getOar() + return self._oar +end + +function Seat:moveOar(style) + if not self._manager:isFull() then + -- did not move oar + return false + end + if style then -- If not nil style, change state + self._oar:setState("oarUp_" .. style) + end + -- The game object's name for the seat is "seat_L" or "seat_R". 
+ self._manager:oarAction(self._seatSide, style) + return true +end + +function Seat:releaseOar() + self._oar:setState("oarDown") +end + +function Seat:onEnter(enteringObject, contactName) + if contactName == 'avatar' and self.gameObject:getState() == "seat" and + enteringObject:getState() == "player" then + self._manager:reportRower(self._seatSide, enteringObject) + enteringObject:getComponent("Avatar"):disallowMovement() + enteringObject:getComponent("Rowing"):setSeat(self) + enteringObject:setState(self._config.rowingState) + self.gameObject:connect(enteringObject) + self.gameObject:setState("seatTaken") + + if self._manager:isFull() then + self._manager:setBoatStateFull() + end + end +end + + +local WaterGoal = class.Class(component.Component) + +function WaterGoal:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('WaterGoal')}, + {'bank_side', args.stringType}, + {'rowingPlayerState', args.default('rowing'), args.stringType}, + {'landPlayerState', args.default('landed'), args.stringType}, + {'usedSeatState', args.default('seatUsed'), args.stringType}, + }) + WaterGoal.Base.__init__(self, kwargs) + self._config.rowingPlayerState = kwargs.rowingPlayerState + self._config.landPlayerState = kwargs.landPlayerState + self._config.usedSeatState = kwargs.usedSeatState + self._config.bank_side = kwargs.bank_side +end + +function WaterGoal:postStart() + local scene = self.gameObject.simulation:getSceneObject() + if scene:hasComponent("RaceManager") then + self._raceManager = scene:getComponent("RaceManager") + end +end + +function WaterGoal:isGoalReached() + return self._raceManager:getRaceDirection() == self._config.bank_side +end + +function WaterGoal:getGoalTeleportY() + local offset = 0 + if self._config.bank_side == "N" then + offset = -3 + else + offset = 3 + end + return self.gameObject:getComponent('Transform'):getPosition()[2] + offset +end + +function WaterGoal:onEnter(enteringObject, contactName) + if self:isGoalReached() then + if 
+ contactName == 'boat' and + enteringObject:getState() ~= self._config.usedSeatState then + if enteringObject:hasComponent("BoatManager") then + log.v(1, "Boat arrived to shore") + enteringObject:getComponent("BoatManager"):setBoatStateNormal() + enteringObject:getComponent("BoatManager"):disembarkRowers( + self:getGoalTeleportY()) + end + enteringObject:getComponent("Seat"):releaseOar() + enteringObject:setState(self._config.usedSeatState) + end + end +end + +function WaterGoal:onExit(leavingObject, contactName) + if self:isGoalReached() then + if contactName == 'avatar' and + self._raceManager.gameObject:getState() == 'boatRace' and + leavingObject:getState() == self._config.rowingPlayerState then + leavingObject:getComponent("Avatar"):allowMovement() + leavingObject:setState('landed') + log.v(1, "Successfully disembarked rower.") + end + else + log.v(1, "Raise watergoal. Object exited:", leavingObject.name) + self.gameObject:setState("goalBlocking") + end +end + + +--[[ The GlobalRaceTracker keeps track of race starts and player strokes.]] +local GlobalRaceTracker = class.Class(component.Component) + +function GlobalRaceTracker:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('GlobalRaceTracker')}, + {'numPlayers', args.numberType}, + }) + GlobalRaceTracker.Base.__init__(self, kwargs) + + self._config.numPlayers = kwargs.numPlayers +end + +function GlobalRaceTracker:reset() + self.raceStart = tensor.Int32Tensor(self._config.numPlayers / 2, 2) + self.strokes = tensor.Int32Tensor(self._config.numPlayers) +end + +function GlobalRaceTracker:preUpdate() + self.raceStart:fill(0) + self.strokes:fill(0) +end + +function GlobalRaceTracker:countStroke(index, stroke) + if stroke == "flail" then + self.strokes(index):val(1) + elseif stroke == "row" then + self.strokes(index):val(2) + end +end + + +--[[ The StrokesTracker keeps track of the stroke style of the player.]] +local StrokesTracker = class.Class(component.Component) + +function 
StrokesTracker:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('StrokesTracker')}, + }) + StrokesTracker.Base.__init__(self, kwargs) +end + +function StrokesTracker:reset() + self.strokes = tensor.Int32Tensor(2) +end + +function StrokesTracker:preUpdate() + self.strokes:fill(0) +end + +function StrokesTracker:countStroke(stroke) + if stroke == "flail" then + self.strokes(1):val(1) + elseif stroke == "row" then + self.strokes(2):val(1) + end +end + + +local allComponents = { + BoatManager = BoatManager, + RaceManager = RaceManager, + EpisodeManager = EpisodeManager, + Rowing = Rowing, + Crown = Crown, + Seat = Seat, + WaterGoal = WaterGoal, + GlobalRaceTracker = GlobalRaceTracker, + StrokesTracker = StrokesTracker, +} + +component_registry.registerAllComponents(allComponents) + +return allComponents diff --git a/meltingpot/lua/levels/boat_race/init.lua b/meltingpot/lua/levels/boat_race/init.lua new file mode 100644 index 00000000..6c3cff8a --- /dev/null +++ b/meltingpot/lua/levels/boat_race/init.lua @@ -0,0 +1,65 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +-- Entry point lua file for the boat_race substrate. +local class = require 'common.class' + +local meltingpot = 'meltingpot.lua.modules.' +local api_factory = require(meltingpot .. 'api_factory') +local simulation = require(meltingpot .. 
'base_simulation') + +-- Required to be able to use the components in the level +local component_library = require(meltingpot .. 'component_library') +local avatar_library = require(meltingpot .. 'avatar_library') +local components = require 'components' + +--[[ Add layer `layerToAdd` after existing layer `afterWhich`.]] +local function addLayer(layerToAdd, afterWhich, config) + local index = 0 + for layerIndex, layerName in ipairs(config.renderOrder) do + if layerName == afterWhich then + index = layerIndex + break + end + end + -- Add layer 'layerToAdd' above layer 'afterWhich'. + table.insert(config.renderOrder, index, layerToAdd) + return config +end + +local OverrideSimulation = class.Class(simulation.BaseSimulation) + +function OverrideSimulation:worldConfig() + local config = simulation.BaseSimulation.worldConfig(self) + -- Add layer 'directionIndicatorLayer' after all the default layers. + table.insert(config.renderOrder, 'directionIndicatorLayer') + -- Also add 'superDirectionIndicatorLayer' after 'directionIndicatorLayer'. + table.insert(config.renderOrder, 'superDirectionIndicatorLayer') + -- Add the 'superCrownOverlay' layer at a specific place in the list. + config = addLayer('superCrownOverlay', 'superOverlay', config) + return config +end + +return api_factory.apiFactory{ + Simulation = OverrideSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. + simulation = {}, + } +} diff --git a/meltingpot/lua/levels/clean_up/components.lua b/meltingpot/lua/levels/clean_up/components.lua index 0c7fd123..2ebd443b 100644 --- a/meltingpot/lua/levels/clean_up/components.lua +++ b/meltingpot/lua/levels/clean_up/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -145,6 +145,9 @@ function DirtCleaning:onHit(hittingGameObject, hitName) if hittingGameObject:hasComponent('Taste') then hittingGameObject:getComponent('Taste'):cleaned() end + if hittingGameObject:hasComponent('Cleaner') then + hittingGameObject:getComponent('Cleaner'):setCumulant() + end local avatar = hittingGameObject:getComponent('Avatar') events:add('player_cleaned', 'dict', 'player_index', avatar:getIndex()) -- int @@ -219,6 +222,14 @@ function Cleaner:registerUpdaters(updaterRegistry) updateFn = clean, priority = 140, } + + local function resetCumulant() + self.player_cleaned = 0 + end + updaterRegistry:registerUpdater{ + updateFn = resetCumulant, + priority = 400, + } end function Cleaner:reset() @@ -234,6 +245,15 @@ function Cleaner:getWaitState() return self.gameObject:getComponent('Avatar'):getWaitState() end +function Cleaner:setCumulant() + self.player_cleaned = self.player_cleaned + 1 + + local globalData = self.gameObject.simulation:getSceneObject():getComponent( + 'GlobalData') + local playerIndex = self.gameObject:getComponent('Avatar'):getIndex() + globalData:setCleanedThisStep(playerIndex) +end + --[[ The RiverMonitor is a scene component that tracks the state of the river. 
@@ -404,6 +424,16 @@ function Taste:__init__(kwargs) self._config.rewardAmount = kwargs.rewardAmount end +function Taste:registerUpdaters(updaterRegistry) + local function resetCumulant() + self.player_ate_apple = 0 + end + updaterRegistry:registerUpdater{ + updateFn = resetCumulant, + priority = 400, + } +end + function Taste:cleaned() if self._config.role == 'cleaner' then self.gameObject:getComponent('Avatar'):addReward(self._config.rewardAmount) @@ -421,6 +451,109 @@ function Taste:consumed(edibleDefaultReward) else self.gameObject:getComponent('Avatar'):addReward(edibleDefaultReward) end + self:setCumulant() +end + +function Taste:setCumulant() + self.player_ate_apple = self.player_ate_apple + 1 + + local globalData = self.gameObject.simulation:getSceneObject():getComponent( + 'GlobalData') + local playerIndex = self.gameObject:getComponent('Avatar'):getIndex() + globalData:setAteThisStep(playerIndex) +end + + +local GlobalData = class.Class(component.Component) + +function GlobalData:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('GlobalData')}, + }) + GlobalData.Base.__init__(self, kwargs) +end + +function GlobalData:reset() + local numPlayers = self.gameObject.simulation:getNumPlayers() + + self.playersWhoCleanedThisStep = tensor.Tensor(numPlayers):fill(0) + self.playersWhoAteThisStep = tensor.Tensor(numPlayers):fill(0) +end + +function GlobalData:registerUpdaters(updaterRegistry) + local function resetCumulants() + self.playersWhoCleanedThisStep:fill(0) + self.playersWhoAteThisStep:fill(0) + end + updaterRegistry:registerUpdater{ + updateFn = resetCumulants, + priority = 2, + } +end + +function GlobalData:setCleanedThisStep(playerIndex) + self.playersWhoCleanedThisStep(playerIndex):val(1) +end + +function GlobalData:setAteThisStep(playerIndex) + self.playersWhoAteThisStep(playerIndex):val(1) +end + + +local AllNonselfCumulants = class.Class(component.Component) + +function AllNonselfCumulants:__init__(kwargs) + kwargs = 
args.parse(kwargs, { + {'name', args.default('AllNonselfCumulants')}, + }) + AllNonselfCumulants.Base.__init__(self, kwargs) +end + +function AllNonselfCumulants:reset() + self._playerIndex = self.gameObject:getComponent('Avatar'):getIndex() + self._globalData = self.gameObject.simulation:getSceneObject():getComponent( + 'GlobalData') + + local numPlayers = self.gameObject.simulation:getNumPlayers() + self._tmpTensor = tensor.Tensor(numPlayers):fill(0) + + self.num_others_who_cleaned_this_step = 0 + self.num_others_who_ate_this_step = 0 +end + +function AllNonselfCumulants:sumNonself(vector) + -- Copy the vector so as not to modify the original. + self._tmpTensor:copy(vector) + self._tmpTensor(self._playerIndex):val(0) + local result = self._tmpTensor:sum() + self._tmpTensor:fill(0) + return result +end + +function AllNonselfCumulants:registerUpdaters(updaterRegistry) + + local function getCumulants() + self.num_others_who_cleaned_this_step = self:sumNonself( + self._globalData.playersWhoCleanedThisStep) + self.num_others_who_ate_this_step = self:sumNonself( + self._globalData.playersWhoAteThisStep) + end + + updaterRegistry:registerUpdater{ + updateFn = getCumulants, + priority = 4, + } + + local function resetCumulants() + self.num_others_who_cleaned_this_step = 0 + self.num_others_who_ate_this_step = 0 + self._tmpTensor:fill(0) + end + + updaterRegistry:registerUpdater{ + updateFn = resetCumulants, + priority = 400, + } end @@ -434,10 +567,12 @@ local allComponents = { -- Avatar components. Cleaner = Cleaner, Taste = Taste, + AllNonselfCumulants = AllNonselfCumulants, -- Scene components. 
RiverMonitor = RiverMonitor, DirtSpawner = DirtSpawner, + GlobalData = GlobalData, } component_registry.registerAllComponents(allComponents) diff --git a/meltingpot/lua/levels/clean_up/init.lua b/meltingpot/lua/levels/clean_up/init.lua index 4064541e..2ed99d81 100644 --- a/meltingpot/lua/levels/clean_up/init.lua +++ b/meltingpot/lua/levels/clean_up/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the clean_up level. +-- Entry point lua file for the clean_up substrate. local meltingpot = 'meltingpot.lua.modules.' local api_factory = require(meltingpot .. 'api_factory') local simulation = require(meltingpot .. 'base_simulation') --- Required to be able to use the components in the level +-- Required to be able to use the components in the substrate local component_library = require(meltingpot .. 'component_library') local avatar_library = require(meltingpot .. 'avatar_library') local components = require 'components' diff --git a/meltingpot/lua/levels/coins/components.lua b/meltingpot/lua/levels/coins/components.lua new file mode 100644 index 00000000..9c87820c --- /dev/null +++ b/meltingpot/lua/levels/coins/components.lua @@ -0,0 +1,341 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +local args = require 'common.args' +local class = require 'common.class' +local helpers = require 'common.helpers' +local log = require 'common.log' +local events = require 'system.events' +local random = require 'system.random' +local tensor = require 'system.tensor' + +local meltingpot = 'meltingpot.lua.modules.' +local component = require(meltingpot .. 'component') +local component_registry = require(meltingpot .. 'component_registry') + +local _COMPASS = {'N', 'E', 'S', 'W'} + + +-- A component that keeps track of a parameterized coin type. +local PlayerCoinType = class.Class(component.Component) + +function PlayerCoinType:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('PlayerCoinType')}, + {'coinType', args.stringType}, + }) + PlayerCoinType.Base.__init__(self, kwargs) + + self._config.coinType = kwargs.coinType +end + +function PlayerCoinType:getCoinType() + return self._config.coinType +end + + +-- Coins switch state when touched by an avatar, and provide rewards to players +-- based on the (mis)match between its type and the collecting players's coin +-- type. It can be used in combination with ChoiceCoinRegrow. 
+local Coin = class.Class(component.Component) + +function Coin:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Coin')}, + {'waitState', args.stringType}, + {'rewardSelfForMatch', args.numberType}, + {'rewardSelfForMismatch', args.numberType}, + {'rewardOtherForMatch', args.numberType}, + {'rewardOtherForMismatch', args.numberType}, + {'terminateEpisode', args.booleanType, args.default(false)}, + {'coinsToTerminateEpisode', args.numberType, args.default(-1)}, + }) + Coin.Base.__init__(self, kwargs) + + self._config.waitState = kwargs.waitState + self._config.rewardSelfForMatch = kwargs.rewardSelfForMatch + self._config.rewardOtherForMatch = kwargs.rewardOtherForMatch + self._config.rewardSelfForMismatch = kwargs.rewardSelfForMismatch + self._config.rewardOtherForMismatch = kwargs.rewardOtherForMismatch + self._config.terminateEpisode = kwargs.terminateEpisode + self._config.coinsToTerminateEpisode = kwargs.coinsToTerminateEpisode +end + +function Coin:reset() + self._waitState = self._config.waitState +end + +function Coin:rewardOthers(amountToReward, avatarIndexToSkip) + local simulation = self.gameObject.simulation + -- Iterate through avatars. + for _, object in pairs(simulation:getAvatarGameObjects()) do + local avatarComponent = object:getComponent('Avatar') + -- Skip the player who collected the coin. + if avatarComponent:getIndex() ~= avatarIndexToSkip then + -- Add reward. + avatarComponent:addReward(amountToReward) + end + end +end + +function Coin:onEnter(enteringGameObject, contactName) + local simulation = self.gameObject.simulation + assert( + simulation:getNumPlayers() <= 2, 'We only allow 1 or 2 players in Coins.') + + if contactName == 'avatar' then + local coinState = self.gameObject:getState() + if coinState ~= self._waitState then + -- Prepare to record collection event. 
+ local coinsCollected = simulation:getSceneObject():getComponent( + "GlobalCoinCollectionTracker").coinsCollected + + -- Get collecting player's coin type. + local avatarComponent = enteringGameObject:getComponent('Avatar') + local roleComponent = enteringGameObject:getComponent('Role') + local partnerTracker = enteringGameObject:getComponent('PartnerTracker') + local playerIndex = avatarComponent:getIndex() + local playerCoinTypeComponent = enteringGameObject:getComponent( + 'PlayerCoinType') + local playerCoinType = playerCoinTypeComponent:getCoinType() + + -- Check for match between this coin's type and collecting player's type. + if playerCoinType == coinState then + -- Reward collecting player and others for match. + local selfReward = roleComponent:getRewardSelfForMatch( + self._config.rewardSelfForMatch) + avatarComponent:addReward(selfReward) + local otherReward = roleComponent:getRewardOtherForMatch( + self._config.rewardOtherForMatch) + self:rewardOthers(otherReward, playerIndex) + -- Record collection event. + coinsCollected(playerIndex, playerIndex):val(1) + partnerTracker:reportMatch() + else + -- Reward collecting player and others for mismatch. + local selfReward = roleComponent:getRewardSelfForMismatch( + self._config.rewardSelfForMismatch) + avatarComponent:addReward(selfReward) + local otherReward = roleComponent:getRewardOtherForMismatch( + self._config.rewardOtherForMismatch) + self:rewardOthers(otherReward, playerIndex) + -- Record collection event. + local coinIndex = playerIndex % 2 + 1 + coinsCollected(playerIndex, coinIndex):val(1) + partnerTracker:reportMismatch() + end + + -- Record events. + events:add('coin_consumed', 'dict', + 'player_index', playerIndex, -- int + 'player_coin_type', playerCoinType, -- str + 'coin_type', coinState) -- str + + -- Change the coin to its wait (disabled) state. + self.gameObject:setState(self._waitState) + + -- Track cumulative collections for each player. 
+ local cumulativeCoinsCollected = simulation:getSceneObject():getComponent( + "GlobalCoinCollectionTracker").cumulativeCoinsCollected + cumulativeCoinsCollected(playerIndex):add(1) + + -- If the level is configured to end after a certain number of + -- collections... + if self._config.terminateEpisode then + -- ... count coins and compare against the configured threshold. + local coinCount = cumulativeCoinsCollected(playerIndex):val() + if coinCount >= self._config.coinsToTerminateEpisode then + -- End episode. + self.gameObject.simulation:endEpisode() + end + end + end + end +end + + +--[[ The `ChoiceCoinRegrow` component enables a game object that is in a +particular (traditionally thought of as "dormant") state to change its state +probabilistically (at a fixed rate). Used primarily for respawning objects. +]] +local ChoiceCoinRegrow = class.Class(component.Component) + +function ChoiceCoinRegrow:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('ChoiceCoinRegrow')}, + {'liveStateA', args.stringType}, + {'liveStateB', args.stringType}, + {'waitState', args.stringType}, + {'regrowRate', args.ge(0.0), args.le(1.0)}, + }) + ChoiceCoinRegrow.Base.__init__(self, kwargs) + + self._config.liveStates = {kwargs.liveStateA, kwargs.liveStateB} + self._config.waitState = kwargs.waitState + self._config.regrowRate = kwargs.regrowRate +end + +function ChoiceCoinRegrow:registerUpdaters(updaterRegistry) + -- Registers an update with high priority that only gets called when the + -- object is in the `waitState` state. 
+  updaterRegistry:registerUpdater{
+      state = self._config.waitState,
+      probability = self._config.regrowRate,
+      updateFn = function()
+        -- Leave the wait state for one of the two configured live states.
+        self.gameObject:setState(random:choice(self._config.liveStates))
+      end,
+  }
+end
+
+
+--[[ The GlobalCoinCollectionTracker keeps track of coin collections.
+
+It exposes two tensors:
+  `coinsCollected` (numPlayers x 2): zeroed in every `preUpdate`, so it only
+      holds values written since the last `preUpdate`.
+  `cumulativeCoinsCollected` (numPlayers): zeroed only in `reset`.
+]]
+local GlobalCoinCollectionTracker = class.Class(component.Component)
+
+-- `numPlayers` (number) sizes the first dimension of both tensors.
+function GlobalCoinCollectionTracker:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('GlobalCoinCollectionTracker')},
+      {'numPlayers', args.numberType},
+  })
+  GlobalCoinCollectionTracker.Base.__init__(self, kwargs)
+
+  self._config.numPlayers = kwargs.numPlayers
+end
+
+-- Allocates fresh tensors and zeroes the cumulative counts.
+function GlobalCoinCollectionTracker:reset()
+  -- NOTE(review): `coinsCollected` is not filled here; it is zeroed in
+  -- `preUpdate`. Presumably a new Int32Tensor starts at zero -- confirm.
+  self.coinsCollected = tensor.Int32Tensor(self._config.numPlayers, 2)
+  self.cumulativeCoinsCollected = tensor.Int32Tensor(self._config.numPlayers)
+  self.cumulativeCoinsCollected:fill(0)
+end
+
+-- Clears the per-step collection record before each update.
+function GlobalCoinCollectionTracker:preUpdate()
+  self.coinsCollected:fill(0)
+end
+
+
+-- The `Role` component holds per-avatar multipliers that scale the base
+-- rewards for matched and mismatched collections.
+local Role = class.Class(component.Component)
+
+-- All four multipliers are numbers and default to 1.0 (rewards unscaled).
+function Role:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('Role')},
+      {'multiplyRewardSelfForMatch', args.default(1.0), args.numberType},
+      {'multiplyRewardSelfForMismatch', args.default(1.0), args.numberType},
+      {'multiplyRewardOtherForMatch', args.default(1.0), args.numberType},
+      {'multiplyRewardOtherForMismatch', args.default(1.0), args.numberType},
+  })
+  Role.Base.__init__(self, kwargs)
+
+  self._config.multiplyRewardSelfForMatch = kwargs.multiplyRewardSelfForMatch
+  self._config.multiplyRewardSelfForMismatch =
+      kwargs.multiplyRewardSelfForMismatch
+  self._config.multiplyRewardOtherForMatch = kwargs.multiplyRewardOtherForMatch
+  self._config.multiplyRewardOtherForMismatch =
+      kwargs.multiplyRewardOtherForMismatch
+end
+
+function Role:reset()
+  -- Dynamic frame-by-frame tracking of coin collection.
+  self.cumulantCollectedMatch = 0
+  self.cumulantCollectedMismatch = 0
+end
+
+-- Returns `baseReward` scaled by the self/match multiplier and, as a side
+-- effect, raises the `cumulantCollectedMatch` flag until the next preUpdate.
+function Role:getRewardSelfForMatch(baseReward)
+  self.cumulantCollectedMatch = 1
+  local result = baseReward * self._config.multiplyRewardSelfForMatch
+  return result
+end
+
+-- Returns `baseReward` scaled by the self/mismatch multiplier and, as a side
+-- effect, raises the `cumulantCollectedMismatch` flag until the next
+-- preUpdate.
+function Role:getRewardSelfForMismatch(baseReward)
+  self.cumulantCollectedMismatch = 1
+  local result = baseReward * self._config.multiplyRewardSelfForMismatch
+  return result
+end
+
+-- Returns `baseReward` scaled by the other/match multiplier (no side effect).
+function Role:getRewardOtherForMatch(baseReward)
+  local result = baseReward * self._config.multiplyRewardOtherForMatch
+  return result
+end
+
+-- Returns `baseReward` scaled by the other/mismatch multiplier (no side
+-- effect).
+function Role:getRewardOtherForMismatch(baseReward)
+  local result = baseReward * self._config.multiplyRewardOtherForMismatch
+  return result
+end
+
+-- Clears both cumulant flags before each update.
+function Role:preUpdate()
+  self.cumulantCollectedMatch = 0
+  self.cumulantCollectedMismatch = 0
+end
+
+
+-- The `PartnerTracker` component records, on each avatar, whether the other
+-- avatar collected a matching or mismatching coin. Only player indices 1 and
+-- 2 are supported (see `getTrackerOfPartner`).
+local PartnerTracker = class.Class(component.Component)
+
+function PartnerTracker:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('PartnerTracker')},
+  })
+  PartnerTracker.Base.__init__(self, kwargs)
+end
+
+function PartnerTracker:reset()
+  -- Dynamic frame-by-frame tracking of the other player's coin collection.
+  self.partnerCollectedMatch = 0
+  self.partnerCollectedMismatch = 0
+end
+
+-- Caches the partner's tracker so reports can be written to it directly.
+function PartnerTracker:postStart()
+  self._trackerOfPartner = self:getTrackerOfPartner()
+end
+
+-- Clears both partner flags before each update.
+function PartnerTracker:preUpdate()
+  self.partnerCollectedMatch = 0
+  self.partnerCollectedMismatch = 0
+end
+
+-- Returns the `PartnerTracker` component of the other avatar. Fails an
+-- assertion if this avatar's index is neither 1 nor 2.
+function PartnerTracker:getTrackerOfPartner()
+  local selfIndex = self.gameObject:getComponent('Avatar'):getIndex()
+  local partnerIndex
+  if selfIndex == 1 then
+    -- If I am 1 then you are 2.
+    partnerIndex = 2
+  elseif selfIndex == 2 then
+    -- If I am 2 then you are 1.
+    partnerIndex = 1
+  else
+    assert(false, 'Unrecognized self index. Coins only supports two players.')
+  end
+  local otherAvatarObject = self.gameObject.simulation:getAvatarFromIndex(
+      partnerIndex)
+  return otherAvatarObject:getComponent('PartnerTracker')
+end
+
+-- Flags, on the partner's tracker, that this avatar collected a match.
+function PartnerTracker:reportMatch()
+  self._trackerOfPartner.partnerCollectedMatch = 1
+end
+
+-- Flags, on the partner's tracker, that this avatar collected a mismatch.
+function PartnerTracker:reportMismatch()
+  self._trackerOfPartner.partnerCollectedMismatch = 1
+end
+
+
+-- Every component defined in this file, keyed by its registered name.
+local allComponents = {
+    PlayerCoinType = PlayerCoinType,
+    Coin = Coin,
+    ChoiceCoinRegrow = ChoiceCoinRegrow,
+    GlobalCoinCollectionTracker = GlobalCoinCollectionTracker,
+    Role = Role,
+    PartnerTracker = PartnerTracker,
+}
+component_registry.registerAllComponents(allComponents)
+
+return allComponents
diff --git a/meltingpot/lua/levels/coins/init.lua b/meltingpot/lua/levels/coins/init.lua
new file mode 100644
index 00000000..63c6826f
--- /dev/null
+++ b/meltingpot/lua/levels/coins/init.lua
@@ -0,0 +1,37 @@
+--[[ Copyright 2022 DeepMind Technologies Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]
+
+-- Entry point lua file for the coins substrate.
+
+local meltingpot = 'meltingpot.lua.modules.'
+local api_factory = require(meltingpot .. 'api_factory')
+local simulation = require(meltingpot .. 'base_simulation')
+
+-- Required to be able to use the components in the substrate
+local component_library = require(meltingpot .. 'component_library')
+local avatar_library = require(meltingpot .. 
'avatar_library') +local components = require 'components' + +return api_factory.apiFactory{ + Simulation = simulation.BaseSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. + simulation = {}, + } +} diff --git a/meltingpot/lua/levels/collaborative_cooking/components.lua b/meltingpot/lua/levels/collaborative_cooking/components.lua index b101b33d..bc726e8e 100644 --- a/meltingpot/lua/levels/collaborative_cooking/components.lua +++ b/meltingpot/lua/levels/collaborative_cooking/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -68,7 +68,7 @@ function InteractBeam:addHits(worldConfig) layer = self.hitAndSpriteName, sprite = self.hitAndSpriteName, } - table.insert(worldConfig.renderOrder, self.hitAndSpriteName) + component.insertIfNotPresent(worldConfig.renderOrder, self.hitAndSpriteName) end function InteractBeam:addSprites(tileSet) @@ -384,7 +384,7 @@ function CookingPot:onHit(hittingGameObject, hitName) local avatarsHeldItem = avatarsInventory:getHeldItem() local itemsInPot = #self._containedItems if self._config.acceptedItems[avatarsHeldItem] and itemsInPot < 3 then - -- Drop onion from avatar to cooking pot. + -- Drop item from avatar to cooking pot. 
table.insert(self._containedItems, avatarsHeldItem) avatar:addReward(self._config.reward) avatarsInventory:setHeldItem('empty') @@ -393,6 +393,10 @@ function CookingPot:onHit(hittingGameObject, hitName) 'player_index', avatar:getIndex(), -- int 'pot', self.name, -- string 'item', avatarsHeldItem) -- string + if hittingGameObject:hasComponent('AvatarCumulants') then + local cumulants = hittingGameObject:getComponent('AvatarCumulants') + cumulants.addedIngredientToCookingPot = 1 + end elseif avatarsHeldItem == 'dish' and self._cooked then -- Collect soup from cooking pot. local cookedItem = 'soup' @@ -406,6 +410,10 @@ function CookingPot:onHit(hittingGameObject, hitName) 'player_index', avatar:getIndex(), -- int 'pot', self.name, -- string 'cooked_item', cookedItem) -- string + if hittingGameObject:hasComponent('AvatarCumulants') then + local cumulants = hittingGameObject:getComponent('AvatarCumulants') + cumulants.collectedSoupFromCookingPot = 1 + end end -- Update state based on items on in the pot. if not self._cooked then @@ -501,9 +509,31 @@ function LoadingBarVisualiser:registerUpdaters(updaterRegistry) end +local AvatarCumulants = class.Class(component.Component) + +function AvatarCumulants:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AvatarCumulants')}, + }) + self.Base.__init__(self, kwargs) + + self.addedIngredientToCookingPot = 0 + self.collectedSoupFromCookingPot = 0 +end + +function AvatarCumulants:update() + self.addedIngredientToCookingPot = 0 + self.collectedSoupFromCookingPot = 0 +end + + local allComponents = { + -- Avatar components. Inventory = Inventory, InteractBeam = InteractBeam, + AvatarCumulants = AvatarCumulants, + + -- Object components. 
Container = Container, Receiver = Receiver, CookingPot = CookingPot, diff --git a/meltingpot/lua/levels/collaborative_cooking/init.lua b/meltingpot/lua/levels/collaborative_cooking/init.lua index 1d57a5a0..3def3e5a 100644 --- a/meltingpot/lua/levels/collaborative_cooking/init.lua +++ b/meltingpot/lua/levels/collaborative_cooking/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the collaborative cooking level. +-- Entry point lua file for the collaborative cooking substrate. local meltingpot = 'meltingpot.lua.modules.' local api_factory = require(meltingpot .. 'api_factory') diff --git a/meltingpot/lua/levels/commons_harvest/components.lua b/meltingpot/lua/levels/commons_harvest/components.lua index 4ca4631d..2f9d752f 100644 --- a/meltingpot/lua/levels/commons_harvest/components.lua +++ b/meltingpot/lua/levels/commons_harvest/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -146,6 +146,8 @@ end function DensityRegrow:postStart() self:_beginLive() self._started = true + self._underlyingGrass = self.gameObject:getComponent( + 'Transform'):queryPosition('background') end function DensityRegrow:update() @@ -180,8 +182,13 @@ function DensityRegrow:_updateWaitState() if self.gameObject:getState() ~= self._config.liveState then local piece = self.gameObject:getPiece() local numClose = self._variables.pieceToNumNeighbors[piece] - self.gameObject:setState( - self._config.waitState .. '_' .. 
tostring(numClose)) + local newState = self._config.waitState .. '_' .. tostring(numClose) + self.gameObject:setState(newState) + if newState == self._config.waitState .. '_' .. tostring(0) then + self._underlyingGrass:setState('dessicated') + else + self._underlyingGrass:setState('grass') + end end end diff --git a/meltingpot/lua/levels/commons_harvest/init.lua b/meltingpot/lua/levels/commons_harvest/init.lua index fdbb0191..c69f32f5 100644 --- a/meltingpot/lua/levels/commons_harvest/init.lua +++ b/meltingpot/lua/levels/commons_harvest/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the commons_harvest level. +-- Entry point lua file for the commons_harvest substrate. local meltingpot = 'meltingpot.lua.modules.' local api_factory = require(meltingpot .. 'api_factory') diff --git a/meltingpot/lua/levels/coop_mining/components.lua b/meltingpot/lua/levels/coop_mining/components.lua new file mode 100644 index 00000000..6c135299 --- /dev/null +++ b/meltingpot/lua/levels/coop_mining/components.lua @@ -0,0 +1,293 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+]]
+
+local args = require 'common.args'
+local class = require 'common.class'
+local events = require 'system.events'
+local helpers = require 'common.helpers'
+local log = require 'common.log'
+local random = require 'system.random'
+local tensor = require 'system.tensor'
+
+local meltingpot = 'meltingpot.lua.modules.'
+local component = require(meltingpot .. 'component')
+local component_registry = require(meltingpot .. 'component_registry')
+
+
+--[[ The `FixedRateRegrow` component moves an object out of `waitState` into
+one of several live states. `liveRates[i]` is the per-update probability of
+switching to `liveStates[i]`, so the two tables are read in parallel.
+]]
+local FixedRateRegrow = class.Class(component.Component)
+
+--[[ Arguments:
+  `liveStates` (table): list of state names to regrow into.
+  `liveRates` (table): list of probabilities, one per entry of `liveStates`.
+  `waitState` (string): the state from which regrowth happens.
+]]
+function FixedRateRegrow:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('FixedRateRegrow')},
+      {'liveStates', args.tableType},
+      {'liveRates', args.tableType},
+      {'waitState', args.stringType},
+  })
+  self.Base.__init__(self, kwargs)
+
+  self._config.liveStates = kwargs.liveStates
+  self._config.liveRates = kwargs.liveRates
+  self._config.waitState = kwargs.waitState
+end
+
+-- Registers one updater per entry of `liveRates`, each active only while the
+-- object is in `waitState`.
+function FixedRateRegrow:registerUpdaters(updaterRegistry)
+  for i, rate in ipairs(self._config.liveRates) do
+    updaterRegistry:registerUpdater{
+        priority = 200,
+        state = self._config.waitState,
+        probability = rate,
+        updateFn = function()
+          -- Only regrow when nothing occupies the 'upperPhysical' layer at
+          -- this position (presumably where avatars live -- confirm).
+          local transform = self.gameObject:getComponent('Transform')
+          local maybeAvatar = transform:queryPosition('upperPhysical')
+          if not maybeAvatar then
+            self.gameObject:setState(self._config.liveStates[i])
+          end
+        end,
+    }
+  end
+end
+
+-- The `Ore` component models a resource that must be hit by `minNumMiners`
+-- distinct avatars within `miningWindow` updates to be extracted.
+local Ore = class.Class(component.Component)
+
+--[[ Arguments:
+  `rawState` (string): state of an untouched ore.
+  `waitState` (string): state after the ore has been extracted.
+  `partialState` (string): state once at least one miner has hit the ore.
+  `minNumMiners` (number): number of distinct miners needed to extract.
+  `miningWindow` (number): countdown (in updates) restarted on every hit.
+]]
+function Ore:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('Ore')},
+      {'rawState', args.stringType},
+      {'waitState', args.stringType},
+      {'partialState', args.stringType},
+      {'minNumMiners', args.numberType},
+      {'miningWindow', args.numberType},
+  })
+  self.Base.__init__(self, kwargs)
+
+  self._config.rawState = kwargs.rawState
+  self._config.waitState = kwargs.waitState
+  self._config.partialState = kwargs.partialState
+  self._config.minNumMiners = kwargs.minNumMiners
+  self._config.miningWindow = kwargs.miningWindow
+end
+
+-- Returns the number of distinct miners currently recorded in `_miners`.
+function Ore:currentMiners()
+  local count = 0
+  -- `_miners` is keyed by miner id, so its size must be counted by hand.
+  for k, v in pairs(self._miners) do
+    count = count + 1
+  end
+  return count
+end
+
+-- Forgets all miners and the countdown; returns the ore to `rawState` unless
+-- it is currently in `waitState`.
+function Ore:reset()
+  -- Table tracking which players have attempted mining this resource.
+  self._miners = {}
+  self._miningCountdown = 0
+  if self.gameObject:getState() ~= self._config.waitState then
+    self.gameObject:setState(self._config.rawState)
+  end
+end
+
+-- Ticks the mining countdown; when it reaches exactly zero the partial
+-- mining progress is discarded via `reset`.
+function Ore:update()
+  -- The countdown keeps decreasing below zero while idle; only the step that
+  -- lands on zero (i.e. after `addMiner` restarted it) triggers the reset.
+  self._miningCountdown = self._miningCountdown - 1
+  if self._miningCountdown == 0 then
+    -- Clean miners
+    self:reset()
+  end
+end
+
+-- Records `minerId` as a miner, restarts the countdown and marks the ore as
+-- partially mined.
+function Ore:addMiner(minerId)
+  self._miningCountdown = self._config.miningWindow
+  self._miners[minerId] = 1
+  self.gameObject:setState(self._config.partialState)
+end
+
+-- Handles a beam hit. Returns true (blocking the beam) for 'mine' hits on a
+-- raw or partially mined ore, false otherwise.
+function Ore:onHit(hitterGameObject, hitName)
+  if hitName == 'mine' and
+      (self.gameObject:getState() == self._config.rawState or
+       self.gameObject:getState() == self._config.partialState) then
+    local hitterIndex = hitterGameObject:getComponent('Avatar'):getIndex()
+    self:addMiner(hitterIndex)
+
+    -- `minNumMiners` is passed where `MineBeam` expects an ore type, i.e. the
+    -- number of required miners doubles as the ore type index.
+    local hitterMineBeam = hitterGameObject:getComponent('MineBeam')
+    hitterMineBeam:processRoleMineEvent(self._config.minNumMiners)
+    -- If the Ore has enough miners, process rewards.
+    if self:currentMiners() == self._config.minNumMiners then
+      for id, _ in pairs(self._miners) do
+        local avatarGO = self.gameObject.simulation:getAvatarFromIndex(id)
+        avatarGO:getComponent('MineBeam'):processRoleExtractEvent(
+            self._config.minNumMiners)
+        -- Record a pair event for every ordered pair of distinct miners.
+        for otherId, _ in pairs(self._miners) do
+          if otherId ~= id then
+            avatarGO:getComponent('MineBeam'):processRolePairExtractEvent(
+                otherId, self._config.minNumMiners)
+          end
+        end
+      end
+      -- `reset` clears the miners (and sets `rawState`); the ore is then
+      -- moved to `waitState` to mark it as extracted.
+      self:reset()
+      self.gameObject:setState(self._config.waitState)
+    end
+    -- return `true` to prevent the beam from passing through a hit ore.
+    return true
+  end
+  -- Other beams, or if in state not raw nor partial can pass through.
+  return false
+end
+
+--[[ The `MineBeam` component endows an avatar with the ability to fire a beam.
+
+The beam is fired from `update` when the avatar's `mine` action is 1 and the
+cooldown has elapsed. Rewards for mining and extracting are looked up by the
+avatar's role and the ore type.
+]]
+local MineBeam = class.Class(component.Component)
+
+--[[ Arguments:
+  `cooldownTime` (number): updates to wait between two shots.
+  `beamLength`, `beamRadius` (number): geometry passed to `hitBeam`.
+  `agentRole` (string): key into the two reward tables below.
+  `roleRewardForMining`, `roleRewardForExtracting` (table): rewards indexed
+      by [role][oreType].
+]]
+function MineBeam:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('MineBeam')},
+      {'cooldownTime', args.numberType},
+      {'beamLength', args.numberType},
+      {'beamRadius', args.numberType},
+      {'agentRole', args.stringType},
+      -- These two must be tables indexed by [role][oreType]
+      {'roleRewardForMining', args.tableType},
+      {'roleRewardForExtracting', args.tableType},
+  })
+  self.Base.__init__(self, kwargs)
+
+  self._config.cooldownTime = kwargs.cooldownTime
+  self._config.beamLength = kwargs.beamLength
+  self._config.beamRadius = kwargs.beamRadius
+  self._config.agentRole = kwargs.agentRole
+  self._config.roleRewardForMining = kwargs.roleRewardForMining
+  self._config.roleRewardForExtracting = kwargs.roleRewardForExtracting
+
+  self._coolingTimer = 0
+end
+
+-- Returns a readiness value in [0, 1]; 1 means the beam can be fired now.
+function MineBeam:readyToShoot()
+  -- With a non-positive cooldown there is nothing to wait for. Returning
+  -- early also avoids 0 / 0 = NaN, which would make `readyToShoot() >= 1`
+  -- false forever.
+  if self._config.cooldownTime <= 0 then
+    return 1
+  end
+  local normalizedTimeTillReady = self._coolingTimer / self._config.cooldownTime
+  return 1 - normalizedTimeTillReady
+end
+
+-- Declares the 'mine' hit and its render layer in the world configuration.
+function MineBeam:addHits(worldConfig)
+  worldConfig.hits['mine'] = {
+      layer = 'beamMine',
+      sprite = 'beamMine',
+  }
+  -- Guard against duplicate layers: a plain `table.insert` appends
+  -- 'beamMine' again on every call.
+  component.insertIfNotPresent(worldConfig.renderOrder, 'beamMine')
+end
+
+function MineBeam:addSprites(tileSet)
+  -- This color is pink.
+  tileSet:addColor('beamMine', {255, 202, 202})
+end
+
+-- Rewards this avatar for hitting an ore of `oreType`, logs a "mining" event
+-- and increments the `playerMined` counter for that ore type.
+function MineBeam:processRoleMineEvent(oreType)
+  local amount = self._config.roleRewardForMining[
+      self._config.agentRole][oreType]
+  local avatar = self.gameObject:getComponent('Avatar')
+  avatar:addReward(amount)
+
+  events:add("mining", "dict",
+             "player", avatar:getIndex(),
+             "ore_type", oreType)
+
+  self.playerMined(oreType):add(1)
+end
+
+-- Rewards this avatar for taking part in extracting an ore of `oreType`,
+-- logs an "extraction" event and increments `playerExtracted`.
+function MineBeam:processRoleExtractEvent(oreType)
+  local amount = self._config.roleRewardForExtracting[
+      self._config.agentRole][oreType]
+  local avatar = self.gameObject:getComponent('Avatar')
+  avatar:addReward(amount)
+  local index = avatar:getIndex()
+
+  events:add("extraction", "dict",
+             "player", index,
+             "ore_type", oreType)
+
+  self.playerExtracted(oreType):add(1)
+end
+
+-- Logs that this avatar extracted an ore of `oreType` together with the
+-- avatar `otherId` and increments `coExtracted(otherId, oreType)`. No reward
+-- is given here.
+function MineBeam:processRolePairExtractEvent(otherId, oreType)
+  local index = self.gameObject:getComponent('Avatar'):getIndex()
+
+  events:add("extraction_pair", "dict",
+             "player_a", index,
+             "player_b", otherId,
+             "ore_type", oreType)
+
+  self.coExtracted(otherId, oreType):add(1)
+end
+
+-- Advances the cooldown and fires the beam when requested and ready.
+function MineBeam:update()
+  if self._coolingTimer > 0 then
+    self._coolingTimer = self._coolingTimer - 1
+  end
+
+  -- TODO(b/260338825): It would be good to factor out the firing logic to be in
+  -- an updater so we can control the exact order of execution within a frame.
+  -- Right now it depends on the Lua table order that the components are added.
+  local state = self.gameObject:getComponent('Avatar'):getVolatileData()
+  local actions = state.actions
+  -- Execute the beam if applicable.
+  if actions.mine == 1 and self:readyToShoot() >= 1 then
+    -- Restart the cooldown before firing.
+    self._coolingTimer = self._config.cooldownTime
+    self.gameObject:hitBeam(
+        'mine', self._config.beamLength, self._config.beamRadius)
+  end
+end
+
+function MineBeam:start()
+  -- Set the beam cooldown timer to its `ready` state (i.e. coolingTimer = 0).
+  self._coolingTimer = 0
+  -- Cache the counters owned by this avatar's `MiningTracker` so the event
+  -- handlers can increment them directly.
+  self.playerMined = self.gameObject:getComponent('MiningTracker').playerMined
+  self.playerExtracted = self.gameObject:getComponent(
+      'MiningTracker').playerExtracted
+  self.coExtracted = self.gameObject:getComponent('MiningTracker').coExtracted
+end
+
+--[[ The MiningTracker keeps track of the mining metrics.
+
+It owns three tensors, all zeroed in every `preUpdate`:
+  `playerMined` (numOreTypes), `playerExtracted` (numOreTypes) and
+  `coExtracted` (numPlayers x numOreTypes).
+]]
+local MiningTracker = class.Class(component.Component)
+
+-- `numPlayers` and `numOreTypes` (numbers) size the tensors created in
+-- `reset`.
+function MiningTracker:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('MiningTracker')},
+      {'numPlayers', args.numberType},
+      {'numOreTypes', args.numberType},
+  })
+  self.Base.__init__(self, kwargs)
+
+  self._config.numPlayers = kwargs.numPlayers
+  self._config.numOreTypes = kwargs.numOreTypes
+end
+
+-- Allocates fresh counter tensors.
+function MiningTracker:reset()
+  self.playerMined = tensor.Int32Tensor(self._config.numOreTypes)
+  self.playerExtracted = tensor.Int32Tensor(self._config.numOreTypes)
+  self.coExtracted = tensor.Int32Tensor(
+      self._config.numPlayers,
+      self._config.numOreTypes)
+end
+
+-- Clears all counters before each update, so they only hold values written
+-- since the last `preUpdate`.
+function MiningTracker:preUpdate()
+  self.playerMined:fill(0)
+  self.playerExtracted:fill(0)
+  self.coExtracted:fill(0)
+end
+
+-- Every component defined in this file, keyed by its registered name.
+local allComponents = {
+    FixedRateRegrow = FixedRateRegrow,
+    Ore = Ore,
+    MineBeam = MineBeam,
+    MiningTracker = MiningTracker,
+}
+
+component_registry.registerAllComponents(allComponents)
+
+return allComponents
diff --git a/meltingpot/lua/levels/coop_mining/init.lua b/meltingpot/lua/levels/coop_mining/init.lua
new file mode 100644
index 00000000..dd7b4abc
--- /dev/null
+++ b/meltingpot/lua/levels/coop_mining/init.lua
@@ -0,0 +1,37 @@
+--[[ Copyright 2022 DeepMind Technologies Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +-- Entry point lua file for the coop_mining substrate. + +local meltingpot = 'meltingpot.lua.modules.' +local api_factory = require(meltingpot .. 'api_factory') +local simulation = require(meltingpot .. 'base_simulation') + +-- Required to be able to use the components in the level +local component_library = require(meltingpot .. 'component_library') +local avatar_library = require(meltingpot .. 'avatar_library') +local components = require 'components' + +return api_factory.apiFactory{ + Simulation = simulation.BaseSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. + simulation = {}, + } +} diff --git a/meltingpot/lua/levels/daycare/components.lua b/meltingpot/lua/levels/daycare/components.lua new file mode 100644 index 00000000..acbbd142 --- /dev/null +++ b/meltingpot/lua/levels/daycare/components.lua @@ -0,0 +1,741 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+]]
+
+local args = require 'common.args'
+local class = require 'common.class'
+local helpers = require 'common.helpers'
+local set = require 'common.set'
+local log = require 'common.log'
+local events = require 'system.events'
+local random = require 'system.random'
+local tensor = require 'system.tensor'
+
+local meltingpot = 'meltingpot.lua.modules.'
+local component = require(meltingpot .. 'component')
+local component_registry = require(meltingpot .. 'component_registry')
+
+-- Updater priorities. The grasp updaters below use `_GRASP_PRIORITY` and
+-- `_MOVE_PRIORITY`.
+local _GRASP_PRIORITY = 140
+local _MOVE_PRIORITY = 150
+local _AVATAR_RESPAWN = 160
+local _APPLE_SPAWN = 200
+local _APPLE_RESPAWN = 180
+local _APPLE_EAT = 190
+
+-- Position offsets keyed by direction letter.
+-- NOTE(review): `F` = {0, 3} is not a compass direction; its purpose is not
+-- visible here -- confirm against its users.
+local _DIRECTION = {
+    N = tensor.Tensor({0, -1}),
+    E = tensor.Tensor({1, 0}),
+    S = tensor.Tensor({0, 1}),
+    F = tensor.Tensor({0, 3}),
+    W = tensor.Tensor({-1, 0}),
+}
+
+-- The `Role` component marks an avatar as either a child or a parent.
+local Role = class.Class(component.Component)
+
+-- `isChild` (boolean): true for a child avatar, false for a parent.
+function Role:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('Role')},
+      {'isChild', args.booleanType},
+  })
+  Role.Base.__init__(self, kwargs)
+  self._config.isChild = kwargs.isChild
+end
+
+-- Returns true if this avatar is a child.
+function Role:isChild()
+  return self._config.isChild
+end
+
+-- Returns true if this avatar is a parent (i.e. not a child).
+function Role:isParent()
+  return not self._config.isChild
+end
+
+-- The `Graspable` component enables an object to be grasped by PlayerGrasp.
+local Graspable = class.Class(component.Component)
+
+function Graspable:__init__(kwargs)
+  kwargs = args.parse(kwargs, {{'name', args.default('Graspable')}})
+  Graspable.Base.__init__(self, kwargs)
+end
+
+-- Any hit whose name starts with 'grasp_' asks the hitter's `PlayerGrasp`
+-- component to grasp this object and marks it as grasped.
+function Graspable:onHit(hitterObject, hitName)
+  if string.sub(hitName, 1, #'grasp_') == 'grasp_' then
+    hitterObject:getComponent('PlayerGrasp'):grasp(self.gameObject)
+    self._setIsGrasped = true
+  end
+end
+
+-- Returns whether this object was hit by a grasp beam since the flag was
+-- last cleared.
+function Graspable:isGrasped()
+  return self._setIsGrasped
+end
+
+function Graspable:reset()
+  self._setIsGrasped = false
+end
+
+-- Registers an updater that clears the grasped flag just before the players'
+-- grasp updater (priority `_GRASP_PRIORITY`).
+function Graspable:registerUpdaters(updaterRegistry)
+  local grasp = function()
+    self._setIsGrasped = false
+
+  end
+
+  updaterRegistry:registerUpdater{
+      updateFn = grasp,
+      priority = _GRASP_PRIORITY - 1,
+  }
+end
+
+-- The `PlayerGrasp` component endows an avatar with the ability to grasp an
+-- object in the direction they are facing.
+
+local PlayerGrasp = class.Class(component.Component)
+
+--[[ Arguments:
+  `shape` (string), `palette` (table): sprite of the grasp beam.
+  `canGraspTree` (boolean): whether this avatar may grasp tree fruit.
+  `graspSuccessProbability` (number in [0, 1], default 1.0).
+  The four pseudorewards (numbers, default 0.0) are described inline.
+]]
+function PlayerGrasp:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('PlayerGrasp')},
+      {'shape', args.stringType},
+      {'palette', args.tableType},
+      {'canGraspTree', args.booleanType},
+      {'graspSuccessProbability', args.ge(0.0), args.le(1.0), args.default(1.0)},
+      -- Parent gets reward if they pick and drop something the child pointed
+      -- at.
+      {'attentiveParentPseudoreward', args.default(0.0), args.numberType},
+      -- Parent gets reward for anything they pick and drop.
+      {'droppingParentPseudoreward', args.default(0.0), args.numberType},
+      -- Child gets reward for failed grasps.
+      {'tryingChildPseudoreward', args.default(0.0), args.numberType},
+      -- Child gets reward for failed grasps at banana trees.
+      {'tryingChildBananaPseudoreward', args.default(0.0), args.numberType},
+  })
+
+  PlayerGrasp.Base.__init__(self, kwargs)
+  self._config.shape = kwargs.shape
+  self._config.palette = kwargs.palette
+  self._config.canGraspTree = kwargs.canGraspTree
+  self._config.graspSuccessProbability = kwargs.graspSuccessProbability
+  self._config.attentiveParentPseudoreward = kwargs.attentiveParentPseudoreward
+  self._config.droppingParentPseudoreward = kwargs.droppingParentPseudoreward
+  self._config.tryingChildPseudoreward = kwargs.tryingChildPseudoreward
+  self._config.tryingChildBananaPseudoreward = kwargs.tryingChildBananaPseudoreward
+end
+
+-- Each avatar gets its own hit name ('grasp_<avatar index>').
+function PlayerGrasp:awake()
+  self._hitName = 'grasp_' .. self:avatarIndex()
+end
+
+-- Caches the avatar, its index and its role; starts with empty hands.
+function PlayerGrasp:reset()
+  self._lastGraspAction = false
+  self._avatar = self.gameObject:getComponent('Avatar')
+  self._avatar_idx = self._avatar:getIndex()
+  if self.gameObject:getComponent('Role'):isChild() then
+    self._avatar_role = 'child'
+  else
+    self._avatar_role = 'parent'
+  end
+  -- The string 'empty' is the sentinel for "not holding anything".
+  self._graspedObject = 'empty'
+end
+
+-- Inserts `element` into the list `tbl` at position `insert_at` unless the
+-- list already contains it. Declared `local` so that it does not leak into
+-- the global environment.
+local function insertAtIfNotPresent(tbl, insert_at, element)
+  for _, value in ipairs(tbl) do
+    if value == element then
+      return
+    end
+  end
+  table.insert(tbl, insert_at, element)
+end
+
+-- Declares this avatar's grasp hit and its render layer.
+function PlayerGrasp:addHits(worldConfig)
+  -- Add the grasp beam underneath the overlay layer. If there is no overlay
+  -- layer, append the beam at the end instead: inserting at position 0 would
+  -- corrupt the list (Lua 5.1) or raise an error (Lua 5.3+).
+  local insert_at = #worldConfig.renderOrder + 1
+  for index, layer in ipairs(worldConfig.renderOrder) do
+    if layer == 'overlay' then
+      insert_at = index
+      break
+    end
+  end
+  insertAtIfNotPresent(worldConfig.renderOrder, insert_at, self._hitName)
+  worldConfig.hits[self._hitName] = {
+      layer = self._hitName,
+      sprite = self._hitName,
+  }
+end
+
+-- Registers the grasp beam sprite under this avatar's hit name.
+function PlayerGrasp:addSprites(tileSet)
+  tileSet:addShape(self._hitName, {
+      palette = self._config.palette,
+      text = self._config.shape})
+end
+
+-- Ungrasping logic: don't allow ungrasping on top of trees
+-- Give pseudorewards to parent when ungrasping for the first time.
+function PlayerGrasp:unGrasp()
+  local transform = self._graspedObject:getComponent('Transform')
+  local position = transform:getPosition()
+  -- The object may only be dropped where both physical layers are free.
+  local maybe_empty_tree = transform:queryPosition('lowerPhysical', position)
+  local maybe_full_tree = transform:queryPosition('upperPhysical', position)
+  if maybe_empty_tree == nil and maybe_full_tree == nil then
+    local fruit = self._graspedObject:getComponent('FruitType')
+    -- Only the first drop of a fruit is logged and rewarded.
+    if fruit.initial_dropper_role == nil then
+      events:add("ungrasp", "dict",
+                 "player_index", self._avatar_idx,
+                 "fruit_type", fruit.fruitType,
+                 "height", fruit.originalHeight,
+                 "player_role", self._avatar_role,
+                 "child_attempted", tostring(fruit.child_attempted_grasp))
+      fruit.initial_dropper_role = self._avatar_role
+      if self._avatar_role == 'parent' then
+        self._avatar:addReward(
+            self._config.droppingParentPseudoreward)
+      end
+      -- Extra reward when the child had previously tried to grasp this fruit.
+      if self._avatar_role == 'parent' and fruit.child_attempted_grasp then
+        self._avatar:addReward(
+            self._config.attentiveParentPseudoreward)
+      end
+    end
+    -- NOTE(review): the object is connected and then immediately
+    -- disconnected; presumably this ensures a connection exists before
+    -- `disconnect` is called -- confirm.
+    self.gameObject:connect(self._graspedObject)
+    self.gameObject:disconnect()
+    self._graspedObject = 'empty'
+  end
+end
+
+-- Registers two updaters: one handling the grasp action (priority
+-- `_GRASP_PRIORITY`) and one keeping a held object in front of the avatar
+-- when it turns (priority `_MOVE_PRIORITY - 1`).
+function PlayerGrasp:registerUpdaters(updaterRegistry)
+  local grasp = function()
+    local playerVolatileVariables = self._avatar:getVolatileData()
+    local graspAction = playerVolatileVariables.actions['grasp'] == 1
+
+    -- If the player is holding an object continue holding it until the player
+    -- tries grasping again.
+    -- If the player is not holding an object, attempt to grasp when the grasp
+    -- action is sent.
+    local isHoldingObject = self._graspedObject ~= 'empty'
+    -- Only react to the rising edge of the grasp action.
+    local changeGrasp = graspAction and not self._lastGraspAction
+
+    if isHoldingObject and changeGrasp then
+      self:unGrasp()
+    end
+
+    -- Fire the beam either to keep holding (holding, no change) or to pick
+    -- something up (not holding, change requested).
+    if isHoldingObject ~= changeGrasp then
+      self.gameObject:hitBeam(self._hitName, 1, 0)
+    end
+
+    self._lastGraspAction = graspAction
+  end
+
+  local rotate_grasped = function()
+    -- Only rotate the grasped object if there is one connected.
+    if self._graspedObject == 'empty' then
+      return
+    end
+
+    -- Only rotate the grasped object if the player rotates.
+    local playerVolatileVariables = self._avatar:getVolatileData()
+    local turn_action = playerVolatileVariables.actions['turn']
+    if turn_action == 0 then
+      return
+    end
+
+    -- Determine the direction the player will be facing on the next turn.
+    local _COMPASS = {N = 1, E = 2, S = 3, W = 4}
+    local _COMPASSKEYS = {'N', 'E', 'S', 'W'}
+    -- Steps `turn` positions around N -> E -> S -> W, wrapping around.
+    local function rotate(facing, turn)
+      return _COMPASSKEYS[(_COMPASS[facing] - 1 + turn + 4) % 4 + 1]
+    end
+
+    local playerDir = rotate(self.gameObject:getOrientation(), turn_action)
+    local objectDir = rotate(self._graspedObject:getOrientation(), turn_action)
+
+    -- Teleport the object to the location the player will be facing.
+    local offsetPosition = tensor.Tensor(self.gameObject:getPosition()):cadd(
+        _DIRECTION[playerDir]):val()
+    -- The object is detached for the teleport and re-attached afterwards.
+    self._graspedObject:disconnect()
+    self._graspedObject:teleport(offsetPosition, objectDir)
+    self.gameObject:connect(self._graspedObject)
+  end
+
+  updaterRegistry:registerUpdater{
+      updateFn = grasp,
+      priority = _GRASP_PRIORITY,
+  }
+  updaterRegistry:registerUpdater{
+      updateFn = rotate_grasped,
+      priority = _MOVE_PRIORITY - 1,
+  }
+end
+
+-- Returns the index of the avatar this component is attached to.
+function PlayerGrasp:avatarIndex()
+  return self.gameObject:getComponent('Avatar'):getIndex()
+end
+
+-- Logs a failed grasp on `fruit`, marks the fruit as attempted and gives the
+-- configured pseudoreward(s) to this avatar.
+-- NOTE(review): `child_attempted_grasp` is set whatever this avatar's role
+-- is -- confirm that only children can reach this path.
+function PlayerGrasp:failedGrasp(fruit)
+  events:add("failed_grasp", "dict",
+             "height", fruit.originalHeight,
+             "fruit_type", fruit.fruitType,
+             "player_index", self.gameObject:getComponent('Avatar'):getIndex(),
+             "player_role", self._avatar_role)
+  fruit.child_attempted_grasp = true
+  self.gameObject:getComponent('Avatar'):addReward(self._config.tryingChildPseudoreward)
+  if fruit:isBanana() then
+    self.gameObject:getComponent('Avatar')
+        :addReward(self._config.tryingChildBananaPseudoreward)
+  end
+end
+
+-- Tries to grasp `gameObject`. Returns the object now held, or nil when the
+-- grasp failed. If something is already held, that object is returned
+-- unchanged.
+function PlayerGrasp:grasp(gameObject)
+  if self._graspedObject ~= 'empty' then
+    return self._graspedObject
+  end
+  local graspProbability = self._config.graspSuccessProbability
+  local failedGrasp = random:uniformReal(0, 1) > graspProbability
+  local fruit = gameObject:getComponent('FruitType')
+  -- Shrubs fail at random according to `graspSuccessProbability`.
+  if fruit:isShrub() and failedGrasp then
+    self:failedGrasp(fruit)
+    return nil
+  end
+  local canNotGraspTree = not self._config.canGraspTree
+
+  -- Trees always fail for avatars that are not allowed to grasp them.
+  if fruit:isTree() and canNotGraspTree then
+    self:failedGrasp(fruit)
+    return nil
+  end
+  local picked = fruit:pickFruitFromTree()
+  if picked then
+    fruit.initial_picker_role = self._avatar_role
+    events:add("success_grasp", "dict",
+               "height", fruit.originalHeight,
+               "fruit_type", fruit.fruitType,
+               "initial_picker_role", fruit.initial_picker_role,
+               "player_index", self.gameObject:getComponent('Avatar'):getIndex(),
+               "player_role", self._avatar_role)
+  end
+  -- disconnect from everyone else currently holding the same object
+  local players = self.gameObject.simulation:getGameObjectsByName("avatar")
+  for i, player in pairs(players) do
+    local go = player:getComponent('PlayerGrasp')._graspedObject
+    if go == gameObject then
+      player:disconnect()
+      player:getComponent('PlayerGrasp')._graspedObject = 'empty'
+    end
+  end
+  self._graspedObject = gameObject
+  self.gameObject:connect(self._graspedObject)
+  return self._graspedObject
+end
+
+--[[ `TreeType` makes a tree probabilistically yield
+either apples or bananas.
+]]
+local TreeType = class.Class(component.Component)
+
+-- `probabilities` (table): weights keyed by tree state name; normalized to
+-- sum to one right after construction.
+function TreeType:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('TreeType')},
+      {'probabilities', args.tableType},
+  })
+  TreeType.Base.__init__(self, kwargs)
+
+  self._config.probabilities = kwargs.probabilities
+  -- Normalize tree spawn probabilities to one.
+ self:normalizeTreeTypeProbabilitiesSumToOne() +end + +-- Rescales the five configured state probabilities in place so they sum to one. +function TreeType:normalizeTreeTypeProbabilitiesSumToOne() + local sum = (self._config.probabilities['empty'] + + self._config.probabilities['appleTree'] + + self._config.probabilities['bananaTree'] + + self._config.probabilities['appleShrub'] + + self._config.probabilities['bananaShrub']) + self._config.probabilities['empty'] = self._config.probabilities['empty'] / sum + self._config.probabilities['appleTree'] = self._config.probabilities['appleTree'] / sum + self._config.probabilities['bananaTree'] = self._config.probabilities['bananaTree'] / sum + self._config.probabilities['appleShrub'] = self._config.probabilities['appleShrub'] / sum + self._config.probabilities['bananaShrub'] = self._config.probabilities['bananaShrub'] / sum +end + +-- Samples one entry from `list`, an array of {probability, value} pairs, and +-- returns its value. Asserts if the probabilities run out without a hit. +function TreeType:_weighted_random_choice(list) + local choice = random:uniformReal(0, 1) + for idx=1, #list do + choice = choice - list[idx][1] + if choice <= 0.0 then + return list[idx][2] + end + end + assert(false, 'Weighted probabilities must sum to 1.') +end + +-- Draws a state from the configured probabilities and applies it to this game +-- object. Drawing 'empty' leaves the current state unchanged. +function TreeType:spawn() + local states = {'empty', 'appleTree', + 'bananaTree', 'appleShrub', 'bananaShrub'} + local weighted_states_list = {} + for idx=1, #states do + -- Declared `local` so the temporaries do not leak into the global table. + local proba = self._config.probabilities[states[idx]] + table.insert(weighted_states_list, {proba, states[idx]}) + end + local state = self:_weighted_random_choice(weighted_states_list) + if state ~= 'empty' then + self.gameObject:setState(state) + end +end + +function TreeType:start() + self:spawn() +end + + +--[[ `FruitType` keeps information about a fruit. 
+]] +local FruitType = class.Class(component.Component) + +function FruitType:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('FruitType')}, + {'framesTillAppleRespawn', args.positive}, + }) + FruitType.Base.__init__(self, kwargs) + self._config.framesTillAppleRespawn = kwargs.framesTillAppleRespawn +end + +function FruitType:saveInitialPosition(position) + self._position = position + -- the lua idx of the player who first picked the apple +end + +--save information about who attempts to pick the tree +function FruitType:initializePickingInfo() + --Who picked the fruit first: nil, parent or child + self.initial_picker_role = nil + --Who picked and dropped the fruit first: nil, parent or child + self.initial_dropper_role = nil + --true when child tries and fails to grasp + self.child_attempted_grasp = false +end + +function FruitType:getInitialPosition() + return self._position +end + +-- Matches `treeTypeState` against every fruit/height combination (e.g. +-- 'appleTree'); on a match, sets this object to the '<fruit>In<Height>' state +-- and records the fruit type and original height. Does nothing on no match. +function FruitType:setInfoFromTreeState(treeTypeState) + -- Declared `local` so these tables and `state` do not leak into globals. + local fruits = {'apple', 'banana'} + local heights = {'Tree', 'Shrub'} + for fidx=1, #fruits do + for hidx=1, #heights do + local state = fruits[fidx] .. heights[hidx] + if treeTypeState == state then + self.liveState = fruits[fidx] .. "In" .. 
heights[hidx] + self.gameObject:setState(self.liveState) + self.originalHeight = heights[hidx] + self.fruitType = fruits[fidx] + return + end + end + end +end + + +-- Registers two updaters: `spawn` (while in state 'fruitWait') initialises the +-- fruit from the tree found beneath it, and `respawn` (while in state +-- 'fruitEaten') restores the live state at the saved initial position. +function FruitType:registerUpdaters(updaterRegistry) + local spawn = function() + local transform = self.gameObject:getComponent('Transform') + local position = transform:getPosition() + local potentialTree = transform:queryPosition('lowerPhysical', position) + if potentialTree ~= nil and self.gameObject:getState() == 'fruitWait' then + local treeTypeState = potentialTree:getState() + self:setInfoFromTreeState(treeTypeState) + self:saveInitialPosition(position) + self:initializePickingInfo() + end + + end + + local respawn = function() + self.gameObject:setState(self.liveState) + -- Declared `local` so the position does not leak into the global table. + local position = self:getInitialPosition() + self.gameObject:teleport({position[1], position[2]}, "N") + self:initializePickingInfo() + end + + updaterRegistry:registerUpdater{ + updateFn = spawn, + priority = _APPLE_SPAWN, + state = "fruitWait", + } + updaterRegistry:registerUpdater{ + updateFn = respawn, + priority = _APPLE_RESPAWN, + state = "fruitEaten", + startFrame = self._config.framesTillAppleRespawn + } +end + +function FruitType:isTree() + return string.sub(self.gameObject:getState(), -#'Tree', -1) == 'Tree' +end + +function FruitType:isShrub() + return string.sub(self.gameObject:getState(), -#'Shrub', -1) == 'Shrub' +end + +function FruitType:isPicked() + return string.sub(self.gameObject:getState(), -#'Picked', -1) == 'Picked' +end + +function FruitType:isApple() + return string.sub(self.gameObject:getState(), 1, #'apple') == 'apple' +end + +function FruitType:isBanana() + return string.sub(self.gameObject:getState(), 1, #'banana') == 'banana' +end + + +function FruitType:pickFruitFromTree() + local state = self.gameObject:getState() + if state == "appleInTree" or state == "appleInShrub" then + self.gameObject:setState('applePicked') + return true + elseif state == "bananaInTree" or state == "bananaInShrub" then + 
self.gameObject:setState('bananaPicked') + return true + end + -- fruit was already picked + return false +end + + +--[[ `Eating` endows avatars with the ability to eat items and get rewards and +additionally fulfill hunger for children. +]] +local Eating = class.Class(component.Component) + +function Eating:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Eating')}, + -- Eating rewards. + {'bananaReward', args.numberType}, + {'appleReward', args.numberType}, + }) + Eating.Base.__init__(self, kwargs) + self._config.bananaReward = kwargs.bananaReward + self._config.appleReward = kwargs.appleReward +end + +function Eating:isChildPresent() + local players = self.gameObject.simulation:getGameObjectsByName("avatar") + for i, player in pairs(players) do + if player:getComponent("Role"):isChild() and + player:getState() ~= 'playerWait' then + return true + end + end + return false +end + +function Eating:calculateReward(isBanana, isChild) + -- parent only receives reward if child is present + if isChild or self:isChildPresent() then + if isBanana then + return self._config.bananaReward + else + return self._config.appleReward + end + end + return 0 +end + + +function Eating:registerUpdaters(updaterRegistry) + local avatar = self.gameObject:getComponent('Avatar') + local grasp = self.gameObject:getComponent('PlayerGrasp') + local eat = function() + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + if actions['eat'] == 1 and grasp._graspedObject ~= 'empty' then + grasp._graspedObject:setState("fruitEaten") + local fruit = grasp._graspedObject:getComponent('FruitType') + local isBanana = fruit:isBanana() + local isChild = self.gameObject:getComponent("Role"):isChild() + local reward = self:calculateReward(isBanana, isChild) + events:add('fruit_eaten', 'dict', + 'height',fruit.originalHeight, + 'fruit_type', fruit.fruitType, + 'player_index', 
avatar:getIndex(), + "player_role", grasp._avatar_role, + 'eater_is_child', tostring(isChild), + 'initial_picker_role', fruit.initial_picker_role, + 'initial_dropper_role', tostring(fruit.initial_dropper_role), + 'child_attempted', tostring(fruit.child_attempted_grasp)) + avatar:addReward(reward) + if isBanana and isChild then + self.gameObject:getComponent('Hunger'):resetDriveLevel() + end + self.gameObject:disconnect() + self.gameObject:getComponent('PlayerGrasp')._graspedObject = 'empty' + end +end + + updaterRegistry:registerUpdater{ + updateFn = eat, + priority = _APPLE_EAT, + } +end + +--[[ The `Hunger` component keeps track of the child's hunger level. +]] +local Hunger = class.Class(component.Component) +function Hunger:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Hunger')}, + {'framesTillHungry', args.numberType}, + }) + Hunger.Base.__init__(self, kwargs) + self._config.framesTillHungry = kwargs.framesTillHungry +end + +function Hunger:reset() + self._hungerTimer = self._config.framesTillHungry +end + +-- Call this function to reset the countdown, i.e., to "satisfy" the need. 
+function Hunger:resetDriveLevel() + self._hungerTimer = self._config.framesTillHungry + events:add("player_hunger_reset", "dict", "hunger_timer", self._hungerTimer, + "player_index", self.gameObject:getComponent('Avatar'):getIndex()) +end + +function Hunger:update() + local isChild = self.gameObject:getComponent("Role"):isChild() + if not isChild then + return + end + if self.gameObject:getState() ~= 'playerWait' then + self._hungerTimer = self._hungerTimer - 1 + if self._hungerTimer == 0 then + local graspedObject = self.gameObject:getComponent("PlayerGrasp")._graspedObject + -- mark grasped fruit as eaten so it can respawn if child is in wait state + if graspedObject ~= 'empty' then + self.gameObject:connect(graspedObject) + self.gameObject:disconnect() + graspedObject:setState('fruitEaten') + self.gameObject:getComponent("PlayerGrasp")._graspedObject = 'empty' + self.gameObject:getComponent("PlayerGrasp")._lastGraspAction = false + end + self.gameObject:setState('playerWait') + events:add("player_wait", "dict", "hunger_timer", self._hungerTimer, + "player_index", self.gameObject:getComponent('Avatar'):getIndex()) + end + end +end + +function Hunger:getNeed() + local isChild = self.gameObject:getComponent("Role"):isChild() + if not isChild then + return 1 + end + local normalizedHunger = self._hungerTimer / self._config.framesTillHungry + if self.gameObject:getState() ~= 'playerWait' then + return normalizedHunger + else + return 0 + end +end + +local HungerObserver = class.Class(component.Component) + +function HungerObserver:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('HungerObserver')}, + {'needComponent', args.default('Hunger'), args.stringType}, + }) + HungerObserver.Base.__init__(self, kwargs) + self._config.needComponent = kwargs.needComponent +end + +function HungerObserver:addObservations(tileSet, world, observations) + local playerIndex = self.gameObject:getComponent('Avatar'):getIndex() + local needComponent = 
self.gameObject:getComponent(self._config.needComponent) + observations[#observations + 1] = { + name = tostring(playerIndex) .. '.HUNGER', + type = 'Doubles', + shape = {}, + func = function(grid) + return needComponent:getNeed() + end + } +end + +-- `AvatarRespawn` brings an avatar in its wait state back to its alive state +-- next to a non-child avatar after `framesTillRespawn` frames. +local AvatarRespawn = class.Class(component.Component) + +function AvatarRespawn:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AvatarRespawn')}, + {'framesTillRespawn', args.positive}, + }) + AvatarRespawn.Base.__init__(self, kwargs) + self._config.framesTillRespawn = kwargs.framesTillRespawn +end + +-- Registers the `respawn` updater, which runs while the avatar is in its wait +-- state: it teleports the avatar onto the first 'background' object within +-- distance 1 of a non-child avatar, resets its hunger and logs the event. +function AvatarRespawn:registerUpdaters(updaterRegistry) + local avatar = self.gameObject:getComponent('Avatar') + local aliveState = avatar:getAliveState() + local waitState = avatar:getWaitState() + local respawn = function() + -- find parent position + local transform = nil + local players = self.gameObject.simulation:getGameObjectsByName("avatar") + for i, player in pairs(players) do + if not player:getComponent("Role"):isChild() then + transform = player:getComponent('Transform') + end + end + assert(transform ~= nil, "The parent avatar was not found!") + + -- try to respawn on nearby ground + local nearby = transform:queryDisc('background', 1) + assert(#nearby > 0, "There is no nearby background to respawn on!") + + local nearby_object = nearby[1] + local pos = nearby_object:getComponent("Transform"):getPosition() + local teleport_position = {pos[1], pos[2]} + self.gameObject:setState(aliveState) + self.gameObject:teleport(teleport_position, 'N') + + local hunger = self.gameObject:getComponent('Hunger') + hunger:reset() + -- Each key must be followed by its value; "hunger_timer" previously had + -- none, which shifted every later key/value pair of the event. + events:add("player_respawned", "dict", + "hunger_timer", hunger._hungerTimer, + "player_index", avatar:getIndex()) + + end + + updaterRegistry:registerUpdater{ + updateFn = respawn, + priority = _AVATAR_RESPAWN, + state = waitState, + startFrame = self._config.framesTillRespawn + } +end + +local allComponents = { + -- Avatar components. 
+ Role = Role, + PlayerGrasp = PlayerGrasp, + Eating = Eating, + Hunger = Hunger, + HungerObserver = HungerObserver, + AvatarRespawn = AvatarRespawn, + -- Object components. + Graspable = Graspable, + -- Fruit tree components. + FruitType = FruitType, + TreeType = TreeType, +} + +component_registry.registerAllComponents(allComponents) + +return allComponents diff --git a/meltingpot/lua/levels/daycare/init.lua b/meltingpot/lua/levels/daycare/init.lua new file mode 100644 index 00000000..5368cd04 --- /dev/null +++ b/meltingpot/lua/levels/daycare/init.lua @@ -0,0 +1,37 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +-- Entry point lua file for daycare. + +local meltingpot = 'meltingpot.lua.modules.' +local api_factory = require(meltingpot .. 'api_factory') +local simulation = require(meltingpot .. 'base_simulation') + +-- Required to be able to use the components in the substrate +local component_library = require(meltingpot .. 'component_library') +local avatar_library = require(meltingpot .. 'avatar_library') +local components = require 'components' + +return api_factory.apiFactory{ + Simulation = simulation.BaseSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. 
+ simulation = {}, + } +} diff --git a/meltingpot/lua/levels/externality_mushrooms/components.lua b/meltingpot/lua/levels/externality_mushrooms/components.lua new file mode 100644 index 00000000..1164b749 --- /dev/null +++ b/meltingpot/lua/levels/externality_mushrooms/components.lua @@ -0,0 +1,387 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +local args = require 'common.args' +local class = require 'common.class' +local helpers = require 'common.helpers' +local log = require 'common.log' +local random = require 'system.random' +local tensor = require 'system.tensor' +local set = require 'common.set' +local events = require 'system.events' + +local meltingpot = 'meltingpot.lua.modules.' +local component = require(meltingpot .. 'component') +local component_registry = require(meltingpot .. 
'component_registry') + + +local MushroomEating = class.Class(component.Component) + +function MushroomEating:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('MushroomEating')}, + {'liveStates', args.tableType}, + {'waitState', args.default('wait'), args.stringType}, + {'totalReward', args.tableType}, + {'numSporesReleasedWhenEaten', args.tableType}, + {'digestionTimes', args.tableType}, + {'destroyOnEating', args.tableType}, + }) + MushroomEating.Base.__init__(self, kwargs) + self._totalReward = kwargs.totalReward + self._numSporesReleasedWhenEaten = kwargs.numSporesReleasedWhenEaten + self._waitState = kwargs.waitState + self._liveStates = kwargs.liveStates + self._liveStatesSet = set.Set(self._liveStates) + self._waitState = kwargs.waitState + self._digestionTimes = kwargs.digestionTimes + self._destroyOnEating = kwargs.destroyOnEating +end + +function MushroomEating:_getOthers(eatingPlayerIndex) + local numPlayers = self.gameObject.simulation:getNumPlayers() + local others = {} + for idx = 1, numPlayers do + if idx ~= eatingPlayerIndex then + local avatarObject = self.gameObject.simulation:getAvatarFromIndex(idx) + table.insert(others, avatarObject) + end + end + return others +end + +function MushroomEating:_rewardEveryone(playerIndex) + local avatarWhoAteThis = self.gameObject.simulation:getAvatarFromIndex( + playerIndex):getComponent('Avatar') + local otherAvatars = self:_getOthers(playerIndex) + local numPlayers = self.gameObject.simulation:getNumPlayers() + + local mushroomType = self.gameObject:getState() + events:add('eating_mushroom', 'dict', + 'player_index', playerIndex, + 'mushroom_type', mushroomType) + if mushroomType == 'fullInternalityZeroExternality' then + -- Only reward self. + avatarWhoAteThis:addReward(self._totalReward[mushroomType]) + elseif mushroomType == 'halfInternalityHalfExternality' then + local partialReward = self._totalReward[mushroomType] / numPlayers + -- Reward self. 
+ avatarWhoAteThis:addReward(partialReward) + -- Reward others. + for _, avatarObject in ipairs(otherAvatars) do + local avatarComponent = avatarObject:getComponent('Avatar') + avatarComponent:addReward(partialReward) + end + elseif mushroomType == 'zeroInternalityFullExternality' then + -- Only reward others. + local partialReward = self._totalReward[mushroomType] / (numPlayers - 1) + for _, avatarObject in ipairs(otherAvatars) do + local avatarComponent = avatarObject:getComponent('Avatar') + avatarComponent:addReward(partialReward) + end + elseif mushroomType == 'negativeInternalityNegativeExternality' then + -- Reward (or punish) everyone. + local partialReward = self._totalReward[mushroomType] / numPlayers + avatarWhoAteThis:addReward(partialReward) + for _, avatarObject in ipairs(otherAvatars) do + local avatarComponent = avatarObject:getComponent('Avatar') + avatarComponent:addReward(partialReward) + end + else + assert(false, 'Unrecognized mushroom type: ' .. mushroomType) + end +end + +function MushroomEating:onEnter(enteringGameObject, contactName) + if contactName == 'avatar' then + if self._liveStatesSet[self.gameObject:getState()] then + local avatarComponent = enteringGameObject:getComponent('Avatar') + local playerIndex = avatarComponent:getIndex() + self:_rewardEveryone(playerIndex) + -- Another mushroom grows. + local mushroomRegrowth = self.gameObject.simulation:getSceneObject( + ):getComponent('MushroomRegrowth') + local mushroomType = self.gameObject:getState() + for n = 1, self._numSporesReleasedWhenEaten[mushroomType] do + mushroomRegrowth:grow(self.gameObject:getState()) + end + -- Some mushrooms destroy other mushrooms when eaten. 
+ if rawget(self._destroyOnEating, mushroomType) then + local mushroomTypeToDestroy = self._destroyOnEating[ + mushroomType].typeToDestroy + local percentToDestroy = self._destroyOnEating[ + mushroomType].percentToDestroy + mushroomRegrowth:destroyRandomMushrooms(mushroomTypeToDestroy, + percentToDestroy) + end + -- Freeze the avatar who ate the mushroom while it digests. + local timeToDigest = self._digestionTimes[self.gameObject:getState()] + avatarComponent:disallowMovementUntil(timeToDigest) + -- Change this object to its wait (disabled) state. + self.gameObject:setState(self._waitState) + -- Set the cumulant tracking that this mushroom was eaten. + self:setCumulants(enteringGameObject) + end + end +end + +function MushroomEating:setCumulants(enteringGameObject) + local cumulantsComponent = enteringGameObject:getComponent('Cumulants') + local mushroomType = self.gameObject:getState() + if mushroomType == 'fullInternalityZeroExternality' then + cumulantsComponent.ate_mushroom_fize = 1 + elseif mushroomType == 'halfInternalityHalfExternality' then + cumulantsComponent.ate_mushroom_hihe = 1 + elseif mushroomType == 'zeroInternalityFullExternality' then + cumulantsComponent.ate_mushroom_zife = 1 + elseif mushroomType == 'negativeInternalityNegativeExternality' then + cumulantsComponent.ate_mushroom_nine = 1 + end +end + + +local MushroomGrowable = class.Class(component.Component) + +function MushroomGrowable:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('MushroomGrowable')}, + }) + MushroomGrowable.Base.__init__(self, kwargs) +end + +function MushroomGrowable:registerUpdaters(updaterRegistry) + local regrowth = self.gameObject.simulation:getSceneObject():getComponent( + 'MushroomRegrowth') + + local registration = function() + if self._mustRegister then + regrowth:registerPotentialMushroom(self.gameObject:getPiece()) + elseif self._mustDeregister then + regrowth:deregisterPotentialMushroom(self.gameObject:getPiece()) + end + 
self._mustRegister = false + self._mustDeregister = false + end + + updaterRegistry:registerUpdater{ + updateFn = registration, + priority = 500, + } +end + +function MushroomGrowable:onStateChange(oldState) + local regrowth = self.gameObject.simulation:getSceneObject():getComponent( + 'MushroomRegrowth') + local newState = self.gameObject:getState() + if newState == 'wait' then + self._mustRegister = true + else + self._mustDeregister = true + end +end + + +local MushroomRegrowth = class.Class(component.Component) + +function MushroomRegrowth:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('MushroomRegrowth')}, + {'mushroomsToProbabilities', args.tableType}, + {'minPotentialMushrooms', args.default(10), args.numberType}, + }) + MushroomRegrowth.Base.__init__(self, kwargs) + self.mushroomsToProbabilities = kwargs.mushroomsToProbabilities + self._minPotentialMushrooms = kwargs.minPotentialMushrooms + + self._potentialMushrooms = {} +end + +function MushroomRegrowth:reset() + self._potentialMushrooms = set.Set{} + self._numPotentialMushrooms = 0 +end + +function MushroomRegrowth:grow(eatenMushroom) + local growthProbabilities = self.mushroomsToProbabilities[eatenMushroom] + for mushroom, probability in pairs(growthProbabilities) do + if self._numPotentialMushrooms >= self._minPotentialMushrooms then + if random:uniformReal(0, 1) < probability then + local piece = random:choice(set.toSortedList(self._potentialMushrooms)) + if piece then + local object = self.gameObject.simulation:getGameObjectFromPiece( + piece) + local avatarOrNil = object:getComponent('Transform'):queryPosition( + 'upperPhysical') + if not avatarOrNil then + -- Do not spawn mushrooms where avatars are currently standing. 
+ object:setState(mushroom) + end + end + end + end + end +end + +function MushroomRegrowth:destroyRandomMushrooms(mushroomType, percentToDestroy) + local simulation = self.gameObject.simulation + local shroomObjects = simulation:getGroupShuffledWithProbability( + mushroomType, percentToDestroy) + for _, object in ipairs(shroomObjects) do + object:setState('wait') + end +end + +function MushroomRegrowth:registerPotentialMushroom(mushroomPiece) + self._potentialMushrooms[mushroomPiece] = true + self._numPotentialMushrooms = self._numPotentialMushrooms + 1 +end + +function MushroomRegrowth:deregisterPotentialMushroom(mushroomPiece) + self._potentialMushrooms[mushroomPiece] = nil + self._numPotentialMushrooms = self._numPotentialMushrooms - 1 +end + + +local Destroyable = class.Class(component.Component) + +function Destroyable:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Destroyable')}, + {'initialHealth', args.positive}, + {'waitState', args.stringType}, + }) + Destroyable.Base.__init__(self, kwargs) + self._config.initialHealth = kwargs.initialHealth + self._config.waitState = kwargs.waitState +end + +function Destroyable:reset() + self._variables = {} + self._variables.health = self._config.initialHealth +end + +function Destroyable:onHit(hitterGameObject, hitName) + if hitName == 'zapHit' then + self._variables.health = self._variables.health - 1 + if self._variables.health == 0 then + -- Reset the health state variable. + self._variables.health = self._config.initialHealth + -- Remove the resource from the map. + self.gameObject:setState(self._config.waitState) + -- Set the cumulant tracking that this mushroom was eaten. + self:setCumulants(hitterGameObject) + -- Beams pass through a destroyed destroyable. + return false + end + -- Beams do not pass through after hitting an undestroyed destroyable. 
+ return true + end +end + +function Destroyable:setCumulants(hitterGameObject) + local cumulantsComponent = hitterGameObject:getComponent('Cumulants') + local mushroomType = self.gameObject:getState() + if mushroomType == 'fullInternalityZeroExternality' then + cumulantsComponent.destroyed_mushroom_fize = 1 + elseif mushroomType == 'halfInternalityHalfExternality' then + cumulantsComponent.destroyed_mushroom_hihe = 1 + elseif mushroomType == 'zeroInternalityFullExternality' then + cumulantsComponent.destroyed_mushroom_zife = 1 + elseif mushroomType == 'negativeInternalityNegativeExternality' then + cumulantsComponent.destroyed_mushroom_nine = 1 + end +end + + +local Perishable = class.Class(component.Component) + +function Perishable:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Perishable')}, + {'waitState', args.stringType}, + {'delayPerState', args.tableType}, + }) + Perishable.Base.__init__(self, kwargs) + self._config.waitState = kwargs.waitState + self._delayPerState = kwargs.delayPerState +end + +function Perishable:registerUpdaters(updaterRegistry) + local perish = function() + self.gameObject:setState(self._config.waitState) + end + + for state, delay in pairs(self._delayPerState) do + updaterRegistry:registerUpdater{ + updateFn = perish, + priority = 3, + startFrame = delay, + state = state, + } + end +end + + +local Cumulants = class.Class(component.Component) + +function Cumulants:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Cumulants')}, + }) + Cumulants.Base.__init__(self, kwargs) +end + +function Cumulants:reset() + self:_resetBinaryCumulants() +end + +function Cumulants:_resetBinaryCumulants() + self.ate_mushroom_fize = 0 + self.ate_mushroom_hihe = 0 + self.ate_mushroom_zife = 0 + self.ate_mushroom_nine = 0 + self.destroyed_mushroom_fize = 0 + self.destroyed_mushroom_hihe = 0 + self.destroyed_mushroom_zife = 0 + self.destroyed_mushroom_nine = 0 +end + +function 
Cumulants:registerUpdaters(updaterRegistry) + local resetCumuluants = function() + self:_resetBinaryCumulants() + end + + updaterRegistry:registerUpdater{ + updateFn = resetCumuluants, + priority = 900, + } +end + + +local allComponents = { + -- Game object components. + MushroomEating = MushroomEating, + MushroomGrowable = MushroomGrowable, + Destroyable = Destroyable, + Perishable = Perishable, + -- Avatar components. + Cumulants = Cumulants, + -- Scene components. + MushroomRegrowth = MushroomRegrowth, +} + +component_registry.registerAllComponents(allComponents) + +return allComponents diff --git a/meltingpot/lua/levels/externality_mushrooms/init.lua b/meltingpot/lua/levels/externality_mushrooms/init.lua new file mode 100644 index 00000000..c67ba390 --- /dev/null +++ b/meltingpot/lua/levels/externality_mushrooms/init.lua @@ -0,0 +1,37 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +-- Entry point lua file for this substrate. + +local meltingpot = 'meltingpot.lua.modules.' +local api_factory = require(meltingpot .. 'api_factory') +local simulation = require(meltingpot .. 'base_simulation') + +-- Required to be able to use the components in the level +local component_library = require(meltingpot .. 'component_library') +local avatar_library = require(meltingpot .. 
'avatar_library') +local components = require 'components' + +return api_factory.apiFactory{ + Simulation = simulation.BaseSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. + simulation = {}, + } +} diff --git a/meltingpot/lua/levels/factory_of_the_commons/components.lua b/meltingpot/lua/levels/factory_of_the_commons/components.lua new file mode 100644 index 00000000..871c6e29 --- /dev/null +++ b/meltingpot/lua/levels/factory_of_the_commons/components.lua @@ -0,0 +1,838 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +local args = require 'common.args' +local class = require 'common.class' +local helpers = require 'common.helpers' +local set = require 'common.set' +local log = require 'common.log' +local events = require 'system.events' +local random = require 'system.random' +local tensor = require 'system.tensor' + +local meltingpot = 'meltingpot.lua.modules.' +local component = require(meltingpot .. 'component') +local component_registry = require(meltingpot .. 'component_registry') + + +-- The 'Receivable' component enables an object to be placed on a receiver. +-- Sets Receiver as having needed objects. 
local Receivable = class.Class(component.Component)

-- Build a Receivable.
--
-- kwargs:
--   name: component name (defaults to 'Receivable').
--   waitState: state the object is put in once it has dropped into a receiver.
--   liveState: state in which the object may be received again.
function Receivable:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('Receivable')},
      {'waitState', args.stringType},
      {'liveState', args.stringType},
  })
  Receivable.Base.__init__(self, kwargs)
  self._config.waitState = kwargs.waitState
  self._config.liveState = kwargs.liveState
  self._isDropped = false
end

-- Returns true when this object sits on a cell whose 'lowestPhysical' layer
-- holds a Receiver with an open HopperMouth.
function Receivable:_checkIfOverReceiver()
  local underlyingObjects = self.gameObject:getComponent(
      'Transform'):queryDisc('lowestPhysical', 0)

  local isOverReceiver = false
  for _, object in ipairs(underlyingObjects) do
    if object:hasComponent('Receiver') and
        object:getComponent('HopperMouth'):isOpen() == true then
      isOverReceiver = true
    end
  end
  return isOverReceiver
end

-- Updates the Receiver under this object according to this object's Token
-- type and the type shown by any nearby ReceiverIndicator.
--
-- A 'TwoBlocks' indicator needs two 'BlueCube' tokens: the first only marks
-- the receiver as half full, the second completes it. Any other indicator is
-- satisfied by a single token whose type equals the indicator type.
--
-- Always returns false; callers visible in this file ignore the result.
function Receivable:_setReceiver()
  local transform = self.gameObject:getComponent('Transform')
  local underlyingObjects = transform:queryDisc('lowestPhysical', 0)
  local hopper = transform:queryDisc('upperPhysical', 1)
  for _, receiver in ipairs(underlyingObjects) do
    if receiver:hasComponent('Receiver') and
        receiver:getComponent('HopperMouth'):isOpen() == true then
      for _, hopperIndicator in ipairs(hopper) do
        if hopperIndicator:hasComponent('ReceiverIndicator') then
          local receiverComponent = receiver:getComponent('Receiver')
          local oneCube = receiverComponent:hasOneOfTwoCubes()
          local tokenType = self.gameObject:getComponent('Token'):getType()
          local indicatorType = hopperIndicator:getComponent(
              'ReceiverIndicator'):getType()
          local isCubeForTwoBlocks = (tokenType == 'BlueCube' and
                                      indicatorType == 'TwoBlocks')
          if isCubeForTwoBlocks and oneCube ~= true then
            -- First of the two required cubes.
            receiverComponent:setHasOneOfTwoCubes(true)
          elseif (isCubeForTwoBlocks and oneCube == true) or
              tokenType == indicatorType then
            -- Second cube, or a single matching token: the receiver is full.
            receiverComponent:setHasOneOfTwoCubes(false)
            receiverComponent:setHasNeededObjects(true)
          end
        end
      end
    end
  end
  return false
end

-- Returns whether the object is currently held, as reported by Graspable.
function Receivable:_isGrasped()
  return self.gameObject:getComponent('Graspable'):isGrasped()
end

-- Starts the drop animation that ends with the object being received.
function Receivable:_beReceived()
  self.gameObject:getComponent('ReceiverDropAnimation'):startFall()
end

-- Returns the current dropping flag (nil until first set).
function Receivable:isDropping()
  return self._config.isDropping
end

-- Sets the dropping flag. On the transition from dropping to not dropping,
-- any avatar still grasping the object is made to drop it and the object is
-- moved to its wait state.
function Receivable:setIsDropping(x)
  if self._config.isDropping and not x then
    local avatarIndex = self.gameObject:getComponent(
        'Graspable'):getWhoIsGrasping()
    if avatarIndex then
      local avatarGrasp = self.gameObject.simulation:getAvatarFromIndex(
          avatarIndex):getComponent('AvatarGrasp')
      avatarGrasp:drop()
    end
    self.gameObject:setState(self._config.waitState)
    self._isDropped = true
  end
  self._config.isDropping = x
end

-- Registers an updater, run at the Graspable priority plus one, that drops
-- the object into a receiver once it rests ungrasped over an open one.
function Receivable:registerUpdaters(updaterRegistry)
  local grasp = function()
    -- The checks short-circuit in the same order as the original nesting.
    if not self._isDropped and
        not self._config.isDropping and
        self:_checkIfOverReceiver() and
        not self:_isGrasped() then
      self:_setReceiver()
      self:_beReceived()
    end
    -- Re-arm once the object is back in its live state.
    if self._config.liveState == self.gameObject:getState() then
      self._isDropped = false
    end
  end
  updaterRegistry:registerUpdater{
      updateFn = grasp,
      priority = self.gameObject:getComponent('Graspable'):priority() + 1,
  }
end


--[[ A receiver component which records whether the objects it needs have been
placed on it: either one matching object, or the first/second of two cubes.
]]
local Receiver = class.Class(component.Component)

function Receiver:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('Receiver')},
  })
  Receiver.Base.__init__(self, kwargs)
  self._kwargs = kwargs
end

-- Returns the flag set by `setHasNeededObjects` (nil until first set).
function Receiver:hasNeededObjects()
  return self._config.hasNeededObjects
end

-- Stores the flag and returns it.
-- NOTE(review): `self._config.waitState` is never assigned in this component's
-- constructor, so the state passed to `setState` below looks like it is nil.
-- Confirm the intended state name.
function Receiver:setHasNeededObjects(x)
  self._config.hasNeededObjects = x
  if self._config.hasNeededObjects == true then
    self.gameObject:setState(self._config.waitState)
  end
  return self._config.hasNeededObjects
end

-- Returns the flag set by `setHasOneOfTwoCubes` (nil until first set).
function Receiver:hasOneOfTwoCubes()
  return self._config.hasOneOfTwoCubes
end

-- Stores the flag and returns it. See the NOTE(review) on
-- `setHasNeededObjects` regarding `waitState`.
function Receiver:setHasOneOfTwoCubes(x)
  self._config.hasOneOfTwoCubes = x
  if self._config.hasOneOfTwoCubes == true then
    self.gameObject:setState(self._config.waitState)
  end
  return self._config.hasOneOfTwoCubes
end

-- Tags a game object with a string type (e.g. 'Apple', 'BlueCube').
local Token = class.Class(component.Component)

function Token:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('Token')},
      {'type', args.stringType},
  })
  Token.Base.__init__(self, kwargs)
  self._config.type = kwargs.type
end

-- Returns the configured type string.
function Token:getType()
  return self._config.type
end


-- Animation for dropping object into receiver hopper.
local ReceiverDropAnimation = class.Class(component.Component)

-- kwargs:
--   name: component name (defaults to 'ReceiverDropAnimation').
--   framesToDrop: counter value loaded by `startFall` (default 2).
--   dropOne, dropTwo: states shown while the counter is 2 and 1 respectively.
function ReceiverDropAnimation:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('ReceiverDropAnimation')},
      {'framesToDrop', args.default(2), args.positive},
      {'dropOne', args.stringType},
      {'dropTwo', args.stringType},
  })
  ReceiverDropAnimation.Base.__init__(self, kwargs)
  self._config.framesToDrop = kwargs.framesToDrop
  self._config.dropOne = kwargs.dropOne
  self._config.dropTwo = kwargs.dropTwo
end

function ReceiverDropAnimation:reset()
  self._counter = 0
end

-- Starts the fall and marks the owning Receivable as dropping.
function ReceiverDropAnimation:startFall()
  self._counter = self._config.framesToDrop
  self.gameObject:getComponent('Receivable'):setIsDropping(true)
end

-- Steps the animation. The counter keeps decreasing below zero afterwards, so
-- the `== 0` branch fires once per fall (and once right after a reset).
function ReceiverDropAnimation:update()
  if self._counter == 2 then
    self.gameObject:setState(self._config.dropOne)
  end
  if self._counter == 1 then
    self.gameObject:setState(self._config.dropTwo)
  end
  if self._counter == 0 then
    self.gameObject:getComponent('Receivable'):setIsDropping(false)
  end
  self._counter = self._counter - 1
end


-- Cycles a conveyor belt sprite through three states.
local ConveyerBeltOnAnimation = class.Class(component.Component)

function ConveyerBeltOnAnimation:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('ConveyerBeltOnAnimation')},
      {'framesToDispenseObject', args.default(6), args.positive},
      {'waitState', args.stringType},
      {'stateOne', args.stringType},
      {'stateTwo', args.stringType},
      {'stateThree', args.stringType},
  })
  ConveyerBeltOnAnimation.Base.__init__(self, kwargs)
  self._config.framesToDispenseObject = kwargs.framesToDispenseObject
  self._config.waitState = kwargs.waitState
  self._config.stateOne = kwargs.stateOne
  self._config.stateTwo = kwargs.stateTwo
  self._config.stateThree = kwargs.stateThree
end

function ConveyerBeltOnAnimation:reset()
  self._counter = 0
end

-- Switches state on frames 1, 3 and 5 of a fixed six-frame cycle. The cycle
-- length is hard-coded here; `framesToDispenseObject` is stored but not read.
function ConveyerBeltOnAnimation:update()
  local frame = self._counter % 6
  if frame == 5 then
    self.gameObject:setState(self._config.stateThree)
  end
  if frame == 3 then
    self.gameObject:setState(self._config.stateTwo)
  end
  if frame == 1 then
    self.gameObject:setState(self._config.stateOne)
  end
  self._counter = self._counter + 1
end


-- Indicator showing what a nearby Receiver still needs.
local ReceiverIndicator = class.Class(component.Component)

function ReceiverIndicator:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('ReceiverIndicator')},
      {'framesToTurnOffIndicator', args.default(8), args.positive},
      {'waitState', args.stringType},
      {'liveState', args.stringType},
      {'secondLiveState', args.stringType},
      {'count', args.stringType},
      {'type', args.stringType},
  })
  ReceiverIndicator.Base.__init__(self, kwargs)
  self._config.framesToTurnOffIndicator = kwargs.framesToTurnOffIndicator
  self._config.waitState = kwargs.waitState
  self._config.liveState = kwargs.liveState
  self._config.secondLiveState = kwargs.secondLiveState
  self._config.count = kwargs.count
  self._config.type = kwargs.type
end

function ReceiverIndicator:getCount()
  return self._config.count
end

function ReceiverIndicator:getType()
  return self._config.type
end

-- Starts the period during which the indicator is held in its wait state.
function ReceiverIndicator:processingObjects()
  self._counter = self._config.framesToTurnOffIndicator
end

function ReceiverIndicator:reset()
  self._counter = 0
end

-- Mirrors the state of any Receiver within one cell on 'lowestPhysical':
-- second live state when a 'TwoBlocks' indicator has one cube, wait state
-- when the receiver is full, live state otherwise. While the counter is
-- positive the wait state overrides whatever was chosen above.
function ReceiverIndicator:update()
  local hopperInventory = self.gameObject:getComponent('Transform'):queryDisc(
      'lowestPhysical', 1)
  if self._config.liveState then
    for _, hopperMouth in ipairs(hopperInventory) do
      if hopperMouth:hasComponent('Receiver') then
        if self:getType() == 'TwoBlocks' and
            hopperMouth:getComponent('Receiver'):hasOneOfTwoCubes() then
          self.gameObject:setState(self._config.secondLiveState)
        elseif hopperMouth:getComponent('Receiver'):hasNeededObjects() then
          self.gameObject:setState(self._config.waitState)
          self:processingObjects()
        else
          self.gameObject:setState(self._config.liveState)
        end
      end
    end
  end
  if self._counter > 0 then
    self.gameObject:setState(self._config.waitState)
    self._counter = self._counter - 1
  end
end


-- Opens and closes the mouth of a hopper while it processes its contents.
local HopperMouth = class.Class(component.Component)

-- kwargs:
--   name: component name (defaults to 'HopperMouth').
--   framesToProcess: counter value loaded when processing starts (default 17).
--   closed, opening, open: state names for the three mouth sprites.
function HopperMouth:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('HopperMouth')},
      {'framesToProcess', args.default(17), args.positive},
      {'closed', args.stringType},
      {'opening', args.stringType},
      {'open', args.stringType},
  })
  HopperMouth.Base.__init__(self, kwargs)
  self._config.framesToProcess = kwargs.framesToProcess
  self._config.closed = kwargs.closed
  self._config.opening = kwargs.opening
  self._config.open = kwargs.open
end

-- Begins a processing cycle. The flag lives on the instance: it used to be an
-- accidental global shared by every hopper, so one hopper finishing its cycle
-- would interrupt all the others.
function HopperMouth:processing()
  self._counter = self._config.framesToProcess
  self._processing = true
end

function HopperMouth:reset()
  self._processing = false
  self._counter = 0
end

-- Returns the flag set by `setIsOpen` (nil until first set).
function HopperMouth:isOpen()
  return self._config.isOpen
end

-- Clears the 'has needed objects' flag on this object's Receiver.
function HopperMouth:resetInventory()
  self.gameObject:getComponent('Receiver'):setHasNeededObjects(false)
end

-- Stores the open flag and returns it.
function HopperMouth:setIsOpen(x)
  self._config.isOpen = x
  return self._config.isOpen
end

-- Steps the mouth animation and starts a new cycle when the Receiver is full.
--
-- Idle: the mouth is held open. Processing: at counter 15 the inventory is
-- cleared, at 14 the mouth closes, at 2 it starts reopening, and at 1 it is
-- open again and the cycle ends.
function HopperMouth:update()
  if not self._processing then
    self.gameObject:setState(self._config.open)
    self:setIsOpen(true)
  elseif self._counter > 0 then
    if self._counter == 15 then
      self.gameObject:setState(self._config.opening)
      self:resetInventory()
    end
    if self._counter == 14 then
      self.gameObject:setState(self._config.closed)
      self:setIsOpen(false)
    end
    if self._counter == 2 then
      self.gameObject:setState(self._config.opening)
    end
    if self._counter == 1 then
      self.gameObject:setState(self._config.open)
      self:setIsOpen(true)
      self:reset()
    end
    self._counter = self._counter - 1
  end
  -- As before, a cycle only starts when at least one object is found within
  -- one cell on 'upperPhysical'.
  local hopper = self.gameObject:getComponent('Transform'):queryDisc(
      'upperPhysical', 1)
  for _ in ipairs(hopper) do
    if self.gameObject:getComponent('Receiver'):hasNeededObjects() and
        not self._processing then
      self:processing()
    end
  end
end


-- An edible object that rewards the avatar that walks onto it.
local AppleComponent = class.Class(component.Component)

function AppleComponent:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('AppleComponent')},
      {'liveState', args.stringType},
      {'waitState', args.stringType},
      {'rewardForEating', args.numberType},
  })
  AppleComponent.Base.__init__(self, kwargs)
  self._config.liveState = kwargs.liveState
  self._config.waitState = kwargs.waitState
  self._config.rewardForEating = kwargs.rewardForEating
end

function AppleComponent:reset()
  self._waitState = self._config.waitState
  self._liveState = self._config.liveState
end

function AppleComponent:onEnter(enteringGameObject, contactName)
  if contactName == 'avatar' then
    if self.gameObject:getState() == self._liveState then
      -- Reward the player who ate the edible.
      local avatarComponent = enteringGameObject:getComponent('Avatar')
      avatarComponent:addReward(self._config.rewardForEating)
      -- Change the edible to its wait (disabled) state.
      self.gameObject:setState(self._waitState)
    end
  end
end


-- Animation for apple ejecting from converter output.
local ObjectJumpAnimation = class.Class(component.Component)

-- kwargs:
--   name: component name (defaults to 'ObjectJumpAnimation').
--   framesToJump: counter value loaded by `startJump` (default 8).
--   jump, drop: states shown while the counter is 2 and 1 respectively.
--   waitState: state the object is put in when a jump starts.
function ObjectJumpAnimation:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('ObjectJumpAnimation')},
      {'framesToJump', args.default(8), args.positive},
      {'jump', args.stringType},
      {'drop', args.stringType},
      {'waitState', args.stringType},
  })
  ObjectJumpAnimation.Base.__init__(self, kwargs)
  local config = self._config
  config.framesToJump = kwargs.framesToJump
  config.jump = kwargs.jump
  config.drop = kwargs.drop
  config.waitState = kwargs.waitState
  self._counter = 0
end

-- Loads the countdown and hides the object in its wait state.
function ObjectJumpAnimation:startJump()
  self._counter = self._config.framesToJump
  self.gameObject:setState(self._config.waitState)
end

-- Shows the jump state at counter 2 and the drop state at counter 1, then
-- decrements the counter (it keeps running below zero).
function ObjectJumpAnimation:update()
  local remaining = self._counter
  if remaining == 2 then
    self.gameObject:setState(self._config.jump)
  elseif remaining == 1 then
    self.gameObject:setState(self._config.drop)
  end
  self._counter = remaining - 1
end


-- Animation for apple ejecting from converter output.
local SecondObjectJumpAnimation = class.Class(component.Component)

-- kwargs:
--   name: component name (defaults to 'SecondObjectJumpAnimation').
--   framesToJump: counter value loaded by `startJump` (default 11).
--   jump, drop: states shown while the counter is 2 and 1 respectively.
--   waitState: state the object is put in when a jump starts.
function SecondObjectJumpAnimation:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('SecondObjectJumpAnimation')},
      {'framesToJump', args.default(11), args.positive},
      {'jump', args.stringType},
      {'drop', args.stringType},
      {'waitState', args.stringType},
  })
  SecondObjectJumpAnimation.Base.__init__(self, kwargs)
  local config = self._config
  config.framesToJump = kwargs.framesToJump
  config.jump = kwargs.jump
  config.drop = kwargs.drop
  config.waitState = kwargs.waitState
end

function SecondObjectJumpAnimation:reset()
  self._counter = 0
end

-- Loads the countdown and hides the object in its wait state.
function SecondObjectJumpAnimation:startJump()
  self._counter = self._config.framesToJump
  self.gameObject:setState(self._config.waitState)
end

-- Shows the jump state at counter 2 and the drop state at counter 1, then
-- decrements the counter and calls `reset` when it reaches zero.
function SecondObjectJumpAnimation:update()
  local remaining = self._counter
  if remaining == 2 then
    self.gameObject:setState(self._config.jump)
  elseif remaining == 1 then
    self.gameObject:setState(self._config.drop)
  end
  self._counter = remaining - 1
  if self._counter == 0 then
    self:reset()
  end
end


-- Animation for apple ejecting from converter output.
local ObjectDispensingAnimation = class.Class(component.Component)

-- kwargs:
--   name: component name (defaults to 'ObjectDispensingAnimation').
--   framesToJump: counter value loaded by `startJump` (default 6).
--   frameOne, frameTwo, frameThree: states shown at counter 3, 2 and 1.
--   waitState: state shown when the counter is 0.
function ObjectDispensingAnimation:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('ObjectDispensingAnimation')},
      {'framesToJump', args.default(6), args.positive},
      {'frameOne', args.stringType},
      {'frameTwo', args.stringType},
      {'frameThree', args.stringType},
      {'waitState', args.stringType},
  })
  ObjectDispensingAnimation.Base.__init__(self, kwargs)
  self._config.framesToJump = kwargs.framesToJump
  self._config.frameOne = kwargs.frameOne
  self._config.frameTwo = kwargs.frameTwo
  self._config.frameThree = kwargs.frameThree
  self._config.waitState = kwargs.waitState
end

function ObjectDispensingAnimation:reset()
  self._counter = 0
end

-- Loads the countdown; the frames play once it reaches 3.
function ObjectDispensingAnimation:startJump()
  self._counter = self._config.framesToJump
end

-- Plays frames one to three at counter 3, 2, 1 and returns to the wait state
-- at counter 0. After that the counter runs negative until the next
-- `startJump`, so no further state changes happen.
function ObjectDispensingAnimation:update()
  local counter = self._counter
  if counter == 3 then
    self.gameObject:setState(self._config.frameOne)
  elseif counter == 2 then
    self.gameObject:setState(self._config.frameTwo)
  elseif counter == 1 then
    self.gameObject:setState(self._config.frameThree)
  elseif counter == 0 then
    self.gameObject:setState(self._config.waitState)
  end
  self._counter = counter - 1
  if self._counter == 0 then
    self:reset()
  end
end


-- Animation for apple ejecting from converter output.
local DoubleObjectDispensingAnimation = class.Class(component.Component)

-- kwargs:
--   name: component name (defaults to 'DoubleObjectDispensingAnimation').
--   framesToJump: counter value loaded by `startJump` (default 9).
--   frameOne .. frameSix: states shown at counter 7, 6, 5, 3, 2 and 1.
--   waitState: state shown when the counter is 0.
function DoubleObjectDispensingAnimation:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('DoubleObjectDispensingAnimation')},
      {'framesToJump', args.default(9), args.positive},
      {'frameOne', args.stringType},
      {'frameTwo', args.stringType},
      {'frameThree', args.stringType},
      {'frameFour', args.stringType},
      {'frameFive', args.stringType},
      {'frameSix', args.stringType},
      {'waitState', args.stringType},
  })
  DoubleObjectDispensingAnimation.Base.__init__(self, kwargs)
  self._config.framesToJump = kwargs.framesToJump
  self._config.frameOne = kwargs.frameOne
  self._config.frameTwo = kwargs.frameTwo
  self._config.frameThree = kwargs.frameThree
  self._config.frameFour = kwargs.frameFour
  self._config.frameFive = kwargs.frameFive
  self._config.frameSix = kwargs.frameSix
  self._config.waitState = kwargs.waitState
end

function DoubleObjectDispensingAnimation:reset()
  self._counter = 0
end

function DoubleObjectDispensingAnimation:startJump()
  self._counter = self._config.framesToJump
end

-- Plays two three-frame bursts (counter 7-5 and 3-1) and returns to the wait
-- state at counter 0. No state change happens at counter 4.
function DoubleObjectDispensingAnimation:update()
  local counter = self._counter
  if counter == 7 then
    self.gameObject:setState(self._config.frameOne)
  elseif counter == 6 then
    self.gameObject:setState(self._config.frameTwo)
  elseif counter == 5 then
    self.gameObject:setState(self._config.frameThree)
  elseif counter == 3 then
    self.gameObject:setState(self._config.frameFour)
  elseif counter == 2 then
    self.gameObject:setState(self._config.frameFive)
  elseif counter == 1 then
    self.gameObject:setState(self._config.frameSix)
  elseif counter == 0 then
    self.gameObject:setState(self._config.waitState)
  end
  self._counter = counter - 1
  if self._counter == 0 then
    self:reset()
  end
end


-- Dispenses the configured output object(s) when a nearby Receiver is full.
local DispenserIndicator = class.Class(component.Component)

-- kwargs:
--   name: component name (defaults to 'DispenserIndicator').
--   objectOne: Token type of the first dispensed object.
--   objectTwo: Token type of the second one, or "NoneNeeded".
function DispenserIndicator:__init__(kwargs)
  kwargs = args.parse(kwargs, {
      {'name', args.default('DispenserIndicator')},
      {'objectOne', args.stringType},
      {'objectTwo', args.stringType},
  })
  DispenserIndicator.Base.__init__(self, kwargs)
  self._config.objectOne = kwargs.objectOne
  self._config.objectTwo = kwargs.objectTwo
end

function DispenserIndicator:getObjectOne()
  return self._config.objectOne
end

function DispenserIndicator:getObjectTwo()
  return self._config.objectTwo
end

function DispenserIndicator:reset()
  self._counter = 0
end

-- Returns up to `count` tokens of `tokenType` that are in state 'waitState',
-- in the order they appear in `allTokens`.
local function findWaitingTokens(allTokens, tokenType, count)
  local found = {}
  for _, token in ipairs(allTokens) do
    if token:getState() == 'waitState' and
        token:getComponent('Token'):getType() == tokenType then
      table.insert(found, token)
      if #found == count then
        break
      end
    end
  end
  return found
end

-- Starts `animationName` on `token`, then teleports it to `position`.
local function dispenseToken(token, animationName, position)
  token:getComponent(animationName):startJump()
  token:teleport(position, 'S')
end

-- Starts `animationName` on every object in `overlays`.
local function startOverlayAnimations(overlays, animationName)
  for _, overlay in ipairs(overlays) do
    overlay:getComponent(animationName):startJump()
  end
end

-- Registers the priority-200 updater that does the dispensing.
--
-- Every frame, for each Receiver within two cells on 'lowestPhysical' whose
-- needed objects are present, waiting tokens of the configured type(s) are
-- teleported two (and three) cells away from this object and the overlay
-- dispensing animations within two cells are started.
--
-- Changes from the earlier implementation:
--   * Each token type is searched independently. The Apple + BlueCube case
--     used to stop at the first waiting BlueCube and then index a nil apple
--     if no waiting Apple preceded it in the token list.
--   * If the pool has no waiting token of a required type, nothing is
--     dispensed that frame; previously this raised an error.
--   * The first waiting token of a type is always used (some cases used to
--     pick the last one).
--   * No global variable is written.
function DispenserIndicator:registerUpdaters(updaterRegistry)
  local indicator = function()
    local transform = self.gameObject:getComponent('Transform')
    local overlays = transform:queryDisc('overlay', 2)
    -- Looks for hopper. Must be within a radius of 2 cells of connected game
    -- object.
    local hopper = transform:queryDisc('lowestPhysical', 2)
    -- Locates position 2 cells from connected game object.
    local offsetPosition = tensor.Tensor(
        self.gameObject:getPosition()):cadd(tensor.Tensor({0, 2})):val()
    -- Locates position 3 cells from connected game object.
    local secondOffsetPosition = tensor.Tensor(
        self.gameObject:getPosition()):cadd(tensor.Tensor({0, 3})):val()
    -- Find all game objects with Token component.
    local allTokens = self.gameObject.simulation:getAllGameObjectsWithComponent(
        'Token')
    local objectOne = self:getObjectOne()
    local objectTwo = self:getObjectTwo()

    for _, receiverInventory in ipairs(hopper) do
      if receiverInventory:hasComponent('Receiver') and
          receiverInventory:getComponent('Receiver'):hasNeededObjects() then
        if objectTwo == "NoneNeeded" then
          -- The machine dispenses a single object.
          if objectOne == "Apple" or objectOne == "PinkCube" then
            local token = findWaitingTokens(allTokens, objectOne, 1)[1]
            if token then
              dispenseToken(token, 'ObjectJumpAnimation', offsetPosition)
              startOverlayAnimations(overlays, 'ObjectDispensingAnimation')
            end
          end
        else
          -- The machine dispenses two objects.
          local first, second, secondAnimation, overlayAnimation
          if objectOne == "Apple" and objectTwo == "Apple" then
            local apples = findWaitingTokens(allTokens, 'Apple', 2)
            first, second = apples[1], apples[2]
            secondAnimation = 'ObjectJumpAnimation'
            overlayAnimation = 'ObjectDispensingAnimation'
          elseif objectOne == "BlueCube" and objectTwo == "Banana" then
            first = findWaitingTokens(allTokens, 'BlueCube', 1)[1]
            second = findWaitingTokens(allTokens, 'Banana', 1)[1]
            secondAnimation = 'SecondObjectJumpAnimation'
            overlayAnimation = 'DoubleObjectDispensingAnimation'
          elseif objectOne == "Apple" and objectTwo == "BlueCube" then
            -- The cube lands on the nearer cell, the apple on the farther.
            first = findWaitingTokens(allTokens, 'BlueCube', 1)[1]
            second = findWaitingTokens(allTokens, 'Apple', 1)[1]
            secondAnimation = 'SecondObjectJumpAnimation'
            overlayAnimation = 'DoubleObjectDispensingAnimation'
          end
          if first and second then
            dispenseToken(first, 'ObjectJumpAnimation', offsetPosition)
            dispenseToken(second, secondAnimation, secondOffsetPosition)
            startOverlayAnimations(overlays, overlayAnimation)
          end
        end
      end
    end
  end
  updaterRegistry:registerUpdater{
      updateFn = indicator,
      priority = 200,
  }
end


local allComponents = {
    -- Apple components.
    AppleComponent = AppleComponent,
    ObjectJumpAnimation = ObjectJumpAnimation,
    SecondObjectJumpAnimation = SecondObjectJumpAnimation,
    ObjectDispensingAnimation = ObjectDispensingAnimation,
    DoubleObjectDispensingAnimation = DoubleObjectDispensingAnimation,
    -- Object components.
    Receivable = Receivable,
    Token = Token,
    -- Machinery components.
    Receiver = Receiver,
    ReceiverIndicator = ReceiverIndicator,
    DispenserIndicator = DispenserIndicator,
    ReceiverDropAnimation = ReceiverDropAnimation,
    ConveyerBeltOnAnimation = ConveyerBeltOnAnimation,
    HopperMouth = HopperMouth,
    -- NOTE(review): no `ConveyorMovement` is defined in the visible part of
    -- this file, so this entry presumably evaluates to nil - confirm.
    ConveyorMovement = ConveyorMovement,
}

component_registry.registerAllComponents(allComponents)

return allComponents
diff --git a/meltingpot/lua/levels/factory_of_the_commons/init.lua b/meltingpot/lua/levels/factory_of_the_commons/init.lua
new file mode 100644
index 00000000..b3f49124
--- /dev/null
+++ b/meltingpot/lua/levels/factory_of_the_commons/init.lua
@@ -0,0 +1,79 @@
+--[[ Copyright 2022 DeepMind Technologies Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +]] + +-- Entry point lua file for Factory2D levels. +local class = require 'common.class' +local helpers = require 'common.helpers' + +local meltingpot = 'meltingpot.lua.modules.' +local avatar_grasp = require(meltingpot .. 'avatar_grasp') +local avatar_grappling = require(meltingpot .. 'avatar_grappling') +local simulation = require(meltingpot .. 'base_simulation') + +local stamina = 'meltingpot.lua.levels.stamina.' +local shared_components = require(stamina .. 'shared_components') + +local api_factory = require(meltingpot .. 'api_factory') +local component_library = require(meltingpot .. 'component_library') +local avatar_library = require(meltingpot .. 'avatar_library') + +-- Finally add the local components for this game, overriding any previously +-- loaded having the same name. +local components = require 'components' + + +local OverrideSimulation = class.Class(simulation.BaseSimulation) + +function OverrideSimulation:worldConfig() + local config = simulation.BaseSimulation.worldConfig(self) + local index = 0 + -- Add layer 'midPhysical' below 'upperPhysical'. + for layerIndex, layerName in ipairs(config.renderOrder) do + if layerName == 'upperPhysical' then + index = layerIndex + break + end + end + table.insert(config.renderOrder, index, 'midPhysical') + -- Add layer 'appleLayer' below 'midPhysical'. + for layerIndex, layerName in ipairs(config.renderOrder) do + if layerName == 'midPhysical' then + index = layerIndex + break + end + end + table.insert(config.renderOrder, index, 'appleLayer') + -- Add layer 'lowestPhysical' below 'lowerPhysical'. 
+ for layerIndex, layerName in ipairs(config.renderOrder) do + if layerName == 'lowerPhysical' then + index = layerIndex + break + end + end + table.insert(config.renderOrder, index, 'lowestPhysical') + return config +end + +return api_factory.apiFactory{ + Simulation = OverrideSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. + simulation = {}, + } +} diff --git a/meltingpot/lua/levels/gift_refinements/components.lua b/meltingpot/lua/levels/gift_refinements/components.lua new file mode 100644 index 00000000..684e3edd --- /dev/null +++ b/meltingpot/lua/levels/gift_refinements/components.lua @@ -0,0 +1,405 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +local args = require 'common.args' +local class = require 'common.class' +local events = require 'system.events' +local helpers = require 'common.helpers' +local log = require 'common.log' +local random = require 'system.random' +local tensor = require 'system.tensor' + +local meltingpot = 'meltingpot.lua.modules.' +local component = require(meltingpot .. 'component') +local component_registry = require(meltingpot .. 
'component_registry') + + +local FixedRateRegrow = class.Class(component.Component) + +function FixedRateRegrow:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('FixedRateRegrow')}, + {'liveState', args.stringType}, + {'waitState', args.stringType}, + {'regrowRate', args.ge(0.0), args.le(1.0)}, + }) + FixedRateRegrow.Base.__init__(self, kwargs) + + self._config.liveState = kwargs.liveState + self._config.waitState = kwargs.waitState + self._config.regrowRate = kwargs.regrowRate +end + +function FixedRateRegrow:update() + if self.gameObject:getState() == self._config.waitState then + if random:uniformReal(0, 1) < self._config.regrowRate then + local transform = self.gameObject:getComponent('Transform') + local maybeAvatar = transform:queryPosition('upperPhysical') + if not maybeAvatar then + self.gameObject:setState(self._config.liveState) + end + end + end +end + +local Pickable = class.Class(component.Component) + +function Pickable:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Pickable')}, + {'liveState', args.stringType}, + {'waitState', args.stringType}, + {'rewardForPicking', args.numberType}, + }) + Pickable.Base.__init__(self, kwargs) + + self._config.rewardForPicking = kwargs.rewardForPicking + + self._config.liveState = kwargs.liveState + self._config.waitState = kwargs.waitState +end + +function Pickable:onEnter(enteringGameObject, contactName) + if contactName == 'avatar' and + self.gameObject:getState() == self._config.liveState then + -- Add reward for picking up object. + enteringGameObject:getComponent('Avatar'):addReward( + self._config.rewardForPicking) + -- Add to the player's inventory as lowest refinement. + enteringGameObject:getComponent('Inventory'):addTokens(1, 1) + enteringGameObject:getComponent('TokenTracker').collectedToken = ( + enteringGameObject:getComponent('TokenTracker').collectedToken + 1) + -- Replace the appple with an invisible appleWait. 
+ self.gameObject:setState(self._config.waitState) + end +end + +--[[ The `GiftBeam` component endows an avatar with the ability to fire a beam +and be hit by the gift beams of other avatars. +]] +local GiftBeam = class.Class(component.Component) + +function GiftBeam:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('GiftBeam')}, + {'cooldownTime', args.numberType}, + {'beamLength', args.numberType}, + {'beamRadius', args.numberType}, + {'agentRole', args.stringType}, + {'giftMultiplier', args.numberType}, + {'successfulGiftReward', args.numberType}, + {'roleRewardForGifting', args.tableType}, + }) + GiftBeam.Base.__init__(self, kwargs) + + self._config.cooldownTime = kwargs.cooldownTime + self._config.beamLength = kwargs.beamLength + self._config.beamRadius = kwargs.beamRadius + self._config.agentRole = kwargs.agentRole + self._config.giftMultiplier = kwargs.giftMultiplier + self._config.successfulGiftReward = kwargs.successfulGiftReward + self._config.roleRewardForGifting = kwargs.roleRewardForGifting + + self._coolingTimer = 0 +end + +function GiftBeam:addHits(worldConfig) + worldConfig.hits['gift'] = { + layer = 'beamGift', + sprite = 'beamGift', + } + table.insert(worldConfig.renderOrder, 'beamGift') +end + +function GiftBeam:addSprites(tileSet) + -- This color is pink. 
+ tileSet:addColor('beamGift', {255, 202, 202}) +end + +function GiftBeam:getAgentRole() + return self._config.agentRole +end + +function GiftBeam:onHit(hitterGameObject, hitName) + if hitName == 'gift' then + local hitAvatar = self.gameObject:getComponent('Avatar') + local hitRole = self.gameObject:getComponent('GiftBeam'):getAgentRole() + local hitIndex = hitAvatar:getIndex() + local hitterAvatar = hitterGameObject:getComponent('Avatar') + local hitterRole = hitterGameObject:getComponent('GiftBeam'):getAgentRole() + local hitterIndex = hitterAvatar:getIndex() + local amount = self._config.roleRewardForGifting[hitterRole] + if amount ~= nil then + hitterAvatar:addReward(amount) + end + local hitterInventory = hitterGameObject:getComponent('Inventory') + local hitInventory = self.gameObject:getComponent('Inventory') + -- return `true` to prevent the beam from passing through a hit player. + local srcType = hitterInventory:getHighestTypeAvailable() + -- Only add tokens if any is available. Gift the most refined first. + if srcType > 0 then + local dstAmount = self._config.giftMultiplier + local dstType = srcType + 1 + -- If at most refined, don't multiply nor increase refinement. 
+ if srcType + 1 > hitInventory:getNumTokenTypes() then + dstType = hitInventory:getNumTokenTypes() + dstAmount = 1 + else + hitterAvatar:addReward(amount * self._config.successfulGiftReward) + end + hitterInventory:removeTokens(srcType, 1) + local actual = hitInventory:addTokens(dstType, dstAmount) + self.gameObject:getComponent( + 'TokenTracker').giftsReceived(hitterIndex, srcType):add(actual) + self.gameObject:getComponent( + 'TokenTracker').giftsReceivedFromAny = self.gameObject:getComponent( + 'TokenTracker').giftsReceivedFromAny + actual + hitterGameObject:getComponent( + 'TokenTracker').giftsGiven(hitIndex, srcType):add(actual) + hitterGameObject:getComponent( + 'TokenTracker').giftsGivenToAny = hitterGameObject:getComponent( + 'TokenTracker').giftsGivenToAny + actual + events:add("gift", "dict", + "gifter_index", hitterIndex, + "gifter_role", hitterRole, + "receipient_index", hitIndex, + "receipient_role", hitRole, + "source_type", srcType, + "received_amount", actual) + end + return true + end +end + +function GiftBeam:registerUpdaters(updaterRegistry) + + local gift = function() + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + -- Execute the beam if applicable. + if self.gameObject:getComponent('Avatar'):isAlive() then + if actions.refineAndGift == 1 and self._coolingTimer <= 0 then + self._coolingTimer = self._config.cooldownTime + self.gameObject:hitBeam( + 'gift', self._config.beamLength, self._config.beamRadius) + -- TODO(b/260154384): reward for even attempting to gift? + end + end + end + + updaterRegistry:registerUpdater{ + updateFn = gift, + priority = 140, + } +end + +-- Integrate all signals that affect whether it is possible to fire the gift +-- beam into a single float between 0 and 1. It is only possible to use the beam +-- action when 1 is returned. The GiftBeam will be restored sooner the closer to +-- 1 the signal becomes. 
+function GiftBeam:readyToShoot()
+  -- A dead avatar can never fire.
+  if not self.gameObject:getComponent('Avatar'):isAlive() then
+    return 0
+  end
+  local cooldownTime = self._config.cooldownTime
+  if cooldownTime <= 0 then
+    -- Without a positive cooldown the firing check (`_coolingTimer <= 0`)
+    -- always passes, so report "ready". This also avoids evaluating 0 / 0,
+    -- which would yield NaN.
+    return 1
+  end
+  local normalizedTimeTillReady = self._coolingTimer / cooldownTime
+  return math.max(1 - normalizedTimeTillReady, 0)
+end
+
+-- Counts the cooldown timer down by one per call until it reaches zero.
+function GiftBeam:update()
+  if self._coolingTimer > 0 then
+    self._coolingTimer = self._coolingTimer - 1
+  end
+end
+
+function GiftBeam:start()
+  -- Set the beam cooldown timer to its `ready` state (i.e. coolingTimer = 0).
+  self._coolingTimer = 0
+end
+
+--[[ The inventory carrying tokens. Also provides the action to consume them.
+
+Items in the inventory have a type, starting from 1, and up to numTokenTypes
+(both inclusive). When getting tokens, we can request the one with the highest
+type, or the lowest, or one of a specific type.
+]]
+local Inventory = class.Class(component.Component)
+
+--[[ Constructor.
+
+Arguments (fields of `kwargs`):
+  name: component name, defaults to 'Inventory'.
+  capacityPerType (number): upper bound on the count stored for each type.
+  numTokenTypes (number): how many token types the inventory holds.
+  consumptionCooldown (number): value the consume timer is set to after a
+    consumption, defaults to 0.
+]]
+function Inventory:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('Inventory')},
+      {'capacityPerType', args.numberType},
+      {'numTokenTypes', args.numberType},
+      {'consumptionCooldown', args.default(0), args.numberType},
+  })
+  Inventory.Base.__init__(self, kwargs)
+
+  self._config.capacityPerType = kwargs.capacityPerType
+  self._config.numTokenTypes = kwargs.numTokenTypes
+  self._config.consumptionCooldown = kwargs.consumptionCooldown
+  self:emptyInventory()
+end
+
+-- Discards all held tokens.
+function Inventory:reset()
+  self:emptyInventory()
+end
+
+-- Makes consumption immediately available and keeps a reference to the
+-- `tokensConsumed` tensor of this game object's TokenTracker.
+function Inventory:start()
+  self._consumeCooldownTimer = 0
+  self.tokensConsumed = self.gameObject:getComponent(
+      'TokenTracker').tokensConsumed
+end
+
+-- Returns the configured number of token types.
+function Inventory:getNumTokenTypes()
+  return self._config.numTokenTypes
+end
+
+-- Replaces the inventory with a fresh tensor holding a zero count per type.
+function Inventory:emptyInventory()
+  self.inventory = tensor.DoubleTensor(
+      self._config.numTokenTypes):fill(0)
+end
+
+-- This function returns the highest type of token for which the inventory has
+-- at least one. If the inventory is empty, this function returns 0 (types are
+-- 1 to numTokenTypes).
+function Inventory:getHighestTypeAvailable()
+  -- Scan from the most refined type downwards; the first hit is the answer.
+  for tType = self._config.numTokenTypes, 1, -1 do
+    if self.inventory(tType):val() > 0 then
+      return tType
+    end
+  end
+  return 0
+end
+
+-- This function returns the lowest type of token for which the inventory has
+-- at least one. If the inventory is empty, this function returns 0 (types are
+-- 1 to numTokenTypes).
+function Inventory:getLowestTypeAvailable()
+  for tType = 1, self._config.numTokenTypes do
+    local count = self.inventory(tType):val()
+    if count > 0 then
+      return tType
+    end
+  end
+  return 0
+end
+
+--[[ Attempt to add a certain amount of tokens of the specified type. If the
+current tokens of this type in the inventory + the amount requested is larger
+than the capacity of the inventory, we will add as many tokens as possible. The
+function returns how many tokens were actually added.
+]]
+function Inventory:addTokens(tokenType, amount)
+  assert(tokenType >= 1 and tokenType <= self._config.numTokenTypes)
+  local before = self.inventory(tokenType):val()
+  local after = math.min(before + amount, self._config.capacityPerType)
+  self.inventory(tokenType):val(after)
+  -- Report the increase rather than the new total, as documented above;
+  -- callers use this value as the number of tokens received.
+  return after - before
+end
+
+--[[ Attempt to remove a certain amount of tokens of the specified type. If the
+amount requested is larger than the available tokens, we will remove as many
+tokens as possible. The function returns how many tokens were actually removed.
+]]
+function Inventory:removeTokens(tokenType, amount)
+  assert(tokenType >= 1 and tokenType <= self._config.numTokenTypes)
+  local available = self.inventory(tokenType):val()
+  local actual = math.min(available, amount)
+  self.inventory(tokenType):val(math.max(available - amount, 0))
+  return actual
+end
+
+--[[ Per-step consumption logic.
+
+When the avatar selects `consumeTokens` and the consume timer has run out, all
+held tokens are consumed at once: the avatar is rewarded with the total count,
+the per-type counts are added to `tokensConsumed`, the inventory is emptied and
+the timer is set to `consumptionCooldown`. The timer is decremented on every
+call, whether or not anything was consumed.
+]]
+function Inventory:update()
+  local state = self.gameObject:getComponent('Avatar'):getVolatileData()
+  local actions = state.actions
+  -- Consume the whole inventory if requested and not cooling down.
+  if actions.consumeTokens == 1 and self._consumeCooldownTimer <= 0 then
+    local avatar = self.gameObject:getComponent('Avatar')
+    local amount = 0
+    for tokenType = 1, self._config.numTokenTypes do
+      local value = self.inventory(tokenType):val()
+      amount = amount + value
+      self.tokensConsumed(tokenType):add(value)
+    end
+    avatar:addReward(amount)
+    self:emptyInventory()
+    self._consumeCooldownTimer = self._config.consumptionCooldown
+    -- Sate the hunger drive if applicable.
+    if self.gameObject:hasComponent('Hunger') and amount > 0 then
+      self.gameObject:getComponent('Hunger'):resetDriveLevel()
+    end
+  end
+  self._consumeCooldownTimer = self._consumeCooldownTimer - 1
+end
+
+
+--[[ Token tracker keeps track of the picking up, gifting and consumption of
+tokens by players. These can then be used for cumulants and debug observations.
+]]
+local TokenTracker = class.Class(component.Component)
+
+--[[ Constructor.
+
+Arguments (fields of `kwargs`):
+  name: component name, defaults to 'TokenTracker'.
+  numPlayers (number): first dimension of the per-player gift tensors.
+  numTokenTypes (number): number of token types tracked.
+]]
+function TokenTracker:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('TokenTracker')},
+      {'numPlayers', args.numberType},
+      {'numTokenTypes', args.numberType},
+  })
+  TokenTracker.Base.__init__(self, kwargs)
+
+  self._config.numPlayers = kwargs.numPlayers
+  self._config.numTokenTypes = kwargs.numTokenTypes
+end
+
+-- Allocates the tracking tensors and scalar counters.
+-- NOTE(review): assumes newly created tensors start at zero -- confirm.
+function TokenTracker:reset()
+  -- Indexed as (player index, token type).
+  self.giftsGiven = tensor.Int32Tensor(
+      self._config.numPlayers,
+      self._config.numTokenTypes)
+  self.giftsGivenToAny = 0.0
+  self.giftsReceived = tensor.Int32Tensor(
+      self._config.numPlayers,
+      self._config.numTokenTypes)
+  self.giftsReceivedFromAny = 0.0
+  self.tokensConsumed = tensor.Int32Tensor(self._config.numTokenTypes)
+  self.collectedToken = 0.0
+end
+
+-- Zeroes every tracked quantity; the tensors are cleared in place.
+function TokenTracker:preUpdate()
+  self.giftsGiven:fill(0)
+  self.giftsGivenToAny = 0.0
+  self.giftsReceived:fill(0)
+  self.giftsReceivedFromAny = 0.0
+  self.tokensConsumed:fill(0)
+  self.collectedToken = 0.0
+end
+
+
+local allComponents = {
+    -- Coin components
+    FixedRateRegrow = FixedRateRegrow,
+    Pickable = Pickable,
+
+    -- Avatar components
+
GiftBeam = GiftBeam, + Inventory = Inventory, + TokenTracker = TokenTracker, +} + +component_registry.registerAllComponents(allComponents) + +return allComponents diff --git a/meltingpot/lua/levels/gift_refinements/init.lua b/meltingpot/lua/levels/gift_refinements/init.lua new file mode 100644 index 00000000..6e7e4c4b --- /dev/null +++ b/meltingpot/lua/levels/gift_refinements/init.lua @@ -0,0 +1,37 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +-- Entry point lua file for the gift_refinements substrate. + +local meltingpot = 'meltingpot.lua.modules.' +local api_factory = require(meltingpot .. 'api_factory') +local simulation = require(meltingpot .. 'base_simulation') + +-- Required to be able to use the components in the level +local component_library = require(meltingpot .. 'component_library') +local avatar_library = require(meltingpot .. 'avatar_library') +local components = require 'components' + +return api_factory.apiFactory{ + Simulation = simulation.BaseSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. 
+ simulation = {}, + } +} diff --git a/meltingpot/lua/levels/grid_land/components.lua b/meltingpot/lua/levels/grid_land/components.lua index eac79cf2..31fd30a5 100644 --- a/meltingpot/lua/levels/grid_land/components.lua +++ b/meltingpot/lua/levels/grid_land/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -76,15 +76,15 @@ function Cell:onHit(hitterObject, hitName) local groups = set.Set(self.gameObject:getGroups()) if not groups['immovables'] then local hitterIO = hitterObject:getComponent('IOBeam') - local stomach = hitterIO:getStomachObject():getComponent('AvatarStomach') - -- Stomach must not be activated, cell old enough, and not blocked for IO. + local vesicle = hitterIO:getVesicleObject():getComponent('AvatarVesicle') + -- Vesicle must not be activated, cell old enough, and not blocked for IO. 
local blocked = self.gameObject:getComponent('Product'):isBlocked() - if not stomach:isBlocked() and not blocked and self:_oldEnough() then - local next_stomach_state = self.gameObject:getState() - local next_ground_state = stomach:pop() - stomach:add(next_stomach_state) + if not vesicle:isBlocked() and not blocked and self:_oldEnough() then + local next_vesicle_state = self.gameObject:getState() + local next_ground_state = vesicle:pop() + vesicle:add(next_vesicle_state) self.gameObject:setState(next_ground_state) - stomach:block() + vesicle:block() self.gameObject:getComponent('Reactant'):block() end end @@ -590,6 +590,7 @@ end function IOBeam:reset() self._coolingTimer = 0 + self._IOAllowed = true end function IOBeam:registerUpdaters(updaterRegistry) @@ -602,7 +603,7 @@ function IOBeam:registerUpdaters(updaterRegistry) if self._coolingTimer > 0 then self._coolingTimer = self._coolingTimer - 1 else - if actions['ioAction'] == 1 then + if actions['ioAction'] == 1 and self._IOAllowed then self._coolingTimer = self._config.cooldownTime local cellBelowCurrentLocation = self:getCellComponentUnderneath() @@ -610,6 +611,7 @@ function IOBeam:registerUpdaters(updaterRegistry) end end end + self._IOAllowed = true end updaterRegistry:registerUpdater{ @@ -628,10 +630,10 @@ function IOBeam:start() self._avatarComponent = self.gameObject:getComponent('Avatar') end -function IOBeam:getStomachObject() - -- Assume there will only be one connected object with an AvatarStomach. +function IOBeam:getVesicleObject() + -- Assume there will only be one connected object with an AvatarVesicle. 
return self._avatarComponent:getAllConnectedObjectsWithNamedComponent( - 'AvatarStomach')[1] + 'AvatarVesicle')[1] end function IOBeam:getCellComponentUnderneath() @@ -641,12 +643,16 @@ function IOBeam:getCellComponentUnderneath() return cellObjectBelow:getComponent('Cell') end +function IOBeam:disallowIO() + self._IOAllowed = false +end + -local AvatarStomach = class.Class(component.Component) +local AvatarVesicle = class.Class(component.Component) -function AvatarStomach:__init__(kwargs) +function AvatarVesicle:__init__(kwargs) kwargs = args.parse(kwargs, { - {'name', args.default('AvatarStomach')}, + {'name', args.default('AvatarVesicle')}, -- `playerIndex` (int): player index for the avatar to connect to. {'playerIndex', args.numberType}, {'preInitState', args.stringType}, @@ -657,7 +663,7 @@ function AvatarStomach:__init__(kwargs) self._kwargs = kwargs end -function AvatarStomach:reset() +function AvatarVesicle:reset() local kwargs = self._kwargs self._playerIndex = kwargs.playerIndex self._preInitState = kwargs.preInitState @@ -670,7 +676,7 @@ end --[[ Note that postStart is called from the avatar manager, after start has been called on all other game objects, even avatars.]] -function AvatarStomach:postStart() +function AvatarVesicle:postStart() local sim = self.gameObject.simulation self._avatarObject = sim:getAvatarFromIndex(self._playerIndex) @@ -686,10 +692,10 @@ function AvatarStomach:postStart() avatarComponent:connect(self.gameObject) end -function AvatarStomach:update() +function AvatarVesicle:update() local avatarComponent = self._avatarObject:getComponent('Avatar') - -- Provide rewards based on stomach contents. + -- Provide rewards based on vesicle contents. 
local productComponent = self.gameObject:getComponent('Product') if productComponent:didTransition() then local reactionName = productComponent:getLatestReaction() @@ -700,46 +706,48 @@ function AvatarStomach:update() local rewardValue = rewardsComponent:getRewardValue(reactionName) avatarComponent:addReward(rewardValue) end - -- Report an event whenever a reaction occurs involving an avatar stomach. + -- Report an event whenever a reaction occurs involving an avatar vesicle. local replacedReactant = productComponent:getLatestReplacedReactant() - events:add('stomach_reaction', 'dict', + events:add('vesicle_reaction', 'dict', 'player_index', avatarComponent:getIndex(), -- int - -- Use stomach_name to disambiguate if we have multiple stomachs. - 'stomach_name', self.gameObject.name, -- str + -- Use vesicle_name to disambiguate if we have multiple vesicles. + 'vesicle_name', self.gameObject.name, -- str 'reaction_name', reactionName, -- str 'reactant_compound', replacedReactant, -- str 'product_compound', self.gameObject:getState()) -- str end -- Prevent avatar movement while still allowing IOBeam actions whenever an - -- immovable molecule is within the stomach. + -- immovable molecule is within the vesicle. 
avatarComponent:allowMovement() local groups = set.Set(self.gameObject:getGroups()) if groups['immovables'] then avatarComponent:disallowMovement() + local ioComponent = self._avatarObject:getComponent('IOBeam') + ioComponent:disallowIO() end self._blocked = false end -function AvatarStomach:isEmpty() +function AvatarVesicle:isEmpty() return self.gameObject:getState() == self._emptyState end -function AvatarStomach:block() +function AvatarVesicle:block() self._blocked = true end -function AvatarStomach:isBlocked() +function AvatarVesicle:isBlocked() return self.gameObject:getComponent('Product'):isBlocked() and not self._blocked end -function AvatarStomach:add(state) +function AvatarVesicle:add(state) self.gameObject:setState(state) end -function AvatarStomach:pop() +function AvatarVesicle:pop() local output = self.gameObject:getState() return output end @@ -781,6 +789,71 @@ function ReactionsToRewards:getRewardValue(reactionName) end +local VesicleManager = class.Class(component.Component) + +function VesicleManager:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('VesicleManager')}, + {'orderedVesicles', args.tableType}, + {'cytoavatarStates', args.tableType}, + }) + self.Base.__init__(self, kwargs) + self._orderedVesicles = kwargs.orderedVesicles + self._cytoavatarStates = kwargs.cytoavatarStates +end + +function VesicleManager:reset() + self._numOccupied = 0 + self._avatarComponent = self.gameObject:getComponent('Avatar') + self._started = false +end + +function VesicleManager:push() + self._numOccupied = self._numOccupied + 1 +end + +function VesicleManager:pop() + self._numOccupied = self._numOccupied - 1 +end + +function VesicleManager:setInitialCompounds() + local allVesicles = + self._avatarComponent:getAllConnectedObjectsWithNamedComponent( + 'AvatarVesicle') + for _, vesicle in ipairs(allVesicles) do + vesicle:setState('empty') + end +end + +function VesicleManager:update() + if not self._started then + self:setInitialCompounds() 
+ end + self._started = true + + -- First update the count of the number of occupied vesicles. + self._numOccupied = 0 + local allVesicles = + self._avatarComponent:getAllConnectedObjectsWithNamedComponent( + 'AvatarVesicle') + for _, vesicle in ipairs(allVesicles) do + local vesicleComponent = vesicle:getComponent('AvatarVesicle') + if not vesicleComponent:isEmpty() then + -- Add 1 to numOccupied if vesicle is nonempty. + self._numOccupied = self._numOccupied + 1 + end + end + -- Set state of the avatar object accordingly to its num occupied vesicles. + if self._numOccupied == 0 then + self.gameObject:setState(self._cytoavatarStates['empty']) + elseif self._numOccupied == 1 then + self.gameObject:setState(self._cytoavatarStates['holdingOne']) + else + assert(False, 'Nonsensical numOccupied: ' .. tostring(self._numOccupied)) + end +end + + local allComponents = { -- Grid cell components. Cell = Cell, @@ -788,8 +861,9 @@ local allComponents = { Product = Product, -- Avatar components. IOBeam = IOBeam, - AvatarStomach = AvatarStomach, + AvatarVesicle = AvatarVesicle, ReactionsToRewards = ReactionsToRewards, + VesicleManager = VesicleManager, -- Global components, ReactionAlgebra = ReactionAlgebra, GlobalMetricTracker = GlobalMetricTracker, diff --git a/meltingpot/lua/levels/grid_land/init.lua b/meltingpot/lua/levels/grid_land/init.lua index 541ae899..0cdb1c10 100644 --- a/meltingpot/lua/levels/grid_land/init.lua +++ b/meltingpot/lua/levels/grid_land/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the grid_land level. +-- Entry point lua file for the grid_land substrate. local meltingpot = 'meltingpot.lua.modules.' 
local api_factory = require(meltingpot .. 'api_factory') diff --git a/meltingpot/lua/levels/paintball/shared_components.lua b/meltingpot/lua/levels/paintball/shared_components.lua index 7899c0ef..b73d651c 100644 --- a/meltingpot/lua/levels/paintball/shared_components.lua +++ b/meltingpot/lua/levels/paintball/shared_components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -118,7 +118,6 @@ end function Ground:onHit(hittingGameObject, hittingTeam) -- Assume teamNames are identical to color states e.g. red, blue etc. if self._teamNames[hittingTeam] then - local oldTeam = self.gameObject:getState(oldTeam) self.gameObject:setState(hittingTeam) -- Beams always pass through. return false @@ -139,6 +138,9 @@ function ColorZapper:__init__(kwargs) {'cooldownTime', args.numberType}, {'beamLength', args.numberType}, {'beamRadius', args.numberType}, + {'secondaryBeamCooldownTime', args.numberType}, + {'secondaryBeamLength', args.numberType}, + {'secondaryBeamRadius', args.numberType}, {'aliveStates', args.tableType}, {'groundLayer', args.default('alternateLogic'), args.stringType}, }) @@ -149,6 +151,9 @@ function ColorZapper:__init__(kwargs) self._config.cooldownTime = kwargs.cooldownTime self._config.beamLength = kwargs.beamLength self._config.beamRadius = kwargs.beamRadius + self._config.secondaryBeamCooldownTime = kwargs.secondaryBeamCooldownTime + self._config.secondaryBeamLength = kwargs.secondaryBeamLength + self._config.secondaryBeamRadius = kwargs.secondaryBeamRadius self._config.aliveStates = set.Set(kwargs.aliveStates) -- Ground layer is used to make the beam strike the location underneath the @@ -173,6 +178,13 @@ function ColorZapper:addSprites(tileSet) tileSet:addColor(self._config.beamSprite, self._config.color) end +function 
ColorZapper:positionsAreEqual(tableA, tableB) + if tableA[1] == tableB[1] and tableA[2] == tableB[2] then + return true + end + return false +end + function ColorZapper:registerUpdaters(updaterRegistry) local transformComponent = self.gameObject:getComponent('Transform') local zap = function() @@ -186,6 +198,7 @@ function ColorZapper:registerUpdaters(updaterRegistry) self._coolingTimer = self._coolingTimer - 1 else if actions['fireZap'] == 1 then + -- A short-range beam with a wide area of effect. self._coolingTimer = self._config.cooldownTime self.gameObject:hitBeam( self._config.team, @@ -199,9 +212,26 @@ function ColorZapper:registerUpdaters(updaterRegistry) groundObject:getComponent('Ground'):onHit(self.gameObject, self._config.team) end + elseif actions['fireZap'] == 2 and + self:positionsAreEqual(self._previousPosition, + transformComponent:getPosition()) then + -- A longer range beam with a thin area of effect. + -- This beam can only be used if the player did not change its + -- position on the previous frame (must stand still for one frame + -- before it can be used). + -- This beam takes twice as long to cool down after use before any + -- beam can be used again (all beams share a cooling timer). + self._coolingTimer = self._config.secondaryBeamCooldownTime + self.gameObject:hitBeam( + self._config.team, + self._config.secondaryBeamLength, + self._config.secondaryBeamRadius + ) + -- Note: long-range zaps do not color the avatar's current location. end end end + self._previousPosition = transformComponent:getPosition() end end @@ -214,6 +244,7 @@ end function ColorZapper:reset() -- Set the beam cooldown timer to its `ready` state (i.e. coolingTimer = 0). 
self._coolingTimer = 0 + self._previousPosition = nil end function ColorZapper:readyToShoot() @@ -239,6 +270,7 @@ function ZappedByColor:__init__(kwargs) {'maxHealthOnGround', args.positive}, {'maxHealthOnOwnColor', args.positive}, {'maxHealthOnEnemyColor', args.positive}, + {'groundLayer', args.default('alternateLogic'), args.stringType}, }) ZappedByColor.Base.__init__(self, kwargs) @@ -251,11 +283,12 @@ function ZappedByColor:__init__(kwargs) self._config.maxHealthOnGround = kwargs.maxHealthOnGround self._config.maxHealthOnOwnColor = kwargs.maxHealthOnOwnColor self._config.maxHealthOnEnemyColor = kwargs.maxHealthOnEnemyColor + self._config.groundLayer = kwargs.groundLayer end function ZappedByColor:_getColorHere() local maybeGameObject = self.gameObject:getComponent( - 'Transform'):queryPosition("lowerPhysical") + 'Transform'):queryPosition(self._config.groundLayer) if maybeGameObject and maybeGameObject:hasComponent('Ground') then return maybeGameObject:getState() end @@ -271,6 +304,7 @@ function ZappedByColor:registerUpdaters(updaterRegistry) self.gameObject:teleportToGroup(spawnGroup, aliveState) self.playerRespawnedThisStep = true self._health = self._config.maxHealthOnGround + self.gameObject:getComponent('Avatar'):allowMovement() end local function _incrementHealthIfBelowThreshold(threshold) @@ -295,6 +329,19 @@ function ZappedByColor:registerUpdaters(updaterRegistry) end end + local function updateHealthState() + local state = self.gameObject:getState() + if state ~= self:getWaitState() and + self._health ~= self._healthLastFrame then + self.gameObject:setState('health' .. 
tostring(self._health)) + end + + self._healthLastFrame = self._health + + self.playerRespawnedThisStep = false + self.zapperIndex = nil + end + updaterRegistry:registerUpdater{ updateFn = respawn, priority = 135, @@ -307,6 +354,11 @@ function ZappedByColor:registerUpdaters(updaterRegistry) priority = 2, probability = self._config.healthRegenerationRate } + + updaterRegistry:registerUpdater{ + updateFn = updateHealthState, + priority = 1, + } end function ZappedByColor:onStateChange() @@ -359,18 +411,6 @@ function ZappedByColor:start() end end -function ZappedByColor:update() - local state = self.gameObject:getState() - if state ~= self:getWaitState() and self._health ~= self._healthLastFrame then - self.gameObject:setState('health' .. tostring(self._health)) - end - - self._healthLastFrame = self._health - -- Metrics must be read from preUpdate since they will get reset in update. - self.playerRespawnedThisStep = false - self.zapperIndex = nil -end - function ZappedByColor:getAliveState() return self.gameObject:getComponent('Avatar'):getAliveState() end diff --git a/meltingpot/lua/levels/paintball_capture_the_flag/components.lua b/meltingpot/lua/levels/paintball__capture_the_flag/components.lua similarity index 95% rename from meltingpot/lua/levels/paintball_capture_the_flag/components.lua rename to meltingpot/lua/levels/paintball__capture_the_flag/components.lua index be54a74e..488ec34d 100644 --- a/meltingpot/lua/levels/paintball_capture_the_flag/components.lua +++ b/meltingpot/lua/levels/paintball__capture_the_flag/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -115,6 +115,7 @@ end ]] function Flag:disconnectFromAvatar() if self._carryingAvatarObject then + self._carryingAvatarObject:getComponent('TeamMember'):dropFlag() self._carryingAvatarObject:getComponent('Avatar'):disconnect( self.gameObject) self._carryingAvatarObject = nil @@ -160,16 +161,22 @@ function ControlIndicator:__init__(kwargs) ControlIndicator.Base.__init__(self, kwargs) end -function ControlIndicator:update() +function ControlIndicator:registerUpdaters(updaterRegistry) local sceneObject = self.gameObject.simulation:getSceneObject() local flagManager = sceneObject:getComponent('FlagManager') - -- Show the color of a team that can score because their flag is on their - -- own home tile. If both teams have their flag on their home tile then show - -- purple. If neither, then show black. - local flagControlState = flagManager:getFlagControlState() - if flagControlState ~= self.gameObject:getState() then + + local function updateColorIndicator() + -- Show the color of a team that can score because their flag is on their + -- own home tile. If both teams have their flag on their home tile then show + -- purple. If neither, then show black. + local flagControlState = flagManager:getFlagControlState() self.gameObject:setState(flagControlState) end + + updaterRegistry:registerUpdater{ + updateFn = updateColorIndicator, + priority = 3, + } end diff --git a/meltingpot/lua/levels/paintball_capture_the_flag/init.lua b/meltingpot/lua/levels/paintball__capture_the_flag/init.lua similarity index 93% rename from meltingpot/lua/levels/paintball_capture_the_flag/init.lua rename to meltingpot/lua/levels/paintball__capture_the_flag/init.lua index 3374043a..ef6f33f1 100644 --- a/meltingpot/lua/levels/paintball_capture_the_flag/init.lua +++ b/meltingpot/lua/levels/paintball__capture_the_flag/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,12 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the capture_the_flag level. +-- Entry point lua file for the capture_the_flag substrate. local meltingpot = 'meltingpot.lua.' local mp_modules = meltingpot .. 'modules.' -local paintball = meltingpot .. 'levels.paintball.' local api_factory = require(mp_modules .. 'api_factory') local simulation = require(mp_modules .. 'base_simulation') @@ -27,7 +26,9 @@ local simulation = require(mp_modules .. 'base_simulation') local component_library = require(mp_modules .. 'component_library') local avatar_library = require(mp_modules .. 'avatar_library') -- Next require the general paintball game components. +local paintball = meltingpot .. 'levels.paintball.' local shared_components = require(paintball .. 'shared_components') + -- Finally add the local components for this game, overriding any previously -- loaded having the same name. local components = require 'components' diff --git a/meltingpot/lua/levels/paintball_king_of_the_hill/components.lua b/meltingpot/lua/levels/paintball__king_of_the_hill/components.lua similarity index 77% rename from meltingpot/lua/levels/paintball_king_of_the_hill/components.lua rename to meltingpot/lua/levels/paintball__king_of_the_hill/components.lua index 37c3b064..231b70d0 100644 --- a/meltingpot/lua/levels/paintball_king_of_the_hill/components.lua +++ b/meltingpot/lua/levels/paintball__king_of_the_hill/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -52,9 +52,8 @@ function GroundOrHill:onHit(hittingGameObject, hittingTeam) local passThrough = GroundOrHill.Base.onHit( self, hittingGameObject, hittingTeam) if self._teamNames[hittingTeam] then - local oldTeam = self.gameObject:getState(oldTeam) + local oldTeam = self.gameObject:getState() if self._isHill then - self._hillManager:squareColored(oldTeam, hittingTeam) -- Provide reward for painting the hill if applicable. if hittingGameObject:hasComponent('Taste') then hittingGameObject:getComponent('Taste'):paintedHillSquare(oldTeam) @@ -65,9 +64,8 @@ function GroundOrHill:onHit(hittingGameObject, hittingTeam) end function GroundOrHill:capture(capturingTeam) - local oldTeam = self.gameObject:getState(oldTeam) + local oldTeam = self.gameObject:getState() self.gameObject:setState(capturingTeam) - self._hillManager:squareColored(oldTeam, capturingTeam) end function GroundOrHill:start() @@ -88,13 +86,19 @@ function ControlIndicator:__init__(kwargs) ControlIndicator.Base.__init__(self, kwargs) end -function ControlIndicator:update() +function ControlIndicator:registerUpdaters(updaterRegistry) local sceneObject = self.gameObject.simulation:getSceneObject() local hillManager = sceneObject:getComponent('HillManager') - local teamInControl = hillManager:getTeamInControl() - if teamInControl ~= self.gameObject:getState() then + + local function displayColorOfControllingTeam() + local teamInControl = hillManager:getTeamInControl() self.gameObject:setState(teamInControl) end + + updaterRegistry:registerUpdater{ + updateFn = displayColorOfControllingTeam, + priority = 3, + } end @@ -147,8 +151,14 @@ function Taste:reset() self._framesSinceEvent = 0 end -function Taste:update() - self._framesSinceEvent = self._framesSinceEvent + 1 +function Taste:registerUpdaters(updaterRegistry) + local function incrementFramesSinceEvent() + self._framesSinceEvent = self._framesSinceEvent + 1 + end + updaterRegistry:registerUpdater{ + updateFn = incrementFramesSinceEvent, + priority = 300, + 
} end function Taste:paintedHillSquare(oldTeamOfThisSquare) @@ -199,7 +209,7 @@ local HillManager = class.Class(component.Component) function HillManager:__init__(kwargs) kwargs = args.parse(kwargs, { {'name', args.default('HillManager')}, - {'percentToCapture', args.ge(0), args.le(100)}, + {'percentToCapture', args.ge(51), args.le(100)}, {'rewardPerStepInControl', args.numberType}, }) HillManager.Base.__init__(self, kwargs) @@ -208,42 +218,41 @@ function HillManager:__init__(kwargs) end function HillManager:reset() - self._numColoredByTeam = {} self._registeredSquares = {} self._avatarsByTeam = {} self._teamCurrentlyInControl = "uncontrolled" -end - -function HillManager:squareColored(sourceTeam, targetTeam) - if targetTeam ~= sourceTeam then - -- Bookkeeping: increment target team's hill control counter. - if not self._numColoredByTeam[targetTeam] then - self._numColoredByTeam[targetTeam] = 0 - end - self._numColoredByTeam[targetTeam] = self._numColoredByTeam[targetTeam] + 1 - -- Bookkeeping: decrement source team hill control counter (if applicable). 
- if self._numColoredByTeam[sourceTeam] then - self._numColoredByTeam[sourceTeam] = - self._numColoredByTeam[sourceTeam] - 1 - end - end + self._totalHillSize = 0 end function HillManager:_getPercentFilledByMaximalTeam() - local total = 0 local maximalTeam = "uncontrolled" local maximalTeamAmount = 0 - for team, numColored in pairs(self._numColoredByTeam) do - total = total + numColored + + local simulation = self.gameObject.simulation + local numColoredByTeam = { + red = simulation:getGroupCount("hill_red"), + blue = simulation:getGroupCount("hill_blue"), + uncontrolled = simulation:getGroupCount("hill_clean"), + } + + for team, numColored in pairs(numColoredByTeam) do if numColored > maximalTeamAmount then maximalTeam = team maximalTeamAmount = numColored end end - local percentFilled = (maximalTeamAmount / total) * 100 + local percentFilled = (maximalTeamAmount / self._totalHillSize) * 100 return percentFilled, maximalTeam end +function HillManager:_getPercentOwnedByTeamInControl() + local simulation = self.gameObject.simulation + local numColored = simulation:getGroupCount( + "hill_" .. 
self._teamCurrentlyInControl) + local percentFilled = (numColored / self._totalHillSize) * 100 + return percentFilled +end + function HillManager:_capture(capturedByTeam) for _, square in ipairs(self._registeredSquares) do square:capture(capturedByTeam) @@ -253,29 +262,45 @@ function HillManager:_capture(capturedByTeam) 'team', capturedByTeam) -- string end -function HillManager:update() - local percentFilled, maximalTeam = self:_getPercentFilledByMaximalTeam() +function HillManager:registerUpdaters(updaterRegistry) + local function updateHill() + local percentFilled, maximalTeam = self:_getPercentFilledByMaximalTeam() - if maximalTeam and maximalTeam ~= self._teamCurrentlyInControl then - -- Change of control - if percentFilled >= self._config.percentToCapture then - -- Successful capture - self:_capture(maximalTeam) - self._teamCurrentlyInControl = maximalTeam - else - -- Mere loss of control without a new team capturing - self._teamCurrentlyInControl = "uncontrolled" + -- Change of control logic. + if maximalTeam and maximalTeam ~= self._teamCurrentlyInControl then + if percentFilled >= self._config.percentToCapture then + -- Successful capture + self:_capture(maximalTeam) + self._teamCurrentlyInControl = maximalTeam + end + end + + -- Loss of control logic. + if self._teamCurrentlyInControl ~= "uncontrolled" then + local percentOwnedByTeamInControl = self:_getPercentOwnedByTeamInControl() + if percentOwnedByTeamInControl < 50 then + self._teamCurrentlyInControl = "uncontrolled" + end + end + + -- Scoring logic. 
+ if self._teamCurrentlyInControl ~= "uncontrolled" then + self:provideRewards(self._teamCurrentlyInControl, true) + local teamNotInControl = self:getOtherTeam(self._teamCurrentlyInControl) + self:provideRewards(teamNotInControl, false) end end - if self._teamCurrentlyInControl ~= "uncontrolled" then - self:provideRewards(self._teamCurrentlyInControl, true) - local teamNotInControl = self:getOtherTeam(self._teamCurrentlyInControl) - self:provideRewards(teamNotInControl, false) - end + + updaterRegistry:registerUpdater{ + updateFn = updateHill, + priority = 5, + } end + function HillManager:registerSquare(groundComponent) table.insert(self._registeredSquares, groundComponent) + self._totalHillSize = self._totalHillSize + 1 end function HillManager:registerAvatar(avatar, team) diff --git a/meltingpot/lua/levels/paintball_king_of_the_hill/init.lua b/meltingpot/lua/levels/paintball__king_of_the_hill/init.lua similarity index 93% rename from meltingpot/lua/levels/paintball_king_of_the_hill/init.lua rename to meltingpot/lua/levels/paintball__king_of_the_hill/init.lua index 026c9183..27b51881 100644 --- a/meltingpot/lua/levels/paintball_king_of_the_hill/init.lua +++ b/meltingpot/lua/levels/paintball__king_of_the_hill/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,12 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the king_of_the_hill level. +-- Entry point lua file for the king_of_the_hill substrate. local meltingpot = 'meltingpot.lua.' local mp_modules = meltingpot .. 'modules.' -local paintball = meltingpot .. 'levels.paintball.' local api_factory = require(mp_modules .. 'api_factory') local simulation = require(mp_modules .. 
'base_simulation') @@ -27,7 +26,9 @@ local simulation = require(mp_modules .. 'base_simulation') local component_library = require(mp_modules .. 'component_library') local avatar_library = require(mp_modules .. 'avatar_library') -- Next require the general paintball game components. +local paintball = meltingpot .. 'levels.paintball.' local shared_components = require(paintball .. 'shared_components') + -- Finally add the local components for this game, overriding any previously -- loaded having the same name. local components = require 'components' diff --git a/meltingpot/lua/levels/predator_prey/components.lua b/meltingpot/lua/levels/predator_prey/components.lua new file mode 100644 index 00000000..705ba261 --- /dev/null +++ b/meltingpot/lua/levels/predator_prey/components.lua @@ -0,0 +1,842 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +local args = require 'common.args' +local class = require 'common.class' +local helpers = require 'common.helpers' +local set = require 'common.set' +local log = require 'common.log' +local events = require 'system.events' +local random = require 'system.random' +local tensor = require 'system.tensor' + +local meltingpot = 'meltingpot.lua.modules.' +local component = require(meltingpot .. 'component') +local component_registry = require(meltingpot .. 
'component_registry') + + +local Role = class.Class(component.Component) + +function Role:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Role')}, + {'isPredator', args.booleanType}, + }) + Role.Base.__init__(self, kwargs) + self._config.isPredator = kwargs.isPredator +end + +function Role:isPredator() + return self._config.isPredator +end + +function Role:isPrey() + return not self._config.isPredator +end + + +--[[ The `PredatorInteractBeam` component endows a predator with the ability +to fire a 1 length, 0 radius beam to interact with other objects that they are +facing. ]] +local PredatorInteractBeam = class.Class(component.Component) + +function PredatorInteractBeam:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('PredatorInteractBeam')}, + {'cooldownTime', args.numberType}, + {'shapes', args.tableType}, + {'palettes', args.tableType}, + }) + PredatorInteractBeam.Base.__init__(self, kwargs) + + self._config.cooldownTime = kwargs.cooldownTime + self._config.shape = kwargs.shapes[1] + self._config.palette = kwargs.palettes[1] +end + +function PredatorInteractBeam:awake() + self.hitAndSpriteName = 'predator' + self._showForDuration = 0 +end + +function PredatorInteractBeam:getHitName() + return self.hitAndSpriteName +end + +function PredatorInteractBeam:addHits(worldConfig) + worldConfig.hits[self.hitAndSpriteName] = { + layer = self.hitAndSpriteName, + sprite = self.hitAndSpriteName, + } + component.insertIfNotPresent(worldConfig.renderOrder, self.hitAndSpriteName) +end + +function PredatorInteractBeam:addSprites(tileSet) + tileSet:addShape(self.hitAndSpriteName, + {palette = self._config.palette, + text = self._config.shape, + noRotate = true}) +end + +function PredatorInteractBeam:registerUpdaters(updaterRegistry) + local interact = function() + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + -- Execute the beam 
if applicable. + if self._showForDuration > 0 then + self.gameObject:hitBeam(self.hitAndSpriteName, 1, 0) + self._showForDuration = self._showForDuration - 1 + else + if actions['interact'] == 1 then + self._coolingTimer = self._config.cooldownTime + self.gameObject:hitBeam(self.hitAndSpriteName, 1, 0) + end + end + end + + updaterRegistry:registerUpdater{ + updateFn = interact, + priority = 140, + } +end + +function PredatorInteractBeam:showForDuration(duration) + self._showForDuration = duration - 1 +end + +--[[ Reset gets called just before start regardless of whether we have a new +environment instance or one that was reset by calling reset() in python.]] +function PredatorInteractBeam:reset() + local kwargs = self._kwargs + self._playerIndex = self.gameObject:getComponent('Avatar'):getIndex() + -- Set the beam cooldown timer to its `ready` state (i.e. coolingTimer = 0). + self._coolingTimer = 0 +end + + +--[[ The `InteractEatAcorn` component endows a prey avatar with the ability to +eat an acorn if in their inventory for a reward. ]] +local InteractEatAcorn = class.Class(component.Component) + +function InteractEatAcorn:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('InteractEatAcorn')}, + {'cooldownTime', args.numberType}, + {'shapes', args.tableType}, + {'palettes', args.tableType}, + {'isEating', args.booleanType}, + -- `defaultState` is the default player state with arms down. + {'defaultState', args.stringType}, + }) + InteractEatAcorn.Base.__init__(self, kwargs) + + self._config.cooldownTime = kwargs.cooldownTime + self._config.shape = kwargs.shapes[1] + self._config.palette = kwargs.palettes[1] + self._config.isEating = kwargs.isEating + self._config.defaultState = kwargs.defaultState + + self._inventoryObject = nil +end + +function InteractEatAcorn:awake() + self.hitAndSpriteName = 'interact_' .. 
self.gameObject:getUniqueState() + self._showForDuration = 0 +end + +function InteractEatAcorn:getHitName() + return self.hitAndSpriteName +end + +function InteractEatAcorn:isEating() + return self._config.isEating +end + +function InteractEatAcorn:setIsEating(x) + self._config.isEating = x + return self._config.isEating +end + +function InteractEatAcorn:setInventoryObject(obj) + self._inventoryObject = obj +end + +function InteractEatAcorn:getAvatarsInventory() + return self._inventoryObject +end + +function InteractEatAcorn:addHits(worldConfig) + worldConfig.hits[self.hitAndSpriteName] = { + layer = self.hitAndSpriteName, + sprite = self.hitAndSpriteName, + } + component.insertIfNotPresent(worldConfig.renderOrder, self.hitAndSpriteName) +end + +function InteractEatAcorn:addSprites(tileSet) + tileSet:addShape(self.hitAndSpriteName, + {palette = self._config.palette, + text = self._config.shape, + noRotate = true}) +end + +function InteractEatAcorn:registerUpdaters(updaterRegistry) + local interact = function() + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + -- Execute eat action if applicable. + if self._config.cooldownTime >= 0 then + if self._coolingTimer > 0 then + self._coolingTimer = self._coolingTimer - 1 + else + if actions['interact'] == 1 then + self._coolingTimer = self._config.cooldownTime + local inventoryObject = self:getAvatarsInventory() + if inventoryObject:getHeldItem() ~= 'empty' then + -- Only allow eating an acorn if stamina bar is invisible and + -- in default "arms down" state. 
+ local stamina = self.gameObject:getComponent('Stamina') + local state = self.gameObject:getState() + if stamina:getBand() == 'invisible' and + state == self._config.defaultState then + inventoryObject:setHeldItem('empty') + self.gameObject:getComponent('AvatarEatingAnimation'):sitDown() + end + end + end + end + end + end + + updaterRegistry:registerUpdater{ + updateFn = interact, + priority = 140, + } +end + +function InteractEatAcorn:showForDuration(duration) + self._showForDuration = duration - 1 +end + +--[[ Reset gets called just before start regardless of whether we have a new +environment instance or one that was reset by calling reset() in python.]] +function InteractEatAcorn:reset() + local kwargs = self._kwargs + self._playerIndex = self.gameObject:getComponent('Avatar'):getIndex() + -- Set the eat action cooldown timer to its `ready` state + -- (i.e. coolingTimer = 0). + self._coolingTimer = 0 +end + + +--[[ An inventory component which holds an item and visualises it. It will +be placed on a separate object, one associated with each avatar, and connected +to it. +]] +local Inventory = class.Class(component.Component) + +function Inventory:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Inventory')}, + -- `playerIndex` (int): index of the player whose inventory this is. + {'playerIndex', args.numberType}, + }) + Inventory.Base.__init__(self, kwargs) + self._playerIndex = kwargs.playerIndex +end + +function Inventory:postStart() + local sim = self.gameObject.simulation + -- Store a reference to the connected avatar object. 
+ self._avatarObject = sim:getAvatarFromIndex(self._playerIndex) + self._avatarObject:getComponent('InteractEatAcorn'):setInventoryObject(self) +end + +function Inventory:getHeldItem() + local state = self.gameObject:getState() + return state +end + +function Inventory:setHeldItem(item) + self.gameObject:setState(item) +end + + +local AvatarEdible = class.Class(component.Component) + +function AvatarEdible:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AvatarEdible')}, + {'groupRadius', args.default(2), args.positive}, + {'framesToDisplayBeingEaten', args.default(5), args.positive}, + {'predatorRewardForEating', args.default(1), args.numberType}, + }) + AvatarEdible.Base.__init__(self, kwargs) + self._groupRadius = kwargs.groupRadius + self._framesToDisplayBeingEaten = kwargs.framesToDisplayBeingEaten + self._config.predatorRewardForEating = kwargs.predatorRewardForEating +end + +function AvatarEdible:reset() + self.dead = false +end + +function AvatarEdible:_countGroupSize(targetRole) + local function isTargetRole(object) + if targetRole == 'prey' then + return object:getComponent('Role'):isPrey() + elseif targetRole == 'predator' then + return object:getComponent('Role'):isPredator() + end + end + + local layer = self.gameObject:getLayer() + local transform = self.gameObject:getComponent('Transform') + local objectsNearby = transform:queryDisc(layer, self._groupRadius) + + local groupSize = 0 + local objectsWithTargetRoleNearby = {} + for _, object in pairs(objectsNearby) do + if object:hasComponent('Role') and isTargetRole(object) then + if object:getComponent('Stamina'):getBand() ~= 'red' then + -- Never count avatars with stamina currently in the 'red' band. + if object:hasComponent('InteractEatAcorn') then + -- Prey have the 'InteractEatAcorn' component. + if not object:getComponent('InteractEatAcorn'):isEating() then + -- Prey only count if they are not currently eating an acorn. 
+ groupSize = groupSize + 1 + table.insert(objectsWithTargetRoleNearby, object) + end + else + -- Predators do not have the 'InteractEatAcorn' component. + groupSize = groupSize + 1 + table.insert(objectsWithTargetRoleNearby, object) + end + end + end + end + return groupSize, objectsWithTargetRoleNearby +end + + +function AvatarEdible:_beEaten() + local deadState = self.gameObject:getComponent('Avatar'):getWaitState() + self.gameObject:setState(deadState) + self.dead = true +end + +function AvatarEdible:onHit(hitterObject, hitName) + local selfRole = self.gameObject:getComponent('Role') + if hitName == 'predator' and selfRole:isPrey() then + local preyGroupSize, preyNearby = self:_countGroupSize('prey') + local predatorGroupSize, _ = self:_countGroupSize('predator') + if preyGroupSize <= predatorGroupSize then + -- Case where the group is too small so the prey will be eaten. + self:_beEaten() + hitterObject:getComponent('PredatorInteractBeam'):showForDuration( + self._framesToDisplayBeingEaten) + hitterObject:getComponent('Avatar'):disallowMovementUntil( + self._framesToDisplayBeingEaten) + local hitterAvatar = hitterObject:getComponent('Avatar') + hitterAvatar:addReward(self._config.predatorRewardForEating) + events:add( + 'prey_consumed', 'dict', + 'predator_player_index', hitterAvatar:getIndex(), + 'prey_player_index', self.gameObject:getComponent('Avatar'):getIndex() + ) -- (int, int) + else + -- Case where the group is big enough to avoid being eaten. + for _, preyObject in ipairs(preyNearby) do + if preyObject:getComponent('AvatarEdible'):alive() then + preyObject:getComponent('AvatarAnimation'):armsUp() + end + end + end + elseif hitName == 'predator' and selfRole:isPredator() then + -- Case where a predator eats another predator for zero reward (since + -- predators are not tasty). Competition is the main reason one might want + -- to do this. 
+ self:_beEaten() + -- It takes a lot of energy to incapacitate a predator so stamina is + -- correspondingly decreased substantially. This sometimes gives prey a + -- chance to escape. + local hitterStamina = hitterObject:getComponent('Stamina'):addValue(-4) + + events:add( + 'predator_consumed', 'dict', + 'eater_player_index', hitterObject:getComponent('Avatar'):getIndex(), + 'eaten_player_index', self.gameObject:getComponent('Avatar'):getIndex() + ) -- (int, int) + end +end + +function AvatarEdible:alive() + return not self.dead +end + + +local AvatarRespawn = class.Class(component.Component) + +function AvatarRespawn:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AvatarRespawn')}, + {'framesTillRespawn', args.positive}, + }) + AvatarRespawn.Base.__init__(self, kwargs) + self._config.framesTillRespawn = kwargs.framesTillRespawn +end + +function AvatarRespawn:registerUpdaters(updaterRegistry) + local avatar = self.gameObject:getComponent('Avatar') + local aliveState = avatar:getAliveState() + local waitState = avatar:getWaitState() + local respawn = function() + local spawnGroup = avatar:getSpawnGroup() + self.gameObject:teleportToGroup(spawnGroup, aliveState) + self.playerRespawnedThisStep = true + if self.gameObject:hasComponent('AvatarEdible') then + local avatarEdible = self.gameObject:getComponent('AvatarEdible') + avatarEdible.dead = false + end + end + + updaterRegistry:registerUpdater{ + updateFn = respawn, + priority = 135, + state = waitState, + startFrame = self._config.framesTillRespawn + } +end + + +local AvatarAnimation = class.Class(component.Component) + +function AvatarAnimation:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AvatarAnimation')}, + {'framesToRaiseArms', args.default(5), args.positive}, + {'upState', args.stringType}, + {'downState', args.stringType}, + }) + AvatarAnimation.Base.__init__(self, kwargs) + self._config.framesToRaiseArms = kwargs.framesToRaiseArms + 
self._config.upState = kwargs.upState + self._config.downState = kwargs.downState +end + +function AvatarAnimation:reset() + self._counter = 0 +end + +function AvatarAnimation:armsUp() + self._counter = self._config.framesToRaiseArms + self.gameObject:setState(self._config.upState) +end + +function AvatarAnimation:registerUpdaters(updaterRegistry) + local updateAnimation = function() + if self._counter == 1 then + if self.gameObject:getComponent('AvatarEdible'):alive() then + self.gameObject:setState(self._config.downState) + end + end + self._counter = self._counter - 1 + end + + updaterRegistry:registerUpdater{ + updateFn = updateAnimation, + priority = 295, + } +end + + +local AvatarEatingAnimation = class.Class(component.Component) + +function AvatarEatingAnimation:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AvatarEatingAnimation')}, + {'framesToEatAcorn', args.default(26), args.positive}, + {'sit', args.stringType}, + {'prepToEat', args.stringType}, + {'firstBite', args.stringType}, + {'secondBite', args.stringType}, + {'lastBite', args.stringType}, + {'downState', args.stringType}, + {'acornReward', args.default(3.0), args.positive}, + }) + AvatarEatingAnimation.Base.__init__(self, kwargs) + self._config.framesToEatAcorn = kwargs.framesToEatAcorn + self._config.sit = kwargs.sit + self._config.prepToEat = kwargs.prepToEat + self._config.firstBite = kwargs.firstBite + self._config.secondBite = kwargs.secondBite + self._config.lastBite = kwargs.lastBite + self._config.downState = kwargs.downState + self._config.acornReward = kwargs.acornReward + self._config.oneThirdAcornReward = self._config.acornReward / 3.0 +end + +function AvatarEatingAnimation:_resetAnimation() + self._counter = 0 + self.gameObject:getComponent('InteractEatAcorn'):setIsEating(false) +end + +function AvatarEatingAnimation:reset() + self:_resetAnimation() +end + +function AvatarEatingAnimation:registerUpdaters(updaterRegistry) + local avatar = 
self.gameObject:getComponent('Avatar') + local transform = self.gameObject:getComponent('Transform') + local deadState = avatar:getWaitState() + + local updateAnimation = function() + if self.gameObject:getState() ~= deadState then + if self._counter == self._config.framesToEatAcorn then + -- Give the avatar a pseudoreward if applicable on the first eat frame. + if self.gameObject:hasComponent('AcornTaste') then + local pseudoreward = self.gameObject:getComponent( + 'AcornTaste'):getAcornConsumptionReward() + avatar:addReward(pseudoreward) + end + end + if self._counter > 0 then + avatar:disallowMovementUntil(0) + self.gameObject:getComponent('InteractEatAcorn'):setIsEating(true) + end + if self._counter == 21 then + if self.gameObject:getComponent('AvatarEdible'):alive() then + self.gameObject:setState(self._config.prepToEat) + end + end + if self._counter == 16 then + if self.gameObject:getComponent('AvatarEdible'):alive() then + self.gameObject:setState(self._config.firstBite) + avatar:addReward(self._config.oneThirdAcornReward) + end + end + if self._counter == 11 then + if self.gameObject:getComponent('AvatarEdible'):alive() then + self.gameObject:setState(self._config.secondBite) + avatar:addReward(self._config.oneThirdAcornReward) + end + end + if self._counter == 6 then + if self.gameObject:getComponent('AvatarEdible'):alive() then + self.gameObject:setState(self._config.lastBite) + avatar:addReward(self._config.oneThirdAcornReward) + end + end + if self._counter == 1 then + -- Done eating the acorn. + self.gameObject:getComponent('InteractEatAcorn'):setIsEating(false) + if self.gameObject:getComponent('AvatarEdible'):alive() then + self.gameObject:setState(self._config.downState) + end + events:add('acorn_consumed', 'dict', + 'player_index', avatar:getIndex()) -- int + -- Check if acorn was consumed while standing on safety grass. 
+ local objectOrNilBelow = transform:queryPosition('midPhysical') + if objectOrNilBelow and objectOrNilBelow:getState() == 'safe_grass' then + if self.gameObject:hasComponent('AcornTaste') then + local pseudoreward = self.gameObject:getComponent( + 'AcornTaste'):getSafeAcornConsumptionReward() + avatar:addReward(pseudoreward) + end + events:add('acorn_consumed_safely', 'dict', + 'player_index', avatar:getIndex()) -- int + end + end + else + self:_resetAnimation() + end + self._counter = self._counter - 1 + end + + updaterRegistry:registerUpdater{ + updateFn = updateAnimation, + priority = 300, + } +end + +function AvatarEatingAnimation:sitDown() + if self.gameObject:getComponent('AvatarEdible'):alive() then + self._counter = self._config.framesToEatAcorn + self.gameObject:setState(self._config.sit) + end +end + + +--[[ Reward players based on their distance to other nearby avatars having a +specific role.]] +local ProxemicTaste = class.Class(component.Component) + +function ProxemicTaste:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('ProxemicTaste')}, + -- `distanceToReward` maps distances to rewards to add up per step per + -- avatar object found nearby. + {'distanceToReward', args.default({}), args.tableType}, + -- `layer` indicates which layer to count objects. It will typically be + -- the layer where Avatars are. + {'layer', args.default('upperPhysical'), args.stringType}, + -- `roleToCount` indicates whether to count prey, predators, or both. 
+ {'roleToCount', args.default('avatar'), + args.oneOf('prey', 'predator', 'avatar')}, + }) + ProxemicTaste.Base.__init__(self, kwargs) + self._distanceToReward = kwargs.distanceToReward + self._layer = kwargs.layer + self._roleToCount = kwargs.roleToCount +end + +function ProxemicTaste:reset() + local role = self.gameObject:getComponent('Role') + self._subtractFromCount = 0 + if role:isPrey() and self._roleToCount == 'prey' then + self._subtractFromCount = 1 + end + if role:isPredator() and self._roleToCount == 'predator' then + self._subtractFromCount = 1 + end + if self._roleToCount == 'avatar' then + self._subtractFromCount = 1 + end +end + +function ProxemicTaste:_countAvatarsWithRole(neighbors) + local counts = { + avatar = 0, + prey = 0, + predator = 0, + } + for _, neighbor in ipairs(neighbors) do + if neighbor:hasComponent('Avatar') and + neighbor:getComponent('Avatar'):isAlive() then + counts['avatar'] = counts['avatar'] + 1 + if neighbor:getComponent('Role'):isPrey() then + counts['prey'] = counts['prey'] + 1 + elseif neighbor:getComponent('Role'):isPredator() then + counts['predator'] = counts['predator'] + 1 + end + end + end + return counts[self._roleToCount] +end + +function ProxemicTaste:registerUpdaters(updaterRegistry) + local avatar = self.gameObject:getComponent('Avatar') + local transform = self.gameObject:getComponent('Transform') + + local proximityTasteUpdate = function() + if avatar:isAlive() then + local rewardToDeliver = 0 + for distance, rewardAtThisDistance in pairs(self._distanceToReward) do + local neighbors = transform:queryDisc(self._layer, distance) + local numCounted = self:_countAvatarsWithRole(neighbors) + -- If needed, subtract 1 from numCounted so self does not count. 
+ local extraReward = ( + rewardAtThisDistance * (numCounted - self._subtractFromCount)) + rewardToDeliver = rewardToDeliver + extraReward + end + avatar:addReward(rewardToDeliver) + end + end + + updaterRegistry:registerUpdater{ + updateFn = proximityTasteUpdate, + priority = 2, + } +end + + +--[[ Optionally provide extra rewards for collecting and eating acorns. + +This is off by default. Acorns should not provide any reward till they have +been consumed in the main version of the substrate. This component is typically +only used when training certain background populations. +]] +local AcornTaste = class.Class(component.Component) + +function AcornTaste:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AcornTaste')}, + -- No effect when `collectReward` is 0.0. + {'collectReward', args.default(0.0), args.numberType}, + -- No effect when `eatReward` is 0.0. + {'eatReward', args.default(0.0), args.numberType}, + -- No effect when `safeAcornConsumptionReward` is 0.0. + {'safeAcornConsumptionReward', args.default(0.0), args.numberType}, + }) + AcornTaste.Base.__init__(self, kwargs) + self._collectReward = kwargs.collectReward + self._extraEatReward = kwargs.eatReward + self._safeAcornConsumptionReward = kwargs.safeAcornConsumptionReward +end + +function AcornTaste:getAcornCollectionReward() + return self._collectReward +end + +function AcornTaste:getAcornConsumptionReward() + return self._extraEatReward +end + +function AcornTaste:getSafeAcornConsumptionReward() + return self._safeAcornConsumptionReward +end + + +-- Make the apples edible. 
+local AppleEdible = class.Class(component.Component) + +function AppleEdible:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AppleEdible')}, + {'liveState', args.stringType}, + {'waitState', args.stringType}, + {'rewardForEating', args.numberType}, + }) + AppleEdible.Base.__init__(self, kwargs) + + self._config.liveState = kwargs.liveState + self._config.waitState = kwargs.waitState + self._config.rewardForEating = kwargs.rewardForEating +end + +function AppleEdible:reset() + self._waitState = self._config.waitState + self._liveState = self._config.liveState +end + +function AppleEdible:setWaitState(newWaitState) + self._waitState = newWaitState +end + +function AppleEdible:getWaitState() + return self._waitState +end + +function AppleEdible:setLiveState(newLiveState) + self._liveState = newLiveState +end + +function AppleEdible:getLiveState() + return self._liveState +end + +function AppleEdible:onEnter(enteringGameObject, contactName) + if contactName == 'avatar' and enteringGameObject and + enteringGameObject:hasComponent('Role') then + local role = enteringGameObject:getComponent('Role') + if self.gameObject:getState() == self._liveState and role:isPrey() then + -- Reward the player who ate the edible. + local avatarComponent = enteringGameObject:getComponent('Avatar') + avatarComponent:addReward(self._config.rewardForEating) + events:add('apple_consumed', 'dict', + 'player_index', avatarComponent:getIndex()) -- int + -- Change the edible to its wait (disabled) state. + self.gameObject:setState(self._waitState) + end + end +end + + +-- Make acorns pickuppable. 
+local AcornPickUppable = class.Class(component.Component)
+
+-- Constructor. Requires `liveState` and `waitState` (strings); `name`
+-- defaults to 'AcornPickUppable'.
+function AcornPickUppable:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('AcornPickUppable')},
+      {'liveState', args.stringType},
+      {'waitState', args.stringType},
+  })
+  AcornPickUppable.Base.__init__(self, kwargs)
+
+  self._config.liveState = kwargs.liveState
+  self._config.waitState = kwargs.waitState
+end
+
+-- Restore the working wait/live state names from the configured values.
+function AcornPickUppable:reset()
+  self._waitState = self._config.waitState
+  self._liveState = self._config.liveState
+end
+
+-- Override the state this acorn switches to after being picked up.
+function AcornPickUppable:setWaitState(newWaitState)
+  self._waitState = newWaitState
+end
+
+-- Return the state this acorn switches to after being picked up.
+function AcornPickUppable:getWaitState()
+  return self._waitState
+end
+
+-- Override the state in which this acorn can be picked up.
+function AcornPickUppable:setLiveState(newLiveState)
+  self._liveState = newLiveState
+end
+
+-- Return the state in which this acorn can be picked up.
+function AcornPickUppable:getLiveState()
+  return self._liveState
+end
+
+-- Contact callback: a prey avatar with an empty inventory that enters a live
+-- acorn picks it up. The acorn is only removed (set to its wait state) when
+-- the pick-up actually happens.
+function AcornPickUppable:onEnter(enteringGameObject, contactName)
+  if contactName == 'avatar' and enteringGameObject and
+      enteringGameObject:hasComponent('Role') then
+    local role = enteringGameObject:getComponent('Role')
+    if self.gameObject:getState() == self._liveState and role:isPrey() then
+      -- The inventory is reached through the avatar's `InteractEatAcorn`
+      -- component.
+      local avatarsInventory = enteringGameObject:getComponent(
+          'InteractEatAcorn'):getAvatarsInventory()
+      -- If prey avatar's inventory is empty, add acorn to inventory.
+      if avatarsInventory:getHeldItem() == 'empty' then
+        avatarsInventory:setHeldItem('acorn')
+        local avatarComponent = enteringGameObject:getComponent('Avatar')
+        events:add('acorn_collected', 'dict',
+                   'player_index', avatarComponent:getIndex())  -- int
+        -- Change the edible to its wait (disabled) state.
+        self.gameObject:setState(self._waitState)
+        -- Give the avatar a pseudoreward if applicable.
+        if enteringGameObject:hasComponent('AcornTaste') then
+          local pseudoreward = enteringGameObject:getComponent(
+              'AcornTaste'):getAcornCollectionReward()
+          avatarComponent:addReward(pseudoreward)
+        end
+      end
+    end
+  end
+end
+
+
+-- Every component defined in this file, keyed by its registered name.
+local allComponents = {
+    -- Avatar components.
+    Role = Role,
+    Inventory = Inventory,
+    PredatorInteractBeam = PredatorInteractBeam,
+    InteractEatAcorn = InteractEatAcorn,
+    AvatarEdible = AvatarEdible,
+    AvatarRespawn = AvatarRespawn,
+    AvatarAnimation = AvatarAnimation,
+    AvatarEatingAnimation = AvatarEatingAnimation,
+    -- Avatar components used to define pseudorewards for background bot training.
+    ProxemicTaste = ProxemicTaste,
+    AcornTaste = AcornTaste,
+    -- Apple components.
+    AppleEdible = AppleEdible,
+    AcornPickUppable = AcornPickUppable,
+}
+
+component_registry.registerAllComponents(allComponents)
+
+return allComponents
diff --git a/meltingpot/lua/levels/predator_prey/init.lua b/meltingpot/lua/levels/predator_prey/init.lua
new file mode 100644
index 00000000..7718dd58
--- /dev/null
+++ b/meltingpot/lua/levels/predator_prey/init.lua
@@ -0,0 +1,61 @@
+--[[ Copyright 2022 DeepMind Technologies Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]
+
+-- Entry point lua file for predator_prey substrates.
+local class = require 'common.class'
+local helpers = require 'common.helpers'
+
+local meltingpot = 'meltingpot.lua.modules.'
+local api_factory = require(meltingpot .. 'api_factory')
+local simulation = require(meltingpot ..
'base_simulation')
+
+-- Required to be able to use the components in the level
+local component_library = require(meltingpot .. 'component_library')
+local avatar_library = require(meltingpot .. 'avatar_library')
+-- Next require the general stamina components
+local stamina = 'meltingpot.lua.levels.stamina.'
+local shared_components = require(stamina .. 'shared_components')
+-- Finally add the local components for this game, overriding any previously
+-- loaded having the same name.
+local components = require 'components'
+
+
+local OverrideSimulation = class.Class(simulation.BaseSimulation)
+
+--[[ Extend the base world config with an extra 'midPhysical' render layer.
+
+Returns the config table produced by `BaseSimulation.worldConfig`, with
+'midPhysical' inserted into `renderOrder` immediately before 'upperPhysical'.
+Raises an error if `renderOrder` contains no 'upperPhysical' entry.
+]]
+function OverrideSimulation:worldConfig()
+  local config = simulation.BaseSimulation.worldConfig(self)
+  local index = nil
+  for layerIndex, layerName in ipairs(config.renderOrder) do
+    if layerName == 'upperPhysical' then
+      index = layerIndex
+      break
+    end
+  end
+  -- Fail loudly when the anchor layer is missing rather than calling
+  -- `table.insert` with position 0, which is not a valid list position.
+  assert(index, "Layer 'upperPhysical' not found in renderOrder.")
+  -- Add layer 'midPhysical' below 'upperPhysical'.
+  table.insert(config.renderOrder, index, 'midPhysical')
+  return config
+end
+
+return api_factory.apiFactory{
+    Simulation = OverrideSimulation,
+    settings = {
+        -- Scale each sprite to a square of size `spriteSize` X `spriteSize`.
+        spriteSize = 8,
+        -- Terminate the episode after this many frames.
+        maxEpisodeLengthFrames = 1000,
+        -- Settings to pass to simulation.lua.
+        simulation = {},
+    }
+}
diff --git a/meltingpot/lua/levels/stamina/shared_components.lua b/meltingpot/lua/levels/stamina/shared_components.lua
new file mode 100644
index 00000000..0a838f42
--- /dev/null
+++ b/meltingpot/lua/levels/stamina/shared_components.lua
@@ -0,0 +1,444 @@
+--[[ Copyright 2022 DeepMind Technologies Limited.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+]]
+
+local args = require 'common.args'
+local class = require 'common.class'
+local helpers = require 'common.helpers'
+local log = require 'common.log'
+local set = require 'common.set'
+local events = require 'system.events'
+local random = require 'system.random'
+local tensor = require 'system.tensor'
+
+local meltingpot = 'meltingpot.lua.modules.'
+local component = require(meltingpot .. 'component')
+local component_registry = require(meltingpot .. 'component_registry')
+
+
+-- Unit offset, as a tensor, for each compass direction.
+local _DIRECTION = {
+    N = tensor.Tensor({0, -1}),
+    E = tensor.Tensor({1, 0}),
+    S = tensor.Tensor({0, 1}),
+    W = tensor.Tensor({-1, 0}),
+}
+
+
+local Stamina = class.Class(component.Component)
+
+--[[ Construct the `Stamina` component.
+
+`classConfig` is validated by `Stamina:parseClass`. The four band sizes
+(`amountInvisible`, `amountRed`, `amountYellow`, `amountGreen`) must total
+`maxStamina + 1`, otherwise construction fails with an assertion error.
+]]
+function Stamina:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('Stamina')},
+      {'maxStamina', args.positive},
+      {'classConfig', args.tableType},
+      {'amountInvisible', args.ge(0)},
+      {'amountRed', args.ge(0)},
+      {'amountYellow', args.ge(0)},
+      {'amountGreen', args.ge(0)},
+      -- `costlyActions` table of actions affecting stamina eg {'move', 'turn'}.
+      {'costlyActions', args.tableType},
+  })
+  Stamina.Base.__init__(self, kwargs)
+
+  self._config.maxStamina = kwargs.maxStamina
+  self._config.classConfig = self:parseClass(kwargs.classConfig)
+  self._config.amountInvisible = kwargs.amountInvisible
+  self._config.amountRed = kwargs.amountRed
+  self._config.amountYellow = kwargs.amountYellow
+  self._config.amountGreen = kwargs.amountGreen
+  -- The `- 1` means the band sizes have to add up to `maxStamina + 1`,
+  -- presumably one slot per stamina value in 0..maxStamina.
+  local sum = (self._config.amountInvisible +
+               self._config.amountRed +
+               self._config.amountYellow +
+               self._config.amountGreen - 1)
+  assert(sum == self._config.maxStamina,
+         "Color amounts must sum to max stamina but " .. sum .. " != "
+         .. self._config.maxStamina)
+
+  self._config.costlyActions = kwargs.costlyActions
+end
+
+--[[ Validate a stamina class config table and return the parsed settings.
+
+Returns a new table holding `greenFreezeTime`, `yellowFreezeTime`,
+`redFreezeTime`, `decrementRate` (default 1, must lie in (0, 1]) and the
+derived `decrementInterval = 1.0 / decrementRate`. The `name` entry is
+required but is not copied into the result.
+]]
+function Stamina:parseClass(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.stringType},
+      {'greenFreezeTime', args.ge(0)},
+      {'yellowFreezeTime', args.ge(0)},
+      {'redFreezeTime', args.ge(0)},
+      {'decrementRate', args.default(1), args.gt(0), args.le(1)},
+  })
+  -- Must be `local`: without it every call would overwrite a global
+  -- variable named `classConfig`.
+  local classConfig = {}
+  classConfig.greenFreezeTime = kwargs.greenFreezeTime
+  classConfig.yellowFreezeTime = kwargs.yellowFreezeTime
+  classConfig.redFreezeTime = kwargs.redFreezeTime
+  classConfig.decrementRate = kwargs.decrementRate
+
+  classConfig.decrementInterval = 1.0 / classConfig.decrementRate
+  return classConfig
+end
+
+-- Restore stamina to `maxStamina` and clear the last-action, freeze,
+-- recovery and decrement-counter state.
+function Stamina:reset()
+  self._value = self._config.maxStamina
+  self._lastAction = nil
+  self._frozenFramesRemaining = 0
+  self._allowRecovery = true
+  self._costlyFramesSinceLastDelta = 0
+end
+
+--[[ Return the current stamina band. This function will always return one of
+{'green', 'yellow', 'red', 'invisible'}.
+
+`invisible` indicates the highest stamina values.
+`green` indicates high stamina.
+`yellow` indicates a stamina level in between green and red.
+`red` indicates low stamina.
+]]
+function Stamina:getBand()
+  local r = self._config.amountRed
+  local y = self._config.amountYellow
+  local g = self._config.amountGreen
+
+  -- Bands are stacked from 0 upwards: red, then yellow, then green. Any value
+  -- at or above r + y + g is reported as 'invisible'.
+  if self._value >= 0 and self._value < r then
+    return 'red'
+  elseif self._value >= r and self._value < r + y then
+    return 'yellow'
+  elseif self._value >= r + y and self._value < r + y + g then
+    return 'green'
+  elseif self._value >= r + y + g then
+    return 'invisible'
+  end
+end
+
+--[[ Register the two updaters that drive stamina.
+
+`updateStamina` (priority 4) spends stamina on costly actions and regenerates
+it on updates where no costly action was taken. `applyStamina` (priority 200)
+counts down any active freeze and, after a costly action, freezes the avatar
+for the freeze time configured for the current band.
+]]
+function Stamina:registerUpdaters(updaterRegistry)
+  local avatar = self.gameObject:getComponent('Avatar')
+
+  local function updateStamina()
+    if self._frozenFramesRemaining > 0 then
+      return
+    end
+    -- If not frozen then do the following.
+    -- First, check if a costly action was taken and decrement stamina if so.
+    local playerVolatileVariables = (
+        self.gameObject:getComponent('Avatar'):getVolatileData())
+    local actions = playerVolatileVariables.actions
+    self._lastAction = nil
+    for _, costly_action_name in pairs(self._config.costlyActions) do
+      if actions[costly_action_name] ~= 0 then
+        self._costlyFramesSinceLastDelta = self._costlyFramesSinceLastDelta + 1
+        local decrementInterval = self._config.classConfig.decrementInterval
+        -- Use `>=` rather than `==`: `decrementInterval` is computed as
+        -- 1.0 / decrementRate and need not be a whole number, in which case an
+        -- integer counter would never be exactly equal to it.
+        if self._costlyFramesSinceLastDelta >= decrementInterval then
+          local newStamina = self._value - 1
+          self._value = (newStamina > 0) and newStamina or 0
+          self._costlyFramesSinceLastDelta = 0
+        end
+        self._lastAction = 'costly'
+      end
+    end
+    -- Next, check if agent didn't do anything on the last step. If so, then
+    -- increase stamina (it regenerates overtime while the avatar rests).
+    if self._lastAction == nil and self._allowRecovery then
+      local newStamina = self._value + 1
+      self._value = math.min(newStamina, self._config.maxStamina)
+    end
+    self._lastAction = self._lastAction or 'not_costly'
+  end
+
+  local function applyStamina()
+    if self._frozenFramesRemaining > 0 then
+      self._frozenFramesRemaining = self._frozenFramesRemaining - 1
+      return
+    end
+    -- If not already frozen then do the following.
+    local greenFreezeTime = self._config.classConfig.greenFreezeTime
+    local yellowFreezeTime = self._config.classConfig.yellowFreezeTime
+    local redFreezeTime = self._config.classConfig.redFreezeTime
+    local band = self:getBand()
+    -- The 'invisible' band shares the green freeze time.
+    if band == 'invisible' or band == 'green' then
+      if self._lastAction == 'costly' and greenFreezeTime > 0 then
+        avatar:disallowMovementUntil(greenFreezeTime)
+        self._frozenFramesRemaining = greenFreezeTime
+      end
+    elseif band == 'yellow' then
+      if self._lastAction == 'costly' and yellowFreezeTime > 0 then
+        avatar:disallowMovementUntil(yellowFreezeTime)
+        self._frozenFramesRemaining = yellowFreezeTime
+      end
+    elseif band == 'red' then
+      if self._lastAction == 'costly' and redFreezeTime > 0 then
+        avatar:disallowMovementUntil(redFreezeTime)
+        self._frozenFramesRemaining = redFreezeTime
+      end
+    end
+  end
+
+  updaterRegistry:registerUpdater{
+      updateFn = updateStamina,
+      priority = 4,
+  }
+  updaterRegistry:registerUpdater{
+      updateFn = applyStamina,
+      priority = 200,
+  }
+end
+
+-- Return the current (unnormalized) stamina value.
+function Stamina:getValue()
+  return self._value
+end
+
+-- Add `value` (which may be negative) to the current stamina, clamping the
+-- result to the range [0, maxStamina].
+function Stamina:addValue(value)
+  if value < 0 then
+    self._value = math.max(self._value + value, 0)
+  else
+    self._value = math.min(self._value + value, self._config.maxStamina)
+  end
+end
+
+-- Stop stamina from regenerating on updates without a costly action.
+function Stamina:startPreventingRecovery()
+  self._allowRecovery = false
+end
+
+-- Allow stamina to regenerate again.
+function Stamina:stopPreventingRecovery()
+  self._allowRecovery = true
+end
+
+-- Reset stamina when the avatar leaves its wait state and is alive again.
+function Stamina:onStateChange(oldState)
+  local avatar = self.gameObject:getComponent('Avatar')
+  local waitState = avatar:getWaitState()
+  if oldState == waitState and avatar:isAlive() then
+    -- Respawning.
+    self:reset()
+  end
+end
+
+--[[ Return the current stamina amount as a number between 0 and 1.
+
+Returns 1 when value = `maxStamina` and returns 0 when value == 0. Also returns
+0 whenever the avatar is not alive.
+]]
+function Stamina:getNormalizedValue()
+  if self.gameObject:getComponent('Avatar'):isAlive() then
+    return self._value / self._config.maxStamina
+  else
+    -- Also show zero stamina while dead.
+    return 0
+  end
+end
+
+
+local StaminaModulatedByNeed = class.Class(component.Component)
+
+-- Constructor. `needComponent` names the component queried for the need level
+-- (default 'PeriodicNeed').
+function StaminaModulatedByNeed:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('StaminaModulatedByNeed')},
+      {'needComponent', args.default('PeriodicNeed'), args.stringType},
+      -- Decrease stamina by `lossPerStepBeyondThreshold` on every step that the
+      -- threshold defined in `needComponent` has been exceeded.
+      {'lossPerStepBeyondThreshold', args.default(1), args.ge(0)},
+  })
+  StaminaModulatedByNeed.Base.__init__(self, kwargs)
+  self._config.needComponent = kwargs.needComponent
+  self._config.lossPerStepBeyondThreshold = kwargs.lossPerStepBeyondThreshold
+end
+
+-- Register the updater (priority 5) that drains stamina while need is high.
+function StaminaModulatedByNeed:registerUpdaters(updaterRegistry)
+  local needComponent = self.gameObject:getComponent(self._config.needComponent)
+  local staminaComponent = self.gameObject:getComponent('Stamina')
+
+  local function updateStaminaAccordingToNeedLevel()
+    local needValue = needComponent:getNeed()
+    -- Reduce stamina by `lossPerStepBeyondThreshold` on every timestep that
+    -- need is at or above threshold (a need value of 1 or more).
+    if needValue >= 1 then
+      staminaComponent:addValue(-self._config.lossPerStepBeyondThreshold)
+    end
+  end
+
+  updaterRegistry:registerUpdater{
+      updateFn = updateStaminaAccordingToNeedLevel,
+      priority = 5,
+  }
+end
+
+
+--[[ The `StaminaObservation` component adds an observation holding the
+avatar's current stamina, as returned by the `getNormalizedValue` method of
+its stamina component.
+
+The resulting observation key will be `playerIndex`.STAMINA.
+]]
+local StaminaObservation = class.Class(component.Component)
+
+-- Constructor. `staminaComponent` names the component whose value is
+-- observed (default 'Stamina').
+function StaminaObservation:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('StaminaObservation')},
+      {'staminaComponent', args.default('Stamina'), args.stringType},
+  })
+  StaminaObservation.Base.__init__(self, kwargs)
+  self._config.staminaComponent = kwargs.staminaComponent
+end
+
+-- Append a scalar 'Doubles' observation named '<playerIndex>.STAMINA' whose
+-- value is the stamina component's `getNormalizedValue()`.
+function StaminaObservation:addObservations(tileSet, world, observations)
+  local playerIndex = self.gameObject:getComponent('Avatar'):getIndex()
+  local stamina = self.gameObject:getComponent(self._config.staminaComponent)
+
+  observations[#observations + 1] = {
+      name = tostring(playerIndex) .. '.STAMINA',
+      type = 'Doubles',
+      shape = {},
+      func = function(grid)
+        return stamina:getNormalizedValue()
+      end
+  }
+end
+
+
+--[[ Optionally send a shaping reward when stamina falls into specified bands.
+
+The `red` band indicates stamina is lowest.
+The `yellow` band indicates stamina is just above the lowest.
+The `green` band indicates stamina is relatively high.
+The `invisible` band indicates stamina is at its highest.
+
+This is off by default. It can be used to train background populations that
+rapidly learn to avoid letting their stamina drop too low.
+]]
+local RewardForStaminaLevel = class.Class(component.Component)
+
+function RewardForStaminaLevel:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('RewardForStaminaLevel')},
+      -- Selecting `rewardValue` of 0.0 makes this component do nothing.
+      {'rewardValue', args.default(0.0), args.numberType},
+      -- Which stamina bands to reward.
+      {'bands', args.default({'red'}), args.tableType},
+  })
+  RewardForStaminaLevel.Base.__init__(self, kwargs)
+  self._config.rewardValue = kwargs.rewardValue
+  -- Stored as a set so the updater can test band membership by indexing.
+  self._config.setOfBandsToReward = set.Set(kwargs.bands)
+end
+
+-- Register the updater (priority 2) that delivers `rewardValue` whenever the
+-- avatar's current stamina band is one of the configured `bands`.
+function RewardForStaminaLevel:registerUpdaters(updaterRegistry)
+  local stamina = self.gameObject:getComponent('Stamina')
+  local avatar = self.gameObject:getComponent('Avatar')
+
+  -- Despite its name, this rewards any band listed in `bands`, not only 'red'
+  -- ('red' is merely the default).
+  local function sendRewardIfStaminaInRedBand()
+    if self._config.setOfBandsToReward[stamina:getBand()] then
+      avatar:addReward(self._config.rewardValue)
+    end
+  end
+
+  updaterRegistry:registerUpdater{
+      updateFn = sendRewardIfStaminaInRedBand,
+      priority = 2,
+  }
+end
+
+
+local StaminaBar = class.Class(component.Component)
+
+-- Constructor. `playerIndex` selects the avatar this bar belongs to, `layer`
+-- is the bar's render layer, `waitState` is the state used while the avatar
+-- is dead, and `direction` ('N', 'E', 'S' or 'W') is both the bar's offset
+-- from the avatar and its orientation.
+function StaminaBar:__init__(kwargs)
+  kwargs = args.parse(kwargs, {
+      {'name', args.default('StaminaBar')},
+      {'playerIndex', args.ge(0)},
+      {'waitState', args.stringType},
+      {'layer', args.stringType},
+      {'direction', args.oneOf('N', 'E', 'S', 'W')}
+  })
+  StaminaBar.Base.__init__(self, kwargs)
+
+  self._playerIndex = kwargs.playerIndex
+  self._waitState = kwargs.waitState
+  self._layer = kwargs.layer
+  self._direction = kwargs.direction
+end
+
+function StaminaBar:addHits(worldConfig)
+  -- Each avatar has a unique layer for its stamina bar. Ensure it is added to
+  -- the back of the render order.
+  component.insertIfNotPresent(worldConfig.renderOrder, self._layer)
+end
+
+-- Switch this object to the state named 'level_<level>'.
+function StaminaBar:_setLevel(level)
+  self.gameObject:setState('level_' .. tostring(level))
+end
+
+-- Register two updaters: one mirrors the avatar's stamina value into the
+-- bar's state (skipped while the bar is in its wait state), the other
+-- re-applies the bar's fixed orientation.
+function StaminaBar:registerUpdaters(updaterRegistry)
+  local function setBarState()
+    if self.gameObject:getState() ~= self._waitState then
+      local stamina = self._staminaComponent:getValue()
+      self:_setLevel(stamina)
+    end
+  end
+
+  local function rotate()
+    self.gameObject:setOrientation(self._direction)
+  end
+
+  updaterRegistry:registerUpdater{
+      updateFn = setBarState,
+      -- The stamina bar should be the very last thing to update, after all
+      -- effects that could potentially change stamina levels have resolved.
+      priority = 2,
+  }
+  updaterRegistry:registerUpdater{
+      updateFn = rotate,
+      priority = 100,
+  }
+end
+
+-- Look up the owning avatar, cache its `Stamina` component, place the bar on
+-- the cell next to the avatar (offset by `direction`) and connect it to the
+-- avatar.
+function StaminaBar:postStart()
+  local sim = self.gameObject.simulation
+  self._avatarObject = sim:getAvatarFromIndex(self._playerIndex)
+  local avatarComponent = self._avatarObject:getComponent('Avatar')
+  self._staminaComponent = self._avatarObject:getComponent('Stamina')
+
+  -- Note that it is essential to set the state before teleporting it.
+  -- This is because pieces with no assigned layer have no position, and thus
+  -- cannot be teleported.
+  self:_setLevel(self._staminaComponent:getValue())
+  local avatarPosition = tensor.Tensor(self._avatarObject:getPosition())
+  local offsetPosition = avatarPosition:cadd(_DIRECTION[self._direction]):val()
+  self.gameObject:teleport(offsetPosition, self._direction)
+
+  -- Connect this object to the avatar game object.
+  avatarComponent:connect(self.gameObject)
+end
+
+-- React to the avatar's life-cycle: on 'respawn' the bar is disconnected,
+-- refreshed, moved next to the avatar and reconnected; on 'die' it is put
+-- into its wait state. Other behaviors are ignored.
+function StaminaBar:avatarStateChange(behavior)
+  local avatarComponent = self._avatarObject:getComponent('Avatar')
+  -- If the avatar's state has changed, then also update the state of
+  -- the avatar connector.
+  if behavior == 'respawn' then
+    avatarComponent:disconnect(self.gameObject)
+    -- Set the respawning player's stamina level.
+    self:_setLevel(self._staminaComponent:getValue())
+    -- When coming to life, also teleport to the right location.
+    local avatarPosition = tensor.Tensor(self._avatarObject:getPosition())
+    local offsetPosition = avatarPosition:cadd(_DIRECTION[self._direction]):val()
+    self.gameObject:teleport(offsetPosition, self._direction)
+    avatarComponent:connect(self.gameObject)
+  elseif behavior == 'die' then
+    self.gameObject:setState(self._waitState)
+  end
+end
+
+
+-- Every component defined in this file, keyed by its registered name.
+local allComponents = {
+    -- Avatar components.
+    Stamina = Stamina,
+    StaminaModulatedByNeed = StaminaModulatedByNeed,
+    StaminaObservation = StaminaObservation,
+    RewardForStaminaLevel = RewardForStaminaLevel,
+    -- Overlay object components.
+    StaminaBar = StaminaBar,
+}
+
+component_registry.registerAllComponents(allComponents)
+
+return allComponents
diff --git a/meltingpot/lua/levels/territory/components.lua b/meltingpot/lua/levels/territory/components.lua
index 4c91509d..b090b793 100644
--- a/meltingpot/lua/levels/territory/components.lua
+++ b/meltingpot/lua/levels/territory/components.lua
@@ -1,4 +1,4 @@
---[[ Copyright 2020 DeepMind Technologies Limited.
+--[[ Copyright 2022 DeepMind Technologies Limited.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -19,11 +19,20 @@ local helpers = require 'common.helpers'
 local log = require 'common.log'
 local events = require 'system.events'
 local random = require 'system.random'
+local tensor = require 'system.tensor'
 
 local meltingpot = 'meltingpot.lua.modules.'
 local component = require(meltingpot .. 'component')
 local component_registry = require(meltingpot ..
'component_registry') +local _COMPASS = {'N', 'E', 'S', 'W'} + +local _DIRECTION = { + N = tensor.Tensor({0, -1}), + E = tensor.Tensor({1, 0}), + S = tensor.Tensor({0, 1}), + W = tensor.Tensor({-1, 0}), +} local AllBeamBlocker = class.Class(component.Component) @@ -48,7 +57,9 @@ function Resource:__init__(kwargs) {'destroyedState', args.stringType}, {'reward', args.numberType}, {'rewardRate', args.numberType}, - {'rewardDelay', args.numberType} + {'rewardDelay', args.numberType}, + {'delayTillSelfRepair', args.default(15), args.ge(0)}, -- frames + {'selfRepairProbability', args.default(0.1), args.ge(0.0), args.le(1.0)}, }) Resource.Base.__init__(self, kwargs) @@ -57,6 +68,8 @@ function Resource:__init__(kwargs) self._config.reward = kwargs.reward self._config.rewardRate = kwargs.rewardRate self._config.rewardDelay = kwargs.rewardDelay + self._config.delayTillSelfRepair = kwargs.delayTillSelfRepair + self._config.selfRepairProbability = kwargs.selfRepairProbability end function Resource:reset() @@ -65,6 +78,7 @@ function Resource:reset() self._claimedByAvatarComponent = nil self._neverYetClaimed = true self._destroyed = false + self._framesSinceZapped = nil end function Resource:registerUpdaters(updaterRegistry) @@ -122,7 +136,7 @@ function Resource:_claim(hittingGameObject) end function Resource:onHit(hittingGameObject, hitName) - if hitName == 'directionHit' then + if string.sub(hitName, 1, string.len('directionHit')) == 'directionHit' then self:_claim(hittingGameObject) end @@ -137,6 +151,7 @@ function Resource:onHit(hittingGameObject, hitName) if hitName == 'zapHit' then self._health = self._health - 1 + self._framesSinceZapped = 0 if self._health == 0 then -- Reset the health state variable. self._health = self._config.initialHealth @@ -144,6 +159,10 @@ function Resource:onHit(hittingGameObject, hitName) self.gameObject:setState(self._config.destroyedState) -- Tell the reward indicator the resource was destroyed. 
self._rewardingStatus = 'inactive' + -- Destroy the resource's associated texture objects. + self._texture_object:setState('destroyed') + -- Tell the resource's associated damage indicator. + self._associatedDamageIndicator:setState('inactive') -- Record the destruction event. local playerIndex = hittingGameObject:getComponent('Avatar'):getIndex() events:add('destroyed_resource', 'dict', @@ -164,6 +183,28 @@ function Resource:start() self._numPlayers = self.gameObject.simulation:getNumPlayers() end +function Resource:postStart() + self._texture_object = self.gameObject:getComponent( + 'Transform'):queryPosition('lowerPhysical') + self._associatedDamageIndicator = self.gameObject:getComponent( + 'Transform'):queryPosition('superDirectionIndicatorLayer') +end + +function Resource:update() + if self._health < self._config.initialHealth then + self._associatedDamageIndicator:setState('damaged') + if self._framesSinceZapped >= self._config.delayTillSelfRepair then + if random:uniformReal(0, 1) < self._config.selfRepairProbability then + self._health = self._health + 1 + if self._health == self._config.initialHealth then + self._associatedDamageIndicator:setState('inactive') + end + end + end + self._framesSinceZapped = self._framesSinceZapped + 1 + end +end + function Resource:getRewardingStatus() return self._rewardingStatus end @@ -263,8 +304,11 @@ end function RewardIndicator:update() local resourceComponent = self._pairedResource:getComponent('Resource') local newStatus = resourceComponent:getRewardingStatus() - if newStatus ~= self.gameObject:getState() then - self.gameObject:setState(newStatus) + local resourceState = resourceComponent.gameObject:getState() + if newStatus == "active" then + self.gameObject:setState("dry_" .. 
resourceState) + else + self.gameObject:setState("inactive") end end @@ -312,11 +356,85 @@ function Taste:addDefaultReward(defaultReward) end +-- The `Paintbrush` component endows an avatar with the ability to grasp an +-- object in the direction they are facing. + +local Paintbrush = class.Class(component.Component) + +function Paintbrush:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Paintbrush')}, + {'shape', args.tableType}, + {'palette', args.tableType}, + {'playerIndex', args.numberType}, + }) + Paintbrush.Base.__init__(self, kwargs) + self._config.shape = kwargs.shape + self._config.palette = kwargs.palette + self._config.playerIndex = kwargs.playerIndex +end + +function Paintbrush:addSprites(tileSet) + for j=1, 4 do + local spriteData = { + palette = self._config.palette, + text = self._config.shape[j], + noRotate = true + } + tileSet:addShape( + 'brush' .. self._config.playerIndex .. '.' .. _COMPASS[j], spriteData) + end +end + +function Paintbrush:addHits(worldConfig) + local playerIndex = self._config.playerIndex + for j=1, 4 do + local hitName = 'directionHit' .. playerIndex + worldConfig.hits[hitName] = { + layer = 'directionIndicatorLayer', + sprite = 'brush' .. self._config.playerIndex, + } + end +end + + +function Paintbrush:registerUpdaters(updaterRegistry) + local playerIndex = self._config.playerIndex + self._avatar = self.gameObject:getComponent('Avatar') + local drawBrush = function() + local beam = 'directionHit' .. 
playerIndex + self.gameObject:hitBeam(beam, 1, 0) + end + updaterRegistry:registerUpdater{ + updateFn = drawBrush, + priority = 130, + } +end + + +local Destructable = class.Class(component.Component) + +function Destructable:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Destructable')}, + }) + Destructable.Base.__init__(self, kwargs) +end + +function Destructable:onHit(hittingGameObject, hitName) + if hitName == 'zapHit' then + self.gameObject:setState('destroyed') + end + return false +end + local allComponents = { -- Non-avatar components. AllBeamBlocker = AllBeamBlocker, Resource = Resource, RewardIndicator = RewardIndicator, + Paintbrush = Paintbrush, + Destructable = Destructable, -- Avatar components. ResourceClaimer = ResourceClaimer, diff --git a/meltingpot/lua/levels/territory/init.lua b/meltingpot/lua/levels/territory/init.lua index 6a53edba..9615ca0d 100644 --- a/meltingpot/lua/levels/territory/init.lua +++ b/meltingpot/lua/levels/territory/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -13,24 +13,32 @@ See the License for the specific language governing permissions and limitations under the License. ]] --- Entry point lua file for the territory level. +-- Entry point lua file for the territory substrate. +local class = require 'common.class' local meltingpot = 'meltingpot.lua.modules.' local api_factory = require(meltingpot .. 'api_factory') local simulation = require(meltingpot .. 'base_simulation') --- Required to be able to use the components in the level +-- Required to be able to use the components in the substrate local component_library = require(meltingpot .. 'component_library') local avatar_library = require(meltingpot .. 
'avatar_library') local components = require 'components' +local OverrideSimulation = class.Class(simulation.BaseSimulation) + +function OverrideSimulation:worldConfig() + local config = simulation.BaseSimulation.worldConfig(self) + -- Add layer 'directionIndicatorLayer' after all the default layers. + table.insert(config.renderOrder, 'directionIndicatorLayer') + -- Also add 'superDirectionIndicatorLayer' after 'directionIndicatorLayer'. + table.insert(config.renderOrder, 'superDirectionIndicatorLayer') + return config +end + return api_factory.apiFactory{ - Simulation = simulation.BaseSimulation, + Simulation = OverrideSimulation, settings = { - -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. - spriteSize = 8, - -- Terminate the episode after this many frames. - maxEpisodeLengthFrames = 1000, -- Settings to pass to simulation.lua. simulation = {}, } diff --git a/meltingpot/lua/levels/the_matrix/components.lua b/meltingpot/lua/levels/the_matrix/components.lua index 588252ea..ac679065 100644 --- a/meltingpot/lua/levels/the_matrix/components.lua +++ b/meltingpot/lua/levels/the_matrix/components.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -34,7 +34,6 @@ function Resource:__init__(kwargs) {'resourceClass', args.numberType}, {'visibleType', args.stringType}, {'waitState', args.stringType}, - {'groupToRespawn', args.stringType}, {'regenerationRate', args.ge(0.0), args.le(1.0)}, {'regenerationDelay', args.positive}, }) @@ -42,30 +41,30 @@ function Resource:__init__(kwargs) self._config.resourceClass = kwargs.resourceClass self._config.visibleType = kwargs.visibleType self._config.waitState = kwargs.waitState - self._config.groupToRespawn = kwargs.groupToRespawn self._config.regenerationRate = kwargs.regenerationRate self._config.regenerationDelay = kwargs.regenerationDelay end function Resource:reset() self._variables = {} - self._variables._regenTimer = self._config.regenerationDelay -end - -function Resource:onStateChange() - self._variables._regenTimer = self._config.regenerationDelay end function Resource:onEnter(enteringGameObject, contactName) if self.gameObject:getState() == self._config.visibleType and contactName == 'avatar' then local sceneObject = self.gameObject.simulation:getSceneObject() + local theMatrix = sceneObject:getComponent('TheMatrix') -- Increment inventory. local playerIndex = enteringGameObject:getComponent('Avatar'):getIndex() - local inventory = ( - sceneObject:getComponent('TheMatrix'):getPlayerInventory(playerIndex)) + local inventory = theMatrix:getPlayerInventory(playerIndex) local amount = inventory(self._config.resourceClass):val() inventory(self._config.resourceClass):fill(amount + 1) + -- Record that at least one resource was collected by this player. + theMatrix.playerCollectedAtLeastOneResource[playerIndex] = true + -- Set the 'ready' indicator state to show at least one resource collected. + if theMatrix.indicators[playerIndex] == 'notReady' then + theMatrix.indicators[playerIndex] = 'ready' + end -- Remove the resource from the map. self.gameObject:setState(self._config.waitState) -- Consult the `Taste` component to determine if rewards should be provided. 
@@ -76,28 +75,54 @@ function Resource:onEnter(enteringGameObject, contactName) end -- Report the resource collection event. self:_reportCollectionEvent(playerIndex, self._config.resourceClass) + -- Update the avatar cumulants tracking resource collection. + self:_updateCumulants(enteringGameObject, self._config.resourceClass) end end -function Resource:update() - if self._variables._regenTimer <= 0 then +function Resource:registerUpdaters(updaterRegistry) + local transform = self.gameObject:getComponent('Transform') + local function maybeRespawn() if random:uniformReal(0, 1) < self._config.regenerationRate then - self.gameObject:setState(self._config.visibleType) + local maybeAvatar = transform:queryPosition('upperPhysical') + if not maybeAvatar then + -- Only spawn a resource if an avatar is not currently standing here. + self.gameObject:setState(self._config.visibleType) + end end end - self._variables._regenTimer = self._variables._regenTimer - 1 + updaterRegistry:registerUpdater{ + updateFn = maybeRespawn, + priority = 100, + state = self._config.waitState, + startFrame = self._config.regenerationDelay, + } end function Resource:getResourceClass() return self._config.resourceClass end +function Resource:getVisibleState() + return self._config.visibleType +end + +function Resource:regenerate() + self.gameObject:setState(self._config.visibleType) +end + function Resource:_reportCollectionEvent(playerIndex, resourceClass) events:add('collected_resource', 'dict', 'player_index', playerIndex, 'class', resourceClass) end +function Resource:_updateCumulants(avatarObject, collectedResourceClass) + local gameInteractionZapper = avatarObject:getComponent( + 'GameInteractionZapper') + gameInteractionZapper:setResourceCollectionCumulant(collectedResourceClass) +end + local Destroyable = class.Class(component.Component) @@ -105,12 +130,10 @@ function Destroyable:__init__(kwargs) kwargs = args.parse(kwargs, { {'name', args.default('Destroyable')}, {'initialHealth', 
args.positive}, - {'visibleType', args.stringType}, {'waitState', args.stringType}, }) Destroyable.Base.__init__(self, kwargs) self._config.initialHealth = kwargs.initialHealth - self._config.visibleType = kwargs.visibleType self._config.waitState = kwargs.waitState end @@ -140,10 +163,13 @@ end function Destroyable:_reportDestructionEvent(hitterGameObject) local playerIndex = hitterGameObject:getComponent('Avatar'):getIndex() local resourceClass = self.gameObject:getComponent( - 'Resource'):getResourceClass() + 'Resource'):getResourceClass() events:add('destroyed_resource', 'dict', 'player_index', playerIndex, 'class', resourceClass) + local gameInteractionZapper = hitterGameObject:getComponent( + 'GameInteractionZapper') + gameInteractionZapper:setResourceDestructionCumulant(resourceClass) end @@ -163,18 +189,24 @@ function TheMatrix:__init__(kwargs) -- By default, all players start out with 1 of each resource type. This -- avoids a singularity at 0 in running with scissors, but it is not -- desirable for all games. In such cases, pass `true` to initialize at 0. - {'zero_initial_inventory', args.default(false), args.booleanType}, + {'zeroInitialInventory', args.default(false), args.booleanType}, -- By default, row players win whenever rewards are tied. {'randomTieBreaking', args.default(false), args.booleanType}, + -- By default, players can still interact despite not having picked up + -- any resources yet. + {'disallowUnreadyInteractions', args.default(false), args.booleanType}, + -- Set intervals of reward to use to determine result indicator color. + {'resultIndicatorColorIntervals', args.tableType}, }) TheMatrix.Base.__init__(self, kwargs) -- Matrices are always square since all players have the same available -- strategies as one another. 
self._config.numResources = #kwargs.matrix - self._config.zero_initial_inventory = kwargs.zero_initial_inventory + self._config.zeroInitialInventory = kwargs.zeroInitialInventory self._rowPlayerMatrix = tensor.DoubleTensor(self:_cleanMatrix(kwargs.matrix)) self.randomTieBreaking = kwargs.randomTieBreaking + self.disallowUnreadyInteractions = kwargs.disallowUnreadyInteractions if kwargs.columnPlayerMatrix then self._columnPlayerMatrix = tensor.DoubleTensor( @@ -182,17 +214,27 @@ function TheMatrix:__init__(kwargs) else self._columnPlayerMatrix = self._rowPlayerMatrix:clone():transpose(1, 2) end + + self.resultIndicatorColorIntervals = kwargs.resultIndicatorColorIntervals end function TheMatrix:reset() self.playerResources = tensor.DoubleTensor( self.gameObject.simulation:getNumPlayers(), self._config.numResources) - if self._config.zero_initial_inventory then + if self._config.zeroInitialInventory then self.playerResources:fill(0) else self.playerResources:fill(1) end + self.playerCollectedAtLeastOneResource = {} + for i = 1, self.gameObject.simulation:getNumPlayers() do + table.insert(self.playerCollectedAtLeastOneResource, false) + end + self.indicators = {} + for i = 1, self.gameObject.simulation:getNumPlayers() do + table.insert(self.indicators, 'notReady') + end end --[[ Coming in from the command line adds some extra fields to the matrix table @@ -224,12 +266,56 @@ function TheMatrix:getColumnPlayerMatrix() return self._columnPlayerMatrix end -function TheMatrix:resetInventory(inventory) - if self._config.zero_initial_inventory then +function TheMatrix:resetInventory(inventory, playerIndex) + if self._config.zeroInitialInventory then inventory:fill(0) else inventory:fill(1) end + if playerIndex then + self.playerCollectedAtLeastOneResource[playerIndex] = false + end +end + +function TheMatrix:getColorInterval(reward) + for idx, interval in ipairs(self.resultIndicatorColorIntervals) do + if interval[1] <= reward and reward < interval[2] then + return idx + end 
+ end + assert(false, + 'reward: ' .. tostring(reward) .. ' not found in color intervals.') +end + + +local SpawnResourcesWhenAllPlayersZapped = class.Class(component.Component) + +function SpawnResourcesWhenAllPlayersZapped:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('SpawnResourcesWhenAllPlayersZapped')}, + }) + SpawnResourcesWhenAllPlayersZapped.Base.__init__(self, kwargs) +end + +function SpawnResourcesWhenAllPlayersZapped:registerUpdaters(updaterRegistry) + local simulation = self.gameObject.simulation + + local function step() + local numLiveAvatars = simulation:getGroupCount('players') + if numLiveAvatars == 0 then + local resourceObjects = simulation:getGroupShuffledWithProbability( + 'resourceWaits', 1.0) + for _, resourceObject in ipairs(resourceObjects) do + local resource = resourceObject:getComponent('Resource') + local visibleState = resource:getVisibleState() + resourceObject:setState(visibleState) + end + end + end + updaterRegistry:registerUpdater{ + updateFn = step, + priority = 7, + } end @@ -259,24 +345,45 @@ function GameInteractionZapper:__init__(kwargs) {'winningPlayerDies', args.default(false), args.booleanType}, -- Only deliver rewards larger than `rewardFloor`. {'rewardFloor', args.default(-1e6), args.numberType}, + -- Multiply the game's main reward signal by the following value. This is + -- useful for games like "running with scissors" where the actual reward + -- values tend to be quite a lot smaller than in other games. + {'rewardMultiplier', args.default(1.0), args.numberType}, + -- When the value of `rewardFromZappingUnreadyPlayer` is not 0 it will + -- typically be negative since that would be a situation where we aim to + -- deter players from zapping their partners before they are ready (i.e. + -- before they have collected any resources). 
+ {'rewardFromZappingUnreadyPlayer', args.default(0), args.numberType}, + -- Both interactants freeze after interacting for `freezeOnInteraction` + -- steps (zero by default, and if used normally would be just a few steps). + {'freezeOnInteraction', args.default(0), args.ge(0)}, + -- This color is yellow. + {'beamColor', args.default({252, 252, 106}), args.tableType}, }) GameInteractionZapper.Base.__init__(self, kwargs) self._config.cooldownTime = kwargs.cooldownTime self._config.beamLength = kwargs.beamLength self._config.beamRadius = kwargs.beamRadius + self._config.beamColor = kwargs.beamColor self._config.framesTillRespawn = kwargs.framesTillRespawn self._config.numResources = kwargs.numResources - self._config.endEpisodeOnFirstInteraction = - kwargs.endEpisodeOnFirstInteraction + self._config.endEpisodeOnFirstInteraction = ( + kwargs.endEpisodeOnFirstInteraction) self._config.reset_winner_inventory = kwargs.reset_winner_inventory self._config.reset_loser_inventory = kwargs.reset_loser_inventory self._config.losingPlayerDies = kwargs.losingPlayerDies self._config.winningPlayerDies = kwargs.winningPlayerDies self._config.rewardFloor = kwargs.rewardFloor + self._config.rewardMultiplier = kwargs.rewardMultiplier + + self._config.rewardFromZappingUnreadyPlayer = ( + kwargs.rewardFromZappingUnreadyPlayer) + + self._config.freezeOnInteraction = kwargs.freezeOnInteraction end function GameInteractionZapper:addHits(worldConfig) @@ -284,12 +391,11 @@ function GameInteractionZapper:addHits(worldConfig) layer = 'beamInteraction', sprite = 'BeamInteraction', } - table.insert(worldConfig.renderOrder, 'beamInteraction') + component.insertIfNotPresent(worldConfig.renderOrder, 'beamInteraction') end function GameInteractionZapper:addSprites(tileSet) - -- This color is yellow. 
- tileSet:addColor('BeamInteraction', {252, 252, 106}) + tileSet:addColor('BeamInteraction', self._config.beamColor) end function GameInteractionZapper:registerUpdaters(updaterRegistry) @@ -297,20 +403,22 @@ function GameInteractionZapper:registerUpdaters(updaterRegistry) local waitState = self:getWaitState() local zap = function() - local playerVolatileVariables = ( - self.gameObject:getComponent('Avatar'):getVolatileData()) - local actions = playerVolatileVariables.actions - -- Execute the beam if applicable. - if self.gameObject:getState() == aliveState then - if self._config.cooldownTime >= 0 then - if self._coolingTimer > 0 then - self._coolingTimer = self._coolingTimer - 1 - else - if actions['interact'] == 1 then - self._coolingTimer = self._config.cooldownTime - self.gameObject:hitBeam('gameInteraction', - self._config.beamLength, - self._config.beamRadius) + local avatar = self.gameObject:getComponent('Avatar') + if avatar:isMovementAllowed() then + local playerVolatileVariables = avatar:getVolatileData() + local actions = playerVolatileVariables.actions + -- Execute the beam if applicable. + if self.gameObject:getState() == aliveState then + if self._config.cooldownTime >= 0 then + if self._coolingTimer > 0 then + self._coolingTimer = self._coolingTimer - 1 + else + if actions['interact'] == 1 and self._canZap then + self._coolingTimer = self._config.cooldownTime + self.gameObject:hitBeam('gameInteraction', + self._config.beamLength, + self._config.beamRadius) + end end end end @@ -334,6 +442,62 @@ function GameInteractionZapper:registerUpdaters(updaterRegistry) state = waitState, startFrame = self._config.framesTillRespawn } + + local theMatrix = self.gameObject.simulation:getSceneObject():getComponent( + 'TheMatrix') + local avatar = self.gameObject:getComponent('Avatar') + + local applyScheduledEffects = function() + local playerIndex = avatar:getIndex() + if self._framesTillScheduledEffects == 0 then + -- Implement scheduled effects. 
+ for _, effect in ipairs(self._scheduledEffects) do + effect.func(unpack(effect.arguments)) + end + self._scheduledEffects = {} + -- Remove the result indicator on the same frame effects are implemented. + theMatrix.indicators[playerIndex] = 'notReady' + self._framesTillScheduledEffects = -1 + -- End the episode after the first interaction if applicable. This is used + -- in the one-shot substrate variants. + if self._config.endEpisodeOnFirstInteraction then + self._endEpisodeOnNextFrame = true + end + elseif self._framesTillScheduledEffects > 0 then + self._framesTillScheduledEffects = self._framesTillScheduledEffects - 1 + local colorIdx = theMatrix:getColorInterval(self._rewardToDetermineColor) + theMatrix.indicators[playerIndex] = ( + 'resultIndicatorColor' .. tostring(colorIdx)) + end + end + + updaterRegistry:registerUpdater{ + updateFn = applyScheduledEffects, + priority = 4, + state = aliveState, + } + + local endEpisodeIfApplicable = function() + -- End the episode when the flag is set. This is used in the one-shot + -- substrate variants. + if self._endEpisodeOnNextFrame then + self.gameObject.simulation:endEpisode() + end + end + + updaterRegistry:registerUpdater{ + updateFn = endEpisodeIfApplicable, + priority = 900, + } + + local function resetSimultaneousInteractionBlocker() + self.interactedThisStep = false + end + + updaterRegistry:registerUpdater{ + updateFn = resetSimultaneousInteractionBlocker, + priority = 890, + } end --[[ Compute payoffs based on row and column player strategy profiles. 
@@ -356,7 +520,38 @@ function GameInteractionZapper:_avatarDies(avatarComponent) avatarObject:setState(waitType) end -function GameInteractionZapper:_resolve(rowPlayerIndex, columnPlayerIndex) +function GameInteractionZapper:sendRewardsToBothInteractants( + rowReward, columnReward, + rowResources, columnResources, + rowAvatar, columnAvatar) + if self.gameObject:hasComponent('InteractionTaste') then + local interactionTaste = self.gameObject:getComponent('InteractionTaste') + if rowReward > self._config.rewardFloor then + local rowRewardToDeliver = interactionTaste:getExtraRewardForInteraction( + rowReward, rowResources) + rowAvatar:addReward(rowRewardToDeliver) + end + if columnReward > self._config.rewardFloor then + local colRewardToDeliver = interactionTaste:getExtraRewardForInteraction( + columnReward, columnResources) + columnAvatar:addReward(colRewardToDeliver) + end + else + if rowReward > self._config.rewardFloor then + rowAvatar:addReward(rowReward) + end + if columnReward > self._config.rewardFloor then + columnAvatar:addReward(columnReward) + end + end +end + +function GameInteractionZapper:setFramesTillScheduledEffects(frames) + self._framesTillScheduledEffects = frames +end + +function GameInteractionZapper:_resolve( + rowPlayerIndex, columnPlayerIndex, hitterGameObject) -- Row player setup local rowAvatar = self.gameObject.simulation:getAvatarFromIndex( rowPlayerIndex):getComponent('Avatar') @@ -389,29 +584,24 @@ function GameInteractionZapper:_resolve(rowPlayerIndex, columnPlayerIndex) local rowReward, columnReward = self:_computeInteractionRewards(rowProfile, columnProfile) - -- Send actual rewards, taking into account tastes. 
- if self.gameObject:hasComponent('InteractionTaste') then - local interactionTaste = self.gameObject:getComponent('InteractionTaste') - if rowReward > self._config.rewardFloor then - rowAvatar:addReward(interactionTaste:getExtraRewardForInteraction( - rowReward, rowResources)) - end - if columnReward > self._config.rewardFloor then - columnAvatar:addReward(interactionTaste:getExtraRewardForInteraction( - columnReward, columnResources)) - end - else - if rowReward > self._config.rewardFloor then - rowAvatar:addReward(rowReward) - end - if columnReward > self._config.rewardFloor then - columnAvatar:addReward(columnReward) - end - end - - self:_reportInteraction(rowPlayerIndex, columnPlayerIndex, - rowReward, columnReward, - rowResources, columnResources) + -- Apply the reward multiplier (it is 1.0 by default). + rowReward = self._config.rewardMultiplier * rowReward + columnReward = self._config.rewardMultiplier * columnReward + + self:reportInteraction(rowPlayerIndex, columnPlayerIndex, + rowReward, columnReward, + rowResources, columnResources, + rowAvatar, columnAvatar) + local hitterZapper = hitterGameObject:getComponent('GameInteractionZapper') + hitterZapper:reportInteraction(rowPlayerIndex, columnPlayerIndex, + rowReward, columnReward, + rowResources, columnResources, + rowAvatar, columnAvatar) + + self:reportEventAndCumulants(rowPlayerIndex, columnPlayerIndex, + rowReward, columnReward, + rowResources, columnResources, + rowAvatar, columnAvatar) -- The player who scored lower dies by default. local rowPlayerWon, columnPlayerWon @@ -436,45 +626,146 @@ function GameInteractionZapper:_resolve(rowPlayerIndex, columnPlayerIndex) columnPlayerWon = true end + -- Set num frames till scheduled effects occur. 
+ local rowZapper = rowAvatar.gameObject:getComponent('GameInteractionZapper') + local columnZapper = columnAvatar.gameObject:getComponent( + 'GameInteractionZapper') + rowZapper:setFramesTillScheduledEffects(self._config.freezeOnInteraction) + columnZapper:setFramesTillScheduledEffects(self._config.freezeOnInteraction) + -- Send actual rewards to avatars, taking into account tastes. + local rewardEffect = { + func = self.sendRewardsToBothInteractants, + arguments = {self, rowReward, columnReward, + rowResources, columnResources, + rowAvatar, columnAvatar}, + } + table.insert(self._scheduledEffects, rewardEffect) + -- Populate table of instructions to implement either now or on a later frame. if rowPlayerWon then -- The row player won so the column player dies. if self._config.reset_loser_inventory then - self._matrixComponent:resetInventory(columnResources) + local effect = { + func = self._matrixComponent.resetInventory, + arguments = {self._matrixComponent, columnResources, columnPlayerIndex} + } + table.insert(self._scheduledEffects, effect) end if self._config.reset_winner_inventory then - self._matrixComponent:resetInventory(rowResources) + self._matrixComponent:resetInventory(rowResources, rowPlayerIndex) + local effect = { + func = self._matrixComponent.resetInventory, + arguments = {self._matrixComponent, rowResources, rowPlayerIndex} + } + table.insert(self._scheduledEffects, effect) end if self._config.losingPlayerDies then - self:_avatarDies(columnAvatar) + local effect = {func = self._avatarDies, arguments = {self, columnAvatar}} + table.insert(self._scheduledEffects, effect) end if self._config.winningPlayerDies then - self:_avatarDies(rowAvatar) + local effect = {func = self._avatarDies, arguments = {self, rowAvatar}} + table.insert(self._scheduledEffects, effect) end elseif columnPlayerWon then -- The column player won so the row player dies. 
if self._config.reset_loser_inventory then - self._matrixComponent:resetInventory(rowResources) + local effect = { + func = self._matrixComponent.resetInventory, + arguments = {self._matrixComponent, rowResources, rowPlayerIndex} + } + table.insert(self._scheduledEffects, effect) end if self._config.reset_winner_inventory then - self._matrixComponent:resetInventory(columnResources) + local effect = { + func = self._matrixComponent.resetInventory, + arguments = {self._matrixComponent, columnResources, columnPlayerIndex} + } + table.insert(self._scheduledEffects, effect) end if self._config.losingPlayerDies then - self:_avatarDies(rowAvatar) + local effect = {func = self._avatarDies, arguments = {self, rowAvatar}} + table.insert(self._scheduledEffects, effect) end if self._config.winningPlayerDies then - self:_avatarDies(columnAvatar) + local effect = {func = self._avatarDies, arguments = {self, columnAvatar}} + table.insert(self._scheduledEffects, effect) end end + -- If there are any scheduled effects then disallow movement until they occur. + if #self._scheduledEffects > 0 then + -- Adding 2 to `freezeOnInteraction` ensures it is impossible to move on the + -- last frame before the scheduled effects occur. + local numFramesToFreeze = self._config.freezeOnInteraction + 2 + rowAvatar:disallowMovementUntil(numFramesToFreeze) + columnAvatar:disallowMovementUntil(numFramesToFreeze) + + rowZapper._rewardToDetermineColor = rowReward + columnZapper._rewardToDetermineColor = columnReward + end +end + +function GameInteractionZapper:_preventExtraSimultaneousInteraction( + hitterGameObject) + -- Prevent interaction of more than two players at a time, in one step. 
+ if self.interactedThisStep then + return true + end + self.interactedThisStep = true + local hitterZapper = hitterGameObject:getComponent('GameInteractionZapper') + if hitterZapper.interactedThisStep then + return true + end + hitterZapper.interactedThisStep = true + return false end function GameInteractionZapper:onHit(hitterGameObject, hitName) if hitName == 'gameInteraction' then + if self:_preventExtraSimultaneousInteraction(hitterGameObject) then + -- Beams do not pass through. + return true + end + + if self._framesTillScheduledEffects >= 0 then + -- It is not possible to interact with a player who is already interacting + -- with another player, i.e. players cannot be zapped while frozen. + -- Return true so beams do not pass through the hit player. + return true + end + + local theMatrix = self.gameObject.simulation:getSceneObject():getComponent( + 'TheMatrix') + local zapperAvatar = hitterGameObject:getComponent('Avatar') local zapperIdx = zapperAvatar:getIndex() local zappedAvatar = self.gameObject:getComponent('Avatar') local zappedIdx = zappedAvatar:getIndex() + -- Deliver a reward to any player who zaps a player who is not yet ready (i.e. + -- who has not yet collected any resources). Typically this reward will be + -- zero in which case there is no penalty from zapping an unready player. + -- In some cases we may make this reward negative in order to discourage + -- players from zapping others who have not yet collected resources. + if not theMatrix.playerCollectedAtLeastOneResource[zappedIdx] then + zapperAvatar:addReward(self._config.rewardFromZappingUnreadyPlayer) + end + + -- If disallowUnreadyInteractions = true then don't bother computing the + -- effect of the interaction unless both players are ready. 
+ if theMatrix.disallowUnreadyInteractions then + local bothReady = ( + theMatrix.playerCollectedAtLeastOneResource[zapperIdx] and + theMatrix.playerCollectedAtLeastOneResource[zappedIdx]) + if not bothReady then + return true -- block the beam from passing through the hit player. + end + end + + -- At this point the interaction is definitely going to be resolved. So we + -- can safely set the cumulant for both zapper and zapped players. + self:_setInteractionCumulant(hitterGameObject, self.gameObject) + if self.gameObject:hasComponent( 'DyadicRole') and hitterGameObject:hasComponent('DyadicRole') then -- If the role component is present then assign row versus column player @@ -486,18 +777,14 @@ function GameInteractionZapper:onHit(hitterGameObject, hitName) -- player cannot interact with another row player. A column player cannot -- interact with another column player. if zapperRole:isRowPlayer() and not zappedRole:isRowPlayer() then - self:_resolve(zapperIdx, zappedIdx) + self:_resolve(zapperIdx, zappedIdx, hitterGameObject) elseif not zapperRole:isRowPlayer() and zappedRole:isRowPlayer() then - self:_resolve(zappedIdx, zapperIdx) + self:_resolve(zappedIdx, zapperIdx, hitterGameObject) end else -- By default the zapper avatar is the row player and the zapped avatar is -- the column player. - self:_resolve(zapperIdx, zappedIdx) - end - - if self._config.endEpisodeOnFirstInteraction then - self.gameObject.simulation:endEpisode() + self:_resolve(zapperIdx, zappedIdx, hitterGameObject) end -- Return `true` to prevent beams from passing through hit players. 
@@ -505,9 +792,32 @@ function GameInteractionZapper:onHit(hitterGameObject, hitName) end end -function GameInteractionZapper:_reportInteraction(rowPlayerIdx, colPlayerIdx, - rowReward, colReward, - rowInventory, colInventory) +function GameInteractionZapper:reportInteraction(rowPlayerIdx, colPlayerIdx, + rowReward, colReward, + rowInventory, colInventory, + rowAvatar, columnAvatar) + -- Am I being called on the row or column player? + local selfIndex = self.gameObject:getComponent('Avatar'):getIndex() + local selfIsRowPlayer = selfIndex == rowPlayerIdx + if not selfIsRowPlayer then + assert(selfIndex == colPlayerIdx, 'Self was neither row nor column player.') + end + -- Update variables which can be read by a metric reporter. + if selfIsRowPlayer then + -- List self first. In this case, self was the row player. + self.latest_interaction_inventories(1):val(rowInventory:val()) + self.latest_interaction_inventories(2):val(colInventory:val()) + else + -- List self first. In this case, self was the column player. + self.latest_interaction_inventories(1):val(colInventory:val()) + self.latest_interaction_inventories(2):val(rowInventory:val()) + end +end + +function GameInteractionZapper:reportEventAndCumulants( + rowPlayerIdx, colPlayerIdx, rowReward, colReward, rowInventory, + colInventory, rowAvatar, columnAvatar) + -- Event: interaction: event_type, key1, X, key2, Y, ... events:add('interaction', 'dict', 'row_player_idx', rowPlayerIdx, @@ -516,9 +826,49 @@ function GameInteractionZapper:_reportInteraction(rowPlayerIdx, colPlayerIdx, 'col_reward', colReward, 'row_inventory', rowInventory, 'col_inventory', colInventory) - -- Also update variables which can be read by a metric reporter. - self.latest_interaction_inventories(1):val(rowInventory:val()) - self.latest_interaction_inventories(2):val(colInventory:val()) + + -- Update cumulants so they can be read by a metric reporter. 
+ rowAvatar.gameObject:getComponent( + 'GameInteractionZapper'):setArgMaxCumulants(rowInventory) + columnAvatar.gameObject:getComponent( + 'GameInteractionZapper'):setArgMaxCumulants(colInventory) +end + +function GameInteractionZapper:setArgMaxCumulants(inventory) + if inventory:max(1):val() > 0 then + local indexOfMaximalItemType = inventory:argMax(1):val() + local cumulantName = 'argmax_interaction_inventory_was_' .. tostring( + indexOfMaximalItemType) + self[cumulantName] = 1 + end +end + +function GameInteractionZapper:_resetBinaryCumulants() + -- Note: cumulant names are in Python style to suggest that they will mainly + -- be used from there. Also, as observations they will be in all capitals + -- so the camel case style won't be possible in that form. + self.interacted_this_step = 0 + self.argmax_interaction_inventory_was_1 = 0 + self.argmax_interaction_inventory_was_2 = 0 + self.argmax_interaction_inventory_was_3 = 0 + self.collected_resource_1 = 0 + self.collected_resource_2 = 0 + self.collected_resource_3 = 0 + self.destroyed_resource_1 = 0 + self.destroyed_resource_2 = 0 + self.destroyed_resource_3 = 0 +end + +function GameInteractionZapper:_setInteractionCumulant(zapSourceObject, + zapTargetObject) + local zapSourceGameInteractionComponent = zapSourceObject:getComponent( + 'GameInteractionZapper') + local zapTargetGameInteractionComponent = zapTargetObject:getComponent( + 'GameInteractionZapper') + -- Set interaction cumulant value to 1 for both zapper and zapped players as + -- both of them were part of an interaction on this frame. 
+ zapSourceGameInteractionComponent.interacted_this_step = 1 + zapTargetGameInteractionComponent.interacted_this_step = 1 end function GameInteractionZapper:start() @@ -526,6 +876,17 @@ function GameInteractionZapper:start() self._coolingTimer = 0 self.latest_interaction_inventories = tensor.DoubleTensor( 2, self._config.numResources) + + -- Create variables to hold cumulant data (scalars that can be used as + -- instantaneous reward signals for generalized value function learning). + self:_resetBinaryCumulants() + + self._scheduledEffects = {} + self._framesTillScheduledEffects = -1 + + self._endEpisodeOnNextFrame = false + + self._canZap = true end function GameInteractionZapper:postStart() @@ -538,6 +899,8 @@ function GameInteractionZapper:update() -- interaction occured on the previous frame then it will always be set to -- the impossible inventory values of {-1, -1} self.latest_interaction_inventories:fill(-1) + -- Reset cumulants. + self:_resetBinaryCumulants() end function GameInteractionZapper:getAliveState() @@ -553,6 +916,24 @@ function GameInteractionZapper:readyToShoot() return 1 - normalizedTimeTillReady end +function GameInteractionZapper:setResourceCollectionCumulant(resourceClass) + local cumulantName = 'collected_resource_' .. tostring(resourceClass) + self[cumulantName] = 1 +end + +function GameInteractionZapper:setResourceDestructionCumulant(resourceClass) + local cumulantName = 'destroyed_resource_' .. tostring(resourceClass) + self[cumulantName] = 1 +end + +function GameInteractionZapper:disallowZapping() + self._canZap = false +end + +function GameInteractionZapper:allowZapping() + self._canZap = true +end + local InventoryObserver = class.Class(component.Component) @@ -672,6 +1053,104 @@ function DyadicRole:isRowPlayer() end +--[[ `ReadyToInteractMarker` adds an extra visual element on top of the players +who have collected at least one resource since their inventory was last reset +according to TheMatrix. 
+]] +local ReadyToInteractMarker = class.Class(component.Component) + +function ReadyToInteractMarker:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('ReadyToInteractMarker')}, + -- `playerIndex` of the avatar to which this object is attached. + {'playerIndex', args.positive}, + }) + ReadyToInteractMarker.Base.__init__(self, kwargs) + self._config.playerIndex = kwargs.playerIndex +end + +function ReadyToInteractMarker:registerUpdaters(updaterRegistry) + local simulation = self.gameObject.simulation + local sceneObject = simulation:getSceneObject() + local theMatrix = sceneObject:getComponent('TheMatrix') + + local displayReadiness = function() + local avatarObject = simulation:getAvatarFromIndex(self._config.playerIndex) + local indicatorState = theMatrix.indicators[self._config.playerIndex] + if avatarObject:getComponent('Avatar'):isAlive() then + self.gameObject:setState(indicatorState) + elseif avatarObject:getComponent('Avatar'):isWait() then + self.gameObject:setState('avatarMarkingWait') + end + end + + updaterRegistry:registerUpdater{ + updateFn = displayReadiness, + priority = 2, + } +end + + +local DisallowMovement = class.Class(component.Component) + +function DisallowMovement:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('DisallowMovement')}, + }) + DisallowMovement.Base.__init__(self, kwargs) +end + +function DisallowMovement:postStart() + local avatar = self.gameObject:getComponent('Avatar') + local gameInteractionZapper = self.gameObject:getComponent( + 'GameInteractionZapper') + avatar:disallowMovement() + gameInteractionZapper:disallowZapping() +end + + +local InitializeAsReadyToInteract = class.Class(component.Component) + +function InitializeAsReadyToInteract:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('InitializeAsReadyToInteract')}, + {'playerIndex', args.numberType}, + }) + InitializeAsReadyToInteract.Base.__init__(self, kwargs) + self._config.playerIndex = 
kwargs.playerIndex +end + +function InitializeAsReadyToInteract:registerUpdaters(updaterRegistry) + local endEpisodeIfApplicable = function() + -- End the episode when the flag is set. + if self._endEpisodeOnNextFrame then + self.gameObject.simulation:endEpisode() + end + end + updaterRegistry:registerUpdater{ + updateFn = endEpisodeIfApplicable, + priority = 900, + } +end + +function InitializeAsReadyToInteract:onHit(hitterGameObject, hitName) + if hitName == 'gameInteraction' then + self._endEpisodeOnNextFrame = true + return true + end +end + +function InitializeAsReadyToInteract:postStart() + local sceneObject = self.gameObject.simulation:getSceneObject() + local theMatrix = sceneObject:getComponent('TheMatrix') + + self._endEpisodeOnNextFrame = false + + theMatrix.indicators[self._config.playerIndex] = 'ready' + theMatrix.playerCollectedAtLeastOneResource[self._config.playerIndex] = true +end + + local allComponents = { -- Object components Resource = Resource, @@ -683,9 +1162,15 @@ local allComponents = { Taste = Taste, InteractionTaste = InteractionTaste, DyadicRole = DyadicRole, + ReadyToInteractMarker = ReadyToInteractMarker, + + -- Avatar debug components + DisallowMovement = DisallowMovement, + InitializeAsReadyToInteract = InitializeAsReadyToInteract, -- Scene components TheMatrix = TheMatrix, + SpawnResourcesWhenAllPlayersZapped = SpawnResourcesWhenAllPlayersZapped, } component_registry.registerAllComponents(allComponents) diff --git a/meltingpot/lua/levels/the_matrix/init.lua b/meltingpot/lua/levels/the_matrix/init.lua index e2d35d86..f0cc56ca 100644 --- a/meltingpot/lua/levels/the_matrix/init.lua +++ b/meltingpot/lua/levels/the_matrix/init.lua @@ -1,4 +1,4 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. +--[[ Copyright 2022 DeepMind Technologies Limited. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/meltingpot/lua/levels/trade/components.lua b/meltingpot/lua/levels/trade/components.lua new file mode 100644 index 00000000..24c6501c --- /dev/null +++ b/meltingpot/lua/levels/trade/components.lua @@ -0,0 +1,1052 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +local args = require 'common.args' +local class = require 'common.class' +local helpers = require 'common.helpers' +local log = require 'common.log' +local random = require 'system.random' +local tensor = require 'system.tensor' +local set = require 'common.set' +local events = require 'system.events' + +local meltingpot = 'meltingpot.lua.modules.' +local component = require(meltingpot .. 'component') +local component_registry = require(meltingpot .. 'component_registry') + +local _COMPASS = {'N', 'E', 'S', 'W'} +local _OPPOSITECOMPASS = {N = 'S', E = 'W', S = 'N', W = 'E'} + +local function range(length) + local result = {} + for i = 1, length do + table.insert(result, i) + end + return result +end + + +--[[ `FruitType` makes a Harvestable (i.e. a tree) probabilistically yield +either apples or bananas. +]] +local FruitType = class.Class(component.Component) + +function FruitType:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('FruitType')}, + {'probabilities', args.tableType}, + }) + FruitType.Base.__init__(self, kwargs) + + self._config.probabilities = kwargs.probabilities + -- Check that tree spawn probabilities sum to one. 
+ self:checkFruitTypeProbabilitiesSumToOne() +end + +function FruitType:checkFruitTypeProbabilitiesSumToOne() + local sum = 0 + for _, probability in pairs(self._config.probabilities) do + sum = sum + probability + end + if sum < 1.0 or sum > 1.0 then + assert(false, 'Fruit tree spawn probabilities must sum to one.') + end +end + +function FruitType:spawn() + local cuts = {} + local order = {} + local cumulativeValue = 0.0 + for key, probability in pairs(self._config.probabilities) do + table.insert(order, key) + cumulativeValue = cumulativeValue + probability + table.insert(cuts, cumulativeValue) + end + + local rnd = random:uniformReal(0, 1) + + local lowerBound = 0.0 + local upperBound + for itemIdx, itemKey in ipairs(order) do + upperBound = cuts[itemIdx] + if itemKey ~= 'empty' and rnd > lowerBound and rnd < upperBound then + self.gameObject:setState(itemKey .. 'TreeHarvestable') + self._fruit = itemKey + end + lowerBound = upperBound + end +end + +function FruitType:postStart() + self:spawn() + self._ripe = true +end + +function FruitType:isRipe() + return self._ripe +end + +function FruitType:setRipe(isRipe) + -- pass a boolean for `isRipe`. + self._ripe = isRipe + -- change to the correct state. + local harvestableStateName = self._fruit .. 'TreeHarvestable' + local unripeStateName = self._fruit .. 'TreeUnripe' + if self._ripe then + self.gameObject:setState(harvestableStateName) + else + self.gameObject:setState(unripeStateName) + end +end + +function FruitType:getFruit() + return self._fruit +end + + +--[[ `Harvestable` makes it possible to collect fruit from a tree. +]] +local Harvestable = class.Class(component.Component) + +function Harvestable:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Harvestable')}, + -- `regrowthTime` determines how long to wait after a successful harvest + -- before a tree can be harvested again. 
+ {'regrowthTime', args.ge(1)}, + }) + Harvestable.Base.__init__(self, kwargs) + self._config.regrowthTime = kwargs.regrowthTime +end + +function Harvestable:reset() + self._regrowthCounter = self._config.regrowthTime + -- When an avatar moves onto the location of this Harvestable then point to it + -- with this variable. Set it back to nil when the avatar successfully + -- harvests or moves away. + self._harvestingAvatar = nil +end + +function Harvestable:_harvest(harvester) + local fruitTypeComponent = self.gameObject:getComponent('FruitType') + -- Set the harvestable object to its unripe state. + fruitTypeComponent:setRipe(false) + self._regrowthCounter = self._config.regrowthTime + + -- Add to the harvesting avatar's inventory. + local inventory = harvester:getComponent('Inventory') + local specialization = harvester:getComponent('Specialization') + local fruit = fruitTypeComponent:getFruit() + local amount = specialization:getHarvestAmount(fruit) + inventory:add(fruit, amount) + + -- Avatar is no longer trying to harvest since it has already succeeded. + self._harvestingAvatar = nil +end + +function Harvestable:_maybeHarvest(harvester) + local specialization = harvester:getComponent('Specialization') + local fruitType = self.gameObject:getComponent('FruitType') + local fruit = fruitType:getFruit() + + local harvestProbability = specialization:getHarvestProbability(fruit) + local rnd = random:uniformReal(0, 1) + if rnd < harvestProbability then + self:_harvest(harvester) + end +end + +function Harvestable:registerUpdaters(updaterRegistry) + local resolveHarvesting = function() + -- Maybe harvest if avatar standing on tree. + if self._harvestingAvatar then + self:_maybeHarvest(self._harvestingAvatar) + end + end + + updaterRegistry:registerUpdater{ + updateFn = resolveHarvesting, + priority = 2, -- Ensures harvesting will execute after avatar movement. 
+ } +end + +function Harvestable:onEnter(enteringObject, contactName) + local fruitTypeComponent = self.gameObject:getComponent('FruitType') + local isRipe = fruitTypeComponent:isRipe() + if contactName == 'avatar' and isRipe then + self._harvestingAvatar = enteringObject + end +end + +function Harvestable:onExit(exitingObject, contactName) + if contactName == 'avatar' then + self._harvestingAvatar = nil + end +end + +function Harvestable:update() + -- Update ripeness. + local fruitTypeComponent = self.gameObject:getComponent('FruitType') + local isRipe = fruitTypeComponent:isRipe() + if not isRipe then + self._regrowthCounter = self._regrowthCounter - 1 + if self._regrowthCounter < 1 then + fruitTypeComponent:setRipe(true) + end + end +end + + +--[[ Prevent stamina recovery while at this location. Used to prevent recovery +while harvesting.]] +local PreventStaminaRecoveryHere = class.Class(component.Component) + +function PreventStaminaRecoveryHere:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('PreventStaminaRecoveryHere')}, + }) + PreventStaminaRecoveryHere.Base.__init__(self, kwargs) +end + +function PreventStaminaRecoveryHere:onEnter(enteringObject, contactName) + if contactName == 'avatar' then + if enteringObject:hasComponent('Stamina') then + local stamina = enteringObject:getComponent('Stamina') + stamina:startPreventingRecovery() + end + end +end + +function PreventStaminaRecoveryHere:onExit(exitingGameObject, contactName) + if contactName == 'avatar' then + if exitingGameObject:hasComponent('Stamina') then + local stamina = exitingGameObject:getComponent('Stamina') + stamina:stopPreventingRecovery() + end + end +end + + +--[[ `TraversalCost` punishes agents who enter this object's location. 
+]] +local TraversalCost = class.Class(component.Component) + +function TraversalCost:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('TraversalCost')}, + {'penaltyAmount', args.default(0.0), args.ge(0)}, -- The reward penalty. + {'alsoReduceStamina', args.default(false), args.booleanType}, + -- `staminaPenaltyAmount` is the stamina penalty if applicable. + {'staminaPenaltyAmount', args.default(0.0), args.ge(0)}, + {'avatarLayer', args.default('upperPhysical'), args.stringType}, + }) + TraversalCost.Base.__init__(self, kwargs) + self._config.penaltyAmount = kwargs.penaltyAmount + self._config.alsoReduceStamina = kwargs.alsoReduceStamina + self._config.staminaPenaltyAmount = kwargs.staminaPenaltyAmount + self._config.avatarLayer = kwargs.avatarLayer +end + +function TraversalCost:applyCost(contactingObject) + contactingObject:getComponent('Avatar'):addReward(-self._config.penaltyAmount) + + if self._config.alsoReduceStamina then + local staminaComponent = contactingObject:getComponent('Stamina') + staminaComponent:addValue(-self._config.staminaPenaltyAmount) + end +end + +function TraversalCost:registerUpdaters(updaterRegistry) + local transform = self.gameObject:getComponent('Transform') + local function detectAvatarAndApplyCostIfPresent() + local contactingObject = transform:queryPosition(self._config.avatarLayer) + -- Transform.queryPosition returns nil when no object is found so we check + -- both that an object was found and that it has the avatar component. + if contactingObject and contactingObject:hasComponent('Avatar') then + self:applyCost(contactingObject) + end + end + + updaterRegistry:registerUpdater{ + updateFn = detectAvatarAndApplyCostIfPresent, + priority = 3, + } +end + +--[[ `Inventory` keeps track of how many objects each avatar is carrying. It +assumes that agents can carry infinite quantities so this is a kind of inventory +that cannot ever be full. 
+]] +local Inventory = class.Class(component.Component) + +function Inventory:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Inventory')}, + {'itemTypes', args.tableType, args.default({'apple', 'banana'})}, + }) + Inventory.Base.__init__(self, kwargs) + self._config.itemTypes = kwargs.itemTypes +end + +function Inventory:reset() + self._inventory = {} + for _, itemType in ipairs(self._config.itemTypes) do + self._inventory[itemType] = 0 + end + -- Allocate memory for the tensor representation of the inventory. + self._tensorInventory = tensor.Int64Tensor(#self._config.itemTypes):fill(0) +end + +function Inventory:_add(itemType, number) + self._inventory[itemType] = self._inventory[itemType] + number +end + +function Inventory:_remove(itemType, number) + if self._inventory[itemType] - number >= 0 then + self._inventory[itemType] = self._inventory[itemType] - number + else + local message = (itemType .. ': Tried to remove ' .. tostring(number) .. + ' but inventory contained only ' .. + tostring(self._inventory[itemType])) + assert(false, message) + end +end + +function Inventory:add(itemType, number) + if number >= 0 then + self:_add(itemType, number) + else + self:_remove(itemType, -number) + end +end + +function Inventory:quantity(itemType) + return self._inventory[itemType] +end + +function Inventory:getInventoryAsTensor(order) + -- `order` (array of fruit strings). Set order of items in output tensor. For + -- instance {'apple', 'banana'} --> tensor.Tensor{numApples, numBananas}. + for idx, fruit in ipairs(order) do + -- Set index `idx` to value `self._inventory[fruit]`. + self._tensorInventory(idx):val(self._inventory[fruit]) + end + return self._tensorInventory +end + + +--[[ `Eating` endows avatars with the ability to eat items from their inventory +and thereby update a `periodicNeed`. 
+]] +local Eating = class.Class(component.Component) + +function Eating:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Eating')}, + {'edibleKinds', args.default({'apple', 'banana'}), args.tableType}, + }) + Eating.Base.__init__(self, kwargs) + self._edibleKinds = kwargs.edibleKinds +end + +function Eating:registerUpdaters(updaterRegistry) + local inventory = self.gameObject:getComponent('Inventory') + local taste = self.gameObject:getComponent('Taste') + local avatar = self.gameObject:getComponent('Avatar') + local periodicNeed = self.gameObject:getComponent('PeriodicNeed') + local eat = function() + local playerVolatileVariables = avatar:getVolatileData() + local actions = playerVolatileVariables.actions + for _, fruit in ipairs(self._edibleKinds) do + if actions['eat_' .. fruit] == 1 and inventory:quantity(fruit) >= 1 then + inventory:add(fruit, -1) + local rewardAmount = taste:getRewardAmount(fruit) + avatar:addReward(rewardAmount) + periodicNeed:resetDriveLevel() + end + end + end + + updaterRegistry:registerUpdater{ + updateFn = eat, + priority = 200, + } +end + + +--[[ `Specialization` controls the probability with which an avatar can harvest +a `Harvestable` (tree) per step they stand on it. It also controls the number +of items they obtain when they do harvest successfully. 
+]] +local Specialization = class.Class(component.Component) + +function Specialization:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Specialization')}, + {'specialty', args.oneOf('apple', 'banana')}, + {'strongAmount', args.numberType, args.default(1)}, + {'weakAmount', args.numberType, args.default(1)}, + {'strongProbability', args.numberType, args.default(1.0)}, + {'weakProbability', args.numberType, args.default(1.0)}, + }) + Specialization.Base.__init__(self, kwargs) + + self._config.specialty = kwargs.specialty + self._config.strongAmount = kwargs.strongAmount + self._config.weakAmount = kwargs.weakAmount + self._config.strongProbability = kwargs.strongProbability + self._config.weakProbability = kwargs.weakProbability +end + +function Specialization:getHarvestAmount(fruit) + if fruit == self._config.specialty then + return self._config.strongAmount + end + return self._config.weakAmount +end + +function Specialization:getHarvestProbability(fruit) + if fruit == self._config.specialty then + return self._config.strongProbability + end + return self._config.weakProbability +end + +function Specialization:getSpecialty() + return self._config.specialty +end + + +--[[ `Taste` determines how much reward an avatar gets from eating a given +type of item (such as an apple or banana). 
+]] +local Taste = class.Class(component.Component) + +function Taste:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Taste')}, + {'mostTastyFruit', args.oneOf('apple', 'banana')}, + {'mostTastyReward', args.numberType}, + {'defaultReward', args.numberType}, + }) + Taste.Base.__init__(self, kwargs) + + self._config.mostTastyFruit = kwargs.mostTastyFruit + self._config.mostTastyReward = kwargs.mostTastyReward + self._config.defaultReward = kwargs.defaultReward +end + +function Taste:getRewardAmount(fruit) + if fruit == self._config.mostTastyFruit then + return self._config.mostTastyReward + end + return self._config.defaultReward +end + + +--[[ Avatars bearing the `MovementPenalty` component pay a penalty every time +they select one of their `costlyActions`. +]] +local MovementPenalty = class.Class(component.Component) + +function MovementPenalty:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('MovementPenalty')}, + {'costlyActions', args.tableType}, + {'penaltyAmount', args.ge(0)}, + }) + MovementPenalty.Base.__init__(self, kwargs) + + self._config.costlyActions = kwargs.costlyActions + self._config.penaltyAmount = kwargs.penaltyAmount +end + +function MovementPenalty:registerUpdaters(updaterRegistry) + local avatar = self.gameObject:getComponent('Avatar') + local applyActionCost = function() + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + for _, costly_action_name in pairs(self._config.costlyActions) do + if actions[costly_action_name] ~= 0 then + avatar:addReward(-self._config.penaltyAmount) + end + end + end + + updaterRegistry:registerUpdater{ + updateFn = applyActionCost, + priority = 5, + } +end + + +--[[ The `Trading` component implements all the logic of offering and resolving +trades of items. 
+]] +local Trading = class.Class(component.Component) + +function Trading:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('Trading')}, + {'maxOfferQuantity', args.positive}, + {'radius', args.positive}, + {'itemTypes', args.tableType, args.default({'apple', 'banana'})}, + }) + Trading.Base.__init__(self, kwargs) + + self._config.maxOfferQuantity = kwargs.maxOfferQuantity + self._config.radius = kwargs.radius + self._config.itemTypes = kwargs.itemTypes +end + +function Trading:_getEmptyOffer() + local offer = {} + for _, itemType in ipairs(self._config.itemTypes) do + offer[itemType] = 0 + end + return offer +end + +function Trading:_resetOffer() + self._offer = self:_getEmptyOffer() +end + +function Trading:reset() + self:_resetOffer() +end + +function Trading:registerUpdaters(updaterRegistry) + local function offer() + -- Listen for the action and set the appropriate self._offer. + local playerVolatileVariables = ( + self.gameObject:getComponent('Avatar'):getVolatileData()) + local actions = playerVolatileVariables.actions + if self.gameObject:getComponent('Avatar'):isAlive() then + for _, itemType in ipairs(self._config.itemTypes) do + local offerActionString = 'offer_' .. itemType + if actions[offerActionString] ~= 0 then + self._offer[itemType] = actions[offerActionString] + end + end + if actions['offer_cancel'] == 1 then + self:_resetOffer() + end + end + end + + updaterRegistry:registerUpdater{ + updateFn = offer, + priority = 250, + } +end + +function Trading:hasEnough(offer) + -- Check that you have at least as many as you are offering to give. 
+ for fruitType, quantity in pairs(offer) do + if quantity < 0 then + local inventory = self.gameObject:getComponent('Inventory') + if quantity + inventory:quantity(fruitType) < 0 then + return false + end + end + end + return true +end + +function Trading:isValid(offer) + local take, give + for fruitType, quantity in pairs(offer) do + if quantity > 0 then + take = true + end + if quantity < 0 then + give = true + end + end + if take and give then + return true + else + return false + end +end + +function Trading:isCompatible(nearbyOffer) + -- For everything I want, check they are offering it. + for fruitType, quantityIWant in pairs(self._offer) do + if quantityIWant > 0 and quantityIWant + nearbyOffer[fruitType] > 0 then + return false + end + end + return true +end + +function Trading:assertValidTrade(offerPackage, theirPackage) + -- TODO(b/260153645): check that offers are compatible. + local myInventory = offerPackage.avatarObject:getComponent('Inventory') + local theirInventory = theirPackage.avatarObject:getComponent('Inventory') + + -- Check both players have enough items to trade. + local validTradeForMe, validTradeForThem + -- The following check for having enough fruit to trade should never come out + -- false. This is because we already checked for that possibility earlier in + -- the process. It's important to check that early because otherwise agents + -- could strategically mess up one another's trades by advertising more than + -- they can deliver. + for _, fruitType in ipairs(self._config.itemTypes) do + -- My inventory contains at least as many items as I am selling. + if -myInventory:quantity(fruitType) <= offerPackage.offer[fruitType] then + validTradeForMe = true + else + validTradeForMe = false + end + -- Their inventory contains at least as many items as they are selling. 
+ if -theirInventory:quantity(fruitType) <= theirPackage.offer[fruitType] then + validTradeForThem = true + else + validTradeForThem = false + end + assert(validTradeForMe and validTradeForThem, + 'Somehow an invalid trade slipped past previous checks.') + end +end + +function Trading:resolve(offerPackage, theirPackage) + -- Make sure that both players have enough items to trade. + self:assertValidTrade(offerPackage, theirPackage) + + local myInventory = offerPackage.avatarObject:getComponent('Inventory') + local theirInventory = theirPackage.avatarObject:getComponent('Inventory') + local myExecutedTrade = {} + local theirExecutedTrade = {} + for _, fruitType in ipairs(self._config.itemTypes) do + -- Ensure the minimal possible number of items actually change hands. + if offerPackage.offer[fruitType] >= theirPackage.offer[fruitType] then + myExecutedTrade[fruitType] = math.min( + offerPackage.offer[fruitType], + math.abs(theirPackage.offer[fruitType]) + ) + theirExecutedTrade[fruitType] = -myExecutedTrade[fruitType] + else + theirExecutedTrade[fruitType] = math.min( + theirPackage.offer[fruitType], + math.abs(offerPackage.offer[fruitType]) + ) + myExecutedTrade[fruitType] = -theirExecutedTrade[fruitType] + end + assert(myExecutedTrade[fruitType] == -theirExecutedTrade[fruitType], + 'Trades do not match.') + + -- Update the inventories. + myInventory:add(fruitType, myExecutedTrade[fruitType]) + theirInventory:add(fruitType, theirExecutedTrade[fruitType]) + end + + -- Report the trade as an event for debug/analysis. + local item0 = self._config.itemTypes[1] -- e.g. 'apple' + local item1 = self._config.itemTypes[2] -- e.g. 
'banana' + events:add('trade', 'dict', + 'item_0', item0, + 'item_1', item1, + 'player_a_index', offerPackage.partnerId, + 'player_b_index', theirPackage.partnerId, + 'player_a_offered_item_0', offerPackage.offer[item0], + 'player_a_offered_item_1', offerPackage.offer[item1], + 'player_a_traded_item_0', myExecutedTrade[item0], + 'player_a_traded_item_1', myExecutedTrade[item1], + 'player_b_offered_item_0', theirPackage.offer[item0], + 'player_b_offered_item_1', theirPackage.offer[item1], + 'player_b_traded_item_0', theirExecutedTrade[item0], + 'player_b_traded_item_1', theirExecutedTrade[item1] + ) + + -- If trade was successful then cancel both partners' offers. + offerPackage.trading:cancelOffer() + theirPackage.trading:cancelOffer() + return true +end + +function Trading:getNearbyAvatars() + local transform = self.gameObject:getComponent('Transform') + local nearbyObjects = transform:queryDisc('upperPhysical', + self._config.radius) + -- Only return nearby objects that are avatars. + local nearbyAvatars = {} + for _, object in ipairs(nearbyObjects) do + if object:hasComponent('Avatar') then + table.insert(nearbyAvatars, object) + end + end + return nearbyAvatars +end + +function Trading:_isStrictlyBetterOffer(offerA, offerB) + local possiblyBetter = false + for fruitType, _ in pairs(offerA.offer) do + if offerA.offer[fruitType] < offerB.offer[fruitType] then + possiblyBetter = true + end + if offerA.offer[fruitType] > offerB.offer[fruitType] then + return false + end + end + return possiblyBetter +end + +function Trading:getPossiblePartners() + local nearbyAvatars = self:getNearbyAvatars() + -- One pass over the avatars to collect the relevant data. 
+ local nearbyCompatibleOffers = {} + for _, avatarObject in ipairs(nearbyAvatars) do + local trading = avatarObject:getComponent('Trading') + local nearbyOffer = trading:getPublicOffer() + if self:isCompatible(nearbyOffer) and trading:hasEnough(nearbyOffer) and + trading:isCompatible(self._offer) then + local nearbyOfferPackage = { + offer = nearbyOffer, + avatarObject = avatarObject, + trading = trading, + partnerId = avatarObject:getComponent('Avatar'):getIndex(), + dominated = false, + } + table.insert(nearbyCompatibleOffers, nearbyOfferPackage) + end + end + + -- Loop through all offers to see if any dominate each other. + for _, offerA in ipairs(nearbyCompatibleOffers) do + for _, offerB in ipairs(nearbyCompatibleOffers) do + if self:_isStrictlyBetterOffer(offerA, offerB) then + offerB.dominated = true + end + end + end + + -- Collect all offers that were not dominated. + local nearbyCompatibleNonDominatedOffers = {} + for _, offer in ipairs(nearbyCompatibleOffers) do + if not offer.dominated then + table.insert(nearbyCompatibleNonDominatedOffers, offer) + end + end + + -- TODO(b/260155059): use subset of the offers from equally close players. + + return nearbyCompatibleNonDominatedOffers +end + +function Trading:callResolveIfPossible() + if self:isValid(self._offer) and self:hasEnough(self._offer) then + -- Get all my offers. + local myPossiblePartners = self:getPossiblePartners() + + -- For each of my possible partners, check that I am their possible partner. + local myId = self.gameObject:getComponent('Avatar'):getIndex() + for _, offerPackage in pairs(myPossiblePartners) do + local theirPossiblePartners = offerPackage.trading:getPossiblePartners() + -- Iterate over all their offers to find I am in it. + for _, theirPackage in pairs(theirPossiblePartners) do + if theirPackage.partnerId == myId then + -- Return once a compatible offer has been found. 
+ return self:resolve(offerPackage, theirPackage) + end + end + end + end + return false +end + +function Trading:getOffer() + return self._offer +end + +function Trading:getTradeRadius() + return self._config.radius +end + +--[[ Trading:getPublicOffer() returns the current advertised offer. + +Returns {apple = 0, banana = 0} when the current offer is invalid i.e. the player +would not have enough items in its inventory to fulfil its part of the deal if +a trade were to occur in which it would have to give up the maximal amount it +offered. +]] +function Trading:getPublicOffer() + if self:isValid(self._offer) and self:hasEnough(self._offer) then + return self:getOffer() + else + return self:_getEmptyOffer() + end +end + +function Trading:cancelOffer() + self:_resetOffer() +end + + +local InventoryObserver = class.Class(component.Component) + +function InventoryObserver:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('InventoryObserver')}, + {'order', args.default({'apple', 'banana'}), args.tableType}, + }) + InventoryObserver.Base.__init__(self, kwargs) + + self._config.order = kwargs.order + self._config.numFruitTypes = #self._config.order +end + +function InventoryObserver:addObservations(tileSet, + world, + observations, + avatarCount) + local playerIdx = self.gameObject:getComponent('Avatar'):getIndex() + local inventory = self.gameObject:getComponent('Inventory') + observations[#observations + 1] = { + name = tostring(playerIdx) .. 
'.INVENTORY', + type = 'tensor.Int64Tensor', + shape = {self._config.numFruitTypes}, + func = function(grid) + return inventory:getInventoryAsTensor(self._config.order) + end + } +end + + +local MyOfferObserver = class.Class(component.Component) + +function MyOfferObserver:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('MyOfferObserver')}, + {'order', args.default({'apple', 'banana'}), args.tableType}, + }) + MyOfferObserver.Base.__init__(self, kwargs) + + self._config.order = kwargs.order + self._config.numFruitTypes = #self._config.order +end + +function MyOfferObserver:reset() + -- Allocate memory for the tensor representation of the offer. + self._tensorMyOffer = tensor.Int64Tensor(self._config.numFruitTypes):fill(0) +end + +function MyOfferObserver:getMyPublicOfferAsTensor(order) + -- `order` (array of fruit strings). Set order of items in output tensor. For + -- instance {'apple', 'banana'} --> tensor.Tensor{numApples, numBananas}. + local trading = self.gameObject:getComponent('Trading') + local offer = trading:getPublicOffer() + for idx, fruit in ipairs(order) do + -- Set index `idx` to value `offer[fruit]`. + self._tensorMyOffer(idx):val(offer[fruit]) + end + return self._tensorMyOffer +end + +function MyOfferObserver:addObservations(tileSet, + world, + observations, + avatarCount) + local playerIdx = self.gameObject:getComponent('Avatar'):getIndex() + observations[#observations + 1] = { + name = tostring(playerIdx) .. '.MY_OFFER', + type = 'tensor.Int64Tensor', + shape = {self._config.numFruitTypes}, + func = function(grid) + -- Note: This always returns the public offer. 
+ return self:getMyPublicOfferAsTensor(self._config.order) + end + } +end + + +local AllOffersObserver = class.Class(component.Component) + +function AllOffersObserver:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('AllOffersObserver')}, + {'order', args.default({'apple', 'banana'}), args.tableType}, + -- If `flatten` is true then output a flat vector, otherwise the output + -- observation is a matrix of size (maxPossibleOffers, numFruitTypes). + {'flatten', args.default(false), args.booleanType}, + }) + AllOffersObserver.Base.__init__(self, kwargs) + + self._config.order = kwargs.order + self._config.numFruitTypes = #self._config.order + self._config.flatten = kwargs.flatten +end + +function AllOffersObserver:setMaxPossibleOffers() + local radius = self.gameObject:getComponent('Trading'):getTradeRadius() + -- Note: this assumes queryDisc was used in Trading.getNearbyAvatars. + self._maxPossibleOffers = math.ceil(math.pi * radius * radius) +end + +function AllOffersObserver:reset() + -- Allocate memory for the tensor representation of all nearby offers. + self:setMaxPossibleOffers() + self._tensorOffers = tensor.Int64Tensor(self._maxPossibleOffers, + self._config.numFruitTypes):fill(0) +end + +function AllOffersObserver:getNearbyAvatars() + local trading = self.gameObject:getComponent('Trading') + local avatars = trading:getNearbyAvatars() + return avatars +end + +function AllOffersObserver:getPublicOffersAsTensor(order) + -- First zero the offers tensor from the previous timestep. + self._tensorOffers:fill(0) + -- `order` (array of fruit strings). Set order of items in output tensor. For + -- instance {'apple', 'banana'} --> tensor.Tensor{numApples, numBananas}. + local selfIdx = self.gameObject:getComponent('Avatar'):getIndex() + -- Note: there is no guarantee that avatars will be in any specific order. For + -- instance, they are NOT guaranteed to be in slot id or joining order. 
+ local avatars = self:getNearbyAvatars() + local arbitrary_idx = 1 + for _, avatarObject in pairs(avatars) do + -- Exclude the offer that would correspond to the self offer since it will + -- end up coming out in an entirely different observation channel. + if avatarObject:getComponent('Avatar'):getIndex() ~= selfIdx then + local offer = avatarObject:getComponent('Trading'):getPublicOffer() + for idx, fruit in ipairs(order) do + -- Set index `(arbitrary_idx, idx)` to value `offer[fruit]`. + self._tensorOffers(arbitrary_idx, idx):val(offer[fruit]) + end + arbitrary_idx = arbitrary_idx + 1 + end + end + + return self._tensorOffers +end + +function AllOffersObserver:addObservations(tileSet, + world, + observations, + avatarCount) + self:setMaxPossibleOffers() + local shape, formatOutput + if self._config.flatten then + shape = {self._maxPossibleOffers * self._config.numFruitTypes} + formatOutput = function(publicOffersTensor) + local flatOffers = publicOffersTensor:reshape(shape) + return flatOffers + end + else + shape = {self._maxPossibleOffers, self._config.numFruitTypes} + formatOutput = function(publicOffersTensor) + return publicOffersTensor + end + end + + local playerIdx = self.gameObject:getComponent('Avatar'):getIndex() + observations[#observations + 1] = { + name = tostring(playerIdx) .. 
'.OFFERS', + type = 'tensor.Int64Tensor', + shape = shape, + func = function(grid) + return formatOutput(self:getPublicOffersAsTensor(self._config.order)) + end + } +end + + +local HungerObserver = class.Class(component.Component) + +function HungerObserver:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('HungerObserver')}, + {'needComponent', args.default('PeriodicNeed'), args.stringType}, + }) + HungerObserver.Base.__init__(self, kwargs) + self._config.needComponent = kwargs.needComponent +end + +function HungerObserver:addObservations(tileSet, world, observations) + local playerIndex = self.gameObject:getComponent('Avatar'):getIndex() + local needComponent = self.gameObject:getComponent(self._config.needComponent) + observations[#observations + 1] = { + name = tostring(playerIndex) .. '.HUNGER', + type = 'Doubles', + shape = {}, + func = function(grid) + return needComponent:getNeed() + end + } +end + + +local TradeManager = class.Class(component.Component) + +function TradeManager:__init__(kwargs) + kwargs = args.parse(kwargs, { + {'name', args.default('TradeManager')}, + }) + TradeManager.Base.__init__(self, kwargs) +end + +function TradeManager:registerUpdaters(updaterRegistry) + local simulation = self.gameObject.simulation + local numPlayers = simulation:getNumPlayers() + + local function resolveTrades() + local order = random:shuffle(range(numPlayers)) + + for _, playerIndex in ipairs(order) do + local avatarObject = simulation:getAvatarFromIndex(playerIndex) + local trading = avatarObject:getComponent('Trading') + local success = trading:callResolveIfPossible() + end + end + + updaterRegistry:registerUpdater{ + updateFn = resolveTrades, + priority = 2, + } +end + +local allComponents = { + -- Fruit tree components. + FruitType = FruitType, + Harvestable = Harvestable, + PreventStaminaRecoveryHere = PreventStaminaRecoveryHere, + + -- Water components. + TraversalCost = TraversalCost, + + -- Avatar components. 
+ Inventory = Inventory, + Eating = Eating, + Specialization = Specialization, + Taste = Taste, + MovementPenalty = MovementPenalty, + Trading = Trading, + + -- Avatar observer components. + InventoryObserver = InventoryObserver, + MyOfferObserver = MyOfferObserver, + AllOffersObserver = AllOffersObserver, + HungerObserver = HungerObserver, + + -- Scene components. + TradeManager = TradeManager, +} + +component_registry.registerAllComponents(allComponents) + +return allComponents diff --git a/meltingpot/lua/levels/trade/init.lua b/meltingpot/lua/levels/trade/init.lua new file mode 100644 index 00000000..78c21d16 --- /dev/null +++ b/meltingpot/lua/levels/trade/init.lua @@ -0,0 +1,43 @@ +--[[ Copyright 2022 DeepMind Technologies Limited. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +]] + +-- Entry point lua file for the barter/trade-related substrates. + +local meltingpot = 'meltingpot.lua.modules.' +local api_factory = require(meltingpot .. 'api_factory') +local avatar_grappling = require(meltingpot .. 'avatar_grappling') +local simulation = require(meltingpot .. 'base_simulation') + +-- Required to be able to use the components in the level +local component_library = require(meltingpot .. 'component_library') +local avatar_library = require(meltingpot .. 'avatar_library') +-- Next require the general stamina components +local stamina = 'meltingpot.lua.levels.stamina.' +local shared_components = require(stamina .. 
'shared_components') +-- Finally add the local components for this game, overriding any previously +-- loaded having the same name. +local components = require 'components' + +return api_factory.apiFactory{ + Simulation = simulation.BaseSimulation, + settings = { + -- Scale each sprite to a square of size `spriteSize` X `spriteSize`. + spriteSize = 8, + -- Terminate the episode after this many frames. + maxEpisodeLengthFrames = 1000, + -- Settings to pass to simulation.lua. + simulation = {}, + } +} diff --git a/meltingpot/lua/modules/base_simulation.lua b/meltingpot/lua/modules/base_simulation.lua index 4193cfd3..100b7b72 100644 --- a/meltingpot/lua/modules/base_simulation.lua +++ b/meltingpot/lua/modules/base_simulation.lua @@ -268,9 +268,6 @@ function BaseSimulation:worldConfig() 'upperPhysical', -- Avatars are normally on layer `upperPhysical`. 'overlay', 'superOverlay', - -- Only use directionIndicatorLayer for avatar direction indicators. - 'directionIndicatorLayer', - 'superDirectionIndicatorLayer' }, -- `customSprites` holds sprites that can be specified in a spriteMap but -- are otherwise not attached to any state. The main use-case for diff --git a/meltingpot/lua/modules/base_simulation_v2.lua b/meltingpot/lua/modules/base_simulation_v2.lua deleted file mode 100644 index 100b7b72..00000000 --- a/meltingpot/lua/modules/base_simulation_v2.lua +++ /dev/null @@ -1,624 +0,0 @@ ---[[ Copyright 2020 DeepMind Technologies Limited. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-]] - --- Simulations implement game logic by maintaining game objects and calling --- their functions at specific times. This file implements the base class from --- which level-specific simulations inherit. - -local class = require 'common.class' -local helpers = require 'common.helpers' -local log = require 'common.log' -local random = require 'system.random' -local read_settings = require 'common.read_settings' -local tile = require 'system.tile' - -local meltingpot = 'meltingpot.lua.modules.' -local game_object = require(meltingpot .. 'game_object') -local component_registry = require(meltingpot .. 'component_registry') -local prefab_utils = require(meltingpot .. 'prefab_utils') -local updater_registry = require(meltingpot .. 'updater_registry') - --- Functions to track from components in a game object. -_COMPONENT_FUNCTIONS = { - 'awake', 'reset', 'start', 'postStart', 'preUpdate', 'update', 'onBlocked', - 'onEnter', 'onExit', 'onHit', 'onStateChange', 'registerUpdaters', - 'addHits', 'addSprites', 'addCustomSprites', 'addObservations', - 'addPlayerCallbacks'} - ---[[ The base class of all Simulations. This object is the container of all -GameObjects, and maintains a registry of them, along with a table mapping states -to GameObjects. -]] - ---[[ BaseSimulation holds the list of GameObjects, they determine game behavior. -]] -local BaseSimulation = class.Class() - -function BaseSimulation.defaultSettings() - return { - map = '', - charPrefabMap = read_settings.any(), - prefabs = read_settings.any(), - buildAvatars = false, - playerPalettes = read_settings.any(), - gameObjects = read_settings.default( - {name = '', components = read_settings.any()}), - -- The scene is a game object that holds global state/logic. - scene = {name = 'scene', components = read_settings.any()}, - -- `worldSpriteMap` is an optional table that remaps specific sprites to - -- other sprites in the WORLD.RGB observation. 
For example, - -- {sourceSprite1=targetSprite1, sourceSprite2=targetSprite2, ...}. - worldSpriteMap = read_settings.any(), - } -end - -local function _stringifyKeys(table) - newTable = {} - if table then - for k, v in pairs(table) do - newTable[tostring(k)] = v - end - end - return newTable -end - -function BaseSimulation:__init__(kwargs) - self._settings = kwargs.settings - self._settings.numPlayers = kwargs.numPlayers - self._settings.charPrefabMap = _stringifyKeys(self._settings.charPrefabMap) - self._settings.worldSpriteMap = self._settings.worldSpriteMap - - self._variables = {} - self._variables.gameObjects = {} - self._variables.avatarObjects = {} - -- This table contains the mapping from a dmlab2d piece to the game object - -- that owns that piece. - self._variables.pieceToGameObject = {} - self._variables.nextGameObjectId = 1 - - -- Initialize avatar indexing tables to be populated in base avatar manager. - self._variables.avatarPieceToIndex = {} - self._variables.avatarIndexToPiece = {} - - -- Table of game objects indexed by component functions, to keep track of - -- only game objects which have at least one component with a function. - self._variables.objectsByFunctionName = {} - for _, fnName in pairs(_COMPONENT_FUNCTIONS) do - self._variables.objectsByFunctionName[fnName] = {} - end - - -- Add the "scene", a static game object that can hold global logic. - if self._settings.scene ~= nil then - self._settings.sceneObject = self:buildGameObjectFromSettings( - self._settings.scene) - end - - if self._settings.charPrefabMap ~= nil and self._settings.prefabs ~= nil then - -- Create game objects from prefabs, ASCII map, and chars to prefabs. 
- local objects = prefab_utils.buildGameObjectConfigs( - self._settings.map, self._settings.prefabs, - self._settings.charPrefabMap) - for _, gameObjectConfig in ipairs(objects) do - table.insert(self._settings.gameObjects, gameObjectConfig) - end - end - - if self._settings.buildAvatars then - -- Create avatar game objects from prefab, and palettes. - local avatars = prefab_utils.buildAvatarConfigs( - self._settings.numPlayers, self._settings.prefabs, - self._settings.playerPalettes) - for _, avatarConfig in ipairs(avatars) do - table.insert(self._settings.gameObjects, avatarConfig) - end - end - - -- Instantiate and add all game objects (including avatars). - if self._settings.gameObjects then - for _, gameObjectConfig in ipairs(self._settings.gameObjects) do - local gameObject = self:buildGameObjectFromSettings(gameObjectConfig) - log.v(2, "Creating game object with id " .. gameObject._id) - end - end - - -- Check that we do have enough promised avatars - local numAvatars = 0 - for k, v in pairs(self._variables.avatarObjects) do - numAvatars = numAvatars + 1 - end - assert( - numAvatars == self._settings.numPlayers, - "Created an environment with " .. self._settings.numPlayers .. " players, " - .. "but provided " .. numAvatars .. " avatar objects instead. Avatars " .. - "must be created exactly once, either passed in `game_objects` settings, " - .. "or by passing `prefabs` containing \"avatar\" and setting " .. - "`buildAvatars` to `true`.") -end - -local function _makeComponent(config, isAvatar) - local component = config.component - local kwargs = config.kwargs - -- Avatar pieces must be created by the Avatar component instead of the - -- transform component, so must set the `deferPieceCreation` flag. 
- if isAvatar and component == 'Transform' then - kwargs.deferPieceCreation = true - end - return component_registry.getComponent(component)(kwargs) -end - -local function _defaultStateManager() - return _makeComponent({ - component = "StateManager", - kwargs = { - initialState = "scene", - stateConfigs = {{ - state = "scene", - }}, - }}, - false) -end - -local function _defaultTransform(isAvatar) - return _makeComponent({ - component = "Transform", - kwargs = { - position = {0, 0}, - orientation = "N", - }}, - isAvatar) -end - ---[[ This function builds a game object from the given configuration and -registers it into the simulation's object tables. This function is aware of -whether the game object requested is an Avatar object or not. At the end of -this call, the game object will be almost fully initialised, and contain a -special property: `simulation` that refers to this object. -]] -function BaseSimulation:buildGameObjectFromSettings(gameObjectConfig) - local componentsConfig = gameObjectConfig.components - local configuredComponents = {} - - -- Is this an Avatar? - local isAvatar = false - for _, config in ipairs(componentsConfig) do - if config.component == 'Avatar' then - isAvatar = true - end - end - - local hasStateManager = false - local hasTransform = false - for _, config in ipairs(componentsConfig) do - if config.component == "StateManager" then - hasStateManager = true - end - if config.component == "Transform" then - hasTransform = true - end - table.insert(configuredComponents, _makeComponent(config, isAvatar)) - end - local name = '' - if rawget(gameObjectConfig, "name") ~= nil then - name = gameObjectConfig.name - end - - -- Add default components (with error-level logging to discourage). - if not hasStateManager then - log.warn( - "GameObject '" .. name .. "' did not have a StateManager component, " .. - "but explicitly specifying one is strongly preferred. 
Using a default.") - table.insert(configuredComponents, _defaultStateManager()) - end - if not hasTransform then - log.warn( - "GameObject '" .. name .. "' did not have a Transform component, " .. - "but explicitly specifying one is strongly preferred. Using a default.") - table.insert(configuredComponents, _defaultTransform(isAvatar)) - end - - local gameObject = game_object.GameObject{ - id = 'OID_' .. name .. '_' .. self._variables.nextGameObjectId, - name = name, - components = configuredComponents - } - self._variables.nextGameObjectId = self._variables.nextGameObjectId + 1 - - gameObject.simulation = self - table.insert(self._variables.gameObjects, gameObject) - for _, fnName in pairs(_COMPONENT_FUNCTIONS) do - if gameObject:hasComponentWithFunction(fnName) then - table.insert(self._variables.objectsByFunctionName[fnName], gameObject) - end - end - if isAvatar then - self._variables.avatarObjects[gameObject._id] = gameObject - end - return gameObject -end - ---[[ The following callbacks are called during initialisation (in this order) ]] - -function BaseSimulation:worldConfig() - -- This config contains data that you expect not to change during (or between) - -- episodes. GameObjects add data to its fields. - local config = { - outOfBoundsSprite = 'OutOfBounds', - outOfViewSprite = 'OutOfView', - -- `updateOrder` sets the order updates are to be called on each frame. - updateOrder = {}, - -- `renderOrder` is the draw order for layers. The alpha channel allows - -- control of transparency/opacity viewing to lower layers. - renderOrder = { - 'logic', - 'alternateLogic', - 'background', - 'lowerPhysical', - 'upperPhysical', -- Avatars are normally on layer `upperPhysical`. - 'overlay', - 'superOverlay', - }, - -- `customSprites` holds sprites that can be specified in a spriteMap but - -- are otherwise not attached to any state. The main use-case for - -- spriteMaps is the self-vs-other player view type. 
- customSprites = {}, - -- `hits` determine the names of the callbacks to execute when a piece - -- collides with a beam. - hits = {}, - -- `states` is a dictionary of state configuration tables. - states = {} - } - -- By this point, gameObjects already contain the avatar objects - for _, gameObject in pairs( - self._variables.objectsByFunctionName.registerUpdaters) do - gameObject:registerUpdaters() - end - for _, gameObject in pairs(self._variables.gameObjects) do - gameObject:addStates(config.states) - end - for _, gameObject in pairs( - self._variables.objectsByFunctionName.addHits) do - gameObject:addHits(config) - end - for _, gameObject in pairs( - self._variables.objectsByFunctionName.addCustomSprites) do - gameObject:addCustomSprites(config.customSprites) - end - self._contacts = {} - self._updaterRegistry = updater_registry.UpdaterRegistry() - -- Add all contacts from the registered states into a list to be passed later - -- to all GameObjects so they can register their callbacks (like with hits). - for _, stateElement in pairs(config.states) do - if stateElement.contact then - table.insert(self._contacts, stateElement.contact) - end - end - -- Notify all GameObjects of the hits and contacts table so they can register - -- callbacks. - for _, gameObject in pairs(self._variables.gameObjects) do - gameObject:setHits(config.hits) - gameObject:setContacts(self._contacts) - -- Merge all updaters into the simulation registry. - self._updaterRegistry:mergeWith(gameObject:getUpdaterRegistry()) - end - self._updaterRegistry:addUpdateOrder(config.updateOrder) - - log.v(1, 'World Config: ' .. 
helpers.tostring(config)) - - return config -end - -function BaseSimulation:addSprites(tileSet) - tileSet:addColor('OutOfBounds', {0, 0, 0}) - tileSet:addColor('OutOfView', {80, 80, 80}) - for _, gameObject in pairs( - self._variables.objectsByFunctionName.addSprites) do - gameObject:addSprites(tileSet) - end -end - -function BaseSimulation:discreteActionSpec() - local act = {} - for _, avatarObject in pairs(self._variables.avatarObjects) do - avatarObject:discreteActionSpec(act) - end - return act -end - -function BaseSimulation:discreteActions(actions) - for _, avatarObject in pairs(self._variables.avatarObjects) do - avatarObject:discreteActions(actions) - end -end - --- This function adds a third-person, global view of the world as well as any --- observations from game objects (e.g. for the Avatars). -function BaseSimulation:addObservations(tileSet, world, observations) - local worldLayerView = world:createView{ - layout = self:textMap().layout, - spriteMap = self._settings.worldSpriteMap, - } - - local worldView = tile.Scene{shape = worldLayerView:gridSize(), set = tileSet} - local spec = { - name = 'WORLD.RGB', - type = 'tensor.ByteTensor', - shape = worldView:shape(), - func = function(grid) - return worldView:render(worldLayerView:observation{grid = grid}) - end, - } - observations[#observations + 1] = spec - -- Add all observations from GameObjects, including avatars. - for _, gameObject in pairs( - self._variables.objectsByFunctionName.addObservations) do - gameObject:addObservations(tileSet, world, observations) - end -end - ---[[ End of initialisation callbacks ]] ---[[ The following callbacks are called during starting (in this order) ]] - -function BaseSimulation:stateCallbacks(callbacks) - -- By now we have the hits and the contacts lists. 
- for _, gameObject in pairs(self._variables.gameObjects) do - gameObject:addTypeCallbacks(callbacks) - end - for _, gameObject in pairs( - self._variables.objectsByFunctionName.addPlayerCallbacks) do - gameObject:addPlayerCallbacks(callbacks) - end - self._updaterRegistry:registerCallbacks(callbacks) - log.v(1, 'Callbacks: ' .. helpers.tostring(callbacks)) -end - -function BaseSimulation:textMap() - if not self._settings.map then - return self._settings.mapsModule[self._settings.mapName] - else - return {layout = self._settings.map, - stateMap = {}} - end -end - ---- After start on all game objects, start the avatar game objects. -function BaseSimulation:_avatarStart(grid) - -- First determine the number of avatars assigned to each spawn group so they - -- can be sampled without replacement to avoid spawn collisions. - local avatarsPerSpawnGroup = {} - -- We need to cache the spawn groups, just in case they are dynamic. The - -- reason being that we need them both for computing how many will go to each - -- group, and later to actually place them in the right group. - local cachedAvatarSpawnGroups = {} - for key, avatarObject in pairs(self._variables.avatarObjects) do - local spawnGroup = avatarObject:getComponent('Avatar'):getSpawnGroup() - cachedAvatarSpawnGroups[key] = spawnGroup - if avatarsPerSpawnGroup[spawnGroup] then - avatarsPerSpawnGroup[spawnGroup] = avatarsPerSpawnGroup[spawnGroup] + 1 - else - avatarsPerSpawnGroup[spawnGroup] = 1 - end - end - - -- Sample the right number of points at which to spawn avatars in each group. - local spawnPointsByGroup = {} - local spawnCountersByGroup = {} - for spawnGroup, numAvatarsThisGroup in pairs(avatarsPerSpawnGroup) do - spawnPointsByGroup[spawnGroup] = grid:groupShuffledWithCount( - random, spawnGroup, numAvatarsThisGroup) - assert(#spawnPointsByGroup[spawnGroup] == numAvatarsThisGroup, - "Insufficient spawn points!") - spawnCountersByGroup[spawnGroup] = 0 - end - - -- Create the avatars. 
- for key, avatarObject in pairs(self._variables.avatarObjects) do - local spawnGroup = cachedAvatarSpawnGroups[key] - spawnCountersByGroup[spawnGroup] = spawnCountersByGroup[spawnGroup] + 1 - local idxInGroup = spawnCountersByGroup[spawnGroup] - local point = spawnPointsByGroup[spawnGroup][idxInGroup] - - -- The call to `start` is where the avatar piece is actually created. - avatarObject:start(grid, point) - local avatarPiece = avatarObject:getPiece() - local avatarIndex = avatarObject:getComponent('Avatar'):getIndex() - self._variables.avatarPieceToIndex[avatarPiece] = avatarIndex - self._variables.avatarIndexToPiece[avatarIndex] = avatarPiece - self._variables.pieceToGameObject[avatarPiece] = avatarObject - end - - for _, gameObject in pairs( - self._variables.objectsByFunctionName.postStart) do - gameObject:postStart(grid) - end -end - --- This function starts all the non-avatar game objects. The starting of avatar --- game objects is handled in a special way by the avatar manager. The avatar --- manager start function will always be called after this one. -function BaseSimulation:start(grid) - self._updaterRegistry:registerGrid(grid) - self._variables.continueEpisodeAfterThisFrame = true - -- Call `reset` on all game objects before calling `start` on any of them. - for _, gameObject in pairs( - self._variables.objectsByFunctionName.reset) do - gameObject:reset() - end - -- Call `start` on all non-avatar game objects before calling `start` on any - -- avatar game objects. - for _, gameObject in pairs(self._variables.gameObjects) do - if not gameObject:hasComponent("Avatar") then - gameObject:start(grid) - local piece = gameObject:getPiece() - self._variables.pieceToGameObject[piece] = gameObject - end - end - self:_avatarStart(grid) - log.v(1, "grid\n" .. tostring(grid)) - -- Keep a reference to the grid. 
- self._grid = grid -end - ---[[ End of starting callbacks ]] ---[[ The following callbacks are called during updating / advancing ]] - -function BaseSimulation:update(grid) - -- Call preUpdate on all gameObjects before calling update on any gameObjects. - for _, gameObject in pairs( - self._variables.objectsByFunctionName.preUpdate) do - gameObject:preUpdate() - end - for _, gameObject in pairs( - self._variables.objectsByFunctionName.update) do - gameObject:update(grid) - end -end - - ---[[ -Returns whether the simulation (episode) should continue for at least another -step, as controlled by the components and its updaters. Notice that it is -possible for this function to return true, and still being at the end of the -episode if we have reached the `maxEpisodeLengthFrames`, as that is controlled -by api_factory. ---]] -function BaseSimulation:continue() - return self._variables.continueEpisodeAfterThisFrame -end - ---[[ End of updating / advancing callbacks ]] ---[[ The functions below are part of the user API ]] - --- End the episode. -function BaseSimulation:endEpisode() - self._variables.continueEpisodeAfterThisFrame = false -end - --- Get the GameObject that owns this dmlab2d piece. -function BaseSimulation:getGameObjectFromPiece(piece) - return self._variables.pieceToGameObject[piece] -end - --- Returns GameObjects that have the given name, as a list. -function BaseSimulation:getGameObjectsByName(name) - local objects = {} - for _, gameObject in pairs(self._variables.gameObjects) do - if name == gameObject.name then - table.insert(objects, gameObject) - end - end - return objects -end - --- Returns the avatar GameObjects, as a table. Keys are object IDs, values are --- the actual GameObjects. 
-function BaseSimulation:getAvatarGameObjects() - return self._variables.avatarObjects -end - ---[[ Return all game objects that have component `componentName`, as a list.]] -function BaseSimulation:getAllGameObjectsWithComponent(componentName) - local objects = {} - for _, gameObject in pairs(self._variables.gameObjects) do - if gameObject:hasComponent(componentName) then - table.insert(objects, gameObject) - end - end - return objects -end - ---[[ Return an unordered table of all GameObjects.]] -function BaseSimulation:getAllGameObjects() - return self._variables.gameObjects -end - ---[[ Access avatar indices by piece id from gameObjects instantiated on -simulation.]] -function BaseSimulation:getAvatarIndexFromPiece(avatarPiece) - return self._variables.avatarPieceToIndex[avatarPiece] -end - ---[[ Access avatar piece ids by player index from gameObjects instantiated on -simulation.]] -function BaseSimulation:getAvatarPieceFromIndex(avatarIndex) - return self._variables.avatarIndexToPiece[avatarIndex] -end - --- Access avatar GameObject by player index. -function BaseSimulation:getAvatarFromIndex(avatarIndex) - return self:getGameObjectFromPiece( - self._variables.avatarIndexToPiece[avatarIndex]) -end - --- Return the number of players in this episode. -function BaseSimulation:getNumPlayers() - return self._settings.numPlayers -end - --- Return a reference to the scene object. -function BaseSimulation:getSceneObject() - return self._settings.sceneObject -end - --- Returns the number of objects currently in a state belonging to `group`. -function BaseSimulation:getGroupCount(group) - return self._grid:groupCount(group) -end - --- Returns a random object currently in a state belonging to a given group. 
-function BaseSimulation:getGroupRandom(group) - local piece = self._grid:groupRandom(random, group) - return self:getGameObjectFromPiece(piece) -end - ---[[ Returns objects currently assigned to states belonging to `group` in a -random order.]] -function BaseSimulation:getGroupShuffled(group) - local pieces = self._grid:groupShuffled(random, group) - local objects = {} - for _, piece in ipairs(pieces) do - table.insert(objects, self:getGameObjectFromPiece(piece)) - end - return objects -end - ---[[ Returns `count` random objects currently assigned to states that belong to -the given group in a random order.]] -function BaseSimulation:getGroupShuffledWithCount(group, count) - local pieces = self._grid:groupShuffledWithCount(random, group, count) - local objects = {} - for _, piece in ipairs(pieces) do - table.insert(objects, self:getGameObjectFromPiece(piece)) - end - return objects -end - ---[[Returns objects currently assigned to states belonging to a certain group in -a random order, where each object has the given probability of being returned.]] -function BaseSimulation:getGroupShuffledWithProbability(group, probability) - local pieces = self._grid:groupShuffledWithProbability( - random, group, probability) - local objects = {} - for _, piece in ipairs(pieces) do - table.insert(objects, self:getGameObjectFromPiece(piece)) - end - return objects -end - -function BaseSimulation:getReward() - -- This is propagated all the way up to the EnvLuaApi in the Advance function. - -- It is currently unused by Melting Pot. -end - -return {BaseSimulation = BaseSimulation} diff --git a/meltingpot/python/bot.py b/meltingpot/python/bot.py index eb04ee32..5df7f585 100644 --- a/meltingpot/python/bot.py +++ b/meltingpot/python/bot.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -13,47 +13,75 @@ # limitations under the License. """Bot factory.""" -from ml_collections import config_dict +import functools -from meltingpot.python.configs import bots as bot_config +from meltingpot.python import substrate +from meltingpot.python.configs import bots as bot_configs +from meltingpot.python.utils.policies import fixed_action_policy from meltingpot.python.utils.policies import policy +from meltingpot.python.utils.policies import policy_factory from meltingpot.python.utils.policies import puppet_policy from meltingpot.python.utils.policies import saved_model_policy +from meltingpot.python.utils.substrates import specs -BOTS = frozenset(bot_config.BOT_CONFIGS) -AVAILABLE_BOTS = BOTS +NOOP_BOT_NAME = 'noop_bot' +NOOP_ACTION = 0 +BOTS = frozenset(bot_configs.BOT_CONFIGS) | {NOOP_BOT_NAME} -def get_config(bot_name: str) -> config_dict.ConfigDict: - """Returns a config for the specified bot. + +def get_config(bot_name: str) -> bot_configs.BotConfig: + """Returns the config for the specified bot.""" + return bot_configs.BOT_CONFIGS[bot_name] + + +def build(name: str) -> policy.Policy: + """Builds a policy for the specified bot. Args: - bot_name: name of the bot. Must be in AVAILABLE_BOTS. + name: the name of the bot. + + Returns: + The bot policy. """ - if bot_name not in AVAILABLE_BOTS: - raise ValueError(f'Unknown bot {bot_name!r}.') - bot = bot_config.BOT_CONFIGS[bot_name] - config = config_dict.create( - bot_name=bot_name, - substrate=bot.substrate, - puppeteer_builder=bot.puppeteer_builder, - saved_model_path=bot.model_path, - ) - return config.lock() + return get_factory(name).build() -def build(config: config_dict.ConfigDict) -> policy.Policy: - """Builds a bot policy for the given config. +def build_from_config(config: bot_configs.BotConfig) -> policy.Policy: + """Builds a policy from the provided bot config. Args: - config: bot config resulting from `get_config`. + config: bot config. Returns: The bot policy. 
""" - saved_model = saved_model_policy.SavedModelPolicy(config.saved_model_path) + saved_model = saved_model_policy.SavedModelPolicy(config.model_path) if config.puppeteer_builder: puppeteer = config.puppeteer_builder() return puppet_policy.PuppetPolicy(puppeteer=puppeteer, puppet=saved_model) else: return saved_model + + +def get_factory(name: str) -> policy_factory.PolicyFactory: + """Returns a factory for the specified bot.""" + if name == NOOP_BOT_NAME: + return policy_factory.PolicyFactory( + timestep_spec=specs.timestep({}), + action_spec=specs.action(NOOP_ACTION + 1), + builder=functools.partial(fixed_action_policy.FixedActionPolicy, + NOOP_ACTION)) + else: + config = bot_configs.BOT_CONFIGS[name] + return get_factory_from_config(config) + + +def get_factory_from_config( + config: bot_configs.BotConfig) -> policy_factory.PolicyFactory: + """Returns a factory from the provided config.""" + substrate_factory = substrate.get_factory(config.substrate) + return policy_factory.PolicyFactory( + timestep_spec=substrate_factory.timestep_spec(), + action_spec=substrate_factory.action_spec(), + builder=lambda: build_from_config(config)) diff --git a/meltingpot/python/bot_test.py b/meltingpot/python/bot_test.py index cf395005..bf22d8bc 100644 --- a/meltingpot/python/bot_test.py +++ b/meltingpot/python/bot_test.py @@ -16,31 +16,20 @@ from absl.testing import absltest from absl.testing import parameterized -from meltingpot.python import bot as bot_factory -from meltingpot.python import substrate as substrate_factory +from meltingpot.python import bot from meltingpot.python.testing import bots as test_utils -from meltingpot.python.utils.scenarios import substrate_transforms -def _get_specs(substrate): - config = substrate_factory.get_config(substrate) - # TODO(b/258239516): remove this when wrapper is removed from scenario.py. 
- timestep_spec = substrate_transforms.tf1_bot_timestep_spec( - timestep_spec=config.timestep_spec, - action_spec=config.action_spec, - num_players=config.num_players) - return timestep_spec, config.action_spec - - -@parameterized.named_parameters( - (name, name) for name in bot_factory.AVAILABLE_BOTS) +@parameterized.named_parameters((name, name) for name in bot.BOTS) class BotTest(test_utils.BotTestCase): def test_step_without_error(self, name): - bot_config = bot_factory.get_config(name) - timestep_spec, action_spec = _get_specs(bot_config.substrate) - with bot_factory.build(bot_config) as policy: - self.assert_compatible(policy, timestep_spec, action_spec) + factory = bot.get_factory(name) + with factory.build() as policy: + self.assert_compatible( + policy, + timestep_spec=factory.timestep_spec(), + action_spec=factory.action_spec()) if __name__ == '__main__': diff --git a/meltingpot/python/configs/bots/__init__.py b/meltingpot/python/configs/bots/__init__.py index aa704f54..a3bbbfbb 100644 --- a/meltingpot/python/configs/bots/__init__.py +++ b/meltingpot/python/configs/bots/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,22 +16,407 @@ import dataclasses import functools import os -from typing import AbstractSet, Callable, Mapping, Optional +import re +from typing import AbstractSet, Callable, Iterable, Literal, Mapping, Optional, Sequence import immutabledict +from meltingpot.python.utils.puppeteers import alternator from meltingpot.python.utils.puppeteers import clean_up +from meltingpot.python.utils.puppeteers import coins +from meltingpot.python.utils.puppeteers import coordination_in_the_matrix +from meltingpot.python.utils.puppeteers import fixed_goal +from meltingpot.python.utils.puppeteers import gift_refinements from meltingpot.python.utils.puppeteers import in_the_matrix from meltingpot.python.utils.puppeteers import puppeteer +from meltingpot.python.utils.puppeteers import running_with_scissors_in_the_matrix +MODELS_ROOT = re.sub('meltingpot/python/.*', 'meltingpot/assets/saved_models', + __file__) -def _find_models_root() -> str: - import re # pylint: disable=g-import-not-at-top - return re.sub('meltingpot/python/.*', 'meltingpot/assets/saved_models', - __file__) +# pylint: disable=line-too-long +# Ordered puppet goals must match the order used in bot training. 
+_PUPPET_GOALS = immutabledict.immutabledict( + # keep-sorted start numeric=yes block=yes + bach_or_stravinsky_in_the_matrix__arena=puppeteer.puppet_goals([ + 'COLLECT_BACH', + 'COLLECT_STRAVINSKY', + 'INTERACT_PLAYING_BACH', + 'INTERACT_PLAYING_STRAVINSKY', + ]), + bach_or_stravinsky_in_the_matrix__repeated=puppeteer.puppet_goals([ + 'COLLECT_BACH', + 'COLLECT_STRAVINSKY', + 'INTERACT_PLAYING_BACH', + 'INTERACT_PLAYING_STRAVINSKY', + ]), + chicken_in_the_matrix__arena=puppeteer.puppet_goals([ + 'COLLECT_DOVE', + 'COLLECT_HAWK', + 'INTERACT_PLAYING_DOVE', + 'INTERACT_PLAYING_HAWK', + ]), + chicken_in_the_matrix__repeated=puppeteer.puppet_goals([ + 'COLLECT_DOVE', + 'COLLECT_HAWK', + 'INTERACT_PLAYING_DOVE', + 'INTERACT_PLAYING_HAWK', + ]), + clean_up=puppeteer.puppet_goals([ + 'EAT', + 'CLEAN', + ]), + coins=puppeteer.puppet_goals([ + 'COOPERATE', + 'DEFECT', + 'SPITE', + ]), + coop_mining=puppeteer.puppet_goals([ + 'EXTRACT_IRON', + 'MINE_GOLD', + 'EXTRACT_GOLD', + 'EXTRACT_ALL', + ]), + externality_mushrooms__dense=puppeteer.puppet_goals([ + 'COLLECT_MUSHROOM_HIHE', + 'COLLECT_MUSHROOM_FIZE', + 'COLLECT_MUSHROOM_ZIFE', + 'COLLECT_MUSHROOM_NINE', + 'DESTROY_MUSHROOM_HIHE', + 'DESTROY_MUSHROOM_FIZE', + 'DESTROY_MUSHROOM_ZIFE', + ]), + gift_refinements=puppeteer.puppet_goals([ + 'COLLECT_TOKENS', + 'GIFT', + 'CONSUME_SIMPLE_TOKENS', + 'CONSUME_TOKENS', + 'FORAGE', + ]), + prisoners_dilemma_in_the_matrix__arena=puppeteer.puppet_goals([ + 'COLLECT_COOPERATE', + 'COLLECT_DEFECT', + 'INTERACT_COOPERATE', + 'INTERACT_DEFECT', + ]), + prisoners_dilemma_in_the_matrix__repeated=puppeteer.puppet_goals([ + 'COLLECT_COOPERATE', + 'COLLECT_DEFECT', + 'INTERACT_COOPERATE', + 'INTERACT_DEFECT', + ]), + pure_coordination_in_the_matrix__arena=puppeteer.puppet_goals([ + 'COLLECT_RED', + 'COLLECT_GREEN', + 'COLLECT_BLUE', + 'INTERACT_PLAYING_RED', + 'INTERACT_PLAYING_GREEN', + 'INTERACT_PLAYING_BLUE', + 'COLLECT_RED_IGNORING_OTHER_CONSIDERATIONS', + 
'COLLECT_GREEN_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_BLUE_IGNORING_OTHER_CONSIDERATIONS', + ]), + pure_coordination_in_the_matrix__repeated=puppeteer.puppet_goals([ + 'COLLECT_RED', + 'COLLECT_GREEN', + 'COLLECT_BLUE', + 'INTERACT_PLAYING_RED', + 'INTERACT_PLAYING_GREEN', + 'INTERACT_PLAYING_BLUE', + 'COLLECT_RED_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_GREEN_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_BLUE_IGNORING_OTHER_CONSIDERATIONS', + ]), + rationalizable_coordination_in_the_matrix__arena=puppeteer.puppet_goals([ + 'COLLECT_YELLOW', + 'COLLECT_VIOLET', + 'COLLECT_CYAN', + 'INTERACT_PLAYING_YELLOW', + 'INTERACT_PLAYING_VIOLET', + 'INTERACT_PLAYING_CYAN', + 'COLLECT_YELLOW_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_VIOLET_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_CYAN_IGNORING_OTHER_CONSIDERATIONS', + ]), + rationalizable_coordination_in_the_matrix__repeated=puppeteer.puppet_goals([ + 'COLLECT_YELLOW', + 'COLLECT_VIOLET', + 'COLLECT_CYAN', + 'INTERACT_PLAYING_YELLOW', + 'INTERACT_PLAYING_VIOLET', + 'INTERACT_PLAYING_CYAN', + 'COLLECT_YELLOW_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_VIOLET_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_CYAN_IGNORING_OTHER_CONSIDERATIONS', + ]), + running_with_scissors_in_the_matrix__arena=puppeteer.puppet_goals([ + 'COLLECT_ROCK', + 'COLLECT_PAPER', + 'COLLECT_SCISSORS', + 'INTERACT_PLAYING_ROCK', + 'INTERACT_PLAYING_PAPER', + 'INTERACT_PLAYING_SCISSORS', + 'COLLECT_ROCK_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_PAPER_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_SCISSORS_IGNORING_OTHER_CONSIDERATIONS', + ]), + running_with_scissors_in_the_matrix__one_shot=puppeteer.puppet_goals([ + 'COLLECT_ROCK', + 'COLLECT_PAPER', + 'COLLECT_SCISSORS', + 'INTERACT_PLAYING_ROCK', + 'INTERACT_PLAYING_PAPER', + 'INTERACT_PLAYING_SCISSORS', + 'COLLECT_ROCK_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_PAPER_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_SCISSORS_IGNORING_OTHER_CONSIDERATIONS', + ]), + 
running_with_scissors_in_the_matrix__repeated=puppeteer.puppet_goals([ + 'COLLECT_ROCK', + 'COLLECT_PAPER', + 'COLLECT_SCISSORS', + 'INTERACT_PLAYING_ROCK', + 'INTERACT_PLAYING_PAPER', + 'INTERACT_PLAYING_SCISSORS', + 'COLLECT_ROCK_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_PAPER_IGNORING_OTHER_CONSIDERATIONS', + 'COLLECT_SCISSORS_IGNORING_OTHER_CONSIDERATIONS', + ]), + stag_hunt_in_the_matrix__arena=puppeteer.puppet_goals([ + 'COLLECT_STAG', + 'COLLECT_HARE', + 'INTERACT_PLAYING_STAG', + 'INTERACT_PLAYING_HARE', + ]), + stag_hunt_in_the_matrix__repeated=puppeteer.puppet_goals([ + 'COLLECT_STAG', + 'COLLECT_HARE', + 'INTERACT_PLAYING_STAG', + 'INTERACT_PLAYING_HARE', + ]), + # keep-sorted end +) -MODELS_ROOT = _find_models_root() +_RESOURCES = immutabledict.immutabledict( + # keep-sorted start numeric=yes block=yes + bach_or_stravinsky_in_the_matrix__arena=immutabledict.immutabledict({ + 'BACH': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__arena']['COLLECT_BACH'], + interact_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__arena']['INTERACT_PLAYING_BACH'], + ), + 'STRAVINSKY': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__arena']['COLLECT_STRAVINSKY'], + interact_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__arena']['INTERACT_PLAYING_STRAVINSKY'], + ), + }), + bach_or_stravinsky_in_the_matrix__repeated=immutabledict.immutabledict({ + 'BACH': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__repeated']['COLLECT_BACH'], + interact_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__repeated']['INTERACT_PLAYING_BACH'], + ), + 'STRAVINSKY': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__repeated']['COLLECT_STRAVINSKY'], + interact_goal=_PUPPET_GOALS['bach_or_stravinsky_in_the_matrix__repeated']['INTERACT_PLAYING_STRAVINSKY'], + ), + }), + 
chicken_in_the_matrix__arena=immutabledict.immutabledict({ + 'DOVE': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['chicken_in_the_matrix__arena']['COLLECT_DOVE'], + interact_goal=_PUPPET_GOALS['chicken_in_the_matrix__arena']['INTERACT_PLAYING_DOVE'], + ), + 'HAWK': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['chicken_in_the_matrix__arena']['COLLECT_HAWK'], + interact_goal=_PUPPET_GOALS['chicken_in_the_matrix__arena']['INTERACT_PLAYING_HAWK'], + ), + }), + chicken_in_the_matrix__repeated=immutabledict.immutabledict({ + 'DOVE': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['chicken_in_the_matrix__repeated']['COLLECT_DOVE'], + interact_goal=_PUPPET_GOALS['chicken_in_the_matrix__repeated']['INTERACT_PLAYING_DOVE'], + ), + 'HAWK': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['chicken_in_the_matrix__repeated']['COLLECT_HAWK'], + interact_goal=_PUPPET_GOALS['chicken_in_the_matrix__repeated']['INTERACT_PLAYING_HAWK'], + ), + }), + prisoners_dilemma_in_the_matrix__arena=immutabledict.immutabledict({ + 'COOPERATE': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__arena']['COLLECT_COOPERATE'], + interact_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__arena']['INTERACT_COOPERATE'], + ), + 'DEFECT': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__arena']['COLLECT_DEFECT'], + interact_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__arena']['INTERACT_DEFECT'], + ), + }), + prisoners_dilemma_in_the_matrix__repeated=immutabledict.immutabledict({ + 'COOPERATE': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__repeated']['COLLECT_COOPERATE'], + interact_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__repeated']['INTERACT_COOPERATE'], + ), + 'DEFECT': in_the_matrix.Resource( + index=1, + 
collect_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__repeated']['COLLECT_DEFECT'], + interact_goal=_PUPPET_GOALS['prisoners_dilemma_in_the_matrix__repeated']['INTERACT_DEFECT'], + ), + }), + pure_coordination_in_the_matrix__arena=immutabledict.immutabledict({ + 'RED': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__arena']['COLLECT_RED'], + interact_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__arena']['INTERACT_PLAYING_RED'], + ), + 'GREEN': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__arena']['COLLECT_GREEN'], + interact_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__arena']['INTERACT_PLAYING_GREEN'], + ), + 'BLUE': in_the_matrix.Resource( + index=2, + collect_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__arena']['COLLECT_BLUE'], + interact_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__arena']['INTERACT_PLAYING_BLUE'], + ), + }), + pure_coordination_in_the_matrix__repeated=immutabledict.immutabledict({ + 'RED': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__repeated']['COLLECT_RED'], + interact_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__repeated']['INTERACT_PLAYING_RED'], + ), + 'GREEN': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__repeated']['COLLECT_GREEN'], + interact_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__repeated']['INTERACT_PLAYING_GREEN'], + ), + 'BLUE': in_the_matrix.Resource( + index=2, + collect_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__repeated']['COLLECT_BLUE'], + interact_goal=_PUPPET_GOALS['pure_coordination_in_the_matrix__repeated']['INTERACT_PLAYING_BLUE'], + ), + }), + rationalizable_coordination_in_the_matrix__arena=immutabledict.immutabledict({ + 'YELLOW': in_the_matrix.Resource( + index=0, + 
collect_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__arena']['COLLECT_YELLOW'], + interact_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__arena']['INTERACT_PLAYING_YELLOW'], + ), + 'VIOLET': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__arena']['COLLECT_VIOLET'], + interact_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__arena']['INTERACT_PLAYING_VIOLET'], + ), + 'CYAN': in_the_matrix.Resource( + index=2, + collect_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__arena']['COLLECT_CYAN'], + interact_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__arena']['INTERACT_PLAYING_CYAN'], + ), + }), + rationalizable_coordination_in_the_matrix__repeated=immutabledict.immutabledict({ + 'YELLOW': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__repeated']['COLLECT_YELLOW'], + interact_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__repeated']['INTERACT_PLAYING_YELLOW'], + ), + 'VIOLET': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__repeated']['COLLECT_VIOLET'], + interact_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__repeated']['INTERACT_PLAYING_VIOLET'], + ), + 'CYAN': in_the_matrix.Resource( + index=2, + collect_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__repeated']['COLLECT_CYAN'], + interact_goal=_PUPPET_GOALS['rationalizable_coordination_in_the_matrix__repeated']['INTERACT_PLAYING_CYAN'], + ), + }), + running_with_scissors_in_the_matrix__arena=immutabledict.immutabledict({ + 'ROCK': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__arena']['COLLECT_ROCK'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__arena']['INTERACT_PLAYING_ROCK'], + ), + 'PAPER': in_the_matrix.Resource( + index=1, + 
collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__arena']['COLLECT_PAPER'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__arena']['INTERACT_PLAYING_PAPER'], + ), + 'SCISSORS': in_the_matrix.Resource( + index=2, + collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__arena']['COLLECT_SCISSORS'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__arena']['INTERACT_PLAYING_SCISSORS'], + ), + }), + running_with_scissors_in_the_matrix__one_shot=immutabledict.immutabledict({ + 'ROCK': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__one_shot']['COLLECT_ROCK'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__one_shot']['INTERACT_PLAYING_ROCK'], + ), + 'PAPER': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__one_shot']['COLLECT_PAPER'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__one_shot']['INTERACT_PLAYING_PAPER'], + ), + 'SCISSORS': in_the_matrix.Resource( + index=2, + collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__one_shot']['COLLECT_SCISSORS'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__one_shot']['INTERACT_PLAYING_SCISSORS'], + ), + }), + running_with_scissors_in_the_matrix__repeated=immutabledict.immutabledict({ + 'ROCK': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__repeated']['COLLECT_ROCK'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__repeated']['INTERACT_PLAYING_ROCK'], + ), + 'PAPER': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__repeated']['COLLECT_PAPER'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__repeated']['INTERACT_PLAYING_PAPER'], + ), + 'SCISSORS': in_the_matrix.Resource( + index=2, + 
collect_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__repeated']['COLLECT_SCISSORS'], + interact_goal=_PUPPET_GOALS['running_with_scissors_in_the_matrix__repeated']['INTERACT_PLAYING_SCISSORS'], + ), + }), + stag_hunt_in_the_matrix__arena=immutabledict.immutabledict({ + 'STAG': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__arena']['COLLECT_STAG'], + interact_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__arena']['INTERACT_PLAYING_STAG'], + ), + 'HARE': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__arena']['COLLECT_HARE'], + interact_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__arena']['INTERACT_PLAYING_HARE'], + ), + }), + stag_hunt_in_the_matrix__repeated=immutabledict.immutabledict({ + 'STAG': in_the_matrix.Resource( + index=0, + collect_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__repeated']['COLLECT_STAG'], + interact_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__repeated']['INTERACT_PLAYING_STAG'], + ), + 'HARE': in_the_matrix.Resource( + index=1, + collect_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__repeated']['COLLECT_HARE'], + interact_goal=_PUPPET_GOALS['stag_hunt_in_the_matrix__repeated']['INTERACT_PLAYING_HARE'], + ), + }), + # keep-sorted end +) @dataclasses.dataclass(frozen=True) @@ -40,10 +425,10 @@ class BotConfig: Attributes: substrate: the substrate the bot was trained for. - roles: the roles the bot supports. + roles: the roles the bot was trained for. model_path: the path to the bot's saved model. - puppeteer_builder: an optional function that returns the puppeteer - used to control the bot. + model_version: whether the bot is a "1.0" bot or a new "1.1" bot. + puppeteer_builder: returns the puppeteer used to control the bot. 
""" substrate: str roles: AbstractSet[str] @@ -54,669 +439,2830 @@ def __post_init__(self): object.__setattr__(self, 'roles', frozenset(self.roles)) -def _saved_model(substrate: str, - model: str, - models_root: str = MODELS_ROOT) -> BotConfig: +def saved_model(*, + substrate: str, + roles: Iterable[str] = ('default',), + model: str, + models_root: str = MODELS_ROOT) -> BotConfig: """Returns the config for a saved model bot. Args: substrate: the substrate on which the bot was trained. + roles: the roles the bot was trained for. model: the name of the model. models_root: The path to the directory containing the saved_models. """ model_path = os.path.join(models_root, substrate, model) return BotConfig( substrate=substrate, - roles={'default'}, + roles=frozenset(roles), model_path=model_path, puppeteer_builder=None) -def _puppet(substrate: str, - puppeteer_builder: Callable[[], puppeteer.Puppeteer], - models_root: str = MODELS_ROOT) -> BotConfig: +def puppet(*, + substrate: str, + roles: Iterable[str] = ('default',), + model: str, + puppeteer_builder: Callable[[], puppeteer.Puppeteer], + models_root: str = MODELS_ROOT) -> BotConfig: """Returns the config for a puppet bot. Args: substrate: the substrate on which the bot was trained. - puppeteer_builder: returns the puppeteer that controls the puppet. - models_root: The path to the directory containing the saved_models. + roles: the roles the bot was trained for. + model: the name of the model. + puppeteer_builder: returns the puppeteer used to control the bot. + models_root: the path to the directory containing the saved_models. 
""" - puppet_path = os.path.join(models_root, substrate, 'puppet') + puppet_path = os.path.join(models_root, substrate, model) return BotConfig( substrate=substrate, - roles={'default'}, + roles=frozenset(roles), model_path=puppet_path, puppeteer_builder=puppeteer_builder) BOT_CONFIGS: Mapping[str, BotConfig] = immutabledict.immutabledict( # keep-sorted start numeric=yes block=yes - ah3gs_bot_finding_berry_two_the_most_tasty_0=_saved_model( - substrate='allelopathic_harvest', - model='ah3gs_bot_finding_berry_two_the_most_tasty_0', - ), - ah3gs_bot_finding_berry_two_the_most_tasty_1=_saved_model( - substrate='allelopathic_harvest', - model='ah3gs_bot_finding_berry_two_the_most_tasty_1', - ), - ah3gs_bot_finding_berry_two_the_most_tasty_4=_saved_model( - substrate='allelopathic_harvest', - model='ah3gs_bot_finding_berry_two_the_most_tasty_4', - ), - ah3gs_bot_finding_berry_two_the_most_tasty_5=_saved_model( - substrate='allelopathic_harvest', - model='ah3gs_bot_finding_berry_two_the_most_tasty_5', - ), - arena_rws_free_0=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_free_0', - ), - arena_rws_free_1=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_free_1', - ), - arena_rws_free_2=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_free_2', - ), - arena_rws_pure_paper_0=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_paper_0', - ), - arena_rws_pure_paper_1=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_paper_1', - ), - arena_rws_pure_paper_2=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_paper_2', - ), - arena_rws_pure_paper_3=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_paper_3', - ), - arena_rws_pure_rock_0=_saved_model( - 
substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_rock_0', - ), - arena_rws_pure_rock_1=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_rock_1', - ), - arena_rws_pure_rock_2=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_rock_2', - ), - arena_rws_pure_rock_3=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_rock_3', - ), - arena_rws_pure_scissors_0=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_scissors_0', - ), - arena_rws_pure_scissors_1=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_scissors_1', - ), - arena_rws_pure_scissors_2=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_scissors_2', - ), - arena_rws_pure_scissors_3=_saved_model( - substrate='arena_running_with_scissors_in_the_matrix', - model='arena_rws_pure_scissors_3', - ), - bach_fan_0=_saved_model( - substrate='bach_or_stravinsky_in_the_matrix', - model='bach_fan_0', - ), - bach_fan_1=_saved_model( - substrate='bach_or_stravinsky_in_the_matrix', - model='bach_fan_1', - ), - bach_fan_2=_saved_model( - substrate='bach_or_stravinsky_in_the_matrix', - model='bach_fan_2', - ), - chemistry_branched_chain_reaction_X_specialist_0=_saved_model( - substrate='chemistry_branched_chain_reaction', - model='chemistry_branched_chain_reaction_X_specialist_0', - ), - chemistry_branched_chain_reaction_X_specialist_1=_saved_model( - substrate='chemistry_branched_chain_reaction', - model='chemistry_branched_chain_reaction_X_specialist_1', - ), - chemistry_branched_chain_reaction_X_specialist_2=_saved_model( - substrate='chemistry_branched_chain_reaction', - model='chemistry_branched_chain_reaction_X_specialist_2', - ), - chemistry_branched_chain_reaction_Y_specialist_0=_saved_model( - 
substrate='chemistry_branched_chain_reaction', - model='chemistry_branched_chain_reaction_Y_specialist_0', - ), - chemistry_branched_chain_reaction_Y_specialist_1=_saved_model( - substrate='chemistry_branched_chain_reaction', - model='chemistry_branched_chain_reaction_Y_specialist_1', - ), - chemistry_branched_chain_reaction_Y_specialist_2=_saved_model( - substrate='chemistry_branched_chain_reaction', - model='chemistry_branched_chain_reaction_Y_specialist_2', - ), - chemistry_metabolic_cycles_food1_specialist_0=_saved_model( - substrate='chemistry_metabolic_cycles', - model='chemistry_metabolic_cycles_food1_specialist_0', - ), - chemistry_metabolic_cycles_food1_specialist_1=_saved_model( - substrate='chemistry_metabolic_cycles', - model='chemistry_metabolic_cycles_food1_specialist_1', - ), - chemistry_metabolic_cycles_food2_specialist_0=_saved_model( - substrate='chemistry_metabolic_cycles', - model='chemistry_metabolic_cycles_food2_specialist_0', - ), - chemistry_metabolic_cycles_food2_specialist_1=_saved_model( - substrate='chemistry_metabolic_cycles', - model='chemistry_metabolic_cycles_food2_specialist_1', - ), - chicken_free_0=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_free_0', - ), - chicken_free_1=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_free_1', - ), - chicken_free_2=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_free_2', - ), - chicken_free_3=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_free_3', - ), - chicken_puppet_grim=_puppet( - substrate='chicken_in_the_matrix', + allelopathic_harvest__open__bot_that_supports_green_0=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_green_0', + roles=('default', 'player_who_likes_red', 'player_who_likes_green',), + ), + allelopathic_harvest__open__bot_that_supports_green_1=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_green_1', + roles=('default', 
'player_who_likes_red', 'player_who_likes_green',), + ), + allelopathic_harvest__open__bot_that_supports_green_2=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_green_2', + roles=('default', 'player_who_likes_red', 'player_who_likes_green',), + ), + allelopathic_harvest__open__bot_that_supports_green_3=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_green_3', + roles=('default', 'player_who_likes_red', 'player_who_likes_green',), + ), + allelopathic_harvest__open__bot_that_supports_red_0=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_red_0', + roles=('default', 'player_who_likes_red', 'player_who_likes_green',), + ), + allelopathic_harvest__open__bot_that_supports_red_1=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_red_1', + roles=('default', 'player_who_likes_red', 'player_who_likes_green',), + ), + allelopathic_harvest__open__bot_that_supports_red_2=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_red_2', + roles=('default', 'player_who_likes_red', 'player_who_likes_green',), + ), + allelopathic_harvest__open__bot_that_supports_red_3=saved_model( + substrate='allelopathic_harvest__open', + model='bot_that_loves_red_3', + roles=('default', 'player_who_likes_red', 'player_who_likes_green',), + ), + bach_or_stravinsky_in_the_matrix__arena__bach_picker_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__arena', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), puppeteer_builder=functools.partial( - in_the_matrix.GrimTwoResource, threshold=2), - ), - chicken_pure_dove_0=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_dove_0', - ), - chicken_pure_dove_1=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_dove_1', - ), - chicken_pure_dove_2=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_dove_2', - ), - 
chicken_pure_dove_3=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_dove_3', - ), - chicken_pure_hawk_0=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_hawk_0', - ), - chicken_pure_hawk_1=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_hawk_1', - ), - chicken_pure_hawk_2=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_hawk_2', - ), - chicken_pure_hawk_3=_saved_model( - substrate='chicken_in_the_matrix', - model='chicken_pure_hawk_3', - ), - classic_rws_free_0=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_free_0', - ), - classic_rws_free_1=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_free_1', - ), - classic_rws_free_2=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_free_2', - ), - classic_rws_pure_paper_0=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_paper_0', - ), - classic_rws_pure_paper_1=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_paper_1', - ), - classic_rws_pure_paper_2=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_paper_2', - ), - classic_rws_pure_paper_3=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_paper_3', - ), - classic_rws_pure_rock_0=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_rock_0', - ), - classic_rws_pure_rock_1=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_rock_1', - ), - classic_rws_pure_rock_2=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_rock_2', - ), - classic_rws_pure_rock_3=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_rock_3', - ), - 
classic_rws_pure_scissors_0=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_scissors_0', - ), - classic_rws_pure_scissors_1=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_scissors_1', - ), - classic_rws_pure_scissors_2=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_scissors_2', - ), - classic_rws_pure_scissors_3=_saved_model( - substrate='running_with_scissors_in_the_matrix', - model='classic_rws_pure_scissors_3', - ), - cleanup_cleaner_1=_saved_model( + in_the_matrix.Specialist, + target=_RESOURCES['bach_or_stravinsky_in_the_matrix__arena']['BACH'], + margin=3, + ), + ), + bach_or_stravinsky_in_the_matrix__arena__stravinsky_picker_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__arena', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['bach_or_stravinsky_in_the_matrix__arena']['STRAVINSKY'], + margin=3, + ), + ), + bach_or_stravinsky_in_the_matrix__arena__turn_taking_initial_bach_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__arena', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.AlternatingSpecialist, + targets=[ + _RESOURCES['bach_or_stravinsky_in_the_matrix__arena']['BACH'], + _RESOURCES['bach_or_stravinsky_in_the_matrix__arena']['STRAVINSKY'], + ], + interactions_per_target=2, + margin=2, + ), + ), + bach_or_stravinsky_in_the_matrix__arena__turn_taking_initial_stravinsky_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__arena', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.AlternatingSpecialist, + targets=[ + _RESOURCES['bach_or_stravinsky_in_the_matrix__arena']['STRAVINSKY'], + _RESOURCES['bach_or_stravinsky_in_the_matrix__arena']['BACH'], 
+ ], + interactions_per_target=2, + margin=2, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__bach_picker_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__bach_tft_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + defect_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + tremble_probability=0, + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__bach_tft_tremble_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + defect_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + tremble_probability=0.25, + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__stravinsky_picker_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__stravinsky_tft_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + 
puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + defect_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + tremble_probability=0, + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__stravinsky_tft_tremble_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + defect_resource=_RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + tremble_probability=0.25, + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_bach_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.AlternatingSpecialist, + targets=[ + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + ], + interactions_per_target=1, + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_bach_1=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.AlternatingSpecialist, + targets=[ + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + ], + interactions_per_target=3, + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_stravinsky_0=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 
'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.AlternatingSpecialist, + targets=[ + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + ], + interactions_per_target=1, + margin=5, + ), + ), + bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_stravinsky_1=puppet( + substrate='bach_or_stravinsky_in_the_matrix__repeated', + model='puppet_0', + roles=('default', 'bach_fan', 'stravinsky_fan',), + puppeteer_builder=functools.partial( + in_the_matrix.AlternatingSpecialist, + targets=[ + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['STRAVINSKY'], + _RESOURCES['bach_or_stravinsky_in_the_matrix__repeated']['BACH'], + ], + interactions_per_target=3, + margin=5, + ), + ), + boat_race__eight_races__cooperator_0=saved_model( + substrate='boat_race__eight_races', + model='cooperator_0', + roles=('default', 'target'), + ), + boat_race__eight_races__defector_0=saved_model( + substrate='boat_race__eight_races', + model='defector_0', + roles=('default',), + ), + chemistry__three_metabolic_cycles__blue_0=saved_model( + substrate='chemistry__three_metabolic_cycles', + model='blue_0', + ), + chemistry__three_metabolic_cycles__green_0=saved_model( + substrate='chemistry__three_metabolic_cycles', + model='green_0', + ), + chemistry__three_metabolic_cycles__yellow_0=saved_model( + substrate='chemistry__three_metabolic_cycles', + model='yellow_0', + ), + chemistry__three_metabolic_cycles_with_plentiful_distractors__blue_0=saved_model( + substrate='chemistry__three_metabolic_cycles_with_plentiful_distractors', + model='blue_0', + ), + chemistry__three_metabolic_cycles_with_plentiful_distractors__green_0=saved_model( + substrate='chemistry__three_metabolic_cycles_with_plentiful_distractors', + model='green_0', + ), + chemistry__three_metabolic_cycles_with_plentiful_distractors__yellow_0=saved_model( + 
substrate='chemistry__three_metabolic_cycles_with_plentiful_distractors', + model='yellow_0', + ), + chemistry__two_metabolic_cycles__blue_0=saved_model( + substrate='chemistry__two_metabolic_cycles', + model='blue_0', + ), + chemistry__two_metabolic_cycles__green_0=saved_model( + substrate='chemistry__two_metabolic_cycles', + model='green_0', + ), + chemistry__two_metabolic_cycles_with_distractors__blue_0=saved_model( + substrate='chemistry__two_metabolic_cycles_with_distractors', + model='blue_0', + ), + chemistry__two_metabolic_cycles_with_distractors__green_0=saved_model( + substrate='chemistry__two_metabolic_cycles_with_distractors', + model='green_0', + ), + chicken_in_the_matrix__arena__puppet_dove_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + margin=1, + ), + ), + chicken_in_the_matrix__arena__puppet_dove_margin_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + margin=5, + ), + ), + chicken_in_the_matrix__arena__puppet_grim_one_strike_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + threshold=1, + margin=1, + ), + ), + chicken_in_the_matrix__arena__puppet_grim_one_strike_margin_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + threshold=1, + margin=5, + ), + ), + 
chicken_in_the_matrix__arena__puppet_grim_three_strikes_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + threshold=3, + margin=1, + ), + ), + chicken_in_the_matrix__arena__puppet_grim_three_strikes_margin_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + threshold=3, + margin=5, + ), + ), + chicken_in_the_matrix__arena__puppet_grim_two_strikes_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + threshold=2, + margin=1, + ), + ), + chicken_in_the_matrix__arena__puppet_grim_two_strikes_margin_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__arena']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + threshold=2, + margin=5, + ), + ), + chicken_in_the_matrix__arena__puppet_hawk_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + margin=1, + ), + ), + chicken_in_the_matrix__arena__puppet_hawk_margin_0=puppet( + substrate='chicken_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + 
target=_RESOURCES['chicken_in_the_matrix__arena']['HAWK'], + margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_corrigible_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Corrigible, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + threshold=3, + margin=5, + tremble_probability=0, + ), + ), + chicken_in_the_matrix__repeated__puppet_corrigible_tremble_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Corrigible, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + threshold=3, + margin=5, + tremble_probability=0.15, + ), + ), + chicken_in_the_matrix__repeated__puppet_dove_margin_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_dove_margin_1=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + margin=7, + ), + ), + chicken_in_the_matrix__repeated__puppet_flip_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + initial_target=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + final_target=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + threshold=3, + initial_margin=1, + final_margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_grim_one_strike_margin_0=puppet( + substrate='chicken_in_the_matrix__repeated', + 
model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + threshold=1, + margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_grim_one_strike_margin_1=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + threshold=1, + margin=7, + ), + ), + chicken_in_the_matrix__repeated__puppet_grim_two_strikes_margin_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + threshold=2, + margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_grim_two_strikes_margin_1=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + threshold=2, + margin=7, + ), + ), + chicken_in_the_matrix__repeated__puppet_hawk_margin_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_hawk_margin_1=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + 
target=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + margin=7, + ), + ), + chicken_in_the_matrix__repeated__puppet_tft_margin_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + tremble_probability=0, + margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_tft_margin_1=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + tremble_probability=0, + margin=7, + ), + ), + chicken_in_the_matrix__repeated__puppet_tft_tremble_margin_0=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + tremble_probability=0.15, + margin=5, + ), + ), + chicken_in_the_matrix__repeated__puppet_tft_tremble_margin_1=puppet( + substrate='chicken_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['chicken_in_the_matrix__repeated']['DOVE'], + defect_resource=_RESOURCES['chicken_in_the_matrix__repeated']['HAWK'], + tremble_probability=0.15, + margin=7, + ), + ), + clean_up__cleaner_0=saved_model( substrate='clean_up', - model='cleanup_cleaner_1', + model='cleaner_0', ), - cleanup_cleaner_2=_saved_model( + clean_up__cleaner_1=saved_model( substrate='clean_up', - model='cleanup_cleaner_2', + model='cleaner_1', ), - cleanup_consumer_0=_saved_model( + clean_up__consumer_0=saved_model( 
substrate='clean_up', - model='cleanup_consumer_0', + model='consumer_0', ), - cleanup_consumer_1=_saved_model( + clean_up__consumer_1=saved_model( substrate='clean_up', - model='cleanup_consumer_1', + model='consumer_1', ), - cleanup_consumer_2=_saved_model( + clean_up__puppet_alternator_first_cleans_0=puppet( substrate='clean_up', - model='cleanup_consumer_2', - ), - cleanup_puppet_alternate_clean_first=_puppet( + model='puppet_0', + roles=('default',), + puppeteer_builder=functools.partial( + alternator.Alternator, + goals=[ + _PUPPET_GOALS['clean_up']['CLEAN'], + _PUPPET_GOALS['clean_up']['EAT'], + ], + steps_per_goal=200, + )), + clean_up__puppet_alternator_first_eats_0=puppet( substrate='clean_up', - puppeteer_builder=clean_up.AlternateCleanFirst, - ), - cleanup_puppet_alternate_eat_first=_puppet( + model='puppet_0', + roles=('default',), + puppeteer_builder=functools.partial( + alternator.Alternator, + goals=[ + _PUPPET_GOALS['clean_up']['EAT'], + _PUPPET_GOALS['clean_up']['CLEAN'], + ], + steps_per_goal=200, + ), + ), + clean_up__puppet_high_threshold_reciprocator_0=puppet( substrate='clean_up', - puppeteer_builder=clean_up.AlternateEatFirst, - ), - cleanup_puppet_reciprocator_threshold_low=_puppet( + model='puppet_0', + roles=('default',), + puppeteer_builder=functools.partial( + clean_up.ConditionalCleaner, + clean_goal=_PUPPET_GOALS['clean_up']['CLEAN'], + eat_goal=_PUPPET_GOALS['clean_up']['EAT'], + coplayer_cleaning_signal='NUM_OTHERS_WHO_CLEANED_THIS_STEP', + threshold=3, + recency_window=5, + reciprocation_period=75, + niceness_period=0, + ), + ), + clean_up__puppet_low_threshold_reciprocator_0=puppet( substrate='clean_up', + model='puppet_0', + roles=('default',), puppeteer_builder=functools.partial( - clean_up.ConditionalCleaner, threshold=1), - ), - cleanup_puppet_reciprocator_threshold_mid=_puppet( + clean_up.ConditionalCleaner, + clean_goal=_PUPPET_GOALS['clean_up']['CLEAN'], + eat_goal=_PUPPET_GOALS['clean_up']['EAT'], + 
coplayer_cleaning_signal='NUM_OTHERS_WHO_CLEANED_THIS_STEP', + threshold=2, + recency_window=5, + reciprocation_period=75, + niceness_period=0, + ), + ), + clean_up__puppet_nice_low_threshold_reciprocator_0=puppet( substrate='clean_up', + model='puppet_0', + roles=('default',), puppeteer_builder=functools.partial( - clean_up.ConditionalCleaner, threshold=2), - ), - closed_commons_zapper_0=_saved_model( - substrate='commons_harvest_closed', - model='closed_commons_zapper_0', - ), - closed_commons_zapper_1=_saved_model( - substrate='commons_harvest_closed', - model='closed_commons_zapper_1', - ), - closed_commons_zapper_2=_saved_model( - substrate='commons_harvest_closed', - model='closed_commons_zapper_2', - ), - closed_commons_zapper_3=_saved_model( - substrate='commons_harvest_closed', - model='closed_commons_zapper_3', - ), - collaborative_cooking_impassable_vmpo_pop_size_ten_0=_saved_model( - substrate='collaborative_cooking_impassable', - model='collaborative_cooking_impassable_vmpo_pop_size_ten_0', - ), - collaborative_cooking_impassable_vmpo_pop_size_ten_2=_saved_model( - substrate='collaborative_cooking_impassable', - model='collaborative_cooking_impassable_vmpo_pop_size_ten_2', - ), - collaborative_cooking_impassable_vmpo_pop_size_ten_3=_saved_model( - substrate='collaborative_cooking_impassable', - model='collaborative_cooking_impassable_vmpo_pop_size_ten_3', - ), - collaborative_cooking_impassable_vmpo_pop_size_ten_4=_saved_model( - substrate='collaborative_cooking_impassable', - model='collaborative_cooking_impassable_vmpo_pop_size_ten_4', - ), - collaborative_cooking_impassable_vmpo_pop_size_ten_6=_saved_model( - substrate='collaborative_cooking_impassable', - model='collaborative_cooking_impassable_vmpo_pop_size_ten_6', - ), - collaborative_cooking_impassable_vmpo_pop_size_ten_7=_saved_model( - substrate='collaborative_cooking_impassable', - model='collaborative_cooking_impassable_vmpo_pop_size_ten_7', - ), - 
collaborative_cooking_impassable_vmpo_pop_size_ten_9=_saved_model( - substrate='collaborative_cooking_impassable', - model='collaborative_cooking_impassable_vmpo_pop_size_ten_9', - ), - collaborative_cooking_passable_vmpo_pop_size_ten_5=_saved_model( - substrate='collaborative_cooking_passable', - model='collaborative_cooking_passable_vmpo_pop_size_ten_5', - ), - ctf_pseudorewards_for_main_game_events_a3c_2=_saved_model( - substrate='capture_the_flag', - model='ctf_pseudorewards_for_main_game_events_a3c_2', - ), - ctf_pseudorewards_for_main_game_events_a3c_6=_saved_model( - substrate='capture_the_flag', - model='ctf_pseudorewards_for_main_game_events_a3c_6', - ), - ctf_pseudorewards_for_main_game_events_vmpo_0=_saved_model( - substrate='capture_the_flag', - model='ctf_pseudorewards_for_main_game_events_vmpo_0', - ), - ctf_pseudorewards_for_main_game_events_vmpo_3=_saved_model( - substrate='capture_the_flag', - model='ctf_pseudorewards_for_main_game_events_vmpo_3', - ), - ctf_pseudorewards_for_main_game_events_vmpo_4=_saved_model( - substrate='capture_the_flag', - model='ctf_pseudorewards_for_main_game_events_vmpo_4', - ), - ctf_pseudorewards_for_main_game_events_vmpo_6=_saved_model( - substrate='capture_the_flag', - model='ctf_pseudorewards_for_main_game_events_vmpo_6', - ), - ctf_pseudorewards_for_main_game_events_vmpo_7=_saved_model( - substrate='capture_the_flag', - model='ctf_pseudorewards_for_main_game_events_vmpo_7', - ), - koth_default_vmpo_0=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_0', - ), - koth_default_vmpo_1=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_1', - ), - koth_default_vmpo_2=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_2', - ), - koth_default_vmpo_3=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_3', - ), - koth_default_vmpo_4=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_4', - ), - 
koth_default_vmpo_5=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_5', - ), - koth_default_vmpo_6=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_6', - ), - koth_default_vmpo_7=_saved_model( - substrate='king_of_the_hill', - model='koth_default_vmpo_7', - ), - koth_zap_while_in_control_a3c_0=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_0', - ), - koth_zap_while_in_control_a3c_1=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_1', - ), - koth_zap_while_in_control_a3c_2=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_2', - ), - koth_zap_while_in_control_a3c_3=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_3', - ), - koth_zap_while_in_control_a3c_4=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_4', - ), - koth_zap_while_in_control_a3c_5=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_5', - ), - koth_zap_while_in_control_a3c_6=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_6', + clean_up.ConditionalCleaner, + clean_goal=_PUPPET_GOALS['clean_up']['CLEAN'], + eat_goal=_PUPPET_GOALS['clean_up']['EAT'], + coplayer_cleaning_signal='NUM_OTHERS_WHO_CLEANED_THIS_STEP', + threshold=2, + recency_window=5, + reciprocation_period=75, + niceness_period=200, + ), + ), + coins__puppet_cooperator_0=puppet( + substrate='coins', + model='puppet_1', + puppeteer_builder=functools.partial( + fixed_goal.FixedGoal, + goal=_PUPPET_GOALS['coins']['COOPERATE'], + ), ), - koth_zap_while_in_control_a3c_7=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_a3c_7', + coins__puppet_defector_0=puppet( + substrate='coins', + model='puppet_1', + puppeteer_builder=functools.partial( + fixed_goal.FixedGoal, + goal=_PUPPET_GOALS['coins']['DEFECT'], + ), ), - 
koth_zap_while_in_control_vmpo_0=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_0', + coins__puppet_one_strike_reciprocator_0=puppet( + substrate='coins', + model='puppet_1', + puppeteer_builder=functools.partial( + coins.Reciprocator, + cooperate_goal=_PUPPET_GOALS['coins']['COOPERATE'], + defect_goal=_PUPPET_GOALS['coins']['DEFECT'], + spite_goal=_PUPPET_GOALS['coins']['SPITE'], + partner_defection_signal='MISMATCHED_COIN_COLLECTED_BY_PARTNER', + recency_window=100, + threshold=1, + frames_to_punish=100, + spiteful_punishment_window=0, + ), + ), + coins__puppet_one_strike_strong_reciprocator_0=puppet( + substrate='coins', + model='puppet_1', + puppeteer_builder=functools.partial( + coins.Reciprocator, + cooperate_goal=_PUPPET_GOALS['coins']['COOPERATE'], + defect_goal=_PUPPET_GOALS['coins']['DEFECT'], + spite_goal=_PUPPET_GOALS['coins']['SPITE'], + partner_defection_signal='MISMATCHED_COIN_COLLECTED_BY_PARTNER', + recency_window=100, + threshold=1, + frames_to_punish=100, + spiteful_punishment_window=50, + ), + ), + coins__puppet_three_strikes_reciprocator_0=puppet( + substrate='coins', + model='puppet_1', + puppeteer_builder=functools.partial( + coins.Reciprocator, + cooperate_goal=_PUPPET_GOALS['coins']['COOPERATE'], + defect_goal=_PUPPET_GOALS['coins']['DEFECT'], + spite_goal=_PUPPET_GOALS['coins']['SPITE'], + partner_defection_signal='MISMATCHED_COIN_COLLECTED_BY_PARTNER', + recency_window=150, + threshold=3, + frames_to_punish=150, + spiteful_punishment_window=0, + ), + ), + coins__puppet_three_strikes_strong_reciprocator_0=puppet( + substrate='coins', + model='puppet_1', + puppeteer_builder=functools.partial( + coins.Reciprocator, + cooperate_goal=_PUPPET_GOALS['coins']['COOPERATE'], + defect_goal=_PUPPET_GOALS['coins']['DEFECT'], + spite_goal=_PUPPET_GOALS['coins']['SPITE'], + partner_defection_signal='MISMATCHED_COIN_COLLECTED_BY_PARTNER', + recency_window=150, + threshold=3, + frames_to_punish=150, + 
spiteful_punishment_window=75, + ), + ), + collaborative_cooking__asymmetric__apprentice_0=saved_model( + substrate='collaborative_cooking__asymmetric', + model='apprentice_0', + ), + collaborative_cooking__asymmetric__apprentice_1=saved_model( + substrate='collaborative_cooking__asymmetric', + model='apprentice_1', + ), + collaborative_cooking__asymmetric__chef_0=saved_model( + substrate='collaborative_cooking__asymmetric', + model='chef_0', + ), + collaborative_cooking__asymmetric__chef_1=saved_model( + substrate='collaborative_cooking__asymmetric', + model='chef_1', + ), + collaborative_cooking__circuit__apprentice_0=saved_model( + substrate='collaborative_cooking__circuit', + model='apprentice_0', + ), + collaborative_cooking__circuit__apprentice_1=saved_model( + substrate='collaborative_cooking__circuit', + model='apprentice_1', + ), + collaborative_cooking__circuit__chef_0=saved_model( + substrate='collaborative_cooking__circuit', + model='chef_0', + ), + collaborative_cooking__circuit__chef_1=saved_model( + substrate='collaborative_cooking__circuit', + model='chef_1', + ), + collaborative_cooking__cramped__apprentice_0=saved_model( + substrate='collaborative_cooking__cramped', + model='apprentice_0', + ), + collaborative_cooking__cramped__apprentice_1=saved_model( + substrate='collaborative_cooking__cramped', + model='apprentice_1', + ), + collaborative_cooking__cramped__chef_0=saved_model( + substrate='collaborative_cooking__cramped', + model='chef_0', + ), + collaborative_cooking__cramped__chef_1=saved_model( + substrate='collaborative_cooking__cramped', + model='chef_1', + ), + collaborative_cooking__crowded__independent_chef_0=saved_model( + substrate='collaborative_cooking__crowded', + model='independent_chef_0', + ), + collaborative_cooking__crowded__robust_chef_0=saved_model( + substrate='collaborative_cooking__crowded', + model='robust_chef_0', + ), + collaborative_cooking__figure_eight__independent_chef_0=saved_model( + 
substrate='collaborative_cooking__figure_eight', + model='independent_chef_0', + ), + collaborative_cooking__figure_eight__robust_chef_0=saved_model( + substrate='collaborative_cooking__figure_eight', + model='robust_chef_0', + ), + collaborative_cooking__forced__apprentice_0=saved_model( + substrate='collaborative_cooking__forced', + model='apprentice_0', + ), + collaborative_cooking__forced__apprentice_1=saved_model( + substrate='collaborative_cooking__forced', + model='apprentice_1', + ), + collaborative_cooking__forced__chef_0=saved_model( + substrate='collaborative_cooking__forced', + model='chef_0', + ), + collaborative_cooking__forced__chef_1=saved_model( + substrate='collaborative_cooking__forced', + model='chef_1', + ), + collaborative_cooking__ring__apprentice_0=saved_model( + substrate='collaborative_cooking__ring', + model='apprentice_0', + ), + collaborative_cooking__ring__apprentice_1=saved_model( + substrate='collaborative_cooking__ring', + model='apprentice_1', + ), + collaborative_cooking__ring__chef_0=saved_model( + substrate='collaborative_cooking__ring', + model='chef_0', + ), + collaborative_cooking__ring__chef_1=saved_model( + substrate='collaborative_cooking__ring', + model='chef_1', + ), + commons_harvest__closed__free_0=saved_model( + substrate='commons_harvest__closed', + model='free_0', ), - koth_zap_while_in_control_vmpo_1=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_1', + commons_harvest__closed__free_1=saved_model( + substrate='commons_harvest__closed', + model='free_1', ), - koth_zap_while_in_control_vmpo_2=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_2', + commons_harvest__closed__free_2=saved_model( + substrate='commons_harvest__closed', + model='free_2', ), - koth_zap_while_in_control_vmpo_3=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_3', + commons_harvest__closed__free_3=saved_model( + 
substrate='commons_harvest__closed', + model='free_3', ), - koth_zap_while_in_control_vmpo_4=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_4', + commons_harvest__closed__pacifist_0=saved_model( + substrate='commons_harvest__closed', + model='pacifist_0', ), - koth_zap_while_in_control_vmpo_5=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_5', + commons_harvest__closed__pacifist_1=saved_model( + substrate='commons_harvest__closed', + model='pacifist_1', ), - koth_zap_while_in_control_vmpo_6=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_6', + commons_harvest__closed__pacifist_2=saved_model( + substrate='commons_harvest__closed', + model='pacifist_2', ), - koth_zap_while_in_control_vmpo_7=_saved_model( - substrate='king_of_the_hill', - model='koth_zap_while_in_control_vmpo_7', + commons_harvest__open__free_0=saved_model( + substrate='commons_harvest__open', + model='free_0', ), - open_commons_zapper_0=_saved_model( - substrate='commons_harvest_open', - model='open_commons_zapper_0', + commons_harvest__open__free_1=saved_model( + substrate='commons_harvest__open', + model='free_1', ), - open_commons_zapper_1=_saved_model( - substrate='commons_harvest_open', - model='open_commons_zapper_1', + commons_harvest__open__pacifist_0=saved_model( + substrate='commons_harvest__open', + model='pacifist_0', ), - partnership_commons_putative_good_partner_4=_saved_model( - substrate='commons_harvest_partnership', - model='partnership_commons_putative_good_partner_4', + commons_harvest__open__pacifist_1=saved_model( + substrate='commons_harvest__open', + model='pacifist_1', ), - partnership_commons_putative_good_partner_5=_saved_model( - substrate='commons_harvest_partnership', - model='partnership_commons_putative_good_partner_5', + commons_harvest__partnership__free_0=saved_model( + substrate='commons_harvest__partnership', + model='free_0', ), - 
partnership_commons_putative_good_partner_7=_saved_model( - substrate='commons_harvest_partnership', - model='partnership_commons_putative_good_partner_7', + commons_harvest__partnership__free_1=saved_model( + substrate='commons_harvest__partnership', + model='free_1', ), - partnership_commons_zapper_1=_saved_model( - substrate='commons_harvest_partnership', - model='partnership_commons_zapper_1', + commons_harvest__partnership__free_2=saved_model( + substrate='commons_harvest__partnership', + model='free_2', ), - partnership_commons_zapper_2=_saved_model( - substrate='commons_harvest_partnership', - model='partnership_commons_zapper_2', + commons_harvest__partnership__good_partner_0=saved_model( + substrate='commons_harvest__partnership', + model='good_partner_0', ), - prisoners_dilemma_cooperator_2=_saved_model( - substrate='prisoners_dilemma_in_the_matrix', - model='prisoners_dilemma_cooperator_2', + commons_harvest__partnership__good_partner_1=saved_model( + substrate='commons_harvest__partnership', + model='good_partner_1', ), - prisoners_dilemma_cooperator_4=_saved_model( - substrate='prisoners_dilemma_in_the_matrix', - model='prisoners_dilemma_cooperator_4', + commons_harvest__partnership__good_partner_2=saved_model( + substrate='commons_harvest__partnership', + model='good_partner_2', ), - prisoners_dilemma_defector_0=_saved_model( - substrate='prisoners_dilemma_in_the_matrix', - model='prisoners_dilemma_defector_0', + commons_harvest__partnership__pacifist_0=saved_model( + substrate='commons_harvest__partnership', + model='pacifist_0', ), - prisoners_dilemma_defector_2=_saved_model( - substrate='prisoners_dilemma_in_the_matrix', - model='prisoners_dilemma_defector_2', + commons_harvest__partnership__pacifist_1=saved_model( + substrate='commons_harvest__partnership', + model='pacifist_1', ), - prisoners_dilemma_free_0=_saved_model( - substrate='prisoners_dilemma_in_the_matrix', - model='prisoners_dilemma_free_0', + 
commons_harvest__partnership__pacifist_2=saved_model( + substrate='commons_harvest__partnership', + model='pacifist_2', ), - prisoners_dilemma_free_1=_saved_model( - substrate='prisoners_dilemma_in_the_matrix', - model='prisoners_dilemma_free_1', + commons_harvest__partnership__sustainable_fighter_0=saved_model( + substrate='commons_harvest__partnership', + model='sustainable_fighter_0', ), - prisoners_dilemma_free_2=_saved_model( - substrate='prisoners_dilemma_in_the_matrix', - model='prisoners_dilemma_free_2', + commons_harvest__partnership__sustainable_fighter_1=saved_model( + substrate='commons_harvest__partnership', + model='sustainable_fighter_1', ), - prisoners_dilemma_puppet_grim_threshold_high=_puppet( - substrate='prisoners_dilemma_in_the_matrix', + coop_mining__cooperator_0=puppet( + substrate='coop_mining', + model='puppet_0', + roles=('default', 'target'), puppeteer_builder=functools.partial( - in_the_matrix.GrimTwoResource, threshold=2), - ), - prisoners_dilemma_puppet_grim_threshold_low=_puppet( - substrate='prisoners_dilemma_in_the_matrix', + fixed_goal.FixedGoal, + _PUPPET_GOALS['coop_mining']['EXTRACT_GOLD'], + ), + ), + coop_mining__defector_0=puppet( + substrate='coop_mining', + model='puppet_0', + roles=('default',), puppeteer_builder=functools.partial( - in_the_matrix.GrimTwoResource, threshold=1), - ), - pure_coordination_type_1_specialist_0=_saved_model( - substrate='pure_coordination_in_the_matrix', - model='pure_coordination_type_1_specialist_0', - ), - pure_coordination_type_1_specialist_1=_saved_model( - substrate='pure_coordination_in_the_matrix', - model='pure_coordination_type_1_specialist_1', - ), - pure_coordination_type_2_specialist_0=_saved_model( - substrate='pure_coordination_in_the_matrix', - model='pure_coordination_type_2_specialist_0', - ), - pure_coordination_type_2_specialist_1=_saved_model( - substrate='pure_coordination_in_the_matrix', - model='pure_coordination_type_2_specialist_1', - ), - 
pure_coordination_type_3_specialist_0=_saved_model( - substrate='pure_coordination_in_the_matrix', - model='pure_coordination_type_3_specialist_0', - ), - pure_coordination_type_3_specialist_1=_saved_model( - substrate='pure_coordination_in_the_matrix', - model='pure_coordination_type_3_specialist_1', - ), - rationalizable_coordination_type_1_specialist_0=_saved_model( - substrate='rationalizable_coordination_in_the_matrix', - model='rationalizable_coordination_type_1_specialist_0', - ), - rationalizable_coordination_type_1_specialist_1=_saved_model( - substrate='rationalizable_coordination_in_the_matrix', - model='rationalizable_coordination_type_1_specialist_1', - ), - rationalizable_coordination_type_2_specialist_0=_saved_model( - substrate='rationalizable_coordination_in_the_matrix', - model='rationalizable_coordination_type_2_specialist_0', - ), - rationalizable_coordination_type_2_specialist_1=_saved_model( - substrate='rationalizable_coordination_in_the_matrix', - model='rationalizable_coordination_type_2_specialist_1', - ), - rationalizable_coordination_type_3_specialist_0=_saved_model( - substrate='rationalizable_coordination_in_the_matrix', - model='rationalizable_coordination_type_3_specialist_0', - ), - rationalizable_coordination_type_3_specialist_1=_saved_model( - substrate='rationalizable_coordination_in_the_matrix', - model='rationalizable_coordination_type_3_specialist_1', - ), - stag_hunt_hare_specialist_0=_saved_model( - substrate='stag_hunt_in_the_matrix', - model='stag_hunt_hare_specialist_0', - ), - stag_hunt_hare_specialist_1=_saved_model( - substrate='stag_hunt_in_the_matrix', - model='stag_hunt_hare_specialist_1', - ), - stag_hunt_hare_specialist_2=_saved_model( - substrate='stag_hunt_in_the_matrix', - model='stag_hunt_hare_specialist_2', - ), - stag_hunt_puppet_grim=_puppet( - substrate='stag_hunt_in_the_matrix', + fixed_goal.FixedGoal, + _PUPPET_GOALS['coop_mining']['EXTRACT_IRON'], + ), + ), + coop_mining__mixed_0=puppet( + 
substrate='coop_mining', + model='puppet_0', + roles=('default', 'target'), puppeteer_builder=functools.partial( - in_the_matrix.GrimTwoResource, threshold=1), - ), - stag_hunt_stag_specialist_3=_saved_model( - substrate='stag_hunt_in_the_matrix', - model='stag_hunt_stag_specialist_3', - ), - stag_hunt_stag_specialist_5=_saved_model( - substrate='stag_hunt_in_the_matrix', - model='stag_hunt_stag_specialist_5', - ), - stravinsky_fan_0=_saved_model( - substrate='bach_or_stravinsky_in_the_matrix', - model='stravinsky_fan_0', - ), - stravinsky_fan_1=_saved_model( - substrate='bach_or_stravinsky_in_the_matrix', - model='stravinsky_fan_1', - ), - stravinsky_fan_2=_saved_model( - substrate='bach_or_stravinsky_in_the_matrix', - model='stravinsky_fan_2', - ), - territory_closed_reply_to_zapper_0=_saved_model( - substrate='territory_rooms', - model='territory_closed_reply_to_zapper_0', - ), - territory_closed_reply_to_zapper_1=_saved_model( - substrate='territory_rooms', - model='territory_closed_reply_to_zapper_1', - ), - territory_open_painter_0=_saved_model( - substrate='territory_open', - model='territory_open_painter_0', - ), - territory_open_painter_1=_saved_model( - substrate='territory_open', - model='territory_open_painter_1', - ), - territory_open_painter_2=_saved_model( - substrate='territory_open', - model='territory_open_painter_2', + alternator.Alternator, + goals=[ + _PUPPET_GOALS['coop_mining']['EXTRACT_IRON'], + _PUPPET_GOALS['coop_mining']['EXTRACT_GOLD'], + ], + steps_per_goal=100, + ), + ), + daycare__foraging_child_0=saved_model( + substrate='daycare', + model='foraging_child_0', + roles=('child',), + ), + daycare__foraging_parent_0=saved_model( + substrate='daycare', + model='foraging_parent_0', + roles=('parent',), + ), + daycare__helping_parent_0=saved_model( + substrate='daycare', + model='helping_parent_0', + roles=('parent',), + ), + daycare__pointing_child_0=saved_model( + substrate='daycare', + model='pointing_child_0', + roles=('child',), + ), + 
externality_mushrooms__dense__puppet_fize_0=puppet( + substrate='externality_mushrooms__dense', + model='puppet_0', + roles=('default',), + puppeteer_builder=functools.partial( + fixed_goal.FixedGoal, (_PUPPET_GOALS['externality_mushrooms__dense'] + ['COLLECT_MUSHROOM_FIZE'])), ), - territory_open_painter_3=_saved_model( - substrate='territory_open', - model='territory_open_painter_3', + externality_mushrooms__dense__puppet_hihe_0=puppet( + substrate='externality_mushrooms__dense', + model='puppet_0', + roles=('default',), + puppeteer_builder=functools.partial( + fixed_goal.FixedGoal, (_PUPPET_GOALS['externality_mushrooms__dense'] + ['COLLECT_MUSHROOM_HIHE'])), + ), + factory_commons__either_or__sustainable_0=saved_model( + substrate='factory_commons__either_or', + model='sustainable_0', + roles=('default',), + ), + factory_commons__either_or__sustainable_1=saved_model( + substrate='factory_commons__either_or', + model='sustainable_1', + roles=('default',), + ), + factory_commons__either_or__sustainable_2=saved_model( + substrate='factory_commons__either_or', + model='sustainable_2', + roles=('default',), + ), + factory_commons__either_or__unsustainable_0=saved_model( + substrate='factory_commons__either_or', + model='unsustainable_0', + roles=('default',), + ), + factory_commons__either_or__unsustainable_1=saved_model( + substrate='factory_commons__either_or', + model='unsustainable_1', + roles=('default',), + ), + factory_commons__either_or__unsustainable_2=saved_model( + substrate='factory_commons__either_or', + model='unsustainable_2', + roles=('default',), + ), + fruit_market__concentric_rivers__apple_farmer_0=saved_model( + substrate='fruit_market__concentric_rivers', + model='apple_farmer_0', + roles=('apple_farmer',), + ), + fruit_market__concentric_rivers__apple_farmer_1=saved_model( + substrate='fruit_market__concentric_rivers', + model='apple_farmer_1', + roles=('apple_farmer',), + ), + fruit_market__concentric_rivers__apple_farmer_2=saved_model( + 
substrate='fruit_market__concentric_rivers', + model='apple_farmer_2', + roles=('apple_farmer',), + ), + fruit_market__concentric_rivers__banana_farmer_0=saved_model( + substrate='fruit_market__concentric_rivers', + model='banana_farmer_0', + roles=('banana_farmer',), + ), + fruit_market__concentric_rivers__banana_farmer_1=saved_model( + substrate='fruit_market__concentric_rivers', + model='banana_farmer_1', + roles=('banana_farmer',), + ), + fruit_market__concentric_rivers__banana_farmer_2=saved_model( + substrate='fruit_market__concentric_rivers', + model='banana_farmer_2', + roles=('banana_farmer',), + ), + gift_refinements__cooperator_0=puppet( + substrate='gift_refinements', + roles=('default', 'target'), + model='puppet_0', + puppeteer_builder=functools.partial( + gift_refinements.GiftRefinementsCooperator, + collect_goal=_PUPPET_GOALS['gift_refinements']['COLLECT_TOKENS'], + consume_goal=_PUPPET_GOALS['gift_refinements']['CONSUME_TOKENS'], + gift_goal=_PUPPET_GOALS['gift_refinements']['GIFT'], + ), + ), + gift_refinements__defector_0=puppet( + substrate='gift_refinements', + roles=('default', 'target'), + model='puppet_0', + puppeteer_builder=functools.partial( + fixed_goal.FixedGoal, + goal=_PUPPET_GOALS['gift_refinements']['FORAGE'], + ), + ), + gift_refinements__extreme_cooperator_0=puppet( + substrate='gift_refinements', + roles=('default', 'target'), + model='puppet_0', + puppeteer_builder=functools.partial( + gift_refinements.GiftRefinementsExtremeCooperator, + collect_goal=_PUPPET_GOALS['gift_refinements']['COLLECT_TOKENS'], + consume_goal=_PUPPET_GOALS['gift_refinements']['CONSUME_TOKENS'], + gift_goal=_PUPPET_GOALS['gift_refinements']['GIFT'], + ), + ), + paintball__capture_the_flag__shaped_bot_0=saved_model( + substrate='paintball__capture_the_flag', + model='shaped_0', + roles=('default',), + ), + paintball__capture_the_flag__shaped_bot_1=saved_model( + substrate='paintball__capture_the_flag', + model='shaped_1', + roles=('default',), + ), + 
paintball__capture_the_flag__shaped_bot_2=saved_model( + substrate='paintball__capture_the_flag', + model='shaped_2', + roles=('default',), + ), + paintball__capture_the_flag__shaped_bot_3=saved_model( + substrate='paintball__capture_the_flag', + model='shaped_3', + roles=('default',), + ), + paintball__king_of_the_hill__free_0=saved_model( + substrate='paintball__king_of_the_hill', + model='free_bot_0', + roles=('default',), + ), + paintball__king_of_the_hill__free_1=saved_model( + substrate='paintball__king_of_the_hill', + model='free_bot_1', + roles=('default',), + ), + paintball__king_of_the_hill__free_2=saved_model( + substrate='paintball__king_of_the_hill', + model='free_bot_2', + roles=('default',), + ), + paintball__king_of_the_hill__spawn_camper_0=saved_model( + substrate='paintball__king_of_the_hill', + model='spawn_camper_0', + roles=('default',), + ), + paintball__king_of_the_hill__spawn_camper_1=saved_model( + substrate='paintball__king_of_the_hill', + model='spawn_camper_1', + roles=('default',), + ), + paintball__king_of_the_hill__spawn_camper_2=saved_model( + substrate='paintball__king_of_the_hill', + model='spawn_camper_2', + roles=('default',), + ), + paintball__king_of_the_hill__spawn_camper_3=saved_model( + substrate='paintball__king_of_the_hill', + model='spawn_camper_3', + roles=('default',), + ), + predator_prey__alley_hunt__predator_0=saved_model( + substrate='predator_prey__alley_hunt', + model='basic_predator_0', + roles=('predator',), + ), + predator_prey__alley_hunt__predator_1=saved_model( + substrate='predator_prey__alley_hunt', + model='basic_predator_1', + roles=('predator',), + ), + predator_prey__alley_hunt__predator_2=saved_model( + substrate='predator_prey__alley_hunt', + model='basic_predator_2', + roles=('predator',), + ), + predator_prey__alley_hunt__prey_0=saved_model( + substrate='predator_prey__alley_hunt', + model='basic_prey_0', + roles=('prey',), + ), + predator_prey__alley_hunt__prey_1=saved_model( + 
substrate='predator_prey__alley_hunt', + model='basic_prey_1', + roles=('prey',), + ), + predator_prey__alley_hunt__prey_2=saved_model( + substrate='predator_prey__alley_hunt', + model='basic_prey_2', + roles=('prey',), + ), + predator_prey__open__basic_predator_0=saved_model( + substrate='predator_prey__open', + model='basic_predator_0', + roles=('predator',), + ), + predator_prey__open__basic_predator_1=saved_model( + substrate='predator_prey__open', + model='basic_predator_1', + roles=('predator',), + ), + predator_prey__open__basic_prey_0=saved_model( + substrate='predator_prey__open', + model='basic_prey_0', + roles=('prey',), + ), + predator_prey__open__basic_prey_1=saved_model( + substrate='predator_prey__open', + model='basic_prey_1', + roles=('prey',), + ), + predator_prey__open__basic_prey_2=saved_model( + substrate='predator_prey__open', + model='basic_prey_2', + roles=('prey',), + ), + predator_prey__open__smart_prey_0=saved_model( + substrate='predator_prey__open', + model='smart_prey_0', + roles=('prey',), + ), + predator_prey__open__smart_prey_1=saved_model( + substrate='predator_prey__open', + model='smart_prey_1', + roles=('prey',), + ), + predator_prey__open__smart_prey_2=saved_model( + substrate='predator_prey__open', + model='smart_prey_2', + roles=('prey',), + ), + predator_prey__orchard__acorn_specialist_prey_0=saved_model( + substrate='predator_prey__orchard', + model='acorn_specialist_prey_0', + roles=('prey',), + ), + predator_prey__orchard__acorn_specialist_prey_1=saved_model( + substrate='predator_prey__orchard', + model='acorn_specialist_prey_1', + roles=('prey',), + ), + predator_prey__orchard__acorn_specialist_prey_2=saved_model( + substrate='predator_prey__orchard', + model='acorn_specialist_prey_2', + roles=('prey',), + ), + predator_prey__orchard__acorn_specialist_prey_3=saved_model( + substrate='predator_prey__orchard', + model='acorn_specialist_prey_3', + roles=('prey',), + ), + 
predator_prey__orchard__acorn_specialist_prey_4=saved_model( + substrate='predator_prey__orchard', + model='acorn_specialist_prey_4', + roles=('prey',), + ), + predator_prey__orchard__basic_predator_0=saved_model( + substrate='predator_prey__orchard', + model='basic_predator_0', + roles=('predator',), + ), + predator_prey__orchard__basic_predator_1=saved_model( + substrate='predator_prey__orchard', + model='basic_predator_1', + roles=('predator',), + ), + predator_prey__orchard__basic_predator_2=saved_model( + substrate='predator_prey__orchard', + model='basic_predator_2', + roles=('predator',), + ), + predator_prey__orchard__basic_prey_0=saved_model( + substrate='predator_prey__orchard', + model='basic_prey_0', + roles=('prey',), + ), + predator_prey__orchard__basic_prey_1=saved_model( + substrate='predator_prey__orchard', + model='basic_prey_1', + roles=('prey',), + ), + predator_prey__orchard__basic_prey_2=saved_model( + substrate='predator_prey__orchard', + model='basic_prey_2', + roles=('prey',), + ), + predator_prey__orchard__basic_prey_3=saved_model( + substrate='predator_prey__orchard', + model='basic_prey_3', + roles=('prey',), + ), + predator_prey__orchard__basic_prey_4=saved_model( + substrate='predator_prey__orchard', + model='basic_prey_4', + roles=('prey',), + ), + predator_prey__orchard__basic_prey_5=saved_model( + substrate='predator_prey__orchard', + model='basic_prey_5', + roles=('prey',), + ), + predator_prey__random_forest__basic_predator_0=saved_model( + substrate='predator_prey__random_forest', + model='basic_predator_0', + roles=('predator',), + ), + predator_prey__random_forest__basic_predator_1=saved_model( + substrate='predator_prey__random_forest', + model='basic_predator_1', + roles=('predator',), + ), + predator_prey__random_forest__basic_predator_2=saved_model( + substrate='predator_prey__random_forest', + model='basic_predator_2', + roles=('predator',), + ), + predator_prey__random_forest__basic_prey_0=saved_model( + 
substrate='predator_prey__random_forest', + model='basic_prey_0', + roles=('prey',), + ), + predator_prey__random_forest__basic_prey_1=saved_model( + substrate='predator_prey__random_forest', + model='basic_prey_1', + roles=('prey',), + ), + predator_prey__random_forest__basic_prey_2=saved_model( + substrate='predator_prey__random_forest', + model='basic_prey_2', + roles=('prey',), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_cooperator_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + margin=1, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_cooperator_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_defector_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + margin=1, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_defector_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_grim_one_strike_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + 
defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + threshold=1, + margin=1, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_grim_one_strike_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + threshold=1, + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_grim_three_strikes_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + threshold=3, + margin=1, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_grim_three_strikes_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + threshold=3, + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_grim_two_strikes_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + threshold=2, + margin=1, + ), + ), + prisoners_dilemma_in_the_matrix__arena__puppet_grim_two_strikes_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__arena', + model='puppet_1', + 
puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__arena']['DEFECT'], + threshold=2, + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_cooperator_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_cooperator_margin_1=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + margin=7, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_corrigible_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Corrigible, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + threshold=3, + margin=5, + tremble_probability=0, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_corrigible_tremble_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Corrigible, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + threshold=3, + margin=5, + tremble_probability=0.15, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_defector_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', 
+ puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_defector_margin_1=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + margin=7, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_flip_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + initial_target=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + final_target=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + threshold=3, + initial_margin=1, + final_margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_grim_one_strike_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + threshold=1, + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_grim_one_strike_margin_1=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + threshold=1, + margin=7, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_grim_two_strikes_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + 
puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + threshold=2, + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_grim_two_strikes_margin_1=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + threshold=2, + margin=7, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_tft_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + tremble_probability=0, + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_tft_margin_1=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + tremble_probability=0, + margin=7, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_tft_tremble_margin_0=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + 
defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + tremble_probability=0.15, + margin=5, + ), + ), + prisoners_dilemma_in_the_matrix__repeated__puppet_tft_tremble_margin_1=puppet( + substrate='prisoners_dilemma_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['COOPERATE'], + defect_resource=_RESOURCES['prisoners_dilemma_in_the_matrix__repeated']['DEFECT'], + tremble_probability=0.15, + margin=7, + ), + ), + pure_coordination_in_the_matrix__arena__flip_a2b_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + initial_margin=1, + final_margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__flip_a2c_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + initial_margin=1, + final_margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__flip_b2a_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + initial_margin=1, + final_margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__flip_b2c_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + 
puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + initial_margin=1, + final_margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__flip_c2a_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + initial_margin=1, + final_margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__flip_c2b_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + initial_margin=1, + final_margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__pure_a_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__pure_b_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__pure_c_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + 
margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__pure_greedy_a_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + margin=6, + ), + ), + pure_coordination_in_the_matrix__arena__pure_greedy_b_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + margin=6, + ), + ), + pure_coordination_in_the_matrix__arena__pure_greedy_c_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + margin=6, + ), + ), + pure_coordination_in_the_matrix__arena__resp2prev_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + coordination_in_the_matrix.CoordinateWithPrevious, + resources=( + _RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + _RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + _RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + ), + margin=1, + ), + ), + pure_coordination_in_the_matrix__arena__resp2prev_greedy_0=puppet( + substrate='pure_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + coordination_in_the_matrix.CoordinateWithPrevious, + resources=( + _RESOURCES['pure_coordination_in_the_matrix__arena']['RED'], + _RESOURCES['pure_coordination_in_the_matrix__arena']['GREEN'], + _RESOURCES['pure_coordination_in_the_matrix__arena']['BLUE'], + ), + margin=6, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_a2b_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + 
puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_a2b_1=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_a2c_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_a2c_1=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_b2a_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + 
initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_b2a_1=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_b2c_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_b2c_1=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_c2a_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_c2a_1=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + 
in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_c2b_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__flip_c2b_1=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + final_target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + initial_margin=5, + final_margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__pure_a_margin_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__pure_b_margin_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__pure_c_margin_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + 
target=_RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + margin=5, + ), + ), + pure_coordination_in_the_matrix__repeated__resp2prev_margin_0=puppet( + substrate='pure_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + coordination_in_the_matrix.CoordinateWithPrevious, + resources=( + _RESOURCES['pure_coordination_in_the_matrix__repeated']['RED'], + _RESOURCES['pure_coordination_in_the_matrix__repeated']['GREEN'], + _RESOURCES['pure_coordination_in_the_matrix__repeated']['BLUE'], + ), + margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__arena__flip_a2b_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + initial_margin=1, + final_margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__flip_a2c_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + initial_margin=1, + final_margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__flip_b2a_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + initial_margin=1, + final_margin=1, + ), + ), + 
rationalizable_coordination_in_the_matrix__arena__flip_b2c_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + initial_margin=1, + final_margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__flip_c2a_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + initial_margin=1, + final_margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__flip_c2b_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=5, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + initial_margin=1, + final_margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__pure_a_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__pure_b_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + 
target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__pure_c_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + margin=1, + ), + ), + rationalizable_coordination_in_the_matrix__arena__pure_greedy_a_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + margin=6, + ), + ), + rationalizable_coordination_in_the_matrix__arena__pure_greedy_b_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + margin=6, + ), + ), + rationalizable_coordination_in_the_matrix__arena__pure_greedy_c_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + margin=6, + ), + ), + rationalizable_coordination_in_the_matrix__arena__resp2prev_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + coordination_in_the_matrix.CoordinateWithPrevious, + resources=( + _RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + _RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + _RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + ), + margin=1, + ), + ), + 
rationalizable_coordination_in_the_matrix__arena__resp2prev_greedy_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__arena', + model='puppet_0', + puppeteer_builder=functools.partial( + coordination_in_the_matrix.CoordinateWithPrevious, + resources=( + _RESOURCES['rationalizable_coordination_in_the_matrix__arena']['YELLOW'], + _RESOURCES['rationalizable_coordination_in_the_matrix__arena']['VIOLET'], + _RESOURCES['rationalizable_coordination_in_the_matrix__arena']['CYAN'], + ), + margin=6, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_a2b_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_a2b_1=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_a2c_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + initial_margin=5, + final_margin=5, + ), + ), + 
rationalizable_coordination_in_the_matrix__repeated__flip_a2c_1=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_b2a_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_b2a_1=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_b2c_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_b2c_1=puppet( + 
substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_c2a_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_c2a_1=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_c2b_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=4, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__flip_c2b_1=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', 
+ puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=12, + initial_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + final_target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + initial_margin=5, + final_margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__pure_a_margin_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__pure_b_margin_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__pure_c_margin_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + margin=5, + ), + ), + rationalizable_coordination_in_the_matrix__repeated__resp2prev_margin_0=puppet( + substrate='rationalizable_coordination_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + coordination_in_the_matrix.CoordinateWithPrevious, + resources=( + _RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['YELLOW'], + _RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['VIOLET'], + _RESOURCES['rationalizable_coordination_in_the_matrix__repeated']['CYAN'], + ), + margin=5, + ), + ), + running_with_scissors_in_the_matrix__arena__flip_p2r_0=puppet( + substrate='running_with_scissors_in_the_matrix__arena', 
+ model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=3, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['PAPER'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['ROCK'], + initial_margin=1, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__arena__flip_r2s_0=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=3, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['ROCK'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['SCISSORS'], + initial_margin=1, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__arena__flip_s2p_0=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=3, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['SCISSORS'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['PAPER'], + initial_margin=1, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__arena__free_0=saved_model( + substrate='running_with_scissors_in_the_matrix__arena', + model='free_0', + ), + running_with_scissors_in_the_matrix__arena__paper_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['PAPER'], + margin=3, + ), + ), + running_with_scissors_in_the_matrix__arena__paper_margin_1=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['PAPER'], + margin=5, + ), + ), +
running_with_scissors_in_the_matrix__arena__rock_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['ROCK'], + margin=3, + ), + ), + running_with_scissors_in_the_matrix__arena__rock_margin_1=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['ROCK'], + margin=5, + ), + ), + running_with_scissors_in_the_matrix__arena__scissors_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['SCISSORS'], + margin=3, + ), + ), + running_with_scissors_in_the_matrix__arena__scissors_margin_1=puppet( + substrate='running_with_scissors_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__arena']['SCISSORS'], + margin=5, + ), + ), + running_with_scissors_in_the_matrix__one_shot__paper_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__one_shot', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__one_shot']['PAPER'], + margin=3, + ), + ), + running_with_scissors_in_the_matrix__one_shot__rock_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__one_shot', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__one_shot']['ROCK'], + margin=3, + ), + ), + running_with_scissors_in_the_matrix__one_shot__scissors_margin_0=puppet( + 
substrate='running_with_scissors_in_the_matrix__one_shot', + model='puppet_0', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__one_shot']['SCISSORS'], + margin=3, + ), + ), + running_with_scissors_in_the_matrix__repeated__flip_p2r_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=3, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['PAPER'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['ROCK'], + initial_margin=1, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__flip_p2r_1=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=2, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['PAPER'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['ROCK'], + initial_margin=5, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__flip_r2s_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=3, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['ROCK'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['SCISSORS'], + initial_margin=1, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__flip_r2s_1=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=2, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['ROCK'], + 
final_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['SCISSORS'], + initial_margin=5, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__flip_s2p_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=3, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['SCISSORS'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['PAPER'], + initial_margin=1, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__flip_s2p_1=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + threshold=2, + initial_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['SCISSORS'], + final_target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['PAPER'], + initial_margin=5, + final_margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__free_0=saved_model( + substrate='running_with_scissors_in_the_matrix__repeated', + model='free_0', + ), + running_with_scissors_in_the_matrix__repeated__paper_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['PAPER'], + margin=1, + ), + ), + running_with_scissors_in_the_matrix__repeated__paper_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['PAPER'], + margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__resp2prev_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + 
puppeteer_builder=functools.partial( + running_with_scissors_in_the_matrix.CounterPrevious, + rock_resource=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['ROCK'], + paper_resource=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['PAPER'], + scissors_resource=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['SCISSORS'], + margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__rock_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['ROCK'], + margin=1, + ), + ), + running_with_scissors_in_the_matrix__repeated__rock_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['ROCK'], + margin=5, + ), + ), + running_with_scissors_in_the_matrix__repeated__scissors_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['SCISSORS'], + margin=1, + ), + ), + running_with_scissors_in_the_matrix__repeated__scissors_margin_0=puppet( + substrate='running_with_scissors_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['running_with_scissors_in_the_matrix__repeated']['SCISSORS'], + margin=5, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_grim_one_strike_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + 
defect_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + threshold=1, + margin=1, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_grim_one_strike_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + threshold=1, + margin=5, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_grim_three_strikes_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + threshold=3, + margin=1, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_grim_three_strikes_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + threshold=3, + margin=5, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_grim_two_strikes_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + threshold=2, + margin=1, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_grim_two_strikes_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + 
defect_resource=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + threshold=2, + margin=5, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_hare_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + margin=1, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_hare_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__arena']['HARE'], + margin=5, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_stag_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + margin=1, + ), + ), + stag_hunt_in_the_matrix__arena__puppet_stag_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__arena', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__arena']['STAG'], + margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_corrigible_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Corrigible, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + threshold=3, + margin=5, + tremble_probability=0, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_corrigible_tremble_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Corrigible, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + 
threshold=3, + margin=5, + tremble_probability=0.15, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_flip_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.ScheduledFlip, + initial_target=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + final_target=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + threshold=3, + initial_margin=1, + final_margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_grim_one_strike_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + threshold=1, + margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_grim_one_strike_margin_1=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + threshold=1, + margin=7, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_grim_two_strikes_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + threshold=2, + margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_grim_two_strikes_margin_1=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.GrimTrigger, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + 
defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + threshold=2, + margin=7, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_hare_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_hare_margin_1=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + margin=7, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_stag_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_stag_margin_1=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.Specialist, + target=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + margin=7, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_tft_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + tremble_probability=0, + margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_tft_margin_1=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + 
defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + tremble_probability=0, + margin=7, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_tft_tremble_margin_0=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + tremble_probability=0.15, + margin=5, + ), + ), + stag_hunt_in_the_matrix__repeated__puppet_tft_tremble_margin_1=puppet( + substrate='stag_hunt_in_the_matrix__repeated', + model='puppet_1', + puppeteer_builder=functools.partial( + in_the_matrix.TitForTat, + cooperate_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['STAG'], + defect_resource=_RESOURCES['stag_hunt_in_the_matrix__repeated']['HARE'], + tremble_probability=0.15, + margin=7, + ), + ), + territory__inside_out__aggressor_0=saved_model( + substrate='territory__inside_out', + model='aggressor_0', + ), + territory__inside_out__aggressor_1=saved_model( + substrate='territory__inside_out', + model='aggressor_1', + ), + territory__inside_out__aggressor_2=saved_model( + substrate='territory__inside_out', + model='aggressor_2', + ), + territory__inside_out__aggressor_3=saved_model( + substrate='territory__inside_out', + model='aggressor_3', + ), + territory__inside_out__aggressor_with_extra_training_0=saved_model( + substrate='territory__inside_out', + model='aggressor_with_extra_training_0', + ), + territory__inside_out__somewhat_tolerant_bot_0=saved_model( + substrate='territory__inside_out', + model='somewhat_tolerant_bot_0', + ), + territory__inside_out__somewhat_tolerant_bot_1=saved_model( + substrate='territory__inside_out', + model='somewhat_tolerant_bot_1', + ), + territory__open__aggressor_0=saved_model( + substrate='territory__open', + model='aggressor_0', + ), + territory__open__aggressor_1=saved_model( + 
substrate='territory__open', + model='aggressor_1', + ), + territory__open__aggressor_2=saved_model( + substrate='territory__open', + model='aggressor_2', + ), + territory__open__aggressor_3=saved_model( + substrate='territory__open', + model='aggressor_3', + ), + territory__open__aggressor_with_extra_training_0=saved_model( + substrate='territory__open', + model='aggressor_with_extra_training_0', + ), + territory__rooms__aggressor_0=saved_model( + substrate='territory__rooms', + model='aggressor_0', + ), + territory__rooms__aggressor_1=saved_model( + substrate='territory__rooms', + model='aggressor_1', + ), + territory__rooms__aggressor_2=saved_model( + substrate='territory__rooms', + model='aggressor_2', + ), + territory__rooms__aggressor_3=saved_model( + substrate='territory__rooms', + model='aggressor_3', + ), + territory__rooms__aggressor_with_extra_training_0=saved_model( + substrate='territory__rooms', + model='aggressor_with_extra_training_0', ), # keep-sorted end ) diff --git a/meltingpot/python/configs/bots/bot_configs_test.py b/meltingpot/python/configs/bots/bot_configs_test.py index 231e1fda..27a8696f 100644 --- a/meltingpot/python/configs/bots/bot_configs_test.py +++ b/meltingpot/python/configs/bots/bot_configs_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -64,7 +64,7 @@ def test_no_duplicates(self): duplicates = {names for _, names in seen.items() if len(names) > 1} self.assertEmpty(duplicates, f'Duplicate configs found: {duplicates!r}.') - def test_all_models_used_by_bots(self): + def test_models_used_by_bots(self): used = {bot.model_path for bot in BOT_CONFIGS.values()} unused = AVAILABLE_MODELS - used self.assertEmpty(unused, f'Models not used by any bot: {unused!r}') diff --git a/meltingpot/python/configs/scenarios/__init__.py b/meltingpot/python/configs/scenarios/__init__.py index c3f86ce6..04be6e03 100644 --- a/meltingpot/python/configs/scenarios/__init__.py +++ b/meltingpot/python/configs/scenarios/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Test scenario configs.""" +"""Test scenario configurations.""" +import collections import dataclasses -from typing import AbstractSet, Mapping, Sequence +from typing import AbstractSet, Collection, Mapping, Optional, Sequence import immutabledict @@ -50,1220 +51,4211 @@ def __post_init__(self): object.__setattr__(self, 'bots_by_role', bots_by_role) -def _homogeneous_scenario(is_focal: Sequence[bool], bots: AbstractSet[str], - **kwargs) -> ScenarioConfig: - """Returns a ScenarioConfig for a scenario with homogeneous roles.""" - return ScenarioConfig( - roles=('default',) * len(is_focal), - is_focal=is_focal, - bots_by_role={'default': bots}, - **kwargs) - - +# Local additions/overrides. 
SCENARIO_CONFIGS: Mapping[str, ScenarioConfig] = immutabledict.immutabledict( # keep-sorted start numeric=yes block=yes - allelopathic_harvest_0=_homogeneous_scenario( - description='focals are resident and a visitor prefers green', - tags=frozenset({ - 'resident', - }), - substrate='allelopathic_harvest', - is_focal=(True,) * 15 + (False,) * 1, - bots=frozenset({ - 'ah3gs_bot_finding_berry_two_the_most_tasty_0', - 'ah3gs_bot_finding_berry_two_the_most_tasty_1', - 'ah3gs_bot_finding_berry_two_the_most_tasty_4', - 'ah3gs_bot_finding_berry_two_the_most_tasty_5', - }), - ), - allelopathic_harvest_1=_homogeneous_scenario( - description='visiting a green preferring population', - tags=frozenset({ - 'convention_following', + allelopathic_harvest__open_0=ScenarioConfig( + description=( + 'visiting a population where planting green berries is the ' + + 'prevailing convention'), + tags={ 'visitor', - }), - substrate='allelopathic_harvest', + 'convention_following', + }, + substrate='allelopathic_harvest__open', + roles=['player_who_likes_red',] * 8 + ['player_who_likes_green',] * 8, is_focal=(True,) * 4 + (False,) * 12, - bots=frozenset({ - 'ah3gs_bot_finding_berry_two_the_most_tasty_0', - 'ah3gs_bot_finding_berry_two_the_most_tasty_1', - 'ah3gs_bot_finding_berry_two_the_most_tasty_4', - 'ah3gs_bot_finding_berry_two_the_most_tasty_5', - }), - ), - arena_running_with_scissors_in_the_matrix_0=_homogeneous_scenario( - description='versus gullible bots', - tags=frozenset({ - 'deception', - 'half_and_half', - 'versus_free', - }), - substrate='arena_running_with_scissors_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'arena_rws_free_0', - 'arena_rws_free_1', - 'arena_rws_free_2', - }), + bots_by_role={ + # The same bots can play both roles. 
+ 'player_who_likes_red': { + 'allelopathic_harvest__open__bot_that_supports_green_0', + 'allelopathic_harvest__open__bot_that_supports_green_1', + 'allelopathic_harvest__open__bot_that_supports_green_2', + 'allelopathic_harvest__open__bot_that_supports_green_3', + }, + 'player_who_likes_green': { + 'allelopathic_harvest__open__bot_that_supports_green_0', + 'allelopathic_harvest__open__bot_that_supports_green_1', + 'allelopathic_harvest__open__bot_that_supports_green_2', + 'allelopathic_harvest__open__bot_that_supports_green_3', + }, + }, ), - arena_running_with_scissors_in_the_matrix_1=_homogeneous_scenario( - description='versus mixture of pure bots', - tags=frozenset({ - 'half_and_half', - 'versus_pure_all', - }), - substrate='arena_running_with_scissors_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'arena_rws_pure_paper_0', - 'arena_rws_pure_paper_1', - 'arena_rws_pure_paper_2', - 'arena_rws_pure_paper_3', - 'arena_rws_pure_rock_0', - 'arena_rws_pure_rock_1', - 'arena_rws_pure_rock_2', - 'arena_rws_pure_rock_3', - 'arena_rws_pure_scissors_0', - 'arena_rws_pure_scissors_1', - 'arena_rws_pure_scissors_2', - 'arena_rws_pure_scissors_3', - }), - ), - arena_running_with_scissors_in_the_matrix_2=_homogeneous_scenario( - description='versus pure rock bots', - tags=frozenset({ - 'half_and_half', - 'versus_pure_rock', - }), - substrate='arena_running_with_scissors_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'arena_rws_pure_rock_0', - 'arena_rws_pure_rock_1', - 'arena_rws_pure_rock_2', - 'arena_rws_pure_rock_3', - }), + allelopathic_harvest__open_1=ScenarioConfig( + description=( + 'visiting a population where planting red berries is the ' + + 'prevailing convention'), + tags={ + 'visitor', + 'convention_following', + }, + substrate='allelopathic_harvest__open', + roles=['player_who_likes_red',] * 8 + ['player_who_likes_green',] * 8, + is_focal=(True,) * 4 + (False,) * 12, + bots_by_role={ + # The same bots 
can play both roles. + 'player_who_likes_red': { + 'allelopathic_harvest__open__bot_that_supports_red_0', + 'allelopathic_harvest__open__bot_that_supports_red_1', + 'allelopathic_harvest__open__bot_that_supports_red_2', + 'allelopathic_harvest__open__bot_that_supports_red_3', + }, + 'player_who_likes_green': { + 'allelopathic_harvest__open__bot_that_supports_red_0', + 'allelopathic_harvest__open__bot_that_supports_red_1', + 'allelopathic_harvest__open__bot_that_supports_red_2', + 'allelopathic_harvest__open__bot_that_supports_red_3', + }, + }, ), - arena_running_with_scissors_in_the_matrix_3=_homogeneous_scenario( - description='versus pure paper bots', - tags=frozenset({ - 'half_and_half', - 'versus_pure_paper', - }), - substrate='arena_running_with_scissors_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'arena_rws_pure_paper_0', - 'arena_rws_pure_paper_1', - 'arena_rws_pure_paper_2', - 'arena_rws_pure_paper_3', - }), + allelopathic_harvest__open_2=ScenarioConfig( + description=( + 'focals are resident and visited by bots who plant either red or ' + + 'green'), + tags={ + 'resident', + }, + substrate='allelopathic_harvest__open', + roles=['player_who_likes_red',] * 8 + ['player_who_likes_green',] * 8, + is_focal=(True,) * 14 + (False,) * 2, + bots_by_role={ + 'player_who_likes_green': { + 'allelopathic_harvest__open__bot_that_supports_red_0', + 'allelopathic_harvest__open__bot_that_supports_red_1', + 'allelopathic_harvest__open__bot_that_supports_red_2', + 'allelopathic_harvest__open__bot_that_supports_red_3', + 'allelopathic_harvest__open__bot_that_supports_green_0', + 'allelopathic_harvest__open__bot_that_supports_green_1', + 'allelopathic_harvest__open__bot_that_supports_green_2', + 'allelopathic_harvest__open__bot_that_supports_green_3', + }, + }, ), - arena_running_with_scissors_in_the_matrix_4=_homogeneous_scenario( - description='versus pure scissors bots', - tags=frozenset({ - 'half_and_half', - 'versus_pure_scissors', - }), - 
substrate='arena_running_with_scissors_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'arena_rws_pure_scissors_0', - 'arena_rws_pure_scissors_1', - 'arena_rws_pure_scissors_2', - 'arena_rws_pure_scissors_3', - }), - ), - bach_or_stravinsky_in_the_matrix_0=_homogeneous_scenario( - description='visiting pure bach fans', - tags=frozenset({ + bach_or_stravinsky_in_the_matrix__arena_0=ScenarioConfig( + description='visiting background population who picks bach', + tags={ 'convention_following', 'versus_pure_bach', 'visitor', - }), - substrate='bach_or_stravinsky_in_the_matrix', + }, + substrate='bach_or_stravinsky_in_the_matrix__arena', + roles=('bach_fan',) * 4 + ('stravinsky_fan',) * 4, is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'bach_fan_0', - 'bach_fan_1', - 'bach_fan_2', - }), - ), - bach_or_stravinsky_in_the_matrix_1=_homogeneous_scenario( - description='visiting pure stravinsky fans', - tags=frozenset({ + bots_by_role=immutabledict.immutabledict( + bach_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__bach_picker_0', + ), + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__bach_picker_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__arena_1=ScenarioConfig( + description='visiting background population who picks stravinsky', + tags={ 'convention_following', 'versus_pure_stravinsky', 'visitor', - }), - substrate='bach_or_stravinsky_in_the_matrix', + }, + substrate='bach_or_stravinsky_in_the_matrix__arena', + roles=('bach_fan',) * 4 + ('stravinsky_fan',) * 4, is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'stravinsky_fan_0', - 'stravinsky_fan_1', - 'stravinsky_fan_2', - }), + bots_by_role=immutabledict.immutabledict( + bach_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__stravinsky_picker_0', + ), + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__stravinsky_picker_0', + ), + ), ), - capture_the_flag_0=_homogeneous_scenario( - description='focal team versus shaped a3c bot team', - 
tags=frozenset({ - 'half_and_half', - 'learned_teamwork', - }), - substrate='capture_the_flag', - is_focal=(True, False) * 4, - bots=frozenset({ - 'ctf_pseudorewards_for_main_game_events_a3c_2', - 'ctf_pseudorewards_for_main_game_events_a3c_6', - }), + bach_or_stravinsky_in_the_matrix__arena_2=ScenarioConfig( + description='visited by a pure bot', + tags={ + 'resident', + 'versus_pure_all' + }, + substrate='bach_or_stravinsky_in_the_matrix__arena', + roles=('bach_fan',) * 4 + ('stravinsky_fan',) * 4, + is_focal=(True,) * 7 + (False,) * 1, + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__bach_picker_0', + 'bach_or_stravinsky_in_the_matrix__arena__stravinsky_picker_0', + ), + ), ), - capture_the_flag_1=_homogeneous_scenario( - description='focal team versus shaped vmpo bot team', - tags=frozenset({ - 'half_and_half', - 'learned_teamwork', - }), - substrate='capture_the_flag', - is_focal=( - True, - False, - ) * 4, - bots=frozenset({ - 'ctf_pseudorewards_for_main_game_events_vmpo_0', - 'ctf_pseudorewards_for_main_game_events_vmpo_3', - 'ctf_pseudorewards_for_main_game_events_vmpo_4', - 'ctf_pseudorewards_for_main_game_events_vmpo_6', - 'ctf_pseudorewards_for_main_game_events_vmpo_7', - }), - ), - capture_the_flag_2=_homogeneous_scenario( - description='ad hoc teamwork with shaped a3c bots', - tags=frozenset({ - 'ad_hoc_teamwork', + bach_or_stravinsky_in_the_matrix__arena_3=ScenarioConfig( + description='visited by three pure bach pickers', + tags={ + 'resident', + 'versus_pure_bach' + }, + substrate='bach_or_stravinsky_in_the_matrix__arena', + roles=('bach_fan',) * 4 + ('stravinsky_fan',) * 4, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__bach_picker_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__arena_4=ScenarioConfig( + description='visited by three pure stravinsky pickers', + tags={ + 'resident', + 
'versus_pure_stravinsky' + }, + substrate='bach_or_stravinsky_in_the_matrix__arena', + roles=('bach_fan',) * 4 + ('stravinsky_fan',) * 4, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__stravinsky_picker_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__arena_5=ScenarioConfig( + description=('visiting background population who alternates, ' + + 'starting from stravinsky, repeating each twice'), + tags={ 'visitor', - }), - substrate='capture_the_flag', + 'turn_taking', + 'convention_following', + }, + substrate='bach_or_stravinsky_in_the_matrix__arena', + roles=('bach_fan',) * 4 + ('stravinsky_fan',) * 4, is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'ctf_pseudorewards_for_main_game_events_a3c_2', - 'ctf_pseudorewards_for_main_game_events_a3c_6', - }), - ), - capture_the_flag_3=_homogeneous_scenario( - description='ad hoc teamwork with shaped vmpo bots', - tags=frozenset({ - 'ad_hoc_teamwork', + bots_by_role=immutabledict.immutabledict( + bach_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__turn_taking_initial_stravinsky_0', + ), + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__turn_taking_initial_stravinsky_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__arena_6=ScenarioConfig( + description=('visiting background population who alternates, ' + + 'starting from bach, repeating each twice'), + tags={ 'visitor', - }), - substrate='capture_the_flag', + 'turn_taking', + 'convention_following', + }, + substrate='bach_or_stravinsky_in_the_matrix__arena', + roles=('bach_fan',) * 4 + ('stravinsky_fan',) * 4, is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'ctf_pseudorewards_for_main_game_events_vmpo_0', - 'ctf_pseudorewards_for_main_game_events_vmpo_3', - 'ctf_pseudorewards_for_main_game_events_vmpo_4', - 'ctf_pseudorewards_for_main_game_events_vmpo_6', - 'ctf_pseudorewards_for_main_game_events_vmpo_7', - }), - ), - 
chemistry_branched_chain_reaction_0=_homogeneous_scenario( - description='focals meet X preferring bots', - tags=frozenset({ - 'half_and_half', - }), - substrate='chemistry_branched_chain_reaction', + bots_by_role=immutabledict.immutabledict( + bach_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__turn_taking_initial_bach_0', + ), + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__arena__turn_taking_initial_bach_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_0=ScenarioConfig( + description='meeting a stubborn bach picker', + tags={ + 'convention_following', + 'versus_pure_bach', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('stravinsky_fan',) + ('bach_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + bach_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__bach_picker_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_1=ScenarioConfig( + description='meeting a bot who plays bach despite not being a fan', + tags={ + 'convention_following', + 'versus_pure_bach', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('bach_fan',) + ('stravinsky_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__bach_picker_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_2=ScenarioConfig( + description=('meeting a bot who plays stravinsky despite not being a ' + + 'fan'), + tags={ + 'convention_following', + 'versus_pure_stravinsky', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('stravinsky_fan',) + ('bach_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + bach_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__stravinsky_picker_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_3=ScenarioConfig( + description='meeting a stubborn 
stravinsky picker', + tags={ + 'convention_following', + 'versus_pure_stravinsky', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('bach_fan',) + ('stravinsky_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__stravinsky_picker_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_4=ScenarioConfig( + description='bach fan focal agent meets an imperfectly copying partner', + tags={ + 'versus_tft', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('bach_fan',) + ('stravinsky_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__bach_tft_0', + 'bach_or_stravinsky_in_the_matrix__repeated__bach_tft_tremble_0', + 'bach_or_stravinsky_in_the_matrix__repeated__stravinsky_tft_0', + 'bach_or_stravinsky_in_the_matrix__repeated__stravinsky_tft_tremble_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_5=ScenarioConfig( + description=('stravinsky fan focal agent meets an imperfectly ' + + 'copying partner'), + tags={ + 'versus_tft', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('stravinsky_fan',) + ('bach_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + bach_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__bach_tft_0', + 'bach_or_stravinsky_in_the_matrix__repeated__bach_tft_tremble_0', + 'bach_or_stravinsky_in_the_matrix__repeated__stravinsky_tft_0', + 'bach_or_stravinsky_in_the_matrix__repeated__stravinsky_tft_tremble_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_6=ScenarioConfig( + description=('bach fan focal agent meets a turn-taking partner'), + tags={ + 'turn_taking', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('bach_fan',) + 
('stravinsky_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_bach_0', + 'bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_stravinsky_0', + ), + ), + ), + bach_or_stravinsky_in_the_matrix__repeated_7=ScenarioConfig( + description=('bach fan focal agent meets a turn-taking partner who ' + + 'repeats each goal/resource three times before switching'), + tags={ + 'turn_taking', + 'half_and_half', + }, + substrate='bach_or_stravinsky_in_the_matrix__repeated', + roles=('bach_fan',) + ('stravinsky_fan',), + is_focal=(True,) + (False,), + bots_by_role=immutabledict.immutabledict( + stravinsky_fan=( + 'bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_bach_1', + 'bach_or_stravinsky_in_the_matrix__repeated__turn_taking_initial_stravinsky_1', + ), + ), + ), + boat_race__eight_races_0=ScenarioConfig( + description='visiting cooperators', + tags={ + 'visitor', + }, + substrate='boat_race__eight_races', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role=immutabledict.immutabledict( + default=('boat_race__eight_races__cooperator_0',), + ), + ), + boat_race__eight_races_1=ScenarioConfig( + description='visiting defectors', + tags={ + 'visitor', + }, + substrate='boat_race__eight_races', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role=immutabledict.immutabledict( + default=('boat_race__eight_races__defector_0',), + ), + ), + boat_race__eight_races_2=ScenarioConfig( + description='visited by a population of cooperators', + tags={ + 'resident', + }, + substrate='boat_race__eight_races', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,) * 1, + bots_by_role=immutabledict.immutabledict( + default=('boat_race__eight_races__cooperator_0',), + ), + ), + boat_race__eight_races_3=ScenarioConfig( + description='visited by a population of defectors', + tags={ + 'resident', + }, 
+ substrate='boat_race__eight_races', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,) * 1, + bots_by_role=immutabledict.immutabledict( + default=('boat_race__eight_races__defector_0',), + ), + ), + boat_race__eight_races_4=ScenarioConfig( + description='find the cooperator partner', + tags={ + 'partner_choice', + }, + substrate='boat_race__eight_races', + roles=('default',) * 5 + ('target',), + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role=immutabledict.immutabledict( + default=('boat_race__eight_races__defector_0',), + target=('boat_race__eight_races__cooperator_0',), + ), + ), + chemistry__three_metabolic_cycles_0=ScenarioConfig( + description=('resident focal population meets a small mixture of ' + + 'background bots'), + tags={ + 'resident', + }, + substrate='chemistry__three_metabolic_cycles', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 'chemistry__three_metabolic_cycles__blue_0', + 'chemistry__three_metabolic_cycles__green_0', + 'chemistry__three_metabolic_cycles__yellow_0', + }, + }, + ), + chemistry__three_metabolic_cycles_1=ScenarioConfig( + description='meeting bots running blue and yellow', + tags={ + 'half_and_half', + }, + substrate='chemistry__three_metabolic_cycles', + roles=('default',) * 8, is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'chemistry_branched_chain_reaction_X_specialist_0', - 'chemistry_branched_chain_reaction_X_specialist_1', - 'chemistry_branched_chain_reaction_X_specialist_2', - }), - ), - chemistry_branched_chain_reaction_1=_homogeneous_scenario( - description='focals meet Y preferring bots', - tags=frozenset({ - 'half_and_half', - }), - substrate='chemistry_branched_chain_reaction', + bots_by_role={ + 'default': { + 'chemistry__three_metabolic_cycles__blue_0', + 'chemistry__three_metabolic_cycles__yellow_0', + }, + }, + ), + chemistry__three_metabolic_cycles_2=ScenarioConfig( + description='meeting one-sided bots running green', + tags={ + 
'half_and_half', + }, + substrate='chemistry__three_metabolic_cycles', + roles=('default',) * 8, is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'chemistry_branched_chain_reaction_Y_specialist_0', - 'chemistry_branched_chain_reaction_Y_specialist_1', - 'chemistry_branched_chain_reaction_Y_specialist_2', - }), - ), - chemistry_branched_chain_reaction_2=_homogeneous_scenario( - description='focals are resident', - tags=frozenset({ + bots_by_role={ + 'default': { + 'chemistry__three_metabolic_cycles__green_0', + }, + }, + ), + chemistry__three_metabolic_cycles_3=ScenarioConfig( + description='visit a resident population with mixed specialties', + tags={ + 'visitor', + }, + substrate='chemistry__three_metabolic_cycles', + roles=('default',) * 8, + is_focal=(True,) * 2 + (False,) * 6, + bots_by_role={ + 'default': { + 'chemistry__three_metabolic_cycles__blue_0', + 'chemistry__three_metabolic_cycles__yellow_0', + 'chemistry__three_metabolic_cycles__green_0', + }, + }, + ), + chemistry__three_metabolic_cycles_with_plentiful_distractors_0=ScenarioConfig( + description=('resident focal population meets a small mixture of ' + + 'background bots, must avoid distractor molecules'), + tags={ 'resident', - }), - substrate='chemistry_branched_chain_reaction', - is_focal=(True,) * 7 + (False,) * 1, - bots=frozenset({ - 'chemistry_branched_chain_reaction_X_specialist_0', - 'chemistry_branched_chain_reaction_X_specialist_1', - 'chemistry_branched_chain_reaction_X_specialist_2', - 'chemistry_branched_chain_reaction_Y_specialist_0', - 'chemistry_branched_chain_reaction_Y_specialist_1', - 'chemistry_branched_chain_reaction_Y_specialist_2', - }), - ), - chemistry_branched_chain_reaction_3=_homogeneous_scenario( - description='visiting another population', - tags=frozenset({ - 'convention_following', + }, + substrate='chemistry__three_metabolic_cycles_with_plentiful_distractors', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 
'chemistry__three_metabolic_cycles_with_plentiful_distractors__blue_0', + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__green_0', + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__yellow_0', + }, + }, + ), + chemistry__three_metabolic_cycles_with_plentiful_distractors_1=ScenarioConfig( + description='meeting bots running blue, avoid distractors', + tags={ + 'half_and_half', + }, + substrate='chemistry__three_metabolic_cycles_with_plentiful_distractors', + roles=('default',) * 8, + is_focal=(True,) * 4 + (False,) * 4, + bots_by_role={ + 'default': { + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__blue_0', + }, + }, + ), + chemistry__three_metabolic_cycles_with_plentiful_distractors_2=ScenarioConfig( + description='meeting bots running green and yellow, avoid distractors', + tags={ + 'half_and_half', + }, + substrate='chemistry__three_metabolic_cycles_with_plentiful_distractors', + roles=('default',) * 8, + is_focal=(True,) * 4 + (False,) * 4, + bots_by_role={ + 'default': { + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__green_0', + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__yellow_0', + }, + }, + ), + chemistry__three_metabolic_cycles_with_plentiful_distractors_3=ScenarioConfig( + description=('visit a resident population with mixed specialties and ' + + 'avoid distractor molecules'), + tags={ 'visitor', - }), - substrate='chemistry_branched_chain_reaction', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'chemistry_branched_chain_reaction_X_specialist_0', - 'chemistry_branched_chain_reaction_X_specialist_1', - 'chemistry_branched_chain_reaction_X_specialist_2', - 'chemistry_branched_chain_reaction_Y_specialist_0', - 'chemistry_branched_chain_reaction_Y_specialist_1', - 'chemistry_branched_chain_reaction_Y_specialist_2', - }), - ), - chemistry_metabolic_cycles_0=_homogeneous_scenario( - description='focals meet food1 preferring bots', - tags=frozenset({ - 
'half_and_half', - }), - substrate='chemistry_metabolic_cycles', + }, + substrate='chemistry__three_metabolic_cycles_with_plentiful_distractors', + roles=('default',) * 8, + is_focal=(True,) * 2 + (False,) * 6, + bots_by_role={ + 'default': { + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__blue_0', + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__yellow_0', + 'chemistry__three_metabolic_cycles_with_plentiful_distractors__green_0', + }, + }, + ), + chemistry__two_metabolic_cycles_0=ScenarioConfig( + description=('resident focal population meets a small mixture of ' + + 'background bots'), + tags={ + 'resident', + }, + substrate='chemistry__two_metabolic_cycles', + roles=('default',) * 8, + is_focal=(True,) * 6 + (False,) * 2, + bots_by_role={ + 'default': { + 'chemistry__two_metabolic_cycles__blue_0', + 'chemistry__two_metabolic_cycles__green_0', + }, + }, + ), + chemistry__two_metabolic_cycles_1=ScenarioConfig( + description='meeting one-sided bots running blue', + tags={ + 'half_and_half', + }, + substrate='chemistry__two_metabolic_cycles', + roles=('default',) * 8, is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'chemistry_metabolic_cycles_food1_specialist_0', - 'chemistry_metabolic_cycles_food1_specialist_1', - }), + bots_by_role={ + 'default': { + 'chemistry__two_metabolic_cycles__blue_0', + }, + }, ), - chemistry_metabolic_cycles_1=_homogeneous_scenario( - description='focals meet food2 preferring bots', - tags=frozenset({ + chemistry__two_metabolic_cycles_2=ScenarioConfig( + description='meeting one-sided bots running green', + tags={ 'half_and_half', - }), - substrate='chemistry_metabolic_cycles', + }, + substrate='chemistry__two_metabolic_cycles', + roles=('default',) * 8, is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'chemistry_metabolic_cycles_food2_specialist_0', - 'chemistry_metabolic_cycles_food2_specialist_1', - }), - ), - chemistry_metabolic_cycles_2=_homogeneous_scenario( - description='focals 
are resident', - tags=frozenset({ + bots_by_role={ + 'default': { + 'chemistry__two_metabolic_cycles__green_0', + }, + }, + ), + chemistry__two_metabolic_cycles_3=ScenarioConfig( + description=('visit a resident background population with mixed ' + + 'specialties'), + tags={ + 'visitor', + }, + substrate='chemistry__two_metabolic_cycles', + roles=('default',) * 8, + is_focal=(True,) * 2 + (False,) * 6, + bots_by_role={ + 'default': { + 'chemistry__two_metabolic_cycles__blue_0', + 'chemistry__two_metabolic_cycles__green_0', + }, + }, + ), + chemistry__two_metabolic_cycles_with_distractors_0=ScenarioConfig( + description=('resident focal population meets a small mixture of ' + + 'background bots, must avoid distractor molecules'), + tags={ 'resident', - }), - substrate='chemistry_metabolic_cycles', - is_focal=(True,) * 7 + (False,) * 1, - bots=frozenset({ - 'chemistry_metabolic_cycles_food1_specialist_0', - 'chemistry_metabolic_cycles_food1_specialist_1', - 'chemistry_metabolic_cycles_food2_specialist_0', - 'chemistry_metabolic_cycles_food2_specialist_1', - }), - ), - chemistry_metabolic_cycles_3=_homogeneous_scenario( - description='visiting another population', - tags=frozenset({ - 'visitor', - }), - substrate='chemistry_metabolic_cycles', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'chemistry_metabolic_cycles_food1_specialist_0', - 'chemistry_metabolic_cycles_food1_specialist_1', - 'chemistry_metabolic_cycles_food2_specialist_0', - 'chemistry_metabolic_cycles_food2_specialist_1', - }), + }, + substrate='chemistry__two_metabolic_cycles_with_distractors', + roles=('default',) * 8, + is_focal=(True,) * 6 + (False,) * 2, + bots_by_role={ + 'default': { + 'chemistry__two_metabolic_cycles_with_distractors__blue_0', + 'chemistry__two_metabolic_cycles_with_distractors__green_0', + }, + }, ), - chicken_in_the_matrix_0=_homogeneous_scenario( - description='meeting a mixture of pure bots', - tags=frozenset({ + 
chemistry__two_metabolic_cycles_with_distractors_1=ScenarioConfig( + description=('meeting one-sided bots running blue and avoid ' + + 'distractor molecules'), + tags={ 'half_and_half', - 'versus_pure_all', - }), - substrate='chicken_in_the_matrix', + }, + substrate='chemistry__two_metabolic_cycles_with_distractors', + roles=('default',) * 8, is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'chicken_pure_dove_0', - 'chicken_pure_dove_1', - 'chicken_pure_dove_2', - 'chicken_pure_dove_3', - 'chicken_pure_hawk_0', - 'chicken_pure_hawk_1', - 'chicken_pure_hawk_2', - 'chicken_pure_hawk_3', - }), - ), - chicken_in_the_matrix_1=_homogeneous_scenario( - description='visiting a pure dove population', - tags=frozenset({ - 'versus_pure_dove', + bots_by_role={ + 'default': { + 'chemistry__two_metabolic_cycles_with_distractors__blue_0', + }, + }, + ), + chemistry__two_metabolic_cycles_with_distractors_2=ScenarioConfig( + description=('meeting one-sided bots running green and avoid ' + + 'distractor molecules'), + tags={ + 'half_and_half', + }, + substrate='chemistry__two_metabolic_cycles_with_distractors', + roles=('default',) * 8, + is_focal=(True,) * 4 + (False,) * 4, + bots_by_role={ + 'default': { + 'chemistry__two_metabolic_cycles_with_distractors__green_0', + }, + }, + ), + chemistry__two_metabolic_cycles_with_distractors_3=ScenarioConfig( + description=('visit a resident background population with mixed ' + + 'specialties and avoid distractor molecules'), + tags={ 'visitor', - }), - substrate='chicken_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'chicken_pure_dove_0', - 'chicken_pure_dove_1', - 'chicken_pure_dove_2', - 'chicken_pure_dove_3', - }), - ), - chicken_in_the_matrix_2=_homogeneous_scenario( - description='focals are resident and visitors are hawks', - tags=frozenset({ + }, + substrate='chemistry__two_metabolic_cycles_with_distractors', + roles=('default',) * 8, + is_focal=(True,) * 2 + (False,) * 6, + bots_by_role={ + 
'default': { + 'chemistry__two_metabolic_cycles_with_distractors__blue_0', + 'chemistry__two_metabolic_cycles_with_distractors__green_0', + }, + }, + ), + chicken_in_the_matrix__arena_0=ScenarioConfig( + description='visiting unconditional dove players', + tags={ + 'visitor', + 'versus_pure_dove_players', + }, + substrate='chicken_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__arena__puppet_dove_0', + 'chicken_in_the_matrix__arena__puppet_dove_margin_0', + }, + }, + ), + chicken_in_the_matrix__arena_1=ScenarioConfig( + description=('focals are resident and visitors are unconditional ' + + 'dove players'), + tags={ 'resident', - 'versus_pure_hawk', - }), - substrate='chicken_in_the_matrix', + 'versus_pure_dove_players', + }, + substrate='chicken_in_the_matrix__arena', + roles=('default',) * 8, is_focal=(True,) * 5 + (False,) * 3, - bots=frozenset({ - 'chicken_pure_hawk_0', - 'chicken_pure_hawk_1', - 'chicken_pure_hawk_2', - 'chicken_pure_hawk_3', - }), - ), - chicken_in_the_matrix_3=_homogeneous_scenario( - description='visiting a gullible population', - tags=frozenset({ - 'deception', - 'versus_free', + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__arena__puppet_dove_0', + 'chicken_in_the_matrix__arena__puppet_dove_margin_0', + }, + }, + ), + chicken_in_the_matrix__arena_2=ScenarioConfig( + description=('focals are resident and visitors are unconditional ' + + 'hawk players'), + tags={ + 'resident', + 'versus_pure_hawk_players', + }, + substrate='chicken_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__arena__puppet_hawk_0', + 'chicken_in_the_matrix__arena__puppet_hawk_margin_0', + }, + }, + ), + chicken_in_the_matrix__arena_3=ScenarioConfig( + description=('visiting a population of hair-trigger grim ' + + 'reciprocator bots who initially cooperate but, if ' + + 
'defected on once, will retaliate by defecting in all ' + + 'future interactions'), + tags={ 'visitor', - }), - substrate='chicken_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'chicken_free_0', - 'chicken_free_1', - 'chicken_free_2', - 'chicken_free_3', - }), - ), - chicken_in_the_matrix_4=_homogeneous_scenario( - description='visiting grim reciprocators', - tags=frozenset({ 'reciprocity', - 'versus_puppet', + }, + substrate='chicken_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__arena__puppet_grim_one_strike_0', + 'chicken_in_the_matrix__arena__puppet_grim_one_strike_margin_0', + }, + }, + ), + chicken_in_the_matrix__arena_4=ScenarioConfig( + description=('visiting a population of two-strikes grim ' + + 'reciprocator bots who initially cooperate but, if ' + + 'defected on twice, will retaliate by defecting in all ' + + 'future interactions'), + tags={ 'visitor', - }), - substrate='chicken_in_the_matrix', - is_focal=(True,) * 2 + (False,) * 6, - bots=frozenset({ - 'chicken_puppet_grim', - }), + 'reciprocity', + }, + substrate='chicken_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__arena__puppet_grim_two_strikes_0', + 'chicken_in_the_matrix__arena__puppet_grim_two_strikes_margin_0', + }, + }, + ), + chicken_in_the_matrix__arena_5=ScenarioConfig( + description=( + 'visiting a mixed population of k-strikes grim reciprocator bots ' + + 'with k values from 1 to 3, they initially cooperate but, if ' + + 'defected on k times, they retaliate in all future interactions' + ), + tags={ + 'visitor', + 'reciprocity', + }, + substrate='chicken_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 3 + (False,) * 5, + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__arena__puppet_grim_one_strike_0', + 
'chicken_in_the_matrix__arena__puppet_grim_one_strike_margin_0', + 'chicken_in_the_matrix__arena__puppet_grim_three_strikes_0', + 'chicken_in_the_matrix__arena__puppet_grim_three_strikes_margin_0', + 'chicken_in_the_matrix__arena__puppet_grim_two_strikes_0', + 'chicken_in_the_matrix__arena__puppet_grim_two_strikes_margin_0', + }, + }, + ), + chicken_in_the_matrix__arena_6=ScenarioConfig( + description='visiting a mixture of pure hawk and pure dove players', + tags={ + 'visitor', + 'versus_pure_all', + }, + substrate='chicken_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 3 + (False,) * 5, + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__arena__puppet_dove_0', + 'chicken_in_the_matrix__arena__puppet_dove_margin_0', + 'chicken_in_the_matrix__arena__puppet_hawk_0', + 'chicken_in_the_matrix__arena__puppet_hawk_margin_0', + }, + }, + ), + chicken_in_the_matrix__repeated_0=ScenarioConfig( + description='partner may play either hawk or dove', + tags={ + 'half_and_half', + 'versus_pure_all', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_dove_margin_0', + 'chicken_in_the_matrix__repeated__puppet_dove_margin_1', + 'chicken_in_the_matrix__repeated__puppet_hawk_margin_0', + 'chicken_in_the_matrix__repeated__puppet_hawk_margin_1', + }, + }, + ), + chicken_in_the_matrix__repeated_1=ScenarioConfig( + description='partner typically plays dove', + tags={ + 'half_and_half', + 'versus_pure_dove', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_dove_margin_0', + 'chicken_in_the_matrix__repeated__puppet_dove_margin_1', + }, + }, + ), + chicken_in_the_matrix__repeated_2=ScenarioConfig( + description='partner typically plays hawk', + tags={ + 'half_and_half', + 'versus_pure_hawk', + }, + 
substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_hawk_margin_0', + 'chicken_in_the_matrix__repeated__puppet_hawk_margin_1', + }, + }, + ), + chicken_in_the_matrix__repeated_3=ScenarioConfig( + description=('partner is a hair-trigger grim reciprocator, i.e. one ' + + 'who initially cooperates but, if defected on once, will' + + ' retaliate by defecting forever after'), + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_grim_one_strike_margin_0', + 'chicken_in_the_matrix__repeated__puppet_grim_one_strike_margin_1', + }, + }, + ), + chicken_in_the_matrix__repeated_4=ScenarioConfig( + description=('partner is a two-strikes grim reciprocator, i.e. one ' + + 'who initially cooperates, but if defected on twice, ' + + 'will retaliate by defecting forever after'), + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_grim_two_strikes_margin_0', + 'chicken_in_the_matrix__repeated__puppet_grim_two_strikes_margin_1', + }, + }, + ), + chicken_in_the_matrix__repeated_5=ScenarioConfig( + description='partner is a tit-for-tat conditional cooperator', + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_tft_margin_0', + 'chicken_in_the_matrix__repeated__puppet_tft_margin_1', + }, + }, + ), + chicken_in_the_matrix__repeated_6=ScenarioConfig( + description=('partner is a tit-for-tat conditional cooperator who ' + + 'occasionally plays hawk 
instead of dove'), + tags={ + 'half_and_half', + 'reciprocity', + 'forgiveness', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_tft_tremble_margin_0', + 'chicken_in_the_matrix__repeated__puppet_tft_tremble_margin_1', + }, + }, + ), + chicken_in_the_matrix__repeated_7=ScenarioConfig( + description='partner plays dove for a while then switches to hawk', + tags={ + 'half_and_half', + 'flexibility', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_flip_0', + }, + }, + ), + chicken_in_the_matrix__repeated_8=ScenarioConfig( + description=('partner tries to take advantage of the focal player ' + + 'by playing hawk, but if punished, partner then ' + + 'switches to tit-for-tat conditional cooperation'), + tags={ + 'half_and_half', + 'teaching', + 'reciprocity', + 'forgiveness', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_corrigible_0', + }, + }, + ), + chicken_in_the_matrix__repeated_9=ScenarioConfig( + description=('partner tries to take advantage of the focal player ' + + 'by playing hawk, but if punished, partner then ' + + 'switches to noisy tit-for-tat conditional cooperation'), + tags={ + 'half_and_half', + 'teaching', + 'reciprocity', + 'forgiveness', + }, + substrate='chicken_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'chicken_in_the_matrix__repeated__puppet_corrigible_tremble_0', + }, + }, ), - clean_up_0=_homogeneous_scenario( + clean_up_0=ScenarioConfig( description='visiting an altruistic population', - tags=frozenset({ + tags={ 'versus_cleaners', 'visitor', - }), + }, 
substrate='clean_up', + roles=('default',) * 7, is_focal=(True,) * 3 + (False,) * 4, - bots=frozenset({ - 'cleanup_cleaner_1', - 'cleanup_cleaner_2', - }), - ), - clean_up_1=_homogeneous_scenario( - description='focals are resident and visitors free ride', - tags=frozenset({ + bots_by_role={ + 'default': { + 'clean_up__cleaner_0', + 'clean_up__cleaner_1', + }, + }, + ), + clean_up_1=ScenarioConfig( + description='focals are resident and visitors ride free', + tags={ 'resident', 'versus_consumers', - }), + }, substrate='clean_up', + roles=('default',) * 7, is_focal=(True,) * 4 + (False,) * 3, - bots=frozenset({ - 'cleanup_consumer_0', - 'cleanup_consumer_1', - 'cleanup_consumer_2', - }), + bots_by_role={ + 'default': { + 'clean_up__consumer_0', + 'clean_up__consumer_1', + }, + }, ), - clean_up_2=_homogeneous_scenario( + clean_up_2=ScenarioConfig( description='visiting a turn-taking population that cleans first', - tags=frozenset({ + tags={ + 'turn_taking', 'versus_puppet', 'visitor', - }), + }, substrate='clean_up', + roles=('default',) * 7, is_focal=(True,) * 3 + (False,) * 4, - bots=frozenset({ - 'cleanup_puppet_alternate_clean_first', - }), + bots_by_role={ + 'default': {'clean_up__puppet_alternator_first_cleans_0',}, + }, ), - clean_up_3=_homogeneous_scenario( + clean_up_3=ScenarioConfig( description='visiting a turn-taking population that eats first', - tags=frozenset({ + tags={ + 'turn_taking', 'versus_puppet', 'visitor', - }), + }, substrate='clean_up', + roles=('default',) * 7, is_focal=(True,) * 3 + (False,) * 4, - bots=frozenset({ - 'cleanup_puppet_alternate_eat_first', - }), + bots_by_role={ + 'default': {'clean_up__puppet_alternator_first_eats_0',}, + }, ), - clean_up_4=_homogeneous_scenario( + clean_up_4=ScenarioConfig( description='focals are visited by one reciprocator', - tags=frozenset({ + tags={ 'resident', 'versus_puppet', - }), + }, substrate='clean_up', + roles=('default',) * 7, is_focal=(True,) * 6 + (False,) * 1, - bots=frozenset({ - 
'cleanup_puppet_reciprocator_threshold_low', - }), + bots_by_role={ + 'default': {'clean_up__puppet_low_threshold_reciprocator_0',}, + }, ), - clean_up_5=_homogeneous_scenario( + clean_up_5=ScenarioConfig( description='focals are visited by two suspicious reciprocators', - tags=frozenset({ + tags={ 'resident', 'versus_puppet', - }), + }, substrate='clean_up', + roles=('default',) * 7, is_focal=(True,) * 5 + (False,) * 2, - bots=frozenset({ - 'cleanup_puppet_reciprocator_threshold_mid', - }), + bots_by_role={ + 'default': {'clean_up__puppet_high_threshold_reciprocator_0',}, + }, ), - clean_up_6=_homogeneous_scenario( + clean_up_6=ScenarioConfig( description='focals are visited by one suspicious reciprocator', - tags=frozenset({ + tags={ 'resident', 'versus_puppet', - }), + }, substrate='clean_up', + roles=('default',) * 7, is_focal=(True,) * 6 + (False,) * 1, - bots=frozenset({ - 'cleanup_puppet_reciprocator_threshold_mid', - }), + bots_by_role={ + 'default': {'clean_up__puppet_high_threshold_reciprocator_0',}, + }, ), - collaborative_cooking_impassable_0=_homogeneous_scenario( - description='visiting a vmpo population', - tags=frozenset({ - 'convention_following', + clean_up_7=ScenarioConfig( + description='focals visit resident group of suspicious reciprocators', + tags={ 'visitor', - }), - substrate='collaborative_cooking_impassable', - is_focal=(True,) * 1 + (False,) * 3, - bots=frozenset({ - 'collaborative_cooking_impassable_vmpo_pop_size_ten_0', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_2', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_3', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_4', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_6', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_7', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_9', - }), - ), - collaborative_cooking_impassable_1=_homogeneous_scenario( - description='focals are resident', - tags=frozenset({ - 'resident', - }), - 
substrate='collaborative_cooking_impassable', - is_focal=(True,) * 3 + (False,) * 1, - bots=frozenset({ - 'collaborative_cooking_impassable_vmpo_pop_size_ten_0', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_2', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_3', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_4', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_6', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_7', - 'collaborative_cooking_impassable_vmpo_pop_size_ten_9', - }), - ), - collaborative_cooking_passable_0=_homogeneous_scenario( - description='visiting uncoordinated generalists', - tags=frozenset({ - 'convention_following', - 'versus_uncoordinated_generalist', - 'visitor', - }), - substrate='collaborative_cooking_passable', - is_focal=(True,) * 1 + (False,) * 3, - bots=frozenset({ - 'collaborative_cooking_passable_vmpo_pop_size_ten_5', - }), - ), - collaborative_cooking_passable_1=_homogeneous_scenario( - description='focals are resident and visited by an uncoordinated generalist', - tags=frozenset({ - 'resident', - 'versus_uncoordinated_generalist', - }), - substrate='collaborative_cooking_passable', - is_focal=(True,) * 3 + (False,) * 1, - bots=frozenset({ - 'collaborative_cooking_passable_vmpo_pop_size_ten_5', - }), - ), - commons_harvest_closed_0=_homogeneous_scenario( - description='focals are resident and visited by two zappers', - tags=frozenset({ - 'resident', - }), - substrate='commons_harvest_closed', - is_focal=(True,) * 14 + (False,) * 2, - bots=frozenset({ - 'closed_commons_zapper_0', - 'closed_commons_zapper_1', - 'closed_commons_zapper_2', - 'closed_commons_zapper_3', - }), - ), - commons_harvest_closed_1=_homogeneous_scenario( - description='focals are resident and visited by six zappers', - tags=frozenset({ - 'resident', - }), - substrate='commons_harvest_closed', - is_focal=(True,) * 10 + (False,) * 6, - bots=frozenset({ - 'closed_commons_zapper_0', - 'closed_commons_zapper_1', - 
'closed_commons_zapper_2', - 'closed_commons_zapper_3', - }), - ), - commons_harvest_closed_2=_homogeneous_scenario( - description='visiting a population of zappers', - tags=frozenset({ - 'visitor', - }), - substrate='commons_harvest_closed', - is_focal=(True,) * 4 + (False,) * 12, - bots=frozenset({ - 'closed_commons_zapper_0', - 'closed_commons_zapper_1', - 'closed_commons_zapper_2', - 'closed_commons_zapper_3', - }), - ), - commons_harvest_open_0=_homogeneous_scenario( - description='focals are resident and visited by two zappers', - tags=frozenset({ - 'resident', - }), - substrate='commons_harvest_open', - is_focal=(True,) * 14 + (False,) * 2, - bots=frozenset({ - 'open_commons_zapper_0', - 'open_commons_zapper_1', - }), - ), - commons_harvest_open_1=_homogeneous_scenario( - description='focals are resident and visited by six zappers', - tags=frozenset({ - 'resident', - }), - substrate='commons_harvest_open', - is_focal=(True,) * 10 + (False,) * 6, - bots=frozenset({ - 'open_commons_zapper_0', - 'open_commons_zapper_1', - }), - ), - commons_harvest_partnership_0=_homogeneous_scenario( - description='meeting good partners', - tags=frozenset({ - 'half_and_half', - 'versus_good_partners', - }), - substrate='commons_harvest_partnership', - is_focal=(True,) * 8 + (False,) * 8, - bots=frozenset({ - 'partnership_commons_putative_good_partner_4', - 'partnership_commons_putative_good_partner_5', - 'partnership_commons_putative_good_partner_7', - }), + 'versus_puppet', + }, + substrate='clean_up', + roles=('default',) * 7, + is_focal=(True,) * 2 + (False,) * 5, + bots_by_role={ + 'default': {'clean_up__puppet_high_threshold_reciprocator_0',}, + }, ), - commons_harvest_partnership_1=_homogeneous_scenario( - description='focals are resident and visitors are good partners', - tags=frozenset({ - 'resident', - 'versus_good_partners', - }), - substrate='commons_harvest_partnership', - is_focal=(True,) * 12 + (False,) * 4, - bots=frozenset({ - 
'partnership_commons_putative_good_partner_4', - 'partnership_commons_putative_good_partner_5', - 'partnership_commons_putative_good_partner_7', - }), - ), - commons_harvest_partnership_2=_homogeneous_scenario( - description='visiting a population of good partners', - tags=frozenset({ - 'versus_good_partners', - 'visitor', - }), - substrate='commons_harvest_partnership', - is_focal=(True,) * 4 + (False,) * 12, - bots=frozenset({ - 'partnership_commons_putative_good_partner_4', - 'partnership_commons_putative_good_partner_5', - 'partnership_commons_putative_good_partner_7', - }), - ), - commons_harvest_partnership_3=_homogeneous_scenario( - description='focals are resident and visited by two zappers', - tags=frozenset({ - 'resident', - 'versus_zappers', - }), - substrate='commons_harvest_partnership', - is_focal=(True,) * 14 + (False,) * 2, - bots=frozenset({ - 'partnership_commons_zapper_1', - 'partnership_commons_zapper_2', - }), - ), - commons_harvest_partnership_4=_homogeneous_scenario( - description='focals are resident and visited by six zappers', - tags=frozenset({ + clean_up_8=ScenarioConfig( + description='focals are visited by one nice reciprocator', + tags={ 'resident', - 'versus_zappers', - }), - substrate='commons_harvest_partnership', - is_focal=(True,) * 10 + (False,) * 6, - bots=frozenset({ - 'partnership_commons_zapper_1', - 'partnership_commons_zapper_2', - }), - ), - commons_harvest_partnership_5=_homogeneous_scenario( - description='visiting a population of zappers', - tags=frozenset({ - 'versus_zappers', - 'visitor', - }), - substrate='commons_harvest_partnership', - is_focal=(True,) * 4 + (False,) * 12, - bots=frozenset({ - 'partnership_commons_zapper_1', - 'partnership_commons_zapper_2', - }), + 'versus_puppet', + }, + substrate='clean_up', + roles=('default',) * 7, + is_focal=(True,) * 6 + (False,) * 1, + bots_by_role={ + 'default': {'clean_up__puppet_nice_low_threshold_reciprocator_0',}, + }, ), - king_of_the_hill_0=_homogeneous_scenario( - 
description='focal team versus default vmpo bot team', - tags=frozenset({ + coins_0=ScenarioConfig( + description='partner is either a pure cooperator or a pure defector', + tags={ + 'versus_pure_all', 'half_and_half', - 'learned_teamwork', - }), - substrate='king_of_the_hill', - is_focal=(True, False) * 4, - bots=frozenset({ - 'koth_default_vmpo_0', - 'koth_default_vmpo_1', - 'koth_default_vmpo_2', - 'koth_default_vmpo_3', - 'koth_default_vmpo_4', - 'koth_default_vmpo_5', - 'koth_default_vmpo_6', - 'koth_default_vmpo_7', - }), - ), - king_of_the_hill_1=_homogeneous_scenario( - description='focal team versus shaped a3c bot team', - tags=frozenset({ + }, + substrate='coins', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': {'coins__puppet_cooperator_0', + 'coins__puppet_defector_0',}, + }, + ), + coins_1=ScenarioConfig( + description=('partner is a high-threshold (generous) reciprocator'), + tags={ + 'versus_reciprocator', 'half_and_half', - 'learned_teamwork', - }), - substrate='king_of_the_hill', - is_focal=(True, False) * 4, - bots=frozenset({ - 'koth_zap_while_in_control_a3c_0', - 'koth_zap_while_in_control_a3c_1', - 'koth_zap_while_in_control_a3c_2', - 'koth_zap_while_in_control_a3c_3', - 'koth_zap_while_in_control_a3c_4', - 'koth_zap_while_in_control_a3c_5', - 'koth_zap_while_in_control_a3c_6', - 'koth_zap_while_in_control_a3c_7', - }), - ), - king_of_the_hill_2=_homogeneous_scenario( - description='focal team versus shaped vmpo bot team', - tags=frozenset({ + }, + substrate='coins', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': {'coins__puppet_three_strikes_reciprocator_0',}, + }, + ), + coins_2=ScenarioConfig( + description=('partner is a low-threshold (harsh) reciprocator'), + tags={ + 'versus_reciprocator', 'half_and_half', - 'learned_teamwork', - }), - substrate='king_of_the_hill', - is_focal=(True, False) * 4, - bots=frozenset({ - 'koth_zap_while_in_control_vmpo_0', - 
'koth_zap_while_in_control_vmpo_1', - 'koth_zap_while_in_control_vmpo_2', - 'koth_zap_while_in_control_vmpo_3', - 'koth_zap_while_in_control_vmpo_4', - 'koth_zap_while_in_control_vmpo_5', - 'koth_zap_while_in_control_vmpo_6', - 'koth_zap_while_in_control_vmpo_7', - }), - ), - king_of_the_hill_3=_homogeneous_scenario( - description='ad hoc teamwork with default vmpo bots', - tags=frozenset({ - 'ad_hoc_teamwork', - 'visitor', - }), - substrate='king_of_the_hill', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'koth_default_vmpo_0', - 'koth_default_vmpo_1', - 'koth_default_vmpo_2', - 'koth_default_vmpo_3', - 'koth_default_vmpo_4', - 'koth_default_vmpo_5', - 'koth_default_vmpo_6', - 'koth_default_vmpo_7', - }), - ), - king_of_the_hill_4=_homogeneous_scenario( - description='ad hoc teamwork with shaped a3c bots', - tags=frozenset({ - 'ad_hoc_teamwork', - 'visitor', - }), - substrate='king_of_the_hill', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'koth_zap_while_in_control_a3c_0', - 'koth_zap_while_in_control_a3c_1', - 'koth_zap_while_in_control_a3c_2', - 'koth_zap_while_in_control_a3c_3', - 'koth_zap_while_in_control_a3c_4', - 'koth_zap_while_in_control_a3c_5', - 'koth_zap_while_in_control_a3c_6', - 'koth_zap_while_in_control_a3c_7', - }), - ), - king_of_the_hill_5=_homogeneous_scenario( - description='ad hoc teamwork with shaped vmpo bots', - tags=frozenset({ - 'ad_hoc_teamwork', - 'visitor', - }), - substrate='king_of_the_hill', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'koth_zap_while_in_control_vmpo_0', - 'koth_zap_while_in_control_vmpo_1', - 'koth_zap_while_in_control_vmpo_2', - 'koth_zap_while_in_control_vmpo_3', - 'koth_zap_while_in_control_vmpo_4', - 'koth_zap_while_in_control_vmpo_5', - 'koth_zap_while_in_control_vmpo_6', - 'koth_zap_while_in_control_vmpo_7', - }), - ), - prisoners_dilemma_in_the_matrix_0=_homogeneous_scenario( - description='visiting unconditional cooperators', - tags=frozenset({ - 
'versus_pure_cooperators', - 'visitor', - }), - substrate='prisoners_dilemma_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'prisoners_dilemma_cooperator_2', - 'prisoners_dilemma_cooperator_4', - }), - ), - prisoners_dilemma_in_the_matrix_1=_homogeneous_scenario( - description='focals are resident and visitors are unconditional cooperators', - tags=frozenset({ - 'resident', - 'versus_pure_cooperators', - }), - substrate='prisoners_dilemma_in_the_matrix', - is_focal=(True,) * 6 + (False,) * 2, - bots=frozenset({ - 'prisoners_dilemma_cooperator_2', - 'prisoners_dilemma_cooperator_4', - }), - ), - prisoners_dilemma_in_the_matrix_2=_homogeneous_scenario( - description='focals are resident and visitors defect', - tags=frozenset({ - 'resident', - 'versus_pure_defectors', - }), - substrate='prisoners_dilemma_in_the_matrix', - is_focal=(True,) * 6 + (False,) * 2, - bots=frozenset({ - 'prisoners_dilemma_defector_0', - 'prisoners_dilemma_defector_2', - }), - ), - prisoners_dilemma_in_the_matrix_3=_homogeneous_scenario( - description='meeting gullible bots', - tags=frozenset({ - 'deception', + }, + substrate='coins', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': {'coins__puppet_one_strike_reciprocator_0',}, + }, + ), + coins_3=ScenarioConfig( + description=('partner is a high-threshold (generous) strong ' + + 'reciprocator'), + tags={ + 'versus_reciprocator', 'half_and_half', - 'versus_free', - }), - substrate='prisoners_dilemma_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'prisoners_dilemma_free_0', - 'prisoners_dilemma_free_1', - 'prisoners_dilemma_free_2', - }), - ), - prisoners_dilemma_in_the_matrix_4=_homogeneous_scenario( - description='visiting a population of grim reciprocators', - tags=frozenset({ - 'reciprocity', - 'versus_puppet', - 'visitor', - }), - substrate='prisoners_dilemma_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 
'prisoners_dilemma_puppet_grim_threshold_high', - }), + }, + substrate='coins', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': {'coins__puppet_three_strikes_strong_reciprocator_0',}, + }, ), - prisoners_dilemma_in_the_matrix_5=_homogeneous_scenario( - description='visiting a population of hair-trigger grim reciprocators', - tags=frozenset({ - 'reciprocity', - 'versus_puppet', - 'visitor', - }), - substrate='prisoners_dilemma_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'prisoners_dilemma_puppet_grim_threshold_low', - }), + coins_4=ScenarioConfig( + description=('partner is a low-threshold (harsh) strong reciprocator'), + tags={ + 'versus_reciprocator', + 'half_and_half', + }, + substrate='coins', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': {'coins__puppet_one_strike_strong_reciprocator_0',}, + }, ), - pure_coordination_in_the_matrix_0=_homogeneous_scenario( - description='focals are resident and visitor is mixed', - tags=frozenset({ - 'resident', - 'versus_pure_all', - }), - substrate='pure_coordination_in_the_matrix', - is_focal=(True,) * 7 + (False,) * 1, - bots=frozenset({ - 'pure_coordination_type_1_specialist_0', - 'pure_coordination_type_1_specialist_1', - 'pure_coordination_type_2_specialist_0', - 'pure_coordination_type_2_specialist_1', - 'pure_coordination_type_3_specialist_0', - 'pure_coordination_type_3_specialist_1', - }), - ), - pure_coordination_in_the_matrix_1=_homogeneous_scenario( - description='visiting resource 1 fans', - tags=frozenset({ - 'versus_pure_type_1', - 'visitor', - }), - substrate='pure_coordination_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'pure_coordination_type_1_specialist_0', - 'pure_coordination_type_1_specialist_1', - }), - ), - pure_coordination_in_the_matrix_2=_homogeneous_scenario( - description='visiting resource 2 fans', - tags=frozenset({ - 'versus_pure_type_2', - 'visitor', - 
}), - substrate='pure_coordination_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'pure_coordination_type_2_specialist_0', - 'pure_coordination_type_2_specialist_1', - }), - ), - pure_coordination_in_the_matrix_3=_homogeneous_scenario( - description='visiting resource 3 fans', - tags=frozenset({ - 'versus_pure_type_3', - 'visitor', - }), - substrate='pure_coordination_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'pure_coordination_type_3_specialist_0', - 'pure_coordination_type_3_specialist_1', - }), + coins_5=ScenarioConfig( + description='partner is a cooperator', + tags={ + 'versus_pure_cooperator', + 'half_and_half', + }, + substrate='coins', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': {'coins__puppet_cooperator_0',}, + }, ), - pure_coordination_in_the_matrix_4=_homogeneous_scenario( - description='meeting uncoordinated strangers', - tags=frozenset({ + coins_6=ScenarioConfig( + description='partner is a defector', + tags={ + 'versus_pure_defector', 'half_and_half', - 'versus_pure_all', - }), - substrate='pure_coordination_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'pure_coordination_type_1_specialist_0', - 'pure_coordination_type_1_specialist_1', - 'pure_coordination_type_2_specialist_0', - 'pure_coordination_type_2_specialist_1', - 'pure_coordination_type_3_specialist_0', - 'pure_coordination_type_3_specialist_1', - }), - ), - rationalizable_coordination_in_the_matrix_0=_homogeneous_scenario( - description='focals are resident and visitor is mixed', - tags=frozenset({ - 'resident', - 'versus_pure_all', - }), - substrate='rationalizable_coordination_in_the_matrix', - is_focal=(True,) * 7 + (False,) * 1, - bots=frozenset({ - 'rationalizable_coordination_type_1_specialist_0', - 'rationalizable_coordination_type_1_specialist_1', - 'rationalizable_coordination_type_2_specialist_0', - 
'rationalizable_coordination_type_2_specialist_1', - 'rationalizable_coordination_type_3_specialist_0', - 'rationalizable_coordination_type_3_specialist_1', - }), - ), - rationalizable_coordination_in_the_matrix_1=_homogeneous_scenario( - description='visiting resource 1 fans', - tags=frozenset({ - 'versus_pure_type_1', - 'visitor', - }), - substrate='rationalizable_coordination_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'rationalizable_coordination_type_1_specialist_0', - 'rationalizable_coordination_type_1_specialist_1', - }), - ), - rationalizable_coordination_in_the_matrix_2=_homogeneous_scenario( - description='visiting resource 2 fans', - tags=frozenset({ - 'versus_pure_type_2', - 'visitor', - }), - substrate='rationalizable_coordination_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'rationalizable_coordination_type_2_specialist_0', - 'rationalizable_coordination_type_2_specialist_1', - }), - ), - rationalizable_coordination_in_the_matrix_3=_homogeneous_scenario( - description='visiting resource 3 fans', - tags=frozenset({ - 'versus_pure_type_3', - 'visitor', - }), - substrate='rationalizable_coordination_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'rationalizable_coordination_type_3_specialist_0', - 'rationalizable_coordination_type_3_specialist_1', - }), + }, + substrate='coins', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': {'coins__puppet_defector_0',}, + }, ), - rationalizable_coordination_in_the_matrix_4=_homogeneous_scenario( - description='meeting uncoordinated strangers', - tags=frozenset({ + collaborative_cooking__asymmetric_0=ScenarioConfig( + description='collaborate with a skilled chef', + tags={ 'half_and_half', - 'versus_pure_all', - }), - substrate='rationalizable_coordination_in_the_matrix', - is_focal=(True,) * 4 + (False,) * 4, - bots=frozenset({ - 'rationalizable_coordination_type_1_specialist_0', - 
'rationalizable_coordination_type_1_specialist_1', - 'rationalizable_coordination_type_2_specialist_0', - 'rationalizable_coordination_type_2_specialist_1', - 'rationalizable_coordination_type_3_specialist_0', - 'rationalizable_coordination_type_3_specialist_1', - }), - ), - running_with_scissors_in_the_matrix_0=_homogeneous_scenario( - description='versus gullible opponent', - tags=frozenset({ - 'deception', + }, + substrate='collaborative_cooking__asymmetric', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__asymmetric__chef_0', + 'collaborative_cooking__asymmetric__chef_1', + }, + }, + ), + collaborative_cooking__asymmetric_1=ScenarioConfig( + description='collaborate with a semi-skilled apprentice chef', + tags={ 'half_and_half', - 'versus_free', - }), - substrate='running_with_scissors_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 1, - bots=frozenset({ - 'classic_rws_free_0', - 'classic_rws_free_1', - 'classic_rws_free_2', - }), + }, + substrate='collaborative_cooking__asymmetric', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__asymmetric__apprentice_0', + 'collaborative_cooking__asymmetric__apprentice_1', + }, + }, ), - running_with_scissors_in_the_matrix_1=_homogeneous_scenario( - description='versus mixed strategy opponent', - tags=frozenset({ + collaborative_cooking__asymmetric_2=ScenarioConfig( + description='succeed despite an unhelpful partner', + tags={ + 'half_and_half', + 'versus_noop', + }, + substrate='collaborative_cooking__asymmetric', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + collaborative_cooking__circuit_0=ScenarioConfig( + description='collaborate with a skilled chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__circuit', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 
'collaborative_cooking__circuit__chef_0', + 'collaborative_cooking__circuit__chef_1', + }, + }, + ), + collaborative_cooking__circuit_1=ScenarioConfig( + description='collaborate with a semi-skilled apprentice chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__circuit', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__circuit__apprentice_0', + 'collaborative_cooking__circuit__apprentice_1', + }, + }, + ), + collaborative_cooking__circuit_2=ScenarioConfig( + description='succeed despite an unhelpful partner', + tags={ + 'half_and_half', + 'versus_noop', + }, + substrate='collaborative_cooking__circuit', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + collaborative_cooking__cramped_0=ScenarioConfig( + description='collaborate with a skilled chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__cramped', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__cramped__chef_0', + 'collaborative_cooking__cramped__chef_1', + }, + }, + ), + collaborative_cooking__cramped_1=ScenarioConfig( + description='collaborate with a semi-skilled apprentice chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__cramped', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__cramped__apprentice_0', + 'collaborative_cooking__cramped__apprentice_1', + }, + }, + ), + collaborative_cooking__cramped_2=ScenarioConfig( + description='succeed despite an unhelpful partner', + tags={ + 'half_and_half', + 'versus_noop', + }, + substrate='collaborative_cooking__cramped', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + collaborative_cooking__crowded_0=ScenarioConfig( + description=( + 'collaborate with an independent chef who expects 
others to get ' + + 'out of their way'), + tags={ + 'resident', + }, + substrate='collaborative_cooking__crowded', + roles=('default',) * 9, + is_focal=(True,) * 8 + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__crowded__independent_chef_0', + }, + }, + ), + collaborative_cooking__crowded_1=ScenarioConfig( + description=( + 'collaborate with several chefs who can work together, but are ' + + 'not very good at doing so'), + tags={ + 'resident', + }, + substrate='collaborative_cooking__crowded', + roles=('default',) * 9, + is_focal=(True,) * 6 + (False,) * 3, + bots_by_role={ + 'default': { + 'collaborative_cooking__crowded__robust_chef_0', + }, + }, + ), + collaborative_cooking__crowded_2=ScenarioConfig( + description=( + 'no assistance from an unhelpful visiting noop bot'), + tags={ + 'resident', + 'versus_noop', + }, + substrate='collaborative_cooking__crowded', + roles=('default',) * 9, + is_focal=(True,) * 8 + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + collaborative_cooking__figure_eight_0=ScenarioConfig( + description=( + 'collaborate with an independent chef who expects others to get ' + + 'out of their way'), + tags={ + 'resident', + }, + substrate='collaborative_cooking__figure_eight', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__figure_eight__independent_chef_0', + }, + }, + ), + collaborative_cooking__figure_eight_1=ScenarioConfig( + description=( + 'collaborate with two chefs who can work together, but are ' + + 'not very good at doing so'), + tags={ + 'resident', + }, + substrate='collaborative_cooking__figure_eight', + roles=('default',) * 6, + is_focal=(True,) * 4 + (False,) * 2, + bots_by_role={ + 'default': { + 'collaborative_cooking__figure_eight__robust_chef_0', + }, + }, + ), + collaborative_cooking__figure_eight_2=ScenarioConfig( + description=( + 'no assistance from an unhelpful visiting noop bot'), + tags={ + 'resident', +
'versus_noop', + }, + substrate='collaborative_cooking__figure_eight', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + collaborative_cooking__forced_0=ScenarioConfig( + description='collaborate with a skilled chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__forced', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__forced__chef_0', + 'collaborative_cooking__forced__chef_1', + }, + }, + ), + collaborative_cooking__forced_1=ScenarioConfig( + description='collaborate with a semi-skilled apprentice chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__forced', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__forced__apprentice_0', + 'collaborative_cooking__forced__apprentice_1', + }, + }, + ), + collaborative_cooking__ring_0=ScenarioConfig( + description='collaborate with a skilled chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__ring', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__ring__chef_0', + 'collaborative_cooking__ring__chef_1', + }, + }, + ), + collaborative_cooking__ring_1=ScenarioConfig( + description='collaborate with a semi-skilled apprentice chef', + tags={ + 'half_and_half', + }, + substrate='collaborative_cooking__ring', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'collaborative_cooking__ring__apprentice_0', + 'collaborative_cooking__ring__apprentice_1', + }, + }, + ), + commons_harvest__closed_0=ScenarioConfig( + description='focals visit pacifist bots who harvest unsustainably', + tags={ + 'visitor', + 'ownership', + }, + substrate='commons_harvest__closed', + roles=('default',) * 7, + is_focal=(True,) * 2 + (False,) * 5, + bots_by_role={ + 'default': 
{'commons_harvest__closed__pacifist_0', + 'commons_harvest__closed__pacifist_1', + 'commons_harvest__closed__pacifist_2',}, + }, + ), + commons_harvest__closed_1=ScenarioConfig( + description=('focals are resident and visited by pacifist bots who ' + + 'harvest unsustainably'), + tags={ + 'resident', + 'ownership', + }, + substrate='commons_harvest__closed', + roles=('default',) * 7, + is_focal=(True,) * 5 + (False,) * 2, + bots_by_role={ + 'default': {'commons_harvest__closed__pacifist_0', + 'commons_harvest__closed__pacifist_1', + 'commons_harvest__closed__pacifist_2',}, + }, + ), + commons_harvest__closed_2=ScenarioConfig( + description=('focals visit bots who zap and harvest sustainably if ' + + 'they get a chance'), + tags={ + 'visitor', + 'ownership', + }, + substrate='commons_harvest__closed', + roles=('default',) * 7, + is_focal=(True,) * 2 + (False,) * 5, + bots_by_role={ + 'default': {'commons_harvest__closed__free_0', + 'commons_harvest__closed__free_1', + 'commons_harvest__closed__free_2', + 'commons_harvest__closed__free_3'}, + }, + ), + commons_harvest__closed_3=ScenarioConfig( + description=('focals are resident, and are visited by bots who zap ' + + 'and harvest sustainably if they get a chance'), + tags={ + 'resident', + 'ownership', + }, + substrate='commons_harvest__closed', + roles=('default',) * 7, + is_focal=(True,) * 5 + (False,) * 2, + bots_by_role={ + 'default': {'commons_harvest__closed__free_0', + 'commons_harvest__closed__free_1', + 'commons_harvest__closed__free_2', + 'commons_harvest__closed__free_3'}, + }, + ), + commons_harvest__open_0=ScenarioConfig( + description=('focals are resident and visited by two bots who zap ' + + 'and harvest unsustainably'), + tags={ + 'resident', + }, + substrate='commons_harvest__open', + roles=('default',) * 7, + is_focal=(True,) * 5 + (False,) * 2, + bots_by_role={ + 'default': {'commons_harvest__open__free_0', + 'commons_harvest__open__free_1',}, + }, + ), + commons_harvest__open_1=ScenarioConfig( + 
description=('focals are resident and visited by two pacifists who ' + + 'harvest unsustainably'), + tags={ + 'resident', + }, + substrate='commons_harvest__open', + roles=('default',) * 7, + is_focal=(True,) * 5 + (False,) * 2, + bots_by_role={ + 'default': {'commons_harvest__open__pacifist_0', + 'commons_harvest__open__pacifist_1'}, + }, + ), + commons_harvest__partnership_0=ScenarioConfig( + description='meeting good partners', + tags={ + 'visitor', + 'dyadic_trust', + 'ownership', + }, + substrate='commons_harvest__partnership', + roles=('default',) * 7, + is_focal=(True,) * 1 + (False,) * 6, + bots_by_role={ + 'default': {'commons_harvest__partnership__good_partner_0', + 'commons_harvest__partnership__good_partner_1', + 'commons_harvest__partnership__good_partner_2',}, + }, + ), + commons_harvest__partnership_1=ScenarioConfig( + description='focals are resident and visitors are good partners', + tags={ + 'resident', + 'dyadic_trust', + 'ownership', + }, + substrate='commons_harvest__partnership', + roles=('default',) * 7, + is_focal=(True,) * 5 + (False,) * 2, + bots_by_role={ + 'default': {'commons_harvest__partnership__good_partner_0', + 'commons_harvest__partnership__good_partner_1', + 'commons_harvest__partnership__good_partner_2',}, + }, + ), + commons_harvest__partnership_2=ScenarioConfig( + description=('focals visit zappers who harvest sustainably but lack ' + + 'trust'), + tags={ + 'visitor', + 'dyadic_trust', + 'ownership', + }, + substrate='commons_harvest__partnership', + roles=('default',) * 7, + is_focal=(True,) * 1 + (False,) * 6, + bots_by_role={ + 'default': {'commons_harvest__partnership__sustainable_fighter_0', + 'commons_harvest__partnership__sustainable_fighter_1',}, + }, + ), + commons_harvest__partnership_3=ScenarioConfig( + description=('focals are resident and visited by zappers who harvest ' + + 'sustainably but lack trust'), + tags={ + 'resident', + 'dyadic_trust', + 'ownership', + }, + substrate='commons_harvest__partnership', + 
roles=('default',) * 7, + is_focal=(True,) * 5 + (False,) * 2, + bots_by_role={ + 'default': {'commons_harvest__partnership__sustainable_fighter_0', + 'commons_harvest__partnership__sustainable_fighter_1',}, + }, + ), + commons_harvest__partnership_4=ScenarioConfig( + description='focals visit pacifists who do not harvest sustainably', + tags={ + 'visitor', + 'dyadic_trust', + 'ownership', + }, + substrate='commons_harvest__partnership', + roles=('default',) * 7, + is_focal=(True,) * 2 + (False,) * 5, + bots_by_role={ + 'default': {'commons_harvest__partnership__pacifist_0', + 'commons_harvest__partnership__pacifist_1', + 'commons_harvest__partnership__pacifist_2',}, + }, + ), + commons_harvest__partnership_5=ScenarioConfig( + description='focals visit zappers who do not harvest sustainably', + tags={ + 'visitor', + 'dyadic_trust', + 'ownership', + }, + substrate='commons_harvest__partnership', + roles=('default',) * 7, + is_focal=(True,) * 2 + (False,) * 5, + bots_by_role={ + 'default': {'commons_harvest__partnership__free_0', + 'commons_harvest__partnership__free_1', + 'commons_harvest__partnership__free_2',}, + }, + ), + coop_mining_0=ScenarioConfig( + description='visiting cooperators', + tags={ + 'visitor', + }, + substrate='coop_mining', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role={ + 'default': {'coop_mining__cooperator_0',}, + }, + ), + coop_mining_1=ScenarioConfig( + description='visiting residents that extract both ores', + tags={ + 'visitor', + }, + substrate='coop_mining', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role={ + 'default': {'coop_mining__mixed_0',}, + }, + ), + coop_mining_2=ScenarioConfig( + description='visiting defectors', + tags={ + 'visitor', + }, + substrate='coop_mining', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role={ + 'default': {'coop_mining__defector_0'}, + }, + ), + coop_mining_3=ScenarioConfig( + description='residents visited 
by a cooperator', + tags={ + 'resident', + }, + substrate='coop_mining', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,) * 1, + bots_by_role={ + 'default': {'coop_mining__cooperator_0',}, + }, + ), + coop_mining_4=ScenarioConfig( + description='residents visited by a defector', + tags={ + 'resident', + }, + substrate='coop_mining', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,) * 1, + bots_by_role={ + 'default': {'coop_mining__defector_0',}, + }, + ), + coop_mining_5=ScenarioConfig( + description='find the cooperator partner', + tags={ + 'partner_choice', + 'visitor', + }, + substrate='coop_mining', + roles=('default',) * 5 + ('target',), + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role={ + 'default': {'coop_mining__defector_0',}, + 'target': {'coop_mining__cooperator_0', + 'coop_mining__mixed_0',}, + }, + ), + daycare_0=ScenarioConfig( + description='meeting a helpful parent', + tags={ + 'half_and_half', + }, + substrate='daycare', + roles=('child',) + ('parent',), + is_focal=(True,) + (False,), + bots_by_role={ + 'parent': {'daycare__helping_parent_0',}, + }, + ), + daycare_1=ScenarioConfig( + description='meeting a child who points to what they want', + tags={ + 'half_and_half', + }, + substrate='daycare', + roles=('child',) + ('parent',), + is_focal=(False,) + (True,), + bots_by_role={ + 'child': {'daycare__pointing_child_0',}, + }, + ), + daycare_2=ScenarioConfig( + description='meeting an unhelpful parent', + tags={ + 'half_and_half', + }, + substrate='daycare', + roles=('child',) + ('parent',), + is_focal=(True,) + (False,), + bots_by_role={ + 'parent': {'daycare__foraging_parent_0',}, + }, + ), + daycare_3=ScenarioConfig( + description='meeting an independent child', + tags={ + 'half_and_half', + }, + substrate='daycare', + roles=('child',) + ('parent',), + is_focal=(False,) + (True,), + bots_by_role={ + 'child': {'daycare__foraging_child_0',}, + }, + ), + externality_mushrooms__dense_0=ScenarioConfig( + description='visiting 
unconditional hihe (cooperator) players', + tags={ + 'visitor', + }, + substrate='externality_mushrooms__dense', + roles=('default',) * 5, + is_focal=(True,) + (False,) * 4, + bots_by_role={ + 'default': {'externality_mushrooms__dense__puppet_hihe_0',}, + }, + ), + externality_mushrooms__dense_1=ScenarioConfig( + description='visiting unconditional fize (defector) players', + tags={ + 'visitor', + }, + substrate='externality_mushrooms__dense', + roles=('default',) * 5, + is_focal=(True,) + (False,) * 4, + bots_by_role={ + 'default': {'externality_mushrooms__dense__puppet_fize_0',}, + }, + ), + externality_mushrooms__dense_2=ScenarioConfig( + description=('focals are resident and joined by two unconditional ' + + 'hihe (cooperator) players'), + tags={ + 'resident', + }, + substrate='externality_mushrooms__dense', + roles=('default',) * 5, + is_focal=(True,) * 3 + (False,) * 2, + bots_by_role={ + 'default': {'externality_mushrooms__dense__puppet_hihe_0',}, + }, + ), + externality_mushrooms__dense_3=ScenarioConfig( + description=('focals are resident and joined by two unconditional ' + + 'fize (defector) players'), + tags={ + 'resident', + }, + substrate='externality_mushrooms__dense', + roles=('default',) * 5, + is_focal=(True,) * 3 + (False,) * 2, + bots_by_role={ + 'default': {'externality_mushrooms__dense__puppet_fize_0',}, + }, + ), + factory_commons__either_or_0=ScenarioConfig( + description='visiting a sustainable background population', + tags={ + 'visitor', + }, + substrate='factory_commons__either_or', + roles=('default',) * 3, + is_focal=(True,) * 1 + (False,) * 2, + bots_by_role={ + 'default': {'factory_commons__either_or__sustainable_0', + 'factory_commons__either_or__sustainable_1', + 'factory_commons__either_or__sustainable_2',}, + }, + ), + factory_commons__either_or_1=ScenarioConfig( + description='visiting an unsustainable background population', + tags={ + 'visitor', + }, + substrate='factory_commons__either_or', + roles=('default',) * 3, + 
is_focal=(True,) * 1 + (False,) * 2, + bots_by_role={ + 'default': {'factory_commons__either_or__unsustainable_0', + 'factory_commons__either_or__unsustainable_1', + 'factory_commons__either_or__unsustainable_2',}, + }, + ), + factory_commons__either_or_2=ScenarioConfig( + description='resident focal agents are joined by a sustainable visitor', + tags={ + 'resident', + }, + substrate='factory_commons__either_or', + roles=('default',) * 3, + is_focal=(True,) * 2 + (False,) * 1, + bots_by_role={ + 'default': {'factory_commons__either_or__sustainable_0', + 'factory_commons__either_or__sustainable_1', + 'factory_commons__either_or__sustainable_2',}, + }, + ), + factory_commons__either_or_3=ScenarioConfig( + description=('resident focal agents are joined by an unsustainable ' + + 'visitor'), + tags={ + 'resident', + }, + substrate='factory_commons__either_or', + roles=('default',) * 3, + is_focal=(True,) * 2 + (False,) * 1, + bots_by_role={ + 'default': {'factory_commons__either_or__unsustainable_0', + 'factory_commons__either_or__unsustainable_1', + 'factory_commons__either_or__unsustainable_2',}, + }, + ), + fruit_market__concentric_rivers_0=ScenarioConfig( + description='all apple farmers are focal', + tags={ + 'half_and_half', + }, + substrate='fruit_market__concentric_rivers', + roles=('apple_farmer',) * 8 + ('banana_farmer',) * 8, + is_focal=(True,) * 8 + (False,) * 8, + bots_by_role={ + 'banana_farmer': { + 'fruit_market__concentric_rivers__banana_farmer_0', + 'fruit_market__concentric_rivers__banana_farmer_1', + 'fruit_market__concentric_rivers__banana_farmer_2', + }, + }, + ), + fruit_market__concentric_rivers_1=ScenarioConfig( + description='all banana farmers are focal', + tags={ + 'half_and_half', + }, + substrate='fruit_market__concentric_rivers', + roles=('apple_farmer',) * 8 + ('banana_farmer',) * 8, + is_focal=(False,) * 8 + (True,) * 8, + bots_by_role={ + 'apple_farmer': { + 'fruit_market__concentric_rivers__apple_farmer_0', + 
'fruit_market__concentric_rivers__apple_farmer_1', + 'fruit_market__concentric_rivers__apple_farmer_2', + }, + }, + ), + fruit_market__concentric_rivers_2=ScenarioConfig( + description='one focal apple farmer visits a background economy', + tags={ + 'visitor', + }, + substrate='fruit_market__concentric_rivers', + roles=('apple_farmer',) * 8 + ('banana_farmer',) * 8, + is_focal=(True,) * 1 + (False,) * 15, + bots_by_role={ + 'apple_farmer': { + 'fruit_market__concentric_rivers__apple_farmer_0', + 'fruit_market__concentric_rivers__apple_farmer_1', + 'fruit_market__concentric_rivers__apple_farmer_2', + }, + 'banana_farmer': { + 'fruit_market__concentric_rivers__banana_farmer_0', + 'fruit_market__concentric_rivers__banana_farmer_1', + 'fruit_market__concentric_rivers__banana_farmer_2', + }, + }, + ), + fruit_market__concentric_rivers_3=ScenarioConfig( + description='one focal banana farmer visits a background economy', + tags={ + 'visitor', + }, + substrate='fruit_market__concentric_rivers', + roles=('banana_farmer',) * 8 + ('apple_farmer',) * 8, + is_focal=(True,) * 1 + (False,) * 15, + bots_by_role={ + 'apple_farmer': { + 'fruit_market__concentric_rivers__apple_farmer_0', + 'fruit_market__concentric_rivers__apple_farmer_1', + 'fruit_market__concentric_rivers__apple_farmer_2', + }, + 'banana_farmer': { + 'fruit_market__concentric_rivers__banana_farmer_0', + 'fruit_market__concentric_rivers__banana_farmer_1', + 'fruit_market__concentric_rivers__banana_farmer_2', + }, + }, + ), + gift_refinements_0=ScenarioConfig( + description='visiting cooperators', + tags={ + 'visitor', + }, + substrate='gift_refinements', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role=immutabledict.immutabledict( + default=('gift_refinements__cooperator_0',), + ), + ), + gift_refinements_1=ScenarioConfig( + description='visiting defectors', + tags={ + 'visitor', + }, + substrate='gift_refinements', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + 
bots_by_role=immutabledict.immutabledict( + default=('gift_refinements__defector_0',), + ), + ), + gift_refinements_2=ScenarioConfig( + description='visited by a cooperator', + tags={ + 'resident', + }, + substrate='gift_refinements', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,) * 1, + bots_by_role=immutabledict.immutabledict( + default=('gift_refinements__cooperator_0',), + ), + ), + gift_refinements_3=ScenarioConfig( + description='visited by a defector', + tags={ + 'resident', + }, + substrate='gift_refinements', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,) * 1, + bots_by_role=immutabledict.immutabledict( + default=('gift_refinements__defector_0',), + ), + ), + gift_refinements_4=ScenarioConfig( + description='find the cooperator partner', + tags={ + 'partner_choice', + }, + substrate='gift_refinements', + roles=('default',) * 5 + ('target',), + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role=immutabledict.immutabledict( + default=('gift_refinements__defector_0',), + target=('gift_refinements__cooperator_0',), + ), + ), + gift_refinements_5=ScenarioConfig( + description='visiting extreme cooperators', + tags={ + 'visitor', + }, + substrate='gift_refinements', + roles=('default',) * 6, + is_focal=(True,) * 1 + (False,) * 5, + bots_by_role=immutabledict.immutabledict( + default=('gift_refinements__extreme_cooperator_0',), + ), + ), + gift_refinements_6=ScenarioConfig( + description='visited by an extreme cooperator', + tags={ + 'resident', + }, + substrate='gift_refinements', + roles=('default',) * 6, + is_focal=(True,) * 5 + (False,) * 1, + bots_by_role=immutabledict.immutabledict( + default=('gift_refinements__extreme_cooperator_0',), + ), + ), + paintball__capture_the_flag_0=ScenarioConfig( + description='focal team versus shaped bot team', + tags={ + 'half_and_half', + 'learned_teamwork', + }, + substrate='paintball__capture_the_flag', + roles=('default',) * 8, + is_focal=(True, False) * 4, + bots_by_role={ + 'default': 
{'paintball__capture_the_flag__shaped_bot_0', + 'paintball__capture_the_flag__shaped_bot_1', + 'paintball__capture_the_flag__shaped_bot_2', + 'paintball__capture_the_flag__shaped_bot_3',}, + }, + ), + paintball__capture_the_flag_1=ScenarioConfig( + description='ad hoc teamwork with shaped bots', + tags={ + 'ad_hoc_teamwork', + 'visitor', + }, + substrate='paintball__capture_the_flag', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': {'paintball__capture_the_flag__shaped_bot_0', + 'paintball__capture_the_flag__shaped_bot_1', + 'paintball__capture_the_flag__shaped_bot_2', + 'paintball__capture_the_flag__shaped_bot_3',}, + }, + ), + paintball__king_of_the_hill_0=ScenarioConfig( + description='focal team versus default bot team', + tags={ + 'half_and_half', + 'learned_teamwork', + }, + substrate='paintball__king_of_the_hill', + roles=('default',) * 8, + is_focal=(True, False) * 4, + bots_by_role={ + 'default': {'paintball__king_of_the_hill__free_0', + 'paintball__king_of_the_hill__free_1', + 'paintball__king_of_the_hill__free_2',}, + }, + ), + paintball__king_of_the_hill_1=ScenarioConfig( + description='focal team versus shaped bot team', + tags={ + 'half_and_half', + 'learned_teamwork', + }, + substrate='paintball__king_of_the_hill', + roles=('default',) * 8, + is_focal=(True, False) * 4, + bots_by_role={ + 'default': {'paintball__king_of_the_hill__spawn_camper_0', + 'paintball__king_of_the_hill__spawn_camper_1', + 'paintball__king_of_the_hill__spawn_camper_2', + 'paintball__king_of_the_hill__spawn_camper_3',}, + }, + ), + paintball__king_of_the_hill_2=ScenarioConfig( + description='ad hoc teamwork with default bots', + tags={ + 'ad_hoc_teamwork', + 'visitor', + }, + substrate='paintball__king_of_the_hill', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': {'paintball__king_of_the_hill__free_0', + 'paintball__king_of_the_hill__free_1', + 'paintball__king_of_the_hill__free_2',}, 
+ }, + ), + paintball__king_of_the_hill_3=ScenarioConfig( + description='ad hoc teamwork with shaped bots', + tags={ + 'ad_hoc_teamwork', + 'visitor', + }, + substrate='paintball__king_of_the_hill', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': {'paintball__king_of_the_hill__spawn_camper_0', + 'paintball__king_of_the_hill__spawn_camper_1', + 'paintball__king_of_the_hill__spawn_camper_2', + 'paintball__king_of_the_hill__spawn_camper_3',}, + }, + ), + predator_prey__alley_hunt_0=ScenarioConfig( + description='focal prey visited by background predators', + tags={ + 'resident', + }, + substrate='predator_prey__alley_hunt', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(False,) * 5 + (True,) * 8, + bots_by_role={ + 'predator': {'predator_prey__alley_hunt__predator_0', + 'predator_prey__alley_hunt__predator_1', + 'predator_prey__alley_hunt__predator_2',}, + }, + ), + predator_prey__alley_hunt_1=ScenarioConfig( + description=( + 'focal predators aim to eat resident prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__alley_hunt', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) * 5 + (False,) * 8, + bots_by_role={ + 'prey': {'predator_prey__alley_hunt__prey_0', + 'predator_prey__alley_hunt__prey_1', + 'predator_prey__alley_hunt__prey_2',}, + }, + ), + predator_prey__alley_hunt_2=ScenarioConfig( + description=( + 'a focal predator competes with background predators to eat prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__alley_hunt', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__alley_hunt__prey_0', + 'predator_prey__alley_hunt__prey_1', + 'predator_prey__alley_hunt__prey_2',}, + 'predator': {'predator_prey__alley_hunt__predator_0', + 'predator_prey__alley_hunt__predator_1', + 'predator_prey__alley_hunt__predator_2',}, + }, + ), + predator_prey__alley_hunt_3=ScenarioConfig( + description=( + 'one focal 
prey ad hoc cooperates with background prey to avoid ' + + 'predation'), + tags={ + 'visitor', + }, + substrate='predator_prey__alley_hunt', + roles=('prey',) * 8 + ('predator',) * 5, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__alley_hunt__prey_0', + 'predator_prey__alley_hunt__prey_1', + 'predator_prey__alley_hunt__prey_2',}, + 'predator': {'predator_prey__alley_hunt__predator_0', + 'predator_prey__alley_hunt__predator_1', + 'predator_prey__alley_hunt__predator_2',}, + }, + ), + predator_prey__open_0=ScenarioConfig( + description='focal prey visited by background predators', + tags={ + 'resident', + }, + substrate='predator_prey__open', + roles=('predator',) * 3 + ('prey',) * 10, + is_focal=(False,) * 3 + (True,) * 10, + bots_by_role={ + 'predator': {'predator_prey__open__basic_predator_0', + 'predator_prey__open__basic_predator_1',}, + }, + ), + predator_prey__open_1=ScenarioConfig( + description=( + 'focal predators aim to eat basic resident prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__open', + roles=('predator',) * 3 + ('prey',) * 10, + is_focal=(True,) * 3 + (False,) * 10, + bots_by_role={ + 'prey': {'predator_prey__open__basic_prey_0', + 'predator_prey__open__basic_prey_1', + 'predator_prey__open__basic_prey_2',}, + }, + ), + predator_prey__open_2=ScenarioConfig( + description=( + 'a focal predator competes with background predators to hunt prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__open', + roles=('predator',) * 3 + ('prey',) * 10, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__open__basic_prey_0', + 'predator_prey__open__basic_prey_1', + 'predator_prey__open__basic_prey_2',}, + 'predator': {'predator_prey__open__basic_predator_0', + 'predator_prey__open__basic_predator_1',}, + }, + ), + predator_prey__open_3=ScenarioConfig( + description=( + 'one focal prey ad hoc cooperates with background prey to avoid ' + + 'predation'), + tags={ + 'visitor', + }, + 
substrate='predator_prey__open', + roles=('prey',) * 10 + ('predator',) * 3, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__open__basic_prey_0', + 'predator_prey__open__basic_prey_1', + 'predator_prey__open__basic_prey_2',}, + 'predator': {'predator_prey__open__basic_predator_0', + 'predator_prey__open__basic_predator_1',}, + }, + ), + predator_prey__open_4=ScenarioConfig( + description=( + 'focal predators hunt smarter resident prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__open', + roles=('predator',) * 3 + ('prey',) * 10, + is_focal=(True,) * 3 + (False,) * 10, + bots_by_role={ + 'prey': {'predator_prey__open__smart_prey_0', + 'predator_prey__open__smart_prey_1', + 'predator_prey__open__smart_prey_2',}, + }, + ), + predator_prey__open_5=ScenarioConfig( + description=( + 'a focal predator competes with background predators to hunt ' + + 'smarter prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__open', + roles=('predator',) * 3 + ('prey',) * 10, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__open__smart_prey_0', + 'predator_prey__open__smart_prey_1', + 'predator_prey__open__smart_prey_2',}, + 'predator': {'predator_prey__open__basic_predator_0', + 'predator_prey__open__basic_predator_1',}, + }, + ), + predator_prey__open_6=ScenarioConfig( + description=( + 'one focal prey ad hoc cooperates with background smart prey to ' + + 'avoid predation'), + tags={ + 'visitor', + }, + substrate='predator_prey__open', + roles=('prey',) * 10 + ('predator',) * 3, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__open__smart_prey_0', + 'predator_prey__open__smart_prey_1', + 'predator_prey__open__smart_prey_2',}, + 'predator': {'predator_prey__open__basic_predator_0', + 'predator_prey__open__basic_predator_1',}, + }, + ), + predator_prey__orchard_0=ScenarioConfig( + description='focal prey visited by background predators', + tags={ + 'resident', + }, + 
substrate='predator_prey__orchard', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(False,) * 5 + (True,) * 8, + bots_by_role={ + 'predator': {'predator_prey__orchard__basic_predator_0', + 'predator_prey__orchard__basic_predator_1', + 'predator_prey__orchard__basic_predator_2',}, + }, + ), + predator_prey__orchard_1=ScenarioConfig( + description=( + 'focal predators aim to eat resident population of ' + + 'unspecialized prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__orchard', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) * 5 + (False,) * 8, + bots_by_role={ + 'prey': {'predator_prey__orchard__basic_prey_0', + 'predator_prey__orchard__basic_prey_1', + 'predator_prey__orchard__basic_prey_2', + 'predator_prey__orchard__basic_prey_3', + 'predator_prey__orchard__basic_prey_4', + 'predator_prey__orchard__basic_prey_5',}, + }, + ), + predator_prey__orchard_2=ScenarioConfig( + description=( + 'a focal predator competes with background predators to eat ' + + 'unspecialized prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__orchard', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__orchard__basic_prey_0', + 'predator_prey__orchard__basic_prey_1', + 'predator_prey__orchard__basic_prey_2', + 'predator_prey__orchard__basic_prey_3', + 'predator_prey__orchard__basic_prey_4', + 'predator_prey__orchard__basic_prey_5',}, + 'predator': {'predator_prey__orchard__basic_predator_0', + 'predator_prey__orchard__basic_predator_1', + 'predator_prey__orchard__basic_predator_2',}, + }, + ), + predator_prey__orchard_3=ScenarioConfig( + description=( + 'one focal prey ad hoc cooperates with unspecialized background ' + + 'prey to avoid predation'), + tags={ + 'visitor', + }, + substrate='predator_prey__orchard', + roles=('prey',) * 8 + ('predator',) * 5, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__orchard__basic_prey_0', + 
'predator_prey__orchard__basic_prey_1', + 'predator_prey__orchard__basic_prey_2', + 'predator_prey__orchard__basic_prey_3', + 'predator_prey__orchard__basic_prey_4', + 'predator_prey__orchard__basic_prey_5',}, + 'predator': {'predator_prey__orchard__basic_predator_0', + 'predator_prey__orchard__basic_predator_1', + 'predator_prey__orchard__basic_predator_2',}, + }, + ), + predator_prey__orchard_4=ScenarioConfig( + description=( + 'focal predators aim to eat resident population of acorn ' + + 'specialist prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__orchard', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) * 5 + (False,) * 8, + bots_by_role={ + 'prey': {'predator_prey__orchard__acorn_specialist_prey_0', + 'predator_prey__orchard__acorn_specialist_prey_1', + 'predator_prey__orchard__acorn_specialist_prey_2', + 'predator_prey__orchard__acorn_specialist_prey_3', + 'predator_prey__orchard__acorn_specialist_prey_4',}, + }, + ), + predator_prey__orchard_5=ScenarioConfig( + description=( + 'a focal predator competes with background predators to eat ' + + 'acorn specialist prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__orchard', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__orchard__acorn_specialist_prey_0', + 'predator_prey__orchard__acorn_specialist_prey_1', + 'predator_prey__orchard__acorn_specialist_prey_2', + 'predator_prey__orchard__acorn_specialist_prey_3', + 'predator_prey__orchard__acorn_specialist_prey_4',}, + 'predator': {'predator_prey__orchard__basic_predator_0', + 'predator_prey__orchard__basic_predator_1', + 'predator_prey__orchard__basic_predator_2',}, + }, + ), + predator_prey__orchard_6=ScenarioConfig( + description=( + 'one focal prey ad hoc cooperates with acorn specialized ' + + 'background prey to avoid predation'), + tags={ + 'visitor', + }, + substrate='predator_prey__orchard', + roles=('prey',) * 8 + ('predator',) * 5, + 
is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__orchard__acorn_specialist_prey_0', + 'predator_prey__orchard__acorn_specialist_prey_1', + 'predator_prey__orchard__acorn_specialist_prey_2', + 'predator_prey__orchard__acorn_specialist_prey_3', + 'predator_prey__orchard__acorn_specialist_prey_4',}, + 'predator': {'predator_prey__orchard__basic_predator_0', + 'predator_prey__orchard__basic_predator_1', + 'predator_prey__orchard__basic_predator_2',}, + }, + ), + predator_prey__random_forest_0=ScenarioConfig( + description='focal prey visited by background predators', + tags={ + 'resident', + }, + substrate='predator_prey__random_forest', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(False,) * 5 + (True,) * 8, + bots_by_role={ + 'predator': {'predator_prey__random_forest__basic_predator_0', + 'predator_prey__random_forest__basic_predator_1', + 'predator_prey__random_forest__basic_predator_2',}, + }, + ), + predator_prey__random_forest_1=ScenarioConfig( + description=( + 'focal predators aim to eat resident prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__random_forest', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) * 5 + (False,) * 8, + bots_by_role={ + 'prey': {'predator_prey__random_forest__basic_prey_0', + 'predator_prey__random_forest__basic_prey_1', + 'predator_prey__random_forest__basic_prey_2',}, + }, + ), + predator_prey__random_forest_2=ScenarioConfig( + description=( + 'a focal predator competes with background predators to eat prey'), + tags={ + 'visitor', + }, + substrate='predator_prey__random_forest', + roles=('predator',) * 5 + ('prey',) * 8, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__random_forest__basic_prey_0', + 'predator_prey__random_forest__basic_prey_1', + 'predator_prey__random_forest__basic_prey_2',}, + 'predator': {'predator_prey__random_forest__basic_predator_0', + 'predator_prey__random_forest__basic_predator_1', + 
'predator_prey__random_forest__basic_predator_2',}, + }, + ), + predator_prey__random_forest_3=ScenarioConfig( + description=( + 'one focal prey ad hoc cooperates with background prey to avoid ' + + 'predation'), + tags={ + 'visitor', + }, + substrate='predator_prey__random_forest', + roles=('prey',) * 8 + ('predator',) * 5, + is_focal=(True,) + (False,) * 12, + bots_by_role={ + 'prey': {'predator_prey__random_forest__basic_prey_0', + 'predator_prey__random_forest__basic_prey_1', + 'predator_prey__random_forest__basic_prey_2',}, + 'predator': {'predator_prey__random_forest__basic_predator_0', + 'predator_prey__random_forest__basic_predator_1', + 'predator_prey__random_forest__basic_predator_2',}, + }, + ), + prisoners_dilemma_in_the_matrix__arena_0=ScenarioConfig( + description='visiting unconditional cooperators', + tags={ + 'visitor', + 'versus_pure_cooperators', + }, + substrate='prisoners_dilemma_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__arena__puppet_cooperator_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_cooperator_margin_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__arena_1=ScenarioConfig( + description=('focals are resident and visited by an unconditional ' + + 'cooperator'), + tags={ + 'resident', + 'versus_pure_cooperators', + }, + substrate='prisoners_dilemma_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 7 + (False,) * 1, + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__arena__puppet_cooperator_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_cooperator_margin_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__arena_2=ScenarioConfig( + description='focals are resident and visitors defect unconditionally', + tags={ + 'resident', + 'versus_pure_defectors', + }, + substrate='prisoners_dilemma_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 6 + (False,) * 2, + 
bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__arena__puppet_defector_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_defector_margin_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__arena_3=ScenarioConfig( + description=('visiting a population of hair-trigger grim ' + + 'reciprocator bots who initially cooperate but, if ' + + 'defected on once, will retaliate by defecting in all ' + + 'future interactions'), + tags={ + 'visitor', + 'reciprocity', + }, + substrate='prisoners_dilemma_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_one_strike_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_one_strike_margin_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__arena_4=ScenarioConfig( + description=('visiting a population of two-strikes grim ' + + 'reciprocator bots who initially cooperate but, if ' + + 'defected on twice, will retaliate by defecting in all ' + + 'future interactions'), + tags={ + 'visitor', + 'reciprocity', + }, + substrate='prisoners_dilemma_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_two_strikes_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_two_strikes_margin_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__arena_5=ScenarioConfig( + description=( + 'visiting a mixed population of k-strikes grim reciprocator bots ' + + 'with k values from 1 to 3, they initially cooperate but, if ' + + 'defected on k times, they retaliate in all future interactions' + ), + tags={ + 'visitor', + 'reciprocity', + }, + substrate='prisoners_dilemma_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 3 + (False,) * 5, + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_one_strike_0', + 
'prisoners_dilemma_in_the_matrix__arena__puppet_grim_one_strike_margin_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_three_strikes_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_three_strikes_margin_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_two_strikes_0', + 'prisoners_dilemma_in_the_matrix__arena__puppet_grim_two_strikes_margin_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_0=ScenarioConfig( + description='partner may play either cooperate or defect', + tags={ 'half_and_half', 'versus_pure_all', - }), - substrate='running_with_scissors_in_the_matrix', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_cooperator_margin_0', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_cooperator_margin_1', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_defector_margin_0', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_defector_margin_1', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_1=ScenarioConfig( + description='partner typically plays cooperate', + tags={ + 'half_and_half', + 'versus_pure_cooperator', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_cooperator_margin_0', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_cooperator_margin_1', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_2=ScenarioConfig( + description='partner typically plays defect', + tags={ + 'half_and_half', + 'versus_pure_defector', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_defector_margin_0', + 
'prisoners_dilemma_in_the_matrix__repeated__puppet_defector_margin_1', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_3=ScenarioConfig( + description=('partner is a hair-trigger grim reciprocator, i.e. one ' + + 'who initially cooperates but, if defected on once, will' + + ' retaliate by defecting forever after'), + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_grim_one_strike_margin_0', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_grim_one_strike_margin_1', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_4=ScenarioConfig( + description=('partner is a two-strikes grim reciprocator, i.e. one ' + + 'who initially cooperates, but if defected on twice, ' + + 'will retaliate by defecting forever after'), + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_grim_two_strikes_margin_0', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_grim_two_strikes_margin_1', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_5=ScenarioConfig( + description='partner is a tit-for-tat conditional cooperator', + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_tft_margin_0', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_tft_margin_1', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_6=ScenarioConfig( + description=('partner is a tit-for-tat conditional cooperator who ' + + 'occasionally plays defect instead of cooperate'), + tags={ + 
'half_and_half', + 'reciprocity', + 'forgiveness', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_tft_tremble_margin_0', + 'prisoners_dilemma_in_the_matrix__repeated__puppet_tft_tremble_margin_1', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_7=ScenarioConfig( + description=('partner plays cooperate for a while then switches to ' + + 'defect'), + tags={ + 'half_and_half', + 'flexibility', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_flip_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_8=ScenarioConfig( + description=('partner tries to take advantage of the focal player ' + + 'by playing defect, but if punished, partner then ' + + 'switches to tit-for-tat conditional cooperation'), + tags={ + 'half_and_half', + 'teaching', + 'reciprocity', + 'forgiveness', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_corrigible_0', + }, + }, + ), + prisoners_dilemma_in_the_matrix__repeated_9=ScenarioConfig( + description=('partner tries to take advantage of the focal player ' + + 'by playing defect, but if punished, partner then ' + + 'switches to noisy tit-for-tat conditional cooperation'), + tags={ + 'half_and_half', + 'teaching', + 'reciprocity', + 'forgiveness', + }, + substrate='prisoners_dilemma_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'prisoners_dilemma_in_the_matrix__repeated__puppet_corrigible_tremble_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_0=ScenarioConfig( + description=('focals are 
resident, a single visitor joins who may ' + 'prefer any option; whichever option it prefers, it ' + 'pursues it greedily'), + tags={ + 'resident', + 'versus_pure_all', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 7 + (False,) * 1, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__pure_greedy_a_0', + 'pure_coordination_in_the_matrix__arena__pure_greedy_b_0', + 'pure_coordination_in_the_matrix__arena__pure_greedy_c_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_1=ScenarioConfig( + description=('focals are resident, three visitors join who always ' + 'select the same option as their partner in the previous' + ' interaction and do so without being too greedy'), + tags={ + 'resident', + 'versus_best_response', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__resp2prev_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_2=ScenarioConfig( + description=('focals are resident, three visitors join who always ' + 'select the same option as their partner in the previous' + ' interaction and are greedy in doing so'), + tags={ + 'resident', + 'versus_best_response', + 'scarcity', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__resp2prev_greedy_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_3=ScenarioConfig( + description=('visiting a background population where all initially ' + 'choose option A (without greed) and then switch '+ + 'to either B or C after some time'), + tags={ + 'visitor', + 'convention_following', + 'flexibility', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + 
bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__flip_a2b_0', + 'pure_coordination_in_the_matrix__arena__flip_a2c_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_4=ScenarioConfig( + description=('visiting a background population where all initially ' + + 'choose option B (without greed) and then switch '+ + 'to either A or C after some time'), + tags={ + 'visitor', + 'convention_following', + 'flexibility', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__flip_b2a_0', + 'pure_coordination_in_the_matrix__arena__flip_b2c_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_5=ScenarioConfig( + description=('visiting a background population where all initially ' + + 'choose option C (without greed) and then switch '+ + 'to either A or B after some time'), + tags={ + 'visitor', + 'convention_following', + 'flexibility', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__flip_c2a_0', + 'pure_coordination_in_the_matrix__arena__flip_c2b_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_6=ScenarioConfig( + description=('visiting a background population where all typically ' + + 'choose option A and do not collect it too greedily'), + tags={ + 'visitor', + 'versus_pure_type_a', + 'convention_following', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__pure_a_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_7=ScenarioConfig( + description=('visiting a background population where all typically ' + + 'choose option B and do not collect it too greedily'), + tags={ + 'visitor', + 
'versus_pure_type_b', + 'convention_following', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__pure_b_0', + }, + }, + ), + pure_coordination_in_the_matrix__arena_8=ScenarioConfig( + description=('visiting a background population where all typically ' + + 'choose option C and do not collect it too greedily'), + tags={ + 'visitor', + 'versus_pure_type_c', + 'convention_following', + }, + substrate='pure_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__arena__pure_c_0', + }, + }, + ), + pure_coordination_in_the_matrix__repeated_0=ScenarioConfig( + description='meeting any pure strategy player', + tags={ + 'versus_pure_all', + 'half_and_half', + }, + substrate='pure_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__repeated__pure_a_margin_0', + 'pure_coordination_in_the_matrix__repeated__pure_b_margin_0', + 'pure_coordination_in_the_matrix__repeated__pure_c_margin_0', + }, + }, + ), + pure_coordination_in_the_matrix__repeated_1=ScenarioConfig( + description=('meeting an agent who plays the best response to ' + + 'what the focal agent did in the last round.'), + tags={ + 'half_and_half', + 'versus_best_response', + }, + substrate='pure_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__repeated__resp2prev_margin_0', + }, + }, + ), + pure_coordination_in_the_matrix__repeated_2=ScenarioConfig( + description=('versus mixture of opponents who often flip to other ' + + 'strategies after some number of interactions'), + tags={ + 'half_and_half', + 'versus_strategy_flip', + }, + 
substrate='pure_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__repeated__pure_a_margin_0', + 'pure_coordination_in_the_matrix__repeated__flip_a2b_0', + 'pure_coordination_in_the_matrix__repeated__flip_a2b_1', + 'pure_coordination_in_the_matrix__repeated__flip_a2c_0', + 'pure_coordination_in_the_matrix__repeated__flip_a2c_1', + 'pure_coordination_in_the_matrix__repeated__pure_b_margin_0', + 'pure_coordination_in_the_matrix__repeated__flip_b2c_0', + 'pure_coordination_in_the_matrix__repeated__flip_b2c_1', + 'pure_coordination_in_the_matrix__repeated__flip_b2a_0', + 'pure_coordination_in_the_matrix__repeated__flip_b2a_1', + 'pure_coordination_in_the_matrix__repeated__pure_c_margin_0', + 'pure_coordination_in_the_matrix__repeated__flip_c2a_0', + 'pure_coordination_in_the_matrix__repeated__flip_c2a_1', + 'pure_coordination_in_the_matrix__repeated__flip_c2b_0', + 'pure_coordination_in_the_matrix__repeated__flip_c2b_1' + }, + }, + ), + pure_coordination_in_the_matrix__repeated_3=ScenarioConfig( + description='meeting an agent who almost always chooses resource a', + tags={ + 'versus_pure_type_a', + 'half_and_half', + }, + substrate='pure_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__repeated__pure_a_margin_0', + }, + }, + ), + pure_coordination_in_the_matrix__repeated_4=ScenarioConfig( + description='meeting an agent who almost always chooses resource b', + tags={ + 'versus_pure_type_b', + 'half_and_half', + }, + substrate='pure_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__repeated__pure_b_margin_0', + }, + }, + ), + pure_coordination_in_the_matrix__repeated_5=ScenarioConfig( + description='meeting an 
agent who almost always chooses resource c', + tags={ + 'versus_pure_type_c', + 'half_and_half', + }, + substrate='pure_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'pure_coordination_in_the_matrix__repeated__pure_c_margin_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_0=ScenarioConfig( + description=('focals are resident, a single visitor joins who may ' + 'prefer any option; whichever option it prefers, it ' + 'pursues it greedily'), + tags={ + 'resident', + 'versus_pure_all', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 7 + (False,) * 1, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__pure_greedy_a_0', + 'rationalizable_coordination_in_the_matrix__arena__pure_greedy_b_0', + 'rationalizable_coordination_in_the_matrix__arena__pure_greedy_c_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_1=ScenarioConfig( + description=('focals are resident, three visitors join who always ' + 'select the same option as their partner in the previous' + ' interaction and do so without being too greedy'), + tags={ + 'resident', + 'versus_best_response', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__resp2prev_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_2=ScenarioConfig( + description=('focals are resident, three visitors join who always ' + 'select the same option as their partner in the previous' + ' interaction and are greedy in doing so'), + tags={ + 'resident', + 'versus_best_response', + 'scarcity', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + 
bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__resp2prev_greedy_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_3=ScenarioConfig( + description=('visiting a background population where all initially ' + + 'choose option A (without greed) and then switch '+ + 'to either B or C after some time'), + tags={ + 'visitor', + 'convention_following', + 'flexibility', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__flip_a2b_0', + 'rationalizable_coordination_in_the_matrix__arena__flip_a2c_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_4=ScenarioConfig( + description=('visiting a background population where all initially ' + + 'choose option B (without greed) and then switch '+ + 'to either A or C after some time'), + tags={ + 'visitor', + 'convention_following', + 'flexibility', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__flip_b2a_0', + 'rationalizable_coordination_in_the_matrix__arena__flip_b2c_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_5=ScenarioConfig( + description=('visiting a background population where all initially ' + + 'choose option C (without greed) and then switch '+ + 'to either A or B after some time'), + tags={ + 'visitor', + 'convention_following', + 'flexibility', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__flip_c2a_0', + 'rationalizable_coordination_in_the_matrix__arena__flip_c2b_0', + }, + }, + ), + 
rationalizable_coordination_in_the_matrix__arena_6=ScenarioConfig( + description=('visiting a background population where all typically ' + + 'choose option A and do not collect it too greedily'), + tags={ + 'visitor', + 'versus_pure_type_a', + 'convention_following', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__pure_a_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_7=ScenarioConfig( + description=('visiting a background population where all typically ' + + 'choose option B and do not collect it too greedily'), + tags={ + 'visitor', + 'versus_pure_type_b', + 'convention_following', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__pure_b_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__arena_8=ScenarioConfig( + description=('visiting a background population where all typically ' + + 'choose option C and do not collect it too greedily'), + tags={ + 'visitor', + 'versus_pure_type_c', + 'convention_following', + }, + substrate='rationalizable_coordination_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__arena__pure_c_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__repeated_0=ScenarioConfig( + description='meeting any pure strategy player', + tags={ + 'versus_pure_all', + 'half_and_half', + }, + substrate='rationalizable_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__repeated__pure_a_margin_0', + 
'rationalizable_coordination_in_the_matrix__repeated__pure_b_margin_0', + 'rationalizable_coordination_in_the_matrix__repeated__pure_c_margin_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__repeated_1=ScenarioConfig( + description=('meeting an agent who plays the best response to ' + + 'what the focal agent did in the last round.'), + tags={ + 'half_and_half', + 'versus_best_response', + }, + substrate='rationalizable_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__repeated__resp2prev_margin_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__repeated_2=ScenarioConfig( + description=('versus mixture of opponents who often flip to other ' + + 'strategies after some number of interactions'), + tags={ + 'half_and_half', + 'versus_strategy_flip', + }, + substrate='rationalizable_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__repeated__pure_a_margin_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_a2b_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_a2b_1', + 'rationalizable_coordination_in_the_matrix__repeated__flip_a2c_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_a2c_1', + 'rationalizable_coordination_in_the_matrix__repeated__pure_b_margin_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_b2c_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_b2c_1', + 'rationalizable_coordination_in_the_matrix__repeated__flip_b2a_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_b2a_1', + 'rationalizable_coordination_in_the_matrix__repeated__pure_c_margin_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_c2a_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_c2a_1', + 
'rationalizable_coordination_in_the_matrix__repeated__flip_c2b_0', + 'rationalizable_coordination_in_the_matrix__repeated__flip_c2b_1' + }, + }, + ), + rationalizable_coordination_in_the_matrix__repeated_3=ScenarioConfig( + description='meeting an agent who almost always chooses resource a', + tags={ + 'versus_pure_type_a', + 'half_and_half', + }, + substrate='rationalizable_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__repeated__pure_a_margin_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__repeated_4=ScenarioConfig( + description='meeting an agent who almost always chooses resource b', + tags={ + 'versus_pure_type_b', + 'half_and_half', + }, + substrate='rationalizable_coordination_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) * 1 + (False,) * 1, + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__repeated__pure_b_margin_0', + }, + }, + ), + rationalizable_coordination_in_the_matrix__repeated_5=ScenarioConfig( + description='meeting an agent who almost always chooses resource c', + tags={ + 'versus_pure_type_c', + 'half_and_half', + }, + substrate='rationalizable_coordination_in_the_matrix__repeated', + roles=('default',) * 2, is_focal=(True,) * 1 + (False,) * 1, - bots=frozenset({ - 'classic_rws_pure_paper_0', - 'classic_rws_pure_paper_1', - 'classic_rws_pure_paper_2', - 'classic_rws_pure_paper_3', - 'classic_rws_pure_rock_0', - 'classic_rws_pure_rock_1', - 'classic_rws_pure_rock_2', - 'classic_rws_pure_rock_3', - 'classic_rws_pure_scissors_0', - 'classic_rws_pure_scissors_1', - 'classic_rws_pure_scissors_2', - 'classic_rws_pure_scissors_3', - }), - ), - running_with_scissors_in_the_matrix_2=_homogeneous_scenario( + bots_by_role={ + 'default': { + 'rationalizable_coordination_in_the_matrix__repeated__pure_c_margin_0', + }, + }, + ), + 
running_with_scissors_in_the_matrix__arena_0=ScenarioConfig( + description=('versus a background population containing bots ' + + 'implementing all three pure strategies'), + tags={ + 'visitor', + 'versus_pure_all', + }, + substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__rock_margin_0', + 'running_with_scissors_in_the_matrix__arena__rock_margin_1', + 'running_with_scissors_in_the_matrix__arena__paper_margin_0', + 'running_with_scissors_in_the_matrix__arena__paper_margin_1', + 'running_with_scissors_in_the_matrix__arena__scissors_margin_0', + 'running_with_scissors_in_the_matrix__arena__scissors_margin_1', + }, + } + ), + running_with_scissors_in_the_matrix__arena_1=ScenarioConfig( + description=('versus gullible bots'), + tags={ + 'deception', + 'visitor', + 'versus_gullible', + }, + substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__free_0', + }, + } + ), + running_with_scissors_in_the_matrix__arena_2=ScenarioConfig( + description=('versus mixture of opponents who play rock and some who ' + + 'flip to scissors after two interactions'), + tags={ + 'visitor', + 'versus_strategy_flip', + }, + substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__rock_margin_0', + 'running_with_scissors_in_the_matrix__arena__rock_margin_1', + 'running_with_scissors_in_the_matrix__arena__flip_r2s_0', + }, + } + ), + running_with_scissors_in_the_matrix__arena_3=ScenarioConfig( + description=('versus mixture of opponents who play paper and some ' + + 'who flip to rock after two interactions'), + tags={ + 'visitor', + 'versus_strategy_flip', + }, + 
substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__paper_margin_0', + 'running_with_scissors_in_the_matrix__arena__paper_margin_1', + 'running_with_scissors_in_the_matrix__arena__flip_p2r_0', + }, + } + ), + running_with_scissors_in_the_matrix__arena_4=ScenarioConfig( + description=('versus mixture of opponents who play scissors and some ' + + 'who flip to paper after two interactions'), + tags={ + 'visitor', + 'versus_strategy_flip', + }, + substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 1 + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__scissors_margin_0', + 'running_with_scissors_in_the_matrix__arena__scissors_margin_1', + 'running_with_scissors_in_the_matrix__arena__flip_s2p_0', + }, + } + ), + running_with_scissors_in_the_matrix__arena_5=ScenarioConfig( + description=('visiting a population of pure paper bots'), + tags={ + 'visitor', + 'versus_pure_paper', + }, + substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__paper_margin_0', + 'running_with_scissors_in_the_matrix__arena__paper_margin_1', + }, + } + ), + running_with_scissors_in_the_matrix__arena_6=ScenarioConfig( + description=('visiting a population of pure rock bots'), + tags={ + 'visitor', + 'versus_pure_rock', + }, + substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__rock_margin_0', + 'running_with_scissors_in_the_matrix__arena__rock_margin_1', + }, + } + ), + running_with_scissors_in_the_matrix__arena_7=ScenarioConfig( + description=('visiting a population of pure 
scissors bots'), + tags={ + 'visitor', + 'versus_pure_scissors', + }, + substrate='running_with_scissors_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__arena__scissors_margin_0', + 'running_with_scissors_in_the_matrix__arena__scissors_margin_1', + }, + } + ), + running_with_scissors_in_the_matrix__one_shot_0=ScenarioConfig( + description='versus mixed strategy opponent', + tags={ + 'half_and_half', + 'versus_pure_all', + }, + substrate='running_with_scissors_in_the_matrix__one_shot', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__one_shot__rock_margin_0', + 'running_with_scissors_in_the_matrix__one_shot__paper_margin_0', + 'running_with_scissors_in_the_matrix__one_shot__scissors_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__one_shot_1=ScenarioConfig( description='versus pure rock opponent', - tags=frozenset({ + tags={ 'half_and_half', 'versus_pure_rock', - }), - substrate='running_with_scissors_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 1, - bots=frozenset({ - 'classic_rws_pure_rock_0', - 'classic_rws_pure_rock_1', - 'classic_rws_pure_rock_2', - 'classic_rws_pure_rock_3', - }), - ), - running_with_scissors_in_the_matrix_3=_homogeneous_scenario( + }, + substrate='running_with_scissors_in_the_matrix__one_shot', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__one_shot__rock_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__one_shot_2=ScenarioConfig( description='versus pure paper opponent', - tags=frozenset({ + tags={ 'half_and_half', 'versus_pure_paper', - }), - substrate='running_with_scissors_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 1, - bots=frozenset({ - 'classic_rws_pure_paper_0', - 'classic_rws_pure_paper_1', - 'classic_rws_pure_paper_2', - 
'classic_rws_pure_paper_3', - }), - ), - running_with_scissors_in_the_matrix_4=_homogeneous_scenario( + }, + substrate='running_with_scissors_in_the_matrix__one_shot', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__one_shot__paper_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__one_shot_3=ScenarioConfig( description='versus pure scissors opponent', - tags=frozenset({ + tags={ 'half_and_half', 'versus_pure_scissors', - }), - substrate='running_with_scissors_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 1, - bots=frozenset({ - 'classic_rws_pure_scissors_0', - 'classic_rws_pure_scissors_1', - 'classic_rws_pure_scissors_2', - 'classic_rws_pure_scissors_3', - }), - ), - stag_hunt_in_the_matrix_0=_homogeneous_scenario( - description='visiting a population of stags', - tags=frozenset({ - 'versus_pure_stag', + }, + substrate='running_with_scissors_in_the_matrix__one_shot', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__one_shot__scissors_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_0=ScenarioConfig( + description='versus mixed strategy opponent', + tags={ + 'half_and_half', + 'versus_pure_all', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__rock_margin_0', + 'running_with_scissors_in_the_matrix__repeated__paper_margin_0', + 'running_with_scissors_in_the_matrix__repeated__scissors_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_1=ScenarioConfig( + description=('versus opponent who plays the best response to ' + + 'what the focal player did in the last round.'), + tags={ + 'half_and_half', + 'versus_best_response', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + 
roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__resp2prev_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_2=ScenarioConfig( + description=('versus opponent who sometimes plays a pure strategy ' + + 'but sometimes plays the best response to what the ' + + 'focal player did in the last round'), + tags={ + 'half_and_half', + 'versus_best_response', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__resp2prev_margin_0', + 'running_with_scissors_in_the_matrix__repeated__rock_margin_0', + 'running_with_scissors_in_the_matrix__repeated__paper_margin_0', + 'running_with_scissors_in_the_matrix__repeated__scissors_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_3=ScenarioConfig( + description=('versus mixture of opponents who often flip to other ' + + 'strategies after two interactions'), + tags={ + 'half_and_half', + 'versus_strategy_flip', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__rock_0', + 'running_with_scissors_in_the_matrix__repeated__rock_margin_0', + 'running_with_scissors_in_the_matrix__repeated__flip_r2s_0', + 'running_with_scissors_in_the_matrix__repeated__paper_0', + 'running_with_scissors_in_the_matrix__repeated__paper_margin_0', + 'running_with_scissors_in_the_matrix__repeated__flip_p2r_0', + 'running_with_scissors_in_the_matrix__repeated__scissors_0', + 'running_with_scissors_in_the_matrix__repeated__scissors_margin_0', + 'running_with_scissors_in_the_matrix__repeated__flip_s2p_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_4=ScenarioConfig( + description=('versus mixture of opponents who 
either flip to another ' + + 'strategy after one interaction and keep it forever or ' + + 'continue to change, always best responding to what ' + + 'the focal player just did'), + tags={ + 'half_and_half', + 'versus_strategy_flip', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__flip_r2s_1', + 'running_with_scissors_in_the_matrix__repeated__flip_p2r_1', + 'running_with_scissors_in_the_matrix__repeated__flip_s2p_1', + 'running_with_scissors_in_the_matrix__repeated__resp2prev_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_5=ScenarioConfig( + description='versus gullible opponent', + tags={ + 'deception', + 'half_and_half', + 'versus_gullible', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__free_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_6=ScenarioConfig( + description='versus pure rock opponent', + tags={ + 'half_and_half', + 'versus_pure_rock', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__rock_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_7=ScenarioConfig( + description='versus pure paper opponent', + tags={ + 'half_and_half', + 'versus_pure_paper', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__paper_margin_0', + }, + } + ), + running_with_scissors_in_the_matrix__repeated_8=ScenarioConfig( + description='versus pure scissors opponent', + tags={ + 'half_and_half', + 
'versus_pure_scissors', + }, + substrate='running_with_scissors_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'running_with_scissors_in_the_matrix__repeated__scissors_margin_0', + }, + } + ), + stag_hunt_in_the_matrix__arena_0=ScenarioConfig( + description='visiting unconditional stag players', + tags={ 'visitor', - }), - substrate='stag_hunt_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'stag_hunt_stag_specialist_3', - 'stag_hunt_stag_specialist_5', - }), - ), - stag_hunt_in_the_matrix_1=_homogeneous_scenario( - description='visiting a population of hares', - tags=frozenset({ - 'versus_pure_hare', + 'versus_pure_stag_players', + 'convention_following', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__arena__puppet_stag_0', + 'stag_hunt_in_the_matrix__arena__puppet_stag_margin_0', + }, + }, + ), + stag_hunt_in_the_matrix__arena_1=ScenarioConfig( + description='visiting unconditional hare players', + tags={ + 'visitor', + 'versus_pure_hare_players', + 'convention_following', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__arena__puppet_hare_0', + 'stag_hunt_in_the_matrix__arena__puppet_hare_margin_0', + }, + }, + ), + stag_hunt_in_the_matrix__arena_2=ScenarioConfig( + description=('focals are resident and visitors are unconditional ' + + 'stag players'), + tags={ + 'resident', + 'versus_pure_stag_players', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__arena__puppet_stag_0', + 'stag_hunt_in_the_matrix__arena__puppet_stag_margin_0', + }, + }, + ), + stag_hunt_in_the_matrix__arena_3=ScenarioConfig( + 
description=('focals are resident and visitors are unconditional ' + 'hare players'), + tags={ + 'resident', + 'versus_pure_hare_players', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 5 + (False,) * 3, + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__arena__puppet_hare_0', + 'stag_hunt_in_the_matrix__arena__puppet_hare_margin_0', + }, + }, + ), + stag_hunt_in_the_matrix__arena_4=ScenarioConfig( + description=('visiting a population of hair-trigger grim ' + + 'reciprocator bots who initially play stag but, if ' + + 'any partner plays hare once, they give up on trying to ' + + 'cooperate and play hare in all future interactions'), + tags={ 'visitor', - }), - substrate='stag_hunt_in_the_matrix', - is_focal=(True,) * 1 + (False,) * 7, - bots=frozenset({ - 'stag_hunt_hare_specialist_0', - 'stag_hunt_hare_specialist_1', - 'stag_hunt_hare_specialist_2', - }), - ), - stag_hunt_in_the_matrix_2=_homogeneous_scenario( - description='visiting a population of grim reciprocators', - tags=frozenset({ 'reciprocity', - 'versus_puppet', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__arena__puppet_grim_one_strike_0', + 'stag_hunt_in_the_matrix__arena__puppet_grim_one_strike_margin_0', + }, + }, + ), + stag_hunt_in_the_matrix__arena_5=ScenarioConfig( + description=('visiting a population of two-strikes grim ' + + 'reciprocator bots who initially play stag but, if ' + + 'their partners play hare twice, they give up on trying ' + + 'to cooperate and play hare in all future interactions'), + tags={ 'visitor', - }), - substrate='stag_hunt_in_the_matrix', - is_focal=(True,) * 2 + (False,) * 6, - bots=frozenset({ - 'stag_hunt_puppet_grim', - }), + 'reciprocity', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) + (False,) * 7, + bots_by_role={ + 'default': {
+ 'stag_hunt_in_the_matrix__arena__puppet_grim_two_strikes_0', + 'stag_hunt_in_the_matrix__arena__puppet_grim_two_strikes_margin_0', + }, + }, ), - territory_open_0=_homogeneous_scenario( - description='focals are resident and visited by a shaped bot', - tags=frozenset({ + stag_hunt_in_the_matrix__arena_6=ScenarioConfig( + description=( + 'visiting a mixed population of k-strikes grim reciprocator bots ' + + 'with k values from 1 to 3, they initially play stag but, if ' + + 'their partners play hare k times, they then play hare in all ' + + 'future interactions' + ), + tags={ + 'visitor', + 'reciprocity', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 3 + (False,) * 5, + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__arena__puppet_grim_one_strike_0', + 'stag_hunt_in_the_matrix__arena__puppet_grim_one_strike_margin_0', + 'stag_hunt_in_the_matrix__arena__puppet_grim_three_strikes_0', + 'stag_hunt_in_the_matrix__arena__puppet_grim_three_strikes_margin_0', + 'stag_hunt_in_the_matrix__arena__puppet_grim_two_strikes_0', + 'stag_hunt_in_the_matrix__arena__puppet_grim_two_strikes_margin_0', + }, + }, + ), + stag_hunt_in_the_matrix__arena_7=ScenarioConfig( + description='visiting a mixture of pure hare and pure stag players', + tags={ + 'visitor', + 'versus_pure_all', + }, + substrate='stag_hunt_in_the_matrix__arena', + roles=('default',) * 8, + is_focal=(True,) * 3 + (False,) * 5, + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__arena__puppet_stag_0', + 'stag_hunt_in_the_matrix__arena__puppet_stag_margin_0', + 'stag_hunt_in_the_matrix__arena__puppet_hare_0', + 'stag_hunt_in_the_matrix__arena__puppet_hare_margin_0', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_0=ScenarioConfig( + description='partner may play either stag or hare', + tags={ + 'half_and_half', + 'versus_pure_all', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + 
bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_hare_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_hare_margin_1', + 'stag_hunt_in_the_matrix__repeated__puppet_stag_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_stag_margin_1', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_1=ScenarioConfig( + description='partner typically plays stag', + tags={ + 'half_and_half', + 'versus_pure_stag', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_stag_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_stag_margin_1', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_2=ScenarioConfig( + description='partner typically plays hare', + tags={ + 'half_and_half', + 'versus_pure_hare', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_hare_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_hare_margin_1', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_3=ScenarioConfig( + description=('partner is a hair-trigger grim reciprocator, i.e. one ' + + 'who initially cooperates but, if defected on once, will' + + ' retaliate by defecting forever after'), + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_grim_one_strike_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_grim_one_strike_margin_1', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_4=ScenarioConfig( + description=('partner is a two-strikes grim reciprocator, i.e. 
one ' + + 'who initially cooperates, but if defected on twice, ' + + 'will retaliate by defecting forever after'), + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_grim_two_strikes_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_grim_two_strikes_margin_1', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_5=ScenarioConfig( + description='partner is a tit-for-tat conditional cooperator', + tags={ + 'half_and_half', + 'reciprocity', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_tft_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_tft_margin_1', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_6=ScenarioConfig( + description=('partner is a tit-for-tat conditional cooperator who ' + + 'occasionally plays hare instead of stag'), + tags={ + 'half_and_half', + 'reciprocity', + 'forgiveness', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_tft_tremble_margin_0', + 'stag_hunt_in_the_matrix__repeated__puppet_tft_tremble_margin_1', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_7=ScenarioConfig( + description='partner plays stag for a while then switches to hare', + tags={ + 'half_and_half', + 'flexibility', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_flip_0', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_8=ScenarioConfig( + description=('partner initially plays hare, but if punished, partner ' + + 'then switches to tit-for-tat conditional 
cooperation'), + tags={ + 'half_and_half', + 'teaching', + 'reciprocity', + 'forgiveness', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_corrigible_0', + }, + }, + ), + stag_hunt_in_the_matrix__repeated_9=ScenarioConfig( + description=('partner initially plays hare, but if punished, partner ' + + 'then switches to noisy tit-for-tat conditional ' + + 'cooperation'), + tags={ + 'half_and_half', + 'teaching', + 'reciprocity', + 'forgiveness', + }, + substrate='stag_hunt_in_the_matrix__repeated', + roles=('default',) * 2, + is_focal=(True,) + (False,), + bots_by_role={ + 'default': { + 'stag_hunt_in_the_matrix__repeated__puppet_corrigible_tremble_0', + }, + }, + ), + territory__inside_out_0=ScenarioConfig( + description='focals are resident and visited by an aggressor', + tags={ 'resident', - }), - substrate='territory_open', - is_focal=(True,) * 8 + (False,) * 1, - bots=frozenset({ - 'territory_open_painter_0', - 'territory_open_painter_1', - 'territory_open_painter_2', - 'territory_open_painter_3', - }), - ), - territory_open_1=_homogeneous_scenario( - description='visiting a population of shaped bots', - tags=frozenset({ - 'convention_following', + }, + substrate='territory__inside_out', + roles=('default',) * 5, + is_focal=(True,) * 4 + (False,), + bots_by_role={ + 'default': { + 'territory__inside_out__aggressor_0', + 'territory__inside_out__aggressor_1', + 'territory__inside_out__aggressor_2', + 'territory__inside_out__aggressor_3', + 'territory__inside_out__aggressor_with_extra_training_0', + }, + }, + ), + territory__inside_out_1=ScenarioConfig( + description='visiting a population of aggressors', + tags={ + 'visitor', + }, + substrate='territory__inside_out', + roles=('default',) * 5, + is_focal=(True,) + (False,) * 4, + bots_by_role={ + 'default': { + 'territory__inside_out__aggressor_0', + 
'territory__inside_out__aggressor_1', + 'territory__inside_out__aggressor_2', + 'territory__inside_out__aggressor_3', + 'territory__inside_out__aggressor_with_extra_training_0', + }, + }, + ), + territory__inside_out_2=ScenarioConfig( + description='focals are resident, visited by a bot that does nothing', + tags={ + 'resident', + 'versus_noop', + }, + substrate='territory__inside_out', + roles=('default',) * 5, + is_focal=(True,) * 4 + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + territory__inside_out_3=ScenarioConfig( + description='focals visit a resident population that does nothing.', + tags={ 'visitor', - }), - substrate='territory_open', - is_focal=(True,) * 1 + (False,) * 8, - bots=frozenset({ - 'territory_open_painter_0', - 'territory_open_painter_1', - 'territory_open_painter_2', - 'territory_open_painter_3', - }), - ), - territory_rooms_0=_homogeneous_scenario( + 'versus_noop', + }, + substrate='territory__inside_out', + roles=('default',) * 5, + is_focal=(True,) + (False,) * 4, + bots_by_role={'default': {'noop_bot'}}, + ), + territory__inside_out_4=ScenarioConfig( + description=('focals are resident, visited by a bot that claims a ' + + 'moderate size territory and mostly tolerates its ' + + 'neighbors'), + tags={ + 'resident', + }, + substrate='territory__inside_out', + roles=('default',) * 5, + is_focal=(True,) * 4 + (False,), + bots_by_role={ + 'default': { + 'territory__inside_out__somewhat_tolerant_bot_0', + 'territory__inside_out__somewhat_tolerant_bot_1',}, + }, + ), + territory__inside_out_5=ScenarioConfig( + description=('focals visit a resident population that claims a ' + + 'moderate size territory and mostly tolerates its ' + + 'neighbors'), + tags={ + 'visitor', + }, + substrate='territory__inside_out', + roles=('default',) * 5, + is_focal=(True,) + (False,) * 4, + bots_by_role={ + 'default': { + 'territory__inside_out__somewhat_tolerant_bot_0', + 'territory__inside_out__somewhat_tolerant_bot_1',}, + }, + ), + 
territory__open_0=ScenarioConfig( description='focals are resident and visited by an aggressor', - tags=frozenset({ + tags={ 'resident', - }), - substrate='territory_rooms', - is_focal=(True,) * 8 + (False,) * 1, - bots=frozenset({ - 'territory_closed_reply_to_zapper_0', - 'territory_closed_reply_to_zapper_1', - }), - ), - territory_rooms_1=_homogeneous_scenario( + }, + substrate='territory__open', + roles=('default',) * 9, + is_focal=(True,) * 8 + (False,), + bots_by_role={ + 'default': { + 'territory__open__aggressor_0', + 'territory__open__aggressor_1', + 'territory__open__aggressor_2', + 'territory__open__aggressor_3', + 'territory__open__aggressor_with_extra_training_0', + }, + }, + ), + territory__open_1=ScenarioConfig( description='visiting a population of aggressors', - tags=frozenset({ - 'convention_following', + tags={ + 'visitor', + }, + substrate='territory__open', + roles=('default',) * 9, + is_focal=(True,) + (False,) * 8, + bots_by_role={ + 'default': { + 'territory__open__aggressor_0', + 'territory__open__aggressor_1', + 'territory__open__aggressor_2', + 'territory__open__aggressor_3', + 'territory__open__aggressor_with_extra_training_0', + }, + }, + ), + territory__open_2=ScenarioConfig( + description='focals are resident, visited by a bot that does nothing', + tags={ + 'resident', + 'versus_noop', + }, + substrate='territory__open', + roles=('default',) * 9, + is_focal=(True,) * 8 + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + territory__open_3=ScenarioConfig( + description='focals visit a resident population that does nothing', + tags={ + 'visitor', + 'versus_noop', + }, + substrate='territory__open', + roles=('default',) * 9, + is_focal=(True,) + (False,) * 8, + bots_by_role={'default': {'noop_bot'}}, + ), + territory__rooms_0=ScenarioConfig( + description='focals are resident and visited by an aggressor', + tags={ + 'resident', + }, + substrate='territory__rooms', + roles=('default',) * 9, + is_focal=(True,) * 8 + (False,), + 
bots_by_role={ + 'default': { + 'territory__rooms__aggressor_0', + 'territory__rooms__aggressor_1', + 'territory__rooms__aggressor_2', + 'territory__rooms__aggressor_3', + 'territory__rooms__aggressor_with_extra_training_0', + }, + }, + ), + territory__rooms_1=ScenarioConfig( + description='visiting a population of aggressors', + tags={ + 'visitor', + }, + substrate='territory__rooms', + roles=('default',) * 9, + is_focal=(True,) + (False,) * 8, + bots_by_role={ + 'default': { + 'territory__rooms__aggressor_0', + 'territory__rooms__aggressor_1', + 'territory__rooms__aggressor_2', + 'territory__rooms__aggressor_3', + 'territory__rooms__aggressor_with_extra_training_0', + }, + }, + ), + territory__rooms_2=ScenarioConfig( + description='focals are resident, visited by a bot that does nothing', + tags={ + 'resident', + 'versus_noop', + }, + substrate='territory__rooms', + roles=('default',) * 9, + is_focal=(True,) * 8 + (False,), + bots_by_role={'default': {'noop_bot'}}, + ), + territory__rooms_3=ScenarioConfig( + description='focals visit a resident population that does nothing', + tags={ 'visitor', - }), - substrate='territory_rooms', - is_focal=(True,) * 1 + (False,) * 8, - bots=frozenset({ - 'territory_closed_reply_to_zapper_0', - 'territory_closed_reply_to_zapper_1', - }), + 'versus_noop', + }, + substrate='territory__rooms', + roles=('default',) * 9, + is_focal=(True,) + (False,) * 8, + bots_by_role={'default': {'noop_bot'}}, ), # keep-sorted end ) diff --git a/meltingpot/python/configs/scenarios/scenario_configs_test.py b/meltingpot/python/configs/scenarios/scenario_configs_test.py index 5e877f8f..b6c9e842 100644 --- a/meltingpot/python/configs/scenarios/scenario_configs_test.py +++ b/meltingpot/python/configs/scenarios/scenario_configs_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,26 +18,23 @@ from absl.testing import absltest from absl.testing import parameterized +from meltingpot.python import bot as bot_factory from meltingpot.python.configs import bots from meltingpot.python.configs import scenarios from meltingpot.python.configs import substrates SCENARIO_CONFIGS = scenarios.SCENARIO_CONFIGS -AVAILABLE_BOTS = frozenset(bots.BOT_CONFIGS) +AVAILABLE_BOTS = bot_factory.BOTS AVAILABLE_SUBSTRATES = frozenset(substrates.SUBSTRATES) def _is_compatible(bot_name, substrate, role): + if bot_name == bot_factory.NOOP_BOT_NAME: + return True bot_config = bots.BOT_CONFIGS[bot_name] return substrate == bot_config.substrate and role in bot_config.roles -def _substrate_roles(substrate): - # TODO(b/227143834): Replace with functional code when adding new substrates. - del substrate - return {'default'} - - class ScenarioConfigTest(parameterized.TestCase): @parameterized.named_parameters(SCENARIO_CONFIGS.items()) @@ -62,17 +59,13 @@ def test_has_focal_players(self, scenario): self.assertTrue(any(scenario.is_focal)) @parameterized.named_parameters(SCENARIO_CONFIGS.items()) - def test_has_valid_sizes(self, scenario): - substrate = substrates.get_config(scenario.substrate) - with self.subTest('is_focal'): - self.assertLen(scenario.is_focal, substrate.num_players) - with self.subTest('roles'): - self.assertLen(scenario.roles, substrate.num_players) + def test_has_matching_sizes(self, scenario): + self.assertLen(scenario.is_focal, len(scenario.roles)) @parameterized.named_parameters(SCENARIO_CONFIGS.items()) def test_has_valid_roles(self, scenario): - substrate_roles = _substrate_roles(scenario.substrate) - self.assertContainsSubset(set(scenario.roles), substrate_roles) + valid_roles = substrates.get_config(scenario.substrate).valid_roles + self.assertContainsSubset(scenario.roles, valid_roles) 
@parameterized.named_parameters(SCENARIO_CONFIGS.items()) def test_has_valid_bots(self, scenario): @@ -130,5 +123,6 @@ def test_all_bots_used_by_scenarios(self): unused = AVAILABLE_BOTS - used self.assertEmpty(unused, f'Bots not used by any scenario: {unused!r}') + if __name__ == '__main__': absltest.main() diff --git a/meltingpot/python/configs/substrates/__init__.py b/meltingpot/python/configs/substrates/__init__.py index 6d681600..ddab609e 100644 --- a/meltingpot/python/configs/substrates/__init__.py +++ b/meltingpot/python/configs/substrates/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,20 +11,48 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """Configs for substrates.""" +from collections.abc import Mapping, Sequence, Set +import dataclasses +import functools import importlib -from typing import AbstractSet +from typing import Any from ml_collections import config_dict +def _validated(build): + """Adds validation checks to the build function.""" + + def lab2d_settings_builder( + *, + config: config_dict.ConfigDict, + roles: Sequence[str], + ) -> Mapping[str, Any]: + """Builds the lab2d settings for the specified config and roles. + + Args: + config: the meltingpot substrate config. + roles: the role for each corresponding player. + + Returns: + The lab2d settings for the substrate. + """ + invalid_roles = set(roles) - config.valid_roles + if invalid_roles: + raise ValueError(f'Invalid roles: {invalid_roles!r}. 
Must be one of ' + f'{config.valid_roles!r}') + return build(config=config, roles=roles) + + return lab2d_settings_builder + + def get_config(substrate: str) -> config_dict.ConfigDict: """Returns the specified config. Args: - substrate: the name of the substrate. + substrate: the name of the substrate. Must be in SUBSTRATES. Raises: ModuleNotFoundError: the config does not exist. @@ -33,31 +61,61 @@ def get_config(substrate: str) -> config_dict.ConfigDict: raise ValueError(f'{substrate} not in {SUBSTRATES}.') path = f'{__name__}.{substrate}' module = importlib.import_module(path) - return module.get_config().lock() + config = module.get_config() + with config.unlocked(): + config.lab2d_settings_builder = _validated(module.build) + return config.lock() -SUBSTRATES: AbstractSet[str] = frozenset({ +SUBSTRATES: Set[str] = frozenset({ # keep-sorted start - 'allelopathic_harvest', - 'arena_running_with_scissors_in_the_matrix', - 'bach_or_stravinsky_in_the_matrix', - 'capture_the_flag', - 'chemistry_branched_chain_reaction', - 'chemistry_metabolic_cycles', - 'chicken_in_the_matrix', + 'allelopathic_harvest__open', + 'bach_or_stravinsky_in_the_matrix__arena', + 'bach_or_stravinsky_in_the_matrix__repeated', + 'boat_race__eight_races', + 'chemistry__three_metabolic_cycles', + 'chemistry__three_metabolic_cycles_with_plentiful_distractors', + 'chemistry__two_metabolic_cycles', + 'chemistry__two_metabolic_cycles_with_distractors', + 'chicken_in_the_matrix__arena', + 'chicken_in_the_matrix__repeated', 'clean_up', - 'collaborative_cooking_impassable', - 'collaborative_cooking_passable', - 'commons_harvest_closed', - 'commons_harvest_open', - 'commons_harvest_partnership', - 'king_of_the_hill', - 'prisoners_dilemma_in_the_matrix', - 'pure_coordination_in_the_matrix', - 'rationalizable_coordination_in_the_matrix', - 'running_with_scissors_in_the_matrix', - 'stag_hunt_in_the_matrix', - 'territory_open', - 'territory_rooms', + 'coins', + 'collaborative_cooking__asymmetric', + 
'collaborative_cooking__circuit', + 'collaborative_cooking__cramped', + 'collaborative_cooking__crowded', + 'collaborative_cooking__figure_eight', + 'collaborative_cooking__forced', + 'collaborative_cooking__ring', + 'commons_harvest__closed', + 'commons_harvest__open', + 'commons_harvest__partnership', + 'coop_mining', + 'daycare', + 'externality_mushrooms__dense', + 'factory_commons__either_or', + 'fruit_market__concentric_rivers', + 'gift_refinements', + 'paintball__capture_the_flag', + 'paintball__king_of_the_hill', + 'predator_prey__alley_hunt', + 'predator_prey__open', + 'predator_prey__orchard', + 'predator_prey__random_forest', + 'prisoners_dilemma_in_the_matrix__arena', + 'prisoners_dilemma_in_the_matrix__repeated', + 'pure_coordination_in_the_matrix__arena', + 'pure_coordination_in_the_matrix__repeated', + 'rationalizable_coordination_in_the_matrix__arena', + 'rationalizable_coordination_in_the_matrix__repeated', + 'running_with_scissors_in_the_matrix__arena', + 'running_with_scissors_in_the_matrix__one_shot', + 'running_with_scissors_in_the_matrix__repeated', + 'stag_hunt_in_the_matrix__arena', + 'stag_hunt_in_the_matrix__repeated', + 'territory__inside_out', + 'territory__open', + 'territory__rooms', # keep-sorted end }) diff --git a/meltingpot/python/configs/substrates/allelopathic_harvest.py b/meltingpot/python/configs/substrates/allelopathic_harvest.py index 7c27f696..19cd8d87 100644 --- a/meltingpot/python/configs/substrates/allelopathic_harvest.py +++ b/meltingpot/python/configs/substrates/allelopathic_harvest.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,6 @@ # limitations under the License. """Configuration for Allelopathic Harvest. 
-Example video: https://youtu.be/ESugMMdKLxI - This substrate contains three different varieties of berry (red, green, & blue) and a fixed number of berry patches, which could be replanted to grow any color variety of berry. The growth rate of each berry variety depends linearly on the @@ -35,9 +33,10 @@ preferences. arXiv preprint arXiv:2010.09054. """ -from typing import Any, Dict +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -45,8 +44,6 @@ PrefabConfig = game_object_utils.PrefabConfig -# How many simultaneous players in the game. -NUM_PLAYERS = 16 # How many different colors of berries. NUM_BERRY_TYPES = 3 @@ -83,81 +80,115 @@ 21PPPPPPP12P23P1PPPPPP13P3P11 """ -MARKING_LEVEL_1 = """ -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -""" - -MARKING_LEVEL_2 = """ -xxxxxx****xxxxxx -xxxxxx****xxxxxx -xxxxxx****xxxxxx -xxxxxx****xxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -""" - -MARKING_LEVEL_3 = """ -xxxx********xxxx -xxxx********xxxx -xxxx********xxxx -xxxx********xxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxx**xxxx**xxxx -xxxx**xxxx**xxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxxxxxxxxxxxxxx -xxxx**xxxx**xxxx -xxxx**xxxx**xxxx -""" - # Map a character to the prefab it represents in the ASCII map. 
CHAR_PREFAB_MAP = { - "P": "spawn_point", + "P": {"type": "all", "list": ["floor", "spawn_point"]}, "W": "wall", - "1": "berry_1", - "2": "berry_2", - "3": "berry_3", + "1": {"type": "all", "list": ["soil", "berry_1"]}, + "2": {"type": "all", "list": ["soil", "berry_2"]}, + "3": {"type": "all", "list": ["soil", "berry_3"]}, } + +# These need to be orthogonal, same intensity and variance. COLORS = [ - (200, 0, 0, 255), # 'Red' - (0, 200, 0, 255), # 'Green' - (0, 0, 200, 255), # 'Blue' + (200, 10, 10, 255), # 'Red' + (10, 200, 10, 255), # 'Green' + (10, 10, 200, 255), # 'Blue' ] +ROLE_TO_MOST_TASTY_BERRY_IDX = { + "player_who_likes_red": 0, + "player_who_likes_green": 1, + "player_who_likes_blue": 2, +} + +MARKING_SPRITE = """ +oxxxxxxo +xoxxxxox +xxoxxoxx +xxxooxxx +xxxooxxx +xxoxxoxx +xoxxxxox +oxxxxxxo +""" + + +def get_marking_palette(alpha: float) -> Dict[str, Sequence[int]]: + alpha_uint8 = int(alpha * 255) + assert alpha_uint8 >= 0.0 and alpha_uint8 <= 255, "Color value out of range." 
+ return {"x": shapes.ALPHA, "o": (0, 0, 0, alpha_uint8)} _NUM_DIRECTIONS = 4 # NESW +FLOOR = { + "name": "floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "floor", + "stateConfigs": [{ + "state": "floor", + "layer": "background", + "sprite": "Floor", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Floor",], + "spriteShapes": [shapes.DIRT_PATTERN], + "palettes": [{ + "x": (55, 55, 55, 255), + "X": (60, 60, 60, 255), + }], + "noRotates": [True] + } + }, + { + "component": "Transform", + }, + ] +} + +SOIL = { + "name": "soil", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "soil", + "stateConfigs": [{ + "state": "soil", + "layer": "background", + "sprite": "Soil", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Soil",], + "spriteShapes": [shapes.SOIL], + "palettes": [{ + "D": (40, 40, 40, 255), + "d": (50, 50, 50, 255), + "X": (60, 60, 60, 255), + "x": (70, 70, 70, 255)}], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + WALL = { "name": "wall", "components": [ @@ -179,8 +210,8 @@ "component": "Appearance", "kwargs": { "spriteNames": ["Wall"], - # This color is a dark shade of purple. - "spriteRGBColors": [(66, 28, 82)] + # This color is a dark shade of grey. 
+ "spriteRGBColors": [(40, 40, 40)] } }, { @@ -298,50 +329,47 @@ def create_berry_prefab(lua_index: int): "RipeBerry_3", ], "spriteShapes": [ - shapes.UNRIPE_BERRY, - shapes.UNRIPE_BERRY, - shapes.UNRIPE_BERRY, - shapes.BERRY, - shapes.BERRY, - shapes.BERRY, + shapes.BERRY_SEEDS, + shapes.BERRY_SEEDS, + shapes.BERRY_SEEDS, + shapes.BERRY_RIPE, + shapes.BERRY_RIPE, + shapes.BERRY_RIPE, ], "palettes": [ # Unripe colors { - "*": COLORS[0], - "@": shapes.scale_color(COLORS[0], 1.5), - "#": (255, 255, 255, 255), + "o": COLORS[0], + "O": shapes.scale_color(COLORS[0], 1.5), "x": (0, 0, 0, 0) }, { - "*": COLORS[1], - "@": shapes.scale_color(COLORS[1], 1.5), - "#": (255, 255, 255, 255), + "o": COLORS[1], + "O": shapes.scale_color(COLORS[1], 1.5), "x": (0, 0, 0, 0) }, { - "*": COLORS[2], - "@": shapes.scale_color(COLORS[2], 1.5), - "#": (255, 255, 255, 255), + "o": COLORS[2], + "O": shapes.scale_color(COLORS[2], 1.5), "x": (0, 0, 0, 0) }, # Ripe colors { - "*": COLORS[0], - "@": shapes.scale_color(COLORS[0], 1.5), - "#": (255, 255, 255, 255), + "d": COLORS[0], + "O": shapes.scale_color(COLORS[0], 1.5), + "o": shapes.scale_color(COLORS[0], 1.25), "x": (0, 0, 0, 0) }, { - "*": COLORS[1], - "@": shapes.scale_color(COLORS[1], 1.5), - "#": (255, 255, 255, 255), + "d": COLORS[1], + "O": shapes.scale_color(COLORS[1], 1.5), + "o": shapes.scale_color(COLORS[1], 1.25), "x": (0, 0, 0, 0) }, { - "*": COLORS[2], - "@": shapes.scale_color(COLORS[2], 1.5), - "#": (255, 255, 255, 255), + "d": COLORS[2], + "O": shapes.scale_color(COLORS[2], 1.5), + "o": shapes.scale_color(COLORS[2], 1.25), "x": (0, 0, 0, 0) }, ], @@ -384,11 +412,14 @@ def create_berry_prefab(lua_index: int): return berry -def create_avatar_object(player_idx: int) -> Dict[str, Any]: +def create_avatar_object(player_idx: int, + most_tasty_berry_idx: int) -> Dict[str, Any]: """Return the avatar for the player numbered `player_idx`.""" # Lua is 1-indexed. 
lua_index = player_idx + 1 + lua_most_tasty_berry_idx = most_tasty_berry_idx + 1 + live_state_name = "player{}".format(lua_index) avatar_sprite_name = "avatarSprite{}".format(lua_index) avatar_object = { @@ -420,12 +451,12 @@ def create_avatar_object(player_idx: int) -> Dict[str, Any]: "kwargs": { "renderMode": "ascii_shape", "spriteNames": [avatar_sprite_name], - "spriteShapes": [shapes.AVATAR_DEFAULT], + "spriteShapes": [shapes.CUTE_AVATAR], # This color is white. It should never appear in gameplay. So # if a white colored avatar does appear then something is # broken. "palettes": [shapes.get_palette((255, 255, 255))], - "noRotates": [False] + "noRotates": [True] } }, { @@ -465,9 +496,11 @@ def create_avatar_object(player_idx: int) -> Dict[str, Any]: "cooldownTime": 4, "beamLength": 3, "beamRadius": 1, + "beamColor": (253, 253, 253), # the zapper beam is white. "framesTillRespawn": 25, "penaltyForBeingZapped": 0, # leave this always at 0. "rewardForZapping": 0, # leave this always at 0. + # GraduatedSanctionsMarking handles removal instead of Zapper. "removeHitPlayer": False, } }, @@ -477,7 +510,7 @@ def create_avatar_object(player_idx: int) -> Dict[str, Any]: { "component": "Taste", "kwargs": { - "mostTastyBerryId": 1, # A taste for the red berry. + "mostTastyBerryId": lua_most_tasty_berry_idx, "rewardMostTasty": 2, } }, @@ -553,6 +586,8 @@ def create_avatar_object(player_idx: int) -> Dict[str, Any]: # PREFABS is a dictionary mapping names to template game objects that can # be cloned and placed in multiple locations accoring to an ascii map. PREFABS = { + "floor": FLOOR, + "soil": SOIL, "wall": WALL, "spawn_point": SPAWN_POINT, "berry_1": create_berry_prefab(1), @@ -602,7 +637,7 @@ def create_avatar_object(player_idx: int) -> Dict[str, Any]: # The Scene objece is a non-physical object, it components implement global # logic. In this case, that includes holding the global berry counters to # implement the regrowth rate, as well as some of the observations. 
-def create_scene(): +def create_scene(num_players: int): """Creates the global scene.""" scene = { "name": "scene", @@ -623,14 +658,14 @@ def create_scene(): "component": "GlobalBerryTracker", "kwargs": { "numBerryTypes": NUM_BERRY_TYPES, - "numPlayers": NUM_PLAYERS, + "numPlayers": num_players, } }, { "component": "GlobalZapTracker", "kwargs": { "numBerryTypes": NUM_BERRY_TYPES, - "numPlayers": NUM_PLAYERS, + "numPlayers": num_players, } }, { @@ -638,7 +673,7 @@ def create_scene(): "kwargs": { "metrics": [ {"type": "tensor.Int32Tensor", - "shape": (NUM_PLAYERS, NUM_PLAYERS), + "shape": (num_players, num_players), "variable": "playerZapMatrix"}, ] } @@ -667,13 +702,13 @@ def create_scene(): {"name": "COLORING_BY_PLAYER", "type": "tensor.Int32Tensor", - "shape": (NUM_BERRY_TYPES, NUM_PLAYERS), + "shape": (NUM_BERRY_TYPES, num_players), "component": "GlobalBerryTracker", "variable": "coloringByPlayerMatrix"}, {"name": "EATING_TYPES_BY_PLAYER", "type": "tensor.Int32Tensor", - "shape": (NUM_BERRY_TYPES, NUM_PLAYERS), + "shape": (NUM_BERRY_TYPES, num_players), "component": "GlobalBerryTracker", "variable": "eatingTypesByPlayerMatrix"}, @@ -691,7 +726,7 @@ def create_scene(): {"name": "PLAYER_TIMEOUT_COUNT", "type": "tensor.Int32Tensor", - "shape": (NUM_PLAYERS, NUM_PLAYERS), + "shape": (num_players, num_players), "component": "GlobalZapTracker", "variable": "fullZapCountMatrix"}, @@ -721,7 +756,7 @@ def create_scene(): {"name": "WHO_ZAPPED_WHO", "type": "tensor.Int32Tensor", - "shape": (NUM_PLAYERS, NUM_PLAYERS), + "shape": (num_players, num_players), "component": "GlobalMetricHolder", "variable": "playerZapMatrix"}, ] @@ -772,11 +807,13 @@ def create_marking_overlay(player_idx: int) -> Dict[str, Any]: "spriteNames": ["sprite_for_level_1", "sprite_for_level_2", "sprite_for_level_3"], - "spriteShapes": [MARKING_LEVEL_1, - MARKING_LEVEL_2, - MARKING_LEVEL_3], - "palettes": [shapes.get_palette((205, 205, 205))] * 3, - "noRotates": [False] * 3 + "spriteShapes": 
[MARKING_SPRITE, + MARKING_SPRITE, + MARKING_SPRITE], + "palettes": [get_marking_palette(0.0), + get_marking_palette(0.5), + get_marking_palette(1.0)], + "noRotates": [True] * 3 } }, { @@ -864,12 +901,12 @@ def create_colored_avatar_overlay(player_idx: int) -> Dict[str, Any]: "ColoredAvatar_{}".format(i) for i in range(1, NUM_BERRY_TYPES + 1) ], - "spriteShapes": [shapes.AVATAR_DEFAULT] * + "spriteShapes": [shapes.CUTE_AVATAR] * (NUM_BERRY_TYPES + 1), "palettes": [shapes.get_palette((125, 125, 125))] + [shapes.get_palette(beam_color) for beam_color in COLORS], - "noRotates": [False] * (NUM_BERRY_TYPES + 1) + "noRotates": [True] * (NUM_BERRY_TYPES + 1) } }, { @@ -885,12 +922,19 @@ def create_colored_avatar_overlay(player_idx: int) -> Dict[str, Any]: return overlay_object -def create_avatar_and_associated_objects(num_players): +def create_avatar_and_associated_objects( + roles: Sequence[str]): """Returns list of avatar objects and associated other objects.""" avatar_objects = [] additional_objects = [] - for player_idx in range(num_players): - avatar_object = create_avatar_object(player_idx) + for player_idx, role in enumerate(roles): + if role == "default": + most_tasty_berry_idx = player_idx % 2 + else: + most_tasty_berry_idx = ROLE_TO_MOST_TASTY_BERRY_IDX[role] + + avatar_object = create_avatar_object( + player_idx=player_idx, most_tasty_berry_idx=most_tasty_berry_idx) avatar_objects.append(avatar_object) overlay_object = create_colored_avatar_overlay(player_idx) @@ -901,44 +945,12 @@ def create_avatar_and_associated_objects(num_players): return avatar_objects + additional_objects -def create_lab2d_settings( - ascii_map_string: str, - num_players: int, -) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - ascii_map_string: ascii map. - num_players: the number of players. 
- """ - game_objects = create_avatar_and_associated_objects(NUM_PLAYERS) - settings = { - "levelName": "allelopathic_harvest", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 2000, - "spriteSize": 8, - "topology": "TORUS", # Choose from ["BOUNDED", "TORUS"], - "simulation": { - "map": ascii_map_string, - "gameObjects": game_objects, - "scene": create_scene(), - "prefabs": PREFABS, - "charPrefabMap": CHAR_PREFAB_MAP, - "playerPalettes": [PLAYER_COLOR_PALETTES[0]] * NUM_PLAYERS, - }, - } - return settings - - -def get_config(factory=create_lab2d_settings): - """Default configuration for training on the allelopathic harvest level.""" +def get_config(): + """Default configuration for the allelopathic harvest level.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = NUM_PLAYERS - config.lab2d_settings = factory(DEFAULT_ASCII_MAP, config.num_players) + config.episode_timesteps = 2000 + config.ascii_map = DEFAULT_ASCII_MAP # Action set configuration. config.action_set = ACTION_SET @@ -948,27 +960,9 @@ def get_config(factory=create_lab2d_settings): "POSITION", "ORIENTATION", "READY_TO_SHOOT", - # Debug observations: - "COLOR_ID", - "MOST_TASTY_BERRY_ID", - "AVATAR_IDS_IN_VIEW", - "AVATAR_IDS_IN_RANGE_TO_ZAP", ] config.global_observation_names = [ "WORLD.RGB", - "WORLD.PLAYER_TIMEOUT_COUNT", - "WORLD.RIPE_BERRIES_BY_TYPE", - "WORLD.UNRIPE_BERRIES_BY_TYPE", - "WORLD.BERRIES_BY_TYPE", - "WORLD.COLOR_BY_COLOR_ZAP_COUNTS", - "WORLD.COLOR_BY_TASTE_ZAP_COUNTS", - "WORLD.TASTE_BY_COLOR_ZAP_COUNTS", - "WORLD.TASTE_BY_TASTE_ZAP_COUNTS", - "WORLD.COLORING_BY_PLAYER", - "WORLD.EATING_TYPES_BY_PLAYER", - "WORLD.BERRIES_PER_TYPE_BY_COLOR_OF_COLORER", - "WORLD.BERRIES_PER_TYPE_BY_TASTE_OF_COLORER", - "WORLD.WHO_ZAPPED_WHO", ] # The specs of the environment (from a single-agent perspective). 
@@ -978,30 +972,40 @@ def get_config(factory=create_lab2d_settings): "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], - "COLOR_ID": specs.float64(), - "MOST_TASTY_BERRY_ID": specs.float64(), - "AVATAR_IDS_IN_VIEW": specs.int32(NUM_PLAYERS), - "AVATAR_IDS_IN_RANGE_TO_ZAP": specs.int32(NUM_PLAYERS), "WORLD.RGB": specs.rgb(240, 232), - "WORLD.PLAYER_TIMEOUT_COUNT": specs.int32(NUM_PLAYERS, NUM_PLAYERS), - "WORLD.RIPE_BERRIES_BY_TYPE": specs.int32(NUM_BERRY_TYPES), - "WORLD.UNRIPE_BERRIES_BY_TYPE": specs.int32(NUM_BERRY_TYPES), - "WORLD.BERRIES_BY_TYPE": specs.int32(NUM_BERRY_TYPES), - "WORLD.COLOR_BY_COLOR_ZAP_COUNTS": specs.int32( - NUM_BERRY_TYPES + 1, NUM_BERRY_TYPES + 1), - "WORLD.COLOR_BY_TASTE_ZAP_COUNTS": specs.int32( - NUM_BERRY_TYPES + 1, NUM_BERRY_TYPES), - "WORLD.TASTE_BY_COLOR_ZAP_COUNTS": specs.int32( - NUM_BERRY_TYPES, NUM_BERRY_TYPES + 1), - "WORLD.TASTE_BY_TASTE_ZAP_COUNTS": specs.int32( - NUM_BERRY_TYPES, NUM_BERRY_TYPES), - "WORLD.COLORING_BY_PLAYER": specs.int32(NUM_BERRY_TYPES, NUM_PLAYERS), - "WORLD.EATING_TYPES_BY_PLAYER": specs.int32(NUM_BERRY_TYPES, NUM_PLAYERS), - "WORLD.BERRIES_PER_TYPE_BY_COLOR_OF_COLORER": specs.int32( - NUM_BERRY_TYPES, NUM_BERRY_TYPES + 1), - "WORLD.BERRIES_PER_TYPE_BY_TASTE_OF_COLORER": specs.int32( - NUM_BERRY_TYPES, NUM_BERRY_TYPES), - "WORLD.WHO_ZAPPED_WHO": specs.int32(NUM_PLAYERS, NUM_PLAYERS), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default", + "player_who_likes_red", + "player_who_likes_green", + "player_who_likes_blue",}) + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build the allelopathic_harvest substrate given roles.""" + num_players = len(roles) + game_objects = create_avatar_and_associated_objects(roles=roles) + # Build the rest of the substrate definition. 
+ substrate_definition = dict( + levelName="allelopathic_harvest", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=config.episode_timesteps, + spriteSize=8, + topology="TORUS", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": config.ascii_map, + "gameObjects": game_objects, + "scene": create_scene(num_players), + "prefabs": PREFABS, + "charPrefabMap": CHAR_PREFAB_MAP, + "playerPalettes": [PLAYER_COLOR_PALETTES[0]] * num_players, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/allelopathic_harvest__open.py b/meltingpot/python/configs/substrates/allelopathic_harvest__open.py new file mode 100644 index 00000000..cacefbf0 --- /dev/null +++ b/meltingpot/python/configs/substrates/allelopathic_harvest__open.py @@ -0,0 +1,84 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Allelopathic Harvest (open). + +Example video: https://youtu.be/Bb0duMG0YF4 + +This substrate contains three different varieties of berry (red, green, & blue) +and a fixed number of berry patches, which could be replanted to grow any color +variety of berry. The growth rate of each berry variety depends linearly on the +fraction that that color comprises of the total. Players have three planting +actions with which they can replant berries in their chosen color. 
All players +prefer to eat red berries (reward of 2 per red berry they eat versus a reward +of 1 per other colored berry). Players can achieve higher return by selecting +just one single color of berry to plant, but which one to pick is, in principle, +difficult to coordinate (start-up problem) -- though in this case all prefer +red berries, suggesting a globally rational choice. They also always prefer to +eat berries over spending time planting (free-rider problem). + +Allelopathic Harvest was first described in Koster et al. (2020). + +Köster, R., McKee, K.R., Everett, R., Weidinger, L., Isaac, W.S., Hughes, E., +Duenez-Guzman, E.A., Graepel, T., Botvinick, M. and Leibo, J.Z., 2020. +Model-free conventions in multi-agent reinforcement learning with heterogeneous +preferences. arXiv preprint arXiv:2010.09054. +""" + +from meltingpot.python.configs.substrates import allelopathic_harvest as base_config + +OPEN_ASCII_MAP = """ +333PPPP12PPP322P32PPP1P13P3P3 +1PPPP2PP122PPP3P232121P2PP2P1 +P1P3P11PPP13PPP31PPPP23PPPPPP +PPPPP2P2P1P2P3P33P23PP2P2PPPP +P1PPPPPPP2PPP12311PP3321PPPPP +133P2PP2PPP3PPP1PPP2213P112P1 +3PPPPPPPPPPPPP31PPPPPP1P3112P +PP2P21P21P33PPPPPPP3PP2PPPP1P +PPPPP1P1P32P3PPP22PP1P2PPPP2P +PPP3PP3122211PPP2113P3PPP1332 +PP12132PP1PP1P321PP1PPPPPP1P3 +PPP222P12PPPP1PPPP1PPP321P11P +PPP2PPPP3P2P1PPP1P23322PP1P13 +23PPP2PPPP2P3PPPP3PP3PPP3PPP2 +2PPPP3P3P3PP3PP3P1P3PP11P21P1 +21PPP2PP331PP3PPP2PPPPP2PP3PP +P32P2PP2P1PPPPPPP12P2PPP1PPPP +P3PP3P2P21P3PP2PP11PP1323P312 +2P1PPPPP1PPP1P2PPP3P32P2P331P +PPPPP1312P3P2PPPP3P32PPPP2P11 +P3PPPP221PPP2PPPPPPPP1PPP311P +32P3PPPPPPPPPP31PPPP3PPP13PPP +PPP3PPPPP3PPPPPP232P13PPPPP1P +P1PP1PPP2PP3PPPPP33321PP2P3PP +P13PPPP1P333PPPP2PP213PP2P3PP +1PPPPP3PP2P1PP21P3PPPP231P2PP +1331P2P12P2PPPP2PPP3P23P21PPP +P3P131P3PPP13P1PPP222PPPP11PP +2P3PPPPPPPP2P323PPP2PPP1PPP2P +21PPPPPPP12P23P1PPPPPP13P3P11 +""" + +build = base_config.build + + +def get_config(): + """Adjust default configuration.""" + config = 
base_config.get_config() + config.ascii_map = OPEN_ASCII_MAP + + config.default_player_roles = ( + ("player_who_likes_red",) * 8 + ("player_who_likes_green",) * 8) + + return config diff --git a/meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix.py b/meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix__arena.py similarity index 79% rename from meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix.py rename to meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix__arena.py index 9684aeec..926733f8 100644 --- a/meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix__arena.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Bach or Stravinsky in the Matrix. -Example video: https://youtu.be/SiFjSyCp2Ss +Example video: https://youtu.be/QstXaLjiqK4 See _Running with Scissors in the Matrix_ for a general description of the game dynamics. Here the payoff matrix represents the Bach or Stravinsky (battle @@ -29,10 +29,11 @@ Players have the default `11 x 11` (off center) observation window. 
""" -import copy -from typing import Any, Dict, Iterable, Sequence, Tuple +from typing import Any, Dict, Mapping, Sequence, Tuple from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -52,9 +53,7 @@ RESOURCE2_HIGHLIGHT_COLOR = (230, 170, 157, 255) RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) -# The procedural generator replaces all 'a' chars in the default map with chars -# representing specific resources, i.e. with either '1' or '2'. -DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWWWW WPPPP W W PPPPW WPPPP PPPPW @@ -114,10 +113,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -157,10 +152,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } @@ -221,16 +212,14 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "randomTieBreaking": True, - "zero_initial_inventory": True, "matrix": [ # row player chooses a row of this matrix. # B S @@ -243,8 +232,20 @@ def create_scene(): [2, 0], # B [0, 3], # S ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue + (0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.0) + ], } }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.2 + } + } ] } return scene @@ -271,10 +272,6 @@ def create_resource_prefab(resource_id, color_data): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -294,17 +291,17 @@ def create_resource_prefab(resource_id, color_data): "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", - "regenerationRate": 0.005, - "regenerationDelay": 50 + "regenerationRate": 0.04, + "regenerationDelay": 10, }, }, { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 1, + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, }, }, ] @@ -359,10 +356,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -405,14 +398,22 @@ def create_avatar_object(player_idx: int, { "component": "GameInteractionZapper", "kwargs": { - "cooldownTime": 32, + "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 200, + "framesTillRespawn": 50, "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. "reset_winner_inventory": True, "reset_loser_inventory": True, + # Both players get removed after each interaction. "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, } }, { @@ -436,6 +437,9 @@ def create_avatar_object(player_idx: int, "component": "Taste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. 
+ # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 1.0, + "defaultTastinessReward": 0.0, } }, { @@ -459,6 +463,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -468,63 +473,37 @@ def create_avatar_object(player_idx: int, return avatar_object -def create_avatar_objects(num_players: int) -> Sequence[PrefabConfig]: - """Returns all game objects for the map. - - Args: - num_players: number of players to create avatars for. - """ +def create_avatar_objects( + roles: Sequence[str], +) -> Sequence[PrefabConfig]: + """Returns all avatar game objects.""" avatar_objects = [] - for player_idx in range(num_players): - if player_idx % 2 == 0: - row_player = True - color = (50, 100, 200) - elif player_idx % 2 == 1: - row_player = False - color = (200, 100, 50) + for player_idx, role in enumerate(roles): + if role == "default": + if player_idx % 2 == 0: + row_player = True + color = (50, 100, 200) + elif player_idx % 2 == 1: + row_player = False + color = (200, 100, 50) + else: + if role == "bach_fan": + row_player = True + color = (50, 100, 200) + elif role == "stravinsky_fan": + row_player = False + color = (200, 100, 50) avatar = create_avatar_object(player_idx, color, row_player) avatar_objects.append(avatar) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) return avatar_objects -def create_lab2d_settings( - num_players: int, - ascii_map_string: str, - settings_overrides: Iterable[Tuple[str, Any]] = ()) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - num_players: (int) the number of players. - ascii_map_string: ascii map. - settings_overrides: (key, value) overrides for default settings. 
- """ - settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map_string, - "gameObjects": create_avatar_objects(num_players=num_players), - "scene": copy.deepcopy(create_scene()), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - } - } - settings.update(settings_overrides) - return settings - - -def get_config(factory=create_lab2d_settings): - """Default config for bach or stravinsky in the matrix.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 8 - - config.lab2d_settings = factory(config.num_players, DEFAULT_ASCII_MAP) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. @@ -532,6 +511,7 @@ def get_config(factory=create_lab2d_settings): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -546,10 +526,42 @@ def get_config(factory=create_lab2d_settings): "RGB": specs.OBSERVATION["RGB"], "INVENTORY": specs.inventory(2), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(2), "WORLD.RGB": specs.rgb(192, 200), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default", "bach_fan", "stravinsky_fan"}) + config.default_player_roles = ("bach_fan",) * 4 + ("stravinsky_fan",) * 4 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. 
+ substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(roles=roles), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix__repeated.py b/meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix__repeated.py new file mode 100644 index 00000000..4084ab44 --- /dev/null +++ b/meltingpot/python/configs/substrates/bach_or_stravinsky_in_the_matrix__repeated.py @@ -0,0 +1,565 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Bach or Stravinsky in the Matrix (2player, repeated). + +Example video: https://youtu.be/I2uYugpdffQ + +See _Running with Scissors in the Matrix_ for a general description of the +game dynamics. Here the payoff matrix represents the Bach or Stravinsky (battle +of the sexes) game. `K = 2` resources represent "Bach" and "Stravinsky" pure +strategies. + +Bach or Stravinsky is an asymmetric game. Players are assigned by their slot +id to be either row players (blue) or column players (orange). 
Interactions are +only resolved when they are between a row player and a column player. Otherwise, +e.g. when a row player tries to interact with another row player, then nothing +happens. + +Players have a `5 x 5` observation window. + +The episode has a chance of ending stochastically on every 100 step interval +after step 1000. This usually allows time for 8 or more interactions. +""" + +from typing import Any, Dict, Mapping, Sequence, Tuple + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import game_object_utils +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +PrefabConfig = game_object_utils.PrefabConfig + +# The number of resources must match the (square) size of the matrix. +NUM_RESOURCES = 2 + +# This color is light blue. +RESOURCE1_COLOR = (123, 231, 255, 255) +RESOURCE1_HIGHLIGHT_COLOR = (157, 217, 230, 255) +RESOURCE1_COLOR_DATA = (RESOURCE1_COLOR, RESOURCE1_HIGHLIGHT_COLOR) +# This color is light orange. +RESOURCE2_COLOR = (255, 163, 123, 255) +RESOURCE2_HIGHLIGHT_COLOR = (230, 170, 157, 255) +RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWW +Wn n nW +W WWW W W W WW W +W W 11a W a22 W W +Wn WW 11a W a22 WW nW +W 11a a22 W +W W +Wn WW WW n WW WWW nW +W W +W 22a W a11 W +Wn W 22a W a11 W nW +W W 22a W a11 WW W +W WWWW W W W WWW W +Wn n nW +WWWWWWWWWWWWWWWWWWWWWWW +""" + +_resource_names = [ + "resource_class1", + "resource_class2", +] + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
+CHAR_PREFAB_MAP = { + "a": {"type": "choice", "list": _resource_names}, + "1": _resource_names[0], + "2": _resource_names[1], + "n": "spawn_point", + "W": "wall", +} + +_COMPASS = ["N", "E", "S", "W"] + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall"], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [False] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gameInteraction" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. +NUM_PLAYERS_UPPER_BOUND = 32 +PLAYER_COLOR_PALETTES = [] +for idx in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((50, 100, 200)), + "noRotate": True, +} + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TheMatrix", + "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, + "randomTieBreaking": True, + "matrix": [ + # row player chooses a row of this matrix. + # B S + [3, 0], # B + [0, 2], # S + ], + "columnPlayerMatrix": [ + # column player chooses a column of this matrix. + # B S + [2, 0], # B + [0, 3], # S + ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue + (0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.0) + ], + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.1 + } + } + ] + } + return scene + + +def create_resource_prefab(resource_id, color_data): + """Creates resource prefab with provided `resource_id` (num) and color.""" + resource_name = "resource_class{}".format(resource_id) + resource_prefab = { + "name": resource_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": resource_name, + "stateConfigs": [ + {"state": resource_name + "_wait", + "groups": ["resourceWaits"]}, + {"state": resource_name, + "layer": "lowerPhysical", + "sprite": resource_name + "_sprite"}, + ] + }, + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [resource_name + "_sprite"], + "spriteShapes": [shapes.BUTTON], + "palettes": [{"*": color_data[0], + "#": color_data[1], + "x": (0, 0, 0, 0)}], + "noRotates": [False] + }, + }, + { + "component": "Resource", + "kwargs": { + "resourceClass": resource_id, + "visibleType": resource_name, + "waitState": resource_name + "_wait", + "regenerationRate": 0.02, + "regenerationDelay": 10, + }, + }, + { + "component": "Destroyable", + "kwargs": { + "waitState": resource_name + "_wait", + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, + }, + }, + ] + } + return resource_prefab + + +def create_prefabs() -> PrefabConfig: + """Returns the prefabs. + + Prefabs are a dictionary mapping names to template game objects that can + be cloned and placed in multiple locations according to an ascii map. 
+ """ + prefabs = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + } + prefabs["resource_class1"] = create_resource_prefab(1, RESOURCE1_COLOR_DATA) + prefabs["resource_class2"] = create_resource_prefab(2, RESOURCE2_COLOR_DATA) + return prefabs + + +def create_avatar_object(player_idx: int, + color: Tuple[int, int, int], + row_player: bool) -> Dict[str, Any]: + """Create an avatar object that always sees itself as blue.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. + source_sprite_self = "Avatar" + str(lua_index) + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [source_sprite_self], + "spriteShapes": [shapes.CUTE_AVATAR], + "palettes": [shapes.get_palette(color)], + "noRotates": [True] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 2, + "right": 2, + "forward": 3, + "backward": 1, + "centered": False + }, + # The following kwarg makes it possible to get rewarded even + # on frames when an avatar is "dead". 
It is needed for in the + # matrix games in order to correctly handle the case of two + # players getting hit simultaneously by the same beam. + "skipWaitStateRewards": False, + } + }, + { + "component": "GameInteractionZapper", + "kwargs": { + "cooldownTime": 2, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 5, + "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GameInteractionZapper", + } + }, + { + "component": "InventoryObserver", + "kwargs": { + } + }, + { + "component": "SpawnResourcesWhenAllPlayersZapped", + }, + { + "component": "DyadicRole", + "kwargs": { + "rowPlayer": row_player, + } + }, + { + "component": "Taste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 1.0, + "defaultTastinessReward": 0.0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + # Report the inventories of both players involved in + # an interaction on this frame formatted as + # (self inventory, partner inventory). 
+ "name": "INTERACTION_INVENTORIES", + "type": "tensor.DoubleTensor", + "shape": (2, NUM_RESOURCES), + "component": "GameInteractionZapper", + "variable": "latest_interaction_inventories", + }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), + ] + } + }, + ] + } + + return avatar_object + + +def create_avatar_objects( + roles: Sequence[str], +) -> Sequence[PrefabConfig]: + """Returns all avatar game objects.""" + avatar_objects = [] + for player_idx, role in enumerate(roles): + if role == "default": + if player_idx % 2 == 0: + row_player = True + color = (50, 100, 200) + elif player_idx % 2 == 1: + row_player = False + color = (200, 100, 50) + else: + if role == "bach_fan": + row_player = True + color = (50, 100, 200) + elif role == "stravinsky_fan": + row_player = False + color = (200, 100, 50) + + avatar = create_avatar_object(player_idx, color, row_player) + avatar_objects.append(avatar) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) + return avatar_objects + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "INVENTORY", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + "INTERACTION_INVENTORIES", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "INVENTORY": specs.inventory(2), + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). 
+ "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "INTERACTION_INVENTORIES": specs.interaction_inventories(2), + "WORLD.RGB": specs.rgb(120, 184), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default", "bach_fan", "stravinsky_fan"}) + config.default_player_roles = ("bach_fan", "stravinsky_fan",) + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(roles=roles), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/boat_race.py b/meltingpot/python/configs/substrates/boat_race.py new file mode 100644 index 00000000..b6ec0d31 --- /dev/null +++ b/meltingpot/python/configs/substrates/boat_race.py @@ -0,0 +1,900 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for boat_race. + +Example video: https://youtu.be/sEh1hRJVuFw + +Six players engage in a back and forth series of boat races across a river to +reach a patch of apples, which confer reward when eaten. Boats, however, cannot +be rowed by a single player, and thus, players need to find a partner before +each race and coordinate their rowing during the race to cross the river. When +the players are on the boat, they can choose from two different rowing actions +at each timestep: (a) paddle, which is efficient, but costly if not +coordinated with its partner; and (b) flail, an inefficient action which isn't +affected by the partner's action. When both players paddle simultaneously, the +boat moves one cell every few timesteps. When either player flails, the boat has +a probability of moving one cell, and a reward penalty is given to its partner +if that partner is currently paddling, i.e. if they have executed the paddle +action within the last few timesteps. +""" + +from collections.abc import Mapping, Sequence +from typing import Any + +from ml_collections import config_dict as configdict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# This substrate only makes sense with exactly six players. 
+MANDATED_NUM_PLAYERS = 6 +NUM_RACES = 8 +PARTNER_DURATION = 75 +RACE_DURATION = 225 +UNROLL_LENGTH = 100 + +ASCII_MAP = r""" +WWWWWWWWWWWWWWWWWWWWWWWWWW +W W +W W +W W +W RRRRRRRRRRRR W +W RRRRRRRRRRRR W +W RRRRRRRRRRRR W +W RRRRRRRRRRRR W +W W +W S SS SS S W +W S%%SS%%SS%%S W +W S SS SS S W +~~~~~~~~gg~~gg~~gg~~~~~~~~ +~~~~~~~~{{~~{{~~{{~~~~~~~~ +~~~~~~~~AA~~AA~~AA~~~~~~~~ +~~~~~~~~{{~~{{~~{{~~~~~~~~ +~~~~~~~~{{~~{{~~{{~~~~~~~~ +~~~~~~~~AA~~AA~~AA~~~~~~~~ +~~~~~~~~{{~~{{~~{{~~~~~~~~ +~~~~~~~~{{~~{{~~{{~~~~~~~~ +~~~~~~~~AA~~AA~~AA~~~~~~~~ +~~~~~~~~{{~~{{~~{{~~~~~~~~ +~~~~~~~~{{~~{{~~{{~~~~~~~~ +~~~~~~~~AA~~AA~~AA~~~~~~~~ +~~~~~~~~/\~~/\~~/\~~~~~~~~ +~~~~~~~p;:qp;:qp;:q~~~~~~~ +W SLJSSLJSSLJS W +W S--SS--SS--S W +W S SS SS S W +W W +W OOOOOOOOOOOO W +W OOOOOOOOOOOO W +W OOOOOOOOOOOO W +W OOOOOOOOOOOO W +W W +W ________________ W +W ________________ W +WWWWWWWWWWWWWWWWWWWWWWWWWW +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "_": {"type": "all", "list": ["floor", "spawn_point"]}, + " ": "floor", + "W": "wall", + "S": {"type": "all", "list": ["floor", "semaphore"]}, + "A": {"type": "all", "list": ["water_background", "single_apple"]}, + "R": {"type": "all", "list": ["floor", "respawning_apple_north"]}, + "O": {"type": "all", "list": ["floor", "respawning_apple_south"]}, + "%": {"type": "all", "list": ["floor", "barrier_north"]}, + "-": {"type": "all", "list": ["floor", "barrier_south"]}, + "~": "water_blocking", + "{": "water_background", + "g": {"type": "all", "list": ["goal_north", "water_background"]}, + "/": {"type": "all", "list": ["boat_FL", "water_background"]}, + "\\": {"type": "all", "list": ["boat_FR", "water_background"]}, + "L": {"type": "all", "list": ["floor", "boat_RL"]}, + "J": {"type": "all", "list": ["floor", "boat_RR"]}, + "p": {"type": "all", "list": ["oar_L", "water_blocking"]}, + "q": {"type": "all", "list": ["oar_R", "water_blocking"]}, + ";": {"type": "all", "list": 
["seat_L", "goal_south", "water_background"]}, + ":": {"type": "all", "list": ["seat_R", "goal_south", "water_background"]}, +} + +_COMPASS = ["N", "E", "S", "W"] + + +# The Scene object is a non-physical object; its components implement global +# logic. In this case, that includes managing the races and the episode, as +# well as tracking and reporting some global observations. +SCENE = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "partnerChoice", + "stateConfigs": [{ + "state": "ForceEmbark", + }, { + "state": "partnerChoice", + }, { + "state": "semaphore_yellow", + }, { + "state": "semaphore_green", + }, { + "state": "boatRace", + }, { + "state": "semaphore_red", # A temporary state at end game. + }], + } + }, + {"component": "Transform",}, + { + "component": "RaceManager", + "kwargs": { + "raceStartTime": PARTNER_DURATION, + "raceDuration": RACE_DURATION, + }, + }, + { + "component": "GlobalMetricReporter", + "kwargs": { + "metrics": [ + {"name": "RACE_START", + "type": "tensor.Int32Tensor", + "shape": (MANDATED_NUM_PLAYERS // 2, 2), + "component": "GlobalRaceTracker", + "variable": "raceStart"}, + + {"name": "STROKES", + "type": "tensor.Int32Tensor", + "shape": (MANDATED_NUM_PLAYERS,), + "component": "GlobalRaceTracker", + "variable": "strokes"}, + ] + } + }, + { + "component": "GlobalRaceTracker", + "kwargs": { + "numPlayers": MANDATED_NUM_PLAYERS, + }, + }, + { + "component": "EpisodeManager", + "kwargs": { + "checkInterval": UNROLL_LENGTH, + }, + }, + ] +} + +FLOOR = { + "name": "floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "floor", + "stateConfigs": [{ + "state": "floor", + "layer": "background", + "sprite": "Floor", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Floor",], + "spriteShapes": [shapes.GRAINY_FLOOR], + "palettes": [{ + "+": (157, 142, 120, 255), + "*": (154, 139, 
115, 255), + }], + "noRotates": [True] + } + }, + { + "component": "Transform", + }, + ] +} + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall",], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [True] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gift" + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "zap" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "logic", + "groups": ["spawnPoints"] + }], + } + }, + {"component": "Transform",}, + ] +} + + +SINGLE_APPLE = { + "name": "single_apple", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "apple", + "stateConfigs": [ + {"state": "apple", + "layer": "superDirectionIndicatorLayer", + "sprite": "apple", + }, + {"state": "appleWait", + "layer": "logic", + }, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["apple"], + "spriteShapes": [shapes.HD_APPLE], + "palettes": [shapes.get_palette((40, 180, 40, 255))], + "noRotates": [False], + } + }, + { + "component": "Edible", + "kwargs": { + "liveState": "apple", + "waitState": "appleWait", + "rewardForEating": 1.0, + } + }, + ] +} + + +def get_respawning_apple(bank_side: str): + initial_state = "apple" if bank_side == "N" else "applePause" + return { + "name": "apple", + "components": [ + { + "component": 
"StateManager", + "kwargs": { + "initialState": initial_state, + "stateConfigs": [ + {"state": "apple", + "layer": "superOverlay", + "sprite": "apple", + }, + {"state": "appleWait", + "layer": "logic", + }, + {"state": "applePause", + "layer": "logic", + }, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["apple"], + "spriteShapes": [shapes.HD_APPLE], + "palettes": [shapes.get_palette((40, 180, 40, 255))], + "noRotates": [False], + } + }, + { + "component": "Edible", + "kwargs": { + "liveState": "apple", + "waitState": "appleWait", + "rewardForEating": 1.0, + } + }, + { + "component": "FixedRateRegrow", + "kwargs": { + "liveState": "apple", + "waitState": "appleWait", + "regrowRate": 0.1, + } + }, + ] + } + + +SEMAPHORE = { + "name": "semaphore", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "red", + "stateConfigs": [ + {"state": "red", + "layer": "upperPhysical", + "sprite": "red", + "groups": ["semaphore"]}, + {"state": "yellow", + "layer": "upperPhysical", + "sprite": "yellow", + "groups": ["semaphore"]}, + {"state": "green", + "layer": "upperPhysical", + "sprite": "green", + "groups": ["semaphore"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["red", "yellow", "green"], + "spriteShapes": [shapes.COIN] * 3, + "palettes": [shapes.RED_COIN_PALETTE, shapes.COIN_PALETTE, + shapes.GREEN_COIN_PALETTE], + "noRotates": [False] * 3, + } + }, + ] +} + + +def get_barrier(bank_side: str = "N"): + initial_state = "off" if bank_side == "N" else "on" + return { + "name": "barrier", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": initial_state, + "stateConfigs": [ + {"state": "on", + "layer": "upperPhysical", + "sprite": "barrierOn", + "groups": ["barrier"]}, + {"state": "off", + "layer": "superOverlay", + "sprite": 
"barrierOff", + "groups": ["barrier"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["barrierOn", "barrierOff"], + "spriteShapes": [shapes.BARRIER_ON, shapes.BARRIER_OFF], + "palettes": [shapes.GRAY_PALETTE] * 2, + "noRotates": [False] * 2, + } + }, + ] + } + + +def get_water(layer: str): + """Get a water game object at the specified layer, possibly with a goal.""" + return { + "name": "water_{}".format(layer), + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "water_1", + "stateConfigs": [ + {"state": "water_1", + "layer": layer, + "sprite": "water_1", + "groups": ["water"]}, + {"state": "water_2", + "layer": layer, + "sprite": "water_2", + "groups": ["water"]}, + {"state": "water_3", + "layer": layer, + "sprite": "water_3", + "groups": ["water"]}, + {"state": "water_4", + "layer": layer, + "sprite": "water_4", + "groups": ["water"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["water_1", "water_2", "water_3", "water_4"], + "spriteShapes": [shapes.WATER_1, shapes.WATER_2, + shapes.WATER_3, shapes.WATER_4], + "palettes": [shapes.WATER_PALETTE] * 4, + } + }, + { + "component": "Animation", + "kwargs": { + "states": ["water_1", "water_2", "water_3", "water_4"], + "gameFramesPerAnimationFrame": 2, + "loop": True, + "randomStartFrame": True, + "group": "water", + } + }, + ] + } + + +def get_goal(bank_side: str = "N"): + return { + "name": "water_goal", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "goalNonBlocking", + "stateConfigs": [{ + "state": "goalNonBlocking", + "layer": "logic", + }, { + "state": "goalBlocking", + "layer": "upperPhysical", + }], + } + }, + {"component": "Transform",}, + { + "component": "WaterGoal", + "kwargs": { + "bank_side": bank_side + }, + } + ] + } + + +def get_boat(front: bool, 
left: bool): + suffix = "{}{}".format("F" if front else "R", "L" if left else "R") + shape = { + "FL": shapes.BOAT_FRONT_L, + "FR": shapes.BOAT_FRONT_R, + "RL": shapes.BOAT_REAR_L, + "RR": shapes.BOAT_REAR_R, + } + return { + "name": f"boat_{suffix}", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "boat", + "stateConfigs": [ + {"state": "boat", + "layer": "lowerPhysical", + "sprite": f"Boat{suffix}", + "groups": ["boat"]}, + {"state": "boatFull", + "layer": "overlay", + "sprite": f"Boat{suffix}", + "groups": ["boat"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [f"Boat{suffix}"], + "spriteShapes": [shape[suffix]], + "palettes": [shapes.BOAT_PALETTE], + "noRotates": [False] + } + }, + ] + } + + +def get_seat(left: bool): + """Get a seat prefab. Left seats contain the BoatManager component.""" + suffix = "L" if left else "R" + shape = { + "L": shapes.BOAT_SEAT_L, + "R": shapes.BOAT_SEAT_R, + } + seat = { + "name": f"seat_{suffix}", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "seat", + "stateConfigs": [ + {"state": "seat", + "layer": "lowerPhysical", + "sprite": f"Seat{suffix}", + "groups": ["seat", "boat"]}, + {"state": "seatTaken", + "layer": "overlay", + "sprite": f"Seat{suffix}", + "contact": "boat"}, + {"state": "seatUsed", + "layer": "lowerPhysical", + "sprite": f"Seat{suffix}"}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [f"Seat{suffix}"], + "spriteShapes": [shape[suffix]], + "palettes": [shapes.BOAT_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Seat", + "kwargs": { + }, + }, + ] + } + if left: + seat["components"] += [ + { + "component": "BoatManager", + "kwargs": { + "flailEffectiveness": 0.1, + } + } + ] + return seat + + +def get_oar(left: bool): + suffix = "L" if left else 
"R" + shape = { + "L": [shapes.OAR_DOWN_L, shapes.OAR_UP_L, shapes.OAR_UP_L], + "R": [shapes.OAR_DOWN_R, shapes.OAR_UP_R, shapes.OAR_UP_R], + } + return { + "name": f"oar_{suffix}", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "oarDown", + "stateConfigs": [ + {"state": "oarDown", + "layer": "overlay", + "sprite": f"OarDown{suffix}", + "groups": ["oar", "boat"]}, + + {"state": "oarUp_row", + "layer": "overlay", + "sprite": f"OarUp{suffix}Row", + "groups": ["oar", "boat"]}, + + {"state": "oarUp_flail", + "layer": "overlay", + "sprite": f"OarUp{suffix}Flail", + "groups": ["oar", "boat"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [ + f"OarDown{suffix}", + f"OarUp{suffix}Row", + f"OarUp{suffix}Flail", + ], + "spriteShapes": shape[suffix], + "palettes": [shapes.GRAY_PALETTE] * 3, + "noRotates": [False] * 3 + } + }, + ] + } + + +AVATAR = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "player", + "stateConfigs": [ + {"state": "player", + "layer": "upperPhysical", + "sprite": "Avatar", + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + + {"state": "rowing", + "layer": "superOverlay", + "sprite": "Avatar", + "contact": "avatar", + "groups": ["players"]}, + + {"state": "landed", + "layer": "upperPhysical", + "sprite": "Avatar", + "contact": "avatar", + "groups": ["players"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Avatar"], + "spriteShapes": [shapes.CUTE_AVATAR], + "palettes": [shapes.get_palette(colors.human_readable[0])], + "noRotates": [True] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": -1, # player index to be overwritten. 
# PREFABS is a dictionary mapping names to template game objects that can
# be cloned and placed in multiple locations according to an ascii map.
PREFABS = {
    "floor": FLOOR,
    "wall": WALL,
    "spawn_point": SPAWN_POINT,
    # Two water variants built by the same factory; the argument looks like a
    # layer name -- confirm against `get_water`.
    "water_blocking": get_water("upperPhysical"),
    "water_background": get_water("background"),
    # Bank-specific prefabs come in a north ("N") and a south ("S") variant.
    "goal_north": get_goal(bank_side="N"),
    "goal_south": get_goal(bank_side="S"),
    "barrier_north": get_barrier(bank_side="N"),
    "barrier_south": get_barrier(bank_side="S"),
    "single_apple": SINGLE_APPLE,
    "respawning_apple_north": get_respawning_apple(bank_side="N"),
    "respawning_apple_south": get_respawning_apple(bank_side="S"),
    "semaphore": SEMAPHORE,
    # Boat tiles: one prefab per (front/rear, left/right) combination.
    "boat_FL": get_boat(front=True, left=True),
    "boat_FR": get_boat(front=True, left=False),
    "boat_RL": get_boat(front=False, left=True),
    "boat_RR": get_boat(front=False, left=False),
    # Only the left seat carries the BoatManager component (see `get_seat`).
    "seat_L": get_seat(left=True),
    "seat_R": get_seat(left=False),
    "oar_L": get_oar(left=True),
    "oar_R": get_oar(left=False),
    "avatar": AVATAR,
}
# PLAYER_COLOR_PALETTES holds one palette per player index, built from the
# first six entries of `colors.human_readable`.
# These correspond to the persistent agent colors, but are meaningless for the
# human player. They will be overridden by the environment_builder.
PLAYER_COLOR_PALETTES = [
    shapes.get_palette(colors.human_readable[player_index])
    for player_index in range(6)
]
def build(
    roles: Sequence[str],
    config: configdict.ConfigDict,
) -> Mapping[str, Any]:
  """Assemble the boat_race substrate definition.

  Args:
    roles: One role per player; its length must equal MANDATED_NUM_PLAYERS.
    config: Substrate config. It must contain `num_races`, which the base
      `get_config` does not set (the `boat_race__eight_races` config does).

  Returns:
    The substrate definition mapping.
  """
  assert len(roles) == MANDATED_NUM_PLAYERS, "Wrong number of players"
  assert "num_races" in config, (
      "Cannot build substrate without specifying the number of races. Try "
      "using the specific config (e.g. `boat_race__eight_races`) instead.")

  # The episode length scales with the number of races requested.
  frames_per_race = PARTNER_DURATION + RACE_DURATION
  episode_length = config.num_races * frames_per_race

  simulation = {
      "map": ASCII_MAP,
      "scene": SCENE,
      "prefabs": PREFABS,
      "charPrefabMap": CHAR_PREFAB_MAP,
      "playerPalettes": PLAYER_COLOR_PALETTES,
  }
  return {
      "levelName": "boat_race",
      "levelDirectory": "meltingpot/lua/levels",
      "numPlayers": MANDATED_NUM_PLAYERS,
      "maxEpisodeLengthFrames": episode_length,
      "spriteSize": 8,
      "topology": "BOUNDED",  # Choose from ["BOUNDED", "TORUS"].
      "simulation": simulation,
  }
def get_config() -> configdict.ConfigDict:
  """Return the base boat_race config specialised to eight races."""
  eight_race_config = base_config.get_config()

  # The base config leaves the race count unset; this variant fixes it.
  eight_race_config.num_races = 8

  # Every player gets the "default" role unless the caller overrides it.
  player_count = base_config.MANDATED_NUM_PLAYERS
  eight_race_config.default_player_roles = ("default",) * player_count

  return eight_race_config
# Map reaction to rewards.
# Keys are names of reaction nodes added in `make_graph`; values are the reward
# associated with that reaction. The mapping is handed to the avatar factory as
# `rewarding_reactions` in `create_avatar_objects`.
DEFAULT_REWARDING_REACTIONS = {"MetabolizeFood1": 1,
                               "MetabolizeFood2": 1,
                               "MetabolizeFood3": 10,
                               "MetabolizeXY": 10,}
def cycle(g, reaction_prefix: str, intermediates: Sequence[str], product: str,
          secondary_product=None, food: str = "food"):
  """Add a three-step reaction cycle to the graph `g`.

  Three reaction nodes named `<reaction_prefix><step><product>` (step 1..3)
  are added, each marked with `reaction=True`. Edges into a reaction node are
  its reactants; edges out of it are its products:

    1: i0 + i1 + empty  -> i1 + i2 + food
    2: i1 + i2 + energy -> i2 + i0 + energy
    3: i2 + i0 + empty  -> i0 + i1 + product

  When `secondary_product` is not None, step 3 takes one extra "empty"
  reactant and also yields `secondary_product`.

  Args:
    g: Graph exposing `add_node(name, **attrs)` and `add_edge(u, v)`.
    reaction_prefix: Prefix of the generated reaction node names.
    intermediates: At least three compound names (i0, i1, i2 above).
    product: Compound produced by step 3; also the reaction name suffix.
    secondary_product: Optional extra compound produced by step 3.
    food: Compound produced by step 1.

  Raises:
    IndexError: If `intermediates` has fewer than three entries. The graph is
      left untouched in that case.
  """
  # Read every intermediate before mutating the graph, so a too-short sequence
  # fails cleanly instead of leaving a half-built cycle behind.
  first, second, third = intermediates[0], intermediates[1], intermediates[2]

  last_reactants = [third, first, "empty"]
  last_products = [first, second, product]
  if secondary_product is not None:
    last_reactants.append("empty")
    last_products.append(secondary_product)

  # (step number, reactants, products), in the order the edges are added.
  steps = (
      (1, (first, second, "empty"), (second, third, food)),
      (2, (second, third, "energy"), (third, first, "energy")),
      (3, last_reactants, last_products),
  )
  for step, reactants, products in steps:
    reaction = f"{reaction_prefix}{step}{product}"
    g.add_node(reaction, reaction=True)
    for reactant in reactants:
      g.add_edge(reactant, reaction)
    for produced in products:
      g.add_edge(reaction, produced)
+ graph_utils.add_system_nodes(g) + + cycle(g, "R", + intermediates=["ax", "bx", "cx"], + product="x", + secondary_product="iy", + food="food1") + cycle(g, "R", + intermediates=["ay", "by", "cy"], + product="y", + secondary_product="ix", + food="food2") + greedy_cycle(g, "R", + intermediates=["az", "bz", "cz"], + product="food1", + secondary_product="food2", + food="food3") + + # Inhibit x with a product of the y-producing cycle. + g.add_node("InhibitX", reaction=True) + # Reactants: + g.add_edge("x", "InhibitX") + g.add_edge("ix", "InhibitX") + # Products: + g.add_edge("InhibitX", "empty") + g.add_edge("InhibitX", "empty") + + # Inhibit y with a product of the x-producing cycle. + g.add_node("InhibitY", reaction=True) + # Reactants: + g.add_edge("y", "InhibitY") + g.add_edge("iy", "InhibitY") + # Products: + g.add_edge("InhibitY", "empty") + g.add_edge("InhibitY", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood1", reaction=True) + # Reactants: + g.add_edge("food1", "MetabolizeFood1") + # Products: + g.add_edge("MetabolizeFood1", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood2", reaction=True) + # Reactants: + g.add_edge("food2", "MetabolizeFood2") + # Products: + g.add_edge("MetabolizeFood2", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood3", reaction=True) + # Reactants: + g.add_edge("food3", "MetabolizeFood3") + # Products: + g.add_edge("MetabolizeFood3", "empty") + + # Food spontaneously appears from time to time. + g.add_node("SpawnFood1", reaction=True) + # Reactants: + g.add_edge("empty", "SpawnFood1") + # Products: + g.add_edge("SpawnFood1", "food1") + + # Food spontaneously appears from time to time. + g.add_node("SpawnFood2", reaction=True) + # Reactants: + g.add_edge("empty", "SpawnFood2") + # Products: + g.add_edge("SpawnFood2", "food2") + + # x and y can be combined to produce energy. 
+ g.add_node("MetabolizeXY", reaction=True) + # Reactants: + g.add_edge("x", "MetabolizeXY") + g.add_edge("y", "MetabolizeXY") + # Products: + g.add_edge("MetabolizeXY", "energy") + g.add_edge("MetabolizeXY", "energy") + + # Energy spontaneously dissipates. + g.add_node("DissipateEnergy", reaction=True) + # Reactants: + g.add_edge("energy", "DissipateEnergy") + # Products: + g.add_edge("DissipateEnergy", "empty") + + # Prevent inhibitors from accumulating by dissipating them whenever they pair. + dissipate_when_paired(g, "DissipateIX", "ix") + dissipate_when_paired(g, "DissipateIY", "iy") + + # Properties of compounds + # Color: + g.nodes["ax"]["color"] = (153, 204, 255, 255) # blue 1 + g.nodes["bx"]["color"] = (102, 204, 255, 255) # blue 2 + g.nodes["cx"]["color"] = (51, 153, 255, 255) # blue 3 + + g.nodes["ay"]["color"] = (102, 255, 153, 255) # green 1 + g.nodes["by"]["color"] = (102, 255, 102, 255) # green 2 + g.nodes["cy"]["color"] = (0, 255, 0, 255) # green 3 + + g.nodes["az"]["color"] = (178, 34, 34, 255) # red 1 + g.nodes["bz"]["color"] = (131, 38, 38, 255) # red 2 + g.nodes["cz"]["color"] = (142, 27, 27, 255) # red 3 + + g.nodes["x"]["color"] = (0, 51, 204, 255) # dark blue + g.nodes["y"]["color"] = (0, 51, 0, 255) # dark green + + g.nodes["food1"]["color"] = (178, 151, 0, 255) # light gold + g.nodes["food1"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["food2"]["color"] = (255, 215, 0, 255) # gold + g.nodes["food2"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["food3"]["color"] = (255, 100, 50, 255) # orange + g.nodes["food3"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["energy"]["color"] = (255, 0, 0, 255) # red + g.nodes["energy"]["sprite"] = graph_utils.ENERGY_SHAPE + + g.nodes["ix"]["color"] = (102, 153, 153, 255) # greyish green + g.nodes["iy"]["color"] = (51, 102, 153, 255) # greyish blue + + # Reactivity: + g.nodes["ax"]["reactivity"] = "high" + g.nodes["bx"]["reactivity"] = "high" + g.nodes["cx"]["reactivity"] = "high" + + 
# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use
# for the player at the corresponding index. One palette is built for each of
# the first NUM_PLAYERS_UPPER_BOUND entries of `colors.palette`.
NUM_PLAYERS_UPPER_BOUND = 60
# A comprehension keeps the loop variable out of the module namespace.
PLAYER_COLOR_PALETTES = [
    shapes.get_palette(colors.palette[player_index])
    for player_index in range(NUM_PLAYERS_UPPER_BOUND)
]
def create_avatar_objects(num_players, compounds):
  """Create one avatar and one overlaid vesicle object per player.

  Args:
    num_players: Number of players to create objects for.
    compounds: Compounds forwarded to `graph_utils.create_vesicle`.

  Returns:
    A pair `(avatar_objects, additional_game_objects)`, each a list of length
    `num_players` ordered by player index. The second list holds the vesicles.
  """
  vesicle_settings = dict(
      compounds=compounds,
      reactivity_levels=REACTIVITY_LEVELS["vesicle"],
      default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"],
      default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"],
      priority_mode=True)

  # Create avatar and vesicle together for each player, in player order.
  per_player = []
  for player_idx in range(num_players):
    avatar = graph_utils.create_avatar_constant_self_view(
        rewarding_reactions=DEFAULT_REWARDING_REACTIONS,
        player_idx=player_idx,
        target_sprite_self_empty=TARGET_SPRITE_SELF_EMPTY,
        target_sprite_self_holds_one=TARGET_SPRITE_SELF_HOLDS_ONE)
    # The vesicle is overlaid on top of the avatar it belongs to.
    vesicle = graph_utils.create_vesicle(
        player_idx=player_idx, **vesicle_settings)
    per_player.append((avatar, vesicle))

  avatar_objects = [avatar for avatar, _ in per_player]
  additional_game_objects = [vesicle for _, vesicle in per_player]
  return avatar_objects, additional_game_objects
+ compounds, reactions = graph_utils.graph_semantics(make_graph()) + + cell_prefabs = {} + cell_prefabs = graph_utils.add_compounds_to_prefabs_dictionary( + cell_prefabs, compounds, REACTIVITY_LEVELS["ground"], sprites=True, + default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], + default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], + priority_mode=True) + + avatar_objects, additional_objects = create_avatar_objects(num_players, + compounds) + + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="grid_land", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"]. + simulation={ + "map": ASCII_MAP, + "gameObjects": avatar_objects + additional_objects, + "scene": graph_utils.create_scene(reactions, + stochastic_episode_ending=True), + "prefabs": cell_prefabs, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/chemistry__three_metabolic_cycles_with_plentiful_distractors.py b/meltingpot/python/configs/substrates/chemistry__three_metabolic_cycles_with_plentiful_distractors.py new file mode 100644 index 00000000..f3b69f59 --- /dev/null +++ b/meltingpot/python/configs/substrates/chemistry__three_metabolic_cycles_with_plentiful_distractors.py @@ -0,0 +1,516 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# Map reaction to rewards.
# Keys are names of reaction nodes added in `make_graph`. "Holding" is the
# `null` reaction registered there for the "distractor" compound.
DEFAULT_REWARDING_REACTIONS = {"MetabolizeFood1": 1,
                               "MetabolizeFood2": 1,
                               "MetabolizeFood3": 10,
                               "MetabolizeXY": 10,
                               "Holding": 0.1}
def dissipate_when_paired(g: nx.MultiDiGraph, reaction_name: str,
                          compound: str):
  """Add a reaction turning two units of `compound` into two "empty".

  Args:
    g: Reaction graph to extend in place.
    reaction_name: Name of the reaction node to add.
    compound: Compound that appears twice as a reactant.
  """
  pair = range(2)
  g.add_node(reaction_name, reaction=True)
  # Reactants: the compound, twice (parallel edges in the multigraph).
  for _ in pair:
    g.add_edge(compound, reaction_name)
  # Products: one "empty" per consumed reactant.
  for _ in pair:
    g.add_edge(reaction_name, "empty")
def greedy_cycle(g: nx.MultiDiGraph, reaction_prefix: str,
                 intermediates: Sequence[str], product: str,
                 secondary_product: str, food: str = "food"):
  """Add a three-step reaction cycle whose second step destroys energy.

  Reaction nodes are named `<reaction_prefix><step><product>`. Writing the
  first three entries of `intermediates` as i0, i1, i2:

    1: i0 + i1 + empty  -> i1 + i2 + food
    2: i1 + i2 + energy -> i2 + i0 + empty
    3: i2 + i0 + empty  -> i0 + i1 + product

  When `secondary_product` is not None, step 3 takes one extra "empty"
  reactant and also yields `secondary_product`.
  """
  emits_secondary = secondary_product is not None

  # Step 1: yields food.
  first_step = f"{reaction_prefix}1{product}"
  g.add_node(first_step, reaction=True)
  for reactant_index in (0, 1):
    g.add_edge(intermediates[reactant_index], first_step)
  g.add_edge("empty", first_step)
  for product_index in (1, 2):
    g.add_edge(first_step, intermediates[product_index])
  g.add_edge(first_step, food)

  # Step 2: takes one energy and returns "empty" in its place.
  second_step = f"{reaction_prefix}2{product}"
  g.add_node(second_step, reaction=True)
  for reactant_index in (1, 2):
    g.add_edge(intermediates[reactant_index], second_step)
  g.add_edge("energy", second_step)
  for product_index in (2, 0):
    g.add_edge(second_step, intermediates[product_index])
  g.add_edge(second_step, "empty")

  # Step 3: yields the main product and, optionally, the secondary one.
  third_step = f"{reaction_prefix}3{product}"
  g.add_node(third_step, reaction=True)
  for reactant_index in (2, 0):
    g.add_edge(intermediates[reactant_index], third_step)
  g.add_edge("empty", third_step)
  if emits_secondary:
    g.add_edge("empty", third_step)
  for product_index in (0, 1):
    g.add_edge(third_step, intermediates[product_index])
  g.add_edge(third_step, product)
  if emits_secondary:
    g.add_edge(third_step, secondary_product)
using networkx.""" + # Note: You can copy-paste this function into colab to visualize the graph. + g = nx.MultiDiGraph() + # First add the "empty" and "activated" nodes, which are always present. + graph_utils.add_system_nodes(g) + + cycle(g, "R", + intermediates=["ax", "bx", "cx"], + product="x", + secondary_product="iy", + food="food1") + cycle(g, "R", + intermediates=["ay", "by", "cy"], + product="y", + secondary_product="ix", + food="food2") + greedy_cycle(g, "R", + intermediates=["az", "bz", "cz"], + product="food1", + secondary_product="food2", + food="food3") + null(g, "Holding", "distractor") # Holding the distractor provides reward. + + # Inhibit x with a product of the y-producing cycle. + g.add_node("InhibitX", reaction=True) + # Reactants: + g.add_edge("x", "InhibitX") + g.add_edge("ix", "InhibitX") + # Products: + g.add_edge("InhibitX", "empty") + g.add_edge("InhibitX", "empty") + + # Inhibit y with a product of the x-producing cycle. + g.add_node("InhibitY", reaction=True) + # Reactants: + g.add_edge("y", "InhibitY") + g.add_edge("iy", "InhibitY") + # Products: + g.add_edge("InhibitY", "empty") + g.add_edge("InhibitY", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood1", reaction=True) + # Reactants: + g.add_edge("food1", "MetabolizeFood1") + # Products: + g.add_edge("MetabolizeFood1", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood2", reaction=True) + # Reactants: + g.add_edge("food2", "MetabolizeFood2") + # Products: + g.add_edge("MetabolizeFood2", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood3", reaction=True) + # Reactants: + g.add_edge("food3", "MetabolizeFood3") + # Products: + g.add_edge("MetabolizeFood3", "empty") + + # Food spontaneously appears from time to time. 
+ g.add_node("SpawnFood1", reaction=True) + # Reactants: + g.add_edge("empty", "SpawnFood1") + # Products: + g.add_edge("SpawnFood1", "food1") + + # Food spontaneously appears from time to time. + g.add_node("SpawnFood2", reaction=True) + # Reactants: + g.add_edge("empty", "SpawnFood2") + # Products: + g.add_edge("SpawnFood2", "food2") + + # x and y can be combined to produce energy. + g.add_node("MetabolizeXY", reaction=True) + # Reactants: + g.add_edge("x", "MetabolizeXY") + g.add_edge("y", "MetabolizeXY") + # Products: + g.add_edge("MetabolizeXY", "energy") + g.add_edge("MetabolizeXY", "energy") + + # Energy spontaneously dissipates. + g.add_node("DissipateEnergy", reaction=True) + # Reactants: + g.add_edge("energy", "DissipateEnergy") + # Products: + g.add_edge("DissipateEnergy", "empty") + + # Prevent inhibitors from accumulating by dissipating them whenever they pair. + dissipate_when_paired(g, "DissipateIX", "ix") + dissipate_when_paired(g, "DissipateIY", "iy") + + # Properties of compounds + # Color: + g.nodes["ax"]["color"] = (153, 204, 255, 255) # blue 1 + g.nodes["bx"]["color"] = (102, 204, 255, 255) # blue 2 + g.nodes["cx"]["color"] = (51, 153, 255, 255) # blue 3 + + g.nodes["ay"]["color"] = (102, 255, 153, 255) # green 1 + g.nodes["by"]["color"] = (102, 255, 102, 255) # green 2 + g.nodes["cy"]["color"] = (0, 255, 0, 255) # green 3 + + g.nodes["az"]["color"] = (178, 34, 34, 255) # red 1 + g.nodes["bz"]["color"] = (131, 38, 38, 255) # red 2 + g.nodes["cz"]["color"] = (142, 27, 27, 255) # red 3 + + g.nodes["x"]["color"] = (0, 51, 204, 255) # dark blue + g.nodes["y"]["color"] = (0, 51, 0, 255) # dark green + + g.nodes["food1"]["color"] = (178, 151, 0, 255) # light gold + g.nodes["food1"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["food2"]["color"] = (255, 215, 0, 255) # gold + g.nodes["food2"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["food3"]["color"] = (255, 100, 50, 255) # orange + g.nodes["food3"]["sprite"] = graph_utils.FOOD_SHAPE + + 
g.nodes["distractor"]["color"] = (75, 0, 130, 255) # indigo + g.nodes["distractor"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["energy"]["color"] = (255, 0, 0, 255) # red + g.nodes["energy"]["sprite"] = graph_utils.ENERGY_SHAPE + + g.nodes["ix"]["color"] = (102, 153, 153, 255) # greyish green + g.nodes["iy"]["color"] = (51, 102, 153, 255) # greyish blue + + # Reactivity: + g.nodes["ax"]["reactivity"] = "high" + g.nodes["bx"]["reactivity"] = "high" + g.nodes["cx"]["reactivity"] = "high" + + g.nodes["ay"]["reactivity"] = "high" + g.nodes["by"]["reactivity"] = "high" + g.nodes["cy"]["reactivity"] = "high" + + g.nodes["az"]["reactivity"] = "high" + g.nodes["bz"]["reactivity"] = "high" + g.nodes["cz"]["reactivity"] = "high" + + g.nodes["x"]["reactivity"] = "medium" + g.nodes["y"]["reactivity"] = "medium" + + g.nodes["ix"]["reactivity"] = "high" + g.nodes["iy"]["reactivity"] = "high" + + g.nodes["food1"]["reactivity"] = "medium" + g.nodes["food2"]["reactivity"] = "medium" + g.nodes["food3"]["reactivity"] = "medium" + g.nodes["distractor"]["reactivity"] = "medium" + g.nodes["energy"]["reactivity"] = "low" + g.nodes["empty"]["reactivity"] = "background" + + # The following commented line documents how to set the query config for a + # specific compound, overriding the default query configuration. + # g.nodes["food1"]["query_config"] = {"radius": 3, "queryType": "diamond"} + + return g + +ASCII_MAP = """ +~~~~~~~~~~~a~x~~~~~~~~~~~ +~~~~~~~~c~~~~~~~~~~~~x~~~ +~~x~~~~~~~~b~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~x~~~~~~~~~~~~~1~~~ +~~~~~~~~~~~~~~~~~~x~~~~~~ +1~~3~~~~hhhhhhh~~~~~3~~2~ +~~~x~~~~~~~~~~~~~~~~~~~~~ +~2~~~~~~~~~~~x~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~x~~~~~ +~~~~~~~c~~~~~~~~~~~~~~~~~ +~x~~~~~~~a~~~~~~~~~~4~~~6 +~~~~~~~b~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~x~~~~~~~~5~~ +""" + +# `prefab` determines which compound to use for each `char` in the ascii map. 
+CHAR_PREFAB_MAP = { + "~": "empty", + "a": "ax", + "b": "bx", + "c": "cx", + "1": "ay", + "2": "by", + "3": "cy", + "4": "az", + "5": "bz", + "6": "cz", + "x": "distractor", + "h": "energy", +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. +NUM_PLAYERS_UPPER_BOUND = 60 +PLAYER_COLOR_PALETTES = [] +for i in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[i])) + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "ioAction": 0} +FORWARD = {"move": 1, "turn": 0, "ioAction": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "ioAction": 0} +BACKWARD = {"move": 3, "turn": 0, "ioAction": 0} +STEP_LEFT = {"move": 4, "turn": 0, "ioAction": 0} +TURN_LEFT = {"move": 0, "turn": -1, "ioAction": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "ioAction": 0} +IO_ACTION = {"move": 0, "turn": 0, "ioAction": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + IO_ACTION, +) + +TARGET_SPRITE_SELF_EMPTY = { + "name": "SelfEmpty", + "shape": shapes.CYTOAVATAR_EMPTY, + "palette": shapes.CYTOAVATAR_PALETTE, + "noRotate": True, +} +TARGET_SPRITE_SELF_HOLDS_ONE = { + "name": "SelfHoldsOne", + "shape": shapes.CYTOAVATAR_HOLDING_ONE, + "palette": shapes.CYTOAVATAR_PALETTE, + "noRotate": True, +} + + +def create_avatar_objects(num_players, compounds): + """Returns list of avatar objects of length 'num_players'.""" + avatar_objects = [] + additional_game_objects = [] + for player_idx in range(0, num_players): + game_object = graph_utils.create_avatar_constant_self_view( + rewarding_reactions=DEFAULT_REWARDING_REACTIONS, + player_idx=player_idx, + target_sprite_self_empty=TARGET_SPRITE_SELF_EMPTY, + target_sprite_self_holds_one=TARGET_SPRITE_SELF_HOLDS_ONE) + avatar_objects.append(game_object) + + # Add the 
overlaid avatar vesicle on top of each avatar. + avatar_vesicle = graph_utils.create_vesicle( + player_idx=player_idx, + compounds=compounds, + reactivity_levels=REACTIVITY_LEVELS["vesicle"], + default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], + default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], + priority_mode=True) + additional_game_objects.append(avatar_vesicle) + + return avatar_objects, additional_game_objects + + +def get_config(): + """Default configuration for this substrate.""" + config = config_dict.ConfigDict() + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + # For debug only. + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(112, 200), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build this substrate.""" + del config + num_players = len(roles) + + # Must create compounds and reactions. 
+ compounds, reactions = graph_utils.graph_semantics(make_graph()) + + cell_prefabs = {} + cell_prefabs = graph_utils.add_compounds_to_prefabs_dictionary( + cell_prefabs, compounds, REACTIVITY_LEVELS["ground"], sprites=True, + default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], + default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], + priority_mode=True) + + avatar_objects, additional_objects = create_avatar_objects(num_players, + compounds) + + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="grid_land", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"]. + simulation={ + "map": ASCII_MAP, + "gameObjects": avatar_objects + additional_objects, + "scene": graph_utils.create_scene(reactions, + stochastic_episode_ending=True), + "prefabs": cell_prefabs, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/chemistry_metabolic_cycles.py b/meltingpot/python/configs/substrates/chemistry__two_metabolic_cycles.py similarity index 70% rename from meltingpot/python/configs/substrates/chemistry_metabolic_cycles.py rename to meltingpot/python/configs/substrates/chemistry__two_metabolic_cycles.py index a19f5321..918cea97 100644 --- a/meltingpot/python/configs/substrates/chemistry_metabolic_cycles.py +++ b/meltingpot/python/configs/substrates/chemistry__two_metabolic_cycles.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,15 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Configuration for Chemistry: Metabolic Cycles. +"""Configuration for Chemistry: Two Metabolic Cycles. -Example video: https://youtu.be/oFK9VujhpeI +Example video: https://youtu.be/kxMNAcJuXJE -Individuals benefit from two different food generating reaction cycles. Both -will run on their own (autocatalytically), but require energy to continue. -Bringing together side products from both cycles generates new energy such that -the cycles can continue. The population needs to keep both cycles running to get -high rewards. +Individuals benefit from two different food generating reaction cycles. +The cycles will run on their own (autocatalytically), but require energy to +continue. Bringing together side products from the two cycles generates new +energy such that the cycles can continue. The population needs to keep both of +these cycles running to get high rewards. Reactions are defined by a directed graph. Reactant nodes project into reaction nodes, which project out to product nodes. Reactions occur stochastically when @@ -29,20 +29,21 @@ reactant or a product). """ -import copy +from collections.abc import Mapping, Sequence +from typing import Any from ml_collections import config_dict import networkx as nx + +from meltingpot.python.configs.substrates import reaction_graph_utils as graph_utils from meltingpot.python.utils.substrates import colors -from meltingpot.python.utils.substrates import game_object_utils -from meltingpot.python.utils.substrates import reaction_graph_utils as graph_utils from meltingpot.python.utils.substrates import shapes from meltingpot.python.utils.substrates import specs # Map reaction to rewards. DEFAULT_REWARDING_REACTIONS = {"MetabolizeFood1": 1, "MetabolizeFood2": 1, - "MetabolizeXY": 10} + "MetabolizeXY": 10,} # Define the default reaction query configuration. It can be overridden on a per # compount basis. 
@@ -53,14 +54,15 @@ "low": 0.005, "medium": 0.001, "high": 0.9}, - "stomach": {"background": 0.0, + "vesicle": {"background": 0.0, "low": 0.0025, "medium": 0.25, "high": 0.9}, } -def dissipate_when_paired(g, reaction_name, compound): +def dissipate_when_paired(g: nx.MultiDiGraph, reaction_name: str, + compound: str): g.add_node(reaction_name, reaction=True) # Reactants: g.add_edge(compound, reaction_name) @@ -70,8 +72,9 @@ def dissipate_when_paired(g, reaction_name, compound): g.add_edge(reaction_name, "empty") -def cycle(g, reaction_prefix, intermediates, product, secondary_product=None, - food="food"): +def cycle(g: nx.MultiDiGraph, reaction_prefix: str, + intermediates: Sequence[str], product: str, + secondary_product: str, food: str = "food"): """Add a reaction cycle.""" # Reaction cycle x, reaction 1 reaction_1 = "{}1{}".format(reaction_prefix, product) @@ -150,14 +153,14 @@ def make_graph(): g.add_edge("InhibitY", "empty") g.add_edge("InhibitY", "empty") - # Food can be metabolized in the stomach. + # Food can be metabolized in the vesicle. g.add_node("MetabolizeFood1", reaction=True) # Reactants: g.add_edge("food1", "MetabolizeFood1") # Products: g.add_edge("MetabolizeFood1", "empty") - # Food can be metabolized in the stomach. + # Food can be metabolized in the vesicle. 
g.add_node("MetabolizeFood2", reaction=True) # Reactants: g.add_edge("food2", "MetabolizeFood2") @@ -200,22 +203,28 @@ def make_graph(): # Properties of compounds # Color: - g.nodes["ax"]["color"] = (153, 204, 255) # blue 1 - g.nodes["bx"]["color"] = (102, 204, 255) # blue 2 - g.nodes["cx"]["color"] = (51, 153, 255) # blue 3 + g.nodes["ax"]["color"] = (153, 204, 255, 255) # blue 1 + g.nodes["bx"]["color"] = (102, 204, 255, 255) # blue 2 + g.nodes["cx"]["color"] = (51, 153, 255, 255) # blue 3 + + g.nodes["ay"]["color"] = (102, 255, 153, 255) # green 1 + g.nodes["by"]["color"] = (102, 255, 102, 255) # green 2 + g.nodes["cy"]["color"] = (0, 255, 0, 255) # green 3 + + g.nodes["x"]["color"] = (0, 51, 204, 255) # dark blue + g.nodes["y"]["color"] = (0, 51, 0, 255) # dark green + + g.nodes["food1"]["color"] = (178, 151, 0, 255) # light gold + g.nodes["food1"]["sprite"] = graph_utils.FOOD_SHAPE - g.nodes["ay"]["color"] = (102, 255, 153) # green 1 - g.nodes["by"]["color"] = (102, 255, 102) # green 2 - g.nodes["cy"]["color"] = (0, 255, 0) # green 3 + g.nodes["food2"]["color"] = (255, 215, 0, 255) # gold + g.nodes["food2"]["sprite"] = graph_utils.FOOD_SHAPE - g.nodes["x"]["color"] = (0, 51, 204) # dark blue - g.nodes["y"]["color"] = (0, 51, 0) # dark green - g.nodes["food1"]["color"] = (255, 255, 0) # yellow - g.nodes["food2"]["color"] = (255, 215, 0) # gold - g.nodes["energy"]["color"] = (255, 0, 0) # red + g.nodes["energy"]["color"] = (255, 0, 0, 255) # red + g.nodes["energy"]["sprite"] = graph_utils.ENERGY_SHAPE - g.nodes["ix"]["color"] = (102, 153, 153) # greyish green - g.nodes["iy"]["color"] = (51, 102, 153) # greyish blue + g.nodes["ix"]["color"] = (102, 153, 153, 255) # greyish green + g.nodes["iy"]["color"] = (51, 102, 153, 255) # greyish blue # Reactivity: g.nodes["ax"]["reactivity"] = "high" @@ -234,6 +243,7 @@ def make_graph(): g.nodes["food1"]["reactivity"] = "medium" g.nodes["food2"]["reactivity"] = "medium" + g.nodes["energy"]["reactivity"] = "low" 
g.nodes["empty"]["reactivity"] = "background" @@ -254,9 +264,9 @@ def make_graph(): ~~~~~~~~~~~~~~~~~~~~~~~~~ ~2~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~c~~~~~~~~~~~~~~ -~~~~~~~~~~~~a~~~~~~~~~~~~ -~~~~~~~~~~b~~~~~~~~~~~~~~ +~~~~~~~c~~~~~~~~~~~~~~~~~ +~~~~~~~~~a~~~~~~~~~~~~~~~ +~~~~~~~b~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~ """ @@ -269,6 +279,10 @@ def make_graph(): "1": "ay", "2": "by", "3": "cy", + "4": "az", + "5": "bz", + "6": "cz", + "x": "food4", "h": "energy", } @@ -304,98 +318,55 @@ def make_graph(): IO_ACTION, ) -TARGET_SPRITE_SELF = { - "name": "Self", - "shape": shapes.AVATAR_DEFAULT, - "palette": shapes.get_palette((50, 100, 200)), - "noRotate": False, +TARGET_SPRITE_SELF_EMPTY = { + "name": "SelfEmpty", + "shape": shapes.CYTOAVATAR_EMPTY, + "palette": shapes.CYTOAVATAR_PALETTE, + "noRotate": True, +} +TARGET_SPRITE_SELF_HOLDS_ONE = { + "name": "SelfHoldsOne", + "shape": shapes.CYTOAVATAR_HOLDING_ONE, + "palette": shapes.CYTOAVATAR_PALETTE, + "noRotate": True, } -def create_avatar_objects(prefabs, num_players): +def create_avatar_objects(num_players, compounds): """Returns list of avatar objects of length 'num_players'.""" avatar_objects = [] additional_game_objects = [] for player_idx in range(0, num_players): game_object = graph_utils.create_avatar_constant_self_view( - DEFAULT_REWARDING_REACTIONS, player_idx, TARGET_SPRITE_SELF) + rewarding_reactions=DEFAULT_REWARDING_REACTIONS, + player_idx=player_idx, + target_sprite_self_empty=TARGET_SPRITE_SELF_EMPTY, + target_sprite_self_holds_one=TARGET_SPRITE_SELF_HOLDS_ONE) avatar_objects.append(game_object) - # Add the overlaid avatar stomach on top of each avatar. - avatar_stomach = copy.deepcopy(prefabs["avatar_stomach"]) - game_object_utils.get_first_named_component( - avatar_stomach, - "AvatarStomach")["kwargs"]["playerIndex"] = player_idx + 1 - additional_game_objects.append(avatar_stomach) + # Add the overlaid avatar vesicle on top of each avatar. 
+ avatar_vesicle = graph_utils.create_vesicle( + player_idx=player_idx, + compounds=compounds, + reactivity_levels=REACTIVITY_LEVELS["vesicle"], + default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], + default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], + priority_mode=True) + additional_game_objects.append(avatar_vesicle) return avatar_objects, additional_game_objects -def get_lab2d_settings(num_players): - """Returns the lab2d settings. - - Args: - num_players: the number of players in the game. - """ - - # Must create compounds and reactions. - compounds, reactions = graph_utils.graph_semantics(make_graph()) - - avatar_object_templates = { - "avatar": graph_utils.create_avatar(DEFAULT_REWARDING_REACTIONS), - "avatar_stomach": graph_utils.create_stomach( - compounds, - REACTIVITY_LEVELS["stomach"], - default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], - default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], - priority_mode=True), - } - cell_prefabs = {} - cell_prefabs = graph_utils.add_compounds_to_prefabs_dictionary( - cell_prefabs, compounds, REACTIVITY_LEVELS["ground"], sprites=True, - default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], - default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], - priority_mode=True) - - avatar_objects, additional_objects = create_avatar_objects( - avatar_object_templates, num_players) - - # Lua script configuration. 
- lab2d_settings = { - "levelName": "grid_land", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "topology": "BOUNDED", - "simulation": { - "map": ASCII_MAP, - "gameObjects": avatar_objects + additional_objects, - "scene": graph_utils.create_scene(reactions, - stochastic_episode_ending=True), - "prefabs": cell_prefabs, - "charPrefabMap": CHAR_PREFAB_MAP, - }, - } - - return lab2d_settings - - def get_config(): - """Default configuration for training on the grid_land level.""" + """Default configuration for this substrate.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 8 - - config.lab2d_settings = get_lab2d_settings(config.num_players) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. config.individual_observation_names = [ "RGB", + # For debug only. "POSITION", "ORIENTATION", ] @@ -407,9 +378,55 @@ def get_config(): config.action_spec = specs.action(len(ACTION_SET)) config.timestep_spec = specs.timestep({ "RGB": specs.OBSERVATION["RGB"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "WORLD.RGB": specs.rgb(112, 200), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build this substrate.""" + del config + num_players = len(roles) + + # Must create compounds and reactions. 
+ compounds, reactions = graph_utils.graph_semantics(make_graph()) + + cell_prefabs = {} + cell_prefabs = graph_utils.add_compounds_to_prefabs_dictionary( + cell_prefabs, compounds, REACTIVITY_LEVELS["ground"], sprites=True, + default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], + default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], + priority_mode=True) + + avatar_objects, additional_objects = create_avatar_objects(num_players, + compounds) + + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="grid_land", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"]. + simulation={ + "map": ASCII_MAP, + "gameObjects": avatar_objects + additional_objects, + "scene": graph_utils.create_scene(reactions, + stochastic_episode_ending=True), + "prefabs": cell_prefabs, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/chemistry__two_metabolic_cycles_with_distractors.py b/meltingpot/python/configs/substrates/chemistry__two_metabolic_cycles_with_distractors.py new file mode 100644 index 00000000..2c7165d2 --- /dev/null +++ b/meltingpot/python/configs/substrates/chemistry__two_metabolic_cycles_with_distractors.py @@ -0,0 +1,446 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Configuration for Chemistry: Two Metabolic Cycles With Distractors. + +Example video: + +Individuals benefit from two different food generating reaction cycles or from +holding a distractor molecule in their vesicle. The cycles will run on their own +(autocatalytically), but require energy to continue. +Bringing together side products from the two cycles generates new energy +such that the cycles can continue. The population needs to keep both of these +cycles running to get high rewards. + +Reactions are defined by a directed graph. Reactant nodes project into reaction +nodes, which project out to product nodes. Reactions occur stochastically when +all reactants are brought near one another. Agents can carry a single molecule +around the map with them at a time. Agents are rewarded when a specific reaction +occurs that involves the molecule they are currently carrying (as either a +reactant or a product). +""" + +from collections.abc import Mapping, Sequence +from typing import Any + +from ml_collections import config_dict +import networkx as nx + +from meltingpot.python.configs.substrates import reaction_graph_utils as graph_utils +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# Map reaction to rewards. +DEFAULT_REWARDING_REACTIONS = {"MetabolizeFood1": 1, + "MetabolizeFood2": 1, + "MetabolizeXY": 10, + "Holding": 0.1} + +# Define the default reaction query configuration. It can be overridden on a per +# compound basis. 
+DEFAULT_REACTION_CONFIG = {"radius": 1, "query_type": "disc"} + +REACTIVITY_LEVELS = { + "ground": {"background": 0.00001, + "low": 0.005, + "medium": 0.001, + "high": 0.9}, + "vesicle": {"background": 0.0, + "low": 0.0025, + "medium": 0.25, + "high": 0.9}, +} + + +def dissipate_when_paired(g: nx.MultiDiGraph, reaction_name: str, + compound: str): + g.add_node(reaction_name, reaction=True) + # Reactants: + g.add_edge(compound, reaction_name) + g.add_edge(compound, reaction_name) + # Products: + g.add_edge(reaction_name, "empty") + g.add_edge(reaction_name, "empty") + + +def cycle(g: nx.MultiDiGraph, reaction_prefix: str, + intermediates: Sequence[str], product: str, + secondary_product: str, food: str = "food"): + """Add a reaction cycle.""" + # Reaction cycle x, reaction 1 + reaction_1 = "{}1{}".format(reaction_prefix, product) + g.add_node(reaction_1, reaction=True) + # Reactants: + g.add_edge(intermediates[0], reaction_1) + g.add_edge(intermediates[1], reaction_1) + g.add_edge("empty", reaction_1) + # Products: + g.add_edge(reaction_1, intermediates[1]) + g.add_edge(reaction_1, intermediates[2]) + g.add_edge(reaction_1, food) + + # Reaction cycle x, reaction 2 + reaction_2 = "{}2{}".format(reaction_prefix, product) + g.add_node(reaction_2, reaction=True) + # Reactants: + g.add_edge(intermediates[1], reaction_2) + g.add_edge(intermediates[2], reaction_2) + g.add_edge("energy", reaction_2) + # Products: + g.add_edge(reaction_2, intermediates[2]) + g.add_edge(reaction_2, intermediates[0]) + g.add_edge(reaction_2, "energy") + + # Reaction cycle x, reaction 3 + reaction_3 = "{}3{}".format(reaction_prefix, product) + g.add_node(reaction_3, reaction=True) + # Reactants: + g.add_edge(intermediates[2], reaction_3) + g.add_edge(intermediates[0], reaction_3) + g.add_edge("empty", reaction_3) + if secondary_product is not None: + g.add_edge("empty", reaction_3) + # Products: + g.add_edge(reaction_3, intermediates[0]) + g.add_edge(reaction_3, intermediates[1]) + 
g.add_edge(reaction_3, product) + if secondary_product is not None: + g.add_edge(reaction_3, secondary_product) + + +def null(g: nx.MultiDiGraph, reaction_name: str, compound: str): + """Chemical state transition pattern.""" + + g.add_node(reaction_name, reaction=True) + g.add_edge(compound, reaction_name) + g.add_edge(reaction_name, compound) + + +def make_graph(): + """User defined graph construction function using networkx.""" + # Note: You can copy-paste this function into colab to visualize the graph. + g = nx.MultiDiGraph() + # First add the "empty" and "activated" nodes, which are always present. + graph_utils.add_system_nodes(g) + + cycle(g, "R", + intermediates=["ax", "bx", "cx"], + product="x", + secondary_product="iy", + food="food1") + cycle(g, "R", + intermediates=["ay", "by", "cy"], + product="y", + secondary_product="ix", + food="food2") + null(g, "Holding", "distractor") # Holding the distractor provides reward. + + # Inhibit x with a product of the y-producing cycle. + g.add_node("InhibitX", reaction=True) + # Reactants: + g.add_edge("x", "InhibitX") + g.add_edge("ix", "InhibitX") + # Products: + g.add_edge("InhibitX", "empty") + g.add_edge("InhibitX", "empty") + + # Inhibit y with a product of the x-producing cycle. + g.add_node("InhibitY", reaction=True) + # Reactants: + g.add_edge("y", "InhibitY") + g.add_edge("iy", "InhibitY") + # Products: + g.add_edge("InhibitY", "empty") + g.add_edge("InhibitY", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood1", reaction=True) + # Reactants: + g.add_edge("food1", "MetabolizeFood1") + # Products: + g.add_edge("MetabolizeFood1", "empty") + + # Food can be metabolized in the vesicle. + g.add_node("MetabolizeFood2", reaction=True) + # Reactants: + g.add_edge("food2", "MetabolizeFood2") + # Products: + g.add_edge("MetabolizeFood2", "empty") + + # Food spontaneously appears from time to time. 
+ g.add_node("SpawnFood1", reaction=True) + # Reactants: + g.add_edge("empty", "SpawnFood1") + # Products: + g.add_edge("SpawnFood1", "food1") + + # Food spontaneously appears from time to time. + g.add_node("SpawnFood2", reaction=True) + # Reactants: + g.add_edge("empty", "SpawnFood2") + # Products: + g.add_edge("SpawnFood2", "food2") + + # x and y can be combined to produce energy. + g.add_node("MetabolizeXY", reaction=True) + # Reactants: + g.add_edge("x", "MetabolizeXY") + g.add_edge("y", "MetabolizeXY") + # Products: + g.add_edge("MetabolizeXY", "energy") + g.add_edge("MetabolizeXY", "energy") + + # Energy spontaneously dissipates. + g.add_node("DissipateEnergy", reaction=True) + # Reactants: + g.add_edge("energy", "DissipateEnergy") + # Products: + g.add_edge("DissipateEnergy", "empty") + + # Prevent inhibitors from accumulating by dissipating them whenever they pair. + dissipate_when_paired(g, "DissipateIX", "ix") + dissipate_when_paired(g, "DissipateIY", "iy") + + # Properties of compounds + # Color: + g.nodes["ax"]["color"] = (153, 204, 255, 255) # blue 1 + g.nodes["bx"]["color"] = (102, 204, 255, 255) # blue 2 + g.nodes["cx"]["color"] = (51, 153, 255, 255) # blue 3 + + g.nodes["ay"]["color"] = (102, 255, 153, 255) # green 1 + g.nodes["by"]["color"] = (102, 255, 102, 255) # green 2 + g.nodes["cy"]["color"] = (0, 255, 0, 255) # green 3 + + g.nodes["x"]["color"] = (0, 51, 204, 255) # dark blue + g.nodes["y"]["color"] = (0, 51, 0, 255) # dark green + + g.nodes["food1"]["color"] = (178, 151, 0, 255) # light gold + g.nodes["food1"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["food2"]["color"] = (255, 215, 0, 255) # gold + g.nodes["food2"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["distractor"]["color"] = (75, 0, 130, 255) # indigo + g.nodes["distractor"]["sprite"] = graph_utils.FOOD_SHAPE + + g.nodes["energy"]["color"] = (255, 0, 0, 255) # red + g.nodes["energy"]["sprite"] = graph_utils.ENERGY_SHAPE + + g.nodes["ix"]["color"] = (102, 153, 153, 255) # 
greyish green + g.nodes["iy"]["color"] = (51, 102, 153, 255) # greyish blue + + # Reactivity: + g.nodes["ax"]["reactivity"] = "high" + g.nodes["bx"]["reactivity"] = "high" + g.nodes["cx"]["reactivity"] = "high" + + g.nodes["ay"]["reactivity"] = "high" + g.nodes["by"]["reactivity"] = "high" + g.nodes["cy"]["reactivity"] = "high" + + g.nodes["x"]["reactivity"] = "medium" + g.nodes["y"]["reactivity"] = "medium" + + g.nodes["ix"]["reactivity"] = "high" + g.nodes["iy"]["reactivity"] = "high" + + g.nodes["food1"]["reactivity"] = "medium" + g.nodes["food2"]["reactivity"] = "medium" + g.nodes["distractor"]["reactivity"] = "medium" + g.nodes["energy"]["reactivity"] = "low" + g.nodes["empty"]["reactivity"] = "background" + + # The following commented line documents how to set the query config for a + # specific compound, overriding the default query configuration. + # g.nodes["food1"]["query_config"] = {"radius": 3, "queryType": "diamond"} + + return g + +ASCII_MAP = """ +~~~~~~~~~~~a~~~~~~~~~~~~~ +~~x~~~~~c~~~~~~~~~~~~~~~~ +~~~~~~~~~~~b~~~~~~~x~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~1~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~ +1~~3~~~~hhhhhhh~~~~~3~~2~ +~~~~~~~~~~~~~~~~~~~~~~~~~ +~2~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~c~~~~~~~~~~~~~~~~~ +~~x~~~~~~a~~~~~~~~~~~x~~~ +~~~~~~~b~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~ +""" + +# `prefab` determines which compound to use for each `char` in the ascii map. +CHAR_PREFAB_MAP = { + "~": "empty", + "a": "ax", + "b": "bx", + "c": "cx", + "1": "ay", + "2": "by", + "3": "cy", + "4": "az", + "5": "bz", + "6": "cz", + "x": "distractor", + "h": "energy", +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. +NUM_PLAYERS_UPPER_BOUND = 60 +PLAYER_COLOR_PALETTES = [] +for i in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[i])) + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "ioAction": 0} +FORWARD = {"move": 1, "turn": 0, "ioAction": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "ioAction": 0} +BACKWARD = {"move": 3, "turn": 0, "ioAction": 0} +STEP_LEFT = {"move": 4, "turn": 0, "ioAction": 0} +TURN_LEFT = {"move": 0, "turn": -1, "ioAction": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "ioAction": 0} +IO_ACTION = {"move": 0, "turn": 0, "ioAction": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + IO_ACTION, +) + +TARGET_SPRITE_SELF_EMPTY = { + "name": "SelfEmpty", + "shape": shapes.CYTOAVATAR_EMPTY, + "palette": shapes.CYTOAVATAR_PALETTE, + "noRotate": True, +} +TARGET_SPRITE_SELF_HOLDS_ONE = { + "name": "SelfHoldsOne", + "shape": shapes.CYTOAVATAR_HOLDING_ONE, + "palette": shapes.CYTOAVATAR_PALETTE, + "noRotate": True, +} + + +def create_avatar_objects(num_players, compounds): + """Returns list of avatar objects of length 'num_players'.""" + avatar_objects = [] + additional_game_objects = [] + for player_idx in range(0, num_players): + game_object = graph_utils.create_avatar_constant_self_view( + rewarding_reactions=DEFAULT_REWARDING_REACTIONS, + player_idx=player_idx, + target_sprite_self_empty=TARGET_SPRITE_SELF_EMPTY, + target_sprite_self_holds_one=TARGET_SPRITE_SELF_HOLDS_ONE) + avatar_objects.append(game_object) + + # Add the overlaid avatar vesicle on top of each avatar. 
+ avatar_vesicle = graph_utils.create_vesicle( + player_idx=player_idx, + compounds=compounds, + reactivity_levels=REACTIVITY_LEVELS["vesicle"], + default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], + default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], + priority_mode=True) + additional_game_objects.append(avatar_vesicle) + + return avatar_objects, additional_game_objects + + +def get_config(): + """Default configuration for this substrate.""" + config = config_dict.ConfigDict() + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + # For debug only. + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(112, 200), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build this substrate.""" + del config + num_players = len(roles) + + # Must create compounds and reactions. 
+ compounds, reactions = graph_utils.graph_semantics(make_graph()) + + cell_prefabs = {} + cell_prefabs = graph_utils.add_compounds_to_prefabs_dictionary( + cell_prefabs, compounds, REACTIVITY_LEVELS["ground"], sprites=True, + default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], + default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], + priority_mode=True) + + avatar_objects, additional_objects = create_avatar_objects(num_players, + compounds) + + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="grid_land", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"]. + simulation={ + "map": ASCII_MAP, + "gameObjects": avatar_objects + additional_objects, + "scene": graph_utils.create_scene(reactions, + stochastic_episode_ending=True), + "prefabs": cell_prefabs, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/chemistry_branched_chain_reaction.py b/meltingpot/python/configs/substrates/chemistry_branched_chain_reaction.py deleted file mode 100644 index 129b6246..00000000 --- a/meltingpot/python/configs/substrates/chemistry_branched_chain_reaction.py +++ /dev/null @@ -1,317 +0,0 @@ -# Copyright 2020 DeepMind Technologies Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Configuration for Chemistry: Branched Chain Reaction. - -Example video: https://youtu.be/ZhRB-_ruoH8 - -Individuals are rewarded by driving chemical reactions involving specific -molecules. They need to suitably coordinate the alternation of branches while -keeping certain elements apart that would otherwise react unfavourably, so as -not to run out of the molecules required for continuing the chain. Combining -molecules efficiently requires coordination but can also lead to exclusion of -players. - -Reactions are defined by a directed graph. Reactant nodes project into reaction -nodes, which project out to product nodes. Reactions occur stochastically when -all reactants are brought near one another. Agents can carry a single molecule -around the map with them at a time. Agents are rewarded when a specific reaction -occurs that involves the molecule they are currently carrying (as either a -reactant or a product). -""" - -import copy - -from ml_collections import config_dict -import networkx as nx -from meltingpot.python.utils.substrates import colors -from meltingpot.python.utils.substrates import game_object_utils -from meltingpot.python.utils.substrates import reaction_graph_utils as graph_utils -from meltingpot.python.utils.substrates import shapes -from meltingpot.python.utils.substrates import specs - -# Map reaction to rewards. -DEFAULT_REWARDING_REACTIONS = {"LowerX": 1, - "HigherX": 1, - "LowerY": 1, - "HigherY": 1, - "DestroyB": -1, - "DestroyD": -1} - -# Define the default reaction query configuration. It can be overridden on a per -# compount basis. 
-DEFAULT_REACTION_CONFIG = {"radius": 1, "query_type": "disc"} - -REACTIVITY_LEVELS = { - "ground": {"background": 0.00001, - "low": 0.88, - "medium": 0.89, - "high": 0.9}, - "stomach": {"background": 0.0, - "low": 0.88, - "medium": 0.89, - "high": 0.9}, -} - - -def make_graph(): - """User defined graph construction function using networkx.""" - # Note: You can copy-paste this function into colab to visualize the graph. - g = nx.MultiDiGraph() - # First add the "empty" and "activated" nodes, which are always present. - graph_utils.add_system_nodes(g) - - g.add_node("LowerX", reaction=True) - # Reactants: - g.add_edge("ax", "LowerX") - g.add_edge("bx", "LowerX") - # Products: - g.add_edge("LowerX", "c") - g.add_edge("LowerX", "by") - - g.add_node("LowerY", reaction=True) - # Reactants: - g.add_edge("ay", "LowerY") - g.add_edge("by", "LowerY") - # Products: - g.add_edge("LowerY", "c") - g.add_edge("LowerY", "bx") - - # Inhibit y with a product of the x-producing cycle. - g.add_node("HigherX", reaction=True) - # Reactants: - g.add_edge("c", "HigherX") - g.add_edge("dx", "HigherX") - # Products: - g.add_edge("HigherX", "ay") - g.add_edge("HigherX", "dy") - - g.add_node("HigherY", reaction=True) - g.add_edge("c", "HigherY") - g.add_edge("dy", "HigherY") - # Products: - g.add_edge("HigherY", "ax") - g.add_edge("HigherY", "dx") - - g.add_node("DestroyB", reaction=True) - g.add_edge("bx", "DestroyB") - g.add_edge("by", "DestroyB") - # Products: - g.add_edge("DestroyB", "empty") - g.add_edge("DestroyB", "empty") - - g.add_node("DestroyD", reaction=True) - g.add_edge("dx", "DestroyD") - g.add_edge("dy", "DestroyD") - # Products: - g.add_edge("DestroyD", "empty") - g.add_edge("DestroyD", "empty") - - # Properties of compounds - # Color: - g.nodes["ax"]["color"] = (153, 204, 255) # blue 1 - g.nodes["bx"]["color"] = (102, 154, 255) # blue 2 - g.nodes["dx"]["color"] = (201, 15, 255) # blue 3 = purple - - g.nodes["ay"]["color"] = (102, 255, 153) # green 1 - g.nodes["by"]["color"] 
= (52, 255, 102) # green 2 - g.nodes["dy"]["color"] = (0, 255, 0) # green 3 - - g.nodes["c"]["color"] = (255, 0, 0) # red - - # Reactivity: - g.nodes["ax"]["reactivity"] = "low" - g.nodes["bx"]["reactivity"] = "high" - g.nodes["dx"]["reactivity"] = "high" - - g.nodes["ay"]["reactivity"] = "low" - g.nodes["by"]["reactivity"] = "high" - g.nodes["dy"]["reactivity"] = "high" - - g.nodes["c"]["reactivity"] = "medium" - - g.nodes["empty"]["reactivity"] = "background" - - # The following commented line documents how to set the query config for a - # specific compound, overriding the default query configuration. - # g.nodes["food1"]["query_config"] = {"radius": 3, "queryType": "diamond"} - - return g - -ASCII_MAP = """ -~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~b~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~ -~~d~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~a~~~~~~1~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~3~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~ -~~~~~~~~~~~2~~~~~~~~~~~~~ -~~~~~~~~~~~~~~~~~~~~~~~~~ -""" - -# `prefab` determines which compound to use for each `char` in the ascii map. -CHAR_PREFAB_MAP = { - "~": "empty", - "a": "ax", - "b": "bx", - "d": "dx", - "1": "ay", - "2": "by", - "3": "dy", - "c": "c", -} - -# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use -# for the player at the corresponding index. -NUM_PLAYERS_UPPER_BOUND = 60 -PLAYER_COLOR_PALETTES = [] -for i in range(NUM_PLAYERS_UPPER_BOUND): - PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[i])) - -# Primitive action components. 
-# pylint: disable=bad-whitespace -# pyformat: disable -NOOP = {"move": 0, "turn": 0, "ioAction": 0} -FORWARD = {"move": 1, "turn": 0, "ioAction": 0} -STEP_RIGHT = {"move": 2, "turn": 0, "ioAction": 0} -BACKWARD = {"move": 3, "turn": 0, "ioAction": 0} -STEP_LEFT = {"move": 4, "turn": 0, "ioAction": 0} -TURN_LEFT = {"move": 0, "turn": -1, "ioAction": 0} -TURN_RIGHT = {"move": 0, "turn": 1, "ioAction": 0} -IO_ACTION = {"move": 0, "turn": 0, "ioAction": 1} -# pyformat: enable -# pylint: enable=bad-whitespace - -ACTION_SET = ( - NOOP, - FORWARD, - BACKWARD, - STEP_LEFT, - STEP_RIGHT, - TURN_LEFT, - TURN_RIGHT, - IO_ACTION, -) - -TARGET_SPRITE_SELF = { - "name": "Self", - "shape": shapes.AVATAR_DEFAULT, - "palette": shapes.get_palette((50, 100, 200)), - "noRotate": False, -} - - -def create_avatar_objects(prefabs, num_players): - """Returns list of avatar objects of length 'num_players'.""" - avatar_objects = [] - additional_game_objects = [] - for player_idx in range(0, num_players): - game_object = graph_utils.create_avatar_constant_self_view( - DEFAULT_REWARDING_REACTIONS, player_idx, TARGET_SPRITE_SELF) - avatar_objects.append(game_object) - - # Add the overlaid avatar stomach on top of each avatar. - avatar_stomach = copy.deepcopy(prefabs["avatar_stomach"]) - game_object_utils.get_first_named_component( - avatar_stomach, - "AvatarStomach")["kwargs"]["playerIndex"] = player_idx + 1 - additional_game_objects.append(avatar_stomach) - - return avatar_objects, additional_game_objects - - -def get_lab2d_settings(num_players): - """Returns the lab2d settings. - - Args: - num_players: the number of players in the game. - """ - - # Must create compounds and reactions. 
- compounds, reactions = graph_utils.graph_semantics(make_graph()) - - avatar_object_templates = { - "avatar": graph_utils.create_avatar(DEFAULT_REWARDING_REACTIONS), - "avatar_stomach": graph_utils.create_stomach( - compounds, - REACTIVITY_LEVELS["stomach"], - default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], - default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], - priority_mode=True), - } - cell_prefabs = {} - cell_prefabs = graph_utils.add_compounds_to_prefabs_dictionary( - cell_prefabs, compounds, REACTIVITY_LEVELS["ground"], sprites=True, - default_reaction_radius=DEFAULT_REACTION_CONFIG["radius"], - default_reaction_query_type=DEFAULT_REACTION_CONFIG["query_type"], - priority_mode=True) - - avatar_objects, additional_objects = create_avatar_objects( - avatar_object_templates, num_players) - - # Lua script configuration. - lab2d_settings = { - "levelName": "grid_land", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "topology": "BOUNDED", - "simulation": { - "map": ASCII_MAP, - "gameObjects": avatar_objects + additional_objects, - "scene": graph_utils.create_scene(reactions, - stochastic_episode_ending=True), - "prefabs": cell_prefabs, - "charPrefabMap": CHAR_PREFAB_MAP, - }, - } - - return lab2d_settings - - -def get_config(): - """Default configuration for training on the grid_land level.""" - config = config_dict.ConfigDict() - - # Basic configuration. - config.num_players = 8 - - config.lab2d_settings = get_lab2d_settings(config.num_players) - - # Action set configuration. - config.action_set = ACTION_SET - # Observation format configuration. - config.individual_observation_names = [ - "RGB", - "POSITION", - "ORIENTATION", - ] - config.global_observation_names = [ - "WORLD.RGB", - ] - - # The specs of the environment (from a single-agent perspective). 
- config.action_spec = specs.action(len(ACTION_SET)) - config.timestep_spec = specs.timestep({ - "RGB": specs.OBSERVATION["RGB"], - "POSITION": specs.OBSERVATION["POSITION"], - "ORIENTATION": specs.OBSERVATION["ORIENTATION"], - "WORLD.RGB": specs.rgb(88, 200), - }) - - return config diff --git a/meltingpot/python/configs/substrates/chicken_in_the_matrix.py b/meltingpot/python/configs/substrates/chicken_in_the_matrix__arena.py similarity index 80% rename from meltingpot/python/configs/substrates/chicken_in_the_matrix.py rename to meltingpot/python/configs/substrates/chicken_in_the_matrix__arena.py index 7abc8131..b4512439 100644 --- a/meltingpot/python/configs/substrates/chicken_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/chicken_in_the_matrix__arena.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Chicken (hawk - dove) in the Matrix. -Example video: https://youtu.be/uhAb2busSDY +Example video: https://youtu.be/94DHJ6BVEJM See _Running with Scissors in the Matrix_ for a general description of the game dynamics. Here the payoff matrix represents the Chicken game. `K = 2` @@ -22,10 +22,11 @@ Players have the default `11 x 11` (off center) observation window. 
""" -import copy -from typing import Any, Dict, Iterable, Sequence, Tuple +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -47,7 +48,7 @@ # The procedural generator replaces all 'a' chars in the default map with chars # representing specific resources, i.e. with either '1' or '2'. -DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWWWW WPPPP W W PPPPW WPPPP PPPPW @@ -107,10 +108,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -150,20 +147,24 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } +# Remove the first entry from human_readable_colors after using it for the self +# color to prevent it from being used again as another avatar color. +human_readable_colors = list(colors.human_readable) +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette(human_readable_colors.pop(0)), + "noRotate": True, +} # PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use # for the player at the corresponding index. -NUM_PLAYERS_UPPER_BOUND = 32 PLAYER_COLOR_PALETTES = [] -for idx in range(NUM_PLAYERS_UPPER_BOUND): - PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) +for human_readable_color in human_readable_colors: + PLAYER_COLOR_PALETTES.append(shapes.get_palette(human_readable_color)) # Primitive action components. 
# pylint: disable=bad-whitespace @@ -190,13 +191,6 @@ INTERACT, ) -TARGET_SPRITE_SELF = { - "name": "Self", - "shape": shapes.CUTE_AVATAR, - "palette": shapes.get_palette((50, 100, 200)), - "noRotate": True, -} - def create_scene(): """Creates the global scene.""" @@ -214,15 +208,13 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { - "zero_initial_inventory": True, + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "matrix": [ # row player chooses a row of this matrix. # D H (conventionally D = dove and H = hawk) @@ -235,8 +227,20 @@ def create_scene(): [3, 5], # D [2, 0], # H ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue + (0.0, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 5.0) + ], } }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.2 + } + } ] } return scene @@ -263,10 +267,6 @@ def create_resource_prefab(resource_id, color_data): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -286,17 +286,17 @@ def create_resource_prefab(resource_id, color_data): "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", - "regenerationRate": 0.005, - "regenerationDelay": 50 + "regenerationRate": 0.04, + "regenerationDelay": 10, }, }, { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 1, + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. 
+ "initialHealth": 3, }, }, ] @@ -351,10 +351,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -362,7 +358,8 @@ def create_avatar_object(player_idx: int, "renderMode": "ascii_shape", "spriteNames": [source_sprite_self], "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [shapes.get_palette(colors.palette[player_idx])], + "palettes": [shapes.get_palette( + human_readable_colors[player_idx])], "noRotates": [True] } }, @@ -408,14 +405,22 @@ def create_avatar_object(player_idx: int, { "component": "GameInteractionZapper", "kwargs": { - "cooldownTime": 32, + "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 200, + "framesTillRespawn": 50, "numResources": NUM_RESOURCES, - "reset_winner_inventory": False, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, "reset_loser_inventory": True, + # Both players get removed after each interaction. "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, } }, { @@ -433,13 +438,15 @@ def create_avatar_object(player_idx: int, "component": "Taste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, } }, { "component": "InteractionTaste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. 
- "zeroDefaultInteractionReward": True, + "zeroDefaultInteractionReward": False, "extraReward": 1.0, } }, @@ -464,6 +471,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -483,46 +491,15 @@ def create_avatar_objects(num_players: int) -> Sequence[PrefabConfig]: for player_idx in range(num_players): avatar = create_avatar_object(player_idx, TARGET_SPRITE_SELF) avatar_objects.append(avatar) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) return avatar_objects -def create_lab2d_settings( - num_players: int, - ascii_map_string: str, - settings_overrides: Iterable[Tuple[str, Any]] = ()) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - num_players: (int) the number of players. - ascii_map_string: ascii map. - settings_overrides: (key, value) overrides for default settings. - """ - settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map_string, - "gameObjects": create_avatar_objects(num_players=num_players), - "scene": copy.deepcopy(create_scene()), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - } - } - settings.update(settings_overrides) - return settings - - -def get_config(factory=create_lab2d_settings): - """Default config for chicken in the matrix.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 8 - config.lab2d_settings = factory(config.num_players, DEFAULT_ASCII_MAP) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. 
@@ -530,6 +507,7 @@ def get_config(factory=create_lab2d_settings): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -544,10 +522,42 @@ def get_config(factory=create_lab2d_settings): "RGB": specs.OBSERVATION["RGB"], "INVENTORY": specs.inventory(2), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(2), "WORLD.RGB": specs.rgb(192, 200), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/chicken_in_the_matrix__repeated.py b/meltingpot/python/configs/substrates/chicken_in_the_matrix__repeated.py new file mode 100644 index 00000000..d23653f8 --- /dev/null +++ b/meltingpot/python/configs/substrates/chicken_in_the_matrix__repeated.py @@ -0,0 +1,605 @@ +# Copyright 2022 DeepMind Technologies Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Chicken in the Matrix (two player, repeated version). + +Example video: https://youtu.be/bFwV-udmRb4 + +See _Running with Scissors in the Matrix_ for a general description of the +game dynamics. Here the payoff matrix represents the Chicken game. `K = 2` +resources represent "hawk" and "dove" pure strategies. + +Players have a `5 x 5` observation window. + +The episode has a chance of ending stochastically on every 100 step interval +after step 1000. This usually allows time for 8 or more interactions. +""" + +from typing import Any, Dict, Mapping, Sequence, Tuple + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# The number of resources must match the (square) size of the matrix. +NUM_RESOURCES = 2 + +# This color is green. +RESOURCE1_COLOR = (30, 225, 185, 255) +RESOURCE1_HIGHLIGHT_COLOR = (98, 234, 206, 255) +RESOURCE1_COLOR_DATA = (RESOURCE1_COLOR, RESOURCE1_HIGHLIGHT_COLOR) +# This color is red. 
+RESOURCE2_COLOR = (225, 30, 70, 255) +RESOURCE2_HIGHLIGHT_COLOR = (234, 98, 126, 255) +RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWW +Wn n nW +W WWW W W W WW W +W W 11a W a22 W W +Wn WW 11a W a22 WW nW +W 11a a22 W +W W +Wn WW WW n WW WWW nW +W W +W 22a W a11 W +Wn W 22a W a11 W nW +W W 22a W a11 WW W +W WWWW W W W WWW W +Wn n nW +WWWWWWWWWWWWWWWWWWWWWWW +""" + +_resource_names = [ + "resource_class1", # Dove + "resource_class2", # Hawk +] + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "a": {"type": "choice", "list": _resource_names}, + "1": _resource_names[0], + "2": _resource_names[1], + "n": "spawn_point", + "W": "wall", +} + +_COMPASS = ["N", "E", "S", "W"] + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall"], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [False] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gameInteraction" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. 
+NUM_PLAYERS_UPPER_BOUND = 8 +PLAYER_COLOR_PALETTES = [] +for idx in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((50, 100, 200)), + "noRotate": True, +} + +TARGET_SPRITE_OTHER = { + "name": "Other", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((200, 100, 50)), + "noRotate": True, +} + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TheMatrix", + "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, + "matrix": [ + # row player chooses a row of this matrix. + # D H (conventionally D = dove and H = hawk) + [3, 2], # D + [5, 0], # H + ], + "columnPlayerMatrix": [ + # column player chooses a column of this matrix. 
+ # D H (conventionally D = dove and H = hawk) + [3, 5], # D + [2, 0], # H + ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue + (0.0, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 5.0) + ], + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.1 + } + } + ] + } + return scene + + +def create_resource_prefab( + resource_id: int, + resource_shape: str, + resource_palette: Dict[str, Tuple[int, int, int, int]]): + """Creates resource prefab with provided resource_id, shape, and palette.""" + resource_name = "resource_class{}".format(resource_id) + resource_prefab = { + "name": resource_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": resource_name, + "stateConfigs": [ + {"state": resource_name + "_wait", + "groups": ["resourceWaits"]}, + {"state": resource_name, + "layer": "lowerPhysical", + "sprite": resource_name + "_sprite"}, + ] + }, + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [resource_name + "_sprite"], + "spriteShapes": [resource_shape], + "palettes": [resource_palette], + "noRotates": [True] + }, + }, + { + "component": "Resource", + "kwargs": { + "resourceClass": resource_id, + "visibleType": resource_name, + "waitState": resource_name + "_wait", + "regenerationRate": 0.02, + "regenerationDelay": 10, + }, + }, + { + "component": "Destroyable", + "kwargs": { + "waitState": resource_name + "_wait", + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. 
+ "initialHealth": 3, + }, + }, + ] + } + return resource_prefab + + +def create_avatar_object( + player_idx: int, + all_source_sprite_names: Sequence[str], + target_sprite_self: Dict[str, Any], + target_sprite_other: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: + """Create an avatar object given self vs other sprite data.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. + source_sprite_self = "Avatar" + str(lua_index) + custom_sprite_map = {source_sprite_self: target_sprite_self["name"]} + for name in all_source_sprite_names: + if name != source_sprite_self: + custom_sprite_map[name] = target_sprite_other["name"] + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "colored_square", + "spriteNames": [source_sprite_self], + # A white square should never be displayed. It will always be + # remapped since this is self vs other observation mode. 
+ "spriteRGBColors": [(255, 255, 255, 255)], + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [target_sprite_self["name"], + target_sprite_other["name"]], + "customSpriteShapes": [target_sprite_self["shape"], + target_sprite_other["shape"]], + "customPalettes": [target_sprite_self["palette"], + target_sprite_other["palette"]], + "customNoRotates": [target_sprite_self["noRotate"], + target_sprite_other["noRotate"]], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 2, + "right": 2, + "forward": 3, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + # The following kwarg makes it possible to get rewarded even + # on frames when an avatar is "dead". It is needed for in the + # matrix games in order to correctly handle the case of two + # players getting hit simultaneously by the same beam. + "skipWaitStateRewards": False, + } + }, + { + "component": "GameInteractionZapper", + "kwargs": { + "cooldownTime": 2, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 5, + "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. 
+ "freezeOnInteraction": 16, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GameInteractionZapper", + } + }, + { + "component": "InventoryObserver", + "kwargs": { + } + }, + { + "component": "SpawnResourcesWhenAllPlayersZapped", + }, + { + "component": "Taste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, + } + }, + { + "component": "InteractionTaste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + "zeroDefaultInteractionReward": turn_off_default_reward, + "extraReward": 1.0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + # Report the inventories of both players involved in + # an interaction on this frame formatted as + # (self inventory, partner inventory). + "name": "INTERACTION_INVENTORIES", + "type": "tensor.DoubleTensor", + "shape": (2, NUM_RESOURCES), + "component": "GameInteractionZapper", + "variable": "latest_interaction_inventories", + }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), + ] + } + }, + ] + } + return avatar_object + + +def create_prefabs(): + """Returns a dictionary mapping names to template game objects.""" + prefabs = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + } + prefabs["resource_class1"] = create_resource_prefab( + 1, shapes.BUTTON, {"*": RESOURCE1_COLOR_DATA[0], + "#": RESOURCE1_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class2"] = create_resource_prefab( + 2, shapes.BUTTON, {"*": RESOURCE2_COLOR_DATA[0], + "#": RESOURCE2_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + return prefabs + + +def get_all_source_sprite_names(num_players): + all_source_sprite_names = [] + for player_idx in range(0, num_players): + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + all_source_sprite_names.append("Avatar" + str(lua_index)) + + return all_source_sprite_names + + +def create_avatar_objects(num_players, + turn_off_default_reward: bool = False): + """Returns list of avatar objects of length 'num_players'.""" + all_source_sprite_names = get_all_source_sprite_names(num_players) + avatar_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object( + player_idx, + all_source_sprite_names, + TARGET_SPRITE_SELF, + TARGET_SPRITE_OTHER, + turn_off_default_reward=turn_off_default_reward) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(game_object) + avatar_objects.append(readiness_marker) + + return avatar_objects + + +def create_world_sprite_map( + num_players: int, target_sprite_other: Dict[str, Any]) -> Dict[str, str]: + all_source_sprite_names = get_all_source_sprite_names(num_players) + world_sprite_map = {} + for name in all_source_sprite_names: + world_sprite_map[name] = target_sprite_other["name"] + + return world_sprite_map + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "INVENTORY", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + "INTERACTION_INVENTORIES", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "INVENTORY": specs.inventory(2), + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "INTERACTION_INVENTORIES": specs.interaction_inventories(2), + "WORLD.RGB": specs.rgb(120, 184), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode).
+ "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/clean_up.py b/meltingpot/python/configs/substrates/clean_up.py index 7d494706..acdac664 100644 --- a/meltingpot/python/configs/substrates/clean_up.py +++ b/meltingpot/python/configs/substrates/clean_up.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Clean Up. -Example video: https://youtu.be/jOeIunFtTS0 +Example video: https://youtu.be/TqiJYxOwdxw Clean Up is a seven player game. Players are rewarded for collecting apples. In Clean Up, apples grow in an orchard at a rate inversely related to the @@ -39,9 +39,10 @@ Processing Systems (pp. 3330-3340). """ -from typing import Any, Dict +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -57,17 +58,17 @@ WHFFHFFHHFHFHFHFHFHFHHFHFFFHFW WHFHFHFFHFHFHFHFHFHFHHFHFFFHFW WHFFFFFFHFHFHFHFHFHFHHFHFFFHFW -W HFHHHHHH W -W P P SSS W -W P P P SS P W -W P PPSS W -W P P SS P W -W P SS P W -W P P SS W -W P SS P W -W P P PSS W -W B B B B B B B B B SSB B B BW -WBBBBBBBBBBBBBBBBBBBBBBBBBBBBW +W==============+~FHHHHHHf====W +W P P ===+~SSf W +W P P P <~Sf P W +W P P<~S> W +W P P <~S> P W +W P <~S>P W +W P P<~S> W +W P <~S> P W +W P P <~S> W +W^T^T^T^T^T^T^T^T^T;~S,^T^T^TW +WBBBBBBBBBBBBBBBBBBBssBBBBBBBW WBBBBBBBBBBBBBBBBBBBBBBBBBBBBW WBBBBBBBBBBBBBBBBBBBBBBBBBBBBW WBBBBBBBBBBBBBBBBBBBBBBBBBBBBW @@ -77,8 +78,20 @@ # Map a character to the prefab it represents in the ASCII map. 
CHAR_PREFAB_MAP = { "W": "wall", - "P": "spawn_point", - "B": "potential_apple", + " ": "sand", + "P": {"type": "all", "list": ["sand", "spawn_point"]}, + "B": {"type": "all", "list": ["grass", "potential_apple"]}, + "s": {"type": "all", "list": ["grass", "shadow_n"]}, + "+": {"type": "all", "list": ["sand", "shadow_e", "shadow_n"]}, + "f": {"type": "all", "list": ["sand", "shadow_w", "shadow_n"]}, + ";": {"type": "all", "list": ["sand", "grass_edge", "shadow_e"]}, + ",": {"type": "all", "list": ["sand", "grass_edge", "shadow_w"]}, + "^": {"type": "all", "list": ["sand", "grass_edge",]}, + "=": {"type": "all", "list": ["sand", "shadow_n",]}, + ">": {"type": "all", "list": ["sand", "shadow_w",]}, + "<": {"type": "all", "list": ["sand", "shadow_e",]}, + "~": {"type": "all", "list": ["river", "shadow_w",]}, + "T": {"type": "all", "list": ["sand", "grass_edge", "potential_apple"]}, "S": "river", "H": {"type": "all", "list": ["river", "potential_dirt"]}, "F": {"type": "all", "list": ["river", "actual_dirt"]}, @@ -86,6 +99,191 @@ _COMPASS = ["N", "E", "S", "W"] +SAND = { + "name": "sand", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sand", + "stateConfigs": [{ + "state": "sand", + "layer": "background", + "sprite": "Sand", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Sand"], + "spriteShapes": [shapes.GRAINY_FLOOR], + "palettes": [{"+": (222, 221, 189, 255), + "*": (219, 218, 186, 255)}], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +GRASS = { + "name": "grass", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "grass", + "stateConfigs": [{ + "state": "grass", + "layer": "background", + "sprite": "Grass", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Grass"], + "spriteShapes": [shapes.GRASS_STRAIGHT], + "palettes": [{"*": (164, 189, 
75, 255), + "@": (182, 207, 95, 255), + "x": (0, 0, 0, 0)}], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +GRASS_EDGE = { + "name": "grass_edge", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "grass_edge", + "stateConfigs": [{ + "state": "grass_edge", + "layer": "lowerPhysical", + "sprite": "GrassEdge", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["GrassEdge"], + "spriteShapes": [shapes.GRASS_STRAIGHT_N_EDGE], + "palettes": [{"*": (164, 189, 75, 255), + "@": (182, 207, 95, 255), + "x": (0, 0, 0, 0)}], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +SHADOW_W = { + "name": "shadow_w", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "shadow_w", + "stateConfigs": [{ + "state": "shadow_w", + "layer": "upperPhysical", + "sprite": "ShadowW", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["ShadowW"], + "spriteShapes": [shapes.SHADOW_W], + "palettes": [shapes.SHADOW_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +SHADOW_E = { + "name": "shadow_e", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "shadow_e", + "stateConfigs": [{ + "state": "shadow_e", + "layer": "upperPhysical", + "sprite": "ShadowE", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["ShadowE"], + "spriteShapes": [shapes.SHADOW_E], + "palettes": [shapes.SHADOW_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +SHADOW_N = { + "name": "shadow_n", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "shadow_n", + "stateConfigs": [{ + "state": "shadow_n", + "layer": "overlay", + "sprite": "ShadowN", + }], + } + }, + { + "component": 
"Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["ShadowN"], + "spriteShapes": [shapes.SHADOW_N], + "palettes": [shapes.SHADOW_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + WALL = { "name": "wall", "components": [ @@ -95,7 +293,7 @@ "initialState": "wall", "stateConfigs": [{ "state": "wall", - "layer": "upperPhysical", + "layer": "superOverlay", "sprite": "Wall", }], } @@ -147,10 +345,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } @@ -166,7 +360,7 @@ { "state": "apple", "sprite": "Apple", - "layer": "lowerPhysical", + "layer": "upperPhysical", }, { "state": "appleWait" @@ -175,23 +369,20 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", "kwargs": { "renderMode": "ascii_shape", "spriteNames": ["Apple"], - "spriteShapes": [shapes.LEGACY_APPLE], - "palettes": [{"*": (102, 255, 0, 255), - "@": (230, 255, 0, 255), - "&": (117, 255, 26, 255), - "#": (255, 153, 0, 255), - "x": (0, 0, 0, 0)}], - "noRotates": [False] + "spriteShapes": [shapes.APPLE], + "palettes": [{ + "x": (0, 0, 0, 0), + "*": (212, 80, 57, 255), + "#": (173, 66, 47, 255), + "o": (43, 127, 53, 255), + "|": (79, 47, 44, 255)}], + "noRotates": [True] } }, { @@ -230,7 +421,7 @@ def create_dirt_prefab(initial_state): }, { "state": "dirt", - "layer": "lowerPhysical", + "layer": "upperPhysical", "sprite": "Dirt", }, ], @@ -238,10 +429,6 @@ def create_dirt_prefab(initial_state): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -249,7 +436,7 @@ def create_dirt_prefab(initial_state): "spriteNames": ["Dirt"], # This color is greenish, and quite transparent to expose the # animated water below. 
- "spriteRGBColors": [(2, 230, 80, 50)], + "spriteRGBColors": [(2, 245, 80, 50)], } }, { @@ -294,10 +481,13 @@ def create_dirt_prefab(initial_state): FIRE_CLEAN ) +# Remove the first entry from human_readable_colors after using it for the self +# color to prevent it from being used again as another avatar color. +human_readable_colors = list(colors.human_readable) TARGET_SPRITE_SELF = { "name": "Self", "shape": shapes.CUTE_AVATAR, - "palette": shapes.get_palette((50, 100, 200)), + "palette": shapes.get_palette(human_readable_colors.pop(0)), "noRotate": True, } @@ -340,7 +530,11 @@ def get_water(): "spriteNames": ["water_1", "water_2", "water_3", "water_4"], "spriteShapes": [shapes.WATER_1, shapes.WATER_2, shapes.WATER_3, shapes.WATER_4], - "palettes": [shapes.WATER_PALETTE] * 4, + "palettes": [{ + "@": (66, 173, 212, 255), + "*": (35, 133, 168, 255), + "o": (34, 129, 163, 255), + "~": (33, 125, 158, 255),}] * 4, } }, { @@ -366,6 +560,12 @@ def create_prefabs() -> PrefabConfig: """ prefabs = { "wall": WALL, + "sand": SAND, + "grass": GRASS, + "grass_edge": GRASS_EDGE, + "shadow_w": SHADOW_W, + "shadow_e": SHADOW_E, + "shadow_n": SHADOW_N, "spawn_point": SPAWN_POINT, "potential_apple": POTENTIAL_APPLE, "river": get_water(), @@ -391,10 +591,6 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "RiverMonitor", @@ -407,6 +603,17 @@ def create_scene(): "delayStartOfDirtSpawning": 50, }, }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.2 + } + }, + { + "component": "GlobalData", + }, ] } return scene @@ -424,7 +631,7 @@ def create_avatar_object(player_idx: int, live_state_name = "player{}".format(lua_index) avatar_object = { - "name": "avatar", + "name": f"avatar{lua_index}", "components": [ { "component": "StateManager", @@ -433,7 +640,7 @@ def create_avatar_object(player_idx: int, "stateConfigs": [ # Initial player state. {"state": live_state_name, - "layer": "upperPhysical", + "layer": "superOverlay", "sprite": source_sprite_self, "contact": "avatar", "groups": ["players"]}, @@ -446,10 +653,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -457,7 +660,8 @@ def create_avatar_object(player_idx: int, "renderMode": "ascii_shape", "spriteNames": [source_sprite_self], "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [shapes.get_palette(colors.palette[player_idx])], + "palettes": [shapes.get_palette( + human_readable_colors[player_idx])], "noRotates": [True] } }, @@ -535,6 +739,51 @@ def create_avatar_object(player_idx: int, "alsoReportOrientation": True } }, + { + "component": "AllNonselfCumulants", + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + "name": "PLAYER_CLEANED", + "type": "Doubles", + "shape": [], + "component": "Cleaner", + "variable": "player_cleaned", + }, + { + "name": "PLAYER_ATE_APPLE", + "type": "Doubles", + "shape": [], + "component": "Taste", + "variable": "player_ate_apple", + }, + { + "name": "NUM_OTHERS_PLAYER_ZAPPED_THIS_STEP", + "type": "Doubles", + "shape": [], + "component": "Zapper", + "variable": "num_others_player_zapped_this_step", + }, + { + "name": "NUM_OTHERS_WHO_CLEANED_THIS_STEP", + "type": "Doubles", + "shape": [], + "component": "AllNonselfCumulants", + "variable": "num_others_who_cleaned_this_step", + }, + { + "name": "NUM_OTHERS_WHO_ATE_THIS_STEP", + "type": 
"Doubles", + "shape": [], + "component": "AllNonselfCumulants", + "variable": "num_others_who_ate_this_step", + }, + ] + } + }, ] } return avatar_object @@ -551,43 +800,24 @@ def create_avatar_objects(num_players): return avatar_objects -def create_lab2d_settings(num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "clean_up", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "topology": "BOUNDED", # Choose from ["BOUNDED", "TORUS"], - "simulation": { - "map": ASCII_MAP, - "gameObjects": create_avatar_objects(num_players), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - "scene": create_scene(), - }, - } - return lab2d_settings - - def get_config(): - """Default configuration for training on the clean_up level.""" + """Default configuration for the clean_up level.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 7 - - # Lua script configuration. - config.lab2d_settings = create_lab2d_settings(config.num_players) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. config.individual_observation_names = [ "RGB", "READY_TO_SHOOT", + # Cumulants. + "PLAYER_ATE_APPLE", + "PLAYER_CLEANED", + "NUM_OTHERS_PLAYER_ZAPPED_THIS_STEP", + # Global switching signals for puppeteers. + "NUM_OTHERS_WHO_CLEANED_THIS_STEP", + "NUM_OTHERS_WHO_ATE_THIS_STEP", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", ] @@ -600,9 +830,48 @@ def get_config(): config.timestep_spec = specs.timestep({ "RGB": specs.OBSERVATION["RGB"], "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Cumulants. + "PLAYER_ATE_APPLE": specs.float64(), + "PLAYER_CLEANED": specs.float64(), + "NUM_OTHERS_PLAYER_ZAPPED_THIS_STEP": specs.float64(), + # Global switching signals for puppeteers. 
+ "NUM_OTHERS_WHO_CLEANED_THIS_STEP": specs.float64(), + "NUM_OTHERS_WHO_ATE_THIS_STEP": specs.float64(), + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "WORLD.RGB": specs.rgb(168, 240), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 7 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build the clean_up substrate given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="clean_up", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/coins.py b/meltingpot/python/configs/substrates/coins.py new file mode 100644 index 00000000..098cccdd --- /dev/null +++ b/meltingpot/python/configs/substrates/coins.py @@ -0,0 +1,526 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for running a Coins game in Melting Pot. + +Example video: https://youtu.be/a_SYgt4tBsc +""" + +from collections.abc import Mapping, Sequence +import random +from typing import Any + +from ml_collections import config_dict as configdict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import game_object_utils +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +PrefabConfig = game_object_utils.PrefabConfig + +MANDATED_NUM_PLAYERS = 2 + +COIN_PALETTES = { + "coin_red": shapes.get_palette((238, 102, 119)), # Red. + "coin_blue": shapes.get_palette((68, 119, 170)), # Blue. + "coin_yellow": shapes.get_palette((204, 187, 68)), # Yellow. + "coin_green": shapes.get_palette((34, 136, 51)), # Green. + "coin_purple": shapes.get_palette((170, 51, 119)) # Purple. +} + + +def get_ascii_map( + min_width: int, max_width: int, min_height: int, max_height: int) -> str: + """Procedurally generate ASCII map.""" + assert min_width <= max_width + assert min_height <= max_height + + # Sample random map width and height. + width = random.randint(min_width, max_width) + height = random.randint(min_height, max_height) + + # Make top row (walls). Pad to max width to ensure all maps are same size. + ascii_map = ["W"] * (width + 2) + [" "] * (max_width - width) + + # Make middle rows (navigable interior). + for row in range(height): + # Add walls and coins. + ascii_map += ["\nW"] + ["C"] * width + ["W"] + + if row == 1: + # Add top-right spawn point. + ascii_map[-3] = "_" + elif row == height - 2: + # Add bottom-left spawn point. + ascii_map[-width] = "_" + + # Pad to max width. + ascii_map += [" "] * (max_width - width) + + # Make bottom row (walls). Pad to max width. 
+ ascii_map += ["\n"] + ["W"] * (width + 2) + [" "] * (max_width - width) + + # Pad with extra rows to reach max height. + for _ in range(max_height - height): + ascii_map += ["\n"] + [" "] * max_width + + # Join list of strings into single string. + ascii_map = "".join(ascii_map) + + return ascii_map + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "_": "spawn_point", + "W": "wall", + "C": "coin", +} + +_COMPASS = ["N", "E", "S", "W"] + + +# The Scene object is a non-physical object; its components implement global +# logic. In this case, that includes tracking coin collection, reporting it as +# a global metric, and ending episodes stochastically. +SCENE = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "GlobalMetricReporter", + "kwargs": { + "metrics": [ + {"name": "COINS_COLLECTED", + "type": "tensor.Int32Tensor", + "shape": (MANDATED_NUM_PLAYERS, 2), + "component": "GlobalCoinCollectionTracker", + "variable": "coinsCollected"}, + ] + } + }, + { + "component": "GlobalCoinCollectionTracker", + "kwargs": { + "numPlayers": MANDATED_NUM_PLAYERS, + }, + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 300, + "intervalLength": 100, # Set equal to unroll length.
+ "probabilityTerminationPerInterval": 0.05 + } + } + ] +} + + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall",], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [True] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gift" + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "zap" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "logic", + "groups": ["spawnPoints"] + }], + } + }, + {"component": "Transform",}, + ] +} + + +def get_coin( + coin_type_a: str, + coin_type_b: str, + regrow_rate: float, + reward_self_for_match: float, + reward_self_for_mismatch: float, + reward_other_for_match: float, + reward_other_for_mismatch: float, + ) -> PrefabConfig: + """Create `PrefabConfig` for coin component.""" + return { + "name": "coin", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "coinWait", + "stateConfigs": [ + {"state": coin_type_a, + "layer": "superOverlay", + "sprite": coin_type_a, + }, + {"state": coin_type_b, + "layer": "superOverlay", + "sprite": coin_type_b, + }, + {"state": "coinWait", + "layer": "logic", + }, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [coin_type_a, coin_type_b], + "spriteShapes": [shapes.COIN] * 2, + "palettes": [COIN_PALETTES[coin_type_a], + COIN_PALETTES[coin_type_b]], + 
"noRotates": [False] * 2, + } + }, + { + "component": "Coin", + "kwargs": { + "waitState": "coinWait", + "rewardSelfForMatch": reward_self_for_match, + "rewardSelfForMismatch": reward_self_for_mismatch, + "rewardOtherForMatch": reward_other_for_match, + "rewardOtherForMismatch": reward_other_for_mismatch, + } + }, + { + "component": "ChoiceCoinRegrow", + "kwargs": { + "liveStateA": coin_type_a, + "liveStateB": coin_type_b, + "waitState": "coinWait", + "regrowRate": regrow_rate, + } + }, + ] + } + + +def get_avatar(coin_type: str): + return { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "player", + "stateConfigs": [ + {"state": "player", + "layer": "upperPhysical", + "sprite": "Avatar", + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Avatar"], + "spriteShapes": [shapes.CUTE_AVATAR], + # Palette to be overwritten. + "palettes": [shapes.get_palette(colors.palette[0])], + "noRotates": [True] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": -1, # Player index to be overwritten. + "aliveState": "player", + "waitState": "playerWait", + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn",], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + } + }, + { + "component": "PlayerCoinType", + "kwargs": { + "coinType": coin_type, + }, + }, + { + "component": "Role", + "kwargs": { + # Role has no effect if all factors are 1.0. 
+ "multiplyRewardSelfForMatch": 1.0, + "multiplyRewardSelfForMismatch": 1.0, + "multiplyRewardOtherForMatch": 1.0, + "multiplyRewardOtherForMismatch": 1.0, + }, + }, + { + "component": "PartnerTracker", + "kwargs": {} + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + "name": "MATCHED_COIN_COLLECTED", + "type": "Doubles", + "shape": [], + "component": "Role", + "variable": "cumulantCollectedMatch", + }, + { + "name": "MISMATCHED_COIN_COLLECTED", + "type": "Doubles", + "shape": [], + "component": "Role", + "variable": "cumulantCollectedMismatch", + }, + { + "name": "MATCHED_COIN_COLLECTED_BY_PARTNER", + "type": "Doubles", + "shape": [], + "component": "PartnerTracker", + "variable": "partnerCollectedMatch", + }, + { + "name": "MISMATCHED_COIN_COLLECTED_BY_PARTNER", + "type": "Doubles", + "shape": [], + "component": "PartnerTracker", + "variable": "partnerCollectedMismatch", + }, + ] + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + ] + } + + +# `prefabs` is a dictionary mapping names to template game objects that can +# be cloned and placed in multiple locations according to an ascii map.
+def get_prefabs( + coin_type_a: str, + coin_type_b: str, + regrow_rate: float = 0.0005, + reward_self_for_match: float = 1.0, + reward_self_for_mismatch: float = 1.0, + reward_other_for_match: float = 0.0, + reward_other_for_mismatch: float = -2.0, +) -> PrefabConfig: + """Make `prefabs` (a dictionary mapping names to template game objects).""" + coin = get_coin(coin_type_a=coin_type_a, + coin_type_b=coin_type_b, + regrow_rate=regrow_rate, + reward_self_for_match=reward_self_for_match, + reward_self_for_mismatch=reward_self_for_mismatch, + reward_other_for_match=reward_other_for_match, + reward_other_for_mismatch=reward_other_for_mismatch) + return {"wall": WALL, "spawn_point": SPAWN_POINT, "coin": coin} + + +# `player_color_palettes` is a list with each entry specifying the color to use +# for the player at the corresponding index. +# These correspond to the persistent agent colors, but are meaningless for the +# human player. They will be overridden by the environment_builder. +def get_player_color_palettes( + coin_type_a: str, coin_type_b: str) -> Sequence[Mapping[str, shapes.Color]]: + return [COIN_PALETTES[coin_type_a], COIN_PALETTES[coin_type_b]] + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0,} +FORWARD = {"move": 1, "turn": 0,} +STEP_RIGHT = {"move": 2, "turn": 0,} +BACKWARD = {"move": 3, "turn": 0,} +STEP_LEFT = {"move": 4, "turn": 0,} +TURN_LEFT = {"move": 0, "turn": -1,} +TURN_RIGHT = {"move": 0, "turn": 1,} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, +) + + +def get_config(): + """Default configuration for the Coins substrate.""" + config = configdict.ConfigDict() + + # Set the size of the map. + config.min_width = 10 + config.max_width = 15 + config.min_height = 10 + config.max_height = 15 + + # Action set configuration. 
+ config.action_set = ACTION_SET + + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + # Global switching signals for puppeteers. + "MISMATCHED_COIN_COLLECTED_BY_PARTNER", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB" + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "MISMATCHED_COIN_COLLECTED_BY_PARTNER": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(136, 136), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * MANDATED_NUM_PLAYERS + + return config + + +def build( + roles: Sequence[str], + config: configdict.ConfigDict, +) -> Mapping[str, Any]: + """Build the coins substrate given player roles.""" + assert len(roles) == MANDATED_NUM_PLAYERS, "Wrong number of players" + # Randomly choose colors. + coin_type_a, coin_type_b = random.sample(tuple(COIN_PALETTES), k=2) + + # Manually build avatar config. + num_players = len(roles) + player_color_palettes = get_player_color_palettes( + coin_type_a=coin_type_a, coin_type_b=coin_type_b) + avatar_objects = game_object_utils.build_avatar_objects( + num_players, {"avatar": get_avatar(coin_type_a)}, player_color_palettes) # pytype: disable=wrong-arg-types # allow-recursive-types + game_object_utils.get_first_named_component( + avatar_objects[1], "PlayerCoinType")["kwargs"]["coinType"] = coin_type_b + + # Build the substrate definition. + substrate_definition = dict( + levelName="coins", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. 
+ maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": get_ascii_map(min_width=config.min_width, + max_width=config.max_width, + min_height=config.min_height, + max_height=config.max_height), + "scene": SCENE, + "prefabs": get_prefabs(coin_type_a=coin_type_a, + coin_type_b=coin_type_b), + "charPrefabMap": CHAR_PREFAB_MAP, + "gameObjects": avatar_objects, + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/collaborative_cooking.py b/meltingpot/python/configs/substrates/collaborative_cooking.py index 735c90fb..7525b5af 100644 --- a/meltingpot/python/configs/substrates/collaborative_cooking.py +++ b/meltingpot/python/configs/substrates/collaborative_cooking.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,19 +13,22 @@ # limitations under the License. """Configuration for Collaborative Cooking. -Example video: https://youtu.be/R_TBitc3hto - -A pure common interest cooking game inspired by Carroll et al. (2019). +A pure common interest cooking game inspired by Carroll et al. (2019) and +Strouse et al. (2021). Carroll, M., Shah, R., Ho, M.K., Griffiths, T.L., Seshia, S.A., Abbeel, P. and Dragan, A., 2019. On the utility of learning about humans for human-AI coordination. arXiv preprint arXiv:1910.05789. + +Strouse, D.J., McKee, K.R., Botvinick, M., Hughes, E. and Everett, R., 2021. +Collaborating with Humans without Human Data. arXiv preprint arXiv:2110.08176. 
""" import copy -from typing import Any, Dict +from typing import Any, Dict, Mapping, Sequence + +from ml_collections import config_dict as configdict -from ml_collections import config_dict from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -44,40 +47,6 @@ "C": "cooking_pot", } -############## -# ASCII maps # -############## - -impassable = """ -##T###### -## ###D## -#P # -# P C -# # -######### -# # -C P P # -# # -##O###O## -""" - -passable = """ -###D###O#O### -# # ## -# # P ## -C P # ## -# # T -C P# ## -# # P ## -# ## -############# -""" - -ASCII_MAPS = { - "impassable": impassable, - "passable": passable, -} - ########### # SPRITES # ########### @@ -433,10 +402,6 @@ def create_loading_bar_palette(count, finished=False): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } @@ -465,10 +430,7 @@ def create_loading_bar_palette(count, finished=False): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } + "kwargs": {} }, { "component": "Appearance", @@ -509,10 +471,7 @@ def create_loading_bar_palette(count, finished=False): } }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } + "kwargs": {} }, { "component": "Appearance", "kwargs": { @@ -550,10 +509,7 @@ def create_base_prefab(name, layer="upperPhysical"): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } + "kwargs": {} }, { "component": "Appearance", @@ -685,10 +641,7 @@ def create_cooking_pot(time_to_cook, reward=1): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } + "kwargs": {} }, { "component": "Appearance", @@ -714,24 +667,26 @@ def create_cooking_pot(time_to_cook, reward=1): return cooking_pot -# PREFABS is a dictionary mapping names to template game objects that can -# be cloned 
and placed in multiple locations according to an ascii map. -PREFABS = { - "spawn_point": SPAWN_POINT, - "inventory": INVENTORY, - "loading_bar": LOADING_BAR, - "counter": create_counter(), - "dish_dispenser": create_dispenser(prefab_name="dish_dispenser", - item_name="dish"), - "tomato_dispenser": create_dispenser(prefab_name="tomato_dispenser", - item_name="tomato"), - "delivery_location": create_receiver(prefab_name="delivery_location", - item_name="soup", - reward=20, - global_reward=True), - "cooking_pot": create_cooking_pot(time_to_cook=COOKING_TIME, - reward=0), -} + +def create_prefabs(cooking_pot_pseudoreward: float = 0.0): + """Creates a dictionary mapping names to template game objects.""" + prefabs = { + "spawn_point": SPAWN_POINT, + "inventory": INVENTORY, + "loading_bar": LOADING_BAR, + "counter": create_counter(), + "dish_dispenser": create_dispenser(prefab_name="dish_dispenser", + item_name="dish"), + "tomato_dispenser": create_dispenser(prefab_name="tomato_dispenser", + item_name="tomato"), + "delivery_location": create_receiver(prefab_name="delivery_location", + item_name="soup", + reward=20, + global_reward=True), + "cooking_pot": create_cooking_pot(time_to_cook=COOKING_TIME, + reward=cooking_pot_pseudoreward), + } + return prefabs ########### # ACTIONS # @@ -767,7 +722,7 @@ def create_cooking_pot(time_to_cook, reward=1): ########### -def create_game_objects(ascii_map_string): +def create_game_objects(ascii_map_string, prefabs): """Returns list of game objects from 'ascii_map' and 'char_prefab' mapping.""" # Create all game objects. @@ -778,7 +733,7 @@ def create_game_objects(ascii_map_string): for transform in transforms: # Add inventory game object for holding and visualising items. 
if char == "#" or char == "O" or char == "D": - inventory_object = copy.deepcopy(PREFABS["inventory"]) + inventory_object = copy.deepcopy(prefabs["inventory"]) go_transform = game_object_utils.get_first_named_component( inventory_object, "Transform") go_transform["kwargs"]["position"] = (transform.position.x, @@ -787,7 +742,7 @@ def create_game_objects(ascii_map_string): # Add loading bar object to cooking pots. if char == "C": - loading_object = copy.deepcopy(PREFABS["loading_bar"]) + loading_object = copy.deepcopy(prefabs["loading_bar"]) go_transform = game_object_utils.get_first_named_component( loading_object, "Transform") go_transform["kwargs"]["position"] = (transform.position.x, @@ -834,10 +789,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -891,6 +842,24 @@ def create_avatar_object(player_idx: int, "palettes": [interact_palette] } }, + {"component": "AvatarCumulants",}, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + {"name": "ADDED_INGREDIENT_TO_COOKING_POT", + "type": "Doubles", + "shape": [], + "component": "AvatarCumulants", + "variable": "addedIngredientToCookingPot"}, + {"name": "COLLECTED_SOUP_FROM_COOKING_POT", + "type": "Doubles", + "shape": [], + "component": "AvatarCumulants", + "variable": "collectedSoupFromCookingPot"}, + ] + } + }, { "component": "LocationObserver", "kwargs": { @@ -903,7 +872,7 @@ def create_avatar_object(player_idx: int, return avatar_object -def create_avatar_objects(num_players): +def create_avatar_objects(num_players, prefabs): """Returns list of avatar objects of length 'num_players'.""" game_objects = [] for player_idx in range(0, num_players): @@ -913,7 +882,7 @@ def create_avatar_objects(num_players): game_objects.append(game_object) # Add inventory game object which will be connected to player at init. 
- inventory_object = copy.deepcopy(PREFABS["inventory"]) + inventory_object = copy.deepcopy(prefabs["inventory"]) game_object_utils.get_first_named_component( inventory_object, "Inventory")["kwargs"]["playerIndex"] = lua_index @@ -922,45 +891,25 @@ def create_avatar_objects(num_players): return game_objects -def create_lab2d_settings(ascii_map: str, - num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - ascii_map = ASCII_MAPS[ascii_map] - game_objects = create_game_objects(ascii_map) - extra_game_objects = create_avatar_objects(num_players) - game_objects += extra_game_objects - - # Lua script configuration. - lab2d_settings = { - "levelName": "collaborative_cooking", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map, - "gameObjects": game_objects, - "prefabs": PREFABS, - "charPrefabMap": CHAR_PREFAB_MAP, - }, - } - return lab2d_settings - - -def get_config(ascii_map: str): +def get_config(): """Default configuration for training on the collaborative cooking level.""" - config = config_dict.ConfigDict() + config = configdict.ConfigDict() - config.num_players = 4 - # Lua script configuration. - config.lab2d_settings = create_lab2d_settings(ascii_map, config.num_players) + # Cooking pot pseudoreward should be 0.0 for the canonical version of this + # environment, but in order to train background bots it is sometimes useful + # to give them a pseudoreward when they put items in the cooking pot. It has + # the effect of shaping their behavior a bit in the right direction. + config.cooking_pot_pseudoreward = 0.0 # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. config.individual_observation_names = [ "RGB", + # Cumulants (do not use in policies). + "ADDED_INGREDIENT_TO_COOKING_POT", + "COLLECTED_SOUP_FROM_COOKING_POT", + # Debug only (do not use the following observations in policies). 
"POSITION", "ORIENTATION", ] @@ -968,17 +917,36 @@ def get_config(ascii_map: str): "WORLD.RGB", ] - # The specs of the environment (from a single-agent perspective). config.action_spec = specs.action(len(ACTION_SET)) - if ascii_map == "passable": - world_size = 72, 104 - else: - world_size = 80, 72 - config.timestep_spec = specs.timestep({ - "RGB": specs.rgb(40, 40), - "POSITION": specs.OBSERVATION["POSITION"], - "ORIENTATION": specs.OBSERVATION["ORIENTATION"], - "WORLD.RGB": specs.rgb(*world_size), - }) return config + + +def build( + roles: Sequence[str], + config: configdict.ConfigDict, +) -> Mapping[str, Any]: + """Build the substrate given player roles.""" + num_players = len(roles) + ascii_map = config.layout.ascii_map + prefabs = create_prefabs( + cooking_pot_pseudoreward=config.cooking_pot_pseudoreward) + game_objects = create_game_objects(ascii_map, prefabs) + extra_game_objects = create_avatar_objects(num_players, prefabs) + game_objects += extra_game_objects + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="collaborative_cooking", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ascii_map, + "gameObjects": game_objects, + "prefabs": prefabs, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/collaborative_cooking__asymmetric.py b/meltingpot/python/configs/substrates/collaborative_cooking__asymmetric.py new file mode 100644 index 00000000..31da3013 --- /dev/null +++ b/meltingpot/python/configs/substrates/collaborative_cooking__asymmetric.py @@ -0,0 +1,76 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Collaborative Cooking: Asymmetric. + +Example video: https://youtu.be/4AN3e1lFuMo + +The recipe they must follow is for tomato soup: +1. Add three tomatoes to the cooking pot. +2. Wait for the soup to cook (status bar completion). +3. Bring a bowl to the pot and pour the soup from the pot into the bowl. +4. Deliver the bowl of soup at the goal location. + +This substrate is a pure common interest game. All players share all rewards. + +Players have a `5 x 5` observation window. + +Map: +Asymmetric Advantages: A two-room layout with an agent in each. In the left +room, the tomato station is far away from the cooking pots while the delivery +location is close. In the right room, the tomato station is next to the cooking +pots while the delivery station is far. This presents an asymmetric advantage of +responsibilities for optimally creating and delivering soups. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +# Asymmetric Advantages: A two-room layout with an agent in each room where it +# is possible for agents to work independently but more efficient if they +# specialize due to asymmetric advantages in delivery vs tomato loading. +ASCII_MAP = """ +######### +O #T#O# T +# P C P # +# C # +###D#D### +""" + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. 
+ config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "ADDED_INGREDIENT_TO_COOKING_POT": specs.float64(), + "COLLECTED_SOUP_FROM_COOKING_POT": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(40, 72), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking__circuit.py b/meltingpot/python/configs/substrates/collaborative_cooking__circuit.py new file mode 100644 index 00000000..bd227cf3 --- /dev/null +++ b/meltingpot/python/configs/substrates/collaborative_cooking__circuit.py @@ -0,0 +1,76 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Collaborative Cooking: Circuit. + +Example video: https://youtu.be/2nXe5OPvJ7g + +The recipe they must follow is for tomato soup: +1. Add three tomatoes to the cooking pot. +2. Wait for the soup to cook (status bar completion). +3. Bring a bowl to the pot and pour the soup from the pot into the bowl. +4. Deliver the bowl of soup at the goal location. + +This substrate is a pure common interest game. All players share all rewards. 
+ +Players have a `5 x 5` observation window. + +Map: +Counter Circuit: Players are able to cook and deliver soups by themselves +through walking around the entire circuit. However, there exists a more optimal +coordinated strategy whereby players pass tomatoes across the counter. +Additionally, there are the clockwise and anti-clockwise strategies as in the +Ring layout. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +# Counter Circuit: Another layout where it is possible for agents to work +# independently but more efficient if they work together, with one agent passing +# tomatoes to the other. +ASCII_MAP = """ +x###CC### +x#P # +xD #### T +x# P# +x###OO### +""" + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "ADDED_INGREDIENT_TO_COOKING_POT": specs.float64(), + "COLLECTED_SOUP_FROM_COOKING_POT": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(40, 72), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking__cramped.py b/meltingpot/python/configs/substrates/collaborative_cooking__cramped.py new file mode 100644 index 00000000..9e3130af --- /dev/null +++ b/meltingpot/python/configs/substrates/collaborative_cooking__cramped.py @@ -0,0 +1,72 @@ +# Copyright 2022 DeepMind Technologies Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Collaborative Cooking: Cramped. + +Example video: https://youtu.be/8qQFbxO8UNY + +The recipe they must follow is for tomato soup: +1. Add three tomatoes to the cooking pot. +2. Wait for the soup to cook (status bar completion). +3. Bring a bowl to the pot and pour the soup from the pot into the bowl. +4. Deliver the bowl of soup at the goal location. + +This substrate is a pure common interest game. All players share all rewards. + +Players have a `5 x 5` observation window. + +Map: +Cramped Room: A tight layout requiring significant movement coordination between +the players in order to avoid being blocked by each other. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +# Cramped Room: A tight layout requiring significant movement coordination +# between the players in order to avoid being blocked by each other. +ASCII_MAP = """ +xx##C##xx +xxOP POxx +xx# #xx +xx#D#T#xx +xxxxxxxxx +""" + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + # The specs of the environment (from a single-agent perspective). 
+ config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "ADDED_INGREDIENT_TO_COOKING_POT": specs.float64(), + "COLLECTED_SOUP_FROM_COOKING_POT": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(40, 72), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking__crowded.py b/meltingpot/python/configs/substrates/collaborative_cooking__crowded.py new file mode 100644 index 00000000..ece0bb15 --- /dev/null +++ b/meltingpot/python/configs/substrates/collaborative_cooking__crowded.py @@ -0,0 +1,81 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Collaborative Cooking: Crowded. + +Example video: https://youtu.be/_6j3yYbf434 + +The recipe they must follow is for tomato soup: +1. Add three tomatoes to the cooking pot. +2. Wait for the soup to cook (status bar completion). +3. Bring a bowl to the pot and pour the soup from the pot into the bowl. +4. Deliver the bowl of soup at the goal location. + +This substrate is a pure common interest game. All players share all rewards. + +Players have a `5 x 5` observation window. 
+ +Map: +Crowded: here players can pass each other in the kitchen, allowing less +coordinated yet inefficient strategies by individual players. The +most efficient strategies involve passing ingredients over the central counter. +There is a choke point where it is likely that players who do not work as a +team will get in one another's way. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +# Crowded: notice that there are more spawn points than the recommended number +# of players. Since players are spawned randomly this means the numbers starting +# on either side of the divider will vary from episode to episode and generally +# be imbalanced. +ASCII_MAP = """ +###D###O#O### +#P P# P ## +# # P ## +C P #P ## +# #P T +C P# P ## +# P # P ## +#P ## +############# +""" + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "ADDED_INGREDIENT_TO_COOKING_POT": specs.float64(), + "COLLECTED_SOUP_FROM_COOKING_POT": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(72, 104), + }) + + # The roles assigned to each player. 
+ config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 9 + + return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking__figure_eight.py b/meltingpot/python/configs/substrates/collaborative_cooking__figure_eight.py new file mode 100644 index 00000000..78d4c1d0 --- /dev/null +++ b/meltingpot/python/configs/substrates/collaborative_cooking__figure_eight.py @@ -0,0 +1,78 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Collaborative Cooking: Figure Eight. + +Example video: https://youtu.be/hUCbOL5l-Gw + +The recipe they must follow is for tomato soup: +1. Add three tomatoes to the cooking pot. +2. Wait for the soup to cook (status bar completion). +3. Bring a bowl to the pot and pour the soup from the pot into the bowl. +4. Deliver the bowl of soup at the goal location. + +This substrate is a pure common interest game. All players share all rewards. + +Players have a `5 x 5` observation window. + +Map: +Figure Eight: The map is a figure eight shaped maze that generates numerous +places where players might get in one another's way, blocking critical paths. +While it is technically possible for a single player to complete the task on +their own, it is very unlikely that poorly performing partners would get out of +that player's way, so in practice, collaboration is essential.
+""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +# Figure Eight: Strong performance on this map requires two stages of teamwork. +ASCII_MAP = """ +################ +####C#C##C#C#### +# P P # +## ########## ## +# P P # +## ########## ## +# P P # +### #ODTTOD# ### +################ +""" + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "ADDED_INGREDIENT_TO_COOKING_POT": specs.float64(), + "COLLECTED_SOUP_FROM_COOKING_POT": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(72, 128), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 6 + + return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking__forced.py b/meltingpot/python/configs/substrates/collaborative_cooking__forced.py new file mode 100644 index 00000000..7a326434 --- /dev/null +++ b/meltingpot/python/configs/substrates/collaborative_cooking__forced.py @@ -0,0 +1,77 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Collaborative Cooking: Forced. + +Example video: https://youtu.be/FV_xZuSCRmM + +The recipe they must follow is for tomato soup: +1. Add three tomatoes to the cooking pot. +2. Wait for the soup to cook (status bar completion). +3. Bring a bowl to the pot and pour the soup from the pot into the bowl. +4. Deliver the bowl of soup at the goal location. + +This substrate is a pure common interest game. All players share all rewards. + +Players have a `5 x 5` observation window. + +Map: +Forced Coordination: One player is in the left room and the second is in the +right room. Consequently, both players are forced to work together in order to +cook and deliver soup. The player in the left room can only pass tomatoes and +dishes, while the player on the right can only cook the soup and deliver it +(using the items provided by the first player). +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +# Forced Coordination: A two-room layout in which agents cannot complete the +# task alone and therefore must work together, with one player passing tomatoes +# and plates to the other, and the other player loading the pot and delivering +# soups. +ASCII_MAP = """ +xx###C#xx +xxO #PCxx +xxOP# #xx +xxD # #xx +xx###T#xx +""" + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings.
+ config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "ADDED_INGREDIENT_TO_COOKING_POT": specs.float64(), + "COLLECTED_SOUP_FROM_COOKING_POT": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(40, 72), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking__ring.py b/meltingpot/python/configs/substrates/collaborative_cooking__ring.py new file mode 100644 index 00000000..4c8e2a44 --- /dev/null +++ b/meltingpot/python/configs/substrates/collaborative_cooking__ring.py @@ -0,0 +1,74 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Collaborative Cooking: Ring. + +Example video: https://youtu.be/j5v7B9pfG9I + +The recipe they must follow is for tomato soup: +1. Add three tomatoes to the cooking pot. +2. Wait for the soup to cook (status bar completion). +3. Bring a bowl to the pot and pour the soup from the pot into the bowl. +4. Deliver the bowl of soup at the goal location. + +This substrate is a pure common interest game. All players share all rewards. 
+ +Players have a `5 x 5` observation window. + +Map: +Coordination Ring: A layout with two equally successful movement strategies – +(1) both players moving clockwise, and (2) both players moving anti-clockwise. +If players do not coordinate, they will block each other’s movement. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +# Coordination Ring: Another tight layout requiring significant movement +# coordination between the players, this time in terms of moving clockwise vs +# counterclockwise. +ASCII_MAP = """ +xx###C#xx +xx# Cxx +xxDP# #xx +xxO P #xx +xx#OT##xx +""" + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "ADDED_INGREDIENT_TO_COOKING_POT": specs.float64(), + "COLLECTED_SOUP_FROM_COOKING_POT": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(40, 72), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking_impassable.py b/meltingpot/python/configs/substrates/collaborative_cooking_impassable.py deleted file mode 100644 index 7a5692d4..00000000 --- a/meltingpot/python/configs/substrates/collaborative_cooking_impassable.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright 2020 DeepMind Technologies Limited. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Configuration for Collaborative Cooking: Impassable. - -Example video: https://youtu.be/yn1uSNymQ_U - -Players need to collaborate to follow recipes. They are separated by an -impassable kitchen counter, so no player can complete the objective alone. - -The recipe they must follow is for tomato soup: -1. Add three tomatoes to the cooking pot. -2. Wait for the soup to cook (status bar completion). -3. Bring a bowl to the pot and pour the soup from the pot into the bowl. -4. Deliver the bowl of soup at the goal location. - -This substrate is a pure common interest game. All players share all rewards. - -Players have a `5 x 5` observation window. -""" - -# pylint: disable=g-line-too-long -from meltingpot.python.configs.substrates import collaborative_cooking as base_config - - -def get_config(): - """Default config for training on collaborative cooking.""" - config = base_config.get_config("impassable") - return config diff --git a/meltingpot/python/configs/substrates/collaborative_cooking_passable.py b/meltingpot/python/configs/substrates/collaborative_cooking_passable.py deleted file mode 100644 index fcb6b322..00000000 --- a/meltingpot/python/configs/substrates/collaborative_cooking_passable.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2020 DeepMind Technologies Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Configuration for Collaborative Cooking: Passable. - -Example video: https://youtu.be/R_TBitc3hto - -Same as _Collaborative Cooking: Impassable_ except here players can pass each -other in the kitchen, allowing less coordinated yet inefficient strategies by -individual players. - -See _Collaborative Cooking: Impassable_ for the recipe that players must follow. - -This substrate is a pure common interest game. All players share all rewards. - -Players have a `5 x 5` observation window. -""" - -from meltingpot.python.configs.substrates import collaborative_cooking as base_config - - -def get_config(): - """Default config for training on collaborative cooking.""" - config = base_config.get_config("passable") - return config diff --git a/meltingpot/python/configs/substrates/commons_harvest_closed.py b/meltingpot/python/configs/substrates/commons_harvest__closed.py similarity index 67% rename from meltingpot/python/configs/substrates/commons_harvest_closed.py rename to meltingpot/python/configs/substrates/commons_harvest__closed.py index dfe3ca31..6dc4897e 100644 --- a/meltingpot/python/configs/substrates/commons_harvest_closed.py +++ b/meltingpot/python/configs/substrates/commons_harvest__closed.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Commons Harvest: Closed. 
-Example video: https://youtu.be/ZHjrlTft98M +Example video: https://youtu.be/WbkTSbiSOw0 See _Commons Harvest: Open_ for the general description of the mechanics at play in this substrate. @@ -26,7 +26,7 @@ excluded from any natural region. """ -from typing import Any, Dict +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict import numpy as np @@ -39,60 +39,116 @@ REGROWTH_PROBABILITIES = [0.0, 0.001, 0.005, 0.025] ASCII_MAP = """ -WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW -WW WW W -W WW -W PPPPPPPPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -W PPPPPPPPPPPPWWPPPPPPPPPP PPPPPPPPPWPPWPPPPPPPPPPP WW -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W W A WW A W WW W -W W AAA WW AAA W WWWWWWWWWW WW WWWWWWWWWW W -W W AAAAAWWAAAAA W W A WW A W W -W W AAA WW AAA W W AAA WWWWWWWWWW AAA W W -W W A WW A W WAAAAA WW AAAAAW W -W W A WW A W W AAA WW AAA W W -W W AAA WW AAA W W A WW A W W -W WAAAAA WW AAAAAW W A WW A W W -W W AAA WWWWWWWWWW AAA W W AAA WW AAA W W -W W A WW A W W AAAAAWWAAAAA W W -W WWWWWWWWWW WW WWWWWWWWWW W AAA WW AAA W W -W WW W A WW A W W -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W W W W -WW PPPPPPPPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -WW PPPPPPPPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -WW W WW -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W WW W A WW A W W -W WWWWWWWWWW WW WWWWWWWWWW W AAA WW AAA WW W -W W A WW A W W AAAAAWWAAAAA W W -W W AAA WWWWWWWWWW AAA W W AAA WW AAA W W -W WAAAAA WW AAAAAW W A WW A W W -W W AAA WW AAA W W A WW A W W -W W A WW A W W AAA WW AAA W W -W W A WW A W WAAAAA WW AAAAAW W -W W AAA WW AAA W W AAA WWWWWWWWWW AAA W W -W W AAAAAWWAAAAA W W A WW A W W -W W AAA WW AAA W WWWWWWWWWW WW WWWWWWWWWW W -W W A WW A W WW W -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W PPPPPPWPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -W PPPPPPWPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -W W -W WWW W W W 
-WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW +WWWWWWWWWWWWWWWWWWWWWWWW +WAAA A WW A AAAW +WAA AAA WW AAA AAW +WA AAAAAWWAAAAA AW +W AAA WW AAA W +W A WW A W +W A WW A W +W AAA Q WW Q AAA W +WAAAAA WW AAAAAW +W AAA WWWWWWWWWW AAA W +W A WW A W +WWWWWWWWWW WW WWWWWWWWWW +W WW W +W WWWWWWWWWWWWWWWWWW W +W PPPPPPPPPPPPPPPPPP W +W PPPPPPPPPPPPPPPPPPPP W +WPPPPPPPPPPPPPPPPPPPPPPW +WWWWWWWWWWWWWWWWWWWWWWWW """ # `prefab` determines which prefab game object to use for each `char` in the # ascii map. CHAR_PREFAB_MAP = { - "P": "spawn_point", + "P": {"type": "all", "list": ["floor", "spawn_point"]}, + "Q": {"type": "all", "list": ["floor", "inside_spawn_point"]}, + " ": "floor", "W": "wall", - "A": "apple", + "A": {"type": "all", "list": ["grass", "apple"]}, } _COMPASS = ["N", "E", "S", "W"] +FLOOR = { + "name": "floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "floor", + "stateConfigs": [{ + "state": "floor", + "layer": "background", + "sprite": "Floor", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Floor"], + "spriteShapes": [shapes.GRAINY_FLOOR], + "palettes": [{"*": (220, 205, 185, 255), + "+": (210, 195, 175, 255),}], + "noRotates": [False] + } + }, + ] +} + +GRASS = { + "name": + "grass", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "grass", + "stateConfigs": [ + { + "state": "grass", + "layer": "background", + "sprite": "Grass" + }, + { + "state": "dessicated", + "layer": "background", + "sprite": "Floor" + }, + ], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Grass", "Floor"], + "spriteShapes": [ + shapes.GRASS_STRAIGHT, shapes.GRAINY_FLOOR + ], + "palettes": [{ + "*": (158, 194, 101, 255), + "@": (170, 207, 112, 255) + }, { + "*": (220, 205, 185, 255), + "+": (210, 
195, 175, 255), + }], + "noRotates": [False, False] + } + }, + ] +} + WALL = { "name": "wall", "components": [ @@ -109,10 +165,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -152,11 +204,27 @@ }, { "component": "Transform", + }, + ] +} + +INSIDE_SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", "kwargs": { - "position": (0, 0), - "orientation": "N" + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["insideSpawnPoints"] + }], } }, + { + "component": "Transform", + }, ] } @@ -209,15 +277,19 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Neighborhoods", "kwargs": {} }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.15 + } + } ] } @@ -261,28 +333,21 @@ def create_apple_prefab(regrowth_radius=-1.0, # pylint: disable=dangerous-defau }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", "kwargs": { "renderMode": "ascii_shape", "spriteNames": ["Apple", "AppleWait"], - "spriteShapes": [shapes.LEGACY_APPLE, shapes.LEGACY_APPLE], - "palettes": [{"*": (102, 255, 0, 255), - "@": (230, 255, 0, 255), - "&": (117, 255, 26, 255), - "#": (255, 153, 0, 255), - "x": (0, 0, 0, 0)}, - {"*": (102, 255, 0, 25), - "@": (230, 255, 0, 25), - "&": (117, 255, 26, 25), - "#": (255, 153, 0, 25), - "x": (0, 0, 0, 0)}], - "noRotates": [False, False] + "spriteShapes": [shapes.APPLE, shapes.FILL], + "palettes": [ + {"x": (0, 0, 0, 0), + "*": (214, 88, 88, 255), + "#": (194, 79, 79, 255), + "o": (53, 132, 49, 255), + "|": (102, 51, 61, 255)}, + {"i": (0, 0, 0, 0)}], + "noRotates": [True, True] } }, { @@ -313,8 +378,11 @@ def create_prefabs(regrowth_radius=-1.0, regrowth_probabilities=[0, 0.0, 0.0, 0.0]): """Returns a dictionary mapping names to template game objects.""" prefabs = { + "floor": FLOOR, + "grass": GRASS, "wall": WALL, "spawn_point": SPAWN_POINT, + "inside_spawn_point": INSIDE_SPAWN_POINT, } prefabs["apple"] = create_apple_prefab( regrowth_radius=regrowth_radius, @@ -323,7 +391,8 @@ def create_prefabs(regrowth_radius=-1.0, def create_avatar_object(player_idx: int, - target_sprite_self: Dict[str, Any]) -> Dict[str, Any]: + target_sprite_self: Dict[str, Any], + spawn_group: str) -> Dict[str, Any]: """Create an avatar object that always sees itself as blue.""" # Lua is 1-indexed. 
lua_index = player_idx + 1 @@ -354,10 +423,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -365,7 +430,8 @@ def create_avatar_object(player_idx: int, "renderMode": "ascii_shape", "spriteNames": [source_sprite_self], "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [shapes.get_palette(colors.palette[player_idx])], + "palettes": [shapes.get_palette( + colors.human_readable[player_idx])], "noRotates": [True] } }, @@ -386,7 +452,8 @@ def create_avatar_object(player_idx: int, "aliveState": live_state_name, "waitState": "playerWait", "speed": 1.0, - "spawnGroup": "spawnPoints", + "spawnGroup": spawn_group, + "postInitialSpawnGroup": "spawnPoints", "actionOrder": ["move", "turn", "fireZap"], "actionSpec": { "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, @@ -409,7 +476,7 @@ def create_avatar_object(player_idx: int, "cooldownTime": 1, "beamLength": 4, "beamRadius": 1, - "framesTillRespawn": 50, + "framesTillRespawn": 100, "penaltyForBeingZapped": 0, "rewardForZapping": 0, } @@ -433,50 +500,30 @@ def create_avatar_objects(num_players): """Returns list of avatar objects of length 'num_players'.""" avatar_objects = [] for player_idx in range(0, num_players): + spawn_group = "spawnPoints" + if player_idx < 2: + # The first two player slots always spawn inside the rooms. 
+ spawn_group = "insideSpawnPoints" + game_object = create_avatar_object(player_idx, - TARGET_SPRITE_SELF) + TARGET_SPRITE_SELF, + spawn_group=spawn_group) avatar_objects.append(game_object) return avatar_objects -def create_lab2d_settings(num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "commons_harvest", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ASCII_MAP, - "gameObjects": create_avatar_objects(num_players), - "prefabs": create_prefabs(APPLE_RESPAWN_RADIUS, - REGROWTH_PROBABILITIES), - "charPrefabMap": CHAR_PREFAB_MAP, - "scene": create_scene(), - }, - } - return lab2d_settings - - def get_config(): """Default configuration for training on the commons_harvest level.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 16 - - # Lua script configuration. - config.lab2d_settings = create_lab2d_settings(config.num_players) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. config.individual_observation_names = [ "RGB", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", ] @@ -491,7 +538,39 @@ def get_config(): "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], - "WORLD.RGB": specs.rgb(336, 504), + "WORLD.RGB": specs.rgb(144, 192), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 7 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. 
+ substrate_definition = dict( + levelName="commons_harvest", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players), + "prefabs": create_prefabs(APPLE_RESPAWN_RADIUS, + REGROWTH_PROBABILITIES), + "charPrefabMap": CHAR_PREFAB_MAP, + "scene": create_scene(), + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/commons_harvest_open.py b/meltingpot/python/configs/substrates/commons_harvest__open.py similarity index 70% rename from meltingpot/python/configs/substrates/commons_harvest_open.py rename to meltingpot/python/configs/substrates/commons_harvest__open.py index d6e272c8..cb2db6ba 100644 --- a/meltingpot/python/configs/substrates/commons_harvest_open.py +++ b/meltingpot/python/configs/substrates/commons_harvest__open.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Commons Harvest: Open. -Example video: https://youtu.be/ZwQaUj8GS6U +Example video: https://youtu.be/lZ-qpPP4BNE Apples are spread around the map and can be consumed for a reward of 1. Apples that have been consumed regrow with a per-step probability that depends on the @@ -43,7 +43,7 @@ Neural Information Processing Systems (pp. 3646-3655). 
""" -from typing import Any, Dict +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict import numpy as np @@ -56,49 +56,116 @@ REGROWTH_PROBABILITIES = [0.0, 0.0025, 0.005, 0.025] ASCII_MAP = """ -WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW -W PPPP W -W A A W -W AAA PPPP PPPP AAA W -W AAAAA AAAAA W -W AAA AAA W -W A A A A W -WP AAA AAA A A PW -WP A A A AAAA A A AAAAAA A AAA AAA A PW -WP AAA AAA A AAAAA A A AAAA A A PW -WP A A A A AAA AAA A PW -WP AA A A AA PW -WP A AAA A AAA AA PW -WP AA A AA PW -WP A A A A A PW -WP A PW -WP A PPPPPPPPPPPAPPPPPPPPPPPPPPPPPPPP A PW -WP A PW -WP A A A PW -WP AAA A A AAA PW -WPAAAAA A A AAA AAA AAAAAPW -WP AAA AAA AAA A AAAAA A A AAAA A A A AAA PW -WP A A A AAAA A A AAAAAA A AAA AAA A PW -WP AAA AAA A A A PW -W A A A A W -W A W -W A A A W -W AA PPPP PPPP A AA W -W A AAA AAA W -W PPPP W -WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW +WWWWWWWWWWWWWWWWWWWWWWWW +WAAA A A AAAW +WAA AAA AAA AAW +WA AAAAA AAAAA AW +W AAA AAA W +W A A W +W A A W +W AAA Q Q AAA W +WAAAAA AAAAAW +W AAA AAA W +W A A W +W W +W W +W W +W PPPPPPPPPPPPPPPPPP W +W PPPPPPPPPPPPPPPPPPPP W +WPPPPPPPPPPPPPPPPPPPPPPW +WWWWWWWWWWWWWWWWWWWWWWWW """ # `prefab` determines which prefab game object to use for each `char` in the # ascii map. 
CHAR_PREFAB_MAP = { - "P": "spawn_point", + "P": {"type": "all", "list": ["floor", "spawn_point"]}, + "Q": {"type": "all", "list": ["floor", "inside_spawn_point"]}, + " ": "floor", "W": "wall", - "A": "apple", + "A": {"type": "all", "list": ["grass", "apple"]}, } _COMPASS = ["N", "E", "S", "W"] +FLOOR = { + "name": "floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "floor", + "stateConfigs": [{ + "state": "floor", + "layer": "background", + "sprite": "Floor", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Floor"], + "spriteShapes": [shapes.GRAINY_FLOOR], + "palettes": [{"*": (220, 205, 185, 255), + "+": (210, 195, 175, 255),}], + "noRotates": [False] + } + }, + ] +} + +GRASS = { + "name": + "grass", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "grass", + "stateConfigs": [ + { + "state": "grass", + "layer": "background", + "sprite": "Grass" + }, + { + "state": "dessicated", + "layer": "background", + "sprite": "Floor" + }, + ], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Grass", "Floor"], + "spriteShapes": [ + shapes.GRASS_STRAIGHT, shapes.GRAINY_FLOOR + ], + "palettes": [{ + "*": (158, 194, 101, 255), + "@": (170, 207, 112, 255) + }, { + "*": (220, 205, 185, 255), + "+": (210, 195, 175, 255), + }], + "noRotates": [False, False] + } + }, + ] +} + WALL = { "name": "wall", "components": [ @@ -115,10 +182,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -158,11 +221,27 @@ }, { "component": "Transform", + }, + ] +} + +INSIDE_SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", "kwargs": { - "position": (0, 0), - "orientation": "N" + "initialState": "spawnPoint", + 
"stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["insideSpawnPoints"] + }], } }, + { + "component": "Transform", + }, ] } @@ -215,15 +294,19 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Neighborhoods", "kwargs": {} }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.15 + } + } ] } @@ -267,28 +350,21 @@ def create_apple_prefab(regrowth_radius=-1.0, # pylint: disable=dangerous-defau }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", "kwargs": { "renderMode": "ascii_shape", "spriteNames": ["Apple", "AppleWait"], - "spriteShapes": [shapes.LEGACY_APPLE, shapes.LEGACY_APPLE], - "palettes": [{"*": (102, 255, 0, 255), - "@": (230, 255, 0, 255), - "&": (117, 255, 26, 255), - "#": (255, 153, 0, 255), - "x": (0, 0, 0, 0)}, - {"*": (102, 255, 0, 25), - "@": (230, 255, 0, 25), - "&": (117, 255, 26, 25), - "#": (255, 153, 0, 25), - "x": (0, 0, 0, 0)}], - "noRotates": [False, False] + "spriteShapes": [shapes.APPLE, shapes.FILL], + "palettes": [ + {"x": (0, 0, 0, 0), + "*": (214, 88, 88, 255), + "#": (194, 79, 79, 255), + "o": (53, 132, 49, 255), + "|": (102, 51, 61, 255)}, + {"i": (0, 0, 0, 0)}], + "noRotates": [True, True] } }, { @@ -319,8 +395,11 @@ def create_prefabs(regrowth_radius=-1.0, regrowth_probabilities=[0, 0.0, 0.0, 0.0]): """Returns a dictionary mapping names to template game objects.""" prefabs = { + "floor": FLOOR, + "grass": GRASS, "wall": WALL, "spawn_point": SPAWN_POINT, + "inside_spawn_point": INSIDE_SPAWN_POINT, } prefabs["apple"] = create_apple_prefab( regrowth_radius=regrowth_radius, @@ -329,7 +408,8 @@ def create_prefabs(regrowth_radius=-1.0, def create_avatar_object(player_idx: int, - target_sprite_self: 
Dict[str, Any]) -> Dict[str, Any]: + target_sprite_self: Dict[str, Any], + spawn_group: str) -> Dict[str, Any]: """Create an avatar object that always sees itself as blue.""" # Lua is 1-indexed. lua_index = player_idx + 1 @@ -360,10 +440,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -371,7 +447,8 @@ def create_avatar_object(player_idx: int, "renderMode": "ascii_shape", "spriteNames": [source_sprite_self], "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [shapes.get_palette(colors.palette[player_idx])], + "palettes": [shapes.get_palette( + colors.human_readable[player_idx])], "noRotates": [True] } }, @@ -392,7 +469,8 @@ def create_avatar_object(player_idx: int, "aliveState": live_state_name, "waitState": "playerWait", "speed": 1.0, - "spawnGroup": "spawnPoints", + "spawnGroup": spawn_group, + "postInitialSpawnGroup": "spawnPoints", "actionOrder": ["move", "turn", "fireZap"], "actionSpec": { "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, @@ -439,50 +517,30 @@ def create_avatar_objects(num_players): """Returns list of avatar objects of length 'num_players'.""" avatar_objects = [] for player_idx in range(0, num_players): + spawn_group = "spawnPoints" + if player_idx < 2: + # The first two player slots always spawn closer to the apples. 
+ spawn_group = "insideSpawnPoints" + game_object = create_avatar_object(player_idx, - TARGET_SPRITE_SELF) + TARGET_SPRITE_SELF, + spawn_group=spawn_group) avatar_objects.append(game_object) return avatar_objects -def create_lab2d_settings(num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "commons_harvest", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ASCII_MAP, - "gameObjects": create_avatar_objects(num_players), - "prefabs": create_prefabs(APPLE_RESPAWN_RADIUS, - REGROWTH_PROBABILITIES), - "charPrefabMap": CHAR_PREFAB_MAP, - "scene": create_scene(), - }, - } - return lab2d_settings - - def get_config(): """Default configuration for training on the commons_harvest level.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 16 - - # Lua script configuration. - config.lab2d_settings = create_lab2d_settings(config.num_players) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. config.individual_observation_names = [ "RGB", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", ] @@ -497,7 +555,39 @@ def get_config(): "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], - "WORLD.RGB": specs.rgb(248, 456), + "WORLD.RGB": specs.rgb(144, 192), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 7 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. 
+ substrate_definition = dict( + levelName="commons_harvest", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players), + "prefabs": create_prefabs(APPLE_RESPAWN_RADIUS, + REGROWTH_PROBABILITIES), + "charPrefabMap": CHAR_PREFAB_MAP, + "scene": create_scene(), + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/commons_harvest_partnership.py b/meltingpot/python/configs/substrates/commons_harvest__partnership.py similarity index 69% rename from meltingpot/python/configs/substrates/commons_harvest_partnership.py rename to meltingpot/python/configs/substrates/commons_harvest__partnership.py index e7e8d0f7..ac1adf13 100644 --- a/meltingpot/python/configs/substrates/commons_harvest_partnership.py +++ b/meltingpot/python/configs/substrates/commons_harvest__partnership.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Commons Harvest: Partnership. -Example video: https://youtu.be/ODgPnxC7yYA +Example video: https://youtu.be/dH_0-APGKSs See _Commons Harvest: Open_ for the general description of the mechanics at play in this substrate. @@ -29,7 +29,7 @@ their partner and act accordingly. 
""" -from typing import Any, Dict +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict import numpy as np @@ -42,61 +42,118 @@ REGROWTH_PROBABILITIES = [0.0, 0.001, 0.005, 0.025] ASCII_MAP = """ -WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW -WW WW W -W WW -W PPPPPPPPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -W PPPPPPPPPPPPWWPPPPPPPPPP PPPPPPPPPWPPWPPPPPPPPPPP WW -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W W A II A W WW W -W W AAA II AAA W WWWWWWWWWW WW WWWWWWWWWW W -W W AAAAAIIAAAAA W W A WW A W W -W W AAA II AAA W W AAA WWWWWWWWWW AAA W W -W W A II A W WAAAAA II AAAAAW W -W W A II A W W AAA WW AAA W W -W W AAA WW AAA W W A II A W W -W WAAAAA II AAAAAW W A II A W W -W W AAA WWWWWWWWWW AAA W W AAA II AAA W W -W W A WW A W W AAAAAIIAAAAA W W -W WWWWWWWWWW WW WWWWWWWWWW W AAA II AAA W W -W WW W A II A W W -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W W W W -WW PPPPPPPPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -WW PPPPPPPPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -WW W WW -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W WW W A II A W W -W WWWWWWWWWW WW WWWWWWWWWW W AAA II AAA WW W -W W A WW A W W AAAAAIIAAAAA W W -W W AAA WWWWWWWWWW AAA W W AAA II AAA W W -W WAAAAA II AAAAAW W A II A W W -W W AAA WW AAA W W A II A W W -W W A II A W W AAA WW AAA W W -W W A II A W WAAAAA II AAAAAW W -W W AAA II AAA W W AAA WWWWWWWWWW AAA W W -W W AAAAAIIAAAAA W W A WW A W W -W W AAA II AAA W WWWWWWWWWW WW WWWWWWWWWW W -W W A II A W WW W -W WWWWWWWWWWWWWWWWWWWWWWWW WWWWWWWWWWWWWWWWWWWWWWWW W -W PPPPPPWPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -W PPPPPPWPPPPPPPPPPPPPPPPP PPPPPPPPPPPPPPPPPPPPPPPP W -W W -W WWW W W W -WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW +WWWWWWWWWWWWWWWWWWWWWWWW +WAAA A II A AAAW +WAA AAA II AAA AAW +WA AAAAAIIAAAAA AW +W AAA II AAA W +W A II A W +W A II A W +W AAA Q WW Q AAA W +WAAAAA II AAAAAW +W AAA WWWWWWWWWW AAA W +W A WW A W +WWWWWWWWWW WW 
WWWWWWWWWW +W WW W +W WWWWWWWWWWWWWWWWWW W +W PPPPPPPPPPPPPPPPPP W +W PPPPPPPPPPPPPPPPPPPP W +WPPPPPPPPPPPPPPPPPPPPPPW +WWWWWWWWWWWWWWWWWWWWWWWW """ # `prefab` determines which prefab game object to use for each `char` in the # ascii map. CHAR_PREFAB_MAP = { - "P": "spawn_point", + "P": {"type": "all", "list": ["floor", "spawn_point"]}, + "Q": {"type": "all", "list": ["floor", "inside_spawn_point"]}, + " ": "floor", "W": "wall", - "A": "apple", - "I": "hidden_role_based_punishment_tile", + "A": {"type": "all", "list": ["grass", "apple"]}, + "I": {"type": "all", "list": [ + "floor", "hidden_role_based_punishment_tile"]}, } _COMPASS = ["N", "E", "S", "W"] +FLOOR = { + "name": "floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "floor", + "stateConfigs": [{ + "state": "floor", + "layer": "background", + "sprite": "Floor", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Floor"], + "spriteShapes": [shapes.GRAINY_FLOOR], + "palettes": [{"*": (220, 205, 185, 255), + "+": (210, 195, 175, 255),}], + "noRotates": [False] + } + }, + ] +} + +GRASS = { + "name": + "grass", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "grass", + "stateConfigs": [ + { + "state": "grass", + "layer": "background", + "sprite": "Grass" + }, + { + "state": "dessicated", + "layer": "background", + "sprite": "Floor" + }, + ], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Grass", "Floor"], + "spriteShapes": [ + shapes.GRASS_STRAIGHT, shapes.GRAINY_FLOOR + ], + "palettes": [{ + "*": (158, 194, 101, 255), + "@": (170, 207, 112, 255) + }, { + "*": (220, 205, 185, 255), + "+": (210, 195, 175, 255), + }], + "noRotates": [False, False] + } + }, + ] +} + WALL = { "name": "wall", "components": [ @@ -113,10 +170,6 @@ }, { "component": 
"Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -156,11 +209,27 @@ }, { "component": "Transform", + }, + ] +} + +INSIDE_SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", "kwargs": { - "position": (0, 0), - "orientation": "N" + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["insideSpawnPoints"] + }], } }, + { + "component": "Transform", + }, ] } @@ -182,7 +251,7 @@ "kwargs": { "avatarRoleComponent": "Role", "getRoleFunction": "getRole", - "rolesToRewards": {"putative_cooperator": -5}, + "rolesToRewards": {"putative_cooperator": -10}, }} ] } @@ -236,15 +305,19 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Neighborhoods", "kwargs": {} }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.15 + } + } ] } @@ -298,18 +371,15 @@ def create_apple_prefab(regrowth_radius=-1.0, # pylint: disable=dangerous-defau "kwargs": { "renderMode": "ascii_shape", "spriteNames": ["Apple", "AppleWait"], - "spriteShapes": [shapes.LEGACY_APPLE, shapes.LEGACY_APPLE], - "palettes": [{"*": (102, 255, 0, 255), - "@": (230, 255, 0, 255), - "&": (117, 255, 26, 255), - "#": (255, 153, 0, 255), - "x": (0, 0, 0, 0)}, - {"*": (102, 255, 0, 25), - "@": (230, 255, 0, 25), - "&": (117, 255, 26, 25), - "#": (255, 153, 0, 25), - "x": (0, 0, 0, 0)}], - "noRotates": [False, False] + "spriteShapes": [shapes.APPLE, shapes.FILL], + "palettes": [ + {"x": (0, 0, 0, 0), + "*": (214, 88, 88, 255), + "#": (194, 79, 79, 255), + "o": (53, 132, 49, 255), + "|": (102, 51, 61, 255)}, + {"i": (0, 0, 0, 0)}], + "noRotates": [True, True] } }, { @@ -340,8 +410,11 @@ def create_prefabs(regrowth_radius=-1.0, regrowth_probabilities=[0, 0.0, 0.0, 0.0]): """Returns a dictionary mapping names to template game objects.""" prefabs = { + "floor": FLOOR, + "grass": GRASS, "wall": WALL, "spawn_point": SPAWN_POINT, + "inside_spawn_point": INSIDE_SPAWN_POINT, "hidden_role_based_punishment_tile": HIDDEN_ROLE_BASED_PUNISHMENT_TILE, } prefabs["apple"] = create_apple_prefab( @@ -351,7 +424,8 @@ def create_prefabs(regrowth_radius=-1.0, def create_avatar_object(player_idx: int, - target_sprite_self: Dict[str, Any]) -> Dict[str, Any]: + target_sprite_self: Dict[str, Any], + spawn_group: str) -> Mapping[str, Any]: """Create an avatar object that always sees itself as blue.""" # Lua is 1-indexed. 
lua_index = player_idx + 1 @@ -382,10 +456,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -393,7 +463,8 @@ def create_avatar_object(player_idx: int, "renderMode": "ascii_shape", "spriteNames": [source_sprite_self], "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [shapes.get_palette(colors.palette[player_idx])], + "palettes": [shapes.get_palette( + colors.human_readable[player_idx])], "noRotates": [True] } }, @@ -414,7 +485,8 @@ def create_avatar_object(player_idx: int, "aliveState": live_state_name, "waitState": "playerWait", "speed": 1.0, - "spawnGroup": "spawnPoints", + "spawnGroup": spawn_group, + "postInitialSpawnGroup": "spawnPoints", "actionOrder": ["move", "turn", "fireZap"], "actionSpec": { "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, @@ -437,7 +509,7 @@ def create_avatar_object(player_idx: int, "cooldownTime": 1, "beamLength": 4, "beamRadius": 1, - "framesTillRespawn": 50, + "framesTillRespawn": 100, "penaltyForBeingZapped": 0, "rewardForZapping": 0, } @@ -467,50 +539,30 @@ def create_avatar_objects(num_players): """Returns list of avatar objects of length 'num_players'.""" avatar_objects = [] for player_idx in range(0, num_players): + spawn_group = "spawnPoints" + if player_idx < 2: + # The first two player slots always spawn inside the rooms. 
+ spawn_group = "insideSpawnPoints" + game_object = create_avatar_object(player_idx, - TARGET_SPRITE_SELF) + TARGET_SPRITE_SELF, + spawn_group=spawn_group) avatar_objects.append(game_object) return avatar_objects -def create_lab2d_settings(num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "commons_harvest", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ASCII_MAP, - "gameObjects": create_avatar_objects(num_players), - "prefabs": create_prefabs(APPLE_RESPAWN_RADIUS, - REGROWTH_PROBABILITIES), - "charPrefabMap": CHAR_PREFAB_MAP, - "scene": create_scene(), - }, - } - return lab2d_settings - - def get_config(): """Default configuration for training on the commons_harvest level.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 16 - - # Lua script configuration. - config.lab2d_settings = create_lab2d_settings(config.num_players) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. config.individual_observation_names = [ "RGB", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", ] @@ -525,7 +577,39 @@ def get_config(): "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], - "WORLD.RGB": specs.rgb(336, 504), + "WORLD.RGB": specs.rgb(144, 192), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 7 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. 
+ substrate_definition = dict( + levelName="commons_harvest", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players), + "prefabs": create_prefabs(APPLE_RESPAWN_RADIUS, + REGROWTH_PROBABILITIES), + "charPrefabMap": CHAR_PREFAB_MAP, + "scene": create_scene(), + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/coop_mining.py b/meltingpot/python/configs/substrates/coop_mining.py new file mode 100644 index 00000000..cd883526 --- /dev/null +++ b/meltingpot/python/configs/substrates/coop_mining.py @@ -0,0 +1,502 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for coop_mining substrate. + +Example video: https://youtu.be/KvwGUinjIsk + +Two different types of ore appear at random in empty spaces. Players are +equipped with a mining beam that attempts to extract the ore. Iron ore (gray) +can be mined by a single player and confers a reward of 1 when extracted. Gold +ore (yellow) has to be mined by exactly two players within a time window of 3 +timesteps and confers a reward of 8 to each of them. 
When a player mines a gold +ore, it flashes to indicate that it is ready to be mined by another player. If +no other player, or if too many players try to mine within that time, it will +revert back to normal. + +This game has some properties in common with Stag-Hunt. Mining iron is akin to +playing Hare, with a reliable payoff, without needing to coordinate with others. +Mining gold is akin to Stag because it has an opportunity cost (not mining +iron). If no one else helps, mining gold gives no reward. However, if two players +stick together (spatially) and go around mining gold, they will both receive +higher reward than if they were mining iron. +""" + +from collections.abc import Mapping, Sequence +from typing import Any + +from ml_collections import config_dict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + + +NUM_ORE_TYPES = 2 +MAX_TOKENS_PER_TYPE = 6 + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWWWWWW +WOOOOOOOOOOOOOOOOOOOOOOOOOW +WOPOOOOOOOOOPOOOOOPOOOOOPOW +WOOOOOOOOWOOOOOOOOOOOOOOOOW +WOOOOOOOOWOOOOOOOOOOWOOOOOW +WOOOOOOOOWOOOOOOOOOOWOOOOOW +WOOOOOOOOWWWWWWWOOOOWOOOPOW +WOPOWWOOOOWOOOOOOOOOWOOOOOW +WOOOOOOOOOWOOPOOOOOOOOOOOOW +WOOOOOOOOOWOOOOOWWWOOOOOOOW +WOOOOOOOOOWOOOOOOOOOOOOOOOW +WOOOOOOOOOOOOOOOOOOOOOOOPOW +WOPOOOWWWOOOOOOWWWWWWWWOOOW +WOOWWWWOOOOOOOOOOOOOOOOOOOW +WOOOOOWOOOOWOOOOOPOOOOOOOOW +WOOOOOWOOOOWOOOOOOOOOOOOPOW +WOOOOOWOOOOOWOOOOOOOOWOOOOW +WOOOOOOWOOOOOWWWWOOOOWOOOOW +WOPOOOOOWOOOOOOOOOOOOWOOOOW +WOOOOOOOOWOOOPOOOOOOOOOOPOW +WOOOOOOOOOWOOOOOOOOWOOOOOOW +WOOOOWOOOOOOOOOOOOOWOOOOOOW +WOOOOWOOOOOOOOOWWWWWWWWOOOW +WOOOOWOOOOOOOOOOOOWOOOOOOOW +WOPOOOOOOPOOOOOOOPOOOOOOPOW +WOOOOOOOOOOOOOOOOOOOOOOOOOW +WWWWWWWWWWWWWWWWWWWWWWWWWWW +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
+CHAR_PREFAB_MAP = { + "P": "spawn_point", + "W": "wall", + "O": "ore", +} + +_COMPASS = ["N", "E", "S", "W"] + + +SCENE = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.2 + } + }, + ] +} + + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall",], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [True] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "mine" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "logic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +RAW_ORE = """ +xxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxx +xxxxxxxxxxxxxxxx +xxxxxx*&&@xxxxxx +xxxxx****&@xxxxx +xxxx**&@*&**xxxx +xxxx*&*&*&@@@xxx +xxx****@&***&@xx +xx****&&*****&&x +******&*****&**& +**************** +""" + +PARTIAL_ORE = """ +xxxxxxxxxxxxxxxx +xxxxxx#xx##xxxxx +xxxxxxx##xxxxxxx +xxxxxx##x#xxxxxx +x##xxxxxxxxxxxxx +xx###xxxxxxxx##x +xxx###xxx####xxx +xxxx#######xxxxx +xxxx######xxxxxx 
+xx###***###xxxxx +##xx**&@*&###xxx +xxxx*&*&*&@@##xx +xxx****@&***&@xx +xx****&&*****&&x +******&*****&**& +**************** +""" + +IRON_PALETTE = { + "*": (70, 60, 70, 255), + "&": (140, 120, 140, 255), + "@": (170, 160, 170, 255), + "#": (255, 240, 255, 255), + "x": (0, 0, 0, 0) +} + +GOLD_PALETTE = { + "*": (90, 90, 20, 255), + "&": (180, 180, 40, 255), + "@": (220, 220, 60, 255), + "#": (255, 255, 240, 255), + "x": (0, 0, 0, 0) +} + +ORE = { + "name": "ore", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "oreWait", + "stateConfigs": [ + {"state": "oreWait", + "layer": "lowerPhysical", + "sprite": "oreWait", + "groups": []}, + {"state": "ironRaw", + "layer": "lowerPhysical", + "sprite": "ironRaw", + "groups": ["tokens"]}, + {"state": "goldRaw", + "layer": "lowerPhysical", + "sprite": "goldRaw", + "groups": ["tokens"]}, + {"state": "goldPartial", + "layer": "lowerPhysical", + "sprite": "goldPartial", + "groups": ["tokens"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["oreWait", "ironRaw", "goldRaw", "goldPartial"], + "spriteShapes": [RAW_ORE, RAW_ORE, RAW_ORE, PARTIAL_ORE], + "palettes": [shapes.INVISIBLE_PALETTE, IRON_PALETTE, + GOLD_PALETTE, GOLD_PALETTE], + "noRotates": [True] * 4, + } + }, + { + "component": "Ore", + "kwargs": { + "waitState": "oreWait", + "rawState": "goldRaw", + "partialState": "goldPartial", + "minNumMiners": 2, + "miningWindow": 3, + } + }, + { + "component": "Ore", + "kwargs": { + "waitState": "oreWait", + "rawState": "ironRaw", + "partialState": "ironRaw", + "minNumMiners": 1, + "miningWindow": 2, + } + }, + { + "component": "FixedRateRegrow", + "kwargs": { + "liveStates": ["ironRaw", "goldRaw"], + "liveRates": [0.0002, 0.00008], + "waitState": "oreWait", + } + }, + ] +} + + +PLAYER_COLOR_PALETTES = [] +for human_readable_color in colors.human_readable: + 
PLAYER_COLOR_PALETTES.append(shapes.get_palette(human_readable_color)) + + +def get_avatar_object(num_players: int, player_index: int): + lua_index = player_index + 1 + color_palette = PLAYER_COLOR_PALETTES[player_index] + avatar_sprite_name = "avatarSprite{}".format(lua_index) + return { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "player", + "stateConfigs": [ + {"state": "player", + "layer": "upperPhysical", + "sprite": avatar_sprite_name, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "MineBeam", + }, + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [avatar_sprite_name], + "spriteShapes": [shapes.CUTE_AVATAR], + "palettes": [color_palette], + "noRotates": [True] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": "player", + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "mine"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "mine": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + } + } + }, + { + "component": "MineBeam", + "kwargs": { + "cooldownTime": 3, + "beamLength": 3, + "beamRadius": 0, + "agentRole": "none", + "roleRewardForMining": { + "none": [0, 0], + "golddigger": [0, 0.2], "irondigger": [0, 0]}, + "roleRewardForExtracting": { + "none": [1, 8], + "golddigger": [-1, 8], "irondigger": [8, -1]}, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True, + }, + }, + { + "component": "MiningTracker", + "kwargs": { + "numPlayers": 
num_players, + "numOreTypes": NUM_ORE_TYPES, + } + }, + ] + } + + +def get_avatar_objects(num_players: int): + return [get_avatar_object(num_players, i) for i in range(num_players)] + + +# PREFABS is a dictionary mapping names to template game objects that can +# be cloned and placed in multiple locations according to an ascii map. +PREFABS = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + "ore": ORE, +} + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "mine": 0} +FORWARD = {"move": 1, "turn": 0, "mine": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "mine": 0} +BACKWARD = {"move": 3, "turn": 0, "mine": 0} +STEP_LEFT = {"move": 4, "turn": 0, "mine": 0} +TURN_LEFT = {"move": 0, "turn": -1, "mine": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "mine": 0} +MINE = {"move": 0, "turn": 0, "mine": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + MINE, +) + + +def get_config(): + """Default configuration for the coop_mining level.""" + config = config_dict.ConfigDict() + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "POSITION": specs.OBSERVATION["POSITION"], + "WORLD.RGB": specs.rgb(216, 216), + }) + + # The roles assigned to each player. 
+ config.valid_roles = frozenset({"default", "target"}) + config.default_player_roles = ("default",) * 6 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate given player roles.""" + del config + num_players = len(roles) + + return dict( + levelName="coop_mining", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": get_avatar_objects(num_players), + "scene": SCENE, + "prefabs": PREFABS, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) diff --git a/meltingpot/python/configs/substrates/daycare.py b/meltingpot/python/configs/substrates/daycare.py new file mode 100644 index 00000000..7c84d4ad --- /dev/null +++ b/meltingpot/python/configs/substrates/daycare.py @@ -0,0 +1,937 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Configuration for daycare.""" + +import copy +from typing import Any, Dict, Mapping, Sequence + +from ml_collections import config_dict as configdict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +_COMPASS = ["N", "E", "S", "W"] + +ASCII_MAP = """ +/__________________+ +!~~~~~~~~~~~~~~~~~~| +!~~~~~~~~~~~~~~~~~~| +!~~~~~~~~~~~~~~~~~~| +!~~~~~~~~~~~~~~~~~~| +!~~~~~~~PPP~~~~~~~~| +!~~~~~~~PPP~~~~~~~~| +!~~~~~~~PPP~~~~~~~~| +!~~~~~~~~~~~~~~~~~~| +!~~~~~~~~~~~~~~~~~~| +!~~~~~~~~~~~~~~~~~~| +!~~~~~~~~~~~~~~~~~~| +(------------------) +""" + +# Map a character to the prefab it represents in the ASCII map. +CHAR_PREFAB_MAP = { + # wall prefabs + "/": "nw_wall_corner", + "+": "ne_wall_corner", + ")": "se_wall_corner", + "(": "sw_wall_corner", + "_": "wall_north", + "|": "wall_east", + "-": "wall_south", + "!": "wall_west", + + # non-wall prefabs + "P": {"type": "all", "list": ["ground", "spawn_point"]}, + "~": {"type": "all", "list": ["ground", "tree", "fruit"]}, +} +INVISIBLE = (0, 0, 0, 0) + +NW_WALL_CORNER = { + "name": "nw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_wall_corner", + "stateConfigs": [{ + "state": "nw_wall_corner", + "layer": "superOverlay", + "sprite": "NwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NwWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_NW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +NE_WALL_CORNER = { + "name": "ne_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_wall_corner", + "stateConfigs": [{ + "state": "ne_wall_corner", + "layer": "superOverlay", + "sprite": "NeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": 
"Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NeWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_NE_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SE_WALL_CORNER = { + "name": "se_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "se_wall_corner", + "stateConfigs": [{ + "state": "se_wall_corner", + "layer": "superOverlay", + "sprite": "SeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SeWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_SE_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SW_WALL_CORNER = { + "name": "sw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sw_wall_corner", + "stateConfigs": [{ + "state": "sw_wall_corner", + "layer": "superOverlay", + "sprite": "SwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SwWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_SW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +NW_INNER_WALL_CORNER = { + "name": "nw_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_inner_wall_corner", + "stateConfigs": [{ + "state": "nw_inner_wall_corner", + "layer": "superOverlay", + "sprite": "NwInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NwInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_NW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +NE_INNER_WALL_CORNER = { + "name": "ne_inner_wall_corner", + "components": [ + { + 
"component": "StateManager", + "kwargs": { + "initialState": "ne_inner_wall_corner", + "stateConfigs": [{ + "state": "ne_inner_wall_corner", + "layer": "superOverlay", + "sprite": "NeInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NeInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_NE_CORNER], + "palettes": [{"b": (166, 162, 139, 255), + "c": (110, 108, 92, 255), + "o": (78, 78, 78, 255)}], + "noRotates": [False] + } + }, + ] +} + +SE_INNER_WALL_CORNER = { + "name": "se_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "se_inner_wall_corner", + "stateConfigs": [{ + "state": "se_inner_wall_corner", + "layer": "superOverlay", + "sprite": "SeInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SeInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_SE_CORNER], + "palettes": [{"b": (166, 162, 139, 255), + "c": (110, 108, 92, 255), + "o": (78, 78, 78, 255)}], + "noRotates": [False] + } + }, + ] +} + +SW_INNER_WALL_CORNER = { + "name": "sw_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sw_inner_wall_corner", + "stateConfigs": [{ + "state": "sw_inner_wall_corner", + "layer": "superOverlay", + "sprite": "SwInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SwInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_SW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +WALL_NORTH = { + "name": "wall_north", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_north", + "stateConfigs": [{ + "state": "wall_north", + "layer": 
"superOverlay", + "sprite": "WallNorth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallNorth"], + "spriteShapes": [shapes.BRICK_WALL_NORTH], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +WALL_EAST = { + "name": "wall_east", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_east", + "stateConfigs": [{ + "state": "wall_east", + "layer": "superOverlay", + "sprite": "WallEast", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallEast"], + "spriteShapes": [shapes.BRICK_WALL_EAST], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +WALL_SOUTH = { + "name": "wall_south", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_south", + "stateConfigs": [{ + "state": "wall_south", + "layer": "superOverlay", + "sprite": "WallSouth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallSouth"], + "spriteShapes": [shapes.BRICK_WALL_SOUTH], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + + +WALL_WEST = { + "name": "wall_west", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_west", + "stateConfigs": [{ + "state": "wall_west", + "layer": "superOverlay", + "sprite": "WallWest", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallWest"], + "spriteShapes": [shapes.BRICK_WALL_WEST], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +GROUND = { + "name": "ground", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": 
"ground", + "stateConfigs": [{ + "state": "ground", + "layer": "background", + "sprite": "groundSprite", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["groundSprite"], + "spriteShapes": [shapes.DIRT_PATTERN], + "palettes": [{"X": (155, 118, 83, 255), + "x": (149, 114, 80, 255),}], + "noRotates": [True] + } + }, + ] +} + + +def get_fruit_tree_palette(fruit_type): + """Return a palette with the correct colored fruit.""" + palette = copy.deepcopy(shapes.TREE_PALETTE) + if fruit_type == "ripe_apple": + palette["Z"] = (255, 0, 0, 255) + elif fruit_type == "ripe_banana": + palette["Z"] = (255, 255, 53, 255) + elif fruit_type == "unripe_apple": + palette["Z"] = (128, 0, 0, 255) + elif fruit_type == "unripe_banana": + palette["Z"] = (153, 153, 0, 255) + return palette + +TREE = { + "name": "tree", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "treeWait", + "stateConfigs": [ + {"state": "treeWait"}, + { + "state": "appleTree", + "layer": "lowerPhysical", + "sprite": "appleTreeSprite", + }, + { + "state": "appleShrub", + "layer": "lowerPhysical", + "sprite": "appleShrubSprite", + }, + { + "state": "bananaTree", + "layer": "lowerPhysical", + "sprite": "bananaTreeSprite", + }, + { + "state": "bananaShrub", + "layer": "lowerPhysical", + "sprite": "bananaShrubSprite", + }, + ], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["appleTreeSprite", + "bananaTreeSprite", + "appleShrubSprite", + "bananaShrubSprite",], + "spriteShapes": [shapes.EMPTY_TREE, + shapes.EMPTY_TREE, + shapes.EMPTY_SHRUB, + shapes.EMPTY_SHRUB], + "palettes": [get_fruit_tree_palette("ripe_apple"), + get_fruit_tree_palette("ripe_banana"), + get_fruit_tree_palette("ripe_apple"), + get_fruit_tree_palette("ripe_banana")], + "noRotates": [True] * 4, + } + }, + { + "component": "TreeType", + 
"kwargs": { + "probabilities": { + "empty": 0.8, + "appleTree": 0.15, + "appleShrub": 0.01, + "bananaTree": 0.03, + # lower probability that child can pick up what they like + "bananaShrub": 0.01, + } + } + }, + ] +} + +FRUIT = { + "name": "fruit", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "fruitWait", + "stateConfigs": [ + {"state": "fruitWait"}, + {"state": "fruitEaten"}, + { + "state": "applePicked", + "layer": "overlay", + "sprite": "appleSprite", + }, + { + "state": "appleInTree", + "layer": "upperPhysical", + "sprite": "appleInTreeSprite", + }, + { + "state": "appleInShrub", + "layer": "upperPhysical", + "sprite": "appleInShrubSprite", + }, + { + "state": "bananaPicked", + "layer": "overlay", + "sprite": "bananaSprite", + }, + { + "state": "bananaInTree", + "layer": "upperPhysical", + "sprite": "bananaInTreeSprite", + }, + { + "state": "bananaInShrub", + "layer": "upperPhysical", + "sprite": "bananaInShrubSprite", + }, + ], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["appleSprite", + "appleInTreeSprite", + "appleInShrubSprite", + "bananaSprite", + "bananaInTreeSprite", + "bananaInShrubSprite", + ], + "spriteShapes": [shapes.HD_APPLE, + shapes.FRUIT_IN_TREE, + shapes.FRUIT_IN_SHRUB, + shapes.HD_APPLE, + shapes.FRUIT_IN_TREE, + shapes.FRUIT_IN_SHRUB, + ], + "palettes": [shapes.get_palette((255, 0, 0, 255)), + get_fruit_tree_palette("ripe_apple"), + get_fruit_tree_palette("ripe_apple"), + shapes.get_palette((255, 255, 53, 255)), + get_fruit_tree_palette("ripe_banana"), + get_fruit_tree_palette("ripe_banana"), + ], + "noRotates": [True] * 6, + } + }, + { + "component": "Graspable", + }, + { + "component": "FruitType", + "kwargs": {"framesTillAppleRespawn": 50} + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ 
+ "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + {"component": "Transform"}, + ] +} + + +# PREFABS is a dictionary mapping names to template game objects that can +# be cloned and placed in multiple locations according to an ascii map. +PREFABS = { + "nw_wall_corner": NW_WALL_CORNER, + "ne_wall_corner": NE_WALL_CORNER, + "se_wall_corner": SE_WALL_CORNER, + "sw_wall_corner": SW_WALL_CORNER, + "nw_inner_wall_corner": NW_INNER_WALL_CORNER, + "ne_inner_wall_corner": NE_INNER_WALL_CORNER, + "se_inner_wall_corner": SE_INNER_WALL_CORNER, + "sw_inner_wall_corner": SW_INNER_WALL_CORNER, + "wall_north": WALL_NORTH, + "wall_east": WALL_EAST, + "wall_south": WALL_SOUTH, + "wall_west": WALL_WEST, + # non-wall prefabs + "spawn_point": SPAWN_POINT, + "ground": GROUND, + "tree": TREE, + "fruit": FRUIT, +} + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "eat": 0, "grasp": 0} +FORWARD = {"move": 1, "turn": 0, "eat": 0, "grasp": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "eat": 0, "grasp": 0} +BACKWARD = {"move": 3, "turn": 0, "eat": 0, "grasp": 0} +STEP_LEFT = {"move": 4, "turn": 0, "eat": 0, "grasp": 0} +TURN_LEFT = {"move": 0, "turn": -1, "eat": 0, "grasp": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "eat": 0, "grasp": 0} +EAT = {"move": 0, "turn": 0, "eat": 1, "grasp": 0} +GRASP = {"move": 0, "turn": 0, "eat": 0, "grasp": 1} + +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + EAT, + GRASP, +) + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + {"component": "Transform"}, + ] + } + + return scene + + +def _create_avatar_object(player_idx: int, is_child: bool) -> Dict[str, 
Any]: + """Create an avatar object.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + live_state_name = "player{}".format(lua_index) + avatar_sprite_name = "avatarSprite{}".format(lua_index) + + if is_child: + color_palette = shapes.get_palette(colors.palette[3]) + sprite = shapes.CUTE_AVATAR_CHILD + can_grasp_tree = False + # child gets reward for eating bananas + apple_reward = 0 + banana_reward = 1 + grasp_success_probability = 0.3 + # child sees trees as shrubs + custom_sprite_map = {"appleTreeSprite": "appleShrubSprite", + "appleInTreeSprite": "appleInShrubSprite", + "bananaTreeSprite": "bananaShrubSprite", + "bananaInTreeSprite": "bananaInShrubSprite"} + else: + color_palette = shapes.get_palette(colors.palette[0]) + sprite = shapes.CUTE_AVATAR + can_grasp_tree = True + apple_reward = 1 + banana_reward = 1 + grasp_success_probability = 1 + # parent sees bananas as apples + custom_sprite_map = {"bananaTreeSprite": "appleTreeSprite", + "bananaShrubSprite": "appleShrubSprite", + "bananaInTreeSprite": "appleInTreeSprite", + "bananaInShrubSprite": "appleInShrubSprite", + "bananaSprite": "appleSprite",} + + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + live_state_name, + "stateConfigs": [ + # Initial player state. + { + "state": live_state_name, + "layer": "superOverlay", + "sprite": avatar_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + # Player wait type for times when they are zapped out. 
+ { + "state": "playerWait", + "groups": ["playerWaits"] + }, + ] + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [avatar_sprite_name], + "spriteShapes": [sprite], + "palettes": [color_palette], + "noRotates": [True] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "spawnGroup": "spawnPoints", + "actionOrder": ["move", + "turn", + "eat", + "grasp"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "eat": {"default": 0, "min": 0, "max": 1}, + "grasp": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + } + }, + { + "component": "Role", + "kwargs": { + "isChild": is_child, + } + }, + { + "component": "Eating", + "kwargs": { + "bananaReward": banana_reward, + "appleReward": apple_reward, + } + }, + { + "component": "PlayerGrasp", + "kwargs": { + "shape": shapes.GRASP_SHAPE, + "palette": color_palette, + "canGraspTree": can_grasp_tree, + "graspSuccessProbability": grasp_success_probability, + "attentiveParentPseudoreward": 0.0, + "droppingParentPseudoreward": 0.0, + "tryingChildPseudoreward": 0.0, + "tryingChildBananaPseudoreward": 0.0, + } + }, + { + "component": "AvatarRespawn", + "kwargs": { + "framesTillRespawn": 100, + } + }, + { + "component": "Hunger", + "kwargs": { + "framesTillHungry": 200, + } + }, + { + "component": "HungerObserver", + "kwargs": { + "needComponent": "Hunger", + }, + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + ] + } + return avatar_object + + +def _build_child_objects(player_idx: int): + """Build child avatar objects.""" + avatar_object = _create_avatar_object( + player_idx, is_child=True) + 
game_objects = [] + game_objects.append(avatar_object) + return game_objects + + +def _build_parent_objects(player_idx: int): + """Build parent avatar objects.""" + avatar_object = _create_avatar_object( + player_idx, is_child=False) + game_objects = [] + game_objects.append(avatar_object) + return game_objects + + +def create_avatar_objects(roles: Sequence[str]): + """Returns list of avatar objects of length 'num_players'.""" + avatar_objects_and_helpers = [] + for player_idx, role in enumerate(roles): + if role == "child": + avatar_objects_and_helpers.extend(_build_child_objects(player_idx)) + elif role == "parent": + avatar_objects_and_helpers.extend(_build_parent_objects(player_idx)) + elif role == "default": + # Parents and children are alternating, parents in even positions. + if player_idx % 2 == 0: + avatar_objects_and_helpers.extend(_build_parent_objects(player_idx)) + else: + avatar_objects_and_helpers.extend(_build_child_objects(player_idx)) + else: + raise ValueError(f"Unrecognized role: {role}") + + return avatar_objects_and_helpers + + +def get_config(): + """Default configuration for the daycare substrate.""" + config = configdict.ConfigDict() + + # Specify the number of players to particate in each episode (optional). + config.recommended_num_players = 2 + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "HUNGER", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "HUNGER": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(104, 160,), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"child", "parent"}) + config.default_player_roles = ("child", "parent") + return config + + +def build( + roles: Sequence[str], + config: configdict.ConfigDict, +) -> Mapping[str, Any]: + """Build this substrate given player roles.""" + del config + substrate_definition = dict( + levelName="daycare", + levelDirectory="meltingpot/lua/levels", + numPlayers=len(roles), + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation=dict( + map=ASCII_MAP, + gameObjects=create_avatar_objects(roles), + scene=create_scene(), + prefabs=PREFABS, + charPrefabMap=CHAR_PREFAB_MAP, + ), + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/externality_mushrooms.py b/meltingpot/python/configs/substrates/externality_mushrooms.py new file mode 100644 index 00000000..20ec5465 --- /dev/null +++ b/meltingpot/python/configs/substrates/externality_mushrooms.py @@ -0,0 +1,1077 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Externality Mushrooms. 
+ +Externality mushrooms is an immediate feedback collective action problem and +social dilemma. Unlike the other sequential social dilemmas in this suite, there +is no delay between the time when an agent takes an antisocial (or prosocial) +action and when its effect is felt by all other players. Thus it is a +sequential social dilemma in the sense of Leibo et al. 2017, but not an +intertemporal social dilemma in the sense of Hughes et al. 2018. + +Three types of mushrooms are spread around the map and can be consumed for a +reward. Eating a red mushroom gives a reward of 1 to the individual who +ate the mushroom. Eating a green mushroom gives a reward of 2 and it gets +divided equally among all individuals. Eating a blue mushroom gives a reward of +3 and it gets divided among the individuals except the individual who ate the +mushroom. Mushroom regrowth depends on the type of the mushrooms eaten by +individuals. Red mushrooms regrow with a probability of 0.25 when a mushroom of +any color is eaten. Green mushrooms regrow with a probability of 0.4 when a +green or blue mushroom is eaten. Blue mushrooms regrow with a probability of 0.6 +when a blue mushroom is eaten. Each mushroom has a time period that it takes to +digest it. An individual who ate a mushroom gets frozen during the time they are +digesting it. With the values configured in this file, red mushrooms get +digested instantly, while green and blue mushrooms take 10 and 15 steps to +digest respectively. In addition, unharvested mushrooms spoil (and get removed +from the game) after a period of time. With the values configured in this file, +red, green and blue mushrooms spoil after 200, 100 and 75 time steps +respectively. + +References: + +Leibo JZ, Zambaldi V, Lanctot M, Marecki J, Graepel T. Multi-agent Reinforcement +Learning in Sequential Social Dilemmas (2017). AAMAS. + +Hughes E, Leibo JZ, Phillips MG, Tuyls K, Duenez-Guzman EA, Garcia Castaneda A, +Dunning I, Zhu T, McKee KR, Koster R, Roff H, Graepel T. Inequity aversion +improves cooperation in intertemporal social dilemmas (2018).
NeurIPS. +""" + +from typing import Any, Dict, Mapping, Sequence, Text + +from ml_collections import config_dict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import game_object_utils +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + + +PrefabConfig = game_object_utils.PrefabConfig + +_COMPASS = ["N", "E", "S", "W"] + +MARKING_SPRITE = """ +oxxxxxxo +xoxxxxox +xxoxxoxx +xxxooxxx +xxxooxxx +xxoxxoxx +xoxxxxox +oxxxxxxo +""" + +NW_WALL_CORNER = { + "name": "nw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_wall_corner", + "stateConfigs": [{ + "state": "nw_wall_corner", + "layer": "upperPhysical", + "sprite": "NwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NwWallCorner"], + "spriteShapes": [shapes.FENCE_NW_CORNER], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +NE_WALL_CORNER = { + "name": "ne_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_wall_corner", + "stateConfigs": [{ + "state": "ne_wall_corner", + "layer": "upperPhysical", + "sprite": "NeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NeWallCorner"], + "spriteShapes": [shapes.FENCE_NE_CORNER], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +NE_INNER_WALL_CORNER = { + "name": "ne_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_inner_wall_corner", + "stateConfigs": [{ + "state": "ne_inner_wall_corner", + "layer": 
"upperPhysical", + "sprite": "ne_inner_wall_corner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["ne_inner_wall_corner"], + "spriteShapes": [shapes.FENCE_INNER_NE_CORNER], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +NW_INNER_WALL_CORNER = { + "name": "nw_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_inner_wall_corner", + "stateConfigs": [{ + "state": "nw_inner_wall_corner", + "layer": "upperPhysical", + "sprite": "nw_inner_wall_corner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["nw_inner_wall_corner"], + "spriteShapes": [shapes.FENCE_INNER_NW_CORNER], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +SE_WALL_CORNER = { + "name": "se_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "se_wall_corner", + "stateConfigs": [{ + "state": "se_wall_corner", + "layer": "upperPhysical", + "sprite": "SeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SeWallCorner"], + "spriteShapes": [shapes.FENCE_SE_CORNER], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +SW_WALL_CORNER = { + "name": "sw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sw_wall_corner", + "stateConfigs": [{ + "state": "sw_wall_corner", + "layer": "upperPhysical", + "sprite": "SwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + 
"component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SwWallCorner"], + "spriteShapes": [shapes.FENCE_SW_CORNER], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +WALL_SHADOW_SW = { + "name": "wall_shadow_sw", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_shadow_sw", + "stateConfigs": [{ + "state": "wall_shadow_sw", + "layer": "upperPhysical", + "sprite": "wall_shadow_sw", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["wall_shadow_sw"], + "spriteShapes": [shapes.FENCE_SHADOW_SW], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +WALL_SHADOW_S = { + "name": "wall_shadow_s", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_shadow_s", + "stateConfigs": [{ + "state": "wall_shadow_s", + "layer": "upperPhysical", + "sprite": "wall_shadow_s", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["wall_shadow_s"], + "spriteShapes": [shapes.FENCE_SHADOW_S], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +WALL_SHADOW_SE = { + "name": "wall_shadow_se", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_shadow_se", + "stateConfigs": [{ + "state": "wall_shadow_se", + "layer": "upperPhysical", + "sprite": "wall_shadow_se", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["wall_shadow_se"], + "spriteShapes": [shapes.FENCE_SHADOW_SE], + "palettes": 
[shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +WALL_NORTH = { + "name": "wall_north", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_north", + "stateConfigs": [{ + "state": "wall_north", + "layer": "upperPhysical", + "sprite": "WallNorth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallNorth"], + "spriteShapes": [shapes.FENCE_N], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +WALL_EAST = { + "name": "wall_east", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_east", + "stateConfigs": [{ + "state": "wall_east", + "layer": "upperPhysical", + "sprite": "WallEast", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallEast"], + "spriteShapes": [shapes.FENCE_E], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + +WALL_SOUTH = { + "name": "wall_south", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_south", + "stateConfigs": [{ + "state": "wall_south", + "layer": "upperPhysical", + "sprite": "WallSouth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallSouth"], + "spriteShapes": [shapes.FENCE_S], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + + +WALL_WEST = { + "name": "wall_west", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": 
"wall_west", + "stateConfigs": [{ + "state": "wall_west", + "layer": "upperPhysical", + "sprite": "WallWest", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallWest"], + "spriteShapes": [shapes.FENCE_W], + "palettes": [shapes.FENCE_PALETTE_BROWN], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "zapHit"}}, + ] +} + + +def get_marking_palette(alpha: float) -> Mapping[str, Sequence[int]]: + alpha_uint8 = int(alpha * 255) + assert alpha_uint8 >= 0.0 and alpha_uint8 <= 255, "Color value out of range." + return {"x": shapes.ALPHA, "o": (0, 0, 0, alpha_uint8)} + +DIRT = { + "name": "dirt", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "dirt", + "stateConfigs": [{ + "state": "dirt", + "layer": "background", + "sprite": "Dirt", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Dirt"], + "spriteShapes": [shapes.DIRT_PATTERN], + "palettes": [{ + "x": (81, 70, 32, 255), + "X": (89, 77, 36, 255), + }], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "logic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + + +def create_mushroom(initial_state: Text = "wait"): + """Create a mushroom prefab object.""" + + mushroom_prefab = { + "name": "mushroom", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": initial_state, + "stateConfigs": [ + { + "state": "fullInternalityZeroExternality", + "layer": "lowerPhysical", + "sprite": "FullInternalityZeroExternality", + "groups": ["fullInternalityZeroExternality"], + }, + { + "state": "halfInternalityHalfExternality", + 
"layer": "lowerPhysical", + "sprite": "HalfInternalityHalfExternality", + "groups": ["halfInternalityHalfExternality"], + }, + { + "state": "zeroInternalityFullExternality", + "layer": "lowerPhysical", + "sprite": "ZeroInternalityFullExternality", + "groups": ["zeroInternalityFullExternality"], + }, + { + "state": "negativeInternalityNegativeExternality", + "layer": "lowerPhysical", + "sprite": "NegativeInternalityNegativeExternality", + "groups": ["negativeInternalityNegativeExternality"], + }, + { + "state": "wait", + }, + ], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["FullInternalityZeroExternality", + "HalfInternalityHalfExternality", + "ZeroInternalityFullExternality", + "NegativeInternalityNegativeExternality"], + "spriteShapes": [shapes.MUSHROOM] * 4, + "palettes": [ + shapes.MUSHROOM_RED_PALETTE, + shapes.MUSHROOM_GREEN_PALETTE, + shapes.MUSHROOM_BLUE_PALETTE, + shapes.MUSHROOM_ORANGE_PALETTE, + ], + "noRotates": [True] * 4 + } + }, + { + "component": "MushroomEating", + "kwargs": { + "totalReward": { + "fullInternalityZeroExternality": 1, + "halfInternalityHalfExternality": 2, + "zeroInternalityFullExternality": 3, + "negativeInternalityNegativeExternality": -1.0, + }, + "liveStates": ("fullInternalityZeroExternality", + "halfInternalityHalfExternality", + "zeroInternalityFullExternality", + "negativeInternalityNegativeExternality"), + "numSporesReleasedWhenEaten": { + "fullInternalityZeroExternality": 3, + "halfInternalityHalfExternality": 3, + "zeroInternalityFullExternality": 3, + "negativeInternalityNegativeExternality": 1, + }, + "digestionTimes": { + "fullInternalityZeroExternality": 0, + "halfInternalityHalfExternality": 10, + "zeroInternalityFullExternality": 15, + "negativeInternalityNegativeExternality": 15, + }, + "destroyOnEating": { + "negativeInternalityNegativeExternality": { + "typeToDestroy": "fullInternalityZeroExternality", + 
"percentToDestroy": 0.25}, + }, + }, + }, + { + "component": "MushroomGrowable", + "kwargs": {} + }, + { + "component": "Destroyable", + "kwargs": { + "initialHealth": 1, + "waitState": "wait", + } + }, + { + "component": "Perishable", + "kwargs": { + "waitState": "wait", + "delayPerState": { + "fullInternalityZeroExternality": 200, + "halfInternalityHalfExternality": 100, + "zeroInternalityFullExternality": 75, + "negativeInternalityNegativeExternality": 1e7, + } + } + }, + ] + } + return mushroom_prefab + + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "fireZap": 0} +FORWARD = {"move": 1, "turn": 0, "fireZap": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "fireZap": 0} +BACKWARD = {"move": 3, "turn": 0, "fireZap": 0} +STEP_LEFT = {"move": 4, "turn": 0, "fireZap": 0} +TURN_LEFT = {"move": 0, "turn": -1, "fireZap": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "fireZap": 0} +FIRE_ZAP = {"move": 0, "turn": 0, "fireZap": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + FIRE_ZAP, +) + +# Remove the first entry from human_readable_colors after using it for the self +# color to prevent it from being used again as another avatar color. +light_desaturated_avatar_palette = list( + colors.light_desaturated_avatar_palette) +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette(light_desaturated_avatar_palette.pop(0)), + "noRotate": True, +} + + +def create_prefabs() -> PrefabConfig: + """Returns the prefabs. + + Prefabs are a dictionary mapping names to template game objects that can + be cloned and placed in multiple locations accoring to an ascii map. 
+ """ + prefabs = { + "dirt": DIRT, + "spawn_point": SPAWN_POINT, + "red_mushroom": create_mushroom( + initial_state="fullInternalityZeroExternality"), + "green_mushroom": create_mushroom( + initial_state="halfInternalityHalfExternality"), + "blue_mushroom": create_mushroom( + initial_state="zeroInternalityFullExternality"), + "orange_mushroom": create_mushroom( + initial_state="negativeInternalityNegativeExternality"), + "potential_mushroom": create_mushroom(initial_state="wait"), + # fence prefabs + "nw_wall_corner": NW_WALL_CORNER, + "nw_inner_wall_corner": NW_INNER_WALL_CORNER, + "ne_wall_corner": NE_WALL_CORNER, + "ne_inner_wall_corner": NE_INNER_WALL_CORNER, + "se_wall_corner": SE_WALL_CORNER, + "sw_wall_corner": SW_WALL_CORNER, + "wall_north": WALL_NORTH, + "wall_east": WALL_EAST, + "wall_south": WALL_SOUTH, + "wall_west": WALL_WEST, + "wall_shadow_sw": WALL_SHADOW_SW, + "wall_shadow_s": WALL_SHADOW_S, + "wall_shadow_se": WALL_SHADOW_SE, + } + return prefabs + + +def create_scene(): + """Create the scene object, a non-physical object to hold global logic.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "MushroomRegrowth", + "kwargs": { + "mushroomsToProbabilities": { + "fullInternalityZeroExternality": { + "fullInternalityZeroExternality": 0.25, + "halfInternalityHalfExternality": 0.0, + "zeroInternalityFullExternality": 0.0, + "negativeInternalityNegativeExternality": 0.0, + }, + "halfInternalityHalfExternality": { + "fullInternalityZeroExternality": 0.25, + "halfInternalityHalfExternality": 0.4, + "zeroInternalityFullExternality": 0.0, + "negativeInternalityNegativeExternality": 0.0, + }, + "zeroInternalityFullExternality": { + "fullInternalityZeroExternality": 0.25, + "halfInternalityHalfExternality": 0.4, + "zeroInternalityFullExternality": 0.6, + 
"negativeInternalityNegativeExternality": 0.0, + }, + "negativeInternalityNegativeExternality": { + "fullInternalityZeroExternality": 0.0, + "halfInternalityHalfExternality": 0.0, + "zeroInternalityFullExternality": 0.0, + "negativeInternalityNegativeExternality": 1.0, + }, + }, + "minPotentialMushrooms": 1, + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.2 + } + }, + ] + } + return scene + + +def create_avatar_object(player_idx: int, + target_sprite_self: Dict[str, Any]) -> Dict[str, Any]: + """Create an avatar object that always sees itself as blue.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. + source_sprite_self = "Avatar" + str(lua_index) + custom_sprite_map = {source_sprite_self: target_sprite_self["name"]} + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": f"avatar{lua_index}", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + # Initial player state. + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + # Player wait type for times when they are zapped out. 
+ {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [source_sprite_self], + "spriteShapes": [shapes.CUTE_AVATAR], + "palettes": [ + shapes.get_palette( + light_desaturated_avatar_palette[player_idx]) + ], + "noRotates": [True] + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [target_sprite_self["name"]], + "customSpriteShapes": [target_sprite_self["shape"]], + "customPalettes": [target_sprite_self["palette"]], + "customNoRotates": [target_sprite_self["noRotate"]], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "spawnGroup": "spawnPoints", + "actionOrder": ["move", + "turn", + "fireZap"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "fireZap": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + } + }, + { + "component": "Zapper", + "kwargs": { + "cooldownTime": 3, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 50, + "penaltyForBeingZapped": 0, + "rewardForZapping": 0, + # GraduatedSanctionsMarking handles removal instead of Zapper. 
+ "removeHitPlayer": False, + } + }, + { + "component": "ReadyToShootObservation", + }, + { + "component": "Cumulants", + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + "name": "ATE_MUSHROOM_FIZE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "ate_mushroom_fize", + }, + { + "name": "ATE_MUSHROOM_HIHE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "ate_mushroom_hihe", + }, + { + "name": "ATE_MUSHROOM_ZIFE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "ate_mushroom_zife", + }, + { + "name": "ATE_MUSHROOM_NINE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "ate_mushroom_nine", + }, + { + "name": "DESTROYED_MUSHROOM_FIZE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "destroyed_mushroom_fize", + }, + { + "name": "DESTROYED_MUSHROOM_HIHE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "destroyed_mushroom_hihe", + }, + { + "name": "DESTROYED_MUSHROOM_ZIFE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "destroyed_mushroom_zife", + }, + { + "name": "DESTROYED_MUSHROOM_NINE", + "type": "Doubles", + "shape": [], + "component": "Cumulants", + "variable": "destroyed_mushroom_nine", + }, + ] + } + }, + ] + } + return avatar_object + + +def create_marking_overlay(player_idx: int) -> Mapping[str, Any]: + """Create a graduated sanctions marking overlay object.""" + # Lua is 1-indexed. + lua_idx = player_idx + 1 + + marking_object = { + "name": "avatar_marking", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "avatarMarkingWait", + "stateConfigs": [ + # Declare one state per level of the hit logic. 
+ {"state": "level_1", + "layer": "superOverlay", + "sprite": "sprite_for_level_1"}, + {"state": "level_2", + "layer": "superOverlay", + "sprite": "sprite_for_level_2"}, + + # Invisible inactive (zapped out) overlay type. + {"state": "avatarMarkingWait", + "groups": ["avatarMarkingWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["sprite_for_level_1", + "sprite_for_level_2"], + "spriteShapes": [MARKING_SPRITE, + MARKING_SPRITE], + "palettes": [get_marking_palette(0.0), + get_marking_palette(1.0)], + "noRotates": [True] * 3 + } + }, + { + "component": "GraduatedSanctionsMarking", + "kwargs": { + "playerIndex": lua_idx, + "waitState": "avatarMarkingWait", + "hitName": "zapHit", + "recoveryTime": 50, + "hitLogic": [ + {"levelIncrement": 1, + "sourceReward": 0, + "targetReward": 0, + "freeze": 25}, + {"levelIncrement": -1, + "sourceReward": 0, + "targetReward": 0, + "remove": True} + ], + } + }, + ] + } + return marking_object + + +def create_avatar_objects(num_players): + """Returns list of avatar objects of length 'num_players'.""" + avatar_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object(player_idx, + TARGET_SPRITE_SELF) + avatar_objects.append(game_object) + + marking_object = create_marking_overlay(player_idx) + avatar_objects.append(marking_object) + + return avatar_objects + + +def get_config(): + """Default configuration for this substrate.""" + config = config_dict.ConfigDict() + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="externality_mushrooms", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": config.layout.ascii_map, + "gameObjects": create_avatar_objects(num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": config.layout.char_prefab_map, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/externality_mushrooms__dense.py b/meltingpot/python/configs/substrates/externality_mushrooms__dense.py new file mode 100644 index 00000000..c2152d5b --- /dev/null +++ b/meltingpot/python/configs/substrates/externality_mushrooms__dense.py @@ -0,0 +1,91 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Externality Mushrooms: Dense. 
+ +Example video: https://youtu.be/MwHhg7sa0xs + +See base config: externality_mushrooms.py. Here the map is such that mushrooms +may grow anywhere on the map and most of the map can become full of mushrooms. +This may sometimes make it necessary to actively avoid or destroy undesirable +mushrooms. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import externality_mushrooms as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +/_____________________+ +'#####################` +! | +! R G | +! R | +! | +! G | +! B O | +! B | +! R | +! | +! B G | +! | +(---------------------) +""" + +# Map a character to the prefab it represents in the ASCII map. +CHAR_PREFAB_MAP = { + " ": {"type": "all", "list": ["dirt", "spawn_point", "potential_mushroom"]}, + "R": {"type": "all", "list": ["dirt", "red_mushroom"]}, + "G": {"type": "all", "list": ["dirt", "green_mushroom"]}, + "B": {"type": "all", "list": ["dirt", "blue_mushroom"]}, + "O": {"type": "all", "list": ["dirt", "orange_mushroom"]}, + # fence prefabs + "/": {"type": "all", "list": ["dirt", "nw_wall_corner"]}, + "'": {"type": "all", "list": ["dirt", "nw_inner_wall_corner"]}, + "+": {"type": "all", "list": ["dirt", "ne_wall_corner"]}, + "`": {"type": "all", "list": ["dirt", "ne_inner_wall_corner"]}, + ")": {"type": "all", "list": ["dirt", "se_wall_corner"]}, + "(": {"type": "all", "list": ["dirt", "sw_wall_corner"]}, + "_": {"type": "all", "list": ["dirt", "wall_north"]}, + "|": {"type": "all", "list": ["dirt", "wall_east"]}, + "-": {"type": "all", "list": ["dirt", "wall_south"]}, + "!": {"type": "all", "list": ["dirt", "wall_west"]}, + "#": {"type": "all", "list": ["dirt", "wall_shadow_s"]}, + ">": {"type": "all", "list": ["dirt", "wall_shadow_se"]}, + "<": {"type": "all", "list": ["dirt", "wall_shadow_sw"]}, +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + # Override the 
map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(112, 184), + }) + + config.default_player_roles = ("default",) * 5 + + return config diff --git a/meltingpot/python/configs/substrates/factory_commons.py b/meltingpot/python/configs/substrates/factory_commons.py new file mode 100644 index 00000000..098c633b --- /dev/null +++ b/meltingpot/python/configs/substrates/factory_commons.py @@ -0,0 +1,2407 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Configuration for Factory of the Commons.""" + +from typing import Any, Dict, Generator, Mapping, Sequence + +from ml_collections import config_dict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +_COMPASS = ["N", "E", "S", "W"] +INVISIBLE = (0, 0, 0, 0) + +GRASP_SHAPE = """ +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xoxxxxox +xxooooxx +""" + +FLOOR_MARKING = { + "name": + "floor_marking", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "floor_marking", + "stateConfigs": [{ + "state": "floor_marking", + "layer": "lowestPhysical", + "sprite": "floor_marking", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["floor_marking"], + "spriteShapes": [shapes.FLOOR_MARKING], + "palettes": [shapes.DISPENSER_BELT_PALETTE], + "noRotates": [False] + } + }, + ] +} + +PINK_CUBE_DISPENSING_ANIMATION = { + "name": + "pink_cube_dispensing_animation", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + "layer": "overlay", + }, + { + "state": "pink_cube_dispensing_1", + "layer": "overlay", + "sprite": "pink_cube_dispensing_1", + }, + { + "state": "pink_cube_dispensing_2", + "layer": "overlay", + "sprite": "pink_cube_dispensing_2", + }, + + { + "state": "pink_cube_dispensing_3", + "layer": "overlay", + "sprite": "pink_cube_dispensing_3", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["pink_cube_dispensing_1", + "pink_cube_dispensing_2", + "pink_cube_dispensing_3"], + "spriteShapes": [shapes.CUBE_DISPENSING_ANIMATION_1, + shapes.CUBE_DISPENSING_ANIMATION_2, + shapes.CUBE_DISPENSING_ANIMATION_3], + "palettes": [{ + 
"a": (255, 174, 182, 255), + "A": (240, 161, 169, 255), + "&": (237, 140, 151, 255), + "x": (0, 0, 0, 0), + }] * 3, + "noRotates": [True] * 3, + } + }, + { + "component": "ObjectDispensingAnimation", + "kwargs": { + "frameOne": "pink_cube_dispensing_1", + "frameTwo": "pink_cube_dispensing_2", + "frameThree": "pink_cube_dispensing_3", + "waitState": "waitState", + } + }, + ] +} + +DISPENSER_INDICATOR_PINK_CUBE = { + "name": + "dispenser_indicator_pink_cube", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "dispenser_pink_cube", + "stateConfigs": [ + { + "state": "dispenser_pink_cube", + "layer": "midPhysical", + "sprite": "dispenser_pink_cube", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["dispenser_pink_cube"], + "spriteShapes": [shapes.HOPPER_INDICATOR_SINGLE_BLOCK], + "palettes": [{ + "x": (0, 0, 0, 0), + "a": (255, 174, 182, 255), + }], + "noRotates": [False] + } + }, + { + "component": "DispenserIndicator", + "kwargs": { + "objectOne": "PinkCube", + "objectTwo": "NoneNeeded", + } + } + ] +} + + +SPAWN_POINT = { + "name": + "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform" + }, + ] +} + + +def get_blue_cube(initial_state: str): + """Get a blue cube prefab.""" + prefab = { + "name": + "blue_cube_live", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": initial_state, + "stateConfigs": [ + { + "state": "blue_cube", + "layer": "lowerPhysical", + "sprite": "blue_cube", + }, + { + "state": "blue_jump", + "layer": "lowerPhysical", + "sprite": "blue_jump", + }, + { + "state": "blue_cube_drop_one", + "layer": "lowerPhysical", + "sprite": "blue_cube_drop_one", + }, + { + "state": 
"blue_cube_drop_two", + "layer": "lowerPhysical", + "sprite": "blue_cube_drop_two", + }, + { + "state": "waitState", + } + ], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["blue_cube", "blue_cube_drop_one", + "blue_cube_drop_two", "blue_jump"], + "spriteShapes": [shapes.BLOCK, + shapes.BLOCK_DROP_1, + shapes.BLOCK_DROP_2, + shapes.CUBE_DISPENSING_ANIMATION_1], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE,] * 4, + "noRotates": [True] * 4 + } + }, + { + "component": "Receivable", + "kwargs": { + "waitState": "waitState", + "liveState": "blue_cube", + } + }, + { + "component": "ReceiverDropAnimation", + "kwargs": { + "dropOne": "blue_cube_drop_one", + "dropTwo": "blue_cube_drop_two", + } + }, + { + "component": "Token", + "kwargs": { + "type": "BlueCube" + } + }, + { + "component": "ObjectJumpAnimation", + "kwargs": { + "jump": "blue_jump", + "drop": "blue_cube", + "waitState": "waitState", + } + }, + { + "component": "Graspable", + "kwargs": { + "graspableStates": ("blue_cube",), + "disconnectStates": ( + "blue_jump", "blue_cube_drop_one", "blue_cube_drop_two", + "waitState",), + } + } + ] + } + return prefab + + +BANANA = { + "name": + "banana", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "waitState", + "stateConfigs": [ + { + "state": "banana", + "layer": "lowerPhysical", + "sprite": "banana", + }, + { + "state": "banana_jump", + "layer": "lowerPhysical", + "sprite": "banana_jump", + }, + { + "state": "banana_drop_one", + "layer": "lowerPhysical", + "sprite": "banana_drop_one", + }, + { + "state": "banana_drop_two", + "layer": "lowerPhysical", + "sprite": "banana_drop_two", + }, + { + "state": "waitState" + } + ], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["banana", "banana_drop_one", "banana_drop_two", + "banana_jump"], + 
"spriteShapes": [shapes.BANANA, + shapes.BANANA_DROP_1, + shapes.BANANA_DROP_2, + shapes.BANANA,], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE,] * 4, + "noRotates": [True] * 4 + } + }, + { + "component": "Receivable", + "kwargs": { + "waitState": "waitState", + "liveState": "banana", + } + }, + { + "component": "ReceiverDropAnimation", + "kwargs": { + "dropOne": "banana_drop_one", + "dropTwo": "banana_drop_two", + } + }, + { + "component": "Token", + "kwargs": { + "type": "Banana" + } + }, + { + "component": "SecondObjectJumpAnimation", + "kwargs": { + "jump": "banana", + "drop": "banana", + "waitState": "waitState", + } + }, + { + "component": "Graspable", + "kwargs": { + "graspableStates": ("banana",), + "disconnectStates": ( + "banana_jump", "banana_drop_one", "banana_drop_two", + "waitState",), + } + } + ] +} + +APPLE = { + "name": + "apples", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + }, + { + "state": "apple", + "layer": "appleLayer", + "sprite": "apple", + }, + { + "state": "apple_jump_state", + "layer": "appleLayer", + "sprite": "apple_jump_sprite", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["apple", "apple_jump_sprite"], + "spriteShapes": [shapes.APPLE, shapes.APPLE_JUMP], + "palettes": [shapes.APPLE_RED_PALETTE] * 2, + "noRotates": [True] * 2, + } + }, + { + "component": "Graspable", + "kwargs": { + "graspableStates": ("apple",), + "disconnectStates": ("apple_jump_state", "waitState",), + } + }, + { + "component": "AppleComponent", + "kwargs": { + "liveState": "apple", + "waitState": "waitState", + "rewardForEating": 1, + } + }, + { + "component": "Token", + "kwargs": { + "type": "Apple" + } + }, + { + "component": "ObjectJumpAnimation", + "kwargs": { + "jump": "apple_jump_state", + "drop": "apple", + "waitState": "waitState", + } + }, + { + 
"component": "SecondObjectJumpAnimation", + "kwargs": { + "jump": "apple", + "drop": "apple", + "waitState": "waitState", + } + }, + + ] +} + +PINK_CUBE = { + "name": + "pink_cube", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "waitState", + "stateConfigs": [ + { + "state": "pink_cube", + "layer": "lowerPhysical", + "sprite": "pink_cube", + }, + { + "state": "pink_cube_drop_one", + "layer": "lowerPhysical", + "sprite": "pink_cube_drop_one", + }, + { + "state": "pink_cube_drop_two", + "layer": "lowerPhysical", + "sprite": "pink_cube_drop_two", + }, + { + "state": "pink_jump", + "layer": "lowerPhysical", + "sprite": "pink_jump", + }, + { + "state": "waitState", + } + ], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["pink_cube", "pink_cube_drop_one", + "pink_cube_drop_two", "pink_jump"], + "spriteShapes": [shapes.BLOCK, + shapes.BLOCK_DROP_1, + shapes.BLOCK_DROP_2, + shapes.CUBE_DISPENSING_ANIMATION_1], + "palettes": [{ + "a": (255, 174, 182, 255), + "A": (240, 161, 169, 255), + "&": (237, 140, 151, 255), + "x": (0, 0, 0, 0), + }] * 4, + "noRotates": [True] * 4 + } + }, + { + "component": "Receivable", + "kwargs": { + "waitState": "waitState", + "liveState": "pink_cube", + } + }, + { + "component": "Token", + "kwargs": { + "type": "PinkCube" + } + }, + { + "component": "ReceiverDropAnimation", + "kwargs": { + "dropOne": "pink_cube_drop_one", + "dropTwo": "pink_cube_drop_two", + } + }, + { + "component": "ObjectJumpAnimation", + "kwargs": { + "jump": "pink_jump", + "drop": "pink_cube", + "waitState": "waitState", + } + }, + { + "component": "Graspable", + "kwargs": { + "graspableStates": ("pink_cube",), + "disconnectStates": ( + "pink_cube_drop_one", "pink_cube_drop_two", "pink_jump", + "waitState",), + } + } + ] +} + +APPLE_DISPENSING = { + "name": + "apple_dispensing", + "components": [ + { + "component": "StateManager", + "kwargs": { 
+ "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + "layer": "overlay", + }, + { + "state": "apple_dispensing_1", + "layer": "overlay", + "sprite": "apple_dispensing_1", + }, + { + "state": "apple_dispensing_2", + "layer": "overlay", + "sprite": "apple_dispensing_2", + }, + + { + "state": "apple_dispensing_3", + "layer": "overlay", + "sprite": "apple_dispensing_3", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["apple_dispensing_1", "apple_dispensing_2", + "apple_dispensing_3"], + "spriteShapes": [shapes.APPLE_DISPENSING_ANIMATION_1, + shapes.APPLE_DISPENSING_ANIMATION_2, + shapes.APPLE_DISPENSING_ANIMATION_3], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE] * 3, + "noRotates": [True] * 3, + } + }, + { + "component": "ObjectDispensingAnimation", + "kwargs": { + "frameOne": "apple_dispensing_1", + "frameTwo": "apple_dispensing_2", + "frameThree": "apple_dispensing_3", + "waitState": "waitState", + } + }, + ] +} + +CUBE_APPLE_DISPENSING_ANIMATION = { + "name": + "cube_apple_dispensing_animation", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + "layer": "overlay", + }, + { + "state": "apple_dispensing_1", + "layer": "overlay", + "sprite": "apple_dispensing_1", + }, + { + "state": "apple_dispensing_2", + "layer": "overlay", + "sprite": "apple_dispensing_2", + }, + { + "state": "apple_dispensing_3", + "layer": "overlay", + "sprite": "apple_dispensing_3", + }, + { + "state": "blue_cube_dispensing_1", + "layer": "overlay", + "sprite": "blue_cube_dispensing_1", + }, + { + "state": "blue_cube_dispensing_2", + "layer": "overlay", + "sprite": "blue_cube_dispensing_2", + }, + { + "state": "blue_cube_dispensing_3", + "layer": "overlay", + "sprite": "blue_cube_dispensing_3", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": 
"Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["apple_dispensing_1", "apple_dispensing_2", + "apple_dispensing_3", "blue_cube_dispensing_1", + "blue_cube_dispensing_2", + "blue_cube_dispensing_3"], + "spriteShapes": [shapes.APPLE_DISPENSING_ANIMATION_1, + shapes.APPLE_DISPENSING_ANIMATION_2, + shapes.APPLE_DISPENSING_ANIMATION_3, + shapes.CUBE_DISPENSING_ANIMATION_1, + shapes.CUBE_DISPENSING_ANIMATION_2, + shapes.CUBE_DISPENSING_ANIMATION_3], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE] * 6, + "noRotates": [True] * 6, + } + }, + { + "component": "DoubleObjectDispensingAnimation", + "kwargs": { + "frameOne": "blue_cube_dispensing_1", + "frameTwo": "blue_cube_dispensing_2", + "frameThree": "blue_cube_dispensing_3", + "frameFour": "apple_dispensing_1", + "frameFive": "apple_dispensing_2", + "frameSix": "apple_dispensing_3", + "waitState": "waitState", + } + }, + ] +} + +BANANA_CUBE_DISPENSING_ANIMATION = { + "name": + "banana_cube_dispensing_animation", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + "layer": "overlay", + }, + { + "state": "banana_dispensing_1", + "layer": "overlay", + "sprite": "banana_dispensing_1", + }, + { + "state": "banana_dispensing_2", + "layer": "overlay", + "sprite": "banana_dispensing_2", + }, + { + "state": "banana_dispensing_3", + "layer": "overlay", + "sprite": "banana_dispensing_3", + }, + { + "state": "blue_cube_dispensing_1", + "layer": "overlay", + "sprite": "blue_cube_dispensing_1", + }, + { + "state": "blue_cube_dispensing_2", + "layer": "overlay", + "sprite": "blue_cube_dispensing_2", + }, + + { + "state": "blue_cube_dispensing_3", + "layer": "overlay", + "sprite": "blue_cube_dispensing_3", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["banana_dispensing_1", "banana_dispensing_2", + 
"banana_dispensing_3", "blue_cube_dispensing_1", + "blue_cube_dispensing_2", + "blue_cube_dispensing_3"], + "spriteShapes": [shapes.BANANA_DISPENSING_ANIMATION_1, + shapes.BANANA, + shapes.BANANA_DISPENSING_ANIMATION_3, + shapes.CUBE_DISPENSING_ANIMATION_1, + shapes.CUBE_DISPENSING_ANIMATION_2, + shapes.CUBE_DISPENSING_ANIMATION_3], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE] * 6, + "noRotates": [True] * 6, + } + }, + { + "component": "DoubleObjectDispensingAnimation", + "kwargs": { + "frameOne": "blue_cube_dispensing_1", + "frameTwo": "blue_cube_dispensing_2", + "frameThree": "blue_cube_dispensing_3", + "frameFour": "banana_dispensing_1", + "frameFive": "banana_dispensing_2", + "frameSix": "banana_dispensing_3", + "waitState": "waitState", + } + }, + ] +} + +PINK_CUBE_DISPENSING = { + "name": + "pink_cube_dispensing", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + "layer": "overlay", + }, + { + "state": "pink_cube_dispensing_1", + "layer": "overlay", + "sprite": "pink_cube_dispensing_1", + }, + { + "state": "pink_cube_dispensing_2", + "layer": "overlay", + "sprite": "pink_cube_dispensing_2", + }, + + { + "state": "pink_cube_dispensing_3", + "layer": "overlay", + "sprite": "pink_cube_dispensing_3", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["pink_cube_dispensing_1", + "pink_cube_dispensing_2", + "pink_cube_dispensing_3"], + "spriteShapes": [shapes.CUBE_DISPENSING_ANIMATION_1, + shapes.CUBE_DISPENSING_ANIMATION_2, + shapes.CUBE_DISPENSING_ANIMATION_3], + "palettes": [{ + "a": (255, 174, 182, 255), + "A": (240, 161, 169, 255), + "&": (237, 140, 151, 255), + "x": (0, 0, 0, 0), + }] * 3, + "noRotates": [True] * 3, + } + }, + { + "component": "DoubleObjectDispensingAnimation", + "kwargs": { + "frameOne": "pink_cube_dispensing_1", + "frameTwo": 
"pink_cube_dispensing_2", + "frameThree": "pink_cube_dispensing_3", + "frameFour": "waitState", + "frameFive": "waitState", + "frameSix": "waitState", + "waitState": "waitState", + } + }, + ] +} + + +HOPPER_MOUTH = { + "name": + "hopper_mouth", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "hopper_mouth_open", + "stateConfigs": [ + { + "state": "hopper_mouth_closed", + "layer": "lowestPhysical", + "sprite": "hopper_mouth_closed", + }, + { + "state": "hopper_mouth_closing", + "layer": "lowestPhysical", + "sprite": "hopper_mouth_closing", + }, + { + "state": "hopper_mouth_open", + "layer": "lowestPhysical", + "sprite": "hopper_mouth_open", + }, + { + "state": "waitState" + } + ], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["hopper_mouth_closed", "hopper_mouth_closing", + "hopper_mouth_open"], + "spriteShapes": [shapes.HOPPER_CLOSED, + shapes.HOPPER_CLOSING, + shapes.HOPPER_OPEN], + "palettes": [shapes.FACTORY_MACHINE_BODY_PALETTE] * 3, + "noRotates": [False] * 3 + } + }, + { + "component": "Receiver" + }, + { + "component": "HopperMouth", + "kwargs": { + "closed": "hopper_mouth_closed", + "opening": "hopper_mouth_closing", + "open": "hopper_mouth_open", + } + }, + ] +} + +HOPPER_BODY = { + "name": + "hopper_body", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "hopper_body", + "stateConfigs": [ + { + "state": "hopper_body", + "layer": "midPhysical", + "sprite": "hopper_body", + }, + { + "state": "hopper_body_activated", + "layer": "midPhysical", + "sprite": "hopper_body_activated", + } + ], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["hopper_body", "hopper_body_activated"], + "spriteShapes": [shapes.HOPPER_BODY, + shapes.HOPPER_BODY_ACTIVATED], + "palettes": [{ + "a": (140, 129, 129, 255), + "b": 
(84, 77, 77, 255), + "f": (92, 98, 120, 255), + "g": (92, 98, 120, 255), + "c": (92, 98, 120, 255), + "x": (0, 0, 0, 0), + }] * 2, + "noRotates": [False] * 2 + } + }, + ] +} + + +HOPPER_INDICATOR = { + "name": + "hopper_indicator", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "hopper_indicator_two", + "stateConfigs": [ + { + "state": "waitState", + }, + { + "state": "hopper_indicator_one", + "layer": "upperPhysical", + "sprite": "hopper_indicator_one", + "groups": ["indicator"] + }, + { + "state": "hopper_indicator_two", + "layer": "upperPhysical", + "sprite": "hopper_indicator_two", + "groups": ["indicator"] + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": + [ + "hopper_indicator_two", "hopper_indicator_one", + ], + "spriteShapes": [ + shapes.HOPPER_INDICATOR_TWO_BLOCKS, + shapes.HOPPER_INDICATOR_ONE_BLOCK,], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE] * 2, + "noRotates": [False] * 2 + } + }, + { + "component": "ReceiverIndicator", + "kwargs": { + "waitState": "waitState", + "liveState": "hopper_indicator_two", + "secondLiveState": "hopper_indicator_one", + "count": "Double", + "type": "TwoBlocks", + } + } + ] +} + +HOPPER_INDICATOR_BLUE_CUBE = { + "name": + "hopper_indicator_blue_cube", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + }, + { + "state": "blue_cube_indicator", + "layer": "upperPhysical", + "sprite": "blue_cube_indicator", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["blue_cube_indicator"], + "spriteShapes": [shapes.HOPPER_INDICATOR_SINGLE_BLOCK], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE], + "noRotates": [False] + } + }, + { + "component": "ReceiverIndicator", + "kwargs": { + "waitState": 
"waitState", + "liveState": "blue_cube_indicator", + "secondLiveState": "waitState", + "count": "Single", + "type": "BlueCube" + } + } + ] +} + +HOPPER_INDICATOR_BANANA = { + "name": + "hopper_indicator_banana", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "hopper_banana", + "stateConfigs": [ + { + "state": "hopper_banana", + "layer": "upperPhysical", + "sprite": "hopper_banana", + }, + { + "state": "waitState" + } + ] + } + }, + { + "component": "ReceiverIndicator", + "kwargs": { + "waitState": "waitState", + "liveState": "hopper_banana", + "secondLiveState": "waitState", + "count": "Single", + "type": "Banana", + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["hopper_banana"], + "spriteShapes": [shapes.HOPPER_INDICATOR_SINGLE_BANANA], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +HOPPER_INDICATOR_PINK_CUBE = { + "name": + "hopper_indicator_pink_cube", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "waitState", + "stateConfigs": [ + { + "state": "waitState", + }, + { + "state": "hopper_pink_cube", + "layer": "upperPhysical", + "sprite": "hopper_pink_cube", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["hopper_pink_cube"], + "spriteShapes": [shapes.HOPPER_INDICATOR_SINGLE_BLOCK], + "palettes": + [{ + "x": (0, 0, 0, 0), + "a": (255, 174, 182, 255), + }], + "noRotates": [False] + } + }, + { + "component": "ReceiverIndicator", + "kwargs": { + "waitState": "waitState", + "liveState": "hopper_pink_cube", + "secondLiveState": "waitState", + "count": "Single", + "type": "PinkCube", + } + } + ] +} + +DISPENSER_INDICATOR_BANANA_CUBE = { + "name": + "dispenser_indicator_banana_cube", + "components": [ + { + "component": "StateManager", + "kwargs": { + 
"initialState": + "banana_cube", + "stateConfigs": [ + { + "state": "banana_cube", + "layer": "midPhysical", + "sprite": "banana_cube", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["banana_cube"], + "spriteShapes": [shapes.HOPPER_INDICATOR_ON], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE], + "noRotates": [False] + } + }, + { + "component": "DispenserIndicator", + "kwargs": { + "objectOne": "BlueCube", + "objectTwo": "Banana", + } + } + ] +} + +DISPENSER_INDICATOR_CUBE_APPLE = { + "name": + "dispenser_indicator_cube_apple", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "cube_apple", + "stateConfigs": [ + { + "state": "cube_apple", + "layer": "midPhysical", + "sprite": "cube_apple", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["cube_apple"], + "spriteShapes": [shapes.APPLE_CUBE_INDICATOR], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE], + "noRotates": [False] + } + }, + { + "component": "DispenserIndicator", + "kwargs": { + "objectOne": "Apple", + "objectTwo": "BlueCube", + } + } + ] +} + +DISPENSER_INDICATOR_APPLE = { + "name": + "dispenser_indicator_apple", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "dispenser_indicator_apple", + "stateConfigs": [ + { + "state": "dispenser_indicator_apple", + "layer": "midPhysical", + "sprite": "dispenser_indicator_apple", + "groups": ["indicator"] + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": + [ + "dispenser_indicator_apple", + ], + "spriteShapes": [ + shapes.APPLE_INDICATOR], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE], + "noRotates": [False] + } + }, + { + "component": "DispenserIndicator", + "kwargs": { + "objectOne": "Apple", + 
"objectTwo": "NoneNeeded", + } + } + ] +} + +DISPENSER_INDICATOR_TWO_APPLES = { + "name": + "dispenser_indicator_two_apples", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "two_apples", + "stateConfigs": [ + { + "state": "two_apples", + "layer": "midPhysical", + "sprite": "two_apples", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["two_apples"], + "spriteShapes": [shapes.DOUBLE_APPLE_INDICATOR], + "palettes": [shapes.FACTORY_OBJECTS_PALETTE], + "noRotates": [False] + } + }, + { + "component": "DispenserIndicator", + "kwargs": { + "objectOne": "Apple", + "objectTwo": "Apple", + } + } + ] +} + +DISPENSER_BODY = { + "name": + "dispenser_body", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "dispenser_body", + "stateConfigs": [ + { + "state": "dispenser_body", + "layer": "lowerPhysical", + "sprite": "dispenser_body", + "groups": ["dispenser"] + }, + { + "state": "dispenser_body_activated", + "layer": "lowerPhysical", + "sprite": "dispenser_body_activated", + "groups": ["dispenser"] + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [ + "dispenser_body", + "dispenser_body_activated", + ], + "spriteShapes": [ + shapes.DISPENSER_BODY, + shapes.DISPENSER_BODY_ACTIVATED, + ], + "palettes": [shapes.FACTORY_MACHINE_BODY_PALETTE] * 2, + "noRotates": [False] * 2 + } + }, + ] +} + +DISPENSER_BELT = { + "name": + "dispenser_belt", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "dispenser_belt_deactivated", + "stateConfigs": [ + { + "state": "dispenser_belt_deactivated", + "layer": "lowestPhysical", + "sprite": "dispenser_belt_deactivated", + "groups": ["dispenser"] + }, + { + "state": "dispenser_belt_on_position_1", + "layer": "lowestPhysical", + "sprite": 
"dispenser_belt_on_position_1", + "groups": ["dispenser"] + }, + { + "state": "dispenser_belt_on_position_2", + "layer": "lowestPhysical", + "sprite": "dispenser_belt_on_position_2", + "groups": ["dispenser"] + }, + { + "state": "dispenser_belt_on_position_3", + "layer": "lowestPhysical", + "sprite": "dispenser_belt_on_position_3", + "groups": ["dispenser"] + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [ + "dispenser_belt_deactivated", + "dispenser_belt_on_position_1", + "dispenser_belt_on_position_2", + "dispenser_belt_on_position_3", + ], + "spriteShapes": [ + shapes.DISPENSER_BELT_OFF, + shapes.DISPENSER_BELT_ON_POSITION_1, + shapes.DISPENSER_BELT_ON_POSITION_2, + shapes.DISPENSER_BELT_ON_POSITION_3, + ], + "palettes": [shapes.DISPENSER_BELT_PALETTE] * 4, + "noRotates": [False] * 4 + } + }, + { + "component": "ConveyerBeltOnAnimation", + "kwargs": { + "waitState": "dispenser_belt_deactivated", + "stateOne": "dispenser_belt_on_position_1", + "stateTwo": "dispenser_belt_on_position_2", + "stateThree": "dispenser_belt_on_position_3", + } + } + ] +} + + +NW_WALL_CORNER = { + "name": + "nw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "nw_wall_corner", + "stateConfigs": [{ + "state": "nw_wall_corner", + "layer": "lowerPhysical", + "sprite": "NwWallCorner", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NwWallCorner"], + "spriteShapes": [shapes.NW_PERSPECTIVE_WALL], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +NE_WALL_CORNER = { + "name": + "ne_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": 
+ "ne_wall_corner", + "stateConfigs": [{ + "state": "ne_wall_corner", + "layer": "upperPhysical", + "sprite": "NeWallCorner", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NeWallCorner"], + "spriteShapes": [shapes.NE_PERSPECTIVE_WALL], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_HORIZONTAL = { + "name": + "wall_horizontal", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "wall_horizontal", + "stateConfigs": [{ + "state": "wall_horizontal", + "layer": "lowerPhysical", + "sprite": "WallHorizontal", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallHorizontal"], + "spriteShapes": [shapes.PERSPECTIVE_WALL], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_T_COUPLING = { + "name": + "wall_t_coupling", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "wall_t_coupling", + "stateConfigs": [{ + "state": "wall_t_coupling", + "layer": "upperPhysical", + "sprite": "WallTCoupling", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallTCoupling"], + "spriteShapes": [shapes.PERSPECTIVE_WALL_T_COUPLING], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_EAST = { + "name": + "wall_east", 
+ "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "wall_east", + "stateConfigs": [{ + "state": "wall_east", + "layer": "lowerPhysical", + "sprite": "WallEast", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallEast"], + "spriteShapes": [shapes.E_PERSPECTIVE_WALL], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_WEST = { + "name": + "wall_west", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "wall_west", + "stateConfigs": [{ + "state": "wall_west", + "layer": "lowerPhysical", + "sprite": "WallWest", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallWest"], + "spriteShapes": [shapes.W_PERSPECTIVE_WALL], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_MIDDLE = { + "name": + "wall_middle", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "wall_middle", + "stateConfigs": [{ + "state": "wall_middle", + "layer": "lowerPhysical", + "sprite": "WallMiddle", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallMiddle"], + "spriteShapes": [shapes.MID_PERSPECTIVE_WALL], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +THRESHOLD = { + "name": + 
"threshold", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "threshold", + "stateConfigs": [{ + "state": "threshold", + "layer": "lowestPhysical", + "sprite": "Threshold", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Threshold"], + "spriteShapes": [shapes.PERSPECTIVE_THRESHOLD], + "palettes": [shapes.PERSPECTIVE_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +TILED_FLOOR = { + "name": + "tiled_floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "tiled_floor", + "stateConfigs": [{ + "state": "tiled_floor", + "layer": "background", + "sprite": "tiled_floor", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["tiled_floor"], + "spriteShapes": [shapes.METAL_FLOOR_DOUBLE_SPACED], + "palettes": [shapes.FACTORY_FLOOR_PALETTE], + "noRotates": [False] + } + }, + ] +} + +FLOOR_MARKING = { + "name": + "floor_marking", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "floor_marking", + "stateConfigs": [{ + "state": "floor_marking", + "layer": "lowestPhysical", + "sprite": "floor_marking", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["floor_marking"], + "spriteShapes": [shapes.FLOOR_MARKING], + "palettes": [shapes.DISPENSER_BELT_PALETTE], + "noRotates": [False] + } + }, + ] +} + +FLOOR_MARKING_TOP = { + "name": + "floor_marking_top", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "floor_marking_top", + "stateConfigs": [{ + "state": "floor_marking_top", + "layer": "lowestPhysical", + "sprite": "floor_marking_top", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + 
"renderMode": "ascii_shape", + "spriteNames": ["floor_marking_top"], + "spriteShapes": [shapes.FLOOR_MARKING_LONG_TOP], + "palettes": [shapes.DISPENSER_BELT_PALETTE], + "noRotates": [False] + } + }, + ] +} + +FLOOR_MARKING_BOTTOM = { + "name": + "floor_marking_bottom", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "floor_marking_bottom", + "stateConfigs": [{ + "state": "floor_marking_bottom", + "layer": "lowestPhysical", + "sprite": "floor_marking_bottom", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["floor_marking_bottom"], + "spriteShapes": [shapes.FLOOR_MARKING_LONG_BOTTOM], + "palettes": [shapes.DISPENSER_BELT_PALETTE], + "noRotates": [False] + } + }, + ] +} + +human_readable_colors = list(colors.human_readable) +target_sprite_color = human_readable_colors.pop(0) +grappling_target_color_palette = shapes.get_palette(target_sprite_color) +# Add character mappings to avatar pallete for Magic Beam overlay +grappling_target_color_palette["P"] = (196, 77, 190, 130) +grappling_target_color_palette["p"] = (184, 72, 178, 80) +TARGET_SPRITE_SELF = { + "default": { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette(target_sprite_color), + "noRotate": True, + }, + "grappling": { + "name": "SelfGrappling", + "shape": shapes.CUTE_AVATAR_ARMS_UP, + "palette": grappling_target_color_palette, + "noRotate": True, + }, + "grappled": { + "name": "SelfGrappled", + "shape": shapes.MAGIC_GRAPPLED_AVATAR, + "palette": grappling_target_color_palette, + "noRotate": True, + }, +} + +# PREFABS is a dictionary mapping names to template game objects that can +# be cloned and placed in multiple locations accoring to an ascii map.graspable +PREFABS = { + "spawn_point": SPAWN_POINT, + # Graspable objects. 
+ "apple": APPLE, + "blue_cube_live": get_blue_cube(initial_state="blue_cube"), + "pink_cube": PINK_CUBE, + "blue_cube_wait": get_blue_cube(initial_state="waitState"), + "banana": BANANA, + # Dynamic components. + "hopper_body": HOPPER_BODY, + "hopper_mouth": HOPPER_MOUTH, + # Hopper indicators. + "hopper_indicator": HOPPER_INDICATOR, + "hopper_indicator_pink_cube": HOPPER_INDICATOR_PINK_CUBE, + "hopper_indicator_blue_cube": HOPPER_INDICATOR_BLUE_CUBE, + "hopper_indicator_banana": HOPPER_INDICATOR_BANANA, + # Dispenser indicators. + "dispenser_indicator_apple": DISPENSER_INDICATOR_APPLE, + "dispenser_indicator_two_apples": DISPENSER_INDICATOR_TWO_APPLES, + "dispenser_indicator_pink_cube": DISPENSER_INDICATOR_PINK_CUBE, + "dispenser_indicator_banana_cube": DISPENSER_INDICATOR_BANANA_CUBE, + "dispenser_indicator_cube_apple": DISPENSER_INDICATOR_CUBE_APPLE, + "dispenser_body": DISPENSER_BODY, + "dispenser_belt": DISPENSER_BELT, + "apple_dispensing_animation": APPLE_DISPENSING, + "pink_cube_dispensing_animation": PINK_CUBE_DISPENSING_ANIMATION, + "banana_cube_dispensing_animation": BANANA_CUBE_DISPENSING_ANIMATION, + "cube_apple_dispensing_animation": CUBE_APPLE_DISPENSING_ANIMATION, + # Static components. + "nw_wall_corner": NW_WALL_CORNER, + "ne_wall_corner": NE_WALL_CORNER, + "wall_horizontal": WALL_HORIZONTAL, + "wall_t_coupling": WALL_T_COUPLING, + "wall_east": WALL_EAST, + "wall_west": WALL_WEST, + "wall_middle": WALL_MIDDLE, + "threshold": THRESHOLD, + "tiled_floor": TILED_FLOOR, + "floor_marking": FLOOR_MARKING, + "floor_marking_top": FLOOR_MARKING_TOP, + "floor_marking_bottom": FLOOR_MARKING_BOTTOM, +} + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "pickup": 0, "grasp": 0, "hold": 0, "shove": 0} +FORWARD = {"move": 1, "turn": 0, "pickup": 0, "grasp": 0, "hold": 0, "shove": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "pickup": 0, "grasp": 0, "hold": 0, "shove": 0} +BACKWARD = {"move": 3, "turn": 0, "pickup": 0, "grasp": 0, "hold": 0, "shove": 0} +STEP_LEFT = {"move": 4, "turn": 0, "pickup": 0, "grasp": 0, "hold": 0, "shove": 0} +TURN_LEFT = {"move": 0, "turn": -1, "pickup": 0, "grasp": 0, "hold": 0, "shove": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "pickup": 0, "grasp": 0, "hold": 0, "shove": 0} +PICKUP = {"move": 0, "turn": 0, "pickup": 1, "grasp": 0, "hold": 0, "shove": 0} +GRASP = {"move": 0, "turn": 0, "pickup": 0, "grasp": 1, "hold": 0, "shove": 0} +HOLD = {"move": 0, "turn": 0, "pickup": 0, "grasp": 0, "hold": 1, "shove": 0} +# Notice that SHOVE includes both `hold` and `shove` parts. +SHOVE = {"move": 0, "turn": 0, "pickup": 0, "grasp": 0, "hold": 1, "shove": 1} +PULL = {"move": 0, "turn": 0, "pickup": 0, "grasp": 0, "hold": 1, "shove": -1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + PICKUP, + GRASP, + HOLD, + SHOVE, + PULL, +) + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": + "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.1 + } + } + ] + } + + return scene + + +def _create_stamina_overlay(player_idx: int, + max_stamina_bar_states: int, + ) -> Generator[Dict[str, Any], None, None]: + """Create stamina marker overlay objects.""" + # Lua is 1-indexed. + lua_idx = player_idx + 1 + + stamina_bar_state_configs = [ + # Invisible inactive (dead) overlay type. + {"state": "staminaBarWait"}, + ] + stamina_bar_sprite_names = [] + stamina_bar_sprite_shapes = [] + + # Each player's stamina bars must be in their own layer so they do not + # interact/collide with other players' stamina bars. + stamina_bar_layer = f"superOverlay_{player_idx}" + + # Declare one state per level of the stamina bar. + for i in range(max_stamina_bar_states): + sprite_name = f"sprite_for_level_{i}" + stamina_bar_state_configs.append( + {"state": f"level_{i}", + "layer": stamina_bar_layer, + "sprite": sprite_name}) + stamina_bar_sprite_names.append(sprite_name) + xs = "\nxxxxxxxx" + blank_space = xs * 7 + number_of_rs = max(6 - i, 0) + number_of_ys = i if i < 7 else 12 - i + number_of_gs = max(i - 6, 0) + if i >= 13: + level = blank_space + xs + else: + level = blank_space + "\nx" + "G" * number_of_gs + "Y" * number_of_ys + "R" * number_of_rs + "x" + empty = "\n".join(["x" * 8] * 8) + # Replace the east/south/west sprites with invisible sprites so the only + # stamina bar rendered is the one in the direction that the current player + # is facing. + stamina_bar_sprite_shapes.append((level, empty, empty, empty)) + + # Create a stamina bar for each compass direction. Only the direction the + # current player is facing is visible. 
+ for direction in ("N", "E", "S", "W"): + yield { + "name": "avatar_stamina_bar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "staminaBarWait", + "stateConfigs": stamina_bar_state_configs + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": stamina_bar_sprite_names, + "spriteShapes": stamina_bar_sprite_shapes, + "palettes": [{"G": (62, 137, 72, 255), + "Y": (255, 216, 97, 255), + "R": (162, 38, 51, 255), + "x": INVISIBLE,}] * max_stamina_bar_states, + "noRotates": [True] * max_stamina_bar_states + } + }, + { + "component": "StaminaBar", + "kwargs": { + "playerIndex": lua_idx, + "waitState": "staminaBarWait", + "layer": stamina_bar_layer, + "direction": direction + } + }, + ] + } + + +def create_avatar_object(player_idx: int, + target_sprite_self: Dict[str, Any], + max_stamina_bar_states: int) -> Dict[str, Any]: + """Create an avatar object.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + # Setup the self vs other sprite mapping. 
+ avatar_sprite_name = "avatarSprite{}".format(lua_index) + grappling_sprite = "AvatarGrappling" + str(lua_index) + grappled_sprite = "AvatarGrappled" + str(lua_index) + + custom_sprite_map = { + avatar_sprite_name: target_sprite_self["default"]["name"], + grappling_sprite: target_sprite_self["grappling"]["name"], + grappled_sprite: target_sprite_self["grappled"]["name"], + } + + live_state_name = "player{}".format(lua_index) + grappling_state_name = f"player{lua_index}_grappling" + grappled_state_name = f"player{lua_index}_grappled" + + color_palette = shapes.get_palette(colors.palette[player_idx]) + # Add character mappings to avatar pallete for Magic Beam overlay + color_palette["P"] = (196, 77, 190, 130) + color_palette["p"] = (184, 72, 178, 80) + spawn_group = "spawnPoints" + + avatar_object = { + "name": + "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + live_state_name, + "stateConfigs": [ + # Initial player state. + { + "state": live_state_name, + "layer": "midPhysical", + "sprite": avatar_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": grappling_state_name, + "layer": "upperPhysical", + "sprite": grappling_sprite, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": grappled_state_name, + "layer": "upperPhysical", + "sprite": grappled_sprite, + "contact": "avatar", + "groups": ["players"]}, + # Player wait type for times when they are zapped out. 
+ { + "state": "playerWait", + "groups": ["playerWaits"] + }, + ] + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [avatar_sprite_name, grappling_sprite, + grappled_sprite], + "spriteShapes": [shapes.CUTE_AVATAR, + shapes.CUTE_AVATAR_ARMS_UP, + shapes.MAGIC_GRAPPLED_AVATAR], + "palettes": [color_palette] * 3, + "noRotates": [True] * 3 + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [ + target_sprite_self["default"]["name"], + target_sprite_self["grappling"]["name"], + target_sprite_self["grappled"]["name"], + ], + "customSpriteShapes": [ + target_sprite_self["default"]["shape"], + target_sprite_self["grappling"]["shape"], + target_sprite_self["grappled"]["shape"], + ], + "customPalettes": [ + target_sprite_self["default"]["palette"], + target_sprite_self["grappling"]["palette"], + target_sprite_self["grappled"]["palette"], + ], + "customNoRotates": [ + target_sprite_self["default"]["noRotate"], + target_sprite_self["grappling"]["noRotate"], + target_sprite_self["grappled"]["noRotate"], + ], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "additionalLiveStates": [grappled_state_name, + grappling_state_name], + "waitState": "playerWait", + "spawnGroup": spawn_group, + "actionOrder": [ + "move", + "turn", + "pickup", + "grasp", + # Grappling actions + "hold", + "shove", + ], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "pickup": {"default": 0, "min": 0, "max": 1}, + "grasp": {"default": 0, "min": 0, "max": 1}, + # Grappling actions + "hold": {"default": 0, "min": 0, "max": 1}, + "shove": {"default": 0, "min": -1, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + } + }, + { + 
"component": "AvatarGrasp", + "kwargs": { + "shape": GRASP_SHAPE, + "palette": color_palette, + "graspAction": "grasp", + # If multiple objects are at the same position then grasp them + # according to their layer in order `precedenceOrder`. + "precedenceOrder": ("appleLayer", "lowerPhysical",), + } + }, + { + "component": "Grappling", + "kwargs": { + "shape": shapes.MAGIC_BEAM, + "palette": shapes.MAGIC_BEAM_PALETTE, + "liveState": live_state_name, + "grappledState": grappled_state_name, + "grapplingState": grappling_state_name, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + # In this case READY_TO_SHOOT will be 1 if hold is allowed and + # will be 0 if not. + "zapperComponent": "Grappling", + } + }, + { + "component": "Stamina", + "kwargs": { + "maxStamina": max_stamina_bar_states, + "classConfig": { + "name": "player", + "greenFreezeTime": 0, + "yellowFreezeTime": 2, + "redFreezeTime": 6, + # `decrementRate` = 0.5 means decrease stamina on every + # other costly step. `decrementRate` = 1 means decrease + # stamina on every costly step. 
+ "decrementRate": 1.0, + }, + "amountInvisible": 6, + "amountGreen": 6, + "amountYellow": 6, + "amountRed": 1, + "costlyActions": ["move",], + } + }, + { + "component": "StaminaObservation", + "kwargs": { + "staminaComponent": "Stamina", + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + ] + } + return avatar_object + + +def create_avatar_objects(num_players: int, + max_stamina_bar_states: int = 19): + """Returns list of avatar objects of length 'num_players'.""" + avatar_objects = [] + for player_idx in range(num_players): + avatar_object = create_avatar_object(player_idx, TARGET_SPRITE_SELF, + max_stamina_bar_states - 1) + stamina_bar_objects = _create_stamina_overlay(player_idx, + max_stamina_bar_states) + enter_obstacle = _create_enter_obstacle(player_idx) + avatar_objects.append(avatar_object) + avatar_objects.append(enter_obstacle) + avatar_objects.extend(stamina_bar_objects) + + return avatar_objects + + +def _create_enter_obstacle(player_idx: int) -> Dict[str, Any]: + # Lua is 1-indexed. + lua_idx = player_idx + 1 + return { + "name": + "enter_obstacle", + "components": [ + { + "component": "StateManager", + "kwargs": + { + "initialState": "obstacleWait", + "stateConfigs": [ + { + "state": "obstacleWait" + }, + { + "state": "obstacleLive", + "layer": "lowerPhysical", + } + ] + } + }, + { + "component": "Transform", + }, + { + "component": "AvatarConnector", + "kwargs": { + "playerIndex": lua_idx, + "aliveState": "obstacleLive", + "waitState": "obstacleWait" + } + }, + ] + } + + +def get_config(): + """Default configuration for training on the factory2d level.""" + config = config_dict.ConfigDict() + + # Specify the number of players to particate in each episode (optional). + config.recommended_num_players = 12 + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. 
+ config.individual_observation_names = [ + "RGB", + "READY_TO_SHOOT", + "STAMINA", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + config.action_spec = specs.action(len(ACTION_SET)) + config.valid_roles = frozenset({"default"}) + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="factory_of_the_commons", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=5000, # The maximum possible number of frames. + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": config.layout.ascii_map, + "gameObjects": create_avatar_objects(num_players), + "scene": create_scene(), + "prefabs": PREFABS, + "charPrefabMap": config.layout.char_prefab_map, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/factory_commons__either_or.py b/meltingpot/python/configs/substrates/factory_commons__either_or.py new file mode 100644 index 00000000..3d7b3925 --- /dev/null +++ b/meltingpot/python/configs/substrates/factory_commons__either_or.py @@ -0,0 +1,108 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Factory of the Commons: Either Or.""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import factory_commons as base_config +from meltingpot.python.utils.substrates import map_helpers +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +;_____________________, +! c | +! cCc | +! ls ls C lt lt | +! Oj Oj O# O# | +! z z z z | +! x x x x | +! cCc | +! cCc | +! ls ls lt lt | +! Oj Oj O# O# | +! z z z z | +! x x C x x | +! cCc | +! c | +_______________________ +""" + +blue_cube_live = { + "type": "all", "list": ["tiled_floor", "blue_cube_wait", "blue_cube_live"]} +blue_cube_wait = { + "type": "all", "list": ["tiled_floor", "blue_cube_wait"]} + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + " ": {"type": "all", "list": ["tiled_floor", "apple", "spawn_point"]}, + # Graspable objects. + "c": map_helpers.a_or_b_with_odds(blue_cube_wait, + blue_cube_live, odds=(1, 1)), + "C": blue_cube_live, # This blue cube will always be present. + # New dynamic components. 
+ "l": {"type": "all", "list": ["tiled_floor", "hopper_body", + "hopper_indicator_blue_cube"]}, + "O": {"type": "all", "list": ["tiled_floor", "hopper_mouth"]}, + "D": {"type": "all", "list": ["tiled_floor", "dispenser_body", + "dispenser_indicator_apple"]}, + "t": {"type": "all", "list": ["tiled_floor", "dispenser_body", + "dispenser_indicator_two_apples"]}, + "s": {"type": "all", "list": ["tiled_floor", "dispenser_body", + "dispenser_indicator_cube_apple"]}, + "#": {"type": "all", "list": ["tiled_floor", "dispenser_belt", + "apple_dispensing_animation"]}, + "j": {"type": "all", "list": ["tiled_floor", "dispenser_belt", + "cube_apple_dispensing_animation"]}, + "z": {"type": "all", "list": ["tiled_floor", "floor_marking_top"]}, + "x": {"type": "all", "list": ["tiled_floor", "floor_marking_bottom"]}, + # Static components. + ";": {"type": "all", "list": ["tiled_floor", "nw_wall_corner"]}, + ",": {"type": "all", "list": ["tiled_floor", "ne_wall_corner"]}, + "_": "wall_horizontal", + "T": "wall_t_coupling", + "|": {"type": "all", "list": ["tiled_floor", "wall_east"]}, + "!": {"type": "all", "list": ["tiled_floor", "wall_west"]}, + "i": {"type": "all", "list": ["tiled_floor", "wall_middle"]}, + "~": {"type": "all", "list": ["tiled_floor", "threshold"]}, +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + # Specify a recommended number of players to particate in each episode. + config.recommended_num_players = 3 + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 3 + + # The specs of the environment (from a single-agent perspective). 
+ config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "STAMINA": specs.float64(), + # Debug only. + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(128, 184), + }) + return config diff --git a/meltingpot/python/configs/substrates/fruit_market.py b/meltingpot/python/configs/substrates/fruit_market.py new file mode 100644 index 00000000..7dc965ba --- /dev/null +++ b/meltingpot/python/configs/substrates/fruit_market.py @@ -0,0 +1,1229 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for the Fruit Market substrate. + +This substrate is used to study the dynamics of trade and bargaining of goods +that have different value to different players. + +The substrate consists of an open space where two types of trees exist: apple +trees and banana trees. Trees can be harvested by players by stepping on their +location and wait until they harvest the fruit from the tree. A harvested fruit +(apple or banana) goes into a player's inventory. Players can carry any number +of apples or bananas. Harvested fruit can be consumed for reward. Players have +two actions to consume fruit of the two types from their inventory. + +Players can be of two types: apple farmer & banana farmer. 
Apple farmers have a +higher probability of harvesting from apple trees than banana trees, but receive +more reward for consuming bananas. Banana farmers are the opposite. + +Players have a hunger meter which can be replenished by consuming a fruit. +Players have an action to consume an apple from their inventory, and another to +consume a banana. If the hunger meter reaches zero the player pays a +substantial cost in stamina. + +Crossing water also imposes a cost in stamina. + +Players also have trading actions of the form "I offer X apples for Y bananas" +and the converse "I offer Z bananas for W apples". When players are within a +trading radius of each other and have corresponding offers (`X = W` and `Y = Z`) +and enough fruit in their inventories to satisfy it, the trade occurs and the +appropriate number of apples and bananas are exchanged and placed in their +inventories. +""" + +import copy +from typing import Any, Dict, Generator, Mapping, Sequence + +from ml_collections import config_dict as configdict + +from meltingpot.python.utils.substrates import game_object_utils +from meltingpot.python.utils.substrates import shapes + +PrefabConfig = game_object_utils.PrefabConfig + +MAX_OFFER_QUANTITY = 3 + +_COMPASS = ["N", "E", "S", "W"] +INVISIBLE = (0, 0, 0, 0) + +NW_WALL_CORNER = { + "name": "nw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_wall_corner", + "stateConfigs": [{ + "state": "nw_wall_corner", + "layer": "upperPhysical", + "sprite": "NwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NwWallCorner"], + "spriteShapes": [shapes.FENCE_NW_CORNER], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +NE_WALL_CORNER = { + "name": 
"ne_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_wall_corner", + "stateConfigs": [{ + "state": "ne_wall_corner", + "layer": "upperPhysical", + "sprite": "NeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NeWallCorner"], + "spriteShapes": [shapes.FENCE_NE_CORNER], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +NE_INNER_WALL_CORNER = { + "name": "ne_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_inner_wall_corner", + "stateConfigs": [{ + "state": "ne_inner_wall_corner", + "layer": "upperPhysical", + "sprite": "ne_inner_wall_corner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["ne_inner_wall_corner"], + "spriteShapes": [shapes.FENCE_INNER_NE_CORNER], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +NW_INNER_WALL_CORNER = { + "name": "nw_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_inner_wall_corner", + "stateConfigs": [{ + "state": "nw_inner_wall_corner", + "layer": "upperPhysical", + "sprite": "nw_inner_wall_corner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["nw_inner_wall_corner"], + "spriteShapes": [shapes.FENCE_INNER_NW_CORNER], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + 
{"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +SE_WALL_CORNER = { + "name": "se_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "se_wall_corner", + "stateConfigs": [{ + "state": "se_wall_corner", + "layer": "upperPhysical", + "sprite": "SeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SeWallCorner"], + "spriteShapes": [shapes.FENCE_SE_CORNER], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +SW_WALL_CORNER = { + "name": "sw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sw_wall_corner", + "stateConfigs": [{ + "state": "sw_wall_corner", + "layer": "upperPhysical", + "sprite": "SwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SwWallCorner"], + "spriteShapes": [shapes.FENCE_SW_CORNER], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_SHADOW_SW = { + "name": "wall_shadow_sw", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_shadow_sw", + "stateConfigs": [{ + "state": "wall_shadow_sw", + "layer": "upperPhysical", + "sprite": "wall_shadow_sw", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["wall_shadow_sw"], + "spriteShapes": [shapes.FENCE_SHADOW_SW], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": 
"hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_SHADOW_S = { + "name": "wall_shadow_s", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_shadow_s", + "stateConfigs": [{ + "state": "wall_shadow_s", + "layer": "upperPhysical", + "sprite": "wall_shadow_s", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["wall_shadow_s"], + "spriteShapes": [shapes.FENCE_SHADOW_S], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_SHADOW_SE = { + "name": "wall_shadow_se", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_shadow_se", + "stateConfigs": [{ + "state": "wall_shadow_se", + "layer": "upperPhysical", + "sprite": "wall_shadow_se", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["wall_shadow_se"], + "spriteShapes": [shapes.FENCE_SHADOW_SE], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_NORTH = { + "name": "wall_north", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_north", + "stateConfigs": [{ + "state": "wall_north", + "layer": "upperPhysical", + "sprite": "WallNorth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallNorth"], + "spriteShapes": [shapes.FENCE_N], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": 
"BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_EAST = { + "name": "wall_east", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_east", + "stateConfigs": [{ + "state": "wall_east", + "layer": "upperPhysical", + "sprite": "WallEast", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallEast"], + "spriteShapes": [shapes.FENCE_E], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +WALL_SOUTH = { + "name": "wall_south", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_south", + "stateConfigs": [{ + "state": "wall_south", + "layer": "upperPhysical", + "sprite": "WallSouth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallSouth"], + "spriteShapes": [shapes.FENCE_S], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + + +WALL_WEST = { + "name": "wall_west", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_west", + "stateConfigs": [{ + "state": "wall_west", + "layer": "upperPhysical", + "sprite": "WallWest", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallWest"], + "spriteShapes": [shapes.FENCE_W], + "palettes": [shapes.FENCE_PALETTE], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +STONE_WALL = { + "name": "stone_wall", 
+ "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "stoneWall", + "stateConfigs": [{ + "state": "stoneWall", + "layer": "upperPhysical", + "sprite": "StoneWall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["StoneWall"], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [False] + } + }, + {"component": "BeamBlocker", "kwargs": {"beamType": "hold"}}, + {"component": "BeamBlocker", "kwargs": {"beamType": "shove"}}, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "logic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + + +def get_water(): + """Get an animated water game object.""" + layer = "background" + water = { + "name": "water_{}".format(layer), + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "water_1", + "stateConfigs": [ + {"state": "water_1", + "layer": layer, + "sprite": "water_1", + "groups": ["water"]}, + {"state": "water_2", + "layer": layer, + "sprite": "water_2", + "groups": ["water"]}, + {"state": "water_3", + "layer": layer, + "sprite": "water_3", + "groups": ["water"]}, + {"state": "water_4", + "layer": layer, + "sprite": "water_4", + "groups": ["water"]}, + ] + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["water_1", "water_2", "water_3", "water_4"], + "spriteShapes": [shapes.WATER_1, shapes.WATER_2, + shapes.WATER_3, shapes.WATER_4], + "palettes": [{ + "@": (52, 193, 209, 255), + "*": (34, 166, 181, 255), + "o": (32, 155, 168, 255), + "~": (31, 148, 161, 255)}] * 4, + } + 
}, + { + "component": "Animation", + "kwargs": { + "states": ["water_1", "water_2", "water_3", "water_4"], + "gameFramesPerAnimationFrame": 2, + "loop": True, + "randomStartFrame": True, + "group": "water", + } + }, + { + "component": "TraversalCost", + "kwargs": { + "penaltyAmount": 0, # No reward cost from crossing water. + "alsoReduceStamina": True, # Crossing water depletes stamina. + "staminaPenaltyAmount": 1, # Stamina lost per step on water. + } + }, + ] + } + return water + + +GROUND = { + "name": "ground", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ground", + "stateConfigs": [{ + "state": "ground", + "layer": "background", + "sprite": "groundSprite", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["groundSprite"], + "spriteShapes": [shapes.DIRT_PATTERN], + "palettes": [{"X": (207, 199, 184, 255), + "x": (199, 192, 177, 255),}], + "noRotates": [True] + } + }, + ] +} + + +def get_fruit_tree_palette(fruit_type): + """Return a palette with the correct colored fruit.""" + apple_palette = copy.deepcopy(shapes.APPLE_TREE_PALETTE) + banana_palette = copy.deepcopy(shapes.BANANA_TREE_PALETTE) + if fruit_type == "ripe_apple": + apple_palette["o"] = (199, 33, 8, 255) + return apple_palette + elif fruit_type == "ripe_banana": + banana_palette["o"] = (222, 222, 13, 255) + return banana_palette + elif fruit_type == "unripe_apple": + apple_palette["o"] = (124, 186, 58, 255) + return apple_palette + elif fruit_type == "unripe_banana": + banana_palette["o"] = (37, 115, 45, 255) + return banana_palette + + +def get_potential_tree(probability_empty: float = 0.9, + probability_apple: float = 0.05, + probability_banana: float = 0.05) -> PrefabConfig: + """Return a prefab for a potential tree.""" + assert probability_empty + probability_apple + probability_banana == 1.0, ( + "Probabilities must sum to 1.0.") + spawn_probabilities = {"empty": 
probability_empty, + "apple": probability_apple, + "banana": probability_banana} + prefab = { + "name": "potential_tree", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "treeWait", + "stateConfigs": [ + {"state": "treeWait"}, + { + "state": "appleTreeHarvestable", + "layer": "lowerPhysical", + "sprite": "appleTreeHarvestableSprite", + }, + { + "state": "bananaTreeHarvestable", + "layer": "lowerPhysical", + "sprite": "bananaTreeHarvestableSprite", + }, + { + "state": "appleTreeUnripe", + "layer": "lowerPhysical", + "sprite": "appleTreeUnripeSprite", + }, + { + "state": "bananaTreeUnripe", + "layer": "lowerPhysical", + "sprite": "bananaTreeUnripeSprite", + }, + ], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["appleTreeHarvestableSprite", + "bananaTreeHarvestableSprite", + "appleTreeUnripeSprite", + "bananaTreeUnripeSprite"], + "spriteShapes": [shapes.APPLE_TREE_STOUT, + shapes.BANANA_TREE, + shapes.APPLE_TREE_STOUT, + shapes.BANANA_TREE], + "palettes": [get_fruit_tree_palette("ripe_apple"), + get_fruit_tree_palette("ripe_banana"), + get_fruit_tree_palette("unripe_apple"), + get_fruit_tree_palette("unripe_banana")], + "noRotates": [True, + True, + True, + True] + } + }, + { + "component": "FruitType", + "kwargs": { + "probabilities": spawn_probabilities, + } + }, + { + "component": "Harvestable", + "kwargs": { + "regrowthTime": 50, + } + }, + { + "component": "PreventStaminaRecoveryHere", + }, + ] + } + return prefab + + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +FORWARD = {"move": 1, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +BACKWARD = {"move": 3, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +STEP_LEFT = {"move": 4, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +TURN_LEFT = {"move": 0, "turn": -1, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +EAT_APPLE = {"move": 0, "turn": 0, "eat_apple": 1, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +EAT_BANANA = {"move": 0, "turn": 0, "eat_apple": 0, "eat_banana": 1, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 0, "shove": 0} +HOLD = {"move": 0, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 1, "shove": 0} +# Notice that SHOVE includes both `hold` and `shove` parts. 
+SHOVE = {"move": 0, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 1, "shove": 1} +PULL = {"move": 0, "turn": 0, "eat_apple": 0, "eat_banana": 0, "offer_apple": 0, "offer_banana": 0, "offer_cancel": 0, "hold": 1, "shove": -1} +# pyformat: enable +# pylint: enable=bad-whitespace + +offer_actions = [] +# Add the cancel action +cancel_action = {"move": 0, "turn": 0, "eat_apple": 0, "eat_banana": 0, + "offer_apple": 0, "offer_banana": 0, "offer_cancel": 1, + "hold": 0, "shove": 0} +offer_actions.append(cancel_action) + +for a in range(-MAX_OFFER_QUANTITY, MAX_OFFER_QUANTITY): + for b in range(-MAX_OFFER_QUANTITY, MAX_OFFER_QUANTITY): + offer_action = {"move": 0, "turn": 0, "eat_apple": 0, "eat_banana": 0, + "offer_apple": a, "offer_banana": b, "offer_cancel": 0, + "hold": 0, "shove": 0} + if a > 0 and b < 0: + offer_actions.append(offer_action) + elif a < 0 and b > 0: + offer_actions.append(offer_action) + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + EAT_APPLE, + EAT_BANANA, + HOLD, + SHOVE, + PULL, + *offer_actions, +) + + +def create_prefabs() -> PrefabConfig: + """Returns the prefabs. + + Prefabs are a dictionary mapping names to template game objects that can + be cloned and placed in multiple locations accoring to an ascii map. 
+ """ + prefabs = { + # wall prefabs + "nw_wall_corner": NW_WALL_CORNER, + "nw_inner_wall_corner": NW_INNER_WALL_CORNER, + "ne_wall_corner": NE_WALL_CORNER, + "ne_inner_wall_corner": NE_INNER_WALL_CORNER, + "se_wall_corner": SE_WALL_CORNER, + "sw_wall_corner": SW_WALL_CORNER, + "wall_north": WALL_NORTH, + "wall_east": WALL_EAST, + "wall_south": WALL_SOUTH, + "wall_west": WALL_WEST, + "wall_shadow_sw": WALL_SHADOW_SW, + "wall_shadow_s": WALL_SHADOW_S, + "wall_shadow_se": WALL_SHADOW_SE, + "stone_wall": STONE_WALL, + + # non-wall prefabs + "spawn_point": SPAWN_POINT, + "river": get_water(), + "ground": GROUND, + "potential_tree": get_potential_tree(), + "high_probability_tree": get_potential_tree( + probability_empty=0.1, + probability_apple=0.45, + probability_banana=0.45, + ), + } + return prefabs + + +def create_scene(): + """Create the scene object, a non-physical object to hold global logic.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TradeManager", + }, + ] + } + return scene + + +def _create_stamina_overlay(player_idx: int, + max_stamina_bar_states: int, + ) -> Generator[Dict[str, Any], None, None]: + """Create stamina marker overlay objects.""" + # Lua is 1-indexed. + lua_idx = player_idx + 1 + + stamina_bar_state_configs = [ + # Invisible inactive (dead) overlay type. + {"state": "staminaBarWait"}, + ] + stamina_bar_sprite_names = [] + stamina_bar_sprite_shapes = [] + + # Each player's stamina bars must be in their own layer so they do not + # interact/collide with other players' stamina bars. + stamina_bar_layer = f"superOverlay_{player_idx}" + + # Declare one state per level of the stamina bar. 
+ for i in range(max_stamina_bar_states): + sprite_name = f"sprite_for_level_{i}" + stamina_bar_state_configs.append( + {"state": f"level_{i}", + "layer": stamina_bar_layer, + "sprite": sprite_name}) + stamina_bar_sprite_names.append(sprite_name) + xs = "\nxxxxxxxx" + blank_space = xs * 7 + number_of_rs = max(6 - i, 0) + number_of_ys = i if i < 7 else 12 - i + number_of_gs = max(i - 6, 0) + if i >= 13: + level = blank_space + xs + else: + level = (blank_space + "\nx" + "G" * number_of_gs + "Y" * number_of_ys + + "R" * number_of_rs + "x") + empty = "\n".join(["x" * 8] * 8) + # Replace the east/south/west sprites with invisible sprites so the only + # stamina bar rendered is the one in the direction that the current player + # is facing. + stamina_bar_sprite_shapes.append((level, empty, empty, empty)) + + # Create a stamina bar for each compass direction. Only the direction the + # current player is facing is visible. + for direction in ("N", "E", "S", "W"): + yield { + "name": "avatar_stamina_bar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "staminaBarWait", + "stateConfigs": stamina_bar_state_configs + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": stamina_bar_sprite_names, + "spriteShapes": stamina_bar_sprite_shapes, + "palettes": [{"G": (62, 137, 72, 255), + "Y": (255, 216, 97, 255), + "R": (162, 38, 51, 255), + "x": INVISIBLE,}] * max_stamina_bar_states, + "noRotates": [True] * max_stamina_bar_states + } + }, + { + "component": "StaminaBar", + "kwargs": { + "playerIndex": lua_idx, + "waitState": "staminaBarWait", + "layer": stamina_bar_layer, + "direction": direction + } + }, + ] + } + + +def create_avatar_object(player_idx: int, + specialty: str, + max_stamina_bar_states: int) -> Dict[str, Any]: + """Create an avatar object.""" + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + + source_sprite_self = "Avatar" + str(lua_index) + grappling_sprite = "AvatarGrappling" + str(lua_index) + grappled_sprite = "AvatarGrappled" + str(lua_index) + + live_state_name = "player{}".format(lua_index) + grappling_state_name = f"player{lua_index}_grappling" + grappled_state_name = f"player{lua_index}_grappled" + + map_specialty_to_sprite_color = { + "apple": (199, 55, 47), # apple red + "banana": (255, 225, 53), # banana yellow + } + avatar_color = map_specialty_to_sprite_color[specialty] + + avatar_palette = shapes.get_palette(avatar_color) + avatar_palette["P"] = (196, 77, 190, 200) + avatar_palette["p"] = (184, 72, 178, 150) + + map_specialty_to_complement = { + "apple": "banana", + "banana": "apple" + } + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + # Initial player state. + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + {"state": grappling_state_name, + "layer": "upperPhysical", + "sprite": grappling_sprite, + "contact": "avatar", + "groups": ["players"]}, + {"state": grappled_state_name, + "layer": "upperPhysical", + "sprite": grappled_sprite, + "contact": "avatar", + "groups": ["players"]}, + # Player wait type for times when they are zapped out. 
+ {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [source_sprite_self, grappling_sprite, + grappled_sprite], + "spriteShapes": [shapes.CUTE_AVATAR, + shapes.CUTE_AVATAR_ARMS_UP, + shapes.MAGIC_GRAPPLED_AVATAR], + "palettes": [avatar_palette] * 3, + "noRotates": [True] * 3, + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "additionalLiveStates": [grappled_state_name, + grappling_state_name], + "waitState": "playerWait", + "spawnGroup": "spawnPoints", + "actionOrder": [ + # Basic movement actions + "move", + "turn", + # Trade actions + "eat_apple", + "eat_banana", + "offer_apple", + "offer_banana", + "offer_cancel", + # Grappling actions + "hold", + "shove", + ], + "actionSpec": { + # Basic movement actions + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + # Trade actions + "eat_apple": {"default": 0, "min": 0, "max": 1}, + "eat_banana": {"default": 0, "min": 0, "max": 1}, + "offer_apple": {"default": 0, "min": -MAX_OFFER_QUANTITY, + "max": MAX_OFFER_QUANTITY}, + "offer_banana": {"default": 0, "min": -MAX_OFFER_QUANTITY, + "max": MAX_OFFER_QUANTITY}, + "offer_cancel": {"default": 0, "min": 0, "max": 1}, + # Grappling actions + "hold": {"default": 0, "min": 0, "max": 1}, + "shove": {"default": 0, "min": -1, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + } + }, + { + "component": "Inventory", + }, + { + "component": "Eating", + }, + { + "component": "Specialization", + "kwargs": { + "specialty": specialty, # "apple" or "banana" + "strongAmount": 2, + "weakAmount": 2, + "strongProbability": 1, + "weakProbability": 0.04, + } + }, + { + "component": "Trading", + "kwargs": { + "maxOfferQuantity": 3, # The highest possible offer. 
+ "radius": 4, # Size of neighborhood where trade is possible. + } + }, + { + "component": "Taste", + "kwargs": { + "mostTastyFruit": map_specialty_to_complement[specialty], + "mostTastyReward": 8, + "defaultReward": 1, + } + }, + { + "component": "PeriodicNeed", # The hunger mechanic + "kwargs": { + # Hunger threshold reached after `delay` steps without eating. + "delay": 50, + # No reward cost of hunger exceeding threshold. + "reward": 0, + }, + }, + { + "component": "Grappling", + "kwargs": { + "shape": shapes.MAGIC_BEAM, + "palette": shapes.MAGIC_BEAM_PALETTE, + "liveState": live_state_name, + "grappledState": grappled_state_name, + "grapplingState": grappling_state_name, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + # In this case READY_TO_SHOOT will be 1 if hold is allowed and + # will be 0 if not. + "zapperComponent": "Grappling", + } + }, + { + "component": "Stamina", + "kwargs": { + "maxStamina": max_stamina_bar_states, + "classConfig": { + "name": "player", + "greenFreezeTime": 0, + "yellowFreezeTime": 2, + "redFreezeTime": 6, + # `decrementRate` = 0.5 means decrease stamina on every + # other costly step. `decrementRate` = 1 means decrease + # stamina on every costly step. + "decrementRate": 0.5, + }, + "amountInvisible": 6, + "amountGreen": 6, + "amountYellow": 6, + "amountRed": 1, + "costlyActions": ["move"], + } + }, + { + "component": "StaminaModulatedByNeed", + "kwargs": { + # Reduce stamina by `lossPerStepBeyondThreshold` per timestep + # after hunger exceeds its threshold. 
+ "lossPerStepBeyondThreshold": 1, + } + }, + { + "component": "StaminaObservation", + "kwargs": { + "staminaComponent": "Stamina", + } + }, + { + "component": "InventoryObserver", + }, + { + "component": "MyOfferObserver", + }, + { + "component": "AllOffersObserver", + "kwargs": { + "flatten": True, + } + }, + { + "component": "HungerObserver", + "kwargs": { + "needComponent": "PeriodicNeed", + }, + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + ] + } + return avatar_object + + +def create_avatar_objects(roles: Sequence[str], + max_stamina_bar_states: int = 19): + """Returns list of avatar objects of length 'num_players'.""" + avatar_objects = [] + for player_idx, role in enumerate(roles): + if role == "default": + # If no role was passed then set even numbered players to be banana + # farmers and odd numbered players to be apple farmers. + if player_idx % 2 == 1: + specialty = "apple" + elif player_idx % 2 == 0: + specialty = "banana" + else: + if role == "apple_farmer": + specialty = "apple" + elif role == "banana_farmer": + specialty = "banana" + game_object = create_avatar_object(player_idx, + specialty, + max_stamina_bar_states - 1) + stamina_bar_objects = _create_stamina_overlay(player_idx, + max_stamina_bar_states) + avatar_objects.append(game_object) + avatar_objects.extend(stamina_bar_objects) + + return avatar_objects + + +def get_config(): + """Default configuration for the Fruit Market game.""" + config = configdict.ConfigDict() + + # Specify the number of players to particate in each episode (optional). + config.recommended_num_players = 16 + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "READY_TO_SHOOT", + "STAMINA", + "INVENTORY", + "MY_OFFER", + "OFFERS", + "HUNGER", + # Debug only (do not use the following observations in policies). 
+ "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + return config + + +def build( + roles: Sequence[str], + config: configdict.ConfigDict, +) -> Mapping[str, Any]: + """Build this substrate given player roles.""" + substrate_definition = dict( + levelName="trade", + levelDirectory="meltingpot/lua/levels", + numPlayers=len(roles), + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": config.layout.ascii_map, + "gameObjects": create_avatar_objects(roles), + "prefabs": create_prefabs(), + "charPrefabMap": config.layout.char_prefab_map, + "scene": create_scene(), + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/fruit_market__concentric_rivers.py b/meltingpot/python/configs/substrates/fruit_market__concentric_rivers.py new file mode 100644 index 00000000..2cc4d3f1 --- /dev/null +++ b/meltingpot/python/configs/substrates/fruit_market__concentric_rivers.py @@ -0,0 +1,116 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for the substrate: fruit_market_concentric_rivers. + +Example video: https://youtu.be/djmylRv1i_w + +This substrate has three concentric rings of water that confer a small stamina +cost to players who step on them. 
+""" + +from ml_collections import config_dict as configdict + +from meltingpot.python.configs.substrates import fruit_market as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +x/___________________________+x +x'###########################`x +x!~~~~~~~~~~~~~~~~~~~~~~~~~~~|x +x!~~~~~~~~~~~~~~~~~~~~~~~~~~~|x +x!~~~LLLLLLLLLLLLLLLLLLLLL~~~|x +x!~~~L~~~~~~~~~~~~~~~~~~~L~~~|x +x!~~~L~~~~~~~~~~~~~~~~~~~L~~~|x +x!~~~L~~LLLLLLLLLLLLLLL~~L~~~|x +x!~~~L~~L~~~~~~~~~~~~~L~~L~~~|x +x!~~~L~~L~~~~~~~~~~~~~L~~L~~~|x +x!~~~L~~L~~LLLLLLLLL~~L~~L~~~|x +x!~~~L~~L~~LP~P~P~PL~~L~~L~~~|x +x!~~~L~~L~~L~P~P~P~L~~L~~L~~~|x +x!~~~L~~L~~L~~P~P~~L~~L~~L~~~|x +x!~~~L~~L~~L~P~P~P~L~~L~~L~~~|x +x!~~~L~~L~~L~~P~P~~L~~L~~L~~~|x +x!~~~L~~L~~L~P~P~P~L~~L~~L~~~|x +x!~~~L~~L~~LP~P~P~PL~~L~~L~~~|x +x!~~~L~~L~~LLLLLLLLL~~L~~L~~~|x +x!~~~L~~L~~~~~~~~~~~~~L~~L~~~|x +x!~~~L~~L~~~~~~~~~~~~~L~~L~~~|x +x!~~~L~~LLLLLLLLLLLLLLL~~L~~~|x +x!~~~L~~~~~~~~~~~~~~~~~~~L~~~|x +x!~~~L~~~~~~~~~~~~~~~~~~~L~~~|x +x!~~~LLLLLLLLLLLLLLLLLLLLL~~~|x +x!~~~~~~~~~~~~~~~~~~~~~~~~~~~|x +x!~~~~~~~~~~~~~~~~~~~~~~~~~~~|x +x!~~~~~~~~~~~~~~~~~~~~~~~~~~~|x +x(---------------------------)x +x<###########################>x +""" + +# Map a character to the prefab it represents in the ASCII map. 
+CHAR_PREFAB_MAP = { + # wall prefabs + "/": {"type": "all", "list": ["ground", "nw_wall_corner"]}, + "'": {"type": "all", "list": ["ground", "nw_inner_wall_corner"]}, + "+": {"type": "all", "list": ["ground", "ne_wall_corner"]}, + "`": {"type": "all", "list": ["ground", "ne_inner_wall_corner"]}, + ")": {"type": "all", "list": ["ground", "se_wall_corner"]}, + "(": {"type": "all", "list": ["ground", "sw_wall_corner"]}, + "_": {"type": "all", "list": ["ground", "wall_north"]}, + "|": {"type": "all", "list": ["ground", "wall_east"]}, + "-": {"type": "all", "list": ["ground", "wall_south"]}, + "!": {"type": "all", "list": ["ground", "wall_west"]}, + "#": {"type": "all", "list": ["ground", "wall_shadow_s"]}, + ">": {"type": "all", "list": ["ground", "wall_shadow_se"]}, + "<": {"type": "all", "list": ["ground", "wall_shadow_sw"]}, + + # non-wall prefabs + "L": "river", + "P": {"type": "all", "list": ["ground", "potential_tree", "spawn_point"]}, + "~": {"type": "all", "list": ["ground", "potential_tree"]}, + "x": "ground", +} + + +def get_config(): + """Configuration for this substrate.""" + config = base_config.get_config() + # Specify the number of players to particate in each episode (optional). + config.recommended_num_players = 16 + # Override the map layout settings. + config.layout = configdict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(base_config.ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "STAMINA": specs.float64(), + "INVENTORY": specs.int64(2), + "MY_OFFER": specs.int64(2), + "OFFERS": specs.int64(102), + "HUNGER": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(248, 248,), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"apple_farmer", "banana_farmer"}) + config.default_player_roles = ("apple_farmer",) * 8 + ("banana_farmer",) * 8 + return config diff --git a/meltingpot/python/configs/substrates/gift_refinements.py b/meltingpot/python/configs/substrates/gift_refinements.py new file mode 100644 index 00000000..16f61600 --- /dev/null +++ b/meltingpot/python/configs/substrates/gift_refinements.py @@ -0,0 +1,498 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for gift_refinements. + +Example video: https://youtu.be/C1C2CJ__mhQ + +Tokens randomly spawn in empty spaces. When collected, they are put in the +player's inventory where they can be consumed by for reward. Alternatively, a +token can be refined into three higher refinement tokens and gifted to another +player. 
This is akin to tokens initially being a raw material, like chunks of +metal, and then being split and shaped into more useful goods for added value. +A token can only be refined a finite number of times, after which it cannot be +split again, nor refined further; although it can still be gifted. + +Gift Refinements is inspired by the Trust Game from behavioural economics where +the first player has an endowment and chooses how much to donate to a second +player who receives three times its value. Then the second player chooses how +much to give back to the first. + +In Gift Refinements, tokens can only be refined twice (i.e. there are three +types of tokens). The tokens that spawn are always of the rawest type, and the +only way to create more refined tokens is to gift them to another player. +Players also have a limited inventory capacity of 15 tokens for each token type. +The special gifting is implemented as a beam that the players can fire. If they +hit another player with the beam while their inventory is not full, they lose +one token of the rawest type they currently hold, and the hit player receives +either three tokens of the next refinement (if the token gifted wasn't already +at maximum refinement), or the token gifted (otherwise). + +The players have an action to consume tokens which takes all tokens of all types +currently in their inventory and converts them into reward. All tokens are worth +1 reward regardless of refinement level. + +The game is set up in such a way that there are several ways players can form +mutually beneficial interactions, but all of them require trust. For instance, +a pair of players might have one player pick up a token and immediately gift it +to the other one who receives three. Then the second player returns one token +which leaves them with three and two tokens respectively. If they both consume +after this, they both benefitted from the interaction.
A more extreme case would +have them take one token and refine it maximally to produce 9 tokens that they +can split five and four with 10 roughly alternating gifting actions. +""" + +from collections.abc import Mapping, Sequence +from typing import Any + +from ml_collections import config_dict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +NUM_TOKEN_TYPES = 3 +MAX_TOKENS_PER_TYPE = 15 + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWWWWWW +WTTTTTTTTTTTTTTTTTTTTTTTTTW +WTPTTTTTTTTTPTTTTTPTTTTTPTW +WTTTTTTTTWTTTTTTTTTTTTTTTTW +WTTTTTTTTWTTTTTTTTTTWTTTTTW +WTTTTTTTTWTTTTTTTTTTWTTTTTW +WTTTTTTTTWWWWWWWTTTTWTTTPTW +WTPTWWTTTTWTTTTTTTTTWTTTTTW +WTTTTTTTTTWTTPTTTTTTTTTTTTW +WTTTTTTTTTWTTTTTWWWTTTTTTTW +WTTTTTTTTTWTTTTTTTTTTTTTTTW +WTTTTTTTTTTTTTTTTTTTTTTTPTW +WTPTTTWWWTTTTTTWWWWWWWWTTTW +WTTWWWWTTTTTTTTTTTTTTTTTTTW +WTTTTTWTTTTWTTTTTPTTTTTTTTW +WTTTTTWTTTTWTTTTTTTTTTTTPTW +WTTTTTWTTTTTWTTTTTTTTWTTTTW +WTTTTTTWTTTTTWWWWTTTTWTTTTW +WTPTTTTTWTTTTTTTTTTTTWTTTTW +WTTTTTTTTWTTTPTTTTTTTTTTPTW +WTTTTTTTTTWTTTTTTTTWTTTTTTW +WTTTTWTTTTTTTTTTTTTWTTTTTTW +WTTTTWTTTTTTTTTWWWWWWWWTTTW +WTTTTWTTTTTTTTTTTTWTTTTTTTW +WTPTTTTTTPTTTTTTTPTTTTTTPTW +WTTTTTTTTTTTTTTTTTTTTTTTTTW +WWWWWWWWWWWWWWWWWWWWWWWWWWW +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "P": "spawn_point", + "W": "wall", + "T": "token", +} + +_COMPASS = ["N", "E", "S", "W"] + +# The Scene objece is a non-physical object, it components implement global +# logic. In this case, that includes holding the global berry counters to +# implement the regrowth rate, as well as some of the observations. 
+SCENE = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.2 + } + }, + ] +} + + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall",], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [True] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gift" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "logic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +TOKEN = { + "name": "token", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "tokenWait", + "stateConfigs": [ + {"state": "tokenWait", + "layer": "lowerPhysical", + "sprite": "coinWait", + "groups": []}, + {"state": "token", + "layer": "lowerPhysical", + "sprite": "coin", + "groups": ["tokens"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["coin", "coinWait"], + "spriteShapes": [shapes.COIN, shapes.COIN], + "palettes": [ + shapes.COIN_PALETTE, shapes.INVISIBLE_PALETTE], + } 
+ }, + { + "component": "Pickable", + "kwargs": { + "liveState": "token", + "waitState": "tokenWait", + "rewardForPicking": 0.0, + } + }, + { + "component": "FixedRateRegrow", + "kwargs": { + "liveState": "token", + "waitState": "tokenWait", + "regrowRate": 0.0002, + } + }, + ] +} + + +PLAYER_COLOR_PALETTES = [] +for human_readable_color in colors.human_readable: + PLAYER_COLOR_PALETTES.append(shapes.get_palette(human_readable_color)) + + +def get_avatar_object(num_players: int, player_index: int): + # Lua is 1-indexed. + lua_index = player_index + 1 + color_palette = PLAYER_COLOR_PALETTES[player_index] + avatar_sprite_name = "avatarSprite{}".format(lua_index) + return { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "player", + "stateConfigs": [ + { + "state": "player", + "layer": "upperPhysical", + "sprite": avatar_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": "playerWait", + "groups": ["playerWaits"] + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [avatar_sprite_name], + "spriteShapes": [shapes.CUTE_AVATAR], + "palettes": [color_palette], + "noRotates": [True], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": "player", + "waitState": "playerWait", + "spawnGroup": "spawnPoints", + "actionOrder": [ + "move", "turn", "refineAndGift", "consumeTokens" + ], + "actionSpec": { + "move": { + "default": 0, + "min": 0, + "max": len(_COMPASS) + }, + "turn": { + "default": 0, + "min": -1, + "max": 1 + }, + "refineAndGift": { + "default": 0, + "min": 0, + "max": 1 + }, + "consumeTokens": { + "default": 0, + "min": 0, + "max": 1 + }, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + } + } + }, + { + "component": "Inventory", + "kwargs": { + "capacityPerType": MAX_TOKENS_PER_TYPE, + "numTokenTypes": 
NUM_TOKEN_TYPES, + } + }, + { + "component": "GiftBeam", + "kwargs": { + "cooldownTime": 3, + "beamLength": 5, + "beamRadius": 0, + "agentRole": "none", + "giftMultiplier": 5, + "successfulGiftReward": 10, + "roleRewardForGifting": { + "none": 0.0, + "gifter": 0.2, + "selfish": -2.0 + }, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GiftBeam", + }, + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + "name": "INVENTORY", + "type": "tensor.DoubleTensor", + "shape": [NUM_TOKEN_TYPES], + "component": "Inventory", + "variable": "inventory" + }, + ] + } + }, + { + "component": "TokenTracker", + "kwargs": { + "numPlayers": num_players, + "numTokenTypes": NUM_TOKEN_TYPES, + } + }, + ] + } + + +# PREFABS is a dictionary mapping names to template game objects that can +# be cloned and placed in multiple locations according to an ascii map. +PREFABS = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + "token": TOKEN, +} + + +def get_avatar_objects(num_players: int): + return [get_avatar_object(num_players, i) for i in range(num_players)] + + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = { + "move": 0, "turn": 0, "refineAndGift": 0, "consumeTokens": 0} +FORWARD = { + "move": 1, "turn": 0, "refineAndGift": 0, "consumeTokens": 0} +STEP_RIGHT = { + "move": 2, "turn": 0, "refineAndGift": 0, "consumeTokens": 0} +BACKWARD = { + "move": 3, "turn": 0, "refineAndGift": 0, "consumeTokens": 0} +STEP_LEFT = { + "move": 4, "turn": 0, "refineAndGift": 0, "consumeTokens": 0} +TURN_LEFT = { + "move": 0, "turn": -1, "refineAndGift": 0, "consumeTokens": 0} +TURN_RIGHT = { + "move": 0, "turn": 1, "refineAndGift": 0, "consumeTokens": 0} +REFINE_AND_GIFT = { + "move": 0, "turn": 0, "refineAndGift": 1, "consumeTokens": 0} +CONSUME_TOKENS = { + "move": 0, "turn": 0, "refineAndGift": 0, "consumeTokens": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + REFINE_AND_GIFT, + CONSUME_TOKENS, +) + + +def get_config(): + """Default configuration for the gift_refinements level.""" + config = config_dict.ConfigDict() + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "READY_TO_SHOOT", + "INVENTORY", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "INVENTORY": specs.inventory(3), + # Debug only (do not use the following observations in policies). + "WORLD.RGB": specs.rgb(216, 216), + }) + + # The roles assigned to each player. 
+ config.valid_roles = frozenset({"default", "target"}) + config.default_player_roles = ("default",) * 6 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate given player roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="gift_refinements", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": get_avatar_objects(num_players), + "scene": SCENE, + "prefabs": PREFABS, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/capture_the_flag.py b/meltingpot/python/configs/substrates/paintball__capture_the_flag.py similarity index 93% rename from meltingpot/python/configs/substrates/capture_the_flag.py rename to meltingpot/python/configs/substrates/paintball__capture_the_flag.py index b086b300..48708a2f 100644 --- a/meltingpot/python/configs/substrates/capture_the_flag.py +++ b/meltingpot/python/configs/substrates/paintball__capture_the_flag.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Capture the Flag. -Example video: https://youtu.be/VRNt55-0IqE +Example video: https://youtu.be/ECzevYpi1dM This substrate a team based zero sum game. There are four players on each team. @@ -44,7 +44,7 @@ flags are in their respective home bases. 
""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Mapping, Optional, Sequence from ml_collections import config_dict import numpy as np @@ -54,7 +54,7 @@ _COMPASS = ["N", "E", "S", "W"] -DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ IIIIIIIIIIIIIIIIIIIIIII IWWWWWWWWWWWWWWWWWWWWWI IWPPP,PPPP,F,PPPP,PPPWI @@ -504,7 +504,8 @@ def create_flag_prefab(team: str): STEP_LEFT = {"move": 4, "turn": 0, "fireZap": 0} TURN_LEFT = {"move": 0, "turn": -1, "fireZap": 0} TURN_RIGHT = {"move": 0, "turn": 1, "fireZap": 0} -FIRE_ZAP = {"move": 0, "turn": 0, "fireZap": 1} +FIRE_ZAP_A = {"move": 0, "turn": 0, "fireZap": 1} +FIRE_ZAP_B = {"move": 0, "turn": 0, "fireZap": 2} # pyformat: enable # pylint: enable=bad-whitespace @@ -516,7 +517,8 @@ def create_flag_prefab(team: str): STEP_RIGHT, TURN_LEFT, TURN_RIGHT, - FIRE_ZAP, + FIRE_ZAP_A, # a short-range beam with a wide area of effect + FIRE_ZAP_B, # a longer range beam with a thin area of effect ) @@ -634,7 +636,7 @@ def create_avatar_object( "actionSpec": { "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, "turn": {"default": 0, "min": -1, "max": 1}, - "fireZap": {"default": 0, "min": 0, "max": 1}, + "fireZap": {"default": 0, "min": 0, "max": 2}, }, "view": { "left": 5, @@ -657,6 +659,9 @@ def create_avatar_object( "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, + "secondaryBeamCooldownTime": 4, + "secondaryBeamLength": 6, + "secondaryBeamRadius": 0, "aliveStates": ["health1", "health2", "health3"], } }, @@ -678,6 +683,7 @@ def create_avatar_object( "maxHealthOnGround": 2, "maxHealthOnOwnColor": 3, "maxHealthOnEnemyColor": 1, + "groundLayer": "alternateLogic", } }, { @@ -748,37 +754,16 @@ def create_avatar_objects(num_players, return avatar_objects -def create_lab2d_settings( - num_players: int, - avatar_taste_kwargs: Optional[Any] = None, - fixed_teams: Optional[bool] = False) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "paintball_capture_the_flag", - 
"levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "topology": "BOUNDED", # Choose from ["BOUNDED", "TORUS"], - "simulation": { - "map": DEFAULT_ASCII_MAP, - "gameObjects": create_avatar_objects(num_players, - taste_kwargs=avatar_taste_kwargs, - fixed_teams=fixed_teams), - "scene": create_scene(), - "prefabs": PREFABS, - "charPrefabMap": CHAR_PREFAB_MAP, - }, - } - return lab2d_settings - - -def get_config(factory=create_lab2d_settings): - """Default configuration for training on the capture_the_flag level.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - config.num_players = 8 - config.lab2d_settings = factory(config.num_players) + # If shaping_kwargs are None then use the default reward structure in which + # the only positive rewards are those that are delivered when your team + # captures its opposing team's flag and the only negative rewards are those + # delivered when the opposing team captures your team's flag. The default + # reward structure is zero sum. + config.shaping_kwargs = None # Action set configuration. config.action_set = ACTION_SET @@ -786,6 +771,7 @@ def get_config(factory=create_lab2d_settings): config.individual_observation_names = [ "RGB", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", ] @@ -798,9 +784,39 @@ def get_config(factory=create_lab2d_settings): config.timestep_spec = specs.timestep({ "RGB": specs.OBSERVATION["RGB"], "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "WORLD.RGB": specs.rgb(184, 184), }) + # The roles assigned to each player. 
+ config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + num_players = len(roles) + substrate_definition = dict( + levelName="paintball__capture_the_flag", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects( + num_players, taste_kwargs=config.shaping_kwargs), + "scene": create_scene(), + "prefabs": PREFABS, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/king_of_the_hill.py b/meltingpot/python/configs/substrates/paintball__king_of_the_hill.py similarity index 81% rename from meltingpot/python/configs/substrates/king_of_the_hill.py rename to meltingpot/python/configs/substrates/paintball__king_of_the_hill.py index ef836cba..6bf657ee 100644 --- a/meltingpot/python/configs/substrates/king_of_the_hill.py +++ b/meltingpot/python/configs/substrates/paintball__king_of_the_hill.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for King of the Hill. -Example video: https://youtu.be/DmO2uqGBPco +Example video: https://youtu.be/VVAfeObAZzI See _Capture the Flag_ for the description of the painting, zapping, and movement mechanics, which also operate in this substrate. @@ -26,7 +26,7 @@ is in control. Purple indicator tiles mean no team is in control. 
""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Mapping, Optional, Sequence from ml_collections import config_dict import numpy as np @@ -36,23 +36,23 @@ _COMPASS = ["N", "E", "S", "W"] -DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ IIIIIIIIIIIIIIIIIIIIIII IWWWWWWWWWWWWWWWWWWWWWI IWPPP,PPPP,P,PPPP,PPPWI IWPPP,,PP,,,,,PP,,PPPWI IWPPP,,,,,,,,,,,,,PPPWI -IW,,,WW,,,,,,,,,WW,,,WI +IWP,,WW,,,,,,,,,WW,,PWI IW,,,WWDWWWDWWW,WW,,,WI -IW,,,,,,,,,,,,,,D,,,,WI -IW,,,,W,GGGGGGG,W,,,,WI -IWHWWHW,GGGGGGG,WHWWHWI -IWHWWHW,GGGGGGG,WHWWHWI -IW,,,,D,GGGIGGG,D,,,,WI -IWHWWHW,GGGGGGG,WHWWHWI -IWHWWHW,GGGGGGG,WHWWHWI -IW,,,,W,GGGGGGG,W,,,,WI -IW,,,,D,,,,,,,,,,,,,,WI +IW,,,,,,uuuuuuu,D,,,,WI +IW,,,,WlGGGGGGGrW,,,,WI +IWHWWHWlGGGGGGGrWHWWHWI +IWHWWHWlGGGGGGGrWHWWHWI +IW,,,,DlGGGIGGGrD,,,,WI +IWHWWHWlGGGGGGGrWHWWHWI +IWHWWHWlGGGGGGGrWHWWHWI +IW,,,,WlGGGGGGGrW,,,,WI +IW,,,,D,ddddddd,,,,,,WI IW,,,WW,WWWDWWWDWW,,,WI IWQ,,WW,,,,,,,,,WW,,QWI IWQQQ,,,,,,,,,,,,,QQQWI @@ -75,6 +75,12 @@ "G": "hill", ",": "ground", "I": {"type": "all", "list": ["indicator", "indicator_frame"]}, + + # Lines marking the edge of the hill. 
+ "u": {"type": "all", "list": ["ground", "line_north"]}, + "r": {"type": "all", "list": ["ground", "line_west"]}, + "d": {"type": "all", "list": ["ground", "line_south"]}, + "l": {"type": "all", "list": ["ground", "line_east"]}, } RED_COLOR = (225, 55, 85, 255) @@ -87,6 +93,29 @@ PURPLE_COLOR = (107, 63, 160, 255) +LINE_NORTH = """ +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +oooooooo +""" +LINE_SOUTH = shapes.flip_vertical(LINE_NORTH) +LINE_EAST = """ +xxxxxxxo +xxxxxxxo +xxxxxxxo +xxxxxxxo +xxxxxxxo +xxxxxxxo +xxxxxxxo +xxxxxxxo +""" +LINE_WEST = shapes.flip_horizontal(LINE_EAST) + def multiply_tuple(color_tuple, factor): alpha = color_tuple[3] @@ -134,6 +163,54 @@ def multiply_tuple(color_tuple, factor): ] } + +def get_marking_line(orientation: str): + """Return a line prefab to trace out the area of the hill.""" + if orientation == "N": + shape = LINE_NORTH + elif orientation == "E": + shape = LINE_EAST + elif orientation == "S": + shape = LINE_SOUTH + elif orientation == "W": + shape = LINE_WEST + else: + raise ValueError(f"Unrecognized orientation: {orientation}") + + line_name = f"line_{orientation}" + prefab = { + "name": line_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": line_name, + "stateConfigs": [{ + "state": line_name, + "layer": "lowerPhysical", + "sprite": line_name, + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [line_name,], + "spriteShapes": [shape], + "palettes": [{"x": (0, 0, 0, 0), + "o": (75, 75, 75, 120)}], + "noRotates": [False] + } + }, + ] + } + return prefab + + INDICATOR_FRAME = { "name": "indicator_frame", "components": [ @@ -213,9 +290,18 @@ def create_ground_prefab(is_hill=False): if is_hill: sprite_names = ["RedHill", "BlueHill"] sprite_colors = [DARKER_RED_COLOR, DARKER_BLUE_COLOR] + groups = ["grounds", "hills"] + clean_groups = ["hill_clean",] + red_groups = 
["hill_red",] + blue_groups = ["hill_blue",] else: sprite_names = ["RedGround", "BlueGround"] sprite_colors = [DARKEST_RED_COLOR, DARKEST_BLUE_COLOR] + groups = ["grounds",] + clean_groups = [] + red_groups = [] + blue_groups = [] + prefab = { "name": "ground", "components": [ @@ -227,16 +313,19 @@ def create_ground_prefab(is_hill=False): { "state": "clean", "layer": "alternateLogic", + "groups": groups + clean_groups, }, { "state": "red", "layer": "alternateLogic", "sprite": sprite_names[0], + "groups": groups + red_groups, }, { "state": "blue", "layer": "alternateLogic", "sprite": sprite_names[1], + "groups": groups + blue_groups, }, ] } @@ -375,6 +464,10 @@ def create_spawn_point_prefab(team): "ground": create_ground_prefab(is_hill=False), "indicator": INDICATOR, "indicator_frame": INDICATOR_FRAME, + "line_north": get_marking_line("N"), + "line_east": get_marking_line("E"), + "line_south": get_marking_line("S"), + "line_west": get_marking_line("W"), } # Primitive action components. @@ -387,7 +480,8 @@ def create_spawn_point_prefab(team): STEP_LEFT = {"move": 4, "turn": 0, "fireZap": 0} TURN_LEFT = {"move": 0, "turn": -1, "fireZap": 0} TURN_RIGHT = {"move": 0, "turn": 1, "fireZap": 0} -FIRE_ZAP = {"move": 0, "turn": 0, "fireZap": 1} +FIRE_ZAP_A = {"move": 0, "turn": 0, "fireZap": 1} +FIRE_ZAP_B = {"move": 0, "turn": 0, "fireZap": 2} # pyformat: enable # pylint: enable=bad-whitespace @@ -399,7 +493,8 @@ def create_spawn_point_prefab(team): STEP_RIGHT, TURN_LEFT, TURN_RIGHT, - FIRE_ZAP, + FIRE_ZAP_A, # a short-range beam with a wide area of effect + FIRE_ZAP_B, # a longer range beam with a thin area of effect ) @@ -450,8 +545,8 @@ def create_avatar_object( health3_color_palette = shapes.get_palette(multiply_tuple(team_color, 1.75)) taste_kwargs = { - # choose from: - # "none", "control_hill", "paint_hill", "zap_while_in_control" + # select `mode` from: + # ("none", "control_hill", "paint_hill", "zap_while_in_control") "mode": "none", "rewardAmount": 0.0, 
"zeroMainReward": False, @@ -512,6 +607,7 @@ def create_avatar_object( "kwargs": { "index": lua_index, "aliveState": "health2", + "additionalLiveStates": ["health1", "health3"], "waitState": "playerWait", "spawnGroup": TEAMS_DATA[team]["spawn_group"], "actionOrder": ["move", @@ -520,7 +616,7 @@ def create_avatar_object( "actionSpec": { "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, "turn": {"default": 0, "min": -1, "max": 1}, - "fireZap": {"default": 0, "min": 0, "max": 1}, + "fireZap": {"default": 0, "min": 0, "max": 2}, }, "view": { "left": 5, @@ -543,6 +639,9 @@ def create_avatar_object( "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, + "secondaryBeamCooldownTime": 4, + "secondaryBeamLength": 6, + "secondaryBeamRadius": 0, "aliveStates": ["health1", "health2", "health3"], } }, @@ -564,6 +663,7 @@ def create_avatar_object( "maxHealthOnGround": 2, "maxHealthOnOwnColor": 3, "maxHealthOnEnemyColor": 1, + "groundLayer": "alternateLogic", } }, { @@ -634,37 +734,15 @@ def create_avatar_objects(num_players, return avatar_objects -def create_lab2d_settings( - num_players: int, - avatar_taste_kwargs: Optional[Any] = None, - fixed_teams: Optional[bool] = False) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "paintball_king_of_the_hill", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "topology": "BOUNDED", # Choose from ["BOUNDED", "TORUS"], - "simulation": { - "map": DEFAULT_ASCII_MAP, - "gameObjects": create_avatar_objects(num_players, - taste_kwargs=avatar_taste_kwargs, - fixed_teams=fixed_teams), - "scene": create_scene(), - "prefabs": PREFABS, - "charPrefabMap": CHAR_PREFAB_MAP, - }, - } - return lab2d_settings - - -def get_config(factory=create_lab2d_settings): - """Default configuration for training on the king_of_the_hill level.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - 
config.num_players = 8 - config.lab2d_settings = factory(config.num_players) + # If shaping_kwargs are None then use the default reward structure in which + # all positive rewards come from your team being in control of the hill and + # all negative rewards come from the opposing team being in control of the + # hill. The default reward structure is zero sum. + config.shaping_kwargs = None # Action set configuration. config.action_set = ACTION_SET @@ -672,6 +750,7 @@ def get_config(factory=create_lab2d_settings): config.individual_observation_names = [ "RGB", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", ] @@ -684,9 +763,39 @@ def get_config(factory=create_lab2d_settings): config.timestep_spec = specs.timestep({ "RGB": specs.OBSERVATION["RGB"], "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "WORLD.RGB": specs.rgb(184, 184), }) + # The roles assigned to each player. 
+ config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + num_players = len(roles) + substrate_definition = dict( + levelName="paintball__king_of_the_hill", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects( + num_players, taste_kwargs=config.shaping_kwargs), + "scene": create_scene(), + "prefabs": PREFABS, + "charPrefabMap": CHAR_PREFAB_MAP, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/predator_prey.py b/meltingpot/python/configs/substrates/predator_prey.py new file mode 100644 index 00000000..7c07d098 --- /dev/null +++ b/meltingpot/python/configs/substrates/predator_prey.py @@ -0,0 +1,1718 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Common configuration for all predator_prey__* substrates. + +There are two roles: predators and prey. The prey try to eat apples and acorns. +The predators try to eat prey. + +Apples are worth 1 reward and can be eaten immediately. Acorns are worth 18 +reward but they take a long time to eat. 
It is not possible to move while eating +so a prey player is especially vulnerable while they eat it. + +Predators can also eat other predators, though they get no reward for doing so. +However, a predator might eat another predator anyway in order to remove a +competitor who might otherwise eat its prey. + +When prey travel together in tight groups they can defend themselves from being +eaten by predators. When a predator tries to eat its prey then all other prey +who are not currently eating an acorn within a radius of 3 are counted. If there +are more prey than predators within the radius then the predator cannot eat the +prey. + +So prey are safer in groups. However, they are also tempted to depart from their +group and strike out alone since that way they are more likely to be the one to +come first to any food they find. + +Both predators and prey have limited stamina. They can only move at top speed +for a limited number of consecutive steps, after which they must slow down. +Stamina is visible to all with a colored bar above each player's head. If the +bar over a particular player's head is invisible or green then they can move at +top speed. If it is red then they have depleted their stamina and can only move +slowly until they let it recharge. Stamina is recharged by standing still for +some number of consecutive time steps, how many depends on how much stamina was +depleted. Predators have a faster top speed than prey but they tire more +quickly. + +Prey but not predators can cross tall grass (green grassy locations). Prey must +still be careful on grass though since predators can still reach one cell over +the border to eat prey on the edge of safety. + +Both predators and prey respawn 200 steps after being eaten. 
+""" + +from typing import Any, Dict, Generator, Mapping, Sequence + +from ml_collections import config_dict as configdict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +_COMPASS = ("N", "E", "S", "W") +ITEMS = ("empty", "acorn") +INVISIBLE = (0, 0, 0, 0) + +SPRITES = {} +PALETTES = {} + +SPRITES["empty"] = """ +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +""" + +PALETTES["empty"] = {"x": INVISIBLE,} + +SPRITES["acorn"] = """ +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxxx +xxxxxxBB +xxxxxxbb +xxxxxxxx +""" + +PALETTES["acorn"] = { + "x": INVISIBLE, + "B": [158, 85, 25, 255], + "b": [92, 29, 19, 255], +} + +PREDATOR_EAT_SPRITE = """ +*x*x*x*x +*x*x*x** +x***&x** +xx*&xx** +xx*&xx*& +xx*&xx*x +xx&&xx&x +xxxxxxxx +""" + +APPLE_SPRITE = """ +xxxxxxxx +xxo|*xxx +x*#|**xx +x#***#xx +x#####xx +xx###xxx +xxxxxxxx +xxxxxxxx +""" + + +def create_inventory(player_index: int): + """Return prefab for the inventory of the player at index `player_index`.""" + lua_idx = player_index + 1 + prefab = { + "name": "inventory", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wait", + "stateConfigs": ( + [{"state": "wait"}] + + [{"state": item, "sprite": item, "layer": "overlay"} + for item in ITEMS]), + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ITEMS, + "spriteShapes": [SPRITES[item] for item in ITEMS], + "palettes": [PALETTES[item] for item in ITEMS], + "noRotates": [False] + } + }, + { + "component": "AvatarConnector", + "kwargs": { + "playerIndex": lua_idx, + "aliveState": "empty", + "waitState": "wait" + } + }, + { + "component": "Inventory", + "kwargs": { + "playerIndex": lua_idx, + } + }, + ] + } + return prefab + + +def create_base_prefab(name, layer="upperPhysical"): + """Returns a base 
prefab with a given name on the given layer.""" + return { + "name": f"{name}", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": f"{name}", + "stateConfigs": [{ + "state": f"{name}", + "layer": layer, + "sprite": f"{name}", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [name], + "spriteShapes": [SPRITES[name]], + "palettes": [PALETTES[name]], + "noRotates": [True] + } + }] + } + +NW_WALL_CORNER = { + "name": "nw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_wall_corner", + "stateConfigs": [{ + "state": "nw_wall_corner", + "layer": "upperPhysical", + "sprite": "NwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NwWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_NW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +NE_WALL_CORNER = { + "name": "ne_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_wall_corner", + "stateConfigs": [{ + "state": "ne_wall_corner", + "layer": "upperPhysical", + "sprite": "NeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NeWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_NE_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SE_WALL_CORNER = { + "name": "se_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "se_wall_corner", + "stateConfigs": [{ + "state": "se_wall_corner", + "layer": "upperPhysical", + "sprite": "SeWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + 
"spriteNames": ["SeWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_SE_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SW_WALL_CORNER = { + "name": "sw_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sw_wall_corner", + "stateConfigs": [{ + "state": "sw_wall_corner", + "layer": "upperPhysical", + "sprite": "SwWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SwWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_SW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +NW_INNER_WALL_CORNER = { + "name": "nw_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_inner_wall_corner", + "stateConfigs": [{ + "state": "nw_inner_wall_corner", + "layer": "upperPhysical", + "sprite": "NwInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NwInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_NW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +NE_INNER_WALL_CORNER = { + "name": "ne_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_inner_wall_corner", + "stateConfigs": [{ + "state": "ne_inner_wall_corner", + "layer": "upperPhysical", + "sprite": "NeInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NeInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_NE_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SE_INNER_WALL_CORNER = { + "name": "se_inner_wall_corner", + "components": [ + { + "component": 
"StateManager", + "kwargs": { + "initialState": "se_inner_wall_corner", + "stateConfigs": [{ + "state": "se_inner_wall_corner", + "layer": "upperPhysical", + "sprite": "SeInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SeInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_SE_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SW_INNER_WALL_CORNER = { + "name": "sw_inner_wall_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sw_inner_wall_corner", + "stateConfigs": [{ + "state": "sw_inner_wall_corner", + "layer": "upperPhysical", + "sprite": "SwInnerWallCorner", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SwInnerWallCorner"], + "spriteShapes": [shapes.BRICK_WALL_INNER_SW_CORNER], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +WALL_NORTH = { + "name": "wall_north", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_north", + "stateConfigs": [{ + "state": "wall_north", + "layer": "upperPhysical", + "sprite": "WallNorth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallNorth"], + "spriteShapes": [shapes.BRICK_WALL_NORTH], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +WALL_EAST = { + "name": "wall_east", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_east", + "stateConfigs": [{ + "state": "wall_east", + "layer": "upperPhysical", + "sprite": "WallEast", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallEast"], + 
"spriteShapes": [shapes.BRICK_WALL_EAST], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +WALL_SOUTH = { + "name": "wall_south", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_south", + "stateConfigs": [{ + "state": "wall_south", + "layer": "upperPhysical", + "sprite": "WallSouth", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallSouth"], + "spriteShapes": [shapes.BRICK_WALL_SOUTH], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + + +WALL_WEST = { + "name": "wall_west", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall_west", + "stateConfigs": [{ + "state": "wall_west", + "layer": "upperPhysical", + "sprite": "WallWest", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["WallWest"], + "spriteShapes": [shapes.BRICK_WALL_WEST], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +FILL = { + "name": "fill", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "fill", + "stateConfigs": [{ + "state": "fill", + "layer": "upperPhysical", + "sprite": "fill", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["fill"], + "spriteShapes": [shapes.FILL], + "palettes": [shapes.BRICK_WALL_PALETTE], + "noRotates": [False] + } + }, + ] +} + +TILED_FLOOR = { + "name": "tiled_floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "tiled_floor", + "stateConfigs": [{ + "state": "tiled_floor", + "layer": "background", + "sprite": "tiled_floor", + }], + } + }, + {"component": "Transform"}, + { + "component": "Appearance", + "kwargs": { + "renderMode": 
"ascii_shape", + "spriteNames": ["tiled_floor"], + "spriteShapes": [shapes.TILED_FLOOR_GREY], + "palettes": [{"o": (204, 199, 192, 255), + "-": (194, 189, 182, 255),}], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS = { + "name": + "safe_grass", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass", + "stateConfigs": [{ + "state": "safe_grass", + "layer": "midPhysical", + "sprite": "safe_grass", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass"], + "spriteShapes": [shapes.GRASS_STRAIGHT], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_N_EDGE = { + "name": + "safe_grass_n_edge", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass_n_edge", + "stateConfigs": [{ + "state": "safe_grass_n_edge", + "layer": "midPhysical", + "sprite": "safe_grass_n_edge", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_n_edge"], + "spriteShapes": [shapes.GRASS_STRAIGHT_N_EDGE], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_E_EDGE = { + "name": + "safe_grass_e_edge", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass_e_edge", + "stateConfigs": [{ + "state": "safe_grass_e_edge", + "layer": "midPhysical", + "sprite": "safe_grass_e_edge", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_e_edge"], + "spriteShapes": [shapes.GRASS_STRAIGHT_E_EDGE], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_S_EDGE = { + "name": + "safe_grass_s_edge", + "components": [ + { + "component": "StateManager", + 
"kwargs": { + "initialState": + "safe_grass_s_edge", + "stateConfigs": [{ + "state": "safe_grass_s_edge", + "layer": "midPhysical", + "sprite": "safe_grass_s_edge", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_s_edge"], + "spriteShapes": [shapes.GRASS_STRAIGHT_S_EDGE], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_W_EDGE = { + "name": + "safe_grass_w_edge", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass_w_edge", + "stateConfigs": [{ + "state": "safe_grass_w_edge", + "layer": "midPhysical", + "sprite": "safe_grass_w_edge", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_w_edge"], + "spriteShapes": [shapes.GRASS_STRAIGHT_W_EDGE], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_NW_CORNER = { + "name": + "safe_grass_nw_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass_nw_corner", + "stateConfigs": [{ + "state": "safe_grass_nw_corner", + "layer": "midPhysical", + "sprite": "safe_grass_nw_corner", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_nw_corner"], + "spriteShapes": [shapes.GRASS_STRAIGHT_NW_CORNER], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_NE_CORNER = { + "name": + "safe_grass_ne_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass_ne_corner", + "stateConfigs": [{ + "state": "safe_grass_ne_corner", + "layer": "midPhysical", + "sprite": "safe_grass_ne_corner", + }], + } + }, + { + "component": "Transform" + }, + { + "component": 
"Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_ne_corner"], + "spriteShapes": [shapes.GRASS_STRAIGHT_NE_CORNER], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_SE_CORNER = { + "name": + "safe_grass_se_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass_se_corner", + "stateConfigs": [{ + "state": "safe_grass_se_corner", + "layer": "midPhysical", + "sprite": "safe_grass_se_corner", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_se_corner"], + "spriteShapes": [shapes.GRASS_STRAIGHT_SE_CORNER], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + +SAFE_GRASS_SW_CORNER = { + "name": + "safe_grass_sw_corner", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "safe_grass_sw_corner", + "stateConfigs": [{ + "state": "safe_grass_sw_corner", + "layer": "midPhysical", + "sprite": "safe_grass_sw_corner", + }], + } + }, + { + "component": "Transform" + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["safe_grass_sw_corner"], + "spriteShapes": [shapes.GRASS_STRAIGHT_SW_CORNER], + "palettes": [shapes.GRASS_PALETTE], + "noRotates": [False] + } + }, + ] +} + + +def create_apple(apple_reward: float = 1.0): + """Return a prefab object defining an apple, which can be eaten by prey.""" + prefab = { + "name": + "edibleApple", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "apple", + "stateConfigs": [ + { + "state": "apple", + "layer": "lowerPhysical", + "sprite": "apple", + }, + { + "state": "appleWait", + "layer": "logic", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["apple"], + 
"spriteShapes": [APPLE_SPRITE], + "palettes": [{ + "*": (106, 184, 83, 255), + "#": (96, 166, 75, 255), + "o": (61, 130, 62, 255), + "|": (115, 62, 57, 255), + "x": INVISIBLE, + }], + "noRotates": [True], + } + }, + { + "component": "AppleEdible", + "kwargs": { + "liveState": "apple", + "waitState": "appleWait", + "rewardForEating": apple_reward, + } + }, + { + "component": "FixedRateRegrow", + "kwargs": { + "name": "AppleFixedRateRegrow", + "liveState": "apple", + "waitState": "appleWait", + "regrowRate": 0.007, + } + }, + ] + } + return prefab + +FLOOR_ACORN = { + "name": + "floorAcorn", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": + "floorAcorn", + "stateConfigs": [ + { + "state": "floorAcorn", + "layer": "lowerPhysical", + "sprite": "floorAcorn", + }, + { + "state": "acornWait", + "layer": "logic", + }, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["floorAcorn"], + "spriteShapes": [shapes.ACORN], + "palettes": [{ + "*": (158, 85, 25, 255), + "@": (158, 85, 25, 140), + "o": (92, 29, 19, 255), + "x": INVISIBLE, + }], + "noRotates": [True], + } + }, + { + "component": "AcornPickUppable", + "kwargs": { + "liveState": "floorAcorn", + "waitState": "acornWait", + } + }, + { + "component": "FixedRateRegrow", + "kwargs": { + "name": "AcornFixedRateRegrow", + "liveState": "floorAcorn", + "waitState": "acornWait", + "regrowRate": 0.01, + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + {"component": "Transform"}, + ] +} + + +def create_spawn_point_prefab(team): + """Return a team-specific spawn-point prefab.""" + prefab = { + "name": "spawn_point", + "components": [ + { + "component": "StateManager", + "kwargs": 
{ + "initialState": "playerSpawnPoint", + "stateConfigs": [{ + "state": "playerSpawnPoint", + "layer": "alternateLogic", + "groups": ["{}SpawnPoints".format(team)], + }], + } + }, + {"component": "Transform",}, + { + "component": "Appearance", + "kwargs": { + "renderMode": "invisible", + "spriteNames": [], + "spriteRGBColors": [] + } + }, + ] + } + return prefab + + +def create_prefabs(apple_reward: float = 1.0): + """Returns the prefabs dictionary.""" + prefabs = { + "spawn_point_prey": create_spawn_point_prefab("prey"), + "spawn_point_predator": create_spawn_point_prefab("predator"), + "apple": create_apple(apple_reward=apple_reward), + "floor_acorn": FLOOR_ACORN, + "nw_wall_corner": NW_WALL_CORNER, + "ne_wall_corner": NE_WALL_CORNER, + "se_wall_corner": SE_WALL_CORNER, + "sw_wall_corner": SW_WALL_CORNER, + "nw_inner_wall_corner": NW_INNER_WALL_CORNER, + "ne_inner_wall_corner": NE_INNER_WALL_CORNER, + "se_inner_wall_corner": SE_INNER_WALL_CORNER, + "sw_inner_wall_corner": SW_INNER_WALL_CORNER, + "wall_north": WALL_NORTH, + "wall_east": WALL_EAST, + "wall_south": WALL_SOUTH, + "wall_west": WALL_WEST, + "fill": FILL, + "tiled_floor": TILED_FLOOR, + "safe_grass": SAFE_GRASS, + "safe_grass_n_edge": SAFE_GRASS_N_EDGE, + "safe_grass_e_edge": SAFE_GRASS_E_EDGE, + "safe_grass_s_edge": SAFE_GRASS_S_EDGE, + "safe_grass_w_edge": SAFE_GRASS_W_EDGE, + "safe_grass_ne_corner": SAFE_GRASS_NE_CORNER, + "safe_grass_se_corner": SAFE_GRASS_SE_CORNER, + "safe_grass_sw_corner": SAFE_GRASS_SW_CORNER, + "safe_grass_nw_corner": SAFE_GRASS_NW_CORNER, + } + return prefabs + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + {"component": "Transform"}, + ] + } + + return scene + + +def _create_avatar_object(player_idx: int, is_predator: bool, + max_stamina: int) -> Dict[str, Any]: + """Create an avatar object.""" + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + live_state_name = "player{}".format(lua_index) + avatar_sprite_name = "avatarSprite{}".format(lua_index) + + if is_predator: + spawn_group = "predatorSpawnPoints" + color_palette = shapes.PRED1_PALETTE + sprite = shapes.PERSISTENCE_PREDATOR + if not is_predator: + spawn_group = "preySpawnPoints" + alert_state_name = live_state_name + "alert" + sit_state_name = live_state_name + "sit" + prep_to_eat_state_name = live_state_name + "prepToEat" + first_bite_state_name = live_state_name + "firstBite" + second_bite_state_name = live_state_name + "secondBite" + last_bite_state_name = live_state_name + "lastBite" + alert_sprite_name = "avatarAlertSprite{}".format(lua_index) + sit_sprite_name = "avatarSitSprite{}".format(lua_index) + prep_to_eat_sprite_name = "avatarPrepToEatSprite{}".format(lua_index) + first_bite_sprite_name = "avatarFirstBiteSprite{}".format(lua_index) + second_bite_sprite_name = "avatarSecondBiteSprite{}".format(lua_index) + last_bite_sprite_name = "avatarLastBiteSprite{}".format(lua_index) + color_palette = {**shapes.get_palette(colors.palette[player_idx] + ), ** PALETTES["acorn"]} + sprite = shapes.CUTE_AVATAR + alert_sprite = shapes.CUTE_AVATAR_ALERT + sit_sprite = shapes.CUTE_AVATAR_SIT + prep_to_eat_sprite = shapes.CUTE_AVATAR_EAT + first_bite_sprite = shapes.CUTE_AVATAR_FIRST_BITE + second_bite_sprite = shapes.CUTE_AVATAR_SECOND_BITE + last_bite_sprite = shapes.CUTE_AVATAR_LAST_BITE + + interact_palette = { + "P": colors.palette[player_idx] + (255,), + "&": (10, 10, 10, 50), + "*": (230, 230, 230, 255), + "x": INVISIBLE, + } + + if is_predator: + role_name = "predator" + green_freeze_time = 0 + yellow_freeze_time = 1 + red_freeze_time = 6 + else: + role_name = "prey" + green_freeze_time = 1 + yellow_freeze_time = 2 + red_freeze_time = 4 + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": f"avatar{lua_index}", + "components": [ + { + "component": "Transform", + }, + { + "component": "Role", 
+ "kwargs": { + "isPredator": is_predator, + } + }, + { + "component": "Stamina", + "kwargs": { + "maxStamina": max_stamina, + "classConfig": {"name": role_name, + "greenFreezeTime": green_freeze_time, + "yellowFreezeTime": yellow_freeze_time, + "redFreezeTime": red_freeze_time}, + "amountInvisible": 6, + "amountGreen": 6, + "amountYellow": 6, + "amountRed": 1, + "costlyActions": ["move", "turn", "interact"], + } + }, + { + "component": "StaminaObservation", + "kwargs": { + "staminaComponent": "Stamina", + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + # The `ProxemicTaste` component defines a pseudoreward which is useful + # for training background populations of prey or predators that stand + # near others with their same role. It should be turned off by + # default. It is turned off when `distanceToReward` is empty. + { + "component": "ProxemicTaste", + "kwargs": { + "distanceToReward": {}, + "layer": "upperPhysical", + "roleToCount": role_name, + } + }, + # The `RewardForStaminaLevel` component defines a pseudoreward which + # is useful for training background populations to rapidly learn how + # to control their stamina. For the default "real" substrate, the + # reward it defines should always be zero. + { + "component": "RewardForStaminaLevel", + "kwargs": { + "rewardValue": 0.0, + "bands": [], + } + }, + ] + } + if is_predator: + avatar_object["components"].extend([ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + # Initial player state. + { + "state": live_state_name, + "layer": "upperPhysical", + "sprite": avatar_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + # Player wait type for times when they are zapped out. 
+ { + "state": "playerWait", + "groups": ["playerWaits"] + }, + ] + } + }, + { + "component": "PredatorInteractBeam", + "kwargs": { + "cooldownTime": 5, + "shapes": [PREDATOR_EAT_SPRITE, shapes.FILL], + "palettes": [interact_palette] * 2, + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [avatar_sprite_name], + "spriteShapes": [sprite], + "palettes": [color_palette], + "noRotates": [True] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "additionalLiveStates": [], + "waitState": "playerWait", + "spawnGroup": spawn_group, + "actionOrder": ["move", + "turn", + "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + } + }, + { + "component": "AvatarEdible", + }, + { + "component": "AvatarRespawn", + "kwargs": { + "framesTillRespawn": 200, + } + }, + ]) + if not is_predator: + avatar_object["components"].extend([ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + # Initial player state. + { + "state": live_state_name, + "layer": "upperPhysical", + "sprite": avatar_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + # Player wait type for times when they are zapped out. 
+ { + "state": "playerWait", + "groups": ["playerWaits"] + }, + { + "state": alert_state_name, + "layer": "upperPhysical", + "sprite": alert_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": sit_state_name, + "layer": "upperPhysical", + "sprite": sit_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": prep_to_eat_state_name, + "layer": "upperPhysical", + "sprite": prep_to_eat_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": first_bite_state_name, + "layer": "upperPhysical", + "sprite": first_bite_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": second_bite_state_name, + "layer": "upperPhysical", + "sprite": second_bite_sprite_name, + "contact": "avatar", + "groups": ["players"] + }, + { + "state": last_bite_state_name, + "layer": "upperPhysical", + "sprite": last_bite_sprite_name, + "contact": "avatar", + "groups": ["players"] + } + ] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "additionalLiveStates": [alert_state_name, + sit_state_name, + prep_to_eat_state_name, + first_bite_state_name, + second_bite_state_name, + last_bite_state_name], + "waitState": "playerWait", + "spawnGroup": spawn_group, + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + } + }, + { + "component": "InteractEatAcorn", + "kwargs": { + "cooldownTime": 5, + "shapes": [PREDATOR_EAT_SPRITE, shapes.FILL], + "palettes": [interact_palette], + "isEating": False, + "defaultState": live_state_name, + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [avatar_sprite_name, alert_sprite_name, + 
sit_sprite_name, prep_to_eat_sprite_name, + first_bite_sprite_name, second_bite_sprite_name, + last_bite_sprite_name], + "spriteShapes": [sprite, alert_sprite, sit_sprite, + prep_to_eat_sprite, first_bite_sprite, + second_bite_sprite, last_bite_sprite], + "palettes": [color_palette] * 7, + "noRotates": [True] * 7 + } + }, + { + "component": "AvatarEatingAnimation", + "kwargs": { + "sit": sit_state_name, + "prepToEat": prep_to_eat_state_name, + "firstBite": first_bite_state_name, + "secondBite": second_bite_state_name, + "lastBite": last_bite_state_name, + "downState": live_state_name, + # On each of 3 eating frames, get one third of `acornReward`. + "acornReward": 18, + } + }, + { + "component": "AvatarEdible", + "kwargs": { + "groupRadius": 3, + "predatorRewardForEating": 1.0, + } + }, + { + "component": "AvatarRespawn", + "kwargs": { + "framesTillRespawn": 200, + } + }, + { + "component": "AvatarAnimation", + "kwargs": { + "upState": alert_state_name, + "downState": live_state_name, + } + }, + # The `AcornTaste` component defines pseudorewards which are useful for + # training background populations. For the default "real" substrate, + # the rewards it defines should always be zero, + { + "component": "AcornTaste", + "kwargs": { + "collectReward": 0.0, + "eatReward": 0.0, + "safeAcornConsumptionReward": 0.0, + } + }, + ]) + return avatar_object + + +def _create_predator_obstacle(player_idx: int) -> Dict[str, Any]: + # Lua is 1-indexed. + lua_idx = player_idx + 1 + return { + "name": + "predator_obstacle", + "components": [ + { + "component": "StateManager", + "kwargs": + { + "initialState": "obstacleWait", + "stateConfigs": [ + { + "state": "obstacleWait" + }, + # Block predators from entering any tile with a + # piece on layer 'midPhysical'. 
+ { + "state": "obstacleLive", + "layer": "midPhysical", + "groups": ["obstacles"] + } + ] + } + }, + { + "component": "Transform", + }, + { + "component": "AvatarConnector", + "kwargs": { + "playerIndex": lua_idx, + "aliveState": "obstacleLive", + "waitState": "obstacleWait" + } + }, + ] + } + + +def _create_stamina_overlay(player_idx: int, + max_stamina_bar_states: int, + ) -> Generator[Dict[str, Any], None, None]: + """Create stamina marker overlay objects.""" + # Lua is 1-indexed. + lua_idx = player_idx + 1 + + stamina_bar_state_configs = [ + # Invisible inactive (dead) overlay type. + {"state": "staminaBarWait"}, + ] + stamina_bar_sprite_names = [] + stamina_bar_sprite_shapes = [] + + # Each player's stamina bars must be in their own layer so they do not + # interact/collide with other players' stamina bars. + stamina_bar_layer = f"superOverlay_{player_idx}" + + # Declare one state per level of the stamina bar. + for i in range(max_stamina_bar_states): + sprite_name = f"sprite_for_level_{i}" + stamina_bar_state_configs.append( + {"state": f"level_{i}", + "layer": stamina_bar_layer, + "sprite": sprite_name}) + stamina_bar_sprite_names.append(sprite_name) + xs = "\nxxxxxxxx" + blank_space = xs * 7 + number_of_rs = max(6 - i, 0) + number_of_ys = i if i < 7 else 12 - i + number_of_gs = max(i - 6, 0) + if i >= 13: + level = blank_space + xs + else: + level = blank_space + "\nx" + "G" * number_of_gs + "Y" * number_of_ys + "R" * number_of_rs + "x" + empty = "\n".join(["x" * 8] * 8) + # Replace the east/south/west sprites with invisible sprites so the only + # stamina bar rendered is the one in the direction that the current player + # is facing. + stamina_bar_sprite_shapes.append((level, empty, empty, empty)) + + # Create a stamina bar for each compass direction. Only the direction the + # current player is facing is visible. 
+ for direction in ("N", "E", "S", "W"): + yield { + "name": "avatar_stamina_bar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "staminaBarWait", + "stateConfigs": stamina_bar_state_configs + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": stamina_bar_sprite_names, + "spriteShapes": stamina_bar_sprite_shapes, + "palettes": [{"G": (62, 137, 72, 255), + "Y": (255, 216, 97, 255), + "R": (162, 38, 51, 255), + "x": INVISIBLE,}] * max_stamina_bar_states, + "noRotates": [True] * max_stamina_bar_states + } + }, + { + "component": "StaminaBar", + "kwargs": { + "playerIndex": lua_idx, + "waitState": "staminaBarWait", + "layer": stamina_bar_layer, + "direction": direction + } + }, + ] + } + + +def _build_prey_objects(player_idx: int, + max_stamina_bar_states: int = 19): + """Build a prey avatar and its associated stamina objects.""" + avatar_object = _create_avatar_object( + player_idx, is_predator=False, max_stamina=max_stamina_bar_states - 1) + stamina_bar_objects = _create_stamina_overlay(player_idx, + max_stamina_bar_states) + inventory_object = create_inventory(player_index=player_idx) + game_objects = [] + game_objects.append(avatar_object) + game_objects.extend(stamina_bar_objects) + game_objects.append(inventory_object) + return game_objects + + +def _build_predator_objects(player_idx: int, + max_stamina_bar_states: int = 19): + """Build a predator avatar and its associated stamina objects and obstacle.""" + avatar_object = _create_avatar_object( + player_idx, is_predator=True, max_stamina=max_stamina_bar_states - 1) + stamina_bar_objects = _create_stamina_overlay(player_idx, + max_stamina_bar_states) + predator_obstacle = _create_predator_obstacle(player_idx) + game_objects = [] + game_objects.append(avatar_object) + game_objects.extend(stamina_bar_objects) + game_objects.append(predator_obstacle) + return game_objects + + +def 
get_config(): + """Default configuration.""" + config = configdict.ConfigDict() + + # Declare parameters here that we may want to override externally. + # `apple_reward` should be 1.0 for the canonical version of this environment, + # but to train background bots it is sometimes useful to use other values in + # order to control the relative attractiveness of apples and acorns. + config.apple_reward = 1.0 + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "STAMINA", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + + # The roles assigned to each player. + config.valid_roles = frozenset({"predator", "prey"}) + + return config + + +def build( + roles: Sequence[str], + config: configdict.ConfigDict, +) -> Mapping[str, Any]: + """Build predator_and_prey substrate given player roles.""" + # Build avatars. + num_players = len(roles) + avatar_objects_and_helpers = [] + for player_idx, role in enumerate(roles): + if role == "prey": + avatar_objects_and_helpers.extend(_build_prey_objects(player_idx)) + elif role == "predator": + avatar_objects_and_helpers.extend(_build_predator_objects(player_idx)) + else: + raise ValueError(f"Unrecognized role: {role}") + + # Build the rest of the substrate definition. 
+ substrate_definition = dict( + levelName="predator_prey", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation=dict( + map=config.layout.ascii_map, + gameObjects=avatar_objects_and_helpers, + scene=create_scene(), + prefabs=create_prefabs(apple_reward=config.apple_reward), + charPrefabMap=config.layout.char_prefab_map, + ), + ) + + return substrate_definition diff --git a/meltingpot/python/configs/substrates/predator_prey__alley_hunt.py b/meltingpot/python/configs/substrates/predator_prey__alley_hunt.py new file mode 100644 index 00000000..d8540a8c --- /dev/null +++ b/meltingpot/python/configs/substrates/predator_prey__alley_hunt.py @@ -0,0 +1,104 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for predator_prey__alley_hunt. + +Example video: https://youtu.be/ctVjhn7VYgo + +See predator_prey.py for a detailed description applicable to all predator_prey +substrates. + +In this variant prey must forage for apples in a maze with many dangerous +dead-end corridors where they could easily be trapped by predators. 
+""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import predator_prey as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +;________________________, +!aa''''''''''''''''''''aa| +!a'''''''''a''=+''''''''a| +!''=~~~+''=+''|!''=~~~+''| +!''[__,!''|!''|!''[___]''| +!''''a|!''|!aa|!'''''''''| +!''=~~J!''|L~~J!'a'=~~~+'| +!''|///!''[____]'a'|///!a| +!''|///!'''''''''''[__,L~J +!''[___]'XX''''X''''<*[__, +!''''''''''a''''XX''<****| +!'aa'''X''''''a'''XX<****| +!''''''''''a''''XX''<****| +!''=~~~+'''''''X''''<*=~~J +!''|///!'XX''''''''=~~J;_, +!''|///!''=~~~~+'a'|///!a| +!''[__,!''|;__,!'a'[___]'| +!''''a|!''|!aa|!'''''''''| +!''=~~J!''|!''|!''=~~~+''| +!''[___]''[]''|!''[___]''| +!a'''''''''a''[]''''''''a| +!aa''''''''''''''''''''aa| +L~~~~~~~~~~~~~~~~~~~~~~~~J +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "*": {"type": "all", "list": ["safe_grass", "spawn_point_prey"]}, + "X": {"type": "all", "list": ["tiled_floor", "spawn_point_predator"]}, + "a": {"type": "all", "list": ["tiled_floor", "apple"]}, + ";": "nw_wall_corner", + ",": "ne_wall_corner", + "J": "se_wall_corner", + "L": "sw_wall_corner", + "_": "wall_north", + "|": "wall_east", + "~": "wall_south", + "!": "wall_west", + "=": "nw_inner_wall_corner", + "+": "ne_inner_wall_corner", + "]": "se_inner_wall_corner", + "[": "sw_inner_wall_corner", + "'": "tiled_floor", + "<": "safe_grass_w_edge", + ">": "safe_grass", + "/": "fill", +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + + # The specs of the environment (from a single-agent perspective). 
+ config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "STAMINA": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(184, 208), + }) + + # The roles assigned to each player. + config.default_player_roles = ("predator",) * 5 + ("prey",) * 8 + + return config diff --git a/meltingpot/python/configs/substrates/predator_prey__open.py b/meltingpot/python/configs/substrates/predator_prey__open.py new file mode 100644 index 00000000..43c5b95e --- /dev/null +++ b/meltingpot/python/configs/substrates/predator_prey__open.py @@ -0,0 +1,113 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for predator_prey__open. + +Example video: https://youtu.be/0ZlrkWsWzMw + +See predator_prey.py for a detailed description applicable to all predator_prey +substrates. + +In this variant prey must forage over a large field of apples and acorns in the +center of the map. Since the space is so open it should be possible for the prey +to move together in larger groups so they can defend themselves from predators. +Another prey strategy focused on acorns instead of apples is also possible. In +this case prey collect acorns and bring them back to safe tall grass to consume +them. 
+""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import predator_prey as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +/;___________________,/ +;]*******************[, +!vvvvvvvvvvvvvvvvvvvvv| +!'''''''''''''''''''''| +!''XXXXXXXXXXXXXXXXX''| +!''XAaaaaaaaaaaAaaaX''| +!''Xaaaa&aaaAaaaaaaX''| +!'aaaaaaaaaaaaaaaaaaa'| +!Aaaaaaaaaaaaaaaaaaaaa| +!aaaaaaaaaaaaaaAaaaaaa| +!aAaaaaaaaaaaaaaaa&aaA| +!'aaaaaaAaaaaaaaaaAaa'| +!''Xaaaaaaa&aaaaaaaX''| +!''XaaaaaaaaAaaaaaaX''| +!''XXXXXXXXXXXXXXXXX''| +!'''''''''''''''''''''| +!^^^^^^^^^^^^^^^^^^^^^| +L+*******************=J +/L~~~~~~~~~~~~~~~~~~~J/ +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "*": {"type": "all", "list": ["safe_grass", "spawn_point_prey"]}, + "&": {"type": "all", "list": ["tiled_floor", "apple", "spawn_point_prey"]}, + "X": {"type": "all", "list": ["tiled_floor", "spawn_point_predator"]}, + "a": {"type": "all", "list": ["tiled_floor", "apple"]}, + "A": {"type": "all", "list": ["tiled_floor", "floor_acorn"]}, + ";": "nw_wall_corner", + ",": "ne_wall_corner", + "J": "se_wall_corner", + "L": "sw_wall_corner", + "_": "wall_north", + "|": "wall_east", + "~": "wall_south", + "!": "wall_west", + "=": "nw_inner_wall_corner", + "+": "ne_inner_wall_corner", + "]": "se_inner_wall_corner", + "[": "sw_inner_wall_corner", + "'": "tiled_floor", + "#": "safe_grass", + "<": "safe_grass_w_edge", + "^": "safe_grass_n_edge", + ">": "safe_grass_e_edge", + "v": "safe_grass_s_edge", + "l": "safe_grass_ne_corner", + "j": "safe_grass_se_corner", + "z": "safe_grass_sw_corner", + "r": "safe_grass_nw_corner", + "/": "fill", +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. 
+ config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "STAMINA": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(152, 184), + }) + + # The roles assigned to each player. + config.default_player_roles = ("predator",) * 3 + ("prey",) * 10 + + return config diff --git a/meltingpot/python/configs/substrates/predator_prey__orchard.py b/meltingpot/python/configs/substrates/predator_prey__orchard.py new file mode 100644 index 00000000..423fceb4 --- /dev/null +++ b/meltingpot/python/configs/substrates/predator_prey__orchard.py @@ -0,0 +1,113 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for predator_prey__orchard. + +Example video: https://youtu.be/gtd-ziZYJRI + +See predator_prey.py for a detailed description applicable to all predator_prey +substrates. + +In this variant there are two areas of the map containing food: an apple-rich +region to the north of the safe tall grass and an acorn-rich region to the east. +There are two possible prey strategies focusing on either apples or acorns. 
+However, in this case it is clear that focusing on acorns is the better +strategy since they are relatively close to the safe tall grass. They can easily +be collected and brought back to safety for consumption. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import predator_prey as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +/;__________,;_______,/ +;]aa'X'XX''a|!a''''aA[, +!a''aaaaaa'X[]''aa&''A| +!X'aaAaaaaa''''aaaaa''| +!'&'aaaaaa''Aa'aaaaaa'| +!a'''X''''X'''a''''''a| +!aa''aaa''''''''''''aa| +L~+''aaa''=~~+XXXaA=~~J +;_]'''a'X'[_,L~~~~~J;_, +!XX'''a'X'''[_______]'| +!'''''a''''''XX'''''''| +!'r^^^^^^l'''X'A'''A''| +!'zv#****#^l'''''A''''| +!'''<#***##j'''A'''A''| +!''r###**#>''''''A'''X| +!''zv##***#^l''A'''A''| +!''''zvvvvvvj''''A'&''| +L+'''''''''''''''''''=J +/L~~~~~~~~~~~~~~~~~~~J/ +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
+CHAR_PREFAB_MAP = { + "*": {"type": "all", "list": ["safe_grass", "spawn_point_prey"]}, + "&": {"type": "all", "list": ["tiled_floor", "spawn_point_prey"]}, + "X": {"type": "all", "list": ["tiled_floor", "spawn_point_predator"]}, + "a": {"type": "all", "list": ["tiled_floor", "apple"]}, + "A": {"type": "all", "list": ["tiled_floor", "floor_acorn"]}, + ";": "nw_wall_corner", + ",": "ne_wall_corner", + "J": "se_wall_corner", + "L": "sw_wall_corner", + "_": "wall_north", + "|": "wall_east", + "~": "wall_south", + "!": "wall_west", + "=": "nw_inner_wall_corner", + "+": "ne_inner_wall_corner", + "]": "se_inner_wall_corner", + "[": "sw_inner_wall_corner", + "'": "tiled_floor", + "#": "safe_grass", + "<": "safe_grass_w_edge", + "^": "safe_grass_n_edge", + ">": "safe_grass_e_edge", + "v": "safe_grass_s_edge", + "l": "safe_grass_ne_corner", + "j": "safe_grass_se_corner", + "z": "safe_grass_sw_corner", + "r": "safe_grass_nw_corner", + "/": "fill", +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "STAMINA": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(152, 184), + }) + + # The roles assigned to each player. 
+ config.default_player_roles = ("predator",) * 5 + ("prey",) * 8 + + return config diff --git a/meltingpot/python/configs/substrates/predator_prey__random_forest.py b/meltingpot/python/configs/substrates/predator_prey__random_forest.py new file mode 100644 index 00000000..89a872b6 --- /dev/null +++ b/meltingpot/python/configs/substrates/predator_prey__random_forest.py @@ -0,0 +1,120 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for predator_prey__random_forest. + +Example video: https://youtu.be/ZYkXwvn5_Sc + +See predator_prey.py for a detailed description applicable to all predator_prey +substrates. + +In this variant there are only acorns, no apples. And, there is no fully safe +tall grass. The tall grass that there is on this map is never large enough for +prey to be fully safe from predation. The grass merely provides an obstacle that +predators must navigate around while chasing prey. 
+""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import predator_prey as base_config +from meltingpot.python.utils.substrates import map_helpers +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +/;___________________,/ +;]XAXXXXXXXAXXXXXXXAX[, +!XXXXXXXXXXXXXXXXXXXXX| +!''''M'M''MMM''M'M''''| +!'M''M'MM''Q''MM'M''M'| +!'MQ'M''MMMMMMM''M'QM'| +!''''''QM'''''MQ''''''| +!M'MMMMMM@@@@@MMMMMM'M| +!M''''''@@@@@@@''''''M| +!Q'MMQ''@@@A@@@''QMM'Q| +!M''''''@@@@@@@''''''M| +!M'MMMMMM@@@@@MMMMMM'M| +!''''''QM'''''MQ''''''| +!'MQ'M''MMMMMMM''M'QM'| +!'M''M'MM''Q''MM'M''M'| +!''''M'M''MMM''M'M''''| +!XXXXXXXXXXXXXXXXXXXXX| +L+XAXXXXXXXAXXXXXXXAX=J +/L~~~~~~~~~~~~~~~~~~~J/ +""" + +prey_spawn_point = {"type": "all", "list": ["tiled_floor", "spawn_point_prey"]} +predator_spawn_point = {"type": "all", "list": ["tiled_floor", + "spawn_point_predator"]} +acorn = {"type": "all", "list": ["tiled_floor", "floor_acorn"]} + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
+CHAR_PREFAB_MAP = { + "@": prey_spawn_point, + "*": {"type": "all", "list": ["safe_grass", "spawn_point_prey"]}, + "&": {"type": "all", "list": ["tiled_floor", "apple", "spawn_point_prey"]}, + "X": predator_spawn_point, + "a": {"type": "all", "list": ["tiled_floor", "apple"]}, + "A": acorn, + ";": "nw_wall_corner", + ",": "ne_wall_corner", + "J": "se_wall_corner", + "L": "sw_wall_corner", + "_": "wall_north", + "|": "wall_east", + "~": "wall_south", + "!": "wall_west", + "=": "nw_inner_wall_corner", + "+": "ne_inner_wall_corner", + "]": "se_inner_wall_corner", + "[": "sw_inner_wall_corner", + "'": "tiled_floor", + "#": "safe_grass", + "<": "safe_grass_w_edge", + "^": "safe_grass_n_edge", + ">": "safe_grass_e_edge", + "v": "safe_grass_s_edge", + "l": "safe_grass_ne_corner", + "j": "safe_grass_se_corner", + "z": "safe_grass_sw_corner", + "r": "safe_grass_nw_corner", + "/": "fill", + "Q": map_helpers.a_or_b_with_odds(acorn, "tiled_floor", odds=(1, 2)), + "M": map_helpers.a_or_b_with_odds("safe_grass", "tiled_floor", odds=(1, 2)), +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "STAMINA": specs.float64(), + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(152, 184), + }) + + # The roles assigned to each player. 
+ config.default_player_roles = ("predator",) * 5 + ("prey",) * 8 + + return config diff --git a/meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix.py b/meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix__arena.py similarity index 82% rename from meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix.py rename to meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix__arena.py index f45808f9..1971f606 100644 --- a/meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix__arena.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Prisoner's Dilemma in the Matrix. -Example video: https://youtu.be/bQkEKc1zNuE +Example video: https://youtu.be/81QrMpsP-HU See _Running with Scissors in the Matrix_ for a general description of the game dynamics. Here the payoff matrix represents the Prisoner's Dilemma game. @@ -22,10 +22,11 @@ Players have the default `11 x 11` (off center) observation window. """ -import copy -from typing import Any, Dict, Iterable, Sequence, Tuple +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -47,7 +48,7 @@ # The procedural generator replaces all 'a' chars in the default map with chars # representing specific resources, i.e. with either '1' or '2'. 
-DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWWWW WPPPP W W PPPPW WPPPP PPPPW @@ -107,10 +108,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -150,10 +147,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } @@ -214,29 +207,39 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { - "zero_initial_inventory": True, + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "matrix": [ # row player chooses a row of this matrix. # C D [3, 0], # C - [4, 1], # D + [5, 1], # D ], "columnPlayerMatrix": [ # column player chooses a column of this matrix. # C D - [3, 4], # C + [3, 5], # C [0, 1], # D ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue # violet + (0.0, 1.0), (1.0, 2.0), (2.0, 3.0), (3.0, 4.0), (4.0, 5.0) + ], } }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.15 + } + } ] } return scene @@ -263,10 +266,6 @@ def create_resource_prefab(resource_id, color_data): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -286,17 +285,17 @@ def create_resource_prefab(resource_id, color_data): "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", - "regenerationRate": 0.005, - "regenerationDelay": 50 + "regenerationRate": 0.04, + "regenerationDelay": 10, }, }, { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 1, + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, }, }, ] @@ -351,10 +350,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -408,14 +403,22 @@ def create_avatar_object(player_idx: int, { "component": "GameInteractionZapper", "kwargs": { - "cooldownTime": 32, + "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 200, + "framesTillRespawn": 50, "numResources": NUM_RESOURCES, - "reset_winner_inventory": False, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, "reset_loser_inventory": True, + # Both players get removed after each interaction. "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. 
+ "freezeOnInteraction": 16, } }, { @@ -433,13 +436,15 @@ def create_avatar_object(player_idx: int, "component": "Taste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, } }, { "component": "InteractionTaste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. - "zeroDefaultInteractionReward": True, + "zeroDefaultInteractionReward": False, "extraReward": 1.0, } }, @@ -464,6 +469,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -483,46 +489,15 @@ def create_avatar_objects(num_players: int) -> Sequence[PrefabConfig]: for player_idx in range(num_players): avatar = create_avatar_object(player_idx, TARGET_SPRITE_SELF) avatar_objects.append(avatar) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) return avatar_objects -def create_lab2d_settings( - num_players: int, - ascii_map_string: str, - settings_overrides: Iterable[Tuple[str, Any]] = ()) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - num_players: (int) the number of players. - ascii_map_string: ascii map. - settings_overrides: (key, value) overrides for default settings. 
- """ - settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map_string, - "gameObjects": create_avatar_objects(num_players=num_players), - "scene": copy.deepcopy(create_scene()), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - } - } - settings.update(settings_overrides) - return settings - - -def get_config(factory=create_lab2d_settings): - """Default config for prisoners dilemma in the matrix.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 8 - config.lab2d_settings = factory(config.num_players, DEFAULT_ASCII_MAP) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. @@ -530,6 +505,7 @@ def get_config(factory=create_lab2d_settings): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -544,10 +520,42 @@ def get_config(factory=create_lab2d_settings): "RGB": specs.OBSERVATION["RGB"], "INVENTORY": specs.inventory(2), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(2), "WORLD.RGB": specs.rgb(192, 200), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. 
+ substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix__repeated.py b/meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix__repeated.py new file mode 100644 index 00000000..2bf4344f --- /dev/null +++ b/meltingpot/python/configs/substrates/prisoners_dilemma_in_the_matrix__repeated.py @@ -0,0 +1,605 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Prisoner's Dilemma in the Matrix (two player, repeated). + +Example video: https://youtu.be/AAd9UcP0nk0 + +See _Running with Scissors in the Matrix_ for a general description of the +game dynamics. Here the payoff matrix represents the Prisoner's Dilemma game. +`K = 2` resources represent "cooperate" and "defect" pure strategies. + +Players have a `5 x 5` observation window. + +The episode has a chance of ending stochastically on every 100 step interval +after step 1000. 
This usually allows time for 8 or more interactions. +""" + +from typing import Any, Dict, Mapping, Sequence, Tuple + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# The number of resources must match the (square) size of the matrix. +NUM_RESOURCES = 2 + +# This color is green. +RESOURCE1_COLOR = (30, 225, 185, 255) +RESOURCE1_HIGHLIGHT_COLOR = (98, 234, 206, 255) +RESOURCE1_COLOR_DATA = (RESOURCE1_COLOR, RESOURCE1_HIGHLIGHT_COLOR) +# This color is red. +RESOURCE2_COLOR = (225, 30, 70, 255) +RESOURCE2_HIGHLIGHT_COLOR = (234, 98, 126, 255) +RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWW +Wn n nW +W WWW W W W WW W +W W 11a W a22 W W +Wn WW 11a W a22 WW nW +W 11a a22 W +W W +Wn WW WW n WW WWW nW +W W +W 22a W a11 W +Wn W 22a W a11 W nW +W W 22a W a11 WW W +W WWWW W W W WWW W +Wn n nW +WWWWWWWWWWWWWWWWWWWWWWW +""" + +_resource_names = [ + "resource_class1", # Cooperate + "resource_class2", # Defect +] + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
+CHAR_PREFAB_MAP = { + "a": {"type": "choice", "list": _resource_names}, + "1": _resource_names[0], + "2": _resource_names[1], + "n": "spawn_point", + "W": "wall", +} + +_COMPASS = ["N", "E", "S", "W"] + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall"], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [False] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gameInteraction" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. +NUM_PLAYERS_UPPER_BOUND = 8 +PLAYER_COLOR_PALETTES = [] +for idx in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((50, 100, 200)), + "noRotate": True, +} + +TARGET_SPRITE_OTHER = { + "name": "Other", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((200, 100, 50)), + "noRotate": True, +} + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TheMatrix", + "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, + "matrix": [ + # row player chooses a row of this matrix. + # C D + [3, 0], # C + [5, 1], # D + ], + "columnPlayerMatrix": [ + # column player chooses a column of this matrix. + # C D + [3, 5], # C + [0, 1], # D + ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue # violet + (0.0, 1.0), (1.0, 2.0), (2.0, 3.0), (3.0, 4.0), (4.0, 5.0) + ], + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.1 + } + } + ] + } + return scene + + +def create_resource_prefab( + resource_id: int, + resource_shape: str, + resource_palette: Dict[str, Tuple[int, int, int, int]]): + """Creates resource prefab with provided resource_id, shape, and palette.""" + resource_name = "resource_class{}".format(resource_id) + resource_prefab = { + "name": resource_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": resource_name, + "stateConfigs": [ + {"state": resource_name + "_wait", + "groups": ["resourceWaits"]}, + {"state": resource_name, + "layer": "lowerPhysical", + "sprite": resource_name + "_sprite"}, + ] + }, + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [resource_name + "_sprite"], + "spriteShapes": [resource_shape], + "palettes": [resource_palette], + "noRotates": [True] + }, + }, + { + "component": "Resource", + "kwargs": { + "resourceClass": resource_id, + "visibleType": resource_name, + "waitState": resource_name + "_wait", + "regenerationRate": 0.02, + "regenerationDelay": 10, + }, + }, + { + "component": "Destroyable", + "kwargs": { + "waitState": resource_name + "_wait", + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, + }, + }, + ] + } + return resource_prefab + + +def create_avatar_object( + player_idx: int, + all_source_sprite_names: Sequence[str], + target_sprite_self: Dict[str, Any], + target_sprite_other: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: + """Create an avatar object given self vs other sprite data.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. 
+ source_sprite_self = "Avatar" + str(lua_index) + custom_sprite_map = {source_sprite_self: target_sprite_self["name"]} + for name in all_source_sprite_names: + if name != source_sprite_self: + custom_sprite_map[name] = target_sprite_other["name"] + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "colored_square", + "spriteNames": [source_sprite_self], + # A white square should never be displayed. It will always be + # remapped since this is self vs other observation mode. + "spriteRGBColors": [(255, 255, 255, 255)], + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [target_sprite_self["name"], + target_sprite_other["name"]], + "customSpriteShapes": [target_sprite_self["shape"], + target_sprite_other["shape"]], + "customPalettes": [target_sprite_self["palette"], + target_sprite_other["palette"]], + "customNoRotates": [target_sprite_self["noRotate"], + target_sprite_other["noRotate"]], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 2, + "right": 2, + "forward": 3, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + # The following kwarg 
makes it possible to get rewarded even + # on frames when an avatar is "dead". It is needed for in the + # matrix games in order to correctly handle the case of two + # players getting hit simultaneously by the same beam. + "skipWaitStateRewards": False, + } + }, + { + "component": "GameInteractionZapper", + "kwargs": { + "cooldownTime": 2, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 5, + "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GameInteractionZapper", + } + }, + { + "component": "InventoryObserver", + "kwargs": { + } + }, + { + "component": "SpawnResourcesWhenAllPlayersZapped", + }, + { + "component": "Taste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, + } + }, + { + "component": "InteractionTaste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + "zeroDefaultInteractionReward": turn_off_default_reward, + "extraReward": 1.0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + # Report the inventories of both players involved in + # an interaction on this frame formatted as + # (self inventory, partner inventory). 
+ "name": "INTERACTION_INVENTORIES", + "type": "tensor.DoubleTensor", + "shape": (2, NUM_RESOURCES), + "component": "GameInteractionZapper", + "variable": "latest_interaction_inventories", + }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), + ] + } + }, + ] + } + return avatar_object + + +def create_prefabs(): + """Returns a dictionary mapping names to template game objects.""" + prefabs = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + } + prefabs["resource_class1"] = create_resource_prefab( + 1, shapes.BUTTON, {"*": RESOURCE1_COLOR_DATA[0], + "#": RESOURCE1_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class2"] = create_resource_prefab( + 2, shapes.BUTTON, {"*": RESOURCE2_COLOR_DATA[0], + "#": RESOURCE2_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + return prefabs + + +def get_all_source_sprite_names(num_players): + all_source_sprite_names = [] + for player_idx in range(0, num_players): + # Lua is 1-indexed. + lua_index = player_idx + 1 + all_source_sprite_names.append("Avatar" + str(lua_index)) + + return all_source_sprite_names + + +def create_avatar_objects(num_players, + turn_off_default_reward: bool = False): + """Returns list of avatar objects of length 'num_players'.""" + all_source_sprite_names = get_all_source_sprite_names(num_players) + avatar_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object( + player_idx, + all_source_sprite_names, + TARGET_SPRITE_SELF, + TARGET_SPRITE_OTHER, + turn_off_default_reward=turn_off_default_reward) + avatar_objects.append(game_object) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) + + return avatar_objects + + +def create_world_sprite_map( + num_players: int, target_sprite_other: Dict[str, Any]) -> Dict[str, str]: + all_source_sprite_names = get_all_source_sprite_names(num_players) + world_sprite_map = {} + for name in all_source_sprite_names: + world_sprite_map[name] = 
target_sprite_other["name"] + + return world_sprite_map + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "INVENTORY", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + "INTERACTION_INVENTORIES", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). + config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "INVENTORY": specs.inventory(2), + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "INTERACTION_INVENTORIES": specs.interaction_inventories(2), + "WORLD.RGB": specs.rgb(120, 184), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. 
+ maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). + "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/pure_coordination_in_the_matrix.py b/meltingpot/python/configs/substrates/pure_coordination_in_the_matrix__arena.py similarity index 84% rename from meltingpot/python/configs/substrates/pure_coordination_in_the_matrix.py rename to meltingpot/python/configs/substrates/pure_coordination_in_the_matrix__arena.py index 7f243738..d71d3d26 100644 --- a/meltingpot/python/configs/substrates/pure_coordination_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/pure_coordination_in_the_matrix__arena.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Pure Coordination in the Matrix. -Example video: https://youtu.be/5G9M7rGI68I +Example video: https://youtu.be/LG_qvqujxPU See _Running with Scissors in the Matrix_ for a general description of the game dynamics. Here the payoff matrix represents a pure coordination game with @@ -24,10 +24,11 @@ Both players are removed and their inventories are reset after each interaction. 
""" -import copy -from typing import Any, Dict, Iterable, List, Tuple +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -53,7 +54,7 @@ # The procedural generator replaces all 'a' chars in the default map with chars # representing specific resources, i.e. with either '1' or '2'. -DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWWWW WPPPP W W PPPPW WPPPP PPPPW @@ -115,10 +116,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -158,10 +155,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } @@ -229,23 +222,33 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { - "zero_initial_inventory": True, + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "matrix": [ # 1 2 3 [1, 0, 0], # 1 [0, 1, 0], # 2 [0, 0, 1] # 3 ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue # violet + (0.0, 0.2), (0.2, 0.4), (0.4, 0.6), (0.6, 0.8), (0.8, 1.0) + ], } }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.2 + } + } ] } return scene @@ -272,10 +275,6 @@ def create_resource_prefab(resource_id, color_data): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -295,17 +294,17 @@ def create_resource_prefab(resource_id, color_data): "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", - "regenerationRate": 0.005, - "regenerationDelay": 50 + "regenerationRate": 0.04, + "regenerationDelay": 10, }, }, { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 1, + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, }, }, ] @@ -330,7 +329,7 @@ def create_prefabs() -> PrefabConfig: def create_avatar_object(player_idx: int, - all_source_sprite_names: List[str], + all_source_sprite_names: Sequence[str], target_sprite_self: Dict[str, Any], target_sprite_other: Dict[str, Any]) -> Dict[str, Any]: """Create an avatar object given self vs other sprite data.""" @@ -366,10 +365,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -427,15 +422,22 @@ def create_avatar_object(player_idx: int, { "component": "GameInteractionZapper", "kwargs": { - "cooldownTime": 32, + "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 200, + "framesTillRespawn": 50, "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. "reset_winner_inventory": True, "reset_loser_inventory": True, + # Both players get removed after each interaction. 
"losingPlayerDies": True, "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, } }, { @@ -453,13 +455,15 @@ def create_avatar_object(player_idx: int, "component": "Taste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, } }, { "component": "InteractionTaste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. - "zeroDefaultInteractionReward": True, + "zeroDefaultInteractionReward": False, "extraReward": 1.0, } }, @@ -484,6 +488,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -513,6 +518,8 @@ def create_avatar_objects(num_players): TARGET_SPRITE_SELF, TARGET_SPRITE_OTHER) avatar_objects.append(game_object) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) return avatar_objects @@ -527,48 +534,10 @@ def create_world_sprite_map( return world_sprite_map -def create_lab2d_settings( - num_players: int, - ascii_map_string: str, - settings_overrides: Iterable[Tuple[str, Any]] = ()) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - num_players: (int) the number of players. - ascii_map_string: ascii map. - settings_overrides: (key, value) overrides for default settings. 
- """ - settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map_string, - "gameObjects": create_avatar_objects(num_players=num_players), - "scene": copy.deepcopy(create_scene()), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - # worldSpriteMap is needed to make the global view used in videos be - # be informative in cases where individual avatar views have had - # sprites remapped to one another (example: self vs other mode). - "worldSpriteMap": create_world_sprite_map(num_players, - TARGET_SPRITE_OTHER), - } - } - settings.update(settings_overrides) - return settings - - -def get_config(factory=create_lab2d_settings): - """Default config for pure coordination in the matrix.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 8 - config.lab2d_settings = factory(config.num_players, DEFAULT_ASCII_MAP) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. @@ -576,6 +545,7 @@ def get_config(factory=create_lab2d_settings): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -590,10 +560,47 @@ def get_config(factory=create_lab2d_settings): "RGB": specs.OBSERVATION["RGB"], "INVENTORY": specs.inventory(3), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(3), "WORLD.RGB": specs.rgb(192, 200), }) + # The roles assigned to each player. 
+ config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). + "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/pure_coordination_in_the_matrix__repeated.py b/meltingpot/python/configs/substrates/pure_coordination_in_the_matrix__repeated.py new file mode 100644 index 00000000..079e7ff9 --- /dev/null +++ b/meltingpot/python/configs/substrates/pure_coordination_in_the_matrix__repeated.py @@ -0,0 +1,610 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Pure Coordination in the Matrix (2 player, repeated). + +Example video: https://youtu.be/biyhB378q58 + +See _Running with Scissors in the Matrix_ for a general description of the +game dynamics. Here the payoff matrix represents a pure coordination game. +`K = 3`, three different resources corresponding to different coordination +outcomes. + +Players have a `5 x 5` observation window. + +The episode has a chance of ending stochastically on every 100 step interval +after step 1000. This usually allows time for 8 or more interactions. +""" + +from typing import Any, Dict, Mapping, Sequence, Tuple + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# The number of resources must match the (square) size of the matrix. +NUM_RESOURCES = 3 + +# This color is red. +RESOURCE1_COLOR = (150, 0, 0, 255) +RESOURCE1_HIGHLIGHT_COLOR = (200, 0, 0, 255) +RESOURCE1_COLOR_DATA = (RESOURCE1_COLOR, RESOURCE1_HIGHLIGHT_COLOR) +# This color is green. +RESOURCE2_COLOR = (0, 150, 0, 255) +RESOURCE2_HIGHLIGHT_COLOR = (0, 200, 0, 255) +RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) +# This color is blue. 
+RESOURCE3_COLOR = (0, 0, 150, 255) +RESOURCE3_HIGHLIGHT_COLOR = (0, 0, 200, 255) +RESOURCE3_COLOR_DATA = (RESOURCE3_COLOR, RESOURCE3_HIGHLIGHT_COLOR) + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWW +Wn n nW +W WWW W W WW W +W W rra app W W +Wn WW rra app WW nW +W rra app W +W W +Wn WW n nW +W WWWW W +W ssa W W +Wn W ssa W aaa W nW +W W ssa W aaa WW W +W WWWW W W W WWW W +Wn n nW +WWWWWWWWWWWWWWWWWWWWWWW +""" + +_resource_names = [ + "resource_class1", + "resource_class2", + "resource_class3", +] + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "a": {"type": "choice", "list": _resource_names}, + "r": _resource_names[0], + "p": _resource_names[1], + "s": _resource_names[2], + "n": "spawn_point", + "W": "wall", +} + +_COMPASS = ["N", "E", "S", "W"] + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall"], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [False] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gameInteraction" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. 
+NUM_PLAYERS_UPPER_BOUND = 8 +PLAYER_COLOR_PALETTES = [] +for idx in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((50, 100, 200)), + "noRotate": True, +} + +TARGET_SPRITE_OTHER = { + "name": "Other", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((200, 100, 50)), + "noRotate": True, +} + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TheMatrix", + "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, + "matrix": [ + # 1 2 3 + [1, 0, 0], # 1 + [0, 1, 0], # 2 + [0, 0, 1] # 3 + ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue # violet + (0.0, 0.2), (0.2, 0.4), (0.4, 0.6), (0.6, 0.8), (0.8, 1.0) + ], + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.1 + } + } + ] + } + return scene + + +def create_resource_prefab( + resource_id: int, + resource_shape: str, + resource_palette: Dict[str, Tuple[int, int, int, int]]): + """Creates resource prefab with provided resource_id, shape, and palette.""" + resource_name = "resource_class{}".format(resource_id) + resource_prefab = { + "name": resource_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": resource_name, + "stateConfigs": [ + {"state": resource_name + "_wait", + "groups": ["resourceWaits"]}, + {"state": resource_name, + "layer": "lowerPhysical", + "sprite": resource_name + "_sprite"}, + ] + }, + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [resource_name + "_sprite"], + "spriteShapes": [resource_shape], + "palettes": [resource_palette], + "noRotates": [True] + }, + }, + { + "component": "Resource", + "kwargs": { + "resourceClass": resource_id, + "visibleType": resource_name, + "waitState": resource_name + "_wait", + "regenerationRate": 0.02, + "regenerationDelay": 10, + }, + }, + { + "component": "Destroyable", + "kwargs": { + "waitState": resource_name + "_wait", + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, + }, + }, + ] + } + return resource_prefab + + +def create_avatar_object( + player_idx: int, + all_source_sprite_names: Sequence[str], + target_sprite_self: Dict[str, Any], + target_sprite_other: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: + """Create an avatar object given self vs other sprite data.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. 
+ source_sprite_self = "Avatar" + str(lua_index) + custom_sprite_map = {source_sprite_self: target_sprite_self["name"]} + for name in all_source_sprite_names: + if name != source_sprite_self: + custom_sprite_map[name] = target_sprite_other["name"] + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "colored_square", + "spriteNames": [source_sprite_self], + # A white square should never be displayed. It will always be + # remapped since this is self vs other observation mode. + "spriteRGBColors": [(255, 255, 255, 255)], + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [target_sprite_self["name"], + target_sprite_other["name"]], + "customSpriteShapes": [target_sprite_self["shape"], + target_sprite_other["shape"]], + "customPalettes": [target_sprite_self["palette"], + target_sprite_other["palette"]], + "customNoRotates": [target_sprite_self["noRotate"], + target_sprite_other["noRotate"]], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 2, + "right": 2, + "forward": 3, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + # The following kwarg 
makes it possible to get rewarded even + # on frames when an avatar is "dead". It is needed for in the + # matrix games in order to correctly handle the case of two + # players getting hit simultaneously by the same beam. + "skipWaitStateRewards": False, + } + }, + { + "component": "GameInteractionZapper", + "kwargs": { + "cooldownTime": 2, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 5, + "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GameInteractionZapper", + } + }, + { + "component": "InventoryObserver", + "kwargs": { + } + }, + { + "component": "SpawnResourcesWhenAllPlayersZapped", + }, + { + "component": "Taste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, + } + }, + { + "component": "InteractionTaste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + "zeroDefaultInteractionReward": turn_off_default_reward, + "extraReward": 1.0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + # Report the inventories of both players involved in + # an interaction on this frame formatted as + # (self inventory, partner inventory). 
+ "name": "INTERACTION_INVENTORIES", + "type": "tensor.DoubleTensor", + "shape": (2, NUM_RESOURCES), + "component": "GameInteractionZapper", + "variable": "latest_interaction_inventories", + }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), + ] + } + }, + ] + } + return avatar_object + + +def create_prefabs(): + """Returns a dictionary mapping names to template game objects.""" + prefabs = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + } + prefabs["resource_class1"] = create_resource_prefab( + 1, shapes.BUTTON, {"*": RESOURCE1_COLOR_DATA[0], + "#": RESOURCE1_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class2"] = create_resource_prefab( + 2, shapes.BUTTON, {"*": RESOURCE2_COLOR_DATA[0], + "#": RESOURCE2_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class3"] = create_resource_prefab( + 3, shapes.BUTTON, {"*": RESOURCE3_COLOR_DATA[0], + "#": RESOURCE3_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + return prefabs + + +def get_all_source_sprite_names(num_players): + all_source_sprite_names = [] + for player_idx in range(0, num_players): + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + all_source_sprite_names.append("Avatar" + str(lua_index)) + + return all_source_sprite_names + + +def create_avatar_objects(num_players, + turn_off_default_reward: bool = False): + """Returns list of avatar objects of length 'num_players'.""" + all_source_sprite_names = get_all_source_sprite_names(num_players) + avatar_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object( + player_idx, + all_source_sprite_names, + TARGET_SPRITE_SELF, + TARGET_SPRITE_OTHER, + turn_off_default_reward=turn_off_default_reward) + avatar_objects.append(game_object) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) + + return avatar_objects + + +def create_world_sprite_map( + num_players: int, target_sprite_other: Dict[str, Any]) -> Dict[str, str]: + all_source_sprite_names = get_all_source_sprite_names(num_players) + world_sprite_map = {} + for name in all_source_sprite_names: + world_sprite_map[name] = target_sprite_other["name"] + + return world_sprite_map + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "INVENTORY", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + "INTERACTION_INVENTORIES", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "INVENTORY": specs.inventory(3), + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "INTERACTION_INVENTORIES": specs.interaction_inventories(3), + "WORLD.RGB": specs.rgb(120, 184), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). 
+ "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix.py b/meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix__arena.py similarity index 84% rename from meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix.py rename to meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix__arena.py index 9621b8c4..da40c310 100644 --- a/meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix__arena.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Rationalizable Coordination in the Matrix. -Example video: https://youtu.be/BpHpoir06mY +Example video: https://youtu.be/IXakuZhvrxo See _Running with Scissors in the Matrix_ for a general description of the game dynamics. Here the payoff matrix represents a coordination game with @@ -26,10 +26,11 @@ Both players are removed and their inventories are reset after each interaction. """ -import copy -from typing import Any, Dict, Iterable, List, Tuple +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -55,7 +56,7 @@ # The procedural generator replaces all 'a' chars in the default map with chars # representing specific resources, i.e. with either '1' or '2'. 
-DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWWWW WPPPP W W PPPPW WPPPP PPPPW @@ -117,10 +118,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -160,10 +157,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } @@ -231,23 +224,33 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { - "zero_initial_inventory": True, + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "matrix": [ # 1 2 3 [1, 0, 0], # 1 [0, 2, 0], # 2 [0, 0, 3] # 3 ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue + (0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.0) + ], } }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.2 + } + } ] } return scene @@ -274,10 +277,6 @@ def create_resource_prefab(resource_id, color_data): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -297,17 +296,17 @@ def create_resource_prefab(resource_id, color_data): "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", - "regenerationRate": 0.005, - "regenerationDelay": 50 + "regenerationRate": 0.04, + "regenerationDelay": 10, }, }, { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 1, + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. 
+ "initialHealth": 3, }, }, ] @@ -332,7 +331,7 @@ def create_prefabs() -> PrefabConfig: def create_avatar_object(player_idx: int, - all_source_sprite_names: List[str], + all_source_sprite_names: Sequence[str], target_sprite_self: Dict[str, Any], target_sprite_other: Dict[str, Any]) -> Dict[str, Any]: """Create an avatar object given self vs other sprite data.""" @@ -368,10 +367,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -429,15 +424,22 @@ def create_avatar_object(player_idx: int, { "component": "GameInteractionZapper", "kwargs": { - "cooldownTime": 32, + "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 200, + "framesTillRespawn": 50, "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. "reset_winner_inventory": True, "reset_loser_inventory": True, + # Both players get removed after each interaction. "losingPlayerDies": True, "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, } }, { @@ -455,13 +457,15 @@ def create_avatar_object(player_idx: int, "component": "Taste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, } }, { "component": "InteractionTaste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. 
- "zeroDefaultInteractionReward": True, + "zeroDefaultInteractionReward": False, "extraReward": 1.0, } }, @@ -486,6 +490,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -515,6 +520,8 @@ def create_avatar_objects(num_players): TARGET_SPRITE_SELF, TARGET_SPRITE_OTHER) avatar_objects.append(game_object) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) return avatar_objects @@ -529,48 +536,10 @@ def create_world_sprite_map( return world_sprite_map -def create_lab2d_settings( - num_players: int, - ascii_map_string: str, - settings_overrides: Iterable[Tuple[str, Any]] = ()) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - num_players: (int) the number of players. - ascii_map_string: ascii map. - settings_overrides: (key, value) overrides for default settings. - """ - settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map_string, - "gameObjects": create_avatar_objects(num_players=num_players), - "scene": copy.deepcopy(create_scene()), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - # worldSpriteMap is needed to make the global view used in videos be - # be informative in cases where individual avatar views have had - # sprites remapped to one another (example: self vs other mode). - "worldSpriteMap": create_world_sprite_map(num_players, - TARGET_SPRITE_OTHER), - } - } - settings.update(settings_overrides) - return settings - - -def get_config(factory=create_lab2d_settings): - """Default config for rationalizable coordination in the matrix.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. 
- config.num_players = 8 - config.lab2d_settings = factory(config.num_players, DEFAULT_ASCII_MAP) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. @@ -578,6 +547,7 @@ def get_config(factory=create_lab2d_settings): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -592,10 +562,47 @@ def get_config(factory=create_lab2d_settings): "RGB": specs.OBSERVATION["RGB"], "INVENTORY": specs.inventory(3), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(3), "WORLD.RGB": specs.rgb(192, 200), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). 
+ "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix__repeated.py b/meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix__repeated.py new file mode 100644 index 00000000..82adf44a --- /dev/null +++ b/meltingpot/python/configs/substrates/rationalizable_coordination_in_the_matrix__repeated.py @@ -0,0 +1,610 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Config for Rationalizable Coordination in the Matrix (2 player, repeated). + +Example video: https://youtu.be/3brwR7DtxEI + +See _Running with Scissors in the Matrix_ for a general description of the +game dynamics. Here the payoff matrix represents a pure coordination game. +`K = 3`, three different resources corresponding to different coordination +outcomes. + +Players have a `5 x 5` observation window. + +The episode has a chance of ending stochastically on every 100 step interval +after step 1000. This usually allows time for 8 or more interactions. 
+""" + +from typing import Any, Dict, Mapping, Sequence, Tuple + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# The number of resources must match the (square) size of the matrix. +NUM_RESOURCES = 3 + +# This color is red. +RESOURCE1_COLOR = (150, 0, 0, 255) +RESOURCE1_HIGHLIGHT_COLOR = (200, 0, 0, 255) +RESOURCE1_COLOR_DATA = (RESOURCE1_COLOR, RESOURCE1_HIGHLIGHT_COLOR) +# This color is green. +RESOURCE2_COLOR = (0, 150, 0, 255) +RESOURCE2_HIGHLIGHT_COLOR = (0, 200, 0, 255) +RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) +# This color is blue. +RESOURCE3_COLOR = (0, 0, 150, 255) +RESOURCE3_HIGHLIGHT_COLOR = (0, 0, 200, 255) +RESOURCE3_COLOR_DATA = (RESOURCE3_COLOR, RESOURCE3_HIGHLIGHT_COLOR) + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWW +Wn n nW +W WWW W W WW W +W W rra app W W +Wn WW rra app WW nW +W rra app W +W W +Wn WW n nW +W WWWW W +W ssa W W +Wn W ssa W aaa W nW +W W ssa W aaa WW W +W WWWW W W W WWW W +Wn n nW +WWWWWWWWWWWWWWWWWWWWWWW +""" + +_resource_names = [ + "resource_class1", + "resource_class2", + "resource_class3", +] + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
+CHAR_PREFAB_MAP = { + "a": {"type": "choice", "list": _resource_names}, + "r": _resource_names[0], + "p": _resource_names[1], + "s": _resource_names[2], + "n": "spawn_point", + "W": "wall", +} + +_COMPASS = ["N", "E", "S", "W"] + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall"], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [False] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gameInteraction" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. +NUM_PLAYERS_UPPER_BOUND = 8 +PLAYER_COLOR_PALETTES = [] +for idx in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((50, 100, 200)), + "noRotate": True, +} + +TARGET_SPRITE_OTHER = { + "name": "Other", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((200, 100, 50)), + "noRotate": True, +} + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TheMatrix", + "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, + "matrix": [ + # 1 2 3 + [1, 0, 0], # 1 + [0, 2, 0], # 2 + [0, 0, 3] # 3 + ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue + (0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.0) + ], + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.1 + } + } + ] + } + return scene + + +def create_resource_prefab( + resource_id: int, + resource_shape: str, + resource_palette: Dict[str, Tuple[int, int, int, int]]): + """Creates resource prefab with provided resource_id, shape, and palette.""" + resource_name = "resource_class{}".format(resource_id) + resource_prefab = { + "name": resource_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": resource_name, + "stateConfigs": [ + {"state": resource_name + "_wait", + "groups": ["resourceWaits"]}, + {"state": resource_name, + "layer": "lowerPhysical", + "sprite": resource_name + "_sprite"}, + ] + }, + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [resource_name + "_sprite"], + "spriteShapes": [resource_shape], + "palettes": [resource_palette], + "noRotates": [True] + }, + }, + { + "component": "Resource", + "kwargs": { + "resourceClass": resource_id, + "visibleType": resource_name, + "waitState": resource_name + "_wait", + "regenerationRate": 0.02, + "regenerationDelay": 10, + }, + }, + { + "component": "Destroyable", + "kwargs": { + "waitState": resource_name + "_wait", + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, + }, + }, + ] + } + return resource_prefab + + +def create_avatar_object( + player_idx: int, + all_source_sprite_names: Sequence[str], + target_sprite_self: Dict[str, Any], + target_sprite_other: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: + """Create an avatar object given self vs other sprite data.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. 
+ source_sprite_self = "Avatar" + str(lua_index) + custom_sprite_map = {source_sprite_self: target_sprite_self["name"]} + for name in all_source_sprite_names: + if name != source_sprite_self: + custom_sprite_map[name] = target_sprite_other["name"] + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "colored_square", + "spriteNames": [source_sprite_self], + # A white square should never be displayed. It will always be + # remapped since this is self vs other observation mode. + "spriteRGBColors": [(255, 255, 255, 255)], + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [target_sprite_self["name"], + target_sprite_other["name"]], + "customSpriteShapes": [target_sprite_self["shape"], + target_sprite_other["shape"]], + "customPalettes": [target_sprite_self["palette"], + target_sprite_other["palette"]], + "customNoRotates": [target_sprite_self["noRotate"], + target_sprite_other["noRotate"]], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 2, + "right": 2, + "forward": 3, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + # The following kwarg 
makes it possible to get rewarded even + # on frames when an avatar is "dead". It is needed for in the + # matrix games in order to correctly handle the case of two + # players getting hit simultaneously by the same beam. + "skipWaitStateRewards": False, + } + }, + { + "component": "GameInteractionZapper", + "kwargs": { + "cooldownTime": 2, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 5, + "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GameInteractionZapper", + } + }, + { + "component": "InventoryObserver", + "kwargs": { + } + }, + { + "component": "SpawnResourcesWhenAllPlayersZapped", + }, + { + "component": "Taste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, + } + }, + { + "component": "InteractionTaste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + "zeroDefaultInteractionReward": turn_off_default_reward, + "extraReward": 1.0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + # Report the inventories of both players involved in + # an interaction on this frame formatted as + # (self inventory, partner inventory). 
+ "name": "INTERACTION_INVENTORIES", + "type": "tensor.DoubleTensor", + "shape": (2, NUM_RESOURCES), + "component": "GameInteractionZapper", + "variable": "latest_interaction_inventories", + }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), + ] + } + }, + ] + } + return avatar_object + + +def create_prefabs(): + """Returns a dictionary mapping names to template game objects.""" + prefabs = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + } + prefabs["resource_class1"] = create_resource_prefab( + 1, shapes.BUTTON, {"*": RESOURCE1_COLOR_DATA[0], + "#": RESOURCE1_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class2"] = create_resource_prefab( + 2, shapes.BUTTON, {"*": RESOURCE2_COLOR_DATA[0], + "#": RESOURCE2_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class3"] = create_resource_prefab( + 3, shapes.BUTTON, {"*": RESOURCE3_COLOR_DATA[0], + "#": RESOURCE3_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + return prefabs + + +def get_all_source_sprite_names(num_players): + all_source_sprite_names = [] + for player_idx in range(0, num_players): + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + all_source_sprite_names.append("Avatar" + str(lua_index)) + + return all_source_sprite_names + + +def create_avatar_objects(num_players, + turn_off_default_reward: bool = False): + """Returns list of avatar objects of length 'num_players'.""" + all_source_sprite_names = get_all_source_sprite_names(num_players) + avatar_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object( + player_idx, + all_source_sprite_names, + TARGET_SPRITE_SELF, + TARGET_SPRITE_OTHER, + turn_off_default_reward=turn_off_default_reward) + avatar_objects.append(game_object) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(readiness_marker) + + return avatar_objects + + +def create_world_sprite_map( + num_players: int, target_sprite_other: Dict[str, Any]) -> Dict[str, str]: + all_source_sprite_names = get_all_source_sprite_names(num_players) + world_sprite_map = {} + for name in all_source_sprite_names: + world_sprite_map[name] = target_sprite_other["name"] + + return world_sprite_map + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "INVENTORY", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + "INTERACTION_INVENTORIES", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "INVENTORY": specs.inventory(3), + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "INTERACTION_INVENTORIES": specs.interaction_inventories(3), + "WORLD.RGB": specs.rgb(120, 184), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). 
+ "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/reaction_graph_utils.py b/meltingpot/python/configs/substrates/reaction_graph_utils.py new file mode 100644 index 00000000..5f739b2f --- /dev/null +++ b/meltingpot/python/configs/substrates/reaction_graph_utils.py @@ -0,0 +1,613 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Library of functions for defining chemical motifs.""" + +from typing import Any, Dict +from absl import logging # pylint: disable=unused-import + +import networkx as nx # pylint: disable=unused-import +import numpy as np + +from meltingpot.python.utils.substrates import shapes + +EMPTY_COLOR = shapes.PETRI_DISH_PALETTE["@"] +WHITE_COLOR = (255, 255, 255, 255) # A white color. 
+ +DIAMOND_SHAPE = """ +xxxabxxx +xxaabbxx +xaaabbbx +aaaabbbb +ddddcccc +xdddcccx +xxddccxx +xxxdcxxx +""" + +SQUARE_SHAPE = """ +bbbbbbbb +bbbbbbbb +bbbbbbbb +bbbbbbbb +bbbbbbbb +bbbbbbbb +bbbbbbbb +bbbbbbbb +""" + +ENERGY_SHAPE = """ +xxxxxxxx +xxxxxxxx +xxxabxxx +xxaabbxx +xxddccxx +xxxdcxxx +xxxxxxxx +xxxxxxxx +""" + +FOOD_SHAPE = """ +xxxxxxxx +xxxxxxxx +xdddbbxx +ddbbbxxx +xxbddbbx +xdddbbxx +xxbbddbb +xxxxxxxx +""" + + +def graph_semantics(g): + """Convert a networkx.DiGraph to compounds and reactions for grid_land.""" + compounds = {} + reactions = {} + for node, attributes in g.nodes.items(): + if attributes.get("reaction"): + reactants = [e[0] for e in g.in_edges(node)] + products = [e[1] for e in g.out_edges(node)] + reactions[node] = create_reaction(reactants, products, attributes) + if not attributes.get("reaction"): + compounds[node] = create_compound(attributes) + + return compounds, reactions + + +def create_reaction(reactants, products, attributes): + # TODO(b/192926758): support fixedSwapOrder = False, in that case, pass + # reactants# and products as a dictionary mapping to the number required (not + # a list with possibly repeated entries like the current version). + return { + "reactants": reactants, + "products": products, + "fixedSwapOrder": attributes.get("fixedSwapOrder", True), + "priority": attributes.get("priority", 1), + } + + +def create_compound(attributes): + """Convert node attributes to dictionary structure needed for a compound.""" + data = { + # Use black color if none provided. + "color": attributes.get("color", (0, 0, 0, 0)), + "properties": { + # Use (0, 0) for structure if none provided, + "structure": attributes.get("structure", (0, 0)), + }, + } + for k, v in attributes.items(): + data[k] = v + return data + + +def add_system_nodes(g): + """Add several nodes that must always be present for the system to function. + + Args: + g: (nx.DiGraph): directed graph representing the reaction system. 
+ """ + g.add_nodes_from([ + # Add a node for the "empty" compound. + ("empty", {"color": EMPTY_COLOR, + "reactivity": "low"}), + # Add a node for the "activated" compound. + ("activated", {"color": WHITE_COLOR, + "immovable": True}), + # Add unused nodes that serve only to make all standard groups valid so + # their corresponding updater can be created. + ("_unused_a", {"reactivity": "low"}), + ("_unused_b", {"reactivity": "medium"}), + ("_unused_c", {"reactivity": "high"}) + ]) + + +def add_compounds_to_prefabs_dictionary(prefabs, + compounds, + reactivity_levels, + sprites=False, + default_reaction_radius=None, + default_reaction_query_type=None, + priority_mode=False): + """Add compounds.""" + for compound_name in compounds.keys(): + prefabs[compound_name] = create_cell_prefab( + compound_name, + compounds, + reactivity_levels, + sprites=sprites, + default_reaction_radius=default_reaction_radius, + default_reaction_query_type=default_reaction_query_type, + priority_mode=priority_mode) + return prefabs + + +def multiply_tuple(color_tuple, factor): + if len(color_tuple) == 3: + return tuple([int(np.min([x * factor, 255])) for x in color_tuple]) + elif len(color_tuple) == 4: + return tuple([int(np.min([x * factor])) for x in color_tuple]) + + +def adjust_color_opacity(color_tuple, factor): + apply_opacity = tuple([color_tuple[0], color_tuple[1], color_tuple[2], + color_tuple[3] * factor]) + return tuple([int(np.min([x])) for x in apply_opacity]) + + +def get_matter_palette(sprite_color): + return { + "*": sprite_color, + "b": shapes.WHITE, + "x": shapes.ALPHA, + # Shades for liquid matter. 
+ "L": shapes.adjust_color_brightness(sprite_color, 0.85), + "l": shapes.adjust_color_brightness(sprite_color, 0.90), + "w": shapes.adjust_color_brightness(sprite_color, 0.95), + } + + +def get_cytoavatar_palette(sprite_color): + return { + "*": (184, 61, 187, 255), + "&": (161, 53, 146, 255), + "o": sprite_color, + ",": shapes.BLACK, + "x": shapes.ALPHA, + "#": shapes.WHITE, + } + + +def create_cell_prefab(compound_name, compounds, reactivity_levels, + sprites=False, default_reaction_radius=None, + default_reaction_query_type=None, priority_mode=False): + """Create prefab for a cell object initially set to state=`compound_name`.""" + state_configs = [] + states_to_properties = {} + sprite_colors = [] + query_configs = {} + special_sprites = {} + for compound, attributes in compounds.items(): + groups = [] + if "reactivity" in attributes: + reactivity_group = attributes["reactivity"] + groups.append(reactivity_group) + if "immovable" in attributes and attributes["immovable"]: + groups.append("immovables") + if "query_config" in attributes: + query_configs[compound] = attributes["query_config"] + if "sprite" in attributes: + special_sprites[compound] = attributes["sprite"] + + state_config = { + "state": compound, + "sprite": compound, + "layer": "lowerPhysical", + "groups": groups + ["spawnPoints"], + } + state_configs.append(state_config) + states_to_properties[compound] = attributes["properties"] + sprite_colors.append(attributes["color"]) + + # Configure the Reactant component. 
+ reactivities = {} + for key, value in reactivity_levels.items(): + reactivities[key] = value + + if sprites: + def get_palette(sprite_color): + if len(sprite_color) == 3: + x_color = EMPTY_COLOR[0:3] + a_color = (252, 252, 252) + elif len(sprite_color) == 4: + x_color = EMPTY_COLOR + a_color = (252, 252, 252, 255) + return { + "x": x_color, + "a": a_color, + "b": sprite_color, + "c": multiply_tuple(sprite_color, 0.2), + "d": sprite_color + } + appearance_kwargs = { + "renderMode": "ascii_shape", + "spriteNames": list(compounds.keys()), + "spriteShapes": [DIAMOND_SHAPE] * len(sprite_colors), + "palettes": [get_palette(color) for color in sprite_colors], + "noRotates": [True] * len(sprite_colors), + } + # Must ensure "empty" and "activated" are not given the diamond sprite. + for i, compound in enumerate(appearance_kwargs["spriteNames"]): + if compound in ["empty", "activated"]: + appearance_kwargs["spriteShapes"][i] = SQUARE_SHAPE + if compound in special_sprites: + appearance_kwargs["spriteShapes"][i] = special_sprites[compound] + else: + appearance_kwargs = { + "spriteNames": list(compounds.keys()), + "spriteRGBColors": sprite_colors, + } + + prefab = { + "name": "cell", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": compound_name, + "stateConfigs": state_configs, + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": appearance_kwargs + }, + { + "component": "Cell", + "kwargs": { + "numCellStates": len(state_configs), + "statesToProperties": states_to_properties, + # The radius over which to search for neighbors on every step. + "radius": default_reaction_radius, + # Query according to L1 (diamond) or L2 (disc) norm. + "queryType": default_reaction_query_type, + # Layers on which to search for neighbors on every step. + "interactionLayers": ["lowerPhysical", "overlay"], + # You can override query properties on a per state basis. 
+ "stateSpecificQueryConfig": query_configs, + }, + }, + { + "component": "Reactant", + "kwargs": { + "name": "Reactant", + "reactivities": reactivities, + "priorityMode": priority_mode, + } + }, + { + "component": "Product", + "kwargs": { + "name": "Product", + } + }, + ] + } + return prefab + + +def create_vesicle(player_idx: int, + compounds, + reactivity_levels, + default_reaction_radius=None, + default_reaction_query_type=None, + priority_mode=False): + """Construct prefab for an avatar's vesicle object.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + vesicle_prefix = "vesicle_" + state_configs = [] + states_to_properties = {} + sprite_colors = [] + sprite_shapes = [] + query_configs = {} + for compound, attributes in compounds.items(): + groups = [] + sprite_shape = shapes.SINGLE_HOLDING_LIQUID + if "reactivity" in attributes: + reactivity_group = (vesicle_prefix + + attributes["reactivity"]) + groups.append(reactivity_group) + if "immovable" in attributes and attributes["immovable"]: + groups.append("immovables") + if "query_config" in attributes: + query_configs[compound] = attributes["query_config"] + + sprite_color = attributes["color"] + if compound == "empty": + sprite_shape = shapes.SQUARE + sprite_color = shapes.ALPHA + state_config = { + "state": compound, + "sprite": compound + "_vesicle", + "layer": "overlay", + "groups": groups, + } + state_configs.append(state_config) + states_to_properties[compound] = attributes["properties"] + sprite_colors.append(sprite_color) + sprite_shapes.append(sprite_shape) + + # Configure the Reactant component. 
+ reactivities = {} + for key, value in reactivity_levels.items(): + reactivities[vesicle_prefix + key] = value + + prefab = { + "name": f"avatar_vesicle_{lua_index}", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "preInit", + "stateConfigs": state_configs + + [{"state": "preInit"}], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [key + "_vesicle" for key in compounds.keys()], + "spriteShapes": sprite_shapes, + "palettes": [get_matter_palette(sprite_colors[i]) + for i in range(len(sprite_colors))], + "noRotates": [True] * len(sprite_colors) + }, + }, + { + "component": "AvatarVesicle", + "kwargs": { + "playerIndex": lua_index, + "preInitState": "preInit", + "initialState": "empty", + "waitState": "vesicleWait" + } + }, + { + "component": "Cell", + "kwargs": { + "numCellStates": len(state_configs), + "statesToProperties": states_to_properties, + # The radius over which to search for neighbors on every step. + "radius": default_reaction_radius, + # Query according to L1 (diamond) or L2 (disc) norm. + "queryType": default_reaction_query_type, + # Layers on which to search for neighbors on every step. + "interactionLayers": ["lowerPhysical", "overlay"], + # You can override query properties on a per state basis. + "stateSpecificQueryConfig": query_configs, + }, + }, + { + "component": "Reactant", + "kwargs": { + "name": "Reactant", + "reactivities": reactivities, + "priorityMode": priority_mode, + } + }, + { + "component": "Product", + "kwargs": { + "name": "Product", + } + }, + ] + } + return prefab + + +def create_avatar_constant_self_view( + rewarding_reactions, + player_idx: int, + target_sprite_self_empty: Dict[str, Any], + target_sprite_self_holds_one: Dict[str, Any], + randomize_initial_orientation: bool = True) -> Dict[str, Any]: + """Create an avatar prefab rewarded by reactions in `rewarding_reactions`.""" + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. + source_sprite_self_empty = f"Avatar_{lua_index}_empty" + source_sprite_self_holds_one = f"Avatar_{lua_index}_holds_one" + + custom_sprite_map = { + source_sprite_self_empty: target_sprite_self_empty["name"], + source_sprite_self_holds_one: target_sprite_self_holds_one["name"], + } + + # Part of the avatar is partially transparent so molecules can be seen below. + cytoavatar_palette = get_cytoavatar_palette((0, 0, 0, 75)) + + live_state_name_empty = f"player{lua_index}_empty" + live_state_name_holds_one = f"player{lua_index}_holds_one" + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name_empty, + "stateConfigs": [ + {"state": live_state_name_empty, + "layer": "upperPhysical", + "sprite": source_sprite_self_empty, + "contact": "avatar", + "groups": ["players"]}, + {"state": live_state_name_holds_one, + "layer": "upperPhysical", + "sprite": source_sprite_self_holds_one, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [source_sprite_self_empty, + source_sprite_self_holds_one], + "spriteShapes": [shapes.CYTOAVATAR_EMPTY, + shapes.CYTOAVATAR_HOLDING_ONE], + "palettes": [cytoavatar_palette] * 2, + "noRotates": [True] * 2 + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [ + target_sprite_self_empty["name"], + target_sprite_self_holds_one["name"], + ], + "customSpriteShapes": [ + target_sprite_self_empty["shape"], + target_sprite_self_holds_one["shape"], + ], + "customPalettes": [ + cytoavatar_palette, + cytoavatar_palette, + ], + "customNoRotates": [ + target_sprite_self_empty["noRotate"], + target_sprite_self_holds_one["noRotate"], + ], + } + 
}, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "spawnGroup": "spawnPoints", + "aliveState": live_state_name_empty, + "additionalLiveStates": [live_state_name_holds_one], + "waitState": "playerWait", + "actionOrder": ["move", "turn", "ioAction"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": 4}, + "turn": {"default": 0, "min": -1, "max": 1}, + "ioAction": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + "randomizeInitialOrientation": randomize_initial_orientation, + } + }, + { + "component": "IOBeam", + "kwargs": { + "cooldownTime": 2, + } + }, + { + "component": "VesicleManager", + "kwargs": { + "orderedVesicles": ["vesicleOne",], + "cytoavatarStates": { + "empty": live_state_name_empty, + "holdingOne": live_state_name_holds_one, + }, + } + }, + { + "component": "ReactionsToRewards", + "kwargs": { + # Specify rewards for specific reactions. + "rewardingReactions": rewarding_reactions + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + ] + } + return avatar_object + + +def create_scene(reactions, stochastic_episode_ending=False): + """Construct the global scene prefab.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "ReactionAlgebra", + "kwargs": { + "reactions": reactions + } + }, + { + "component": "GlobalMetricTracker", + "kwargs": { + "name": "GlobalMetricTracker", + } + }, + ] + } + if stochastic_episode_ending: + scene["components"].append({ + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.2 + } + }) + return scene diff --git a/meltingpot/python/configs/substrates/arena_running_with_scissors_in_the_matrix.py b/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__arena.py similarity index 76% rename from meltingpot/python/configs/substrates/arena_running_with_scissors_in_the_matrix.py rename to meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__arena.py index 185d0898..f0d020a7 100644 --- a/meltingpot/python/configs/substrates/arena_running_with_scissors_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__arena.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Configuration for Arena Running with Scissors in the Matrix. +"""Configuration for Running with Scissors in the Matrix (arena version). -Example video: https://youtu.be/esXPyGBIf2Y +Example video: https://youtu.be/6BL6JIbS2cE This substrate is the same as _Running with Scissors in the Matrix_ except in this case there are eight players and the map layout is different. Even though @@ -23,10 +23,11 @@ Players have the default `11 x 11` (off center) observation window. 
""" -import copy -from typing import Any, Dict, Iterable, Sequence, Tuple +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -50,9 +51,9 @@ RESOURCE3_HIGHLIGHT_COLOR = (91, 214, 255, 255) RESOURCE3_COLOR_DATA = (RESOURCE3_COLOR, RESOURCE3_HIGHLIGHT_COLOR) -# The procedural generator replaces all 'a' chars in the default map with chars +# The map parser replaces all 'a' chars in the default map with chars # representing specific resources, i.e. with either '1' or '2'. -DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWWWW WPPPP W W PPPPW WPPPP PPPPW @@ -114,10 +115,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -157,20 +154,24 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } +# Remove the first entry from human_readable_colors after using it for the self +# color to prevent it from being used again as another avatar color. +human_readable_colors = list(colors.human_readable) +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette(human_readable_colors.pop(0)), + "noRotate": True, +} # PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use # for the player at the corresponding index. -NUM_PLAYERS_UPPER_BOUND = 32 PLAYER_COLOR_PALETTES = [] -for idx in range(NUM_PLAYERS_UPPER_BOUND): - PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) +for human_readable_color in human_readable_colors: + PLAYER_COLOR_PALETTES.append(shapes.get_palette(human_readable_color)) # Primitive action components. 
# pylint: disable=bad-whitespace @@ -197,13 +198,6 @@ INTERACT, ) -TARGET_SPRITE_SELF = { - "name": "Self", - "shape": shapes.CUTE_AVATAR, - "palette": shapes.get_palette((50, 100, 200)), - "noRotate": True, -} - def create_scene(): """Creates the global scene.""" @@ -221,22 +215,35 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { - "zero_initial_inventory": False, + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "matrix": [ - [0, -1, 1], - [1, 0, -1], - [-1, 1, 0] + [0, -10, 10], + [10, 0, -10], + [-10, 10, 0] + ], + "resultIndicatorColorIntervals": [ + (-10.0, -5.0), # red + (-5.0, -2.5), # yellow + (-2.5, 2.5), # green + (2.5, 5.0), # blue + (5.0, 10.0) # violet ], } }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.2 + } + } ] } return scene @@ -263,10 +270,6 @@ def create_resource_prefab(resource_id, color_data): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -286,17 +289,17 @@ def create_resource_prefab(resource_id, color_data): "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", - "regenerationRate": 0.005, - "regenerationDelay": 50 + "regenerationRate": 0.04, + "regenerationDelay": 10, }, }, { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 1, + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. 
+ "initialHealth": 3, }, }, ] @@ -320,8 +323,10 @@ def create_prefabs() -> PrefabConfig: return prefabs -def create_avatar_object(player_idx: int, - target_sprite_self: Dict[str, Any]) -> Dict[str, Any]: +def create_avatar_object( + player_idx: int, + target_sprite_self: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: """Create an avatar object that always sees itself as blue.""" # Lua is 1-indexed. lua_index = player_idx + 1 @@ -352,10 +357,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -363,7 +364,8 @@ def create_avatar_object(player_idx: int, "renderMode": "ascii_shape", "spriteNames": [source_sprite_self], "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [shapes.get_palette(colors.palette[player_idx])], + "palettes": [shapes.get_palette( + human_readable_colors[player_idx])], "noRotates": [True] } }, @@ -412,11 +414,19 @@ def create_avatar_object(player_idx: int, "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 200, + "framesTillRespawn": 50, "numResources": NUM_RESOURCES, - "reset_winner_inventory": False, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, "reset_loser_inventory": True, + # Both players get removed after each interaction. "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, } }, { @@ -442,7 +452,7 @@ def create_avatar_object(player_idx: int, "component": "InteractionTaste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. 
- "zeroDefaultInteractionReward": False, + "zeroDefaultInteractionReward": turn_off_default_reward, "extraReward": 1.0, } }, @@ -467,6 +477,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -476,57 +487,35 @@ def create_avatar_object(player_idx: int, return avatar_object -def create_avatar_objects(num_players: int) -> Sequence[PrefabConfig]: +def create_avatar_objects( + num_players: int, + turn_off_default_reward: bool = False) -> Sequence[PrefabConfig]: """Returns all game objects for the map. Args: num_players: number of players to create avatars for. + turn_off_default_reward: if true then zero the main game reward. This is + used for training specialist background populations. """ avatar_objects = [] for player_idx in range(num_players): - avatar = create_avatar_object(player_idx, TARGET_SPRITE_SELF) + avatar = create_avatar_object( + player_idx, + TARGET_SPRITE_SELF, + turn_off_default_reward=turn_off_default_reward) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) avatar_objects.append(avatar) - return avatar_objects - + avatar_objects.append(readiness_marker) -def create_lab2d_settings( - num_players: int, - ascii_map_string: str, - settings_overrides: Iterable[Tuple[str, Any]] = ()) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - num_players: (int) the number of players. - ascii_map_string: ascii map. - settings_overrides: (key, value) overrides for default settings. 
- """ - settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map_string, - "gameObjects": create_avatar_objects(num_players=num_players), - "scene": copy.deepcopy(create_scene()), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - } - } - settings.update(settings_overrides) - return settings + return avatar_objects -def get_config(factory=create_lab2d_settings): - """Default config for running with scissors arena in the matrix.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 8 - - # Lua script configuration. - config.lab2d_settings = factory(config.num_players, DEFAULT_ASCII_MAP) + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False # Action set configuration. config.action_set = ACTION_SET @@ -535,6 +524,7 @@ def get_config(factory=create_lab2d_settings): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -549,10 +539,42 @@ def get_config(factory=create_lab2d_settings): "RGB": specs.OBSERVATION["RGB"], "INVENTORY": specs.inventory(3), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(3), "WORLD.RGB": specs.rgb(192, 200), }) + # The roles assigned to each player. 
+ config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix.py b/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__one_shot.py similarity index 82% rename from meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix.py rename to meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__one_shot.py index fb0c7a6b..3c4be344 100644 --- a/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__one_shot.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,9 +11,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Configuration for Running with Scissors in the Matrix. 
+"""Configuration for Running with Scissors in the Matrix (one shot version). -Example video: https://youtu.be/oqYd4Ib5g70 +Example video: https://youtu.be/gtemAx4XEcQ Players can move around the map and collect resources of `K` discrete types. In addition to movement, the agents have an action to fire an "interaction" beam. @@ -50,15 +50,18 @@ Players have a `5 x 5` observation window. +The episode ends after a single interaction. + Vezhnevets, A., Wu, Y., Eckstein, M., Leblond, R. and Leibo, J.Z., 2020. OPtions as REsponses: Grounding behavioural hierarchies in multi-agent reinforcement learning. In International Conference on Machine Learning (pp. 9733-9742). PMLR. """ -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, Mapping, Sequence, Tuple from ml_collections import config_dict +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import shapes from meltingpot.python.utils.substrates import specs @@ -79,21 +82,21 @@ RESOURCE3_HIGHLIGHT_COLOR = (91, 214, 255, 255) RESOURCE3_COLOR_DATA = (RESOURCE3_COLOR, RESOURCE3_HIGHLIGHT_COLOR) -DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWW -WP r r a a p p PW +Wn r r a a p p nW W W -WP r r a a p p PW +Wn r r a a p p nW W W -WP r r a a p p PW +Wn r r a a p p nW W W -W P P P W +W n n n W W W -WP s s a a a a PW +Wn s s a a a a nW W W -WP s s a a a a PW +Wn s s a a a a nW W W -WP s s a a a a PW +Wn s s a a a a nW WWWWWWWWWWWWWWWWWWWWWWW """ @@ -110,7 +113,7 @@ "r": _resource_names[0], "p": _resource_names[1], "s": _resource_names[2], - "P": "spawn_point", + "n": "spawn_point", "W": "wall", } @@ -132,10 +135,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -175,10 +174,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } @@ -246,18 +241,24 @@ def 
create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "matrix": [ - [0, -1, 1], - [1, 0, -1], - [-1, 1, 0] + [0, -10, 10], + [10, 0, -10], + [-10, 10, 0] + ], + "resultIndicatorColorIntervals": [ + (-10.0, -5.0), # red + (-5.0, -2.5), # yellow + (-2.5, 2.5), # green + (2.5, 5.0), # blue + (5.0, 10.0) # violet ], } }, @@ -290,10 +291,6 @@ def create_resource_prefab( }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -311,7 +308,7 @@ def create_resource_prefab( "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", + # Resources never regenerate since this substrate is one-shot. "regenerationRate": 0, "regenerationDelay": 1000 }, @@ -319,9 +316,10 @@ def create_resource_prefab( { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 2, + # It takes concerted effort to destroy resources here because + # this substrate is one-shot. + "initialHealth": 3, }, }, ] @@ -329,10 +327,12 @@ def create_resource_prefab( return resource_prefab -def create_avatar_object(player_idx: int, - all_source_sprite_names: List[str], - target_sprite_self: Dict[str, Any], - target_sprite_other: Dict[str, Any]) -> Dict[str, Any]: +def create_avatar_object( + player_idx: int, + all_source_sprite_names: Sequence[str], + target_sprite_self: Dict[str, Any], + target_sprite_other: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: """Create an avatar object given self vs other sprite data.""" # Lua is 1-indexed. 
lua_index = player_idx + 1 @@ -430,9 +430,19 @@ def create_avatar_object(player_idx: int, "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 1000, + "framesTillRespawn": 100, "numResources": NUM_RESOURCES, "endEpisodeOnFirstInteraction": True, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, } }, { @@ -450,7 +460,7 @@ def create_avatar_object(player_idx: int, "component": "Taste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. - # No resource is most tasty when mostTastyResourceClass == 1. + # No resource is most tasty when mostTastyResourceClass == -1. "mostTastyReward": 0.1, } }, @@ -458,7 +468,7 @@ def create_avatar_object(player_idx: int, "component": "InteractionTaste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. 
- "zeroDefaultInteractionReward": False, + "zeroDefaultInteractionReward": turn_off_default_reward, "extraReward": 1.0, } }, @@ -483,6 +493,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -522,16 +533,20 @@ def get_all_source_sprite_names(num_players): return all_source_sprite_names -def create_avatar_objects(num_players): +def create_avatar_objects(num_players, turn_off_default_reward: bool = False): """Returns list of avatar objects of length 'num_players'.""" all_source_sprite_names = get_all_source_sprite_names(num_players) avatar_objects = [] for player_idx in range(0, num_players): - game_object = create_avatar_object(player_idx, - all_source_sprite_names, - TARGET_SPRITE_SELF, - TARGET_SPRITE_OTHER) + game_object = create_avatar_object( + player_idx, + all_source_sprite_names, + TARGET_SPRITE_SELF, + TARGET_SPRITE_OTHER, + turn_off_default_reward=turn_off_default_reward) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) avatar_objects.append(game_object) + avatar_objects.append(readiness_marker) return avatar_objects @@ -546,39 +561,12 @@ def create_world_sprite_map( return world_sprite_map -def create_lab2d_settings(num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": DEFAULT_ASCII_MAP, - "gameObjects": create_avatar_objects(num_players), - "scene": create_scene(), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - # worldSpriteMap is needed to make the global view used in videos be - # be informative in cases where individual avatar views have had - # sprites remapped to one another (example: self vs other mode). 
- "worldSpriteMap": create_world_sprite_map(num_players, - TARGET_SPRITE_OTHER), - }, - } - return lab2d_settings - - def get_config(): - """Default config for training on running_with_scissors_in_the_matrix.""" + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 2 - - # Lua script configuration. - config.lab2d_settings = create_lab2d_settings(config.num_players) + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False # Action set configuration. config.action_set = ACTION_SET @@ -587,6 +575,7 @@ def get_config(): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -601,10 +590,46 @@ def get_config(): "RGB": specs.rgb(40, 40), "INVENTORY": specs.inventory(3), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(3), "WORLD.RGB": specs.rgb(120, 184), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=1000, # The maximum possible number of frames. 
+ spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). + "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__repeated.py b/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__repeated.py new file mode 100644 index 00000000..591dd20e --- /dev/null +++ b/meltingpot/python/configs/substrates/running_with_scissors_in_the_matrix__repeated.py @@ -0,0 +1,644 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Running with Scissors in the Matrix (two player, repeated). + +Example video: https://youtu.be/rZH9nUKefcU + +Players can move around the map and collect resources of `K` discrete types. In +addition to movement, the agents have an action to fire an "interaction" beam. +All players carry an inventory with the count of resources picked up since last +respawn. 
+ +Players can observe their own inventory but not the inventories of their +coplayers. When another agent is zapped with the interaction beam, an +interaction occurs. The resolution of the interactions is determined by a +traditional matrix game, where there is a `K x K` payoff matrix describing the +reward produced by the pure strategies available to the two players. The +resources map one-to-one to the pure strategies of the matrix game. Unless +stated otherwise, for the purposes of resolving the interaction, the zapping +agent is considered the row player, and the zapped agent the column player. The +actual strategy played depends on the resources picked up before the +interaction. The more resources of a given type an agent picks up, the more +committed the agent becomes to the pure strategy corresponding to that resource. + +In the case of running with scissors, `K = 3`, corresponding to rock, paper, and +scissors pure strategies respectively. + +The payoff matrix is the traditional rock-paper-scissors game matrix. + +Running with scissors was first described in Vezhnevets et al. (2020). Two +players gather rock, paper or scissor resources in the environment and can +challenge one another to a 'rock, paper scissor' game, the outcome of which +depends on the resources they collected. It is possible to observe the policy +that one's partner is starting to implement, either by watching them pick up +resources or by noting which resources are missing, and then take +countermeasures. This induces a wealth of possible feinting strategies. + +Players can also zap resources with their interaction beam to destroy them. This +creates additional scope for feinting strategies. + +Players have a `5 x 5` observation window. + +The episode has a chance of ending stochastically on every 100 step interval +after step 1000. This usually allows time for 8 or more interactions. + +Vezhnevets, A., Wu, Y., Eckstein, M., Leblond, R. and Leibo, J.Z., 2020. 
OPtions +as REsponses: Grounding behavioural hierarchies in multi-agent reinforcement +learning. In International Conference on Machine Learning (pp. 9733-9742). PMLR. +""" + +from typing import Any, Dict, Mapping, Sequence, Tuple + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# The number of resources must match the (square) size of the matrix. +NUM_RESOURCES = 3 + +# This color is yellow. +RESOURCE1_COLOR = (255, 227, 11, 255) +RESOURCE1_HIGHLIGHT_COLOR = (255, 214, 91, 255) +RESOURCE1_COLOR_DATA = (RESOURCE1_COLOR, RESOURCE1_HIGHLIGHT_COLOR) +# This color is violet. +RESOURCE2_COLOR = (109, 42, 255, 255) +RESOURCE2_HIGHLIGHT_COLOR = (132, 91, 255, 255) +RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) +# This color is cyan. +RESOURCE3_COLOR = (42, 188, 255, 255) +RESOURCE3_HIGHLIGHT_COLOR = (91, 214, 255, 255) +RESOURCE3_COLOR_DATA = (RESOURCE3_COLOR, RESOURCE3_HIGHLIGHT_COLOR) + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWW +Wn n nW +W WWW W W WW W +W W rra app W W +Wn WW rra app WW nW +W rra app W +W W +Wn WW n nW +W WWWW W +W ssa W W +Wn W ssa W aaa W nW +W W ssa W aaa WW W +W WWWW W W W WWW W +Wn n nW +WWWWWWWWWWWWWWWWWWWWWWW +""" + +_resource_names = [ + "resource_class1", + "resource_class2", + "resource_class3", +] + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
CHAR_PREFAB_MAP = {
    # "a" is a "choice" entry listing every resource prefab name.
    "a": {"type": "choice", "list": _resource_names},
    # "r", "p" and "s" pin a cell to the first, second and third resource
    # prefab name respectively.
    "r": _resource_names[0],
    "p": _resource_names[1],
    "s": _resource_names[2],
    "n": "spawn_point",
    "W": "wall",
}

# The four compass headings. `len(_COMPASS)` is used as the upper bound of the
# `move` action in the avatar's `actionSpec`.
_COMPASS = ["N", "E", "S", "W"]

# Wall prefab: a single-state object drawn with the `shapes.WALL` sprite that
# also carries a `BeamBlocker` for the "gameInteraction" beam type.
WALL = {
    "name": "wall",
    "components": [
        {
            "component": "StateManager",
            "kwargs": {
                "initialState": "wall",
                "stateConfigs": [{
                    "state": "wall",
                    "layer": "upperPhysical",
                    "sprite": "Wall",
                }],
            }
        },
        {
            "component": "Transform",
        },
        {
            "component": "Appearance",
            "kwargs": {
                "renderMode": "ascii_shape",
                "spriteNames": ["Wall"],
                "spriteShapes": [shapes.WALL],
                # One RGBA colour per symbol used in the wall sprite shape.
                "palettes": [{"*": (95, 95, 95, 255),
                              "&": (100, 100, 100, 255),
                              "@": (109, 109, 109, 255),
                              "#": (152, 152, 152, 255)}],
                "noRotates": [False]
            }
        },
        {
            "component": "BeamBlocker",
            "kwargs": {
                "beamType": "gameInteraction"
            }
        },
    ]
}

# Spawn point prefab: an object with no sprite whose only state belongs to the
# "spawnPoints" group, which is the group avatars name as their `spawnGroup`.
SPAWN_POINT = {
    "name": "spawnPoint",
    "components": [
        {
            "component": "StateManager",
            "kwargs": {
                "initialState": "spawnPoint",
                "stateConfigs": [{
                    "state": "spawnPoint",
                    "layer": "alternateLogic",
                    "groups": ["spawnPoints"]
                }],
            }
        },
        {
            "component": "Transform",
        },
    ]
}

# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use
# for the player at the corresponding index.
NUM_PLAYERS_UPPER_BOUND = 8
# Built with a comprehension so that no loop variable is left behind in the
# module namespace. Indexing (rather than slicing) is kept so that a palette
# with fewer than NUM_PLAYERS_UPPER_BOUND colors still fails loudly.
PLAYER_COLOR_PALETTES = [
    shapes.get_palette(colors.palette[player_slot])
    for player_slot in range(NUM_PLAYERS_UPPER_BOUND)
]

# Primitive action components.
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((50, 100, 200)), + "noRotate": True, +} + +TARGET_SPRITE_OTHER = { + "name": "Other", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((200, 100, 50)), + "noRotate": True, +} + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TheMatrix", + "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, + "matrix": [ + [0, -10, 10], + [10, 0, -10], + [-10, 10, 0] + ], + "resultIndicatorColorIntervals": [ + (-10.0, -5.0), # red + (-5.0, -2.5), # yellow + (-2.5, 2.5), # green + (2.5, 5.0), # blue + (5.0, 10.0) # violet + ], + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. 
+ "probabilityTerminationPerInterval": 0.2 + } + } + ] + } + return scene + + +def create_resource_prefab( + resource_id: int, + resource_shape: str, + resource_palette: Dict[str, Tuple[int, int, int, int]]): + """Creates resource prefab with provided resource_id, shape, and palette.""" + resource_name = "resource_class{}".format(resource_id) + resource_prefab = { + "name": resource_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": resource_name, + "stateConfigs": [ + {"state": resource_name + "_wait", + "groups": ["resourceWaits"]}, + {"state": resource_name, + "layer": "lowerPhysical", + "sprite": resource_name + "_sprite"}, + ] + }, + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [resource_name + "_sprite"], + "spriteShapes": [resource_shape], + "palettes": [resource_palette], + "noRotates": [True] + }, + }, + { + "component": "Resource", + "kwargs": { + "resourceClass": resource_id, + "visibleType": resource_name, + "waitState": resource_name + "_wait", + "regenerationRate": 0.02, + "regenerationDelay": 15, + }, + }, + { + "component": "Destroyable", + "kwargs": { + "waitState": resource_name + "_wait", + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. + "initialHealth": 3, + }, + }, + ] + } + return resource_prefab + + +def create_avatar_object( + player_idx: int, + all_source_sprite_names: Sequence[str], + target_sprite_self: Dict[str, Any], + target_sprite_other: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: + """Create an avatar object given self vs other sprite data.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. 
+ source_sprite_self = "Avatar" + str(lua_index) + custom_sprite_map = {source_sprite_self: target_sprite_self["name"]} + for name in all_source_sprite_names: + if name != source_sprite_self: + custom_sprite_map[name] = target_sprite_other["name"] + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "colored_square", + "spriteNames": [source_sprite_self], + # A white square should never be displayed. It will always be + # remapped since this is self vs other observation mode. + "spriteRGBColors": [(255, 255, 255, 255)], + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [target_sprite_self["name"], + target_sprite_other["name"]], + "customSpriteShapes": [target_sprite_self["shape"], + target_sprite_other["shape"]], + "customPalettes": [target_sprite_self["palette"], + target_sprite_other["palette"]], + "customNoRotates": [target_sprite_self["noRotate"], + target_sprite_other["noRotate"]], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 2, + "right": 2, + "forward": 3, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + # The following kwarg 
makes it possible to get rewarded even + # on frames when an avatar is "dead". It is needed for in the + # matrix games in order to correctly handle the case of two + # players getting hit simultaneously by the same beam. + "skipWaitStateRewards": False, + } + }, + { + "component": "GameInteractionZapper", + "kwargs": { + "cooldownTime": 2, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 5, + "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GameInteractionZapper", + } + }, + { + "component": "InventoryObserver", + "kwargs": { + } + }, + { + "component": "SpawnResourcesWhenAllPlayersZapped", + }, + { + "component": "Taste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, + } + }, + { + "component": "InteractionTaste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + "zeroDefaultInteractionReward": turn_off_default_reward, + "extraReward": 1.0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + # Report the inventories of both players involved in + # an interaction on this frame formatted as + # (self inventory, partner inventory). 
+ "name": "INTERACTION_INVENTORIES", + "type": "tensor.DoubleTensor", + "shape": (2, NUM_RESOURCES), + "component": "GameInteractionZapper", + "variable": "latest_interaction_inventories", + }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), + ] + } + }, + ] + } + return avatar_object + + +def create_prefabs(): + """Returns a dictionary mapping names to template game objects.""" + prefabs = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + } + prefabs["resource_class1"] = create_resource_prefab( + 1, shapes.BUTTON, {"*": RESOURCE1_COLOR_DATA[0], + "#": RESOURCE1_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class2"] = create_resource_prefab( + 2, shapes.BUTTON, {"*": RESOURCE2_COLOR_DATA[0], + "#": RESOURCE2_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class3"] = create_resource_prefab( + 3, shapes.BUTTON, {"*": RESOURCE3_COLOR_DATA[0], + "#": RESOURCE3_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + return prefabs + + +def get_all_source_sprite_names(num_players): + all_source_sprite_names = [] + for player_idx in range(0, num_players): + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + all_source_sprite_names.append("Avatar" + str(lua_index)) + + return all_source_sprite_names + + +def create_avatar_objects(num_players, + turn_off_default_reward: bool = False): + """Returns list of avatar objects of length 'num_players'.""" + all_source_sprite_names = get_all_source_sprite_names(num_players) + avatar_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object( + player_idx, + all_source_sprite_names, + TARGET_SPRITE_SELF, + TARGET_SPRITE_OTHER, + turn_off_default_reward=turn_off_default_reward) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(game_object) + avatar_objects.append(readiness_marker) + + return avatar_objects + + +def create_world_sprite_map( + num_players: int, target_sprite_other: Dict[str, Any]) -> Dict[str, str]: + all_source_sprite_names = get_all_source_sprite_names(num_players) + world_sprite_map = {} + for name in all_source_sprite_names: + world_sprite_map[name] = target_sprite_other["name"] + + return world_sprite_map + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "INVENTORY", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + "INTERACTION_INVENTORIES", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "INVENTORY": specs.inventory(3), + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "INTERACTION_INVENTORIES": specs.interaction_inventories(3), + "WORLD.RGB": specs.rgb(120, 184), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). 
+ "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/stag_hunt_in_the_matrix.py b/meltingpot/python/configs/substrates/stag_hunt_in_the_matrix__arena.py similarity index 78% rename from meltingpot/python/configs/substrates/stag_hunt_in_the_matrix.py rename to meltingpot/python/configs/substrates/stag_hunt_in_the_matrix__arena.py index 2393853f..022221df 100644 --- a/meltingpot/python/configs/substrates/stag_hunt_in_the_matrix.py +++ b/meltingpot/python/configs/substrates/stag_hunt_in_the_matrix__arena.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,7 +13,7 @@ # limitations under the License. """Configuration for Stag Hunt in the Matrix. -Example video: https://youtu.be/7fVHUH4siOQ +Example video: https://youtu.be/agOpo0MZmzs See _Running with Scissors in the Matrix_ for a general description of the game dynamics. Here the payoff matrix represents the Stag Hunt game. `K = 2` @@ -25,10 +25,11 @@ Players have the default `11 x 11` (off center) observation window. """ -import copy -from typing import Any, Dict, Iterable, Sequence, Tuple +from typing import Any, Dict, Mapping, Sequence from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix from meltingpot.python.utils.substrates import colors from meltingpot.python.utils.substrates import game_object_utils from meltingpot.python.utils.substrates import shapes @@ -50,7 +51,7 @@ # The procedural generator replaces all 'a' chars in the default map with chars # representing specific resources, i.e. with either '1' or '2'. 
-DEFAULT_ASCII_MAP = """ +ASCII_MAP = """ WWWWWWWWWWWWWWWWWWWWWWWWW WPPPPPPP W W PPPPPPPW WPPPP PPPPW @@ -59,14 +60,14 @@ WP PW WP 222222 222 PW WP 2 11 11 PW -W 2 11 222 W -W WW W 222 W -WW 21 W 222 W -WWW 21 WWWWWWWWW W -W 21 111 1 WWW -W 111 1 W -W 22 W 22 W -W 22 22 W WW W +W 2 11 a 222 W +W WW W1 11a W +WW 21 11 W 11a 2 W +WWW 21 WWWWWWWWW 2 W +W 2 aa 111 1a WWW +W 2 111 1a W +W aa W 22 W +W 22 2a Waa WW W WP 22 W222 PW WP 222 PW WP 222 PW @@ -85,6 +86,7 @@ # `prefab` determines which prefab game object to use for each `char` in the # ascii map. CHAR_PREFAB_MAP = { + "a": {"type": "choice", "list": _resource_names}, "1": _resource_names[0], "2": _resource_names[1], "P": "spawn_point", @@ -109,10 +111,6 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -152,20 +150,24 @@ }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, ] } +# Remove the first entry from human_readable_colors after using it for the self +# color to prevent it from being used again as another avatar color. +human_readable_colors = list(colors.human_readable) +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette(human_readable_colors.pop(0)), + "noRotate": True, +} # PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use # for the player at the corresponding index. -NUM_PLAYERS_UPPER_BOUND = 32 PLAYER_COLOR_PALETTES = [] -for idx in range(NUM_PLAYERS_UPPER_BOUND): - PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) +for human_readable_color in human_readable_colors: + PLAYER_COLOR_PALETTES.append(shapes.get_palette(human_readable_color)) # Primitive action components. 
# pylint: disable=bad-whitespace @@ -192,13 +194,6 @@ INTERACT, ) -TARGET_SPRITE_SELF = { - "name": "Self", - "shape": shapes.CUTE_AVATAR, - "palette": shapes.get_palette((50, 100, 200)), - "noRotate": True, -} - def create_scene(): """Creates the global scene.""" @@ -216,15 +211,13 @@ def create_scene(): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "TheMatrix", "kwargs": { - "zero_initial_inventory": True, + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, "matrix": [ # row player chooses a row of this matrix. # C D @@ -237,8 +230,20 @@ def create_scene(): [4, 2], # C [0, 2], # D ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue # violet + (0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 4.0) + ], } }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.1 + } + } ] } return scene @@ -265,10 +270,6 @@ def create_resource_prefab(resource_id, color_data): }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, }, { "component": "Appearance", @@ -288,17 +289,17 @@ def create_resource_prefab(resource_id, color_data): "resourceClass": resource_id, "visibleType": resource_name, "waitState": resource_name + "_wait", - "groupToRespawn": "resourceWaits", - "regenerationRate": 0.005, - "regenerationDelay": 50 + "regenerationRate": 0.04, + "regenerationDelay": 10, }, }, { "component": "Destroyable", "kwargs": { - "visibleType": resource_name, "waitState": resource_name + "_wait", - "initialHealth": 1, + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. 
+ "initialHealth": 3, }, }, ] @@ -353,10 +354,6 @@ def create_avatar_object(player_idx: int, }, { "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } }, { "component": "Appearance", @@ -364,7 +361,8 @@ def create_avatar_object(player_idx: int, "renderMode": "ascii_shape", "spriteNames": [source_sprite_self], "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [shapes.get_palette(colors.palette[player_idx])], + "palettes": [shapes.get_palette( + human_readable_colors[player_idx])], "noRotates": [True] } }, @@ -410,12 +408,22 @@ def create_avatar_object(player_idx: int, { "component": "GameInteractionZapper", "kwargs": { - "cooldownTime": 8, + "cooldownTime": 2, "beamLength": 3, "beamRadius": 1, - "framesTillRespawn": 64, + "framesTillRespawn": 50, "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. + "freezeOnInteraction": 16, } }, { @@ -433,13 +441,15 @@ def create_avatar_object(player_idx: int, "component": "Taste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, } }, { "component": "InteractionTaste", "kwargs": { "mostTastyResourceClass": -1, # -1 indicates no preference. 
- "zeroDefaultInteractionReward": True, + "zeroDefaultInteractionReward": False, "extraReward": 1.0, } }, @@ -464,6 +474,7 @@ def create_avatar_object(player_idx: int, "component": "GameInteractionZapper", "variable": "latest_interaction_inventories", }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), ] } }, @@ -482,48 +493,16 @@ def create_avatar_objects(num_players: int) -> Sequence[PrefabConfig]: avatar_objects = [] for player_idx in range(num_players): avatar = create_avatar_object(player_idx, TARGET_SPRITE_SELF) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) avatar_objects.append(avatar) + avatar_objects.append(readiness_marker) return avatar_objects -def create_lab2d_settings( - num_players: int, - ascii_map_string: str, - settings_overrides: Iterable[Tuple[str, Any]] = ()) -> Dict[str, Any]: - """Returns the lab2d settings. - - Args: - num_players: (int) the number of players. - ascii_map_string: ascii map. - settings_overrides: (key, value) overrides for default settings. - """ - settings = { - "levelName": "the_matrix", - "levelDirectory": "meltingpot/lua/levels", - "numPlayers": num_players, - "maxEpisodeLengthFrames": 1000, - "spriteSize": 8, - "simulation": { - "map": ascii_map_string, - "gameObjects": create_avatar_objects(num_players=num_players), - "scene": copy.deepcopy(create_scene()), - "prefabs": create_prefabs(), - "charPrefabMap": CHAR_PREFAB_MAP, - } - } - settings.update(settings_overrides) - return settings - - -def get_config(factory=create_lab2d_settings): - """Default config for stag hunt in the matrix.""" +def get_config(): + """Default configuration.""" config = config_dict.ConfigDict() - # Basic configuration. - config.num_players = 8 - - config.lab2d_settings = factory(config.num_players, DEFAULT_ASCII_MAP) - # Action set configuration. config.action_set = ACTION_SET # Observation format configuration. 
@@ -531,6 +510,7 @@ def get_config(factory=create_lab2d_settings): "RGB", "INVENTORY", "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). "POSITION", "ORIENTATION", "INTERACTION_INVENTORIES", @@ -545,10 +525,42 @@ def get_config(factory=create_lab2d_settings): "RGB": specs.OBSERVATION["RGB"], "INVENTORY": specs.inventory(2), "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). "POSITION": specs.OBSERVATION["POSITION"], "ORIENTATION": specs.OBSERVATION["ORIENTATION"], "INTERACTION_INVENTORIES": specs.interaction_inventories(2), "WORLD.RGB": specs.rgb(192, 200), }) + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 8 + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/stag_hunt_in_the_matrix__repeated.py b/meltingpot/python/configs/substrates/stag_hunt_in_the_matrix__repeated.py new file mode 100644 index 00000000..ba7ac2a5 --- /dev/null +++ b/meltingpot/python/configs/substrates/stag_hunt_in_the_matrix__repeated.py @@ -0,0 +1,604 @@ +# Copyright 2022 DeepMind Technologies Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Stag Hunt in the Matrix (two player, repeated version). + +Example video: https://youtu.be/aDp_CArcb1Y + +See _Running with Scissors in the Matrix_ for a general description of the +game dynamics. Here the payoff matrix represents the Stag Hunt game. `K = 2` +resources represent "stag" and "hare" pure strategies. + +Players have a `5 x 5` observation window. + +The episode has a chance of ending stochastically on every 100 step interval +after step 1000. This usually allows time for 8 or more interactions. +""" + +from typing import Any, Dict, Mapping, Sequence, Tuple + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import the_matrix +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +# The number of resources must match the (square) size of the matrix. +NUM_RESOURCES = 2 + +# This color is green. +RESOURCE1_COLOR = (30, 225, 185, 255) +RESOURCE1_HIGHLIGHT_COLOR = (98, 234, 206, 255) +RESOURCE1_COLOR_DATA = (RESOURCE1_COLOR, RESOURCE1_HIGHLIGHT_COLOR) +# This color is red. 
+RESOURCE2_COLOR = (225, 30, 70, 255) +RESOURCE2_HIGHLIGHT_COLOR = (234, 98, 126, 255) +RESOURCE2_COLOR_DATA = (RESOURCE2_COLOR, RESOURCE2_HIGHLIGHT_COLOR) + +ASCII_MAP = """ +WWWWWWWWWWWWWWWWWWWWWWW +Wn n nW +W 2WWW W W W WW2 W +W W 11a W 222 W W +Wn WW 11a W a22 WW nW +W 1aa 2 a22 W +W 2 2 W +Wn WW WW2 n WW WWW nW +W 2 2 W +W 22a 2 aa1 W +Wn W 22a W a11 W nW +W 2W 222 W a11 WW W +W WWWW W W W WWW2 W +Wn n nW +WWWWWWWWWWWWWWWWWWWWWWW +""" + +_resource_names = [ + "resource_class1", + "resource_class2", +] + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +CHAR_PREFAB_MAP = { + "a": {"type": "choice", "list": _resource_names}, + "1": _resource_names[0], + "2": _resource_names[1], + "n": "spawn_point", + "W": "wall", +} + +_COMPASS = ["N", "E", "S", "W"] + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall"], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (95, 95, 95, 255), + "&": (100, 100, 100, 255), + "@": (109, 109, 109, 255), + "#": (152, 152, 152, 255)}], + "noRotates": [False] + } + }, + { + "component": "BeamBlocker", + "kwargs": { + "beamType": "gameInteraction" + } + }, + ] +} + +SPAWN_POINT = { + "name": "spawnPoint", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "spawnPoint", + "stateConfigs": [{ + "state": "spawnPoint", + "layer": "alternateLogic", + "groups": ["spawnPoints"] + }], + } + }, + { + "component": "Transform", + }, + ] +} + +# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use +# for the player at the corresponding index. 
+NUM_PLAYERS_UPPER_BOUND = 8 +PLAYER_COLOR_PALETTES = [] +for idx in range(NUM_PLAYERS_UPPER_BOUND): + PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[idx])) + +# Primitive action components. +# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "interact": 0} +FORWARD = {"move": 1, "turn": 0, "interact": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "interact": 0} +BACKWARD = {"move": 3, "turn": 0, "interact": 0} +STEP_LEFT = {"move": 4, "turn": 0, "interact": 0} +TURN_LEFT = {"move": 0, "turn": -1, "interact": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "interact": 0} +INTERACT = {"move": 0, "turn": 0, "interact": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + INTERACT, +) + +TARGET_SPRITE_SELF = { + "name": "Self", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((50, 100, 200)), + "noRotate": True, +} + +TARGET_SPRITE_OTHER = { + "name": "Other", + "shape": shapes.CUTE_AVATAR, + "palette": shapes.get_palette((200, 100, 50)), + "noRotate": True, +} + + +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "TheMatrix", + "kwargs": { + # Prevent interaction before both interactors have collected + # at least one resource. + "disallowUnreadyInteractions": True, + "matrix": [ + # row player chooses a row of this matrix. + # C D + [4, 0], # C + [2, 2], # D + ], + "columnPlayerMatrix": [ + # column player chooses a column of this matrix. 
+ # C D + [4, 2], # C + [0, 2], # D + ], + "resultIndicatorColorIntervals": [ + # red # yellow # green # blue # violet + (0.0, 0.5), (0.5, 1.5), (1.5, 2.5), (2.5, 3.5), (3.5, 4.0) + ], + } + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.1 + } + } + ] + } + return scene + + +def create_resource_prefab( + resource_id: int, + resource_shape: str, + resource_palette: Dict[str, Tuple[int, int, int, int]]): + """Creates resource prefab with provided resource_id, shape, and palette.""" + resource_name = "resource_class{}".format(resource_id) + resource_prefab = { + "name": resource_name, + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": resource_name, + "stateConfigs": [ + {"state": resource_name + "_wait", + "groups": ["resourceWaits"]}, + {"state": resource_name, + "layer": "lowerPhysical", + "sprite": resource_name + "_sprite"}, + ] + }, + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [resource_name + "_sprite"], + "spriteShapes": [resource_shape], + "palettes": [resource_palette], + "noRotates": [True] + }, + }, + { + "component": "Resource", + "kwargs": { + "resourceClass": resource_id, + "visibleType": resource_name, + "waitState": resource_name + "_wait", + "regenerationRate": 0.02, + "regenerationDelay": 10, + }, + }, + { + "component": "Destroyable", + "kwargs": { + "waitState": resource_name + "_wait", + # It is possible to destroy resources but takes concerted + # effort to do so by zapping them `initialHealth` times. 
+ "initialHealth": 3, + }, + }, + ] + } + return resource_prefab + + +def create_avatar_object( + player_idx: int, + all_source_sprite_names: Sequence[str], + target_sprite_self: Dict[str, Any], + target_sprite_other: Dict[str, Any], + turn_off_default_reward: bool = False) -> Dict[str, Any]: + """Create an avatar object given self vs other sprite data.""" + # Lua is 1-indexed. + lua_index = player_idx + 1 + + # Setup the self vs other sprite mapping. + source_sprite_self = "Avatar" + str(lua_index) + custom_sprite_map = {source_sprite_self: target_sprite_self["name"]} + for name in all_source_sprite_names: + if name != source_sprite_self: + custom_sprite_map[name] = target_sprite_other["name"] + + live_state_name = "player{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": source_sprite_self, + "contact": "avatar", + "groups": ["players"]}, + + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "colored_square", + "spriteNames": [source_sprite_self], + # A white square should never be displayed. It will always be + # remapped since this is self vs other observation mode. 
+ "spriteRGBColors": [(255, 255, 255, 255)], + } + }, + { + "component": "AdditionalSprites", + "kwargs": { + "renderMode": "ascii_shape", + "customSpriteNames": [target_sprite_self["name"], + target_sprite_other["name"]], + "customSpriteShapes": [target_sprite_self["shape"], + target_sprite_other["shape"]], + "customPalettes": [target_sprite_self["palette"], + target_sprite_other["palette"]], + "customNoRotates": [target_sprite_self["noRotate"], + target_sprite_other["noRotate"]], + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "speed": 1.0, + "spawnGroup": "spawnPoints", + "actionOrder": ["move", "turn", "interact"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "interact": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 2, + "right": 2, + "forward": 3, + "backward": 1, + "centered": False + }, + "spriteMap": custom_sprite_map, + # The following kwarg makes it possible to get rewarded even + # on frames when an avatar is "dead". It is needed for in the + # matrix games in order to correctly handle the case of two + # players getting hit simultaneously by the same beam. + "skipWaitStateRewards": False, + } + }, + { + "component": "GameInteractionZapper", + "kwargs": { + "cooldownTime": 2, + "beamLength": 3, + "beamRadius": 1, + "framesTillRespawn": 5, + "numResources": NUM_RESOURCES, + "endEpisodeOnFirstInteraction": False, + # Reset both players' inventories after each interaction. + "reset_winner_inventory": True, + "reset_loser_inventory": True, + # Both players get removed after each interaction. + "losingPlayerDies": True, + "winningPlayerDies": True, + # `freezeOnInteraction` is the number of frames to display the + # interaction result indicator, freeze, and delay delivering + # all results of interacting. 
+ "freezeOnInteraction": 16, + } + }, + { + "component": "ReadyToShootObservation", + "kwargs": { + "zapperComponent": "GameInteractionZapper", + } + }, + { + "component": "InventoryObserver", + "kwargs": { + } + }, + { + "component": "SpawnResourcesWhenAllPlayersZapped", + }, + { + "component": "Taste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + # No resource is most tasty when mostTastyResourceClass == -1. + "mostTastyReward": 0.1, + } + }, + { + "component": "InteractionTaste", + "kwargs": { + "mostTastyResourceClass": -1, # -1 indicates no preference. + "zeroDefaultInteractionReward": turn_off_default_reward, + "extraReward": 1.0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "AvatarMetricReporter", + "kwargs": { + "metrics": [ + { + # Report the inventories of both players involved in + # an interaction on this frame formatted as + # (self inventory, partner inventory). + "name": "INTERACTION_INVENTORIES", + "type": "tensor.DoubleTensor", + "shape": (2, NUM_RESOURCES), + "component": "GameInteractionZapper", + "variable": "latest_interaction_inventories", + }, + *the_matrix.get_cumulant_metric_configs(NUM_RESOURCES), + ] + } + }, + ] + } + return avatar_object + + +def create_prefabs(): + """Returns a dictionary mapping names to template game objects.""" + prefabs = { + "wall": WALL, + "spawn_point": SPAWN_POINT, + } + prefabs["resource_class1"] = create_resource_prefab( + 1, shapes.BUTTON, {"*": RESOURCE1_COLOR_DATA[0], + "#": RESOURCE1_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + prefabs["resource_class2"] = create_resource_prefab( + 2, shapes.BUTTON, {"*": RESOURCE2_COLOR_DATA[0], + "#": RESOURCE2_COLOR_DATA[1], + "x": (0, 0, 0, 0)}) + return prefabs + + +def get_all_source_sprite_names(num_players): + all_source_sprite_names = [] + for player_idx in range(0, num_players): + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + all_source_sprite_names.append("Avatar" + str(lua_index)) + + return all_source_sprite_names + + +def create_avatar_objects(num_players, + turn_off_default_reward: bool = False): + """Returns list of avatar objects of length 'num_players'.""" + all_source_sprite_names = get_all_source_sprite_names(num_players) + avatar_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object( + player_idx, + all_source_sprite_names, + TARGET_SPRITE_SELF, + TARGET_SPRITE_OTHER, + turn_off_default_reward=turn_off_default_reward) + readiness_marker = the_matrix.create_ready_to_interact_marker(player_idx) + avatar_objects.append(game_object) + avatar_objects.append(readiness_marker) + + return avatar_objects + + +def create_world_sprite_map( + num_players: int, target_sprite_other: Dict[str, Any]) -> Dict[str, str]: + all_source_sprite_names = get_all_source_sprite_names(num_players) + world_sprite_map = {} + for name in all_source_sprite_names: + world_sprite_map[name] = target_sprite_other["name"] + + return world_sprite_map + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Other parameters that are useful to override in training config files. + config.turn_off_default_reward = False + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "INVENTORY", + "READY_TO_SHOOT", + # Debug only (do not use the following observations in policies). + "POSITION", + "ORIENTATION", + "INTERACTION_INVENTORIES", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.rgb(40, 40), + "INVENTORY": specs.inventory(2), + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + # Debug only (do not use the following observations in policies). + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "INTERACTION_INVENTORIES": specs.interaction_inventories(2), + "WORLD.RGB": specs.rgb(120, 184), + }) + + # The roles assigned to each player. + config.valid_roles = frozenset({"default"}) + config.default_player_roles = ("default",) * 2 + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given roles.""" + del config + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="the_matrix", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + maxEpisodeLengthFrames=2500, # The maximum possible number of frames. + spriteSize=8, + topology="BOUNDED", # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": ASCII_MAP, + "gameObjects": create_avatar_objects(num_players=num_players), + "scene": create_scene(), + "prefabs": create_prefabs(), + "charPrefabMap": CHAR_PREFAB_MAP, + # worldSpriteMap is needed to make the global view used in videos be + # be informative in cases where individual avatar views have had + # sprites remapped to one another (example: self vs other mode). + "worldSpriteMap": create_world_sprite_map(num_players, + TARGET_SPRITE_OTHER), + } + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/territory.py b/meltingpot/python/configs/substrates/territory.py new file mode 100644 index 00000000..066f5f87 --- /dev/null +++ b/meltingpot/python/configs/substrates/territory.py @@ -0,0 +1,893 @@ +# Copyright 2022 DeepMind Technologies Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Common configuration for all territory_* substrates. + +See _Territory: Open_ for the general description of the mechanics at play in +this substrate. +""" + +from typing import Any, Mapping, Sequence + +from ml_collections import config_dict + +from meltingpot.python.utils.substrates import colors +from meltingpot.python.utils.substrates import game_object_utils +from meltingpot.python.utils.substrates import shapes +from meltingpot.python.utils.substrates import specs + +PrefabConfig = game_object_utils.PrefabConfig +_COMPASS = ["N", "E", "S", "W"] + +MARKING_SPRITE = """ +oxxxxxxo +xoxxxxox +xxoxxoxx +xxxooxxx +xxxooxxx +xxoxxoxx +xoxxxxox +oxxxxxxo +""" + + +def get_marking_palette(alpha: float) -> Mapping[str, Sequence[int]]: + alpha_uint8 = int(alpha * 255) + assert alpha_uint8 >= 0.0 and alpha_uint8 <= 255, "Color value out of range." 
+ return {"x": shapes.ALPHA, "o": (0, 0, 0, alpha_uint8)} + +FLOOR = { + "name": "floor", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "floor", + "stateConfigs": [{ + "state": "floor", + "layer": "background", + "sprite": "Floor", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Floor",], + "spriteShapes": [shapes.GRAINY_FLOOR], + "palettes": [{ + "*": (27, 22, 20, 255), + "+": (23, 17, 15, 255), + }], + "noRotates": [True] + } + }, + { + "component": "Transform", + }, + ] +} + +WALL = { + "name": "wall", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "wall", + "stateConfigs": [{ + "state": "wall", + "layer": "upperPhysical", + "sprite": "Wall", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["Wall",], + "spriteShapes": [shapes.FILL], + "palettes": [{"i": (61, 57, 55, 255)}], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + { + "component": "AllBeamBlocker", + "kwargs": {} + }, + ] +} + +WALL_HIGHLIGHT_NW = { + "name": "nw_highlight", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "nw_highlight", + "stateConfigs": [{ + "state": "nw_highlight", + "layer": "overlay", + "sprite": "NWHighlight", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NWHighlight",], + "spriteShapes": [shapes.NW_HIGHLIGHT], + "palettes": [shapes.HIGHLIGHT_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +WALL_HIGHLIGHT_E_W = { + "name": "e_w_highlight", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "e_w_highlight", + "stateConfigs": [{ + "state": "e_w_highlight", + "layer": "overlay", + "sprite": "EWHighlight", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": 
"ascii_shape", + "spriteNames": ["EWHighlight",], + "spriteShapes": [shapes.E_W_HIGHLIGHT], + "palettes": [shapes.HIGHLIGHT_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +WALL_HIGHLIGHT_N_S = { + "name": "n_s_highlight", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "n_s_highlight", + "stateConfigs": [{ + "state": "n_s_highlight", + "layer": "overlay", + "sprite": "NSHighlight", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NSHighlight",], + "spriteShapes": [shapes.N_S_HIGHLIGHT], + "palettes": [shapes.HIGHLIGHT_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +WALL_HIGHLIGHT_NE = { + "name": "ne_highlight", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "ne_highlight", + "stateConfigs": [{ + "state": "ne_highlight", + "layer": "overlay", + "sprite": "NEHighlight", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["NEHighlight",], + "spriteShapes": [shapes.NE_HIGHLIGHT], + "palettes": [shapes.HIGHLIGHT_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +WALL_HIGHLIGHT_SE = { + "name": "se_highlight", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "se_highlight", + "stateConfigs": [{ + "state": "se_highlight", + "layer": "overlay", + "sprite": "SEHighlight", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SEHighlight",], + "spriteShapes": [shapes.SE_HIGHLIGHT], + "palettes": [shapes.HIGHLIGHT_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +WALL_HIGHLIGHT_SW = { + "name": "sw_highlight", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "sw_highlight", + "stateConfigs": [{ + 
"state": "sw_highlight", + "layer": "overlay", + "sprite": "SWHighlight", + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["SWHighlight",], + "spriteShapes": [shapes.SW_HIGHLIGHT], + "palettes": [shapes.HIGHLIGHT_PALETTE], + "noRotates": [False] + } + }, + { + "component": "Transform", + }, + ] +} + +SPAWN_POINT = { + "name": "spawn_point", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "playerSpawnPoint", + "stateConfigs": [{ + "state": "playerSpawnPoint", + "layer": "logic", + "groups": ["spawnPoints"], + }], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "invisible", + "spriteNames": [], + "spriteRGBColors": [] + } + }, + { + "component": "Transform", + }, + ] +} + + +def get_dry_painted_wall_palette(base_color: shapes.Color + ) -> Mapping[str, shapes.ColorRGBA]: + return { + "*": shapes.scale_color(base_color, 0.75, 200), + "#": shapes.scale_color(base_color, 0.90, 150), + } + + +def get_brush_palette( + base_color: shapes.Color) -> Mapping[str, shapes.ColorRGBA]: + return { + "*": base_color + (255,), + "&": shapes.scale_color(base_color, 0.75, 255), + "o": shapes.scale_color(base_color, 0.55, 255), + "O": (70, 70, 70, 255), + "-": (143, 96, 74, 255), + "+": (117, 79, 61, 255), + "k": (199, 176, 135, 255), + "x": shapes.ALPHA, + } + +PLAYER_COLOR_PALETTES = [] +BRUSH_PALETTES = [] +for human_readable_color in colors.human_readable: + PLAYER_COLOR_PALETTES.append(shapes.get_palette(human_readable_color)) + BRUSH_PALETTES.append(get_brush_palette(human_readable_color)) + + +def create_resource(num_players: int) -> PrefabConfig: + """Configure the prefab to use for all resource objects.""" + # Setup unique states corresponding to each player who can claim the resource. 
+ claim_state_configs = [] + claim_sprite_names = [] + claim_sprite_rgb_colors = [] + for player_idx in range(num_players): + lua_player_idx = player_idx + 1 + player_color = colors.human_readable[player_idx] + wet_sprite_name = "Color" + str(lua_player_idx) + "ResourceSprite" + claim_state_configs.append({ + "state": "claimed_by_" + str(lua_player_idx), + "layer": "upperPhysical", + "sprite": wet_sprite_name, + "groups": ["claimedResources"] + }) + claim_sprite_names.append(wet_sprite_name) + # Use alpha channel to make transparent version of claiming agent's color. + wet_paint_color = player_color + (75,) + claim_sprite_rgb_colors.append(wet_paint_color) + + prefab = { + "name": "resource", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "unclaimed", + "stateConfigs": [ + {"state": "unclaimed", + "layer": "upperPhysical", + "sprite": "UnclaimedResourceSprite"}, + {"state": "destroyed"}, + ] + claim_state_configs, + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "colored_square", + "spriteNames": claim_sprite_names, + "spriteRGBColors": claim_sprite_rgb_colors + } + }, + { + "component": "Transform", + }, + { + "component": "Resource", + "kwargs": { + "initialHealth": 2, + "destroyedState": "destroyed", + "reward": 1.0, + "rewardRate": 0.01, + "rewardDelay": 25, + "delayTillSelfRepair": 15, + "selfRepairProbability": 0.1, + } + }, + ] + } + return prefab + + +def create_resource_texture() -> PrefabConfig: + """Configure the background texture for a resource. 
It looks like a wall.""" + prefab = { + "name": "resource_texture", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "unclaimed", + "stateConfigs": [ + {"state": "unclaimed", + "layer": "lowerPhysical", + "sprite": "UnclaimedResourceSprite"}, + {"state": "destroyed"}, + ], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["UnclaimedResourceSprite",], + "spriteShapes": [shapes.WALL], + "palettes": [{"*": (61, 61, 61, 255), + "#": (80, 80, 80, 255)}], + "noRotates": [True] + } + }, + { + "component": "Transform", + }, + ] + } + return prefab + + +def create_reward_indicator(num_players) -> PrefabConfig: + """Configure object indicating if a resource is currently providing reward.""" + # Setup unique states corresponding to each player who can claim the resource. + claim_state_configs = [] + claim_sprite_names = [] + claim_sprite_shapes = [] + claim_palettes = [] + claim_no_rotates = [] + for player_idx in range(num_players): + lua_player_idx = player_idx + 1 + player_color = colors.human_readable[player_idx] + dry_sprite_name = "Color" + str(lua_player_idx) + "DryPaintSprite" + claim_state_configs.append({ + "state": "dry_claimed_by_" + str(lua_player_idx), + "layer": "overlay", + "sprite": dry_sprite_name, + }) + claim_sprite_names.append(dry_sprite_name) + claim_sprite_shapes.append(shapes.WALL) + claim_palettes.append(get_dry_painted_wall_palette(player_color)) + claim_no_rotates.append(True) + prefab = { + "name": "reward_indicator", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "inactive", + "stateConfigs": [ + {"state": "inactive"}, + ] + claim_state_configs, + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": claim_sprite_names, + "spriteShapes": claim_sprite_shapes, + "palettes": claim_palettes, + "noRotates": claim_no_rotates + } + }, + { + "component": "Transform", + }, + { + 
"component": "RewardIndicator", + }, + ] + } + return prefab + + +def create_damage_indicator() -> PrefabConfig: + """Configure the object indicating whether or not a resource is damaged.""" + damaged_resource_sprite = """ + ,,bb,,,, + ,,bb,bb, + ,,,b,,b, + ,,,b,,,, + ,,,b,,,b + ,,,bb,,b + ,,,bb,,b + b,,,b,,, + """ + damaged_resource_palette = {",": shapes.ALPHA, "b": shapes.BLACK} + prefab = { + "name": "damage_indicator", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "inactive", + "stateConfigs": [ + {"state": "inactive", + "layer": "superDirectionIndicatorLayer"}, + {"state": "damaged", + "layer": "superDirectionIndicatorLayer", + "sprite": "DamagedResource"}, + ], + } + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["DamagedResource"], + "spriteShapes": [damaged_resource_sprite], + "palettes": [damaged_resource_palette], + "noRotates": [True] + } + }, + { + "component": "Transform", + }, + ] + } + return prefab + + +def create_prefabs(num_players: int): + """Returns the prefabs dictionary.""" + prefabs = { + "spawn_point": SPAWN_POINT, + "floor": FLOOR, + "wall": WALL, + "wall_highlight_nw": WALL_HIGHLIGHT_NW, + "wall_highlight_e_w": WALL_HIGHLIGHT_E_W, + "wall_highlight_n_s": WALL_HIGHLIGHT_N_S, + "wall_highlight_ne": WALL_HIGHLIGHT_NE, + "wall_highlight_se": WALL_HIGHLIGHT_SE, + "wall_highlight_sw": WALL_HIGHLIGHT_SW, + "resource": create_resource(num_players=num_players), + "resource_texture": create_resource_texture(), + "reward_indicator": create_reward_indicator(num_players), + "damage_indicator": create_damage_indicator(), + } + return prefabs + + +# Primitive action components. 
+# pylint: disable=bad-whitespace +# pyformat: disable +NOOP = {"move": 0, "turn": 0, "fireZap": 0, "fireClaim": 0} +FORWARD = {"move": 1, "turn": 0, "fireZap": 0, "fireClaim": 0} +STEP_RIGHT = {"move": 2, "turn": 0, "fireZap": 0, "fireClaim": 0} +BACKWARD = {"move": 3, "turn": 0, "fireZap": 0, "fireClaim": 0} +STEP_LEFT = {"move": 4, "turn": 0, "fireZap": 0, "fireClaim": 0} +TURN_LEFT = {"move": 0, "turn": -1, "fireZap": 0, "fireClaim": 0} +TURN_RIGHT = {"move": 0, "turn": 1, "fireZap": 0, "fireClaim": 0} +FIRE_ZAP = {"move": 0, "turn": 0, "fireZap": 1, "fireClaim": 0} +FIRE_CLAIM = {"move": 0, "turn": 0, "fireZap": 0, "fireClaim": 1} +# pyformat: enable +# pylint: enable=bad-whitespace + +ACTION_SET = ( + NOOP, + FORWARD, + BACKWARD, + STEP_LEFT, + STEP_RIGHT, + TURN_LEFT, + TURN_RIGHT, + FIRE_ZAP, + FIRE_CLAIM +) + + +# The Scene object is a non-physical object, its components implement global +# logic. +def create_scene(): + """Creates the global scene.""" + scene = { + "name": "scene", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "scene", + "stateConfigs": [{ + "state": "scene", + }], + } + }, + { + "component": "Transform", + }, + { + "component": "StochasticIntervalEpisodeEnding", + "kwargs": { + "minimumFramesPerEpisode": 1000, + "intervalLength": 100, # Set equal to unroll length. + "probabilityTerminationPerInterval": 0.2 + } + } + ] + } + return scene + + +def create_avatar_object(player_idx: int) -> Mapping[str, Any]: + """Create an avatar object that always sees itself as blue.""" + # Lua is 1-indexed. 
+ lua_index = player_idx + 1 + + color_palette = PLAYER_COLOR_PALETTES[player_idx] + paintbrush_palette = BRUSH_PALETTES[player_idx] + live_state_name = "player{}".format(lua_index) + avatar_sprite_name = "avatarSprite{}".format(lua_index) + avatar_object = { + "name": "avatar", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": live_state_name, + "stateConfigs": [ + # Initial player state. + {"state": live_state_name, + "layer": "upperPhysical", + "sprite": avatar_sprite_name, + "contact": "avatar", + "groups": ["players"]}, + + # Player wait state used when they have been zapped out. + {"state": "playerWait", + "groups": ["playerWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [avatar_sprite_name], + "spriteShapes": [shapes.CUTE_AVATAR_HOLDING_PAINTBRUSH], + "palettes": [{**color_palette, **paintbrush_palette}], + "noRotates": [True] + } + }, + { + "component": "Avatar", + "kwargs": { + "index": lua_index, + "aliveState": live_state_name, + "waitState": "playerWait", + "spawnGroup": "spawnPoints", + "actionOrder": ["move", + "turn", + "fireZap", + "fireClaim"], + "actionSpec": { + "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, + "turn": {"default": 0, "min": -1, "max": 1}, + "fireZap": {"default": 0, "min": 0, "max": 1}, + "fireClaim": {"default": 0, "min": 0, "max": 1}, + }, + "view": { + "left": 5, + "right": 5, + "forward": 9, + "backward": 1, + "centered": False + }, + } + }, + { + "component": "Paintbrush", + "kwargs": { + "shape": shapes.PAINTBRUSH, + "palette": paintbrush_palette, + "playerIndex": lua_index, + } + }, + { + "component": "Zapper", + "kwargs": { + "cooldownTime": 4, + "beamLength": 2, + "beamRadius": 1, + "framesTillRespawn": 1e6, # Effectively never respawn. + "penaltyForBeingZapped": 0, + "rewardForZapping": 0, + # GraduatedSanctionsMarking handles removal instead of Zapper. 
+ "removeHitPlayer": False, + } + }, + { + "component": "ReadyToShootObservation", + }, + { + "component": "ResourceClaimer", + "kwargs": { + "color": color_palette["*"], + "playerIndex": lua_index, + "beamLength": 2, + "beamRadius": 0, + "beamWait": 0, + } + }, + { + "component": "LocationObserver", + "kwargs": { + "objectIsAvatar": True, + "alsoReportOrientation": True + } + }, + { + "component": "Taste", + "kwargs": { + "role": "none", + "rewardAmount": 1.0, + } + }, + ] + } + + return avatar_object + + +def create_marking_overlay(player_idx: int) -> Mapping[str, Any]: + """Create a graduated sanctions marking overlay object.""" + # Lua is 1-indexed. + lua_idx = player_idx + 1 + + marking_object = { + "name": "avatar_marking", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "avatarMarkingWait", + "stateConfigs": [ + # Declare one state per level of the hit logic. + {"state": "level_1", + "layer": "superOverlay", + "sprite": "sprite_for_level_1"}, + {"state": "level_2", + "layer": "superOverlay", + "sprite": "sprite_for_level_2"}, + + # Invisible inactive (zapped out) overlay type. 
+ {"state": "avatarMarkingWait", + "groups": ["avatarMarkingWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": ["sprite_for_level_1", + "sprite_for_level_2"], + "spriteShapes": [MARKING_SPRITE, + MARKING_SPRITE], + "palettes": [get_marking_palette(0.0), + get_marking_palette(1.0)], + "noRotates": [True] * 3 + } + }, + { + "component": "GraduatedSanctionsMarking", + "kwargs": { + "playerIndex": lua_idx, + "waitState": "avatarMarkingWait", + "hitName": "zapHit", + "recoveryTime": 50, + "hitLogic": [ + {"levelIncrement": 1, + "sourceReward": 0, + "targetReward": 0, + "freeze": 25}, + {"levelIncrement": -1, + "sourceReward": 0, + "targetReward": 0, + "remove": True} + ], + } + }, + ] + } + return marking_object + + +def create_avatar_and_associated_objects(num_players): + """Returns list of avatars and their associated marking objects.""" + avatar_objects = [] + additional_objects = [] + for player_idx in range(0, num_players): + game_object = create_avatar_object(player_idx) + avatar_objects.append(game_object) + + marking_object = create_marking_overlay(player_idx) + additional_objects.append(marking_object) + + return avatar_objects + additional_objects + + +def get_config(): + """Default configuration.""" + config = config_dict.ConfigDict() + + # Action set configuration. + config.action_set = ACTION_SET + # Observation format configuration. + config.individual_observation_names = [ + "RGB", + "READY_TO_SHOOT", + "POSITION", + "ORIENTATION", + ] + config.global_observation_names = [ + "WORLD.RGB", + ] + + # The specs of the environment (from a single-agent perspective). 
+ config.action_spec = specs.action(len(ACTION_SET)) + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(120, 184), + }) + + config.valid_roles = frozenset({"default"}) + + return config + + +def build( + roles: Sequence[str], + config: config_dict.ConfigDict, +) -> Mapping[str, Any]: + """Build substrate definition given player roles.""" + num_players = len(roles) + # Build the rest of the substrate definition. + substrate_definition = dict( + levelName="territory", + levelDirectory="meltingpot/lua/levels", + numPlayers=num_players, + # Define upper bound of episode length since episodes end stochastically. + maxEpisodeLengthFrames=5000, + spriteSize=8, + topology=config.layout.topology, # Choose from ["BOUNDED", "TORUS"], + simulation={ + "map": config.layout.ascii_map, + "gameObjects": create_avatar_and_associated_objects(num_players), + "scene": create_scene(), + "prefabs": create_prefabs(num_players), + "charPrefabMap": config.layout.char_prefab_map, + }, + ) + return substrate_definition diff --git a/meltingpot/python/configs/substrates/territory__inside_out.py b/meltingpot/python/configs/substrates/territory__inside_out.py new file mode 100644 index 00000000..5ea4de3b --- /dev/null +++ b/meltingpot/python/configs/substrates/territory__inside_out.py @@ -0,0 +1,111 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Territory: Inside Out. + +Example video: https://youtu.be/LdbIjnHaisU + +See _Territory: Open_ for the general description of the mechanics at play in +this substrate. + +In this substrate, _Territory: Inside Out_, players start on the outside of +a randomly generated maze of resources. They must move from their starting +locations inward toward the center of the map to claim territory. In so doing +they will quickly encounter their coplayers who will be doing the same thing +from their own starting locations. In order to get high scores, agents must be +able to rapidly negotiate tacit agreements with one another concerning the +borders between their respective territories. Since the spatial arrangement of +the resources differs from episode to episode, so too does the negotiation +problem to be solved. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import territory as base_config +from meltingpot.python.utils.substrates import map_helpers +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +F=====================T +|,,,,,,,,,,P,,,,,,,,,,| +|,P,,,,QQ,,,,,QQ,,,,P,| +|,,RRR,,,,RRR,,,,RRR,,| +|,,R,RAAAAR,RAAAAR,R,,| +|,,RRR,BB,RRR,BB,RRR,,| +|,,,A,,BB,,A,,BB,,A,,,| +|,Q,ABBRRBBABBRRBBA,Q,| +|,Q,ABBRRBBABBRRBBA,Q,| +|,,,A,,BB,,A,,BB,,A,,,| +|,,RRR,BB,RRR,BB,RRR,,| +|P,R,RAAAAR,RAAAAR,R,P| +|,,RRR,BB,RRR,BB,RRR,,| +|,,,A,,BB,,A,,BB,,A,,,| +|,Q,ABBRRBBABBRRBBA,Q,| +|,Q,ABBRRBBABBRRBBA,Q,| +|,,,A,,BB,,A,,BB,,A,,,| +|,,RRR,BB,RRR,BB,RRR,,| +|,,R,RAAAAR,RAAAAR,R,,| +|,,RRR,,,,RRR,,,,RRR,,| +|,P,,,,QQ,,,,,QQ,,,,P,| +|,,,,,,,,,,P,,,,,,,,,,| +L=====================J +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. 
+resource_associated_prefabs = ["floor", "resource_texture", "resource", + "reward_indicator", "damage_indicator"] +resource = {"type": "all", "list": resource_associated_prefabs} +spawn_point_associated_prefabs = ["floor", "spawn_point"] +spawn_point = {"type": "all", "list": spawn_point_associated_prefabs} +CHAR_PREFAB_MAP = { + "P": spawn_point, + "Q": map_helpers.a_or_b_with_odds(spawn_point, "floor", odds=(1, 6)), + ",": "floor", + "F": {"type": "all", "list": ["wall", "wall_highlight_nw"]}, + "|": {"type": "all", "list": ["wall", "wall_highlight_e_w"]}, + "=": {"type": "all", "list": ["wall", "wall_highlight_n_s"]}, + "T": {"type": "all", "list": ["wall", "wall_highlight_ne"]}, + "J": {"type": "all", "list": ["wall", "wall_highlight_se"]}, + "L": {"type": "all", "list": ["wall", "wall_highlight_sw"]}, + "R": resource, + "A": map_helpers.a_or_b_with_odds(resource, "floor", odds=(2, 1)), + "B": map_helpers.a_or_b_with_odds(resource, "floor", odds=(1, 3)), +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + config.layout.topology = "BOUNDED" + + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(184, 184), + }) + + # The roles assigned to each player. 
+ config.default_player_roles = ("default",) * 5 + + return config diff --git a/meltingpot/python/configs/substrates/territory__open.py b/meltingpot/python/configs/substrates/territory__open.py new file mode 100644 index 00000000..dc875a21 --- /dev/null +++ b/meltingpot/python/configs/substrates/territory__open.py @@ -0,0 +1,121 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Territory: Open. + +Example video: https://youtu.be/F1OO6LFIZHI + +Players can claim a resource in two ways: (1) by touching it, and (2) by using a +"claiming beam", different from the zapping beam, which they also have. +Claimed resources are colored in the unique color of the player that claimed +them. Unclaimed resources are gray. Players cannot walk through resources, they +are like walls. + +Once a resource has been claimed a countdown begins. After 100 timesteps, the +claimed resource becomes active. This is visualized by a white and gray plus +sign appearing on top. Active resources provide reward stochastically to the +player that claimed them at a rate of 0.01 per timestep. Thus the more resources +a player claims and can hold until they become active, the more reward they +obtain. + +The claiming beam is of length 2. It can color through a resource to +simultaneously color a second resource on the other side. 
If two players stand +on opposite sides of a wall of resources of width 2 and one player claims all +the way across to the other side (closer to the other player than themselves) +then the player on the other side might reasonably perceive that as a somewhat +aggressive action. Less aggressive of course than the other option both players +have: using their zapping beam. If any resource is zapped twice then it gets +permanently destroyed. It no longer functions as a wall or a resource, allowing +players to pass through. + +Like resources, when players are hit by a zapping beam they also get removed +from the game and never regenerate. Once a player has been zapped out it is +gone. All resources it claimed are immediately returned to the unclaimed state. +""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import territory as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +F=====================================T +|,,RRRRR,,RR,,RR,,,,,,RR,,,,,,RR,,,,,,| +|,,,,,RR,,,,,,RR,,,,,,RR,,,,,,,,,,,,,,| +|,,,,,RR,,,,,,RR,,,,,,,,,,,,,,,,,,,,,,| +|,RR,,RR,,,,,,RR,,,,,,,,,,R,,,RR,,,RR,| +|,,,,,RR,,,,,,RR,,,,,,,,,,R,,,RR,,,,,,| +|,,,,,RR,,,,,,,,,,RRRR,,,,R,,,,,,,,,,,| +|,,RR,RR,,,,,,,,,,,,,,,,,,R,,,,,,,,,,,| +|,,,,,RR,,,,,,,RR,,,,,,,,,R,,,,,,,,,,,| +|,,,,,RRRR,,,,,,,,,,,,,,,,,,,,,RR,,,,,| +|,,,,,,,,,,,,,,,,,,,,RR,,,,,,,,,,,,,,,| +|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,| +|,,RRRR,,,RRRRRR,,,,,,,,,,,RR,,,,R,,,,| +|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,R,,,,| +|,,,,,,,,,,,,,,,,RR,,,,,,,,,,,,,,,,P,,| +|,,,,RR,,,,,,,,,,,,,,,,RR,,,,,,,P,,,,,| +|,,,,,,,,,RR,,,,,,,,,,,,,,,,,,,,,P,,P,| +|,,,,,,,,,,,,,,,,,,,,,,,,,,,P,,P,,,,,,| +|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,P,,,P,,,| +|,,P,,,,P,,,P,,P,,,P,,,,P,P,,P,,P,,P,,| +|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,| +|,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,| +L=====================================J +""" + +# `prefab` determines which prefab game object to use for 
each `char` in the +# ascii map. +resource_associated_prefabs = ["floor", "resource_texture", "resource", + "reward_indicator", "damage_indicator"] +resource = {"type": "all", "list": resource_associated_prefabs} +spawn_point_associated_prefabs = ["floor", "spawn_point"] +spawn_point = {"type": "all", "list": spawn_point_associated_prefabs} +CHAR_PREFAB_MAP = { + "P": spawn_point, + ",": "floor", + "F": {"type": "all", "list": ["wall", "wall_highlight_nw"]}, + "|": {"type": "all", "list": ["wall", "wall_highlight_e_w"]}, + "=": {"type": "all", "list": ["wall", "wall_highlight_n_s"]}, + "T": {"type": "all", "list": ["wall", "wall_highlight_ne"]}, + "J": {"type": "all", "list": ["wall", "wall_highlight_se"]}, + "L": {"type": "all", "list": ["wall", "wall_highlight_sw"]}, + "R": resource, +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + config.layout.topology = "BOUNDED" + + # The specs of the environment (from a single-agent perspective). + config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(184, 312), + }) + + # The roles assigned to each player. + config.default_player_roles = ("default",) * 9 + + return config diff --git a/meltingpot/python/configs/substrates/territory__rooms.py b/meltingpot/python/configs/substrates/territory__rooms.py new file mode 100644 index 00000000..e3c800c2 --- /dev/null +++ b/meltingpot/python/configs/substrates/territory__rooms.py @@ -0,0 +1,106 @@ +# Copyright 2022 DeepMind Technologies Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration for Territory: Rooms. + +Example video: https://youtu.be/4URkGR9iv9k + +See _Territory: Open_ for the general description of the mechanics at play in +this substrate. + +In this substrate, _Territory: Rooms_, individuals start in segregated rooms +that strongly suggest a partition individuals could adhere to. They can break +down the walls of these regions and invade each other's "natural territory", but +the destroyed resources are lost forever. A peaceful partition is possible at +the start of the episode, and the policy to achieve it is easy to implement. But +if any agent gets too greedy and invades, it buys itself a chance of large +rewards, but also chances inflicting significant chaos and deadweight loss on +everyone if its actions spark wider conflict. The reason it can spiral out of +control is that once an agent's neighbor has left their natural territory then +it becomes rational to invade the space, leaving one's own territory undefended, +creating more opportunity for mischief by others. 
+""" + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import territory as base_config +from meltingpot.python.utils.substrates import specs + +build = base_config.build + +ASCII_MAP = """ +JRRRRRLJRRRRRLJRRRRRL +R,,,,,RR,,,,,RR,,,,,R +R,,,,,RR,,,,,RR,,,,,R +R,,P,,RR,,P,,RR,,P,,R +R,,,,,RR,,,,,RR,,,,,R +R,,,,,RR,,,,,RR,,,,,R +TRRRRRFTRRRRRFTRRRRRF +JRRRRRLJRRRRRLJRRRRRL +R,,,,,RR,,,,,RR,,,,,R +R,,,,,RR,,,,,RR,,,,,R +R,,P,,RR,,P,,RR,,P,,R +R,,,,,RR,,,,,RR,,,,,R +R,,,,,RR,,,,,RR,,,,,R +TRRRRRFTRRRRRFTRRRRRF +JRRRRRLJRRRRRLJRRRRRL +R,,,,,RR,,,,,RR,,,,,R +R,,,,,RR,,,,,RR,,,,,R +R,,P,,RR,,P,,RR,,P,,R +R,,,,,RR,,,,,RR,,,,,R +R,,,,,RR,,,,,RR,,,,,R +TRRRRRFTRRRRRFTRRRRRF +""" + +# `prefab` determines which prefab game object to use for each `char` in the +# ascii map. +resource_associated_prefabs = ["floor", "resource_texture", "resource", + "reward_indicator", "damage_indicator"] +resource = {"type": "all", "list": resource_associated_prefabs} +spawn_point_associated_prefabs = ["floor", "spawn_point"] +spawn_point = {"type": "all", "list": spawn_point_associated_prefabs} +CHAR_PREFAB_MAP = { + "P": spawn_point, + ",": "floor", + "W": "wall", + "F": {"type": "all", "list": ["wall", "wall_highlight_nw"]}, + "T": {"type": "all", "list": ["wall", "wall_highlight_ne"]}, + "J": {"type": "all", "list": ["wall", "wall_highlight_se"]}, + "L": {"type": "all", "list": ["wall", "wall_highlight_sw"]}, + "R": resource, +} + + +def get_config(): + """Default configuration.""" + config = base_config.get_config() + + # Override the map layout settings. + config.layout = config_dict.ConfigDict() + config.layout.ascii_map = ASCII_MAP + config.layout.char_prefab_map = CHAR_PREFAB_MAP + config.layout.topology = "TORUS" + + # The specs of the environment (from a single-agent perspective). 
+ config.timestep_spec = specs.timestep({ + "RGB": specs.OBSERVATION["RGB"], + "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], + "POSITION": specs.OBSERVATION["POSITION"], + "ORIENTATION": specs.OBSERVATION["ORIENTATION"], + "WORLD.RGB": specs.rgb(168, 168), + }) + + # The roles assigned to each player. + config.default_player_roles = ("default",) * 9 + + return config diff --git a/meltingpot/python/configs/substrates/territory_open.py b/meltingpot/python/configs/substrates/territory_open.py deleted file mode 100644 index fbad5d26..00000000 --- a/meltingpot/python/configs/substrates/territory_open.py +++ /dev/null @@ -1,537 +0,0 @@ -# Copyright 2020 DeepMind Technologies Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Configuration for Territory: Open. - -Example video: https://youtu.be/3hB8lABa6nI - -Players can claim a resource in two ways: (1) by touching it, and (2) by using a -"claiming beam", different from the zapping beam, which they also have. -Claimed resources are colored in the unique color of the player that claimed -them. Unclaimed resources are gray. Players cannot walk through resources, they -are like walls. - -Once a resource has been claimed a countdown begins. After 100 timesteps, the -claimed resource becomes active. This is visualized by a white and gray plus -sign appearing on top. Active resources provide reward stochastically to the -player that claimed them at a rate of 0.01 per timestep. 
Thus the more resources -a player claims and can hold until they become active, the more reward they -obtain. - -The claiming beam is of length 2. It can color through a resource to -simultaneously color a second resource on the other side. If two players stand -on opposite sides of a wall of resources of width 2 and one player claims all -the way across to the other side (closer to the other player than themselves) -then the player on the other side might reasonably perceive that as a somewhat -aggressive action. Less aggressive of course than the other option both players -have: using their zapping beam. If any resource is zapped twice then it gets -permanently destroyed. It no longer functions as a wall or a resource, allowing -players to pass through. - -Like resources, when players are hit by a zapping beam they also get removed -from the game and never regenerate. Once a player has been zapped out it is -gone. All resources it claimed are immediately returned to the unclaimed state. -""" - -from typing import Any, Dict - -from ml_collections import config_dict -from meltingpot.python.utils.substrates import colors -from meltingpot.python.utils.substrates import game_object_utils -from meltingpot.python.utils.substrates import shapes -from meltingpot.python.utils.substrates import specs - -_COMPASS = ["N", "E", "S", "W"] - -# This number just needs to be greater than the number of players. -MAX_ALLOWED_NUM_PLAYERS = 10 - -DEFAULT_ASCII_MAP = """ -WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW -W RRRRR RR RR RR RR W -W RR RR RR W -W RR RR W -W RR RR RR R RR RR W -W RR RR R RR W -W RR RRRR R W -W RR RR R W -W RR RR R W -W RRRR RR W -W RR W -W W -W RRRR RRRRRR RR R W -W R W -W RR P W -W RR RR P W -W RR P P W -W P P W -W P P W -W P P P P P P P P P P W -W W -W W -WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW -""" - -# `prefab` determines which prefab game object to use for each `char` in the -# ascii map. 
-CHAR_PREFAB_MAP = { - "P": "spawn_point", - "W": "wall", - "R": {"type": "all", "list": ["resource", "reward_indicator"]}, -} - -WALL = { - "name": "wall", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "wall", - "stateConfigs": [{ - "state": "wall", - "layer": "upperPhysical", - "sprite": "Wall", - }], - } - }, - { - "component": "Appearance", - "kwargs": { - "renderMode": "ascii_shape", - "spriteNames": ["Wall",], - "spriteShapes": [shapes.WALL], - "palettes": [{"*": (95, 95, 95, 255), - "&": (100, 100, 100, 255), - "@": (109, 109, 109, 255), - "#": (152, 152, 152, 255)}], - "noRotates": [True] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "AllBeamBlocker", - "kwargs": {} - }, - ] -} - -SPAWN_POINT = { - "name": "spawn_point", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "playerSpawnPoint", - "stateConfigs": [{ - "state": "playerSpawnPoint", - "layer": "logic", - "groups": ["spawnPoints"], - }], - } - }, - { - "component": "Appearance", - "kwargs": { - "renderMode": "invisible", - "spriteNames": [], - "spriteRGBColors": [] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - ] -} - -RESOURCE = { - "name": "resource", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "unclaimed", - "stateConfigs": [ - {"state": "unclaimed", - "layer": "upperPhysical", - "sprite": "UnclaimedResourceSprite", - "groups": ["unclaimedResources"]}, - {"state": "destroyed"}, - ], - } - }, - { - "component": "Appearance", - "kwargs": { - "spriteNames": ["UnclaimedResourceSprite"], - # This color is grey. 
- "spriteRGBColors": [(64, 64, 64, 255)] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "Resource", - "kwargs": { - "initialHealth": 2, - "destroyedState": "destroyed", - "reward": 1.0, - "rewardRate": 0.01, - "rewardDelay": 100 - } - }, - ] -} - -REWARD_INDICATOR = { - "name": "reward_indicator", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "inactive", - "stateConfigs": [ - {"state": "active", - "layer": "overlay", - "sprite": "ActivelyRewardingResource"}, - {"state": "inactive"}, - ], - } - }, - { - "component": "Appearance", - "kwargs": { - "spriteNames": ["ActivelyRewardingResource",], - "renderMode": "ascii_shape", - "spriteShapes": [shapes.PLUS_IN_BOX], - "palettes": [{"*": (86, 86, 86, 65), - "#": (202, 202, 202, 105), - "@": (128, 128, 128, 135), - "x": (0, 0, 0, 0)}], - "noRotates": [True] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "RewardIndicator", - "kwargs": { - } - }, - ] -} - -# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use -# for the player at the corresponding index. -PLAYER_COLOR_PALETTES = [] -for i in range(MAX_ALLOWED_NUM_PLAYERS): - PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[i])) - -# Set up player-specific settings for resources. 
-for j, color in enumerate(colors.palette[:MAX_ALLOWED_NUM_PLAYERS]): - sprite_name = "Color" + str(j + 1) + "ResourceSprite" - game_object_utils.get_first_named_component( - RESOURCE, - "StateManager")["kwargs"]["stateConfigs"].append({ - "state": "claimed_by_" + str(j + 1), - "layer": "upperPhysical", - "sprite": sprite_name, - "groups": ["claimedResources"] - }) - game_object_utils.get_first_named_component( - RESOURCE, - "Appearance")["kwargs"]["spriteNames"].append(sprite_name) - game_object_utils.get_first_named_component( - RESOURCE, - "Appearance")["kwargs"]["spriteRGBColors"].append(color) - -# PREFABS is a dictionary mapping names to template game objects that can -# be cloned and placed in multiple locations accoring to an ascii map. -PREFABS = { - "wall": WALL, - "spawn_point": SPAWN_POINT, - "resource": RESOURCE, - "reward_indicator": REWARD_INDICATOR, -} - -# Primitive action components. -# pylint: disable=bad-whitespace -# pyformat: disable -NOOP = {"move": 0, "turn": 0, "fireZap": 0, "fireClaim": 0} -FORWARD = {"move": 1, "turn": 0, "fireZap": 0, "fireClaim": 0} -STEP_RIGHT = {"move": 2, "turn": 0, "fireZap": 0, "fireClaim": 0} -BACKWARD = {"move": 3, "turn": 0, "fireZap": 0, "fireClaim": 0} -STEP_LEFT = {"move": 4, "turn": 0, "fireZap": 0, "fireClaim": 0} -TURN_LEFT = {"move": 0, "turn": -1, "fireZap": 0, "fireClaim": 0} -TURN_RIGHT = {"move": 0, "turn": 1, "fireZap": 0, "fireClaim": 0} -FIRE_ZAP = {"move": 0, "turn": 0, "fireZap": 1, "fireClaim": 0} -FIRE_CLAIM = {"move": 0, "turn": 0, "fireZap": 0, "fireClaim": 1} -# pyformat: enable -# pylint: enable=bad-whitespace - -ACTION_SET = ( - NOOP, - FORWARD, - BACKWARD, - STEP_LEFT, - STEP_RIGHT, - TURN_LEFT, - TURN_RIGHT, - FIRE_ZAP, - FIRE_CLAIM -) - - -# The Scene object is a non-physical object, its components implement global -# logic. 
-def create_scene(): - """Creates the global scene.""" - scene = { - "name": "scene", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "scene", - "stateConfigs": [{ - "state": "scene", - }], - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, - }, - { - "component": "StochasticIntervalEpisodeEnding", - "kwargs": { - "minimumFramesPerEpisode": 1000, - "intervalLength": 100, # Set equal to unroll length. - "probabilityTerminationPerInterval": 0.2 - } - } - ] - } - return scene - - -def create_avatar_object(player_idx: int) -> Dict[str, Any]: - """Create an avatar object that always sees itself as blue.""" - # Lua is 1-indexed. - lua_index = player_idx + 1 - - color_palette = PLAYER_COLOR_PALETTES[player_idx] - live_state_name = "player{}".format(lua_index) - avatar_sprite_name = "avatarSprite{}".format(lua_index) - avatar_object = { - "name": "avatar", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": live_state_name, - "stateConfigs": [ - # Initial player state. - {"state": live_state_name, - "layer": "upperPhysical", - "sprite": avatar_sprite_name, - "contact": "avatar", - "groups": ["players"]}, - - # Player wait state used when they have been zapped out. 
- {"state": "playerWait", - "groups": ["playerWaits"]}, - ] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "Appearance", - "kwargs": { - "renderMode": "ascii_shape", - "spriteNames": [avatar_sprite_name], - "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [color_palette], - "noRotates": [True] - } - }, - { - "component": "Avatar", - "kwargs": { - "index": lua_index, - "aliveState": live_state_name, - "waitState": "playerWait", - "spawnGroup": "spawnPoints", - "actionOrder": ["move", - "turn", - "fireZap", - "fireClaim"], - "actionSpec": { - "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, - "turn": {"default": 0, "min": -1, "max": 1}, - "fireZap": {"default": 0, "min": 0, "max": 1}, - "fireClaim": {"default": 0, "min": 0, "max": 1}, - }, - "view": { - "left": 5, - "right": 5, - "forward": 9, - "backward": 1, - "centered": False - }, - } - }, - { - "component": "AvatarDirectionIndicator", - # We do not normally use direction indicators for the MAGI suite, - # but we do use them for territory because they function to claim - # any resources they contact. - "kwargs": {"color": (202, 202, 202, 50)} - }, - { - "component": "Zapper", - "kwargs": { - "cooldownTime": 2, - "beamLength": 3, - "beamRadius": 1, - "framesTillRespawn": 1e6, # Effectively never respawn. 
- "penaltyForBeingZapped": 0, - "rewardForZapping": 0, - } - }, - { - "component": "ReadyToShootObservation", - }, - { - "component": "ResourceClaimer", - "kwargs": { - "color": color_palette["*"], - "playerIndex": lua_index, - "beamLength": 2, - "beamRadius": 0, - "beamWait": 0, - } - }, - { - "component": "LocationObserver", - "kwargs": { - "objectIsAvatar": True, - "alsoReportOrientation": True - } - }, - { - "component": "Taste", - "kwargs": { - "role": "none", - "rewardAmount": 1.0, - "firstClaimRewardMultiplier": 10.0, - } - }, - ] - } - - return avatar_object - - -def create_avatar_objects(num_players): - """Returns list of avatar objects of length 'num_players'.""" - avatar_objects = [] - for player_idx in range(0, num_players): - game_object = create_avatar_object(player_idx) - avatar_objects.append(game_object) - - return avatar_objects - - -def create_lab2d_settings(num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "territory", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - # Define upper bound of episode length since episodes end stochastically. - "maxEpisodeLengthFrames": 2000, - "spriteSize": 8, - "topology": "BOUNDED", # Choose from ["BOUNDED", "TORUS"], - "simulation": { - "map": DEFAULT_ASCII_MAP, - "gameObjects": create_avatar_objects(num_players), - "scene": create_scene(), - "prefabs": PREFABS, - "charPrefabMap": CHAR_PREFAB_MAP, - }, - } - return lab2d_settings - - -def get_config(factory=create_lab2d_settings): - """Default configuration for training on the territory level.""" - config = config_dict.ConfigDict() - - # Lua script configuration. - config.num_players = 9 - config.lab2d_settings = factory(config.num_players) - - # Action set configuration. - config.action_set = ACTION_SET - # Observation format configuration. 
- config.individual_observation_names = [ - "RGB", - "READY_TO_SHOOT", - "POSITION", - "ORIENTATION", - ] - config.global_observation_names = [ - "WORLD.RGB", - ] - - # The specs of the environment (from a single-agent perspective). - config.action_spec = specs.action(len(ACTION_SET)) - config.timestep_spec = specs.timestep({ - "RGB": specs.OBSERVATION["RGB"], - "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], - "POSITION": specs.OBSERVATION["POSITION"], - "ORIENTATION": specs.OBSERVATION["ORIENTATION"], - "WORLD.RGB": specs.rgb(184, 312), - }) - - return config diff --git a/meltingpot/python/configs/substrates/territory_rooms.py b/meltingpot/python/configs/substrates/territory_rooms.py deleted file mode 100644 index 73b6bd8f..00000000 --- a/meltingpot/python/configs/substrates/territory_rooms.py +++ /dev/null @@ -1,524 +0,0 @@ -# Copyright 2020 DeepMind Technologies Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Configuration for Territory: Rooms. - -Example video: https://youtu.be/u0YOiShqzA4 - -See _Territory: Open_ for the general description of the mechanics at play in -this substrate. - -In this substrate, _Territory: Rooms_, individuals start in segregated rooms -that strongly suggest a partition individuals could adhere to. They can break -down the walls of these regions and invade each other's "natural territory", but -the destroyed resources are lost forever. 
A peaceful partition is possible at -the start of the episode, and the policy to achieve it is easy to implement. But -if any agent gets too greedy and invades, it buys itself a chance of large -rewards, but also chances inflicting significant chaos and deadweight loss on -everyone if its actions spark wider conflict. The reason it can spiral out of -control is that once an agent's neighbor has left their natural territory then -it becomes rational to invade the space, leaving one's own territory undefended, -creating more opportunity for mischief by others. -""" - -from typing import Any, Dict - -from ml_collections import config_dict -from meltingpot.python.utils.substrates import colors -from meltingpot.python.utils.substrates import game_object_utils -from meltingpot.python.utils.substrates import shapes -from meltingpot.python.utils.substrates import specs - -_COMPASS = ["N", "E", "S", "W"] - -# This number just needs to be greater than the number of players. -MAX_ALLOWED_NUM_PLAYERS = 10 - -DEFAULT_ASCII_MAP = """ -WRRRRRWWRRRRRWWRRRRRW -R RR RR R -R RR RR R -R P RR P RR P R -R RR RR R -R RR RR R -WRRRRRWWRRRRRWWRRRRRW -WRRRRRWWRRRRRWWRRRRRW -R RR RR R -R RR RR R -R P RR P RR P R -R RR RR R -R RR RR R -WRRRRRWWRRRRRWWRRRRRW -WRRRRRWWRRRRRWWRRRRRW -R RR RR R -R RR RR R -R P RR P RR P R -R RR RR R -R RR RR R -WRRRRRWWRRRRRWWRRRRRW -""" - -# `prefab` determines which prefab game object to use for each `char` in the -# ascii map. 
-CHAR_PREFAB_MAP = { - "P": "spawn_point", - "W": "wall", - "R": {"type": "all", "list": ["resource", "reward_indicator"]}, -} - -WALL = { - "name": "wall", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "wall", - "stateConfigs": [{ - "state": "wall", - "layer": "upperPhysical", - "sprite": "Wall", - }], - } - }, - { - "component": "Appearance", - "kwargs": { - "renderMode": "ascii_shape", - "spriteNames": ["Wall",], - "spriteShapes": [shapes.WALL], - "palettes": [{"*": (95, 95, 95, 255), - "&": (100, 100, 100, 255), - "@": (109, 109, 109, 255), - "#": (152, 152, 152, 255)}], - "noRotates": [True] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "AllBeamBlocker", - "kwargs": {} - }, - ] -} - -SPAWN_POINT = { - "name": "spawn_point", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "playerSpawnPoint", - "stateConfigs": [{ - "state": "playerSpawnPoint", - "layer": "logic", - "groups": ["spawnPoints"], - }], - } - }, - { - "component": "Appearance", - "kwargs": { - "renderMode": "invisible", - "spriteNames": [], - "spriteRGBColors": [] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - ] -} - -RESOURCE = { - "name": "resource", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "unclaimed", - "stateConfigs": [ - {"state": "unclaimed", - "layer": "upperPhysical", - "sprite": "UnclaimedResourceSprite", - "groups": ["unclaimedResources"]}, - {"state": "destroyed"}, - ], - } - }, - { - "component": "Appearance", - "kwargs": { - "spriteNames": ["UnclaimedResourceSprite"], - # This color is grey. 
- "spriteRGBColors": [(64, 64, 64, 255)] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "Resource", - "kwargs": { - "initialHealth": 2, - "destroyedState": "destroyed", - "reward": 1.0, - "rewardRate": 0.01, - "rewardDelay": 100 - } - }, - ] -} - -REWARD_INDICATOR = { - "name": "reward_indicator", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "inactive", - "stateConfigs": [ - {"state": "active", - "layer": "overlay", - "sprite": "ActivelyRewardingResource"}, - {"state": "inactive"}, - ], - } - }, - { - "component": "Appearance", - "kwargs": { - "spriteNames": ["ActivelyRewardingResource",], - "renderMode": "ascii_shape", - "spriteShapes": [shapes.PLUS_IN_BOX], - "palettes": [{"*": (86, 86, 86, 65), - "#": (202, 202, 202, 105), - "@": (128, 128, 128, 135), - "x": (0, 0, 0, 0)}], - "noRotates": [True] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "RewardIndicator", - "kwargs": { - } - }, - ] -} - -# PLAYER_COLOR_PALETTES is a list with each entry specifying the color to use -# for the player at the corresponding index. -PLAYER_COLOR_PALETTES = [] -for i in range(MAX_ALLOWED_NUM_PLAYERS): - PLAYER_COLOR_PALETTES.append(shapes.get_palette(colors.palette[i])) - -# Set up player-specific settings for resources. 
-for j, color in enumerate(colors.palette[:MAX_ALLOWED_NUM_PLAYERS]): - sprite_name = "Color" + str(j + 1) + "ResourceSprite" - game_object_utils.get_first_named_component( - RESOURCE, - "StateManager")["kwargs"]["stateConfigs"].append({ - "state": "claimed_by_" + str(j + 1), - "layer": "upperPhysical", - "sprite": sprite_name, - "groups": ["claimedResources"] - }) - game_object_utils.get_first_named_component( - RESOURCE, - "Appearance")["kwargs"]["spriteNames"].append(sprite_name) - game_object_utils.get_first_named_component( - RESOURCE, - "Appearance")["kwargs"]["spriteRGBColors"].append(color) - -# PREFABS is a dictionary mapping names to template game objects that can -# be cloned and placed in multiple locations accoring to an ascii map. -PREFABS = { - "wall": WALL, - "spawn_point": SPAWN_POINT, - "resource": RESOURCE, - "reward_indicator": REWARD_INDICATOR, -} - -# Primitive action components. -# pylint: disable=bad-whitespace -# pyformat: disable -NOOP = {"move": 0, "turn": 0, "fireZap": 0, "fireClaim": 0} -FORWARD = {"move": 1, "turn": 0, "fireZap": 0, "fireClaim": 0} -STEP_RIGHT = {"move": 2, "turn": 0, "fireZap": 0, "fireClaim": 0} -BACKWARD = {"move": 3, "turn": 0, "fireZap": 0, "fireClaim": 0} -STEP_LEFT = {"move": 4, "turn": 0, "fireZap": 0, "fireClaim": 0} -TURN_LEFT = {"move": 0, "turn": -1, "fireZap": 0, "fireClaim": 0} -TURN_RIGHT = {"move": 0, "turn": 1, "fireZap": 0, "fireClaim": 0} -FIRE_ZAP = {"move": 0, "turn": 0, "fireZap": 1, "fireClaim": 0} -FIRE_CLAIM = {"move": 0, "turn": 0, "fireZap": 0, "fireClaim": 1} -# pyformat: enable -# pylint: enable=bad-whitespace - -ACTION_SET = ( - NOOP, - FORWARD, - BACKWARD, - STEP_LEFT, - STEP_RIGHT, - TURN_LEFT, - TURN_RIGHT, - FIRE_ZAP, - FIRE_CLAIM -) - - -# The Scene object is a non-physical object, its components implement global -# logic. 
-def create_scene(): - """Creates the global scene.""" - scene = { - "name": "scene", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": "scene", - "stateConfigs": [{ - "state": "scene", - }], - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - }, - }, - { - "component": "StochasticIntervalEpisodeEnding", - "kwargs": { - "minimumFramesPerEpisode": 1000, - "intervalLength": 100, # Set equal to unroll length. - "probabilityTerminationPerInterval": 0.2 - } - } - ] - } - return scene - - -def create_avatar_object(player_idx: int) -> Dict[str, Any]: - """Create an avatar object that always sees itself as blue.""" - # Lua is 1-indexed. - lua_index = player_idx + 1 - - color_palette = PLAYER_COLOR_PALETTES[player_idx] - live_state_name = "player{}".format(lua_index) - avatar_sprite_name = "avatarSprite{}".format(lua_index) - avatar_object = { - "name": "avatar", - "components": [ - { - "component": "StateManager", - "kwargs": { - "initialState": live_state_name, - "stateConfigs": [ - # Initial player state. - {"state": live_state_name, - "layer": "upperPhysical", - "sprite": avatar_sprite_name, - "contact": "avatar", - "groups": ["players"]}, - - # Player wait state used when they have been zapped out. 
- {"state": "playerWait", - "groups": ["playerWaits"]}, - ] - } - }, - { - "component": "Transform", - "kwargs": { - "position": (0, 0), - "orientation": "N" - } - }, - { - "component": "Appearance", - "kwargs": { - "renderMode": "ascii_shape", - "spriteNames": [avatar_sprite_name], - "spriteShapes": [shapes.CUTE_AVATAR], - "palettes": [color_palette], - "noRotates": [True] - } - }, - { - "component": "Avatar", - "kwargs": { - "index": lua_index, - "aliveState": live_state_name, - "waitState": "playerWait", - "spawnGroup": "spawnPoints", - "actionOrder": ["move", - "turn", - "fireZap", - "fireClaim"], - "actionSpec": { - "move": {"default": 0, "min": 0, "max": len(_COMPASS)}, - "turn": {"default": 0, "min": -1, "max": 1}, - "fireZap": {"default": 0, "min": 0, "max": 1}, - "fireClaim": {"default": 0, "min": 0, "max": 1}, - }, - "view": { - "left": 5, - "right": 5, - "forward": 9, - "backward": 1, - "centered": False - }, - } - }, - { - "component": "AvatarDirectionIndicator", - # We do not normally use direction indicators for the MAGI suite, - # but we do use them for territory because they function to claim - # any resources they contact. - "kwargs": {"color": (202, 202, 202, 50)} - }, - { - "component": "Zapper", - "kwargs": { - "cooldownTime": 2, - "beamLength": 3, - "beamRadius": 1, - "framesTillRespawn": 1e6, # Effectively never respawn. 
- "penaltyForBeingZapped": 0, - "rewardForZapping": 0, - } - }, - { - "component": "ReadyToShootObservation", - }, - { - "component": "ResourceClaimer", - "kwargs": { - "color": color_palette["*"], - "playerIndex": lua_index, - "beamLength": 2, - "beamRadius": 0, - "beamWait": 0, - } - }, - { - "component": "LocationObserver", - "kwargs": { - "objectIsAvatar": True, - "alsoReportOrientation": True - } - }, - { - "component": "Taste", - "kwargs": { - "role": "none", - "rewardAmount": 1.0, - } - }, - ] - } - - return avatar_object - - -def create_avatar_objects(num_players): - """Returns list of avatar objects of length 'num_players'.""" - avatar_objects = [] - for player_idx in range(0, num_players): - game_object = create_avatar_object(player_idx) - avatar_objects.append(game_object) - - return avatar_objects - - -def create_lab2d_settings(num_players: int) -> Dict[str, Any]: - """Returns the lab2d settings.""" - lab2d_settings = { - "levelName": "territory", - "levelDirectory": - "meltingpot/lua/levels", - "numPlayers": num_players, - # Define upper bound of episode length since episodes end stochastically. - "maxEpisodeLengthFrames": 2000, - "spriteSize": 8, - "topology": "TORUS", # Choose from ["BOUNDED", "TORUS"], - "simulation": { - "map": DEFAULT_ASCII_MAP, - "gameObjects": create_avatar_objects(num_players), - "scene": create_scene(), - "prefabs": PREFABS, - "charPrefabMap": CHAR_PREFAB_MAP, - }, - } - return lab2d_settings - - -def get_config(factory=create_lab2d_settings): - """Default configuration for training on the territory level.""" - config = config_dict.ConfigDict() - - # Basic configuration. - config.num_players = 9 - - # Lua script configuration. - config.lab2d_settings = factory(config.num_players) - - # Action set configuration. - config.action_set = ACTION_SET - # Observation format configuration. 
- config.individual_observation_names = [ - "RGB", - "READY_TO_SHOOT", - "POSITION", - "ORIENTATION", - ] - config.global_observation_names = [ - "WORLD.RGB", - ] - - # The specs of the environment (from a single-agent perspective). - config.action_spec = specs.action(len(ACTION_SET)) - config.timestep_spec = specs.timestep({ - "RGB": specs.OBSERVATION["RGB"], - "READY_TO_SHOOT": specs.OBSERVATION["READY_TO_SHOOT"], - "POSITION": specs.OBSERVATION["POSITION"], - "ORIENTATION": specs.OBSERVATION["ORIENTATION"], - "WORLD.RGB": specs.rgb(168, 168), - }) - - return config diff --git a/meltingpot/python/configs/substrates/the_matrix.py b/meltingpot/python/configs/substrates/the_matrix.py new file mode 100644 index 00000000..f6a7f889 --- /dev/null +++ b/meltingpot/python/configs/substrates/the_matrix.py @@ -0,0 +1,164 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Common functions to be used across multiple *_in_the_matrix substrates. +""" +import copy +from typing import Any, Dict, Mapping, Sequence + +from meltingpot.python.utils.substrates import shapes + + +def get_cumulant_metric_configs( + num_resources: int) -> Sequence[Mapping[str, Any]]: + """Get metric configs to configure AvatarMetricReporter.""" + cumulants = [] + # One cumulant tracks from frame to frame whether the player participated in + # an interaction. 
+ cumulants.append({ + "name": "INTERACTED_THIS_STEP", + "type": "Doubles", + "shape": [], + "component": "GameInteractionZapper", + "variable": "interacted_this_step", + }) + for py_idx in range(num_resources): + lua_idx = py_idx + 1 + # Several cumulants track when resources are collected. There will be one + # such cumulant per resource type. + cumulants.append({ + "name": f"COLLECTED_RESOURCE_{lua_idx}", + "type": "Doubles", + "shape": [], + "component": "GameInteractionZapper", + "variable": f"collected_resource_{lua_idx}", + }) + # Several cumulants track when resources are destroyed. There will be one + # such cumulant per resource type. + cumulants.append({ + "name": f"DESTROYED_RESOURCE_{lua_idx}", + "type": "Doubles", + "shape": [], + "component": "GameInteractionZapper", + "variable": f"destroyed_resource_{lua_idx}", + }) + # Several cumulants track which resource was maximal in the interaction on + # the current frame. There will be one such cumulant per resource type. + cumulants.append({ + "name": f"ARGMAX_INTERACTION_INVENTORY_WAS_{lua_idx}", + "type": "Doubles", + "shape": [], + "component": "GameInteractionZapper", + "variable": f"argmax_interaction_inventory_was_{lua_idx}", + }) + return cumulants + + +def get_indicator_color_palette(color_rgba): + indicator_palette = copy.deepcopy(shapes.GOLD_CROWN_PALETTE) + indicator_palette["#"] = color_rgba + slightly_darker_color = [round(value * 0.9) for value in color_rgba[:-1]] + slightly_darker_color.append(150) # Add a half transparent alpha channel. + indicator_palette["@"] = slightly_darker_color + return indicator_palette + + +def create_ready_to_interact_marker(player_idx: int) -> Dict[str, Any]: + """Create a ready-to-interact marker overlay object.""" + # Lua is 1-indexed. 
+ lua_idx = player_idx + 1 + + marking_object = { + "name": "avatarReadyToInteractMarker", + "components": [ + { + "component": "StateManager", + "kwargs": { + "initialState": "avatarMarkingWait", + "stateConfigs": [ + # Use `overlay` layer for ready and nonready states, both + # are used for live avatars and are always connected. + {"state": "ready", + "layer": "overlay", + "sprite": "Ready"}, + {"state": "notReady", + "layer": "overlay"}, + + # Result indication colors. + {"state": "resultIndicatorColor1", + "layer": "overlay", + "sprite": "ResultIndicatorColor1"}, + {"state": "resultIndicatorColor2", + "layer": "overlay", + "sprite": "ResultIndicatorColor2"}, + {"state": "resultIndicatorColor3", + "layer": "overlay", + "sprite": "ResultIndicatorColor3"}, + {"state": "resultIndicatorColor4", + "layer": "overlay", + "sprite": "ResultIndicatorColor4"}, + {"state": "resultIndicatorColor5", + "layer": "overlay", + "sprite": "ResultIndicatorColor5"}, + + # Invisible inactive overlay type. + {"state": "avatarMarkingWait", + "groups": ["avatarMarkingWaits"]}, + ] + } + }, + { + "component": "Transform", + }, + { + "component": "Appearance", + "kwargs": { + "renderMode": "ascii_shape", + "spriteNames": [ + "Ready", + "ResultIndicatorColor1", + "ResultIndicatorColor2", + "ResultIndicatorColor3", + "ResultIndicatorColor4", + "ResultIndicatorColor5", + ], + "spriteShapes": [shapes.BRONZE_CAP,] * 6, + "palettes": [ + shapes.SILVER_CROWN_PALETTE, + # Colors are in rainbow order (more or less). + get_indicator_color_palette((139, 0, 0, 255)), # red + get_indicator_color_palette((253, 184, 1, 255)), # yellow + get_indicator_color_palette((0, 102, 0, 255)), # green + get_indicator_color_palette((2, 71, 254, 255)), # blue + get_indicator_color_palette((127, 0, 255, 255)), # violet + ], + "noRotates": [True,] * 6, + } + }, + { + "component": "AvatarConnector", + "kwargs": { + "playerIndex": lua_idx, + "aliveState": "notReady", # state `notReady` is invisible. 
+ "waitState": "avatarMarkingWait" + } + }, + { + "component": "ReadyToInteractMarker", + "kwargs": { + "playerIndex": lua_idx, + } + }, + ] + } + return marking_object diff --git a/meltingpot/python/human_players/play_allelopathic_harvest.py b/meltingpot/python/human_players/play_allelopathic_harvest.py index 1fcc45f2..81ed547c 100644 --- a/meltingpot/python/human_players/play_allelopathic_harvest.py +++ b/meltingpot/python/human_players/play_allelopathic_harvest.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""A simple human player for testing the `allelopathic_harvest` level. +"""A simple human player for testing `allelopathic_harvest`. Use `WASD` keys to move the character around. Use `Q and E` to turn the character. 
@@ -22,13 +21,14 @@ import argparse import json +from ml_collections import config_dict -from meltingpot.python.configs.substrates import allelopathic_harvest as mp_allelopathic_harvest +from meltingpot.python.configs.substrates import allelopathic_harvest__open from meltingpot.python.human_players import level_playing_utils environment_configs = { - 'mp_allelopathic_harvest': mp_allelopathic_harvest, + 'allelopathic_harvest__open': allelopathic_harvest__open, } _ACTION_MAP = { @@ -38,8 +38,6 @@ 'fire_1': level_playing_utils.get_key_number_one_pressed, 'fire_2': level_playing_utils.get_key_number_two_pressed, 'fire_3': level_playing_utils.get_key_number_three_pressed, - 'fire_4': level_playing_utils.get_key_number_four_pressed, - 'fire_5': level_playing_utils.get_key_number_five_pressed, } @@ -50,7 +48,7 @@ def verbose_fn(unused_env, unused_player_index): def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_allelopathic_harvest', + '--level_name', type=str, default='allelopathic_harvest__open', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', help='Observation to render') @@ -64,10 +62,14 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( args.observation, args.settings, _ACTION_MAP, - env_config.get_config(), level_playing_utils.RenderType.PYGAME, + env_config, level_playing_utils.RenderType.PYGAME, verbose_fn=verbose_fn if args.verbose else None, print_events=args.print_events) diff --git 
a/meltingpot/python/human_players/play_anything_in_the_matrix.py b/meltingpot/python/human_players/play_anything_in_the_matrix.py index b0f25df1..44adb1ca 100644 --- a/meltingpot/python/human_players/play_anything_in_the_matrix.py +++ b/meltingpot/python/human_players/play_anything_in_the_matrix.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,8 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""A simple human player for testing `the_matrix` levels. +"""A simple human player for testing `*_in_the_matrix`. Use `WASD` keys to move the character around. Use `Q and E` to turn the character. @@ -22,27 +21,44 @@ import argparse import json +from ml_collections import config_dict -from meltingpot.python.configs.substrates import arena_running_with_scissors_in_the_matrix as mp_arena_rws_itm -from meltingpot.python.configs.substrates import bach_or_stravinsky_in_the_matrix as mp_bach_or_stravinsky_itm -from meltingpot.python.configs.substrates import chicken_in_the_matrix as mp_chicken_itm -from meltingpot.python.configs.substrates import prisoners_dilemma_in_the_matrix as mp_prisoners_dilemma_itm -from meltingpot.python.configs.substrates import pure_coordination_in_the_matrix as mp_pure_coord_itm -from meltingpot.python.configs.substrates import rationalizable_coordination_in_the_matrix as mp_rational_coord_itm -from meltingpot.python.configs.substrates import running_with_scissors_in_the_matrix as mp_rws_itm -from meltingpot.python.configs.substrates import stag_hunt_in_the_matrix as mp_stag_hunt_itm +from meltingpot.python.configs.substrates import bach_or_stravinsky_in_the_matrix__arena as bach_or_stravinsky_itm +from 
meltingpot.python.configs.substrates import bach_or_stravinsky_in_the_matrix__repeated as bach_or_stravinsky_itm__repeated +from meltingpot.python.configs.substrates import chicken_in_the_matrix__arena as chicken_itm +from meltingpot.python.configs.substrates import chicken_in_the_matrix__repeated as chicken_itm__repeated +from meltingpot.python.configs.substrates import prisoners_dilemma_in_the_matrix__arena as prisoners_dilemma_itm +from meltingpot.python.configs.substrates import prisoners_dilemma_in_the_matrix__repeated as prisoners_dilemma_itm__repeated +from meltingpot.python.configs.substrates import pure_coordination_in_the_matrix__arena as pure_coord_itm +from meltingpot.python.configs.substrates import pure_coordination_in_the_matrix__repeated as pure_coord_itm__repeated +from meltingpot.python.configs.substrates import rationalizable_coordination_in_the_matrix__arena as rational_coord_itm +from meltingpot.python.configs.substrates import rationalizable_coordination_in_the_matrix__repeated as rational_coord_itm__repeated +from meltingpot.python.configs.substrates import running_with_scissors_in_the_matrix__arena as rws_itm__arena +from meltingpot.python.configs.substrates import running_with_scissors_in_the_matrix__one_shot as rws_itm +from meltingpot.python.configs.substrates import running_with_scissors_in_the_matrix__repeated as rws_itm__repeated +from meltingpot.python.configs.substrates import stag_hunt_in_the_matrix__arena as stag_hunt_itm +from meltingpot.python.configs.substrates import stag_hunt_in_the_matrix__repeated as stag_hunt_itm__repeated from meltingpot.python.human_players import level_playing_utils - environment_configs = { - 'mp_arena_running_with_scissors_in_the_matrix': mp_arena_rws_itm, - 'mp_bach_or_stravinsky_in_the_matrix': mp_bach_or_stravinsky_itm, - 'mp_chicken_in_the_matrix': mp_chicken_itm, - 'mp_prisoners_dilemma_in_the_matrix': mp_prisoners_dilemma_itm, - 'mp_pure_coordination_in_the_matrix': mp_pure_coord_itm, - 
'mp_rationalizable_coordination_in_the_matrix': mp_rational_coord_itm, - 'mp_running_with_scissors_in_the_matrix': mp_rws_itm, - 'mp_stag_hunt_in_the_matrix': mp_stag_hunt_itm, + 'bach_or_stravinsky_in_the_matrix__arena': bach_or_stravinsky_itm, + 'bach_or_stravinsky_in_the_matrix__repeated': + bach_or_stravinsky_itm__repeated, + 'chicken_in_the_matrix__arena': chicken_itm, + 'chicken_in_the_matrix__repeated': chicken_itm__repeated, + 'prisoners_dilemma_in_the_matrix__arena': prisoners_dilemma_itm, + 'prisoners_dilemma_in_the_matrix__repeated': + prisoners_dilemma_itm__repeated, + 'pure_coordination_in_the_matrix__arena': pure_coord_itm, + 'pure_coordination_in_the_matrix__repeated': pure_coord_itm__repeated, + 'rationalizable_coordination_in_the_matrix__arena': rational_coord_itm, + 'rationalizable_coordination_in_the_matrix__repeated': + rational_coord_itm__repeated, + 'running_with_scissors_in_the_matrix__arena': rws_itm__arena, + 'running_with_scissors_in_the_matrix__one_shot': rws_itm, + 'running_with_scissors_in_the_matrix__repeated': rws_itm__repeated, + 'stag_hunt_in_the_matrix__arena': stag_hunt_itm, + 'stag_hunt_in_the_matrix__repeated': stag_hunt_itm__repeated, } _ACTION_MAP = { @@ -52,14 +68,42 @@ } -def verbose_fn(unused_env, unused_player_index): - pass +def verbose_fn(env_timestep, player_index): + """Print using this function once enabling the option --verbose=True.""" + lua_index = player_index + 1 + collected_resource_1 = env_timestep.observation[ + f'{lua_index}.COLLECTED_RESOURCE_1'] + collected_resource_2 = env_timestep.observation[ + f'{lua_index}.COLLECTED_RESOURCE_2'] + destroyed_resource_1 = env_timestep.observation[ + f'{lua_index}.DESTROYED_RESOURCE_1'] + destroyed_resource_2 = env_timestep.observation[ + f'{lua_index}.DESTROYED_RESOURCE_2'] + interacted_this_step = env_timestep.observation[ + f'{lua_index}.INTERACTED_THIS_STEP'] + argmax_interact_inventory_1 = env_timestep.observation[ + f'{lua_index}.ARGMAX_INTERACTION_INVENTORY_WAS_1'] 
+ argmax_interact_inventory_2 = env_timestep.observation[ + f'{lua_index}.ARGMAX_INTERACTION_INVENTORY_WAS_2'] + # Only print observations from player 0. + if player_index == 0: + print( + f'player: {player_index} --- \n' + + f' collected_resource_1: {collected_resource_1} \n' + + f' collected_resource_2: {collected_resource_2} \n' + + f' destroyed_resource_1: {destroyed_resource_1} \n' + + f' destroyed_resource_1: {destroyed_resource_2} \n' + + f' interacted_this_step: {interacted_this_step} \n' + + f' argmax_interaction_inventory_1: {argmax_interact_inventory_1} \n' + + f' argmax_interaction_inventory_2: {argmax_interact_inventory_2} \n' + ) def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_prisoners_dilemma_in_the_matrix', + '--level_name', type=str, + default='prisoners_dilemma_in_the_matrix__repeated', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', help='Observation to render') @@ -73,10 +117,14 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( args.observation, args.settings, _ACTION_MAP, - env_config.get_config(), level_playing_utils.RenderType.PYGAME, + env_config, level_playing_utils.RenderType.PYGAME, verbose_fn=verbose_fn if args.verbose else None, print_events=args.print_events) diff --git a/meltingpot/python/human_players/play_boat_race.py b/meltingpot/python/human_players/play_boat_race.py new file mode 100644 index 00000000..7a1b5e32 --- /dev/null +++ b/meltingpot/python/human_players/play_boat_race.py @@ -0,0 +1,96 @@ +# Copyright 
2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing `boat_race`. + +Use `WASD` keys to move the character around. +Use `Q and E` to turn the character. +Use ` ` to row (effectively, but needs coordinated stroke). +Use `x` to flail (row ineffectively, but with safe steady progress). +Use `TAB` to switch between players. +""" + +import argparse +import json +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import boat_race__eight_races +from meltingpot.python.human_players import level_playing_utils +from meltingpot.python.utils.substrates import game_object_utils + +MAX_SCREEN_WIDTH = 600 +MAX_SCREEN_HEIGHT = 800 +FRAMES_PER_SECOND = 8 + +environment_configs = { + 'boat_race__eight_races': boat_race__eight_races, +} + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + 'row': level_playing_utils.get_space_key_pressed, + 'flail': level_playing_utils.get_key_x_pressed, +} + + +def verbose_fn(env_timestep, player_index): + lua_index = player_index + 1 + if env_timestep.observation['WORLD.RACE_START'].any() and player_index == 0: + print('WORLD.RACE_START', env_timestep.observation['WORLD.RACE_START']) + for obs in [f'{lua_index}.PADDLES', f'{lua_index}.FLAILS']: + if env_timestep.observation[obs]: + print(obs, env_timestep.observation[obs]) + + +def main(): + parser = 
argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, default='boat_race__eight_races', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. + parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + parser.add_argument( + '--override_flail_effectiveness', type=float, default=0.1, + help='Override flail effectiveness to make debugging easier.') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + # For easier debug, override the flailEffectiveness + game_object_utils.get_first_named_component( + env_config.lab2d_settings['simulation']['prefabs']['seat_L'], + 'BoatManager' + )['kwargs']['flailEffectiveness'] = args.override_flail_effectiveness + + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, env_config, + level_playing_utils.RenderType.PYGAME, MAX_SCREEN_WIDTH, + MAX_SCREEN_HEIGHT, FRAMES_PER_SECOND, + verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_chemistry.py b/meltingpot/python/human_players/play_chemistry.py new file mode 100644 index 00000000..a3acf2ef --- /dev/null +++ b/meltingpot/python/human_players/play_chemistry.py @@ -0,0 +1,90 @@ +# Copyright 2022 DeepMind Technologies Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing `chemistry`. + +Use `WASD` keys to move the character around. +Use `Q and E` to turn the character. +Use `SPACE` to select the `endocytose` action. +Use `TAB` to switch between players. +""" + +import argparse +import json +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import chemistry__three_metabolic_cycles +from meltingpot.python.configs.substrates import chemistry__three_metabolic_cycles_with_plentiful_distractors +from meltingpot.python.configs.substrates import chemistry__two_metabolic_cycles +from meltingpot.python.configs.substrates import chemistry__two_metabolic_cycles_with_distractors +from meltingpot.python.human_players import level_playing_utils + + +MAX_SCREEN_WIDTH = 800 +MAX_SCREEN_HEIGHT = 600 +FRAMES_PER_SECOND = 8 + + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + 'ioAction': level_playing_utils.get_space_key_pressed, +} + +environment_configs = { + 'chemistry__three_metabolic_cycles': ( + chemistry__three_metabolic_cycles), + 'chemistry__three_metabolic_cycles_with_plentiful_distractors': ( + chemistry__three_metabolic_cycles_with_plentiful_distractors), + 'chemistry__two_metabolic_cycles': chemistry__two_metabolic_cycles, + 'chemistry__two_metabolic_cycles_with_distractors': ( + chemistry__two_metabolic_cycles_with_distractors), +} + + +def 
verbose_fn(unused_env, unused_player_index): + """Activate verbose printing with --verbose=True.""" + pass + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, default='chemistry__two_metabolic_cycles', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. + parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, + env_config, level_playing_utils.RenderType.PYGAME, + verbose_fn=verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_clean_up.py b/meltingpot/python/human_players/play_clean_up.py index ac857af3..28762634 100644 --- a/meltingpot/python/human_players/play_clean_up.py +++ b/meltingpot/python/human_players/play_clean_up.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -"""A simple human player for testing the `clean_up` level. +"""A simple human player for testing `clean_up`. Use `WASD` keys to move the character around. Use `Q and E` to turn the character. @@ -21,13 +21,14 @@ import argparse import json +from ml_collections import config_dict -from meltingpot.python.configs.substrates import clean_up as mp_clean_up +from meltingpot.python.configs.substrates import clean_up from meltingpot.python.human_players import level_playing_utils environment_configs = { - 'mp_clean_up': mp_clean_up, + 'clean_up': clean_up, } _ACTION_MAP = { @@ -38,14 +39,29 @@ } -def verbose_fn(unused_env, unused_player_index): - pass +def verbose_fn(env_timestep, player_index): + """Print using this function once enabling the option --verbose=True.""" + lua_index = player_index + 1 + cleaned = env_timestep.observation[f'{lua_index}.PLAYER_CLEANED'] + ate = env_timestep.observation[f'{lua_index}.PLAYER_ATE_APPLE'] + num_zapped_this_step = env_timestep.observation[ + f'{lua_index}.NUM_OTHERS_PLAYER_ZAPPED_THIS_STEP'] + num_others_cleaned = env_timestep.observation[ + f'{lua_index}.NUM_OTHERS_WHO_CLEANED_THIS_STEP'] + num_others_ate = env_timestep.observation[ + f'{lua_index}.NUM_OTHERS_WHO_ATE_THIS_STEP'] + # Only print observations from player 0. 
+ if player_index == 0: + print(f'player: {player_index} --- player_cleaned: {cleaned} --- ' + + f'player_ate_apple: {ate} --- num_others_cleaned: ' + + f'{num_others_cleaned} --- num_others_ate: {num_others_ate} ' + + f'---num_others_player_zapped_this_step: {num_zapped_this_step}') def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_clean_up', + '--level_name', type=str, default='clean_up', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', help='Observation to render') @@ -59,10 +75,14 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( args.observation, args.settings, _ACTION_MAP, - env_config.get_config(), level_playing_utils.RenderType.PYGAME, + env_config, level_playing_utils.RenderType.PYGAME, verbose_fn=verbose_fn if args.verbose else None, print_events=args.print_events) diff --git a/meltingpot/python/human_players/play_coins.py b/meltingpot/python/human_players/play_coins.py new file mode 100644 index 00000000..72744e4c --- /dev/null +++ b/meltingpot/python/human_players/play_coins.py @@ -0,0 +1,83 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing `coins`. + +Use `WASD` keys to move the character around. +Use `TAB` to switch between players. +""" + +import argparse +import json +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import coins +from meltingpot.python.human_players import level_playing_utils + +MAX_SCREEN_WIDTH = 600 +MAX_SCREEN_HEIGHT = 450 +FRAMES_PER_SECOND = 8 + +environment_configs = { + 'coins': coins, +} + + +def no_op() -> int: + """Returns the `NONE` movement value (no direction pressed).""" + return level_playing_utils.MOVEMENT_MAP['NONE'] + + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, +} + + +def verbose_fn(env_timestep, player_index): + del env_timestep, player_index + pass + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, default='coins', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. 
+ parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, env_config, + level_playing_utils.RenderType.PYGAME, MAX_SCREEN_WIDTH, + MAX_SCREEN_HEIGHT, FRAMES_PER_SECOND, + verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_collaborative_cooking.py b/meltingpot/python/human_players/play_collaborative_cooking.py index 90389224..d6917738 100644 --- a/meltingpot/python/human_players/play_collaborative_cooking.py +++ b/meltingpot/python/human_players/play_collaborative_cooking.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""A simple human player for testing the `collaborative_cooking` level. +"""A simple human player for testing `collaborative_cooking`. Use `WASD` keys to move the character around. Use `Q and E` to turn the character. 
@@ -22,15 +22,22 @@ import argparse import json -from meltingpot.python.configs.substrates import collaborative_cooking_impassable as mp_collaborative_cooking_impassable -from meltingpot.python.configs.substrates import collaborative_cooking_passable as mp_collaborative_cooking_passable +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import collaborative_cooking__asymmetric +from meltingpot.python.configs.substrates import collaborative_cooking__circuit +from meltingpot.python.configs.substrates import collaborative_cooking__cramped +from meltingpot.python.configs.substrates import collaborative_cooking__crowded +from meltingpot.python.configs.substrates import collaborative_cooking__figure_eight +from meltingpot.python.configs.substrates import collaborative_cooking__forced +from meltingpot.python.configs.substrates import collaborative_cooking__ring from meltingpot.python.human_players import level_playing_utils -environment_configs = { - 'mp_collaborative_cooking_impassable': mp_collaborative_cooking_impassable, - 'mp_collaborative_cooking_passable': mp_collaborative_cooking_passable, -} +MAX_SCREEN_WIDTH = 800 +MAX_SCREEN_HEIGHT = 600 +FRAMES_PER_SECOND = 8 + _ACTION_MAP = { 'move': level_playing_utils.get_direction_pressed, @@ -38,15 +45,33 @@ 'interact': level_playing_utils.get_space_key_pressed, } +environment_configs = { + 'collaborative_cooking__asymmetric': collaborative_cooking__asymmetric, + 'collaborative_cooking__circuit': collaborative_cooking__circuit, + 'collaborative_cooking__cramped': collaborative_cooking__cramped, + 'collaborative_cooking__crowded': collaborative_cooking__crowded, + 'collaborative_cooking__figure_eight': collaborative_cooking__figure_eight, + 'collaborative_cooking__forced': collaborative_cooking__forced, + 'collaborative_cooking__ring': collaborative_cooking__ring, +} + -def verbose_fn(unused_env, unused_player_index): - pass +def verbose_fn(env_timestep, player_index): + if player_index != 
0: + return + for obs in ['ADDED_INGREDIENT_TO_COOKING_POT', + 'COLLECTED_SOUP_FROM_COOKING_POT']: + lua_index = player_index + 1 + if env_timestep.observation[f'{lua_index}.{obs}']: + print(obs, env_timestep.observation[f'{lua_index}.{obs}']) def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_collaborative_cooking_passable', + '--level_name', + type=str, + default='collaborative_cooking__cramped', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', help='Observation to render') @@ -60,11 +85,16 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( - args.observation, args.settings, _ACTION_MAP, - env_config.get_config(), level_playing_utils.RenderType.PYGAME, - verbose_fn=verbose_fn if args.verbose else None, + args.observation, args.settings, _ACTION_MAP, env_config, + level_playing_utils.RenderType.PYGAME, MAX_SCREEN_WIDTH, + MAX_SCREEN_HEIGHT, FRAMES_PER_SECOND, + verbose_fn if args.verbose else None, print_events=args.print_events) diff --git a/meltingpot/python/human_players/play_commons_harvest.py b/meltingpot/python/human_players/play_commons_harvest.py index 4f598082..9441b942 100644 --- a/meltingpot/python/human_players/play_commons_harvest.py +++ b/meltingpot/python/human_players/play_commons_harvest.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""A simple human player for testing any `commons_harvest` substrate. +"""A simple human player for testing `commons_harvest`. Use `WASD` keys to move the character around. Use `Q and E` to turn the character. @@ -21,17 +21,18 @@ import argparse import json +from ml_collections import config_dict -from meltingpot.python.configs.substrates import commons_harvest_closed as mp_commons_harvest_closed -from meltingpot.python.configs.substrates import commons_harvest_open as mp_commons_harvest_open -from meltingpot.python.configs.substrates import commons_harvest_partnership as mp_commons_harvest_partnership +from meltingpot.python.configs.substrates import commons_harvest__closed +from meltingpot.python.configs.substrates import commons_harvest__open +from meltingpot.python.configs.substrates import commons_harvest__partnership from meltingpot.python.human_players import level_playing_utils environment_configs = { - 'mp_commons_harvest_closed': mp_commons_harvest_closed, - 'mp_commons_harvest_open': mp_commons_harvest_open, - 'mp_commons_harvest_partnership': mp_commons_harvest_partnership, + 'commons_harvest__closed': commons_harvest__closed, + 'commons_harvest__open': commons_harvest__open, + 'commons_harvest__partnership': commons_harvest__partnership, } _ACTION_MAP = { @@ -48,7 +49,7 @@ def verbose_fn(unused_env, unused_player_index): def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_commons_harvest_closed', + '--level_name', type=str, default='commons_harvest__closed', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', 
help='Observation to render') @@ -62,10 +63,14 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( - args.observation, args.settings, _ACTION_MAP, env_config.get_config(), - level_playing_utils.RenderType.PYGAME, + args.observation, args.settings, _ACTION_MAP, + env_config, level_playing_utils.RenderType.PYGAME, verbose_fn=verbose_fn if args.verbose else None, print_events=args.print_events) diff --git a/meltingpot/python/human_players/play_any_paintball_game.py b/meltingpot/python/human_players/play_coop_mining.py similarity index 71% rename from meltingpot/python/human_players/play_any_paintball_game.py rename to meltingpot/python/human_players/play_coop_mining.py index d9067550..12df0880 100644 --- a/meltingpot/python/human_players/play_any_paintball_game.py +++ b/meltingpot/python/human_players/play_coop_mining.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,32 +11,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""A simple human player for testing paintball games. +"""A simple human player for testing `coop_mining`. Use `WASD` keys to move the character around. Use `Q and E` to turn the character. -Use `SPACE` to fire the zapper. +Use `SPACE` to fire the gift beam. +Use `1` to consume tokens. 
Use `TAB` to switch between players. """ import argparse import json +from ml_collections import config_dict -from meltingpot.python.configs.substrates import capture_the_flag as mp_capture_the_flag -from meltingpot.python.configs.substrates import king_of_the_hill as mp_king_of_the_hill +from meltingpot.python.configs.substrates import coop_mining from meltingpot.python.human_players import level_playing_utils environment_configs = { - 'mp_capture_the_flag': mp_capture_the_flag, - 'mp_king_of_the_hill': mp_king_of_the_hill, + 'coop_mining': coop_mining, } _ACTION_MAP = { 'move': level_playing_utils.get_direction_pressed, 'turn': level_playing_utils.get_turn_pressed, - 'fireZap': level_playing_utils.get_space_key_pressed, + 'mine': level_playing_utils.get_space_key_pressed, } @@ -47,7 +46,7 @@ def verbose_fn(unused_env, unused_player_index): def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_capture_the_flag', + '--level_name', type=str, default='coop_mining', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', help='Observation to render') @@ -61,10 +60,14 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( args.observation, args.settings, _ACTION_MAP, - env_config.get_config(), level_playing_utils.RenderType.PYGAME, + env_config, level_playing_utils.RenderType.PYGAME, verbose_fn=verbose_fn if args.verbose else None, print_events=args.print_events) diff --git a/meltingpot/python/human_players/play_daycare.py 
b/meltingpot/python/human_players/play_daycare.py new file mode 100644 index 00000000..40c3c32d --- /dev/null +++ b/meltingpot/python/human_players/play_daycare.py @@ -0,0 +1,82 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing `daycare`. + +Use `WASD` keys to move the character around. +Use `Q and E` to turn the character. +Use `TAB` to switch between players. +""" + +import argparse +import json +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import daycare +from meltingpot.python.human_players import level_playing_utils + + +MAX_SCREEN_WIDTH = 800 +MAX_SCREEN_HEIGHT = 600 +FRAMES_PER_SECOND = 8 + +environment_configs = { + 'daycare': daycare, +} + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + 'eat': level_playing_utils.get_key_z_pressed, + 'grasp': level_playing_utils.get_space_key_pressed, +} + + +def verbose_fn(env_timestep, player_index): + del env_timestep, player_index + pass + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, + default='daycare', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode 
with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. + parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, env_config, + level_playing_utils.RenderType.PYGAME, MAX_SCREEN_WIDTH, + MAX_SCREEN_HEIGHT, FRAMES_PER_SECOND, + verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_externality_mushrooms.py b/meltingpot/python/human_players/play_externality_mushrooms.py new file mode 100644 index 00000000..08a14c1e --- /dev/null +++ b/meltingpot/python/human_players/play_externality_mushrooms.py @@ -0,0 +1,99 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing `externality_mushrooms`. + +Use `WASD` keys to move the character around. +Use `Q and E` to turn the character. +Use `SPACE` to fire the zapper. +Use `TAB` to switch between players. 
+""" + +import argparse +import json + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import externality_mushrooms__dense +from meltingpot.python.human_players import level_playing_utils + + +environment_configs = { + 'externality_mushrooms__dense': externality_mushrooms__dense, +} + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + 'fireZap': level_playing_utils.get_space_key_pressed, +} + + +def verbose_fn(env_timestep, player_index): + """Print using this function once enabling the option --verbose=True.""" + lua_index = player_index + 1 + ate_hihe = env_timestep.observation[f'{lua_index}.ATE_MUSHROOM_HIHE'] + ate_fize = env_timestep.observation[f'{lua_index}.ATE_MUSHROOM_FIZE'] + ate_zife = env_timestep.observation[f'{lua_index}.ATE_MUSHROOM_ZIFE'] + destroyed_hihe = env_timestep.observation[ + f'{lua_index}.DESTROYED_MUSHROOM_HIHE'] + destroyed_fize = env_timestep.observation[ + f'{lua_index}.DESTROYED_MUSHROOM_FIZE'] + destroyed_zife = env_timestep.observation[ + f'{lua_index}.DESTROYED_MUSHROOM_ZIFE'] + at_least_one_nonzero = (ate_hihe + ate_fize + ate_zife + + destroyed_hihe + destroyed_fize + destroyed_zife) + # Only print observations from player 0. 
+ if player_index == 0 and at_least_one_nonzero > 0: + print( + f'player: {player_index} --- \n' + + f' ate_hihe: {ate_hihe} \n' + + f' ate_fize: {ate_fize} \n' + + f' ate_zife: {ate_zife} \n' + + f' destroyed_hihe: {destroyed_hihe} \n' + + f' destroyed_fize: {destroyed_fize} \n' + + f' destroyed_zife: {destroyed_zife} \n' + ) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, default='externality_mushrooms__dense', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. + parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, + env_config, level_playing_utils.RenderType.PYGAME, + verbose_fn=verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_factory_commons.py b/meltingpot/python/human_players/play_factory_commons.py new file mode 100644 index 00000000..a97cfd3e --- /dev/null +++ b/meltingpot/python/human_players/play_factory_commons.py @@ -0,0 +1,88 @@ +# Copyright 2022 DeepMind Technologies Limited. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing factory_commons. + +Use `WASD` keys to move the character around. +Use `Q and E` to turn the character. +Use `Z` to pick-up pickuppable objects. +Use `SPACE` to grasp a movable block. +Use `TAB` to switch between players. +""" +import argparse +import json + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import factory_commons__either_or +from meltingpot.python.human_players import level_playing_utils + + +def get_push_pull() -> int: + """Sets shove to either -1, 0, or 1.""" + if level_playing_utils.get_right_shift_pressed(): + return 1 + if level_playing_utils.get_left_control_pressed(): + return -1 + return 0 + +environment_configs = { + 'factory_commons__either_or': factory_commons__either_or, +} + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + 'pickup': level_playing_utils.get_key_z_pressed, + 'grasp': level_playing_utils.get_key_x_pressed, + # Grappling actions + 'hold': level_playing_utils.get_space_key_pressed, + 'shove': get_push_pull, +} + + +def verbose_fn(unused_env, unused_player_index): + pass + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, default='factory_commons__either_or', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', 
help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. + parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, + env_config, level_playing_utils.RenderType.PYGAME, + verbose_fn=verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_fruit_market.py b/meltingpot/python/human_players/play_fruit_market.py new file mode 100644 index 00000000..b225e52a --- /dev/null +++ b/meltingpot/python/human_players/play_fruit_market.py @@ -0,0 +1,130 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A human player for testing fruit_market. + +Note: The real agents can make and accept offers up to size 3 (up to 3 apples +for up to 3 bananas). 
However this human player script only allows offers up to +size 1. The reason is just that we started to run out of keys on the keyboard to +represent higher offers. + +Use `WASD` keys to move the player around. +Use `Q and E` to turn the player. +Use `TAB` to switch which player you are controlling. +Use 'Z' to eat an apple from your inventory. +Use 'X' to eat a banana from your inventory. +""" +import argparse +import json +from ml_collections import config_dict +import pygame + +from meltingpot.python.configs.substrates import fruit_market__concentric_rivers +from meltingpot.python.human_players import level_playing_utils + + +def get_offer_apple_pressed() -> int: + """Sets apple offer to either -1, 0, or 1.""" + key_pressed = pygame.key.get_pressed() + if key_pressed[pygame.K_1]: + return -1 + if key_pressed[pygame.K_2]: + return 1 + return 0 + + +def get_offer_banana_pressed() -> int: + """Sets banana offer to either -1, 0, or 1.""" + key_pressed = pygame.key.get_pressed() + if key_pressed[pygame.K_3]: + return -1 + if key_pressed[pygame.K_4]: + return 1 + return 0 + + +def get_push_pull() -> int: + """Sets shove to either -1, 0, or 1.""" + if level_playing_utils.get_right_shift_pressed(): + return 1 + if level_playing_utils.get_left_control_pressed(): + return -1 + return 0 + +environment_configs = { + 'fruit_market__concentric_rivers': fruit_market__concentric_rivers, +} + +_ACTION_MAP = { + # Basic movement actions + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + # Trade actions + 'eat_apple': level_playing_utils.get_key_z_pressed, + 'eat_banana': level_playing_utils.get_key_x_pressed, + 'offer_apple': get_offer_apple_pressed, # 1 and 2 + 'offer_banana': get_offer_banana_pressed, # 3 and 4 + 'offer_cancel': level_playing_utils.get_key_number_five_pressed, + # Grappling actions + 'hold': level_playing_utils.get_space_key_pressed, + 'shove': get_push_pull, +} + + +def verbose_fn(env_timestep, 
player_index): + """Print using this function once enabling the option --verbose=True.""" + lua_index = player_index + 1 + inventory = env_timestep.observation[f'{lua_index}.INVENTORY'] + hunger = env_timestep.observation[f'{lua_index}.HUNGER'] + my_offer = env_timestep.observation[f'{lua_index}.MY_OFFER'] + offers = env_timestep.observation[f'{lua_index}.OFFERS'] + # Only print offer observations from player 0. + if player_index == 0: + print( + f'player: {player_index} --- inventory: {inventory}, hunger: {hunger}') + print(f'**player 0 view of offers:\n{offers}') + print(f'**player 0 view of own offer: {my_offer}') + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', + type=str, + default='fruit_market__concentric_rivers', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. 
+ parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, + env_config, level_playing_utils.RenderType.PYGAME, + verbose_fn=verbose_fn if args.verbose else None, + print_events=args.print_events) + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_grid_land.py b/meltingpot/python/human_players/play_gift_refinements.py similarity index 68% rename from meltingpot/python/human_players/play_grid_land.py rename to meltingpot/python/human_players/play_gift_refinements.py index 67a126b1..f0637852 100644 --- a/meltingpot/python/human_players/play_grid_land.py +++ b/meltingpot/python/human_players/play_gift_refinements.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,32 +11,32 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""A simple human player for testing the `grid_land` (chemistry) levels. +"""A simple human player for testing `gift_refinements`. Use `WASD` keys to move the character around. Use `Q and E` to turn the character. -Use `SPACE` to use the IO action (swap contents of stomach and ground). +Use `SPACE` to fire the gift beam. +Use `1` to consume tokens. Use `TAB` to switch between players. 
""" import argparse import json +from ml_collections import config_dict -from meltingpot.python.configs.substrates import chemistry_branched_chain_reaction as mp_chemistry_branched_chain_reaction -from meltingpot.python.configs.substrates import chemistry_metabolic_cycles as mp_chemistry_metabolic_cycles +from meltingpot.python.configs.substrates import gift_refinements from meltingpot.python.human_players import level_playing_utils environment_configs = { - 'mp_chemistry_branched_chain_reaction': - mp_chemistry_branched_chain_reaction, - 'mp_chemistry_metabolic_cycles': mp_chemistry_metabolic_cycles, + 'gift_refinements': gift_refinements, } _ACTION_MAP = { 'move': level_playing_utils.get_direction_pressed, 'turn': level_playing_utils.get_turn_pressed, - 'ioAction': level_playing_utils.get_space_key_pressed, + 'refineAndGift': level_playing_utils.get_space_key_pressed, + 'consumeTokens': level_playing_utils.get_key_number_one_pressed, } @@ -47,7 +47,7 @@ def verbose_fn(unused_env, unused_player_index): def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_chemistry_metabolic_cycles', + '--level_name', type=str, default='gift_refinements', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', help='Observation to render') @@ -61,10 +61,14 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( args.observation, args.settings, _ACTION_MAP, - env_config.get_config(), level_playing_utils.RenderType.PYGAME, + env_config, level_playing_utils.RenderType.PYGAME, 
verbose_fn=verbose_fn if args.verbose else None, print_events=args.print_events) diff --git a/meltingpot/python/human_players/play_level_test.py b/meltingpot/python/human_players/play_level_test.py index 2367a744..3b3dd04f 100644 --- a/meltingpot/python/human_players/play_level_test.py +++ b/meltingpot/python/human_players/play_level_test.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,135 +11,115 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Tests of the human_players levels.""" +"""Tests for human_players.""" -import collections from unittest import mock from absl.testing import absltest from absl.testing import parameterized -from dm_env import specs -import numpy as np +from ml_collections import config_dict import pygame -import dmlab2d -from meltingpot.python.configs.substrates import allelopathic_harvest as mp_allelopathic_harvest -from meltingpot.python.configs.substrates import arena_running_with_scissors_in_the_matrix as mp_arena_running_with_scissors_itm -from meltingpot.python.configs.substrates import bach_or_stravinsky_in_the_matrix as mp_bach_or_stravinsky_itm -from meltingpot.python.configs.substrates import capture_the_flag as mp_capture_the_flag -from meltingpot.python.configs.substrates import chemistry_metabolic_cycles as mp_chemistry_metabolic_cycles -from meltingpot.python.configs.substrates import chicken_in_the_matrix as mp_chicken_itm -from meltingpot.python.configs.substrates import clean_up as mp_clean_up -from meltingpot.python.configs.substrates import collaborative_cooking_passable as mp_collaborative_cooking_passable -from meltingpot.python.configs.substrates import commons_harvest_closed as 
mp_commons_harvest_closed -from meltingpot.python.configs.substrates import king_of_the_hill as mp_king_of_the_hill -from meltingpot.python.configs.substrates import prisoners_dilemma_in_the_matrix as mp_prisoners_dilemma_itm -from meltingpot.python.configs.substrates import pure_coordination_in_the_matrix as mp_pure_coordination_itm -from meltingpot.python.configs.substrates import rationalizable_coordination_in_the_matrix as mp_rationalizable_coordination_itm -from meltingpot.python.configs.substrates import running_with_scissors_in_the_matrix as mp_running_with_scissors_itm -from meltingpot.python.configs.substrates import stag_hunt_in_the_matrix as mp_stag_hunt_itm -from meltingpot.python.configs.substrates import territory_rooms as mp_territory_rooms +from meltingpot.python.configs.substrates import allelopathic_harvest__open +from meltingpot.python.configs.substrates import boat_race__eight_races +from meltingpot.python.configs.substrates import chemistry__three_metabolic_cycles +from meltingpot.python.configs.substrates import chemistry__three_metabolic_cycles_with_plentiful_distractors +from meltingpot.python.configs.substrates import chemistry__two_metabolic_cycles +from meltingpot.python.configs.substrates import chemistry__two_metabolic_cycles_with_distractors +from meltingpot.python.configs.substrates import clean_up +from meltingpot.python.configs.substrates import coins +from meltingpot.python.configs.substrates import collaborative_cooking__asymmetric +from meltingpot.python.configs.substrates import commons_harvest__closed +from meltingpot.python.configs.substrates import coop_mining +from meltingpot.python.configs.substrates import daycare +from meltingpot.python.configs.substrates import externality_mushrooms__dense +from meltingpot.python.configs.substrates import factory_commons__either_or +from meltingpot.python.configs.substrates import fruit_market__concentric_rivers +from meltingpot.python.configs.substrates import gift_refinements +from 
meltingpot.python.configs.substrates import paintball__capture_the_flag +from meltingpot.python.configs.substrates import paintball__king_of_the_hill +from meltingpot.python.configs.substrates import predator_prey__alley_hunt +from meltingpot.python.configs.substrates import predator_prey__orchard +from meltingpot.python.configs.substrates import prisoners_dilemma_in_the_matrix__arena +from meltingpot.python.configs.substrates import territory__rooms from meltingpot.python.human_players import level_playing_utils from meltingpot.python.human_players import play_allelopathic_harvest -from meltingpot.python.human_players import play_any_paintball_game from meltingpot.python.human_players import play_anything_in_the_matrix +from meltingpot.python.human_players import play_boat_race +from meltingpot.python.human_players import play_chemistry from meltingpot.python.human_players import play_clean_up +from meltingpot.python.human_players import play_coins from meltingpot.python.human_players import play_collaborative_cooking from meltingpot.python.human_players import play_commons_harvest -from meltingpot.python.human_players import play_grid_land +from meltingpot.python.human_players import play_coop_mining +from meltingpot.python.human_players import play_daycare +from meltingpot.python.human_players import play_externality_mushrooms +from meltingpot.python.human_players import play_factory_commons +from meltingpot.python.human_players import play_fruit_market +from meltingpot.python.human_players import play_gift_refinements +from meltingpot.python.human_players import play_paintball +from meltingpot.python.human_players import play_predator_and_prey from meltingpot.python.human_players import play_territory -class HumanActionReaderTest(parameterized.TestCase): - - @parameterized.parameters( - ( - { # Capture the following key events, - 'move': level_playing_utils.get_direction_pressed, - }, # given this action name, key pressed, for this player index; and - 
pygame.K_w, '1', - # Expecting this action list out. - {'1.move': 1, '2.move': 0, '3.move': 0}, - ), ( - { # Capture the following key events, - 'move': level_playing_utils.get_direction_pressed, - }, # given this action name, key pressed, for this player index; and - pygame.K_s, '3', - # Expecting this action list out. - {'1.move': 0, '2.move': 0, '3.move': 3}, - ), ( - { # Capture the following key events, - 'move': level_playing_utils.get_direction_pressed, - }, # given this action name, key pressed, for this player index; and - pygame.K_s, '1', - # Expecting this action list out. - {'1.move': 3, '2.move': 0, '3.move': 0}, - ), ( - { # Capture the following key events, - 'move': level_playing_utils.get_direction_pressed, - }, # given action name, irrelevant key pressed, for player 0; and - pygame.K_x, '1', - # Expecting this action list out. - {'1.move': 0, '2.move': 0, '3.move': 0}, - ), ( - { # Capture the following key events (don't need to make sense), - 'move': level_playing_utils.get_space_key_pressed, - }, # given action name, irrelevant key pressed, for player 0; and - pygame.K_SPACE, '1', - # Expecting this action list out. 
- {'1.move': 1, '2.move': 0, '3.move': 0}, - ), - ) - @mock.patch.object(pygame, 'key') - def test_human_action(self, action_map, key_pressed, player_index, - expected_action, mock_key): - retval = collections.defaultdict(bool) - retval[key_pressed] = True - mock_key.get_pressed.return_value = retval - - move_array = specs.BoundedArray( - shape=tuple(), dtype=np.intc, minimum=0, maximum=4, name='move') - action_spec = { - '1.move': move_array, - '2.move': move_array, - '3.move': move_array, - } - with mock.patch.object(dmlab2d, 'Lab2d') as env: - env.action_spec.return_value = action_spec - har = level_playing_utils.ActionReader(env, action_map) - np.testing.assert_array_equal(har.step(player_index), expected_action) - - class PlayLevelTest(parameterized.TestCase): - @parameterized.parameters( - (mp_allelopathic_harvest, play_allelopathic_harvest), - (mp_arena_running_with_scissors_itm, play_anything_in_the_matrix), - (mp_bach_or_stravinsky_itm, play_anything_in_the_matrix), - (mp_capture_the_flag, play_any_paintball_game), - (mp_chemistry_metabolic_cycles, play_grid_land), - (mp_chicken_itm, play_anything_in_the_matrix), - (mp_clean_up, play_clean_up), - (mp_collaborative_cooking_passable, play_collaborative_cooking), - (mp_commons_harvest_closed, play_commons_harvest), - (mp_king_of_the_hill, play_any_paintball_game), - (mp_prisoners_dilemma_itm, play_anything_in_the_matrix), - (mp_pure_coordination_itm, play_anything_in_the_matrix), - (mp_rationalizable_coordination_itm, play_anything_in_the_matrix), - (mp_running_with_scissors_itm, play_anything_in_the_matrix), - (mp_stag_hunt_itm, play_anything_in_the_matrix), - (mp_territory_rooms, play_territory), - ) + @parameterized.named_parameters( + ('allelopathic_harvest__open', allelopathic_harvest__open, + play_allelopathic_harvest), + ('boat_race__eight_races', boat_race__eight_races, play_boat_race), + ('chemistry__three_metabolic_cycles', chemistry__three_metabolic_cycles, + play_chemistry), + 
('chemistry__three_metabolic_cycles_with_plentiful_distractors', + chemistry__three_metabolic_cycles_with_plentiful_distractors, + play_chemistry), + ('chemistry__two_metabolic_cycles', chemistry__two_metabolic_cycles, + play_chemistry), + ('chemistry__two_metabolic_cycles_with_distractors', + chemistry__two_metabolic_cycles_with_distractors, play_chemistry), + ('clean_up', clean_up, play_clean_up), + ('coins', coins, play_coins), + ('collaborative_cooking__asymmetric', collaborative_cooking__asymmetric, + play_collaborative_cooking), + ('commons_harvest__closed', commons_harvest__closed, + play_commons_harvest), + ('coop_mining', coop_mining, play_coop_mining), + ('daycare', daycare, play_daycare), + ('externality_mushrooms__dense', externality_mushrooms__dense, + play_externality_mushrooms), + ('factory_commons__either_or', factory_commons__either_or, + play_factory_commons), + ('fruit_market__concentric_rivers', fruit_market__concentric_rivers, + play_fruit_market), + ('gift_refinements', gift_refinements, play_gift_refinements), + ('paintball__capture_the_flag', paintball__capture_the_flag, + play_paintball), + ('paintball__king_of_the_hill', paintball__king_of_the_hill, + play_paintball), + ('predator_prey__alley_hunt', predator_prey__alley_hunt, + play_predator_and_prey), + ('predator_prey__orchard', predator_prey__orchard, + play_predator_and_prey), + ('prisoners_dilemma_in_the_matrix__arena', + prisoners_dilemma_in_the_matrix__arena, play_anything_in_the_matrix), + ('territory__rooms', territory__rooms, play_territory), + ) @mock.patch.object(pygame, 'key') @mock.patch.object(pygame, 'display') @mock.patch.object(pygame, 'event') @mock.patch.object(pygame, 'time') def test_run_level( self, config_module, play_module, unused_k, unused_d, unused_e, unused_t): - full_config = config_module.get_config() - full_config['lab2d_settings']['maxEpisodeLengthFrames'] = 10 + env_module = config_module + env_config = env_module.get_config() + + with 
config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + + env_config['lab2d_settings']['maxEpisodeLengthFrames'] = 10 level_playing_utils.run_episode( - 'RGB', {}, play_module._ACTION_MAP, full_config) + 'RGB', {}, play_module._ACTION_MAP, env_config) if __name__ == '__main__': diff --git a/meltingpot/python/human_players/play_paintball.py b/meltingpot/python/human_players/play_paintball.py new file mode 100644 index 00000000..161f59aa --- /dev/null +++ b/meltingpot/python/human_players/play_paintball.py @@ -0,0 +1,86 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing any `paintball__*` substrate. + +Use `WASD` keys to move the character around. +Use `Q and E` to turn the character. +Use `SPACE` to fire the zapper. +Use `TAB` to switch between players. 
+""" + +import argparse +import json +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import paintball__capture_the_flag +from meltingpot.python.configs.substrates import paintball__king_of_the_hill +from meltingpot.python.human_players import level_playing_utils + + +def get_zap() -> int: + """Sets zap to either 0, 1, or 2.""" + if level_playing_utils.get_right_shift_pressed(): + return 2 + if level_playing_utils.get_space_key_pressed(): + return 1 + return 0 + + +environment_configs = { + 'paintball__capture_the_flag': paintball__capture_the_flag, + 'paintball__king_of_the_hill': paintball__king_of_the_hill, +} + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + 'fireZap': get_zap, +} + + +def verbose_fn(unused_env, unused_player_index): + pass + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, default='paintball__capture_the_flag', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. 
+ parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, + env_config, level_playing_utils.RenderType.PYGAME, + verbose_fn=verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_predator_and_prey.py b/meltingpot/python/human_players/play_predator_and_prey.py new file mode 100644 index 00000000..c879a0f4 --- /dev/null +++ b/meltingpot/python/human_players/play_predator_and_prey.py @@ -0,0 +1,94 @@ +# Copyright 2022 DeepMind Technologies Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A simple human player for testing the `predator_prey__*` substrates. + +Use `WASD` keys to move the character around. +Use `Q and E` to turn the character. +Use `TAB` to switch between players. +Use `space bar` to select the 'eat' (i.e. the `interact` action). 
+""" + +import argparse +import json + +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import predator_prey__alley_hunt +from meltingpot.python.configs.substrates import predator_prey__open +from meltingpot.python.configs.substrates import predator_prey__orchard +from meltingpot.python.configs.substrates import predator_prey__random_forest +from meltingpot.python.human_players import level_playing_utils + + +MAX_SCREEN_WIDTH = 800 +MAX_SCREEN_HEIGHT = 600 +FRAMES_PER_SECOND = 8 + +environment_configs = { + 'predator_prey__alley_hunt': predator_prey__alley_hunt, + 'predator_prey__open': predator_prey__open, + 'predator_prey__orchard': predator_prey__orchard, + 'predator_prey__random_forest': predator_prey__random_forest, +} + +_ACTION_MAP = { + 'move': level_playing_utils.get_direction_pressed, + 'turn': level_playing_utils.get_turn_pressed, + # 'interact' is the 'eat' action for this substrate. + 'interact': level_playing_utils.get_space_key_pressed, +} + + +def verbose_fn(env_timestep, player_index): + """Print using this function once enabling the option --verbose=True.""" + lua_index = player_index + 1 + stamina = env_timestep.observation[f'{lua_index}.STAMINA'] + # Only print observations from player 0. + if player_index == 0: + print(f'player: {player_index} --- stamina: {stamina}') + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--level_name', type=str, + default='magi_suite_predator_prey__alley_hunt', + help='Level name to load') + parser.add_argument( + '--observation', type=str, default='RGB', help='Observation to render') + parser.add_argument( + '--settings', type=json.loads, default={}, help='Settings as JSON string') + # Activate verbose mode with --verbose=True. + parser.add_argument( + '--verbose', type=bool, default=False, help='Print debug information') + # Activate events printing mode with --print_events=True. 
+ parser.add_argument( + '--print_events', type=bool, default=False, help='Print events') + + args = parser.parse_args() + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) + level_playing_utils.run_episode( + args.observation, args.settings, _ACTION_MAP, env_config, + level_playing_utils.RenderType.PYGAME, MAX_SCREEN_WIDTH, + MAX_SCREEN_HEIGHT, FRAMES_PER_SECOND, + verbose_fn if args.verbose else None, + print_events=args.print_events) + + +if __name__ == '__main__': + main() diff --git a/meltingpot/python/human_players/play_territory.py b/meltingpot/python/human_players/play_territory.py index 6245bf22..4ebc1689 100644 --- a/meltingpot/python/human_players/play_territory.py +++ b/meltingpot/python/human_players/play_territory.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -22,14 +22,18 @@ import argparse import json -from meltingpot.python.configs.substrates import territory_open as mp_territory_open -from meltingpot.python.configs.substrates import territory_rooms as mp_territory_rooms +from ml_collections import config_dict + +from meltingpot.python.configs.substrates import territory__inside_out +from meltingpot.python.configs.substrates import territory__open +from meltingpot.python.configs.substrates import territory__rooms from meltingpot.python.human_players import level_playing_utils environment_configs = { - 'mp_territory_open': mp_territory_open, - 'mp_territory_rooms': mp_territory_rooms, + 'territory__open': territory__open, + 'territory__rooms': territory__rooms, + 'territory__inside_out': territory__inside_out, } _ACTION_MAP = { @@ -47,7 +51,7 @@ def verbose_fn(unused_env, unused_player_index): def main(): parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - '--level_name', type=str, default='mp_territory_rooms', + '--level_name', type=str, default='territory__rooms', help='Level name to load') parser.add_argument( '--observation', type=str, default='RGB', help='Observation to render') @@ -61,10 +65,14 @@ def main(): '--print_events', type=bool, default=False, help='Print events') args = parser.parse_args() - env_config = environment_configs[args.level_name] + env_module = environment_configs[args.level_name] + env_config = env_module.get_config() + with config_dict.ConfigDict(env_config).unlocked() as env_config: + roles = env_config.default_player_roles + env_config.lab2d_settings = env_module.build(roles, env_config) level_playing_utils.run_episode( - args.observation, args.settings, _ACTION_MAP, env_config.get_config(), - level_playing_utils.RenderType.PYGAME, + args.observation, args.settings, _ACTION_MAP, + env_config, level_playing_utils.RenderType.PYGAME, verbose_fn=verbose_fn if args.verbose else None, print_events=args.print_events) diff --git a/meltingpot/python/scenario.py 
b/meltingpot/python/scenario.py index 6e1f25a6..0c8525d6 100644 --- a/meltingpot/python/scenario.py +++ b/meltingpot/python/scenario.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,26 +14,29 @@ """Scenario factory.""" import collections -from collections.abc import Mapping, Set +from collections.abc import Collection, Mapping +from typing import Callable, Optional import immutabledict -from ml_collections import config_dict -from meltingpot.python import bot as bot_factory -from meltingpot.python import substrate as substrate_factory +from meltingpot.python import bot as mp_bot +from meltingpot.python import substrate as mp_substrate from meltingpot.python.configs import scenarios as scenario_configs -from meltingpot.python.utils.scenarios import scenario as scenario_lib -from meltingpot.python.utils.scenarios import substrate_transforms +from meltingpot.python.utils.scenarios import scenario +from meltingpot.python.utils.scenarios import scenario_factory +from meltingpot.python.utils.substrates import substrate as substrate_lib SCENARIOS = frozenset(scenario_configs.SCENARIO_CONFIGS) -AVAILABLE_SCENARIOS = SCENARIOS +SubstrateTransform = Callable[[substrate_lib.Substrate], + substrate_lib.Substrate] -def _scenarios_by_substrate() -> Mapping[str, Set[str]]: + +def _scenarios_by_substrate() -> Mapping[str, Collection[str]]: """Returns a mapping from substrates to their scenarios.""" scenarios_by_substrate = collections.defaultdict(list) - for scenario_name, config in scenario_configs.SCENARIO_CONFIGS.items(): - scenarios_by_substrate[config.substrate].append(scenario_name) + for name, config in scenario_configs.SCENARIO_CONFIGS.items(): + scenarios_by_substrate[config.substrate].append(name) return immutabledict.immutabledict({ substrate: frozenset(scenarios) 
for substrate, scenarios in scenarios_by_substrate.items() @@ -46,77 +49,84 @@ def _scenarios_by_substrate() -> Mapping[str, Set[str]]: # The primary visual input. 'RGB', # Extra observations used in some substrates. + 'HUNGER', 'INVENTORY', + 'MY_OFFER', + 'OFFERS', 'READY_TO_SHOOT', + 'STAMINA', }) -def get_config(scenario_name: str) -> config_dict.ConfigDict: - """Returns a config for the specified scenario. +def get_config(name: str) -> scenario_configs.ScenarioConfig: + """Returns the config for the specified scenario.""" + return scenario_configs.SCENARIO_CONFIGS[name] + + +def build( + name: str, + *, + substrate_transform: Optional[SubstrateTransform] = None, +) -> scenario.Scenario: + """Builds an instance of the specified scenario. Args: - scenario_name: Name of the scenario. Must be in AVAILABLE_SCENARIOS. + name: the scenario. + substrate_transform: optional transform to apply to underlying substrate. + This is intended for training purposes and should not be used during + evaluation. If applied, the observations will not be restricted to + PERMITTED_OBSERVATIONS. + + Returns: + The test scenario. 
""" - if scenario_name not in AVAILABLE_SCENARIOS: - raise ValueError(f'Unknown scenario {scenario_name!r}') - scenario = scenario_configs.SCENARIO_CONFIGS[scenario_name] - substrate = substrate_factory.get_config(scenario.substrate) - bots = { - name: bot_factory.get_config(name) - for name in set().union(*scenario.bots_by_role.values()) - } - focal_player_roles = tuple( - role for n, role in enumerate(scenario.roles) if scenario.is_focal[n] - ) - focal_timestep_spec = substrate.timestep_spec._replace( - observation=immutabledict.immutabledict({ - key: spec for key, spec in substrate.timestep_spec.observation.items() - if key in PERMITTED_OBSERVATIONS - }), - ) - config = config_dict.create( - substrate=substrate, - roles=scenario.roles, - is_focal=scenario.is_focal, - num_players=sum(scenario.is_focal), - bots=bots, - bots_by_role=scenario.bots_by_role, - substrate_transform=None, - permitted_observations=set(PERMITTED_OBSERVATIONS), - focal_player_roles=focal_player_roles, - timestep_spec=focal_timestep_spec, - action_spec=substrate.action_spec, - ) - return config.lock() + config = get_config(name) + return build_from_config(config, substrate_transform=substrate_transform) -def build(config: config_dict.ConfigDict) -> scenario_lib.Scenario: - """Builds a scenario for the given config. +def build_from_config( + config: scenario_configs.ScenarioConfig, + *, + substrate_transform: Optional[SubstrateTransform] = None, +) -> scenario.Scenario: + """Builds a scenario from the provided config. Args: - config: config resulting from `get_config`. + config: bot config + substrate_transform: optional transform to apply to underlying substrate. + This is intended for training purposes and should not be used during + evaluation. If applied, the observations will not be restricted to + PERMITTED_OBSERVATIONS. Returns: The test scenario. """ - # TODO(b/227143834): pass roles to substrate when building. 
- substrate = substrate_factory.build(config.substrate) - if config.substrate_transform: - substrate = config.substrate_transform(substrate) - permitted_observations = set(substrate.observation_spec()[0]) - if not config.substrate_transform: - permitted_observations &= config.permitted_observations - # Add observations needed by some bots. These are removed for focal players. - # TODO(b/258239516): remove this wrapper in a future release. - substrate = substrate_transforms.with_tf1_bot_required_observations(substrate) + factory = get_factory_from_config(config) + if substrate_transform is None: + return factory.build() + else: + return factory.build_transformed(substrate_transform) + + +def get_factory(name: str) -> scenario_factory.ScenarioFactory: + """Returns the factory for the specified scenario.""" + config = scenario_configs.SCENARIO_CONFIGS[name] + return get_factory_from_config(config) + + +def get_factory_from_config( + config: scenario_configs.ScenarioConfig, +) -> scenario_factory.ScenarioFactory: + """Returns a factory from the provided config.""" + substrate = mp_substrate.get_factory(config.substrate) bots = { - bot_name: bot_factory.build(bot_config) - for bot_name, bot_config in config.bots.items() + name: mp_bot.get_factory(name) + for name in set().union(*config.bots_by_role.values()) } - return scenario_lib.build_scenario( + return scenario_factory.ScenarioFactory( substrate=substrate, + roles=config.roles, bots=bots, bots_by_role=config.bots_by_role, - roles=config.roles, is_focal=config.is_focal, - permitted_observations=permitted_observations) + permitted_observations=PERMITTED_OBSERVATIONS) diff --git a/meltingpot/python/scenario_test.py b/meltingpot/python/scenario_test.py index eb769ee3..2aeb6f6e 100644 --- a/meltingpot/python/scenario_test.py +++ b/meltingpot/python/scenario_test.py @@ -16,22 +16,21 @@ from absl.testing import absltest from absl.testing import parameterized -from meltingpot.python import scenario as scenario_factory +from 
meltingpot.python import scenario from meltingpot.python.testing import substrates as test_utils -@parameterized.named_parameters( - (name, name) for name in scenario_factory.AVAILABLE_SCENARIOS) +@parameterized.named_parameters((name, name) for name in scenario.SCENARIOS) class ScenarioTest(test_utils.SubstrateTestCase): def test_scenario(self, name): - config = scenario_factory.get_config(name) - action_spec = [config.action_spec] * config.num_players - discount_spec = config.timestep_spec.discount - reward_spec = [config.timestep_spec.reward] * config.num_players - observation_spec = [ - dict(config.timestep_spec.observation)] * config.num_players - with scenario_factory.build(config) as env: + factory = scenario.get_factory(name) + num_players = factory.num_focal_players() + action_spec = [factory.action_spec()] * num_players + reward_spec = [factory.timestep_spec().reward] * num_players + discount_spec = factory.timestep_spec().discount + observation_spec = [factory.timestep_spec().observation] * num_players + with factory.build() as env: with self.subTest('step'): self.assert_step_matches_specs(env) with self.subTest('discount_spec'): @@ -43,9 +42,8 @@ def test_scenario(self, name): with self.subTest('observation_spec'): self.assertSequenceEqual(env.observation_spec(), observation_spec) with self.subTest('only_permitted'): - self.assertContainsSubset( - config.timestep_spec.observation, - scenario_factory.PERMITTED_OBSERVATIONS) + self.assertContainsSubset(factory.timestep_spec().observation, + scenario.PERMITTED_OBSERVATIONS) if __name__ == '__main__': diff --git a/meltingpot/python/substrate.py b/meltingpot/python/substrate.py index 67459650..7a3fd011 100644 --- a/meltingpot/python/substrate.py +++ b/meltingpot/python/substrate.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,37 +13,73 @@ # limitations under the License. """Substrate builder.""" +from collections.abc import Sequence + from ml_collections import config_dict from meltingpot.python.configs import substrates as substrate_configs from meltingpot.python.utils.substrates import substrate +from meltingpot.python.utils.substrates import substrate_factory SUBSTRATES = substrate_configs.SUBSTRATES -AVAILABLE_SUBSTRATES = SUBSTRATES -def get_config(substrate_name: str) -> config_dict.ConfigDict: - """Returns the configs for the substrate. +def get_config(name: str) -> config_dict.ConfigDict: + """Returns the configs for the specified substrate.""" + return substrate_configs.get_config(name).lock() + + +def build(name: str, *, roles: Sequence[str]) -> substrate.Substrate: + """Builds an instance of the specified substrate. Args: - substrate_name: name of the substrate. Must be in AVAILABLE_SUBSTRATES. + name: name of the substrate. + roles: sequence of strings defining each player's role. The length of + this sequence determines the number of players. + + Returns: + The training substrate. """ - if substrate_name not in AVAILABLE_SUBSTRATES: - raise ValueError(f'Unknown substrate {substrate_name!r}.') - return substrate_configs.get_config(substrate_name).lock() + return get_factory(name).build(roles) -def build(config: config_dict.ConfigDict) -> substrate.Substrate: - """Builds the substrate given the config. +def build_from_config( + config: config_dict.ConfigDict, + *, + roles: Sequence[str], +) -> substrate.Substrate: + """Builds a substrate from the provided config. Args: config: config resulting from `get_config`. + roles: sequence of strings defining each player's role. The length of + this sequence determines the number of players. Returns: The training substrate. 
""" - return substrate.build_substrate( - lab2d_settings=config.lab2d_settings, + return get_factory_from_config(config).build(roles) + + +def get_factory(name: str) -> substrate_factory.SubstrateFactory: + """Returns the factory for the specified substrate.""" + config = substrate_configs.get_config(name) + return get_factory_from_config(config) + + +def get_factory_from_config( + config: config_dict.ConfigDict) -> substrate_factory.SubstrateFactory: + """Returns a factory from the provided config.""" + + def lab2d_settings_builder(roles): + return config.lab2d_settings_builder(roles=roles, config=config) + + return substrate_factory.SubstrateFactory( + lab2d_settings_builder=lab2d_settings_builder, individual_observations=config.individual_observation_names, global_observations=config.global_observation_names, - action_table=config.action_set) + action_table=config.action_set, + timestep_spec=config.timestep_spec, + action_spec=config.action_spec, + valid_roles=config.valid_roles, + default_player_roles=config.default_player_roles) diff --git a/meltingpot/python/substrate_test.py b/meltingpot/python/substrate_test.py index 9cefacca..7e9f9685 100644 --- a/meltingpot/python/substrate_test.py +++ b/meltingpot/python/substrate_test.py @@ -20,18 +20,17 @@ from meltingpot.python.testing import substrates as test_utils -@parameterized.named_parameters( - (name, name) for name in substrate.AVAILABLE_SUBSTRATES) -class SubstrateTestCase(test_utils.SubstrateTestCase): +@parameterized.named_parameters((name, name) for name in substrate.SUBSTRATES) +class PerSubstrateTestCase(test_utils.SubstrateTestCase): def test_substrate(self, name): - config = substrate.get_config(name) - action_spec = [config.action_spec] * config.num_players - discount_spec = config.timestep_spec.discount - reward_spec = [config.timestep_spec.reward] * config.num_players - observation_spec = [ - dict(config.timestep_spec.observation)] * config.num_players - with substrate.build(config) as env: + 
factory = substrate.get_factory(name) + roles = factory.default_player_roles() + action_spec = [factory.action_spec()] * len(roles) + reward_spec = [factory.timestep_spec().reward] * len(roles) + discount_spec = factory.timestep_spec().discount + observation_spec = [dict(factory.timestep_spec().observation)] * len(roles) + with factory.build(roles) as env: with self.subTest('step'): self.assert_step_matches_specs(env) with self.subTest('discount_spec'): diff --git a/meltingpot/python/utils/policies/saved_model_policy.py b/meltingpot/python/utils/policies/saved_model_policy.py index 734abbc3..e51430b3 100644 --- a/meltingpot/python/utils/policies/saved_model_policy.py +++ b/meltingpot/python/utils/policies/saved_model_policy.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ """Policy from a Saved Model.""" import contextlib -from typing import Mapping, Tuple, TypeVar +import random import dm_env import numpy as np @@ -24,16 +24,40 @@ from meltingpot.python.utils.policies import permissive_model from meltingpot.python.utils.policies import policy -State = TypeVar('State') + +def _numpy_to_placeholder( + template: tree.Structure[np.ndarray]) -> tree.Structure[tf.Tensor]: + """Returns placeholders that matches a given template. + + Args: + template: template numpy arrays. + + Returns: + A tree of placeholders matching the template arrays' specs. 
+ """ + fn = lambda x: tf.compat.v1.placeholder(shape=x.shape, dtype=x.dtype) + return tree.map_structure(fn, template) + + +def _downcast(x): + """Downcasts input to 32-bit precision.""" + if not isinstance(x, np.ndarray): + return x + elif x.dtype == np.float64: + return np.asarray(x, dtype=np.float32) + elif x.dtype == np.int64: + return np.asarray(x, dtype=np.int32) + else: + return x class TF2SavedModelPolicy(policy.Policy[tree.Structure[tf.Tensor]]): """Policy wrapping a saved model for TF2 inference. Note: the model should have methods: - 1. `initial_state(batch_size, trainable)` - 2. `step(step_type, reward, discount, observation, prev_state)` - that accept batched inputs and produce batched outputs. + 1. `initial_state(random_key)` + 2. `step(key, timestep, prev_state)` + that accept unbatched inputs. """ def __init__(self, model_path: str, device_name: str = 'cpu') -> None: @@ -53,53 +77,29 @@ def step( self, timestep: dm_env.TimeStep, prev_state: tree.Structure[tf.Tensor], - ) -> Tuple[int, tree.Structure[tf.Tensor]]: + ) -> tuple[int, tree.Structure[tf.Tensor]]: """See base class.""" - step_type = np.array(timestep.step_type, dtype=np.int64)[None] - reward = np.asarray(timestep.reward, dtype=np.float32)[None] - discount = np.asarray(timestep.discount, dtype=np.float32)[None] - observation = tree.map_structure(lambda x: x[None], timestep.observation) - output, next_state = self._strategy.run( - fn=self._model.step, - kwargs=dict( - step_type=step_type, - reward=reward, - discount=discount, - observation=observation, - prev_state=prev_state, - ), + prev_key, prev_state = prev_state + timestep = timestep._replace( + step_type=int(timestep.step_type), + observation=tree.map_structure(_downcast, timestep.observation), ) - if isinstance(output.action, Mapping): - # Legacy bots trained with older action spec. 
- action = output.action['environment_action'] - else: - action = output.action - action = int(action.numpy()[0]) - return action, next_state + next_key, outputs = self._strategy.run( + self._model.step, [prev_key, timestep, prev_state]) + (action, _), next_state = outputs + return int(action.numpy()), (next_key, next_state) def initial_state(self) -> tree.Structure[tf.Tensor]: """See base class.""" - return self._strategy.run( - fn=self._model.initial_state, kwargs=dict(batch_size=1, trainable=None)) + random_seed = random.getrandbits(32) + seed_key = np.array([0, random_seed], dtype=np.uint32) + key, state = self._strategy.run(self._model.initial_state, [seed_key]) + return key, state def close(self) -> None: """See base class.""" -def _numpy_to_placeholder( - template: tree.Structure[np.ndarray]) -> tree.Structure[tf.Tensor]: - """Returns placeholders that matches a given template. - - Args: - template: template numpy arrays. - - Returns: - A tree of placeholders matching the template arrays' specs. - """ - fn = lambda x: tf.compat.v1.placeholder(shape=x.shape, dtype=x.dtype) - return tree.map_structure(fn, template) - - class TF1SavedModelPolicy(policy.Policy[tree.Structure[np.ndarray]]): """Policy wrapping a saved model for TF1 inference. @@ -138,8 +138,9 @@ def _build_context(self): def _build_initial_state_graph(self) -> None: """Builds the TF1 subgraph for the initial_state operation.""" with self._build_context(): - self._initial_state_outputs = self._model.initial_state( - batch_size=1, trainable=None) + key_in = tf.compat.v1.placeholder(shape=[2], dtype=np.uint32) + self._initial_state_outputs = self._model.initial_state(key_in) + self._initial_state_input = key_in def _build_step_graph(self, timestep, prev_state) -> None: """Builds the TF1 subgraph for the step operation. 
@@ -152,37 +153,33 @@ def _build_step_graph(self, timestep, prev_state) -> None: self._build_initial_state_graph() with self._build_context(): - step_type_in = tf.compat.v1.placeholder(shape=[], dtype=np.int64) + step_type_in = tf.compat.v1.placeholder(shape=[], dtype=np.int32) reward_in = tf.compat.v1.placeholder(shape=[], dtype=np.float32) discount_in = tf.compat.v1.placeholder(shape=[], dtype=np.float32) observation_in = _numpy_to_placeholder(timestep.observation) - prev_state_in = _numpy_to_placeholder(prev_state) - output, next_state = self._model.step( - step_type=step_type_in[None], - reward=reward_in[None], - discount=discount_in[None], - observation=tree.map_structure(lambda x: x[None], observation_in), - prev_state=prev_state_in) - if isinstance(output.action, Mapping): - # Legacy bots trained with older action spec. - action = output.action['environment_action'][0] - else: - action = output.action[0] - - timestep_in = dm_env.TimeStep( - step_type=step_type_in, - reward=reward_in, - discount=discount_in, - observation=observation_in) - self._step_inputs = tree.flatten([timestep_in, prev_state_in]) - self._step_outputs = (action, next_state) + timestep_in = dm_env.TimeStep( + step_type=step_type_in, + reward=reward_in, + discount=discount_in, + observation=observation_in) + prev_key_in, prev_state_in = _numpy_to_placeholder(prev_state) + next_key, outputs = self._model.step(prev_key_in, timestep_in, + prev_state_in) + (action, _), next_state = outputs + self._step_inputs = tree.flatten( + [timestep_in, (prev_key_in, prev_state_in)]) + self._step_outputs = (action, (next_key, next_state)) self._graph.finalize() def step( self, timestep: dm_env.TimeStep, prev_state: tree.Structure[np.ndarray] - ) -> Tuple[int, tree.Structure[np.ndarray]]: + ) -> tuple[int, tree.Structure[np.ndarray]]: """See base class.""" + timestep = timestep._replace( + step_type=int(timestep.step_type), + observation=tree.map_structure(_downcast, timestep.observation), + ) if not 
self._step_inputs: self._build_step_graph(timestep, prev_state) input_values = tree.flatten([timestep, prev_state]) @@ -194,7 +191,10 @@ def initial_state(self) -> tree.Structure[np.ndarray]: """See base class.""" if not self._initial_state_outputs: self._build_initial_state_graph() - return self._session.run(self._initial_state_outputs) + random_seed = random.getrandbits(32) + seed_key = np.array([0, random_seed], dtype=np.uint32) + feed_dict = {self._initial_state_input: seed_key} + return self._session.run(self._initial_state_outputs, feed_dict) def close(self) -> None: """See base class.""" diff --git a/meltingpot/python/utils/policies/saved_model_policy_v2.py b/meltingpot/python/utils/policies/saved_model_policy_v2.py deleted file mode 100644 index 3938b99c..00000000 --- a/meltingpot/python/utils/policies/saved_model_policy_v2.py +++ /dev/null @@ -1,210 +0,0 @@ -# Copyright 2022 DeepMind Technologies Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Policy built from newer Saved Models.""" - -import contextlib -import random -from typing import Tuple - -import dm_env -import numpy as np -import tensorflow as tf -import tree - -from meltingpot.python.utils.policies import permissive_model -from meltingpot.python.utils.policies import policy - - -def _numpy_to_placeholder( - template: tree.Structure[np.ndarray]) -> tree.Structure[tf.Tensor]: - """Returns placeholders that matches a given template. - - Args: - template: template numpy arrays. 
- - Returns: - A tree of placeholders matching the template arrays' specs. - """ - fn = lambda x: tf.compat.v1.placeholder(shape=x.shape, dtype=x.dtype) - return tree.map_structure(fn, template) - - -def _downcast(x): - """Downcasts input to 32-bit precision.""" - if not isinstance(x, np.ndarray): - return x - elif x.dtype == np.float64: - return np.asarray(x, dtype=np.float32) - elif x.dtype == np.int64: - return np.asarray(x, dtype=np.int32) - else: - return x - - -class TF2SavedModelPolicy(policy.Policy[tree.Structure[tf.Tensor]]): - """Policy wrapping a saved model for inference. - - Note: the model should have methods: - 1. `initial_state(random_key)` - 2. `step(key, timestep, prev_state)` - that accept unbatched inputs. - """ - - def __init__(self, model_path: str, device_name: str = 'cpu') -> None: - """Initialize a policy instance. - - Args: - model_path: Path to the SavedModel. - device_name: Device to load SavedModel onto. Defaults to a cpu device. - See tf.device for supported device names. 
- """ - self._strategy = tf.distribute.OneDeviceStrategy(device_name) - with self._strategy.scope(): - model = tf.saved_model.load(model_path) - self._model = permissive_model.PermissiveModel(model) - - def step( - self, - timestep: dm_env.TimeStep, - prev_state: tree.Structure[tf.Tensor], - ) -> Tuple[int, tree.Structure[tf.Tensor]]: - """See base class.""" - prev_key, prev_state = prev_state - timestep = timestep._replace( - step_type=int(timestep.step_type), - observation=tree.map_structure(_downcast, timestep.observation), - ) - next_key, outputs = self._strategy.run( - self._model.step, [prev_key, timestep, prev_state]) - (action, _), next_state = outputs - return int(action.numpy()), (next_key, next_state) - - def initial_state(self) -> tree.Structure[tf.Tensor]: - """See base class.""" - random_seed = random.getrandbits(32) - seed_key = np.array([0, random_seed], dtype=np.uint32) - key, state = self._strategy.run(self._model.initial_state, [seed_key]) - return key, state - - def close(self) -> None: - """See base class.""" - - -class TF1SavedModelPolicy(policy.Policy[tree.Structure[np.ndarray]]): - """Policy wrapping a saved model for TF1 inference. - - Note: the model should have methods: - 1. `initial_state(batch_size, trainable)` - 2. `step(step_type, reward, discount, observation, prev_state)` - that accept batched inputs and produce batched outputs. - """ - - def __init__(self, model_path: str, device_name: str = 'cpu') -> None: - """Initialize a policy instance. - - Args: - model_path: Path to the SavedModel. - device_name: Device to load SavedModel onto. Defaults to a cpu device. See - tf.device for supported device names. 
- """ - self._device_name = device_name - self._graph = tf.compat.v1.Graph() - self._session = tf.compat.v1.Session(graph=self._graph) - - with self._build_context(): - model = tf.compat.v1.saved_model.load_v2(model_path) - self._model = permissive_model.PermissiveModel(model) - - self._initial_state_outputs = None - self._step_inputs = None - self._step_outputs = None - - @contextlib.contextmanager - def _build_context(self): - with self._graph.as_default(): - with tf.compat.v1.device(self._device_name): - yield - - def _build_initial_state_graph(self) -> None: - """Builds the TF1 subgraph for the initial_state operation.""" - with self._build_context(): - key_in = tf.compat.v1.placeholder(shape=[2], dtype=np.uint32) - self._initial_state_outputs = self._model.initial_state(key_in) - self._initial_state_input = key_in - - def _build_step_graph(self, timestep, prev_state) -> None: - """Builds the TF1 subgraph for the step operation. - - Args: - timestep: an example timestep. - prev_state: an example previous state. 
- """ - if not self._initial_state_outputs: - self._build_initial_state_graph() - - with self._build_context(): - step_type_in = tf.compat.v1.placeholder(shape=[], dtype=np.int32) - reward_in = tf.compat.v1.placeholder(shape=[], dtype=np.float32) - discount_in = tf.compat.v1.placeholder(shape=[], dtype=np.float32) - observation_in = _numpy_to_placeholder(timestep.observation) - timestep_in = dm_env.TimeStep( - step_type=step_type_in, - reward=reward_in, - discount=discount_in, - observation=observation_in) - prev_key_in, prev_state_in = _numpy_to_placeholder(prev_state) - next_key, outputs = self._model.step(prev_key_in, timestep_in, - prev_state_in) - (action, _), next_state = outputs - self._step_inputs = tree.flatten( - [timestep_in, (prev_key_in, prev_state_in)]) - self._step_outputs = (action, (next_key, next_state)) - - self._graph.finalize() - - def step( - self, - timestep: dm_env.TimeStep, - prev_state: tree.Structure[np.ndarray], - ) -> Tuple[int, tree.Structure[np.ndarray]]: - """See base class.""" - timestep = timestep._replace( - step_type=int(timestep.step_type), - observation=tree.map_structure(_downcast, timestep.observation), - ) - if not self._step_inputs: - self._build_step_graph(timestep, prev_state) - input_values = tree.flatten([timestep, prev_state]) - feed_dict = dict(zip(self._step_inputs, input_values)) - action, next_state = self._session.run(self._step_outputs, feed_dict) - return int(action), next_state - - def initial_state(self) -> tree.Structure[np.ndarray]: - """See base class.""" - if not self._initial_state_outputs: - self._build_initial_state_graph() - random_seed = random.getrandbits(32) - seed_key = np.array([0, random_seed], dtype=np.uint32) - feed_dict = {self._initial_state_input: seed_key} - return self._session.run(self._initial_state_outputs, feed_dict) - - def close(self) -> None: - """See base class.""" - self._session.close() - - -if tf.executing_eagerly(): - SavedModelPolicy = TF2SavedModelPolicy -else: - 
SavedModelPolicy = TF1SavedModelPolicy diff --git a/meltingpot/python/utils/puppeteers_v2/alternator.py b/meltingpot/python/utils/puppeteers/alternator.py similarity index 100% rename from meltingpot/python/utils/puppeteers_v2/alternator.py rename to meltingpot/python/utils/puppeteers/alternator.py diff --git a/meltingpot/python/utils/puppeteers_v2/alternator_test.py b/meltingpot/python/utils/puppeteers/alternator_test.py similarity index 94% rename from meltingpot/python/utils/puppeteers_v2/alternator_test.py rename to meltingpot/python/utils/puppeteers/alternator_test.py index e686a5ec..914bcc3f 100644 --- a/meltingpot/python/utils/puppeteers_v2/alternator_test.py +++ b/meltingpot/python/utils/puppeteers/alternator_test.py @@ -18,8 +18,8 @@ from absl.testing import absltest from absl.testing import parameterized -from meltingpot.python.utils.puppeteers_v2 import alternator -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import alternator +from meltingpot.python.utils.puppeteers import testutils _GOAL_A = mock.sentinel.goal_a _GOAL_B = mock.sentinel.goal_b diff --git a/meltingpot/python/utils/puppeteers/clean_up.py b/meltingpot/python/utils/puppeteers/clean_up.py index 00650e37..c6fa2f06 100644 --- a/meltingpot/python/utils/puppeteers/clean_up.py +++ b/meltingpot/python/utils/puppeteers/clean_up.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,130 +11,131 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Puppeteers for clean_up puppets.""" +"""Puppeteers for clean_up.""" -from typing import Any, Mapping, Tuple +import dataclasses import dm_env -import numpy as np from meltingpot.python.utils.puppeteers import puppeteer -_GOALS = puppeteer.puppet_goals(['CLEAN', 'EAT'], dtype=np.float64) -_CLEAN_ACTION = 8 +@dataclasses.dataclass(frozen=True) +class ConditionalCleanerState: + """Current state of the ConditionalCleaner. -class AlternateCleanFirst(puppeteer.Puppeteer[int]): - """Alternates cleaning and eating goals, starting with cleaning.""" - - def initial_state(self) -> int: - """See base class.""" - return 0 # step count - - def _goal(self, step_count): - if step_count < 250: - return _GOALS['CLEAN'] - elif step_count < 500: - return _GOALS['EAT'] - elif step_count < 750: - return _GOALS['CLEAN'] - else: - return _GOALS['EAT'] - - def step(self, timestep: dm_env.TimeStep, - prev_state: int) -> Tuple[dm_env.TimeStep, int]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - goal = self._goal(prev_state) - next_state = prev_state + 1 - return puppeteer.puppet_timestep(timestep, goal), next_state + Attributes: + step_count: number of timesteps previously seen in this episode. + clean_until: step_count after which to stop cleaning. + recent_cleaning: number of others cleaning on previous timesteps (ordered + from oldest to most recent). + """ + step_count: int + clean_until: int + recent_cleaning: tuple[int, ...] + + +class ConditionalCleaner(puppeteer.Puppeteer[ConditionalCleanerState]): + """Puppeteer for a reciprocating agent. + + This puppeteer's behavior depends on the behavior of others. In particular, it + tracks the total amount of others' "cleaning", and integrates this signal + using a rolling window. + + Initially, the puppet will be in a "nice" mode where it will direct the + puppet to clean the river for a fixed period. 
Once this period is over, the + puppeteer will fall into a "eating" mode where it will direct the puppet to + only eat apples. However, once the total level of others' cleaning reaches a + threshold, the puppeteer will temporarily switch to a "cleaning" mode. Once + the total level of others' cleaning drops back below threshold, the puppeteer + will clean for fixed number of steps before falling back into the "eating" + mode. + """ + def __init__(self, + *, + clean_goal: puppeteer.PuppetGoal, + eat_goal: puppeteer.PuppetGoal, + coplayer_cleaning_signal: str, + recency_window: int, + threshold: int, + reciprocation_period: int, + niceness_period: int) -> None: + """Initializes the puppeteer. -class AlternateEatFirst(puppeteer.Puppeteer[int]): - """Alternates cleaning and eating goals, starting with eating.""" + Args: + clean_goal: goal to emit to puppet when "cleaning". + eat_goal: goal to emit to puppet when "eating". + coplayer_cleaning_signal: key in observations that provides the + privileged observation of number of others cleaning in the previous + timestep. + recency_window: number of steps over which to remember others' behavior. + threshold: if the total number of (nonunique) cleaners over the + remembered period reaches this threshold, the puppeteer will direct the + puppet to clean. + reciprocation_period: the number of steps to clean for once others' + cleaning has been forgotten and fallen back below threshold. + niceness_period: the number of steps to unconditionally clean for at + the start of the episode. 
+ """ + self._clean_goal = clean_goal + self._eat_goal = eat_goal + self._coplayer_cleaning_signal = coplayer_cleaning_signal - def initial_state(self) -> int: - """See base class.""" - return 0 - - def _goal(self, step_count): - if step_count < 250: - return _GOALS['EAT'] - elif step_count < 500: - return _GOALS['CLEAN'] - elif step_count < 750: - return _GOALS['EAT'] + if threshold > 0: + self._threshold = threshold else: - return _GOALS['CLEAN'] - - def step(self, timestep: dm_env.TimeStep, - prev_state: int) -> Tuple[dm_env.TimeStep, int]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - goal = self._goal(prev_state) - next_state = prev_state + 1 - return puppeteer.puppet_timestep(timestep, goal), next_state - + raise ValueError('threshold must be positive') -class ConditionalCleaner(puppeteer.Puppeteer[Mapping[str, Any]]): - """Cleanup puppeteer for a reciprocating agent. - - Requires the agent_slot to be in the observations. - """ + if recency_window > 0: + self._recency_window = recency_window + else: + raise ValueError('recency_window must be positive') - def __init__(self, threshold: int) -> None: - """Initializes the puppeteer. + if reciprocation_period > 0: + self._reciprocation_period = reciprocation_period + else: + raise ValueError('reciprocation_period must be positive') - Args: - threshold: number of other cleaners below which it will switch to - cleaning. 
- """ - self._threshold = threshold + if niceness_period >= 0: + self._niceness_period = niceness_period + else: + raise ValueError('niceness_period must be nonnegative') - def initial_state(self) -> Mapping[str, Any]: + def initial_state(self) -> ConditionalCleanerState: """See base class.""" - return dict(step_count=0, clean_until=0, cleaning=None) + return ConditionalCleanerState( + step_count=0, clean_until=self._niceness_period, recent_cleaning=()) def step( - self, - timestep: dm_env.TimeStep, - prev_state: Mapping[str, Any], - ) -> Tuple[dm_env.TimeStep, Mapping[str, Any]]: + self, timestep: dm_env.TimeStep, prev_state: ConditionalCleanerState + ) -> tuple[dm_env.TimeStep, ConditionalCleanerState]: """See base class.""" if timestep.first(): prev_state = self.initial_state() - observation = timestep.observation - step_count = prev_state['step_count'] - clean_until = prev_state['clean_until'] - prev_cleaning = prev_state['cleaning'] - - not_me = 1 - observation['agent_slot'] - # Must have at least 1 other agent cleaning, then I'll help for a while. - near_river = (observation['global']['observations']['POSITION'][..., 1] < 9) - - # Smooth the cleaning binary vector across 2 timesteps. - cleaning = observation['global']['actions'] == _CLEAN_ACTION - if prev_cleaning is None: - prev_cleaning = cleaning - smooth_cleaning = np.logical_or(cleaning, prev_cleaning) - - # AND together the cleaning, the near river, and the negated identity - # vectors to figure out the number of other cleaners. Compare to threshold. 
- if np.logical_and(not_me, np.logical_and( - smooth_cleaning, near_river)).sum() >= self._threshold: - clean_until = step_count + 100 + step_count = prev_state.step_count + clean_until = prev_state.clean_until + recent_cleaning = prev_state.recent_cleaning + + coplayers_cleaning = int( + timestep.observation[self._coplayer_cleaning_signal]) + recent_cleaning += (coplayers_cleaning,) + recent_cleaning = recent_cleaning[-self._recency_window:] + + smooth_cleaning = sum(recent_cleaning) + if smooth_cleaning >= self._threshold: + clean_until = max(clean_until, step_count + self._reciprocation_period) + # Do not clear the recent_cleaning history after triggering. + # TODO(b/237058204): clear history in future versions. if step_count < clean_until: - goal = _GOALS['CLEAN'] + goal = self._clean_goal else: - goal = _GOALS['EAT'] + goal = self._eat_goal timestep = puppeteer.puppet_timestep(timestep, goal) - next_state = dict( + next_state = ConditionalCleanerState( step_count=step_count + 1, clean_until=clean_until, - cleaning=cleaning, - ) + recent_cleaning=recent_cleaning) return timestep, next_state diff --git a/meltingpot/python/utils/puppeteers_v2/clean_up_test.py b/meltingpot/python/utils/puppeteers/clean_up_test.py similarity index 98% rename from meltingpot/python/utils/puppeteers_v2/clean_up_test.py rename to meltingpot/python/utils/puppeteers/clean_up_test.py index cb75114e..6efb96a3 100644 --- a/meltingpot/python/utils/puppeteers_v2/clean_up_test.py +++ b/meltingpot/python/utils/puppeteers/clean_up_test.py @@ -18,8 +18,8 @@ from absl.testing import absltest from absl.testing import parameterized -from meltingpot.python.utils.puppeteers_v2 import clean_up -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import clean_up +from meltingpot.python.utils.puppeteers import testutils _NUM_COOPERATORS_KEY = 'num_cooperators' _COOPERATE = mock.sentinel.cooperate diff --git a/meltingpot/python/utils/puppeteers_v2/coins.py 
b/meltingpot/python/utils/puppeteers/coins.py similarity index 100% rename from meltingpot/python/utils/puppeteers_v2/coins.py rename to meltingpot/python/utils/puppeteers/coins.py diff --git a/meltingpot/python/utils/puppeteers_v2/coins_test.py b/meltingpot/python/utils/puppeteers/coins_test.py similarity index 98% rename from meltingpot/python/utils/puppeteers_v2/coins_test.py rename to meltingpot/python/utils/puppeteers/coins_test.py index b721a87c..936c5f64 100644 --- a/meltingpot/python/utils/puppeteers_v2/coins_test.py +++ b/meltingpot/python/utils/puppeteers/coins_test.py @@ -18,8 +18,8 @@ from absl.testing import absltest from absl.testing import parameterized -from meltingpot.python.utils.puppeteers_v2 import coins -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import coins +from meltingpot.python.utils.puppeteers import testutils _COOPERATE = mock.sentinel.cooperate _DEFECT = mock.sentinel.defect diff --git a/meltingpot/python/utils/puppeteers_v2/coordination_in_the_matrix.py b/meltingpot/python/utils/puppeteers/coordination_in_the_matrix.py similarity index 96% rename from meltingpot/python/utils/puppeteers_v2/coordination_in_the_matrix.py rename to meltingpot/python/utils/puppeteers/coordination_in_the_matrix.py index afd39a61..6859aca2 100644 --- a/meltingpot/python/utils/puppeteers_v2/coordination_in_the_matrix.py +++ b/meltingpot/python/utils/puppeteers/coordination_in_the_matrix.py @@ -15,7 +15,7 @@ from typing import Iterable -from meltingpot.python.utils.puppeteers_v2 import in_the_matrix +from meltingpot.python.utils.puppeteers import in_the_matrix class CoordinateWithPrevious(in_the_matrix.RespondToPrevious): diff --git a/meltingpot/python/utils/puppeteers_v2/coordination_in_the_matrix_test.py b/meltingpot/python/utils/puppeteers/coordination_in_the_matrix_test.py similarity index 93% rename from meltingpot/python/utils/puppeteers_v2/coordination_in_the_matrix_test.py rename to 
meltingpot/python/utils/puppeteers/coordination_in_the_matrix_test.py index d657a396..0f379f19 100644 --- a/meltingpot/python/utils/puppeteers_v2/coordination_in_the_matrix_test.py +++ b/meltingpot/python/utils/puppeteers/coordination_in_the_matrix_test.py @@ -20,9 +20,9 @@ from absl.testing import parameterized import numpy as np -from meltingpot.python.utils.puppeteers_v2 import coordination_in_the_matrix -from meltingpot.python.utils.puppeteers_v2 import in_the_matrix -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import coordination_in_the_matrix +from meltingpot.python.utils.puppeteers import in_the_matrix +from meltingpot.python.utils.puppeteers import testutils _RESOURCE_A = in_the_matrix.Resource( index=0, diff --git a/meltingpot/python/utils/puppeteers_v2/fixed_goal.py b/meltingpot/python/utils/puppeteers/fixed_goal.py similarity index 100% rename from meltingpot/python/utils/puppeteers_v2/fixed_goal.py rename to meltingpot/python/utils/puppeteers/fixed_goal.py diff --git a/meltingpot/python/utils/puppeteers_v2/fixed_goal_test.py b/meltingpot/python/utils/puppeteers/fixed_goal_test.py similarity index 90% rename from meltingpot/python/utils/puppeteers_v2/fixed_goal_test.py rename to meltingpot/python/utils/puppeteers/fixed_goal_test.py index 25f791f8..a1402977 100644 --- a/meltingpot/python/utils/puppeteers_v2/fixed_goal_test.py +++ b/meltingpot/python/utils/puppeteers/fixed_goal_test.py @@ -18,8 +18,8 @@ from absl.testing import absltest from absl.testing import parameterized -from meltingpot.python.utils.puppeteers_v2 import fixed_goal -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import fixed_goal +from meltingpot.python.utils.puppeteers import testutils class FixedGoalTest(parameterized.TestCase): diff --git a/meltingpot/python/utils/puppeteers_v2/gift_refinements.py b/meltingpot/python/utils/puppeteers/gift_refinements.py similarity index 
100% rename from meltingpot/python/utils/puppeteers_v2/gift_refinements.py rename to meltingpot/python/utils/puppeteers/gift_refinements.py diff --git a/meltingpot/python/utils/puppeteers_v2/gift_refinements_test.py b/meltingpot/python/utils/puppeteers/gift_refinements_test.py similarity index 94% rename from meltingpot/python/utils/puppeteers_v2/gift_refinements_test.py rename to meltingpot/python/utils/puppeteers/gift_refinements_test.py index f67ae649..c6be1d1b 100644 --- a/meltingpot/python/utils/puppeteers_v2/gift_refinements_test.py +++ b/meltingpot/python/utils/puppeteers/gift_refinements_test.py @@ -18,8 +18,8 @@ from absl.testing import absltest from absl.testing import parameterized -from meltingpot.python.utils.puppeteers_v2 import gift_refinements -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import gift_refinements +from meltingpot.python.utils.puppeteers import testutils _COLLECT = mock.sentinel.collect _CONSUME = mock.sentinel.consume diff --git a/meltingpot/python/utils/puppeteers/in_the_matrix.py b/meltingpot/python/utils/puppeteers/in_the_matrix.py index 8eba8dcd..898da8a6 100644 --- a/meltingpot/python/utils/puppeteers/in_the_matrix.py +++ b/meltingpot/python/utils/puppeteers/in_the_matrix.py @@ -1,4 +1,4 @@ -# Copyright 2020 DeepMind Technologies Limited. +# Copyright 2022 DeepMind Technologies Limited. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,91 +11,580 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Puppeteers for puppet bots.""" +"""Puppeteers for *_in_the_matrix.""" -from typing import Tuple +from collections.abc import Mapping, Sequence +import dataclasses +import random +from typing import Optional, TypeVar import dm_env import numpy as np +import tree from meltingpot.python.utils.puppeteers import puppeteer -# Note: This assumes resource 0 is "good" and resource 1 is "bad". Thus: -# For PrisonersDilemma, resource 0 is `cooperate` and resource 1 is `defect`. -# For Stag hunt, resource 0 is `stag` and resource 1 is `hare`. -# For Chicken, resource 0 is `dove` and resource 1 is `hawk`. -_TWO_RESOURCE_GOALS = puppeteer.puppet_goals([ - 'COLLECT_COOPERATE', - 'COLLECT_DEFECT', - 'DESTROY_COOPERATE', - 'DESTROY_DEFECT', - 'INTERACT', -], dtype=np.float64) +State = TypeVar("State") +Observation = Mapping[str, tree.Structure[np.ndarray]] -class GrimTwoResource(puppeteer.Puppeteer[int]): - """Puppeteer function for a GRIM strategy in two resource *_in_the_matrix.""" +def get_inventory(timestep: dm_env.TimeStep) -> np.ndarray: + """Returns player's current inventory.""" + return timestep.observation["INVENTORY"] - def __init__(self, threshold: int) -> None: + +def get_partner_interaction_inventory( + timestep: dm_env.TimeStep) -> Optional[np.ndarray]: + """Returns the partner inventory from previous interaction.""" + _, partner_inventory = timestep.observation["INTERACTION_INVENTORIES"] + if np.all(partner_inventory < 0): + return None # No interaction occurred. 
+ else: + return partner_inventory + + +def has_interaction(timestep: dm_env.TimeStep) -> bool: + """Returns True if the timestep contains an interaction.""" + return get_partner_interaction_inventory(timestep) is not None + + +def max_resource_and_margin(inventory: np.ndarray) -> tuple[int, int]: + """Returns the index of the maximum resource and the margin of its lead.""" + sorted_resources = np.argsort(inventory) + maximum_resource = sorted_resources[-1] + margin = ( + int(inventory[sorted_resources[-1]]) - + int(inventory[sorted_resources[-2]])) + return maximum_resource, margin + + +def has_collected_sufficient( + inventory: np.ndarray, + resource: int, + margin: int, +) -> bool: + """Returns True if a sufficient amount of the resource has been collected. + + Args: + inventory: the inventory of collected resources. + resource: the index of the resource being collected. + margin: the required margin for "sufficiency". + """ + max_resource, current_margin = max_resource_and_margin(inventory) + return max_resource == resource and current_margin >= margin + + +def partner_max_resource(timestep: dm_env.TimeStep) -> Optional[int]: + """Returns partner's maximum resource at previous interaction.""" + partner_inventory = get_partner_interaction_inventory(timestep) + if partner_inventory is None: + return None # No interaction occurred. + resource, margin = max_resource_and_margin(partner_inventory) + if margin == 0: + return None # Intent is unclear (no unique maximum). + else: + return resource + + +def tremble(tremble_probability: float): + """Returns True if the hand trembles.""" + return random.random() < tremble_probability + + +@dataclasses.dataclass(frozen=True) +class Resource: + """A resource that can be collected by a puppet. + + Attributes: + index: the index of the resource in the INVENTORY vector. + collect_goal: the goal that directs the puppet to collect the resource. 
+ interact_goal: the goal that directs the puppet to interact with another + player while playing the resource. + """ + index: int + collect_goal: puppeteer.PuppetGoal + interact_goal: puppeteer.PuppetGoal + + def __eq__(self, obj): + if not isinstance(obj, Resource): + return NotImplemented + else: + return self is obj + + def __hash__(self): + return hash(id(self)) + + +def collect_or_interact_puppet_timestep( + timestep: dm_env.TimeStep, + target: Resource, + margin: int, +) -> dm_env.TimeStep: + """Returns a timestep for a *_in_the_matrix puppet. + + Args: + timestep: the timestep without any goal added. + target: the resource for the collector to target. + margin: the threshold at which the puppet switches from collecting to + interacting. + + Returns: + A timestep with a goal added for the puppet. If the puppet has already + collected enough of the targeted resource, will add the resource's + interact_goal. Otherwise will add the resource's collect_goal. + """ + inventory = get_inventory(timestep) + if has_collected_sufficient(inventory, target.index, margin): + goal = target.interact_goal + else: + goal = target.collect_goal + return puppeteer.puppet_timestep(timestep, goal) + + +class Specialist(puppeteer.Puppeteer[tuple[()]]): + """Puppeteer that targets a single resource.""" + + def __init__(self, *, target: Resource, margin: int) -> None: + """Initializes the puppeteer. + + Args: + target: the resource to target. + margin: the margin at which the specialist will switch from collecting to + interacting. 
+ """ + self._target = target + if margin > 0: + self._margin = margin + else: + raise ValueError("Margin must be positive.") + + def initial_state(self) -> tuple[()]: + """See base class.""" + return () + + def step(self, timestep: dm_env.TimeStep, + prev_state: tuple[()]) -> tuple[dm_env.TimeStep, tuple[()]]: + """See base class.""" + timestep = collect_or_interact_puppet_timestep( + timestep, self._target, self._margin) + return timestep, prev_state + + +class AlternatingSpecialist(puppeteer.Puppeteer[int]): + """Puppeteer that cycles targeted resource on a fixed schedule.""" + + def __init__(self, + *, + targets: Sequence[Resource], + interactions_per_target: int, + margin: int) -> None: + """Initializes the puppeteer. + + Args: + targets: circular sequence of resources to target. Targets correspond to + pure strategies in the underlying matrix game. + interactions_per_target: how many interactions to select each target + before switching to the next one in the `targets` sequence. + margin: Try to collect `margin` more of the target resource than the other + resources before interacting. 
+ """ + if targets: + self._targets = tuple(targets) + else: + raise ValueError("targets must not be empty") + + if interactions_per_target > 0: + self._interactions_per_target = interactions_per_target + else: + raise ValueError("interactions_per_target must be positive.") + + if margin > 0: + self._margin = margin + else: + raise ValueError("margin must be positive.") + + def initial_state(self) -> int: + """See base class.""" + return 0 + + def step(self, timestep: dm_env.TimeStep, + prev_state: int) -> tuple[dm_env.TimeStep, int]: + """See base class.""" + if timestep.first(): + prev_state = self.initial_state() + + if has_interaction(timestep): + total_interactions = prev_state + 1 + else: + total_interactions = prev_state + + target_index = (total_interactions // self._interactions_per_target) % len( + self._targets) + target = self._targets[target_index] + + timestep = collect_or_interact_puppet_timestep( + timestep, target, self._margin) + + return timestep, total_interactions + + +class ScheduledFlip(puppeteer.Puppeteer[int]): + """Puppeteer that targets one resource then switches to another.""" + + def __init__( + self, + *, + threshold: int, + initial_target: Resource, + final_target: Resource, + initial_margin: int, + final_margin: int, + ) -> None: + """Initializes the puppeteer. + + Args: + threshold: Switch targeted resource once this many interactions have + occurred. + initial_target: The initial resource to target. + final_target: The resource to target after the switch. + initial_margin: How much more of the target resource to collect before + interacting. + final_margin: The margin after the flip. 
+ """ + self._initial_target = initial_target + self._final_target = final_target + + if threshold > 0: + self._threshold = threshold + else: + raise ValueError("threshold must be positive.") + + if initial_margin > 0: + self._initial_margin = initial_margin + else: + raise ValueError("initial_margin must be positive.") + + if final_margin > 0: + self._final_margin = final_margin + else: + raise ValueError("final_margin must be positive.") + + def initial_state(self) -> int: + """See base class.""" + return 0 + + def step(self, timestep: dm_env.TimeStep, + prev_state: int) -> tuple[dm_env.TimeStep, int]: + """See base class.""" + if timestep.first(): + prev_state = self.initial_state() + + if has_interaction(timestep): + total_interactions = prev_state + 1 + else: + total_interactions = prev_state + + if total_interactions < self._threshold: + timestep = collect_or_interact_puppet_timestep( + timestep, self._initial_target, self._initial_margin) + else: + timestep = collect_or_interact_puppet_timestep( + timestep, self._final_target, self._final_margin) + + return timestep, total_interactions + + +class GrimTrigger(puppeteer.Puppeteer[int]): + """Puppeteer for a grim trigger. + + This bot will always try to play cooperate until other players have defected + against it more than `threshold` times. After enduring `threshold` defections, + it switches to a triggered mode where it always plays defect. It never leaves + this mode, i.e. it is grim. It defects in all future interactions, not only + those interactions with the players who originally defected on it. + """ + + def __init__( + self, + *, + threshold: int, + cooperate_resource: Resource, + defect_resource: Resource, + margin: int, + ) -> None: """Initializes the puppeteer. Args: - threshold: number of defections after which it will switch behavior. + threshold: How many defections cause this agent to switch to its triggered + mode. Once triggered it will try to defect in all future interactions. 
+ cooperate_resource: the cooperation resource. + defect_resource: the defection resource. + margin: Try to collect `margin` more of the target resource than the other + resource before interacting. """ - self._threshold = threshold - self._cooperate_resource_index = 0 - self._defect_resource_index = 1 + if threshold > 0: + self._threshold = threshold + else: + raise ValueError("threshold must be positive") + + self._cooperate_resource = cooperate_resource + self._defect_resource = defect_resource + + if margin > 0: + self._margin = margin + else: + raise ValueError("margin must be positive") def initial_state(self) -> int: """See base class.""" - partner_defections = 0 - return partner_defections - - def _get_focal_and_partner_inventory(self, timestep: dm_env.TimeStep): - """Returns the focal and partner inventories from the latest interaction.""" - interaction_inventories = timestep.observation['INTERACTION_INVENTORIES'] - focal_inventory = interaction_inventories[0] - partner_inventory = interaction_inventories[1] - return focal_inventory, partner_inventory - - def _is_defection(self, inventory: np.ndarray) -> bool: - """Returns True if `inventory` constitutes defection.""" - num_cooperate_resources = inventory[self._cooperate_resource_index] - num_defect_resources = inventory[self._defect_resource_index] - return num_defect_resources > num_cooperate_resources + return 0 + + def step(self, timestep: dm_env.TimeStep, + prev_state: int) -> tuple[dm_env.TimeStep, int]: + """See base class.""" + if timestep.first(): + prev_state = self.initial_state() + + partner_resource = partner_max_resource(timestep) + partner_defected = partner_resource == self._defect_resource.index + if partner_defected: + partner_defections = prev_state + 1 + else: + partner_defections = prev_state + + if partner_defections < self._threshold: + timestep = collect_or_interact_puppet_timestep( + timestep, self._cooperate_resource, self._margin) + else: + timestep = 
collect_or_interact_puppet_timestep( + timestep, self._defect_resource, self._margin) + return timestep, partner_defections + + +class TitForTat(puppeteer.Puppeteer[bool]): + """Puppeteer for a tit-for-tat bot. + + This bot will always try to cooperate if its partner cooperated in the last + round and defect if its partner defected in the last round. It cooperates + on the first round. + + Important note: this puppeteer function assumes there is only one other player + in the game. So it only makes sense for two player substrates like those we + called *_in_the_matrix__repeated. + """ + + def __init__( + self, + *, + cooperate_resource: Resource, + defect_resource: Resource, + margin: int, + tremble_probability: float, + ) -> None: + """Initializes the puppeteer. + + Args: + cooperate_resource: the cooperation resource. + defect_resource: the defection resource. + margin: Try to collect `margin` more of the target resource than the other + resource before interacting. + tremble_probability: When deciding to cooperate/defect, switch to + defect/cooperate with this probability. 
+ """ + self._cooperate_resource = cooperate_resource + self._defect_resource = defect_resource + + if margin > 0: + self._margin = margin + else: + raise ValueError("margin must be positive") + + if 0 <= tremble_probability <= 1: + self._tremble_probability = tremble_probability + else: + raise ValueError("tremble_probability must be a probability.") + + def initial_state(self) -> bool: + """See base class.""" + is_cooperative = True if not tremble(self._tremble_probability) else False + return is_cooperative def step(self, timestep: dm_env.TimeStep, - prev_state: int) -> Tuple[dm_env.TimeStep, int]: + prev_state: bool) -> tuple[dm_env.TimeStep, bool]: + """See base class.""" + if timestep.first(): + prev_state = self.initial_state() + + partner_resource = partner_max_resource(timestep) + partner_defected = partner_resource == self._defect_resource.index + partner_cooperated = partner_resource == self._cooperate_resource.index + + if partner_cooperated: + is_cooperative = True if not tremble(self._tremble_probability) else False + elif partner_defected: + is_cooperative = False if not tremble(self._tremble_probability) else True + else: + is_cooperative = prev_state + + if is_cooperative: + timestep = collect_or_interact_puppet_timestep( + timestep, self._cooperate_resource, self._margin) + else: + timestep = collect_or_interact_puppet_timestep( + timestep, self._defect_resource, self._margin) + return timestep, is_cooperative + + +@dataclasses.dataclass(frozen=True) +class CorrigableState: + """State of Corrigable puppeteer. + + Attributes: + partner_defections: the number of times the partner has defected. + is_cooperative: whether the puppeteer is currently cooperating (as opposed + to defecting). + """ + partner_defections: int + is_cooperative: bool + + +class Corrigible(puppeteer.Puppeteer[CorrigableState]): + """Puppeteer that defects until you punish it, then switches to tit-for-tat. 
+ + Important note: this puppeteer function assumes there is only one other player + in the game. So it only makes sense for two player substrates like those we + called *_in_the_matrix__repeated. + """ + + def __init__( + self, + threshold: int, + cooperate_resource: Resource, + defect_resource: Resource, + margin: int, + tremble_probability: float, + ) -> None: + """Initializes the puppeteer. + + Args: + threshold: How many times this bot must be punished for it to change its + behavior from 'always defect' to 'tit-for-tat'. + cooperate_resource: the cooperation resource. + defect_resource: the defection resource. + margin: Try to collect `margin` more of the target resource than the other + resource before interacting. + tremble_probability: Once playing tit-for-tat, when deciding to + cooperate/defect, switch to defect/cooperate with this probability. + """ + if threshold > 0: + self._threshold = threshold + else: + raise ValueError("threshold must be positive.") + + self._cooperate_resource = cooperate_resource + self._defect_resource = defect_resource + + if margin > 0: + self._margin = margin + else: + raise ValueError("margin must be positive") + + if 0 <= tremble_probability <= 1: + self._tremble_probability = tremble_probability + else: + raise ValueError("tremble_probability must be a probability.") + + def initial_state(self) -> CorrigableState: + """See base class.""" + return CorrigableState(partner_defections=0, is_cooperative=False) + + def step( + self, + timestep: dm_env.TimeStep, + prev_state: CorrigableState, + ) -> tuple[dm_env.TimeStep, CorrigableState]: """See base class.""" if timestep.first(): prev_state = self.initial_state() - partner_defections = prev_state - - # Accumulate partner defections over the episode. 
- _, partner_inventory = self._get_focal_and_partner_inventory(timestep) - if self._is_defection(partner_inventory): - partner_defections += 1 - - inventory = timestep.observation['INVENTORY'] - num_cooperate_resources = inventory[self._cooperate_resource_index] - num_defect_resources = inventory[self._defect_resource_index] - - # Ready to interact if collected more of either resource than the other. - ready_to_interact = False - if np.abs(num_defect_resources - num_cooperate_resources) > 0: - ready_to_interact = True - - if not ready_to_interact: - # Collect either C or D when not ready to interact. - if partner_defections < self._threshold: - # When defection is below threshold, then collect cooperate resources. - goal = _TWO_RESOURCE_GOALS['COLLECT_COOPERATE'] - else: - # When defection exceeds threshold, then collect D resources. - goal = _TWO_RESOURCE_GOALS['COLLECT_DEFECT'] - else: - # Interact when ready. - goal = _TWO_RESOURCE_GOALS['INTERACT'] - timestep = puppeteer.puppet_timestep(timestep, goal) - next_state = partner_defections + + partner_resource = partner_max_resource(timestep) + partner_defected = partner_resource == self._defect_resource.index + partner_cooperated = partner_resource == self._cooperate_resource.index + + if partner_defected: + partner_defections = prev_state.partner_defections + 1 + switching_now = partner_defections == self._threshold + else: + partner_defections = prev_state.partner_defections + switching_now = False + + insufficiently_punished = partner_defections < self._threshold + if insufficiently_punished: + is_cooperative = False + elif switching_now or partner_cooperated: + is_cooperative = True if not tremble(self._tremble_probability) else False + elif partner_defected: + is_cooperative = False if not tremble(self._tremble_probability) else True + else: + is_cooperative = prev_state.is_cooperative + + if is_cooperative: + timestep = collect_or_interact_puppet_timestep( + timestep, self._cooperate_resource, self._margin) 
+ else: + timestep = collect_or_interact_puppet_timestep( + timestep, self._defect_resource, self._margin) + next_state = CorrigableState( + is_cooperative=is_cooperative, partner_defections=partner_defections) return timestep, next_state + + +class RespondToPrevious(puppeteer.Puppeteer[Resource]): + """Puppeteer for responding to opponents previous move. + + At the start of an episode, RespondToPrevious targets a random resource up + until the first interaction occurs. Thereafter RespondToPrevious selects the + resource to target based on the maximum resource held by the coplayer at the + last interaction. If the coplayer held no single maximum resource, + RespondToPrevious will continue to target the resource it was previously + targeting. + """ + + def __init__( + self, + responses: Mapping[Resource, Resource], + margin: int, + ) -> None: + """Initializes the puppeteer. + + Args: + responses: Mapping from the maximum resource in the partner inventory to + the resource to target in response. + margin: Try to collect `margin` more of the target resource than the other + resource before interacting. 
+ """ + self._responses = { + resource.index: response for resource, response in responses.items() + } + if margin > 0: + self._margin = margin + else: + raise ValueError("margin must be positive.") + + def initial_state(self) -> Resource: + """See base class.""" + return random.choice(list(self._responses.values())) + + def step( + self, + timestep: dm_env.TimeStep, + prev_state: Resource, + ) -> tuple[dm_env.TimeStep, Resource]: + """See base class.""" + if timestep.first(): + prev_state = self.initial_state() + partner_resource = partner_max_resource(timestep) + response = self._responses.get(partner_resource, prev_state) + timestep = collect_or_interact_puppet_timestep( + timestep, response, self._margin) + return timestep, response diff --git a/meltingpot/python/utils/puppeteers_v2/in_the_matrix_test.py b/meltingpot/python/utils/puppeteers/in_the_matrix_test.py similarity index 99% rename from meltingpot/python/utils/puppeteers_v2/in_the_matrix_test.py rename to meltingpot/python/utils/puppeteers/in_the_matrix_test.py index 0365d8ee..c4dd4065 100644 --- a/meltingpot/python/utils/puppeteers_v2/in_the_matrix_test.py +++ b/meltingpot/python/utils/puppeteers/in_the_matrix_test.py @@ -22,8 +22,8 @@ import immutabledict import numpy as np -from meltingpot.python.utils.puppeteers_v2 import in_the_matrix -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import in_the_matrix +from meltingpot.python.utils.puppeteers import testutils _RESOURCE_0 = in_the_matrix.Resource( index=0, diff --git a/meltingpot/python/utils/puppeteers_v2/running_with_scissors_in_the_matrix.py b/meltingpot/python/utils/puppeteers/running_with_scissors_in_the_matrix.py similarity index 96% rename from meltingpot/python/utils/puppeteers_v2/running_with_scissors_in_the_matrix.py rename to meltingpot/python/utils/puppeteers/running_with_scissors_in_the_matrix.py index c0ca4770..840cddb4 100644 --- 
a/meltingpot/python/utils/puppeteers_v2/running_with_scissors_in_the_matrix.py +++ b/meltingpot/python/utils/puppeteers/running_with_scissors_in_the_matrix.py @@ -13,7 +13,7 @@ # limitations under the License. """Puppeteers for running_with_scissors_in_the_matrix.""" -from meltingpot.python.utils.puppeteers_v2 import in_the_matrix +from meltingpot.python.utils.puppeteers import in_the_matrix class CounterPrevious(in_the_matrix.RespondToPrevious): diff --git a/meltingpot/python/utils/puppeteers_v2/running_with_scissors_in_the_matrix_test.py b/meltingpot/python/utils/puppeteers/running_with_scissors_in_the_matrix_test.py similarity index 93% rename from meltingpot/python/utils/puppeteers_v2/running_with_scissors_in_the_matrix_test.py rename to meltingpot/python/utils/puppeteers/running_with_scissors_in_the_matrix_test.py index 5082313a..c06ed967 100644 --- a/meltingpot/python/utils/puppeteers_v2/running_with_scissors_in_the_matrix_test.py +++ b/meltingpot/python/utils/puppeteers/running_with_scissors_in_the_matrix_test.py @@ -20,9 +20,9 @@ from absl.testing import parameterized import numpy as np -from meltingpot.python.utils.puppeteers_v2 import in_the_matrix -from meltingpot.python.utils.puppeteers_v2 import running_with_scissors_in_the_matrix -from meltingpot.python.utils.puppeteers_v2 import testutils +from meltingpot.python.utils.puppeteers import in_the_matrix +from meltingpot.python.utils.puppeteers import running_with_scissors_in_the_matrix +from meltingpot.python.utils.puppeteers import testutils _ROCK = in_the_matrix.Resource( index=2, diff --git a/meltingpot/python/utils/puppeteers_v2/testutils.py b/meltingpot/python/utils/puppeteers/testutils.py similarity index 100% rename from meltingpot/python/utils/puppeteers_v2/testutils.py rename to meltingpot/python/utils/puppeteers/testutils.py diff --git a/meltingpot/python/utils/puppeteers_v2/clean_up.py b/meltingpot/python/utils/puppeteers_v2/clean_up.py deleted file mode 100644 index c6fa2f06..00000000 --- 
a/meltingpot/python/utils/puppeteers_v2/clean_up.py +++ /dev/null @@ -1,141 +0,0 @@ -# Copyright 2022 DeepMind Technologies Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Puppeteers for clean_up.""" - -import dataclasses - -import dm_env - -from meltingpot.python.utils.puppeteers import puppeteer - - -@dataclasses.dataclass(frozen=True) -class ConditionalCleanerState: - """Current state of the ConditionalCleaner. - - Attributes: - step_count: number of timesteps previously seen in this episode. - clean_until: step_count after which to stop cleaning. - recent_cleaning: number of others cleaning on previous timesteps (ordered - from oldest to most recent). - """ - step_count: int - clean_until: int - recent_cleaning: tuple[int, ...] - - -class ConditionalCleaner(puppeteer.Puppeteer[ConditionalCleanerState]): - """Puppeteer for a reciprocating agent. - - This puppeteer's behavior depends on the behavior of others. In particular, it - tracks the total amount of others' "cleaning", and integrates this signal - using a rolling window. - - Initially, the puppet will be in a "nice" mode where it will direct the - puppet to clean the river for a fixed period. Once this period is over, the - puppeteer will fall into a "eating" mode where it will direct the puppet to - only eat apples. However, once the total level of others' cleaning reaches a - threshold, the puppeteer will temporarily switch to a "cleaning" mode. 
Once - the total level of others' cleaning drops back below threshold, the puppeteer - will clean for fixed number of steps before falling back into the "eating" - mode. - """ - - def __init__(self, - *, - clean_goal: puppeteer.PuppetGoal, - eat_goal: puppeteer.PuppetGoal, - coplayer_cleaning_signal: str, - recency_window: int, - threshold: int, - reciprocation_period: int, - niceness_period: int) -> None: - """Initializes the puppeteer. - - Args: - clean_goal: goal to emit to puppet when "cleaning". - eat_goal: goal to emit to puppet when "eating". - coplayer_cleaning_signal: key in observations that provides the - privileged observation of number of others cleaning in the previous - timestep. - recency_window: number of steps over which to remember others' behavior. - threshold: if the total number of (nonunique) cleaners over the - remembered period reaches this threshold, the puppeteer will direct the - puppet to clean. - reciprocation_period: the number of steps to clean for once others' - cleaning has been forgotten and fallen back below threshold. - niceness_period: the number of steps to unconditionally clean for at - the start of the episode. 
- """ - self._clean_goal = clean_goal - self._eat_goal = eat_goal - self._coplayer_cleaning_signal = coplayer_cleaning_signal - - if threshold > 0: - self._threshold = threshold - else: - raise ValueError('threshold must be positive') - - if recency_window > 0: - self._recency_window = recency_window - else: - raise ValueError('recency_window must be positive') - - if reciprocation_period > 0: - self._reciprocation_period = reciprocation_period - else: - raise ValueError('reciprocation_period must be positive') - - if niceness_period >= 0: - self._niceness_period = niceness_period - else: - raise ValueError('niceness_period must be nonnegative') - - def initial_state(self) -> ConditionalCleanerState: - """See base class.""" - return ConditionalCleanerState( - step_count=0, clean_until=self._niceness_period, recent_cleaning=()) - - def step( - self, timestep: dm_env.TimeStep, prev_state: ConditionalCleanerState - ) -> tuple[dm_env.TimeStep, ConditionalCleanerState]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - step_count = prev_state.step_count - clean_until = prev_state.clean_until - recent_cleaning = prev_state.recent_cleaning - - coplayers_cleaning = int( - timestep.observation[self._coplayer_cleaning_signal]) - recent_cleaning += (coplayers_cleaning,) - recent_cleaning = recent_cleaning[-self._recency_window:] - - smooth_cleaning = sum(recent_cleaning) - if smooth_cleaning >= self._threshold: - clean_until = max(clean_until, step_count + self._reciprocation_period) - # Do not clear the recent_cleaning history after triggering. - # TODO(b/237058204): clear history in future versions. 
- - if step_count < clean_until: - goal = self._clean_goal - else: - goal = self._eat_goal - timestep = puppeteer.puppet_timestep(timestep, goal) - - next_state = ConditionalCleanerState( - step_count=step_count + 1, - clean_until=clean_until, - recent_cleaning=recent_cleaning) - return timestep, next_state diff --git a/meltingpot/python/utils/puppeteers_v2/in_the_matrix.py b/meltingpot/python/utils/puppeteers_v2/in_the_matrix.py deleted file mode 100644 index 898da8a6..00000000 --- a/meltingpot/python/utils/puppeteers_v2/in_the_matrix.py +++ /dev/null @@ -1,590 +0,0 @@ -# Copyright 2022 DeepMind Technologies Limited. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Puppeteers for *_in_the_matrix.""" - -from collections.abc import Mapping, Sequence -import dataclasses -import random -from typing import Optional, TypeVar - -import dm_env -import numpy as np -import tree - -from meltingpot.python.utils.puppeteers import puppeteer - -State = TypeVar("State") -Observation = Mapping[str, tree.Structure[np.ndarray]] - - -def get_inventory(timestep: dm_env.TimeStep) -> np.ndarray: - """Returns player's current inventory.""" - return timestep.observation["INVENTORY"] - - -def get_partner_interaction_inventory( - timestep: dm_env.TimeStep) -> Optional[np.ndarray]: - """Returns the partner inventory from previous interaction.""" - _, partner_inventory = timestep.observation["INTERACTION_INVENTORIES"] - if np.all(partner_inventory < 0): - return None # No interaction occurred. - else: - return partner_inventory - - -def has_interaction(timestep: dm_env.TimeStep) -> bool: - """Returns True if the timestep contains an interaction.""" - return get_partner_interaction_inventory(timestep) is not None - - -def max_resource_and_margin(inventory: np.ndarray) -> tuple[int, int]: - """Returns the index of the maximum resource and the margin of its lead.""" - sorted_resources = np.argsort(inventory) - maximum_resource = sorted_resources[-1] - margin = ( - int(inventory[sorted_resources[-1]]) - - int(inventory[sorted_resources[-2]])) - return maximum_resource, margin - - -def has_collected_sufficient( - inventory: np.ndarray, - resource: int, - margin: int, -) -> bool: - """Returns True if a sufficient amount of the resource has been collected. - - Args: - inventory: the inventory of collected resources. - resource: the index of the resource being collected. - margin: the required margin for "sufficiency". 
- """ - max_resource, current_margin = max_resource_and_margin(inventory) - return max_resource == resource and current_margin >= margin - - -def partner_max_resource(timestep: dm_env.TimeStep) -> Optional[int]: - """Returns partner's maximum resource at previous interaction.""" - partner_inventory = get_partner_interaction_inventory(timestep) - if partner_inventory is None: - return None # No interaction occurred. - resource, margin = max_resource_and_margin(partner_inventory) - if margin == 0: - return None # Intent is unclear (no unique maximum). - else: - return resource - - -def tremble(tremble_probability: float): - """Returns True if the hand trembles.""" - return random.random() < tremble_probability - - -@dataclasses.dataclass(frozen=True) -class Resource: - """A resource that can be collected by a puppet. - - Attributes: - index: the index of the resource in the INVENTORY vector. - collect_goal: the goal that directs the puppet to collect the resource. - interact_goal: the goal that directs the puppet to interact with another - player while playing the resource. - """ - index: int - collect_goal: puppeteer.PuppetGoal - interact_goal: puppeteer.PuppetGoal - - def __eq__(self, obj): - if not isinstance(obj, Resource): - return NotImplemented - else: - return self is obj - - def __hash__(self): - return hash(id(self)) - - -def collect_or_interact_puppet_timestep( - timestep: dm_env.TimeStep, - target: Resource, - margin: int, -) -> dm_env.TimeStep: - """Returns a timestep for a *_in_the_matrix puppet. - - Args: - timestep: the timestep without any goal added. - target: the resource for the collector to target. - margin: the threshold at which the puppet switches from collecting to - interacting. - - Returns: - A timestep with a goal added for the puppet. If the puppet has already - collected enough of the targeted resource, will add the resource's - interact_goal. Otherwise will add the resource's collect_goal. 
- """ - inventory = get_inventory(timestep) - if has_collected_sufficient(inventory, target.index, margin): - goal = target.interact_goal - else: - goal = target.collect_goal - return puppeteer.puppet_timestep(timestep, goal) - - -class Specialist(puppeteer.Puppeteer[tuple[()]]): - """Puppeteer that targets a single resource.""" - - def __init__(self, *, target: Resource, margin: int) -> None: - """Initializes the puppeteer. - - Args: - target: the resource to target. - margin: the margin at which the specialist will switch from collecting to - interacting. - """ - self._target = target - if margin > 0: - self._margin = margin - else: - raise ValueError("Margin must be positive.") - - def initial_state(self) -> tuple[()]: - """See base class.""" - return () - - def step(self, timestep: dm_env.TimeStep, - prev_state: tuple[()]) -> tuple[dm_env.TimeStep, tuple[()]]: - """See base class.""" - timestep = collect_or_interact_puppet_timestep( - timestep, self._target, self._margin) - return timestep, prev_state - - -class AlternatingSpecialist(puppeteer.Puppeteer[int]): - """Puppeteer that cycles targeted resource on a fixed schedule.""" - - def __init__(self, - *, - targets: Sequence[Resource], - interactions_per_target: int, - margin: int) -> None: - """Initializes the puppeteer. - - Args: - targets: circular sequence of resources to target. Targets correspond to - pure strategies in the underlying matrix game. - interactions_per_target: how many interactions to select each target - before switching to the next one in the `targets` sequence. - margin: Try to collect `margin` more of the target resource than the other - resources before interacting. 
- """ - if targets: - self._targets = tuple(targets) - else: - raise ValueError("targets must not be empty") - - if interactions_per_target > 0: - self._interactions_per_target = interactions_per_target - else: - raise ValueError("interactions_per_target must be positive.") - - if margin > 0: - self._margin = margin - else: - raise ValueError("margin must be positive.") - - def initial_state(self) -> int: - """See base class.""" - return 0 - - def step(self, timestep: dm_env.TimeStep, - prev_state: int) -> tuple[dm_env.TimeStep, int]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - - if has_interaction(timestep): - total_interactions = prev_state + 1 - else: - total_interactions = prev_state - - target_index = (total_interactions // self._interactions_per_target) % len( - self._targets) - target = self._targets[target_index] - - timestep = collect_or_interact_puppet_timestep( - timestep, target, self._margin) - - return timestep, total_interactions - - -class ScheduledFlip(puppeteer.Puppeteer[int]): - """Puppeteer that targets one resource then switches to another.""" - - def __init__( - self, - *, - threshold: int, - initial_target: Resource, - final_target: Resource, - initial_margin: int, - final_margin: int, - ) -> None: - """Initializes the puppeteer. - - Args: - threshold: Switch targeted resource once this many interactions have - occurred. - initial_target: The initial resource to target. - final_target: The resource to target after the switch. - initial_margin: How much more of the target resource to collect before - interacting. - final_margin: The margin after the flip. 
- """ - self._initial_target = initial_target - self._final_target = final_target - - if threshold > 0: - self._threshold = threshold - else: - raise ValueError("threshold must be positive.") - - if initial_margin > 0: - self._initial_margin = initial_margin - else: - raise ValueError("initial_margin must be positive.") - - if final_margin > 0: - self._final_margin = final_margin - else: - raise ValueError("final_margin must be positive.") - - def initial_state(self) -> int: - """See base class.""" - return 0 - - def step(self, timestep: dm_env.TimeStep, - prev_state: int) -> tuple[dm_env.TimeStep, int]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - - if has_interaction(timestep): - total_interactions = prev_state + 1 - else: - total_interactions = prev_state - - if total_interactions < self._threshold: - timestep = collect_or_interact_puppet_timestep( - timestep, self._initial_target, self._initial_margin) - else: - timestep = collect_or_interact_puppet_timestep( - timestep, self._final_target, self._final_margin) - - return timestep, total_interactions - - -class GrimTrigger(puppeteer.Puppeteer[int]): - """Puppeteer for a grim trigger. - - This bot will always try to play cooperate until other players have defected - against it more than `threshold` times. After enduring `threshold` defections, - it switches to a triggered mode where it always plays defect. It never leaves - this mode, i.e. it is grim. It defects in all future interactions, not only - those interactions with the players who originally defected on it. - """ - - def __init__( - self, - *, - threshold: int, - cooperate_resource: Resource, - defect_resource: Resource, - margin: int, - ) -> None: - """Initializes the puppeteer. - - Args: - threshold: How many defections cause this agent to switch to its triggered - mode. Once triggered it will try to defect in all future interactions. - cooperate_resource: the cooperation resource. 
- defect_resource: the defection resource. - margin: Try to collect `margin` more of the target resource than the other - resource before interacting. - """ - if threshold > 0: - self._threshold = threshold - else: - raise ValueError("threshold must be positive") - - self._cooperate_resource = cooperate_resource - self._defect_resource = defect_resource - - if margin > 0: - self._margin = margin - else: - raise ValueError("margin must be positive") - - def initial_state(self) -> int: - """See base class.""" - return 0 - - def step(self, timestep: dm_env.TimeStep, - prev_state: int) -> tuple[dm_env.TimeStep, int]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - - partner_resource = partner_max_resource(timestep) - partner_defected = partner_resource == self._defect_resource.index - if partner_defected: - partner_defections = prev_state + 1 - else: - partner_defections = prev_state - - if partner_defections < self._threshold: - timestep = collect_or_interact_puppet_timestep( - timestep, self._cooperate_resource, self._margin) - else: - timestep = collect_or_interact_puppet_timestep( - timestep, self._defect_resource, self._margin) - return timestep, partner_defections - - -class TitForTat(puppeteer.Puppeteer[bool]): - """Puppeteer for a tit-for-tat bot. - - This bot will always try to cooperate if its partner cooperated in the last - round and defect if its partner defected in the last round. It cooperates - on the first round. - - Important note: this puppeteer function assumes there is only one other player - in the game. So it only makes sense for two player substrates like those we - called *_in_the_matrix__repeated. - """ - - def __init__( - self, - *, - cooperate_resource: Resource, - defect_resource: Resource, - margin: int, - tremble_probability: float, - ) -> None: - """Initializes the puppeteer. - - Args: - cooperate_resource: the cooperation resource. - defect_resource: the defection resource. 
- margin: Try to collect `margin` more of the target resource than the other - resource before interacting. - tremble_probability: When deciding to cooperate/defect, switch to - defect/cooperate with this probability. - """ - self._cooperate_resource = cooperate_resource - self._defect_resource = defect_resource - - if margin > 0: - self._margin = margin - else: - raise ValueError("margin must be positive") - - if 0 <= tremble_probability <= 1: - self._tremble_probability = tremble_probability - else: - raise ValueError("tremble_probability must be a probability.") - - def initial_state(self) -> bool: - """See base class.""" - is_cooperative = True if not tremble(self._tremble_probability) else False - return is_cooperative - - def step(self, timestep: dm_env.TimeStep, - prev_state: bool) -> tuple[dm_env.TimeStep, bool]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - - partner_resource = partner_max_resource(timestep) - partner_defected = partner_resource == self._defect_resource.index - partner_cooperated = partner_resource == self._cooperate_resource.index - - if partner_cooperated: - is_cooperative = True if not tremble(self._tremble_probability) else False - elif partner_defected: - is_cooperative = False if not tremble(self._tremble_probability) else True - else: - is_cooperative = prev_state - - if is_cooperative: - timestep = collect_or_interact_puppet_timestep( - timestep, self._cooperate_resource, self._margin) - else: - timestep = collect_or_interact_puppet_timestep( - timestep, self._defect_resource, self._margin) - return timestep, is_cooperative - - -@dataclasses.dataclass(frozen=True) -class CorrigableState: - """State of Corrigable puppeteer. - - Attributes: - partner_defections: the number of times the partner has defected. - is_cooperative: whether the puppeteer is currently cooperating (as opposed - to defecting). 
- """ - partner_defections: int - is_cooperative: bool - - -class Corrigible(puppeteer.Puppeteer[CorrigableState]): - """Puppeteer that defects until you punish it, then switches to tit-for-tat. - - Important note: this puppeteer function assumes there is only one other player - in the game. So it only makes sense for two player substrates like those we - called *_in_the_matrix__repeated. - """ - - def __init__( - self, - threshold: int, - cooperate_resource: Resource, - defect_resource: Resource, - margin: int, - tremble_probability: float, - ) -> None: - """Initializes the puppeteer. - - Args: - threshold: How many times this bot must be punished for it to change its - behavior from 'always defect' to 'tit-for-tat'. - cooperate_resource: the cooperation resource. - defect_resource: the defection resource. - margin: Try to collect `margin` more of the target resource than the other - resource before interacting. - tremble_probability: Once playing tit-for-tat, when deciding to - cooperate/defect, switch to defect/cooperate with this probability. 
- """ - if threshold > 0: - self._threshold = threshold - else: - raise ValueError("threshold must be positive.") - - self._cooperate_resource = cooperate_resource - self._defect_resource = defect_resource - - if margin > 0: - self._margin = margin - else: - raise ValueError("margin must be positive") - - if 0 <= tremble_probability <= 1: - self._tremble_probability = tremble_probability - else: - raise ValueError("tremble_probability must be a probability.") - - def initial_state(self) -> CorrigableState: - """See base class.""" - return CorrigableState(partner_defections=0, is_cooperative=False) - - def step( - self, - timestep: dm_env.TimeStep, - prev_state: CorrigableState, - ) -> tuple[dm_env.TimeStep, CorrigableState]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - - partner_resource = partner_max_resource(timestep) - partner_defected = partner_resource == self._defect_resource.index - partner_cooperated = partner_resource == self._cooperate_resource.index - - if partner_defected: - partner_defections = prev_state.partner_defections + 1 - switching_now = partner_defections == self._threshold - else: - partner_defections = prev_state.partner_defections - switching_now = False - - insufficiently_punished = partner_defections < self._threshold - if insufficiently_punished: - is_cooperative = False - elif switching_now or partner_cooperated: - is_cooperative = True if not tremble(self._tremble_probability) else False - elif partner_defected: - is_cooperative = False if not tremble(self._tremble_probability) else True - else: - is_cooperative = prev_state.is_cooperative - - if is_cooperative: - timestep = collect_or_interact_puppet_timestep( - timestep, self._cooperate_resource, self._margin) - else: - timestep = collect_or_interact_puppet_timestep( - timestep, self._defect_resource, self._margin) - next_state = CorrigableState( - is_cooperative=is_cooperative, partner_defections=partner_defections) - return timestep, 
next_state - - -class RespondToPrevious(puppeteer.Puppeteer[Resource]): - """Puppeteer for responding to opponents previous move. - - At the start of an episode, RespondToPrevious targets a random resource up - until the first interaction occurs. Thereafter RespondToPrevious selects the - resource to target based on the maximum resource held by the coplayer at the - last interaction. If the coplayer held no single maximum resource, - RespondToPrevious will continue to target the resource it was previously - targeting. - """ - - def __init__( - self, - responses: Mapping[Resource, Resource], - margin: int, - ) -> None: - """Initializes the puppeteer. - - Args: - responses: Mapping from the maximum resource in the partner inventory to - the resource to target in response. - margin: Try to collect `margin` more of the target resource than the other - resource before interacting. - """ - self._responses = { - resource.index: response for resource, response in responses.items() - } - if margin > 0: - self._margin = margin - else: - raise ValueError("margin must be positive.") - - def initial_state(self) -> Resource: - """See base class.""" - return random.choice(list(self._responses.values())) - - def step( - self, - timestep: dm_env.TimeStep, - prev_state: Resource, - ) -> tuple[dm_env.TimeStep, Resource]: - """See base class.""" - if timestep.first(): - prev_state = self.initial_state() - partner_resource = partner_max_resource(timestep) - response = self._responses.get(partner_resource, prev_state) - timestep = collect_or_interact_puppet_timestep( - timestep, response, self._margin) - return timestep, response diff --git a/meltingpot/python/utils/scenarios/substrate_transforms.py b/meltingpot/python/utils/scenarios/substrate_transforms.py deleted file mode 100644 index 9036e516..00000000 --- a/meltingpot/python/utils/scenarios/substrate_transforms.py +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright 2020 DeepMind Technologies Limited. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Substrate transforms.""" - -from typing import TypeVar - -import dm_env -import immutabledict -import numpy as np - -from meltingpot.python.utils.scenarios.wrappers import agent_slot_wrapper -from meltingpot.python.utils.scenarios.wrappers import all_observations_wrapper -from meltingpot.python.utils.scenarios.wrappers import default_observation_wrapper -from meltingpot.python.utils.substrates import specs - -T = TypeVar('T') - - -def with_tf1_bot_required_observations(substrate: T) -> T: - """Transforms a substrate to include observations needed by original TF bots. - - We trained the original TF bots with these wrappers present and so we need to - add them back in so that they execute in the same context as they were - trained and validated. Newly trained bots should not need these wrappers. - - Args: - substrate: substrate to add observations to. - - Returns: - The substrate, with additional wrappers required by the tf1 bots. 
- """ - substrate_observations = set(substrate.observation_spec()[0]) - substrate = all_observations_wrapper.Wrapper( - substrate, observations_to_share=['POSITION'], share_actions=True) - substrate = agent_slot_wrapper.Wrapper(substrate) - if 'INVENTORY' not in substrate_observations: - substrate = default_observation_wrapper.Wrapper( - substrate, key='INVENTORY', default_value=np.zeros([1])) - return substrate - - -def tf1_bot_timestep_spec( - timestep_spec: dm_env.TimeStep, - action_spec: dm_env.specs.DiscreteArray, - num_players: int, -) -> dm_env.TimeStep: - """Transforms specs to include observations needed by original TF bots. - - Args: - timestep_spec: substrate timestep spec. - action_spec: substrate action spec. - num_players: the number of players. - - Returns: - The timestep spec, with additional observations required by the tf1 bots. - """ - global_observations = {} - if 'POSITION' in timestep_spec.observation: - position_spec = immutabledict.immutabledict( - POSITION=specs.int32(num_players, 2)) - global_observations['observations'] = position_spec - - observation_spec = dict(timestep_spec.observation) - observation_spec['global'] = immutabledict.immutabledict( - actions=dm_env.specs.BoundedArray( - shape=[num_players], - dtype=action_spec.dtype, - minimum=action_spec.minimum, - maximum=action_spec.maximum), - **global_observations) - observation_spec['agent_slot'] = specs.float32(num_players) - if 'INVENTORY' not in observation_spec: - observation_spec['INVENTORY'] = specs.inventory(1) - return timestep_spec._replace( - observation=immutabledict.immutabledict(observation_spec)) diff --git a/meltingpot/python/utils/substrates/builder_test.py b/meltingpot/python/utils/substrates/builder_test.py index eb62f73d..4eeb8c41 100644 --- a/meltingpot/python/utils/substrates/builder_test.py +++ b/meltingpot/python/utils/substrates/builder_test.py @@ -19,16 +19,21 @@ from absl.testing import parameterized import numpy as np -from 
meltingpot.python.configs.substrates import running_with_scissors_in_the_matrix +from meltingpot.python.configs.substrates import running_with_scissors_in_the_matrix__repeated as test_substrate from meltingpot.python.utils.substrates import builder -_TEST_SETTINGS = running_with_scissors_in_the_matrix.get_config().lab2d_settings +def _get_test_settings(): + config = test_substrate.get_config() + return test_substrate.build(config, config.default_player_roles) + + +_TEST_SETTINGS = _get_test_settings() def _get_lua_randomization_map(): """Replaces first row of walls with items randomized by Lua.""" - head, line, *tail = _TEST_SETTINGS.simulation.map.split('\n') + head, line, *tail = _TEST_SETTINGS['simulation']['map'].split('\n') # Replace line 1 (walls) with a row of 'a' (items randomized by Lua). new_map = '\n'.join([head, 'a' * len(line), *tail]) return new_map @@ -76,7 +81,7 @@ def test_no_seed_causes_nondeterminism(self): @parameterized.product(seed=[None, 42, 123, 1337, 12481632]) def test_episodes_are_randomized_in_lua(self, seed): lab2d_settings = copy.deepcopy(_TEST_SETTINGS) - lab2d_settings.simulation.map = _LUA_RANDOMIZATION_MAP + lab2d_settings['simulation']['map'] = _LUA_RANDOMIZATION_MAP env = self.enter_context(builder.builder(lab2d_settings, env_seed=seed)) obs = env.reset().observation['WORLD.RGB'][_LUA_RANDOMIZED_LINE] @@ -90,7 +95,7 @@ def test_episodes_are_randomized_in_lua(self, seed): def test_no_seed_causes_nondeterminism_for_lua(self): lab2d_settings = copy.deepcopy(_TEST_SETTINGS) - lab2d_settings.simulation.map = _LUA_RANDOMIZATION_MAP + lab2d_settings['simulation']['map'] = _LUA_RANDOMIZATION_MAP env1 = self.enter_context(builder.builder(lab2d_settings)) env2 = self.enter_context(builder.builder(lab2d_settings)) diff --git a/setup.py b/setup.py index 12caa0f3..8bfb2fa1 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setuptools.setup( name='dm-meltingpot', - version='1.0.4', + version='2.0.0', license='Apache 2.0', 
license_files=['LICENSE'], url='https://github.com/deepmind/meltingpot',