Implement fixes in MARL agent
yura-hb committed Feb 24, 2024
1 parent 6174dcf commit a38b620
Showing 18 changed files with 72 additions and 27 deletions.
27 changes: 19 additions & 8 deletions diploma_thesis/agents/base/marl_agent.py
@@ -1,17 +1,19 @@
import copy
from typing import Dict

import pandas as pd

from agents.utils import TrainingPhase
from agents.utils.rl import RLTrainer
from utils import filter
from .agent import *
from .rl_agent import *
from .model import NNModel


-class MARLAgent(Generic[Key], Agent[Key]):
+class MARLAgent(Generic[Key], RLAgent[Key]):

     def __init__(self, model: NNModel, state_encoder: StateEncoder, trainer: RLTrainer, is_model_distributed: bool):
-        super().__init__(model, state_encoder)
+        super().__init__(model, state_encoder, trainer)

         self.trainer: RLTrainer | Dict[Key, RLTrainer] = trainer
         self.is_model_distributed = is_model_distributed
@@ -34,6 +36,7 @@ def setup(self, shop_floor: ShopFloor):
assert is_key_set_equal or is_evaluating_with_centralized_model, \
("Multi-Agent model should be configured for the same shop floor architecture "
"or have centralized action network")

return

self.is_configured = True
@@ -61,21 +64,29 @@ def store(self, key: Key, record: Record):
self.trainer[key].store(record)

     def loss_record(self):
-        result = [self.trainer[key].loss_record() for key in self.keys]
+        result = []

-        return result
+        for key in self.keys:
+            loss_record = self.trainer[key].loss_record()
+
+            for k, v in key.__dict__.items():
+                loss_record[k] = int(v)
+
+            result += [loss_record]
+
+        return pd.concat(result)

     def clear_memory(self):
         for key in self.keys:
             self.trainer[key].clear()

     def schedule(self, key: Key, parameters):
         state = self.encode_state(parameters)

-        result = self.__model_for_key__(key)(state, parameters)
+        model = self.__model_for_key__(key)
+        result = model(state, parameters)

         if not self.trainer[key].is_configured:
-            self.trainer[key].configure(self.model)
+            self.trainer[key].configure(model)

         return result

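A minimal sketch (not part of this commit) of what the reworked loss_record above produces: each trainer's loss table is annotated with the fields of its key and the pieces are concatenated into a single frame. The MachineKey dataclass, its field names and the loss values are hypothetical; only the key.__dict__ annotation and pd.concat usage mirror the change.

from dataclasses import dataclass

import pandas as pd


@dataclass(frozen=True)
class MachineKey:  # hypothetical key type; real keys come from the agent's shop-floor setup
    work_center_idx: int
    machine_idx: int


# stand-ins for self.trainer[key].loss_record()
trainers = {
    MachineKey(0, 0): pd.DataFrame({'loss': [0.91, 0.47]}),
    MachineKey(0, 1): pd.DataFrame({'loss': [1.12, 0.63]}),
}

result = []

for key, loss_record in trainers.items():
    # annotate each record with the key fields, as MARLAgent.loss_record now does
    for k, v in key.__dict__.items():
        loss_record[k] = int(v)

    result += [loss_record]

print(pd.concat(result))  # one table with loss, work_center_idx and machine_idx columns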
7 changes: 7 additions & 0 deletions diploma_thesis/agents/utils/graph/graph_encoder.py
@@ -0,0 +1,7 @@


class GraphEncoder:

    def __init__(self):
        pass

2 changes: 1 addition & 1 deletion diploma_thesis/agents/utils/memory/__init__.py
@@ -2,7 +2,7 @@
from functools import partial

from utils import from_cli
-from .memory import Record, Memory
+from .memory import Record, Memory, NotReadyException
from .prioritized_replay_memory import PrioritizedReplayMemory
from .replay_memory import ReplayMemory

10 changes: 9 additions & 1 deletion diploma_thesis/agents/utils/memory/memory.py
@@ -20,16 +20,23 @@ class Record:
     done: torch.BoolTensor


+class NotReadyException(BaseException):
+    pass
+
+
 class Memory(metaclass=ABCMeta):

     def __init__(self, configuration: Configuration):
         self.configuration = configuration
-        self.buffer = self.__make_buffer__()
+        self.buffer: TensorDictReplayBuffer = self.__make_buffer__()

     def store(self, record: Record):
         self.buffer.extend(record)

     def sample(self, return_info: bool = False) -> Record:
+        if len(self.buffer) < self.buffer._batch_size:
+            raise NotReadyException()
+
         return self.buffer.sample(return_info=return_info)

     def sample_n(self, batch_size: int) -> Record:
@@ -55,3 +62,4 @@ def __getstate__(self):

     def __setstate__(self, state):
         self.configuration = state
+        self.buffer = self.__make_buffer__()
8 changes: 5 additions & 3 deletions diploma_thesis/agents/utils/rl/dqn.py
@@ -40,10 +40,12 @@ def configure(self, model: NNModel):

         self.target_model = model.clone()

-    @filter(lambda self, *args: len(self.memory) > 0)
     def train_step(self, model: NNModel):
-        batch, info = self.memory.sample(return_info=True)
-        batch: Record | torch.Tensor = torch.squeeze(batch)
+        try:
+            batch, info = self.memory.sample(return_info=True)
+            batch: Record | torch.Tensor = torch.squeeze(batch)
+        except NotReadyException:
+            return

         with torch.no_grad():
             q_values, td_error = self.estimate_q(model, batch)
2 changes: 1 addition & 1 deletion diploma_thesis/agents/utils/rl/rl.py
@@ -5,7 +5,7 @@
import torch

from agents.base.model import NNModel
-from agents.utils.memory import Record, Memory
+from agents.utils.memory import Record, Memory, NotReadyException
from agents.utils.nn import LossCLI, OptimizerCLI


14 changes: 10 additions & 4 deletions diploma_thesis/configuration/jsp.yml
@@ -1,7 +1,7 @@

task:
kind: 'multi_task'
-n_workers: 10
+n_workers: 8
debug: False

tasks:
@@ -52,10 +52,16 @@ task:
mods:
# Reserved keywords to build GridFactory resulting a list
__factory__:
-# RL Agents
+# RL Trainers
 - [
-'rl/dqn.yml',
-'rl/ddqn.yml'
+'rl/trainer/dqn.yml',
+'rl/trainer/ddqn.yml'
 ]
+# RL Agent Kind
+- [
+'rl/agent/single.yml',
+# 'rl/agent/multi_agent.yml',
+# 'rl/agent/centralized.yml'
+]
# Memory
- [
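For intuition only: the __factory__ lists above feed a grid expansion, so every trainer mod is combined with every enabled agent mod (and, further down, with each memory mod). The repository's GridFactory may differ in detail; this sketch just shows the cross-product idea with itertools.

from itertools import product

# assumption: each inner __factory__ list holds alternative mod files and the factory
# emits one run configuration per combination across the lists
trainer_mods = ['rl/trainer/dqn.yml', 'rl/trainer/ddqn.yml']
agent_mods = ['rl/agent/single.yml']  # multi_agent / centralized variants are commented out above

for combination in product(trainer_mods, agent_mods):
    print(list(combination))
# ['rl/trainer/dqn.yml', 'rl/agent/single.yml']
# ['rl/trainer/ddqn.yml', 'rl/agent/single.yml']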
@@ -0,0 +1,3 @@
kind: 'marl'
parameters:
is_model_distributed: False
@@ -0,0 +1 @@
kind: 'marl'
@@ -0,0 +1 @@
kind: 'rl'
5 changes: 3 additions & 2 deletions diploma_thesis/configuration/simulation.yml
@@ -11,7 +11,8 @@ task:
base_path: 'configuration/mods/machine_agent/model.yml'
mods: [
'encoding/mr.yml',
-'model/model/marl_mr.yml'
+'model/model/marl_mr.yml',
+'rl/agent/multi_agent.yml'
]


@@ -27,7 +28,7 @@ task:

tape:
machine_reward:
-kind: 'surrogate_tardiness'
+kind: 'global_decomposed_tardiness'
parameters:
span: 256

11 changes: 8 additions & 3 deletions diploma_thesis/tape/machine/surrogate_tardiness_reward.py
@@ -76,6 +76,11 @@ def reward_after_completion(self, contexts: List[Context]):
)

     def __compute_reward__(self, context: Context):
+        # Note: Potentially, it may happen that there was only one job in the queue. In this case the machine doesn't
+        # perform any decision.
+        if context.chosen.numel() == 1:
+            return torch.tensor(0.0)
+
         slack = context.slack

         critical_level = 1 - slack / (torch.abs(slack) + self.configuration.critical_level_factor)
@@ -89,9 +94,9 @@ def __compute_reward__(self, context: Context):
         consumed_slack_loser = context.processing_time[context.chosen] * critical_level_loser.mean()

         reward_slack = earned_slack_chosen - consumed_slack_loser
-        reward_winq = (
-            context.winq[~context.chosen].mean() - context.winq[context.chosen]
-        ) * self.configuration.winq_factor
+
+        reward_winq = (context.winq[~context.chosen].mean() - context.winq[context.chosen])
+        reward_winq = reward_winq * self.configuration.winq_factor

         reward = ((reward_slack + reward_winq) / self.configuration.span).clip(-1, 1)

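A toy illustration of the two behaviours touched above, with made-up numbers and a stand-in Context (this is not the full reward, which also involves the slack and critical-level terms): the early exit when only one job was queued, and the WINQ term that favours choosing the job with the least work in its next queue.

from types import SimpleNamespace

import torch

configuration = SimpleNamespace(winq_factor=0.2)  # made-up factor

# Only one job in the queue: the machine made no real decision, so the reward is neutral.
context = SimpleNamespace(chosen=torch.tensor([True]))

if context.chosen.numel() == 1:
    print(torch.tensor(0.0))  # tensor(0.)

# Three candidate jobs with the first one chosen: compare the mean WINQ of the skipped
# jobs against the WINQ of the chosen one, scaled by winq_factor.
context = SimpleNamespace(chosen=torch.tensor([True, False, False]),
                          winq=torch.tensor([2.0, 5.0, 7.0]))

reward_winq = (context.winq[~context.chosen].mean() - context.winq[context.chosen])
reward_winq = reward_winq * configuration.winq_factor

print(reward_winq)  # tensor([0.8000]): picking the low-WINQ job earns a positive term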
2 changes: 1 addition & 1 deletion diploma_thesis/tape/queue/machine_queue.py
@@ -36,7 +36,7 @@ def clear_all(self):
@filter(lambda self, context, *args, **kwargs: context.shop_floor.id in self.queue)
@filter(lambda self, _, __, record: isinstance(record.result, Job))
def register(self, context: Context, machine: Machine, record: MachineModel.Record):
-if record.result:
+if record.result is not None:
self.queue[context.shop_floor.id][machine.key][record.result.id] = TapeRecord(
record=Record(
state=record.state,
2 changes: 1 addition & 1 deletion diploma_thesis/workflow/__init__.py
@@ -2,4 +2,4 @@
from .workflow import Workflow
from .simulation import Simulation
from .tournament import Tournament
-from .multi_simulation import MultiSimulation
+from .multi_simulation import MultiSimulation
2 changes: 1 addition & 1 deletion diploma_thesis/workflow/simulation.py
@@ -187,7 +187,7 @@ def __process_rl_agents__(simulator: Simulator, output_dir: str):

for model, loss_path in zip(models, loss_paths):
if isinstance(model, RLAgent):
-loss = model.trainer.loss_record()
+loss = model.loss_record()
path = os.path.join(output_dir, loss_path)
loss.to_csv(path, index=True)
model.clear_memory()
2 changes: 1 addition & 1 deletion diploma_thesis/workflow/workflow.py
@@ -74,7 +74,7 @@ def __add_handlers__(self, logger, formatter, filename: str, log_stdout: bool):
def __get_logger__(self, name):
workflow_id = self.workflow_id

-logger = logging.Logger(name + '_' + self.workflow_id if len(workflow_id) > 0 else name)
+logger = logging.getLogger(name + '_' + self.workflow_id if len(workflow_id) > 0 else name)
logger.setLevel(logging.INFO)

return logger
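A quick illustration of the standard-library behaviour behind the switch above (this snippet is not from the repository): instantiating logging.Logger directly creates a fresh object the logging manager never sees, so repeated calls yield unrelated loggers, while logging.getLogger returns the cached, managed instance for a given name.

import logging

a = logging.Logger('simulation')
b = logging.Logger('simulation')
print(a is b)  # False: two unrelated logger objects

c = logging.getLogger('simulation')
d = logging.getLogger('simulation')
print(c is d)  # True: the same managed instance is reused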
