From 36963a9f8c28e982cfaac73c5294fe84881245d0 Mon Sep 17 00:00:00 2001 From: Yury Hayeu Date: Tue, 12 Mar 2024 12:31:36 +0100 Subject: [PATCH] Fixes connected with compiled models --- diploma_thesis/agents/base/__init__.py | 4 + diploma_thesis/agents/base/marl_agent.py | 39 +++++++--- diploma_thesis/agents/base/model.py | 5 ++ diploma_thesis/agents/base/rl_agent.py | 43 ++++++++++- diploma_thesis/agents/machine/marl.py | 9 ++- .../agents/machine/model/deep_multi_rule.py | 2 +- .../agents/machine/model/deep_rule.py | 2 +- diploma_thesis/agents/machine/rl.py | 5 +- .../nn/layers/partial_instance_norm_1d.py | 2 +- .../agents/utils/policy/flexible_action.py | 30 ++++---- diploma_thesis/agents/utils/policy/policy.py | 2 +- diploma_thesis/agents/utils/rl/ddqn.py | 4 +- diploma_thesis/agents/utils/rl/dqn.py | 14 +++- diploma_thesis/agents/utils/rl/ppo.py | 2 +- diploma_thesis/agents/utils/rl/reinforce.py | 9 ++- diploma_thesis/agents/utils/rl/rl.py | 3 + diploma_thesis/cli.py | 1 - .../experiments/jsp/dqn_path.yml | 74 ++++++++----------- .../configuration/mods/machine/dqn.yml | 2 +- .../mods/util/infrastructure/compile.yml | 4 +- diploma_thesis/configuration/mods/run/run.yml | 2 +- diploma_thesis/configuration/simulation.yml | 2 +- diploma_thesis/simulator/simulator.py | 1 - diploma_thesis/utils/modified.py | 1 + diploma_thesis/workflow/multi_simulation.py | 5 +- diploma_thesis/workflow/simulation.py | 6 +- notebooks/{utils => plot_utils}/__init__.py | 13 ++-- notebooks/{utils => plot_utils}/legend.py | 0 .../plot_decisions_per_action.py | 0 notebooks/{utils => plot_utils}/plot_gantt.py | 0 .../plot_performance_accross_runs.py | 1 + .../plot_reward_distribution_per_action.py | 0 .../plot_reward_model_across_runs.py | 0 .../plot_reward_per_run.py | 0 notebooks/{utils => plot_utils}/plot_value.py | 0 notebooks/plot_utils/plot_value_per_run.py | 48 ++++++++++++ notebooks/utils/plot_value_per_run.py | 0 37 files changed, 227 insertions(+), 108 deletions(-) rename notebooks/{utils => plot_utils}/__init__.py (89%) rename notebooks/{utils => plot_utils}/legend.py (100%) rename notebooks/{utils => plot_utils}/plot_decisions_per_action.py (100%) rename notebooks/{utils => plot_utils}/plot_gantt.py (100%) rename notebooks/{utils => plot_utils}/plot_performance_accross_runs.py (99%) rename notebooks/{utils => plot_utils}/plot_reward_distribution_per_action.py (100%) rename notebooks/{utils => plot_utils}/plot_reward_model_across_runs.py (100%) rename notebooks/{utils => plot_utils}/plot_reward_per_run.py (100%) rename notebooks/{utils => plot_utils}/plot_value.py (100%) create mode 100644 notebooks/plot_utils/plot_value_per_run.py delete mode 100644 notebooks/utils/plot_value_per_run.py diff --git a/diploma_thesis/agents/base/__init__.py b/diploma_thesis/agents/base/__init__.py index c234374..3162871 100644 --- a/diploma_thesis/agents/base/__init__.py +++ b/diploma_thesis/agents/base/__init__.py @@ -5,3 +5,7 @@ from .state import GraphState, TensorState, Graph from .rl_agent import RLAgent from .marl_agent import MARLAgent + +import torch + +torch._dynamo.config.suppress_errors = True diff --git a/diploma_thesis/agents/base/marl_agent.py b/diploma_thesis/agents/base/marl_agent.py index 68e021b..cb7dfa6 100644 --- a/diploma_thesis/agents/base/marl_agent.py +++ b/diploma_thesis/agents/base/marl_agent.py @@ -8,17 +8,14 @@ class MARLAgent(Generic[Key], RLAgent[Key]): - def __init__(self, - model: DeepPolicyModel, - state_encoder: StateEncoder, - trainer: RLTrainer, - is_model_distributed: bool): - 
super().__init__(model, state_encoder, trainer) - - self.model: DeepPolicyModel | Dict[Key, DeepPolicyModel] = model - self.trainer: RLTrainer | Dict[Key, RLTrainer] = trainer - self.is_model_distributed = is_model_distributed + def __init__(self, is_model_distributed: bool, *args, **kwargs): self.is_configured = False + + super().__init__(*args, **kwargs) + + self.model: DeepPolicyModel | Dict[Key, DeepPolicyModel] = self.model + self.trainer: RLTrainer | Dict[Key, RLTrainer] = self.trainer + self.is_model_distributed = is_model_distributed self.keys = None @property @@ -99,6 +96,9 @@ def schedule(self, key: Key, parameters): if not self.trainer[key].is_configured: self.trainer[key].configure(model.policy) + if not self.is_compiled: + self.compile() + return result def __model_for_key__(self, key: Key): @@ -106,3 +106,22 @@ def __model_for_key__(self, key: Key): return self.model[key] return self.model + + def compile(self): + if not self.is_configured or self.is_compiled: + return + + if not self.configuration.compile: + self.is_compiled = True + return + + for _, value in self.trainer.items(): + value.compile() + + if self.is_model_distributed: + for _, value in self.model.items(): + value.compile() + else: + self.model.compile() + + self.is_compiled = True diff --git a/diploma_thesis/agents/base/model.py b/diploma_thesis/agents/base/model.py index 3d7c8e6..9f03935 100644 --- a/diploma_thesis/agents/base/model.py +++ b/diploma_thesis/agents/base/model.py @@ -26,6 +26,9 @@ class Record: def __call__(self, state: State, parameters: Input) -> Record: pass + def compile(self): + pass + class DeepPolicyModel(Model[Input, State, Action, Result], PhaseUpdatable, metaclass=ABCMeta): @@ -39,3 +42,5 @@ def update(self, phase: Phase): self.policy.update(phase) + def compile(self): + self.policy.compile() diff --git a/diploma_thesis/agents/base/rl_agent.py b/diploma_thesis/agents/base/rl_agent.py index 4323e9d..854d33b 100644 --- a/diploma_thesis/agents/base/rl_agent.py +++ b/diploma_thesis/agents/base/rl_agent.py @@ -6,12 +6,32 @@ from .agent import * from .model import DeepPolicyModel +from dataclasses import dataclass + + +@dataclass +class Configuration: + compile: bool = False + + @staticmethod + def from_cli(parameters): + return Configuration( + compile=parameters.get('compile', False) + ) + class RLAgent(Generic[Key], Agent[Key]): - def __init__(self, model: DeepPolicyModel, state_encoder: StateEncoder, trainer: RLTrainer): + def __init__(self, + model: DeepPolicyModel, + state_encoder: StateEncoder, + trainer: RLTrainer, + configuration: Configuration): super().__init__(model, state_encoder) + self.is_compiled = False + + self.configuration = configuration self.model: DeepPolicyModel = model self.trainer = trainer @@ -52,4 +72,25 @@ def schedule(self, key, parameters): if not self.trainer.is_configured: self.trainer.configure(self.model.policy) + if not self.is_compiled: + self.compile() + return result + + def __setstate__(self, state): + self.__dict__ = state + + self.compile() + + def compile(self): + if not self.configuration.compile: + self.is_compiled = True + return + + if self.is_compiled: + return + + self.trainer.compile() + self.model.compile() + + self.is_compiled = True diff --git a/diploma_thesis/agents/machine/marl.py b/diploma_thesis/agents/machine/marl.py index 09be9f0..e0f8edb 100644 --- a/diploma_thesis/agents/machine/marl.py +++ b/diploma_thesis/agents/machine/marl.py @@ -1,6 +1,6 @@ from typing import Dict -from agents.base.marl_agent import MARLAgent +from 
agents.base.marl_agent import MARLAgent, Configuration from agents.utils.rl import from_cli as rl_trainer_from_cli from environment import MachineKey, ShopFloor from .model import DeepPolicyMachineModel, from_cli as model_from_cli @@ -19,9 +19,14 @@ def from_cli(parameters: Dict): model = model_from_cli(parameters['model']) encoder = state_encoder_from_cli(parameters['encoder']) trainer = rl_trainer_from_cli(parameters['trainer']) + configuration = Configuration.from_cli(parameters) is_model_distributed = parameters.get('is_model_distributed', True) assert isinstance(model, DeepPolicyMachineModel), f"Model must conform to NNModel" - return MARLMachine(model, encoder, trainer, is_model_distributed) + return MARLMachine(is_model_distributed=is_model_distributed, + model=model, + state_encoder=encoder, + trainer=trainer, + configuration=configuration) diff --git a/diploma_thesis/agents/machine/model/deep_multi_rule.py b/diploma_thesis/agents/machine/model/deep_multi_rule.py index 4835be0..e619664 100644 --- a/diploma_thesis/agents/machine/model/deep_multi_rule.py +++ b/diploma_thesis/agents/machine/model/deep_multi_rule.py @@ -18,7 +18,7 @@ def __init__(self, rules: List[SchedulingRule], policy: Policy[MachineInput]): def __call__(self, state: State, parameters: Input) -> DeepPolicyMachineModel.Record: # No gradient descent based on decision on the moment with torch.no_grad(): - record = self.policy(state, parameters) + record = self.policy.select(state, parameters) result = self.rules[record.action.item()](parameters.machine, parameters.now) return DeepPolicyMachineModel.Record(result=result, record=record, batch_size=[]) diff --git a/diploma_thesis/agents/machine/model/deep_rule.py b/diploma_thesis/agents/machine/model/deep_rule.py index 21749cc..45f4b80 100644 --- a/diploma_thesis/agents/machine/model/deep_rule.py +++ b/diploma_thesis/agents/machine/model/deep_rule.py @@ -11,7 +11,7 @@ class DeepRule(DeepPolicyMachineModel): def __call__(self, state: State, parameters: Input) -> DeepPolicyMachineModel.Record: # No gradient descent based on decision on the moment with torch.no_grad(): - record = self.policy(state, parameters) + record = self.policy.select(state, parameters) result = parameters.machine.queue[record.action.item()] return DeepPolicyMachineModel.Record(result=result, record=record, batch_size=[]) diff --git a/diploma_thesis/agents/machine/rl.py b/diploma_thesis/agents/machine/rl.py index d50ee06..921cbc1 100644 --- a/diploma_thesis/agents/machine/rl.py +++ b/diploma_thesis/agents/machine/rl.py @@ -1,6 +1,6 @@ from typing import Dict -from agents.base.rl_agent import RLAgent +from agents.base.rl_agent import RLAgent, Configuration from agents.utils.rl import from_cli as rl_trainer_from_cli from environment import MachineKey from .model import DeepPolicyMachineModel, from_cli as model_from_cli @@ -14,7 +14,8 @@ def from_cli(parameters: Dict): model = model_from_cli(parameters['model']) encoder = state_encoder_from_cli(parameters['encoder']) trainer = rl_trainer_from_cli(parameters['trainer']) + configuration = Configuration.from_cli(parameters) assert isinstance(model, DeepPolicyMachineModel), f"Model must conform to NNModel" - return RLMachine(model, encoder, trainer) + return RLMachine(model, encoder, trainer, configuration) diff --git a/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py b/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py index 9711a09..89a86a7 100644 --- a/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py +++ 
b/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py @@ -11,7 +11,7 @@ def __init__(self, channels: int): super().__init__() self.channels = channels - self.norm = nn.InstanceNorm1d(num_features=channels) + self.norm = nn.InstanceNorm1d(num_features=1) def forward(self, batch): normalized = batch[:, :self.channels] diff --git a/diploma_thesis/agents/utils/policy/flexible_action.py b/diploma_thesis/agents/utils/policy/flexible_action.py index b050916..180b8fa 100644 --- a/diploma_thesis/agents/utils/policy/flexible_action.py +++ b/diploma_thesis/agents/utils/policy/flexible_action.py @@ -44,21 +44,7 @@ def __get_values__(self, state): def __get_actions__(self, state): return self.action_model(state) - def forward(self, state: State, parameters: Input) -> Record: - values, actions = self.predict(state) - values, actions = values.squeeze(), actions.squeeze() - action, policy = self.action_selector(actions) - action = action if torch.is_tensor(action) else torch.tensor(action, dtype=torch.long) - - info = TensorDict({ - "policy": policy, - "values": values.detach().clone(), - "actions": actions.detach().clone() - }, batch_size=[]) - - return Record(state, action, info, batch_size=[]) - - def predict(self, state: State): + def forward(self, state: State): actions = torch.tensor(0, dtype=torch.long) if self.action_model is not None: @@ -77,6 +63,20 @@ def predict(self, state: State): case _: raise ValueError(f"Policy estimation method {self.policy_estimation_method} is not supported") + def select(self, state: State, parameters: Input) -> Record: + values, actions = self.__call__(state) + values, actions = values.squeeze(), actions.squeeze() + action, policy = self.action_selector(actions) + action = action if torch.is_tensor(action) else torch.tensor(action, dtype=torch.long) + + info = TensorDict({ + "policy": policy, + "values": values.detach().clone(), + "actions": actions.detach().clone() + }, batch_size=[]) + + return Record(state, action, info, batch_size=[]) + def __configure__(self): if self.noise_parameters is not None: self.action_model.to_noisy(self.noise_parameters) diff --git a/diploma_thesis/agents/utils/policy/policy.py b/diploma_thesis/agents/utils/policy/policy.py index 8384f3b..13d394b 100644 --- a/diploma_thesis/agents/utils/policy/policy.py +++ b/diploma_thesis/agents/utils/policy/policy.py @@ -28,7 +28,7 @@ class Record: class Policy(Generic[Input], nn.Module, PhaseUpdatable, metaclass=ABCMeta): @abstractmethod - def predict(self, state: State) -> Tuple[torch.FloatTensor, torch.FloatTensor]: + def select(self, state, parameters): pass def clone(self): diff --git a/diploma_thesis/agents/utils/rl/ddqn.py b/diploma_thesis/agents/utils/rl/ddqn.py index 53e5ce3..0b20f37 100644 --- a/diploma_thesis/agents/utils/rl/ddqn.py +++ b/diploma_thesis/agents/utils/rl/ddqn.py @@ -9,12 +9,12 @@ class DoubleDeepQTrainer(DeepQTrainer): def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase): - _, actions = model.predict(batch.next_state) + _, actions = model(batch.next_state) orig_q = actions[range(batch.shape[0]), batch.action] best_actions = actions.max(dim=-1).indices - target = self.target_model.predict(batch.next_state)[1][range(batch.shape[0]), best_actions] + target = self.target_model(batch.next_state)[1][range(batch.shape[0]), best_actions] q = batch.reward + self.return_estimator.discount_factor * target * (1 - batch.done.int()) actions[range(batch.shape[0]), batch.action] = q diff --git a/diploma_thesis/agents/utils/rl/dqn.py 
b/diploma_thesis/agents/utils/rl/dqn.py index d3ba1ec..529478c 100644 --- a/diploma_thesis/agents/utils/rl/dqn.py +++ b/diploma_thesis/agents/utils/rl/dqn.py @@ -19,7 +19,7 @@ class Configuration: def from_cli(parameters: Dict): return DeepQTrainer.Configuration( decay=parameters.get('decay', 0.99), - update_steps=parameters.get('update_steps', 100), + update_steps=parameters.get('update_steps', 20), prior_eps=parameters.get('prior_eps', 1e-6) ) @@ -43,7 +43,7 @@ def __train__(self, model: Policy): with torch.no_grad(): q_values, td_error = self.estimate_q(model, batch) - _, actions = model.predict(batch.state) + _, actions = model(batch.state) loss = self.loss(actions, q_values) self.optimizer.zero_grad() @@ -64,10 +64,10 @@ def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase): # Note: # The idea is that we compute the Q-values only for performed actions. Other actions wouldn't be updated, # because there will be zero loss and so zero gradient - _, actions = model.predict(batch.next_state) + _, actions = model(batch.next_state) orig_q = actions.clone()[range(batch.shape[0]), batch.action] - _, target = self.target_model.predict(batch.next_state) + _, target = self.target_model(batch.next_state) target = target.max(dim=1).values q = batch.reward + self.return_estimator.discount_factor * target * (1 - batch.done.int()) @@ -81,6 +81,12 @@ def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase): def target_model(self): return self._target_model.module + def compile(self): + if not self.is_configured: + return + + self.target_model.compile() + @classmethod def from_cli(cls, parameters, diff --git a/diploma_thesis/agents/utils/rl/ppo.py b/diploma_thesis/agents/utils/rl/ppo.py index 713c3e8..239aa51 100644 --- a/diploma_thesis/agents/utils/rl/ppo.py +++ b/diploma_thesis/agents/utils/rl/ppo.py @@ -64,7 +64,7 @@ def __train__(self, model: Policy): def __step__(self, batch: Record, model: Policy): advantages = batch.info[Record.ADVANTAGE_KEY] - value, logits = model.predict(batch.state) + value, logits = model(batch.state) value = value[torch.arange(batch.shape[0]), batch.action] distribution = torch.distributions.Categorical(logits=logits) diff --git a/diploma_thesis/agents/utils/rl/reinforce.py b/diploma_thesis/agents/utils/rl/reinforce.py index 2266a4c..890c1a3 100644 --- a/diploma_thesis/agents/utils/rl/reinforce.py +++ b/diploma_thesis/agents/utils/rl/reinforce.py @@ -64,7 +64,7 @@ def __train__(self, model: Policy): baseline = torch.squeeze(baseline) # Perform policy step - loss = self.loss(model.predict(batch.state)[1], batch.action) + loss = self.loss(model(batch.state)[1], batch.action) if loss.numel() == 1: raise ValueError('Loss should not have reduction to single value') @@ -90,6 +90,13 @@ def __train__(self, model: Policy): critic.optimizer.step() self.record_loss(critic_loss, key=f'critic_{index}') + def compile(self): + if not self.is_configured: + return + + for critic in self.critics: + critic.neural_network.compile() + @property def critics(self): return self.configuration.critics diff --git a/diploma_thesis/agents/utils/rl/rl.py b/diploma_thesis/agents/utils/rl/rl.py index 195a167..606c8c0 100644 --- a/diploma_thesis/agents/utils/rl/rl.py +++ b/diploma_thesis/agents/utils/rl/rl.py @@ -90,6 +90,9 @@ def store(self, sample: TrainingSample, model: Policy): self.__train__(model) + def compile(self): + pass + def clear(self): self.loss_cache = [] self.storage.clear() diff --git a/diploma_thesis/cli.py b/diploma_thesis/cli.py index 
ea5efcc..49536bc 100644 --- a/diploma_thesis/cli.py +++ b/diploma_thesis/cli.py @@ -2,7 +2,6 @@ import argparse from typing import Dict -import torch._dynamo import yaml from workflow import Workflow, Simulation, Tournament, MultiSimulation diff --git a/diploma_thesis/configuration/experiments/jsp/dqn_path.yml b/diploma_thesis/configuration/experiments/jsp/dqn_path.yml index 4814d5e..8b271e6 100644 --- a/diploma_thesis/configuration/experiments/jsp/dqn_path.yml +++ b/diploma_thesis/configuration/experiments/jsp/dqn_path.yml @@ -1,6 +1,11 @@ # Evaluate the effectivenes of basic DQNs on the JSP environment -template: &template 'reference/marl_indirect' +template: &template 'reference/marl_direct' + +default_mods: &default_mods + - 'util/optimizer/grad_norm.yml' + - 'util/rules/all_rules.yml' +# - 'util/infrastructure/compile.yml' ############################################################################################### @@ -8,16 +13,14 @@ dqn_1: &dqn_1 base_path: 'configuration/mods/machine/dqn.yml' template: *template mods: - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + *default_mods marl_dqn_1: &marl_dqn_1 base_path: 'configuration/mods/machine/dqn.yml' template: *template mods: - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_1: ¢ralized_dqn_1 base_path: 'configuration/mods/machine/dqn.yml' @@ -25,8 +28,7 @@ marl_dqn_centralized_1: ¢ralized_dqn_1 mods: - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -35,8 +37,7 @@ dqn_2: &dqn_2 template: *template mods: - 'agent/dqn/ddqn.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_2: &marl_dqn_2 base_path: 'configuration/mods/machine/dqn.yml' @@ -44,8 +45,7 @@ marl_dqn_2: &marl_dqn_2 mods: - 'agent/dqn/ddqn.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_2: ¢ralized_dqn_2 base_path: 'configuration/mods/machine/dqn.yml' @@ -54,8 +54,7 @@ marl_dqn_centralized_2: ¢ralized_dqn_2 - 'agent/dqn/ddqn.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -64,8 +63,7 @@ dqn_3: &dqn_3 template: *template mods: - 'agent/dqn/dueling.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_3: &marl_dqn_3 base_path: 'configuration/mods/machine/dqn.yml' @@ -73,8 +71,7 @@ marl_dqn_3: &marl_dqn_3 mods: - 'agent/dqn/dueling.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_3: ¢ralized_dqn_3 base_path: 'configuration/mods/machine/dqn.yml' @@ -83,8 +80,7 @@ marl_dqn_centralized_3: ¢ralized_dqn_3 - 'agent/dqn/dueling.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -93,8 +89,7 @@ dqn_4: &dqn_4 template: *template mods: - 'agent/dqn/steps/3.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + 
- *default_mods marl_dqn_4: &marl_dqn_4 base_path: 'configuration/mods/machine/dqn.yml' @@ -102,8 +97,7 @@ marl_dqn_4: &marl_dqn_4 mods: - 'agent/dqn/steps/3.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_4: ¢ralized_dqn_4 base_path: 'configuration/mods/machine/dqn.yml' @@ -112,8 +106,7 @@ marl_dqn_centralized_4: ¢ralized_dqn_4 - 'agent/dqn/steps/3.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -122,8 +115,7 @@ dqn_5: &dqn_5 template: *template mods: - 'agent/dqn/prioritized.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_5: &marl_dqn_5 base_path: 'configuration/mods/machine/dqn.yml' @@ -131,8 +123,7 @@ marl_dqn_5: &marl_dqn_5 mods: - 'agent/dqn/prioritized.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_5: ¢ralized_dqn_5 base_path: 'configuration/mods/machine/dqn.yml' @@ -141,8 +132,7 @@ marl_dqn_centralized_5: ¢ralized_dqn_5 - 'agent/dqn/prioritized.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -151,9 +141,8 @@ dqn_6: &dqn_6 template: *template mods: - 'agent/dqn/noisy.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' - 'util/action_selector/sample.yml' + - *default_mods marl_dqn_6: &marl_dqn_6 base_path: 'configuration/mods/machine/dqn.yml' @@ -161,8 +150,7 @@ marl_dqn_6: &marl_dqn_6 mods: - 'agent/dqn/noisy.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' marl_dqn_centralized_6: ¢ralized_dqn_6 @@ -172,8 +160,7 @@ marl_dqn_centralized_6: ¢ralized_dqn_6 - 'agent/dqn/noisy.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' ############################################################################################### @@ -187,8 +174,7 @@ dqn_7: &dqn_7 - 'agent/dqn/steps/3.yml' - 'agent/dqn/dueling.yml' - 'agent/dqn/noisy.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' marl_dqn_7: &marl_dqn_7 @@ -201,8 +187,7 @@ marl_dqn_7: &marl_dqn_7 - 'agent/dqn/dueling.yml' - 'agent/dqn/noisy.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' marl_dqn_centralized_7: ¢ralized_dqn_7 @@ -216,8 +201,7 @@ marl_dqn_centralized_7: ¢ralized_dqn_7 - 'agent/dqn/noisy.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' ############################################################################################### @@ -287,10 +271,10 @@ short_single_source_run: &short_single_source_run task: kind: 'multi_task' - n_workers: 4 + n_workers: 8 debug: False store_run_statistics: False - output_dir: 
'results/jsp/experiments/dqn_path/marl_indirect' + output_dir: 'results/jsp/experiments/dqn_path/marl_direct' tasks: # TD - n-step short single source diff --git a/diploma_thesis/configuration/mods/machine/dqn.yml b/diploma_thesis/configuration/mods/machine/dqn.yml index 5156dfd..5dab3b5 100644 --- a/diploma_thesis/configuration/mods/machine/dqn.yml +++ b/diploma_thesis/configuration/mods/machine/dqn.yml @@ -54,7 +54,7 @@ parameters: kind: 'replay' parameters: size: 8192 - batch_size: 256 + batch_size: 128 prefetch: 8 loss: diff --git a/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml b/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml index 9ded62b..26421c1 100644 --- a/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml +++ b/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml @@ -1,5 +1,3 @@ parameters: - model: - parameters: - compile: True \ No newline at end of file + compile: True diff --git a/diploma_thesis/configuration/mods/run/run.yml b/diploma_thesis/configuration/mods/run/run.yml index 058ad8c..fbadbb4 100644 --- a/diploma_thesis/configuration/mods/run/run.yml +++ b/diploma_thesis/configuration/mods/run/run.yml @@ -11,7 +11,7 @@ parameters: machine_train_schedule: pretrain_steps: 10 - train_interval: 100 + train_interval: 50 max_training_steps: 100000000 work_center_train_schedule: diff --git a/diploma_thesis/configuration/simulation.yml b/diploma_thesis/configuration/simulation.yml index 455eec8..ec47108 100644 --- a/diploma_thesis/configuration/simulation.yml +++ b/diploma_thesis/configuration/simulation.yml @@ -18,7 +18,7 @@ task: # - 'util/agent/multi_agent.yml' - 'util/rules/all_rules.yml' - 'util/optimizer/grad_norm.yml' -# - 'util/agent/multi_agent.yml' + - 'util/agent/multi_agent.yml' - 'util/infrastructure/compile.yml' work_center_agent: diff --git a/diploma_thesis/simulator/simulator.py b/diploma_thesis/simulator/simulator.py index 7275b03..c70b53f 100644 --- a/diploma_thesis/simulator/simulator.py +++ b/diploma_thesis/simulator/simulator.py @@ -3,7 +3,6 @@ from dataclasses import field from typing import Callable, List -import gc import simpy import torch from tensordict.prototype import tensorclass diff --git a/diploma_thesis/utils/modified.py b/diploma_thesis/utils/modified.py index 2778b0d..48114ea 100644 --- a/diploma_thesis/utils/modified.py +++ b/diploma_thesis/utils/modified.py @@ -16,6 +16,7 @@ def modified(parameters): template = __load_template__(parameters, base_path) mods = parameters['mods'] + mods = reduce(lambda x, y: x + y if isinstance(y, list) else x + [y], mods, []) mods_dir = os.path.dirname(base_path) mods_dir = os.path.join(mods_dir, 'mods') diff --git a/diploma_thesis/workflow/multi_simulation.py b/diploma_thesis/workflow/multi_simulation.py index 82ba080..d111bc3 100644 --- a/diploma_thesis/workflow/multi_simulation.py +++ b/diploma_thesis/workflow/multi_simulation.py @@ -45,7 +45,10 @@ def __merge__(key, lhs, rhs): n_workers = self.parameters.get('n_workers', -1) - Parallel(n_jobs=n_workers)(delayed(__run__)(s) for s in parameters) + torch.set_num_threads(n_workers) + torch.set_num_interop_threads(n_workers) + + Parallel(n_jobs=n_workers, backend='loky')(delayed(__run__)(s) for s in parameters) def __fetch_tasks__(self): result: [Dict] = [] diff --git a/diploma_thesis/workflow/simulation.py b/diploma_thesis/workflow/simulation.py index 377a95c..b8f3c6f 100644 --- a/diploma_thesis/workflow/simulation.py +++ 
b/diploma_thesis/workflow/simulation.py @@ -11,16 +11,12 @@ from agents.base.rl_agent import RLAgent from simulator import from_cli as simulator_from_cli, Simulator, RewardCache from simulator import run_configuration_from_cli, evaluate_configuration_from_cli -from simulator.tape import TapeModel from simulator.graph import GraphModel +from simulator.tape import TapeModel from utils import save from .workflow import Workflow -torch.set_num_threads(1) -torch._dynamo.config.suppress_errors = True - - class Simulation(Workflow): def __init__(self, parameters: Dict): diff --git a/notebooks/utils/__init__.py b/notebooks/plot_utils/__init__.py similarity index 89% rename from notebooks/utils/__init__.py rename to notebooks/plot_utils/__init__.py index 1d8bd36..12e45c5 100644 --- a/notebooks/utils/__init__.py +++ b/notebooks/plot_utils/__init__.py @@ -1,9 +1,8 @@ - - -from .plot_value import plot_value -from .plot_reward_per_run import plot_reward_per_run -from .plot_reward_distribution_per_action import plot_reward_distribution_per_action -from .plot_reward_model_across_runs import plot_reward_per_model_across_runs +from .plot_decisions_per_action import plot_decisions_per_action from .plot_gantt import plot_gantt from .plot_performance_accross_runs import plot_performance_across_runs -from .plot_decisions_per_action import plot_decisions_per_action +from .plot_reward_distribution_per_action import plot_reward_distribution_per_action +from .plot_reward_model_across_runs import plot_reward_per_model_across_runs +from .plot_reward_per_run import plot_reward_per_run +from .plot_value import plot_value +from .plot_value_per_run import plot_value_per_run diff --git a/notebooks/utils/legend.py b/notebooks/plot_utils/legend.py similarity index 100% rename from notebooks/utils/legend.py rename to notebooks/plot_utils/legend.py diff --git a/notebooks/utils/plot_decisions_per_action.py b/notebooks/plot_utils/plot_decisions_per_action.py similarity index 100% rename from notebooks/utils/plot_decisions_per_action.py rename to notebooks/plot_utils/plot_decisions_per_action.py diff --git a/notebooks/utils/plot_gantt.py b/notebooks/plot_utils/plot_gantt.py similarity index 100% rename from notebooks/utils/plot_gantt.py rename to notebooks/plot_utils/plot_gantt.py diff --git a/notebooks/utils/plot_performance_accross_runs.py b/notebooks/plot_utils/plot_performance_accross_runs.py similarity index 99% rename from notebooks/utils/plot_performance_accross_runs.py rename to notebooks/plot_utils/plot_performance_accross_runs.py index 256619b..d2910b5 100644 --- a/notebooks/utils/plot_performance_accross_runs.py +++ b/notebooks/plot_utils/plot_performance_accross_runs.py @@ -4,6 +4,7 @@ from .legend import add_legend + def plot_performance_across_runs(data, info): metric = info['metric'] group = info['group'] diff --git a/notebooks/utils/plot_reward_distribution_per_action.py b/notebooks/plot_utils/plot_reward_distribution_per_action.py similarity index 100% rename from notebooks/utils/plot_reward_distribution_per_action.py rename to notebooks/plot_utils/plot_reward_distribution_per_action.py diff --git a/notebooks/utils/plot_reward_model_across_runs.py b/notebooks/plot_utils/plot_reward_model_across_runs.py similarity index 100% rename from notebooks/utils/plot_reward_model_across_runs.py rename to notebooks/plot_utils/plot_reward_model_across_runs.py diff --git a/notebooks/utils/plot_reward_per_run.py b/notebooks/plot_utils/plot_reward_per_run.py similarity index 100% rename from 
notebooks/utils/plot_reward_per_run.py
rename to notebooks/plot_utils/plot_reward_per_run.py
diff --git a/notebooks/utils/plot_value.py b/notebooks/plot_utils/plot_value.py
similarity index 100%
rename from notebooks/utils/plot_value.py
rename to notebooks/plot_utils/plot_value.py
diff --git a/notebooks/plot_utils/plot_value_per_run.py b/notebooks/plot_utils/plot_value_per_run.py
new file mode 100644
index 0000000..42b30d2
--- /dev/null
+++ b/notebooks/plot_utils/plot_value_per_run.py
@@ -0,0 +1,48 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from .legend import add_legend
+
+
+def plot_value_per_run(path: str | dict, info: dict, make_run_path, post_process_fn=lambda a, run: a):
+    if not isinstance(path, dict):
+        path = dict(first=path)
+
+    fig, ax = plt.subplots(figsize=info.get('figsize', (8, 8)))
+
+    max_values_len = 0
+
+    for name, data_path in path.items():
+        run = 1
+
+        values = []
+
+        while True:
+            run_path = make_run_path(data_path, run)
+
+            run += 1
+
+            try:
+                df = pd.read_csv(run_path)
+
+                df = df.sort_values(by=info['index'])
+                df.set_index(info['index'], inplace=True)
+
+                values += [post_process_fn(df[info['column']], run)]
+            except:
+                break  # Stop once the next run's CSV can't be read or lacks the expected columns
+
+        ax.plot(np.arange(len(values)), np.array(values), marker=info['marker'], label=name)
+
+        max_values_len = max(max_values_len, len(values))
+
+    ax.grid(True)
+    ax.set_title(info['title'])
+    ax.set_xlabel(info['xlabel'])
+    ax.set_ylabel(info['ylabel'])
+    ax.set_xticks(np.arange(max_values_len))
+
+    add_legend(ax, info)
+
+    return fig
diff --git a/notebooks/utils/plot_value_per_run.py b/notebooks/utils/plot_value_per_run.py
deleted file mode 100644
index e69de29..0000000
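
Usage note (not part of the diff): a minimal sketch of how the new plot_value_per_run helper might be called from a notebook, assuming the notebook runs from the notebooks/ directory so that plot_utils is importable. The result directory layout ('results/.../dqn_1/run_<n>/value.csv'), the 'moment' and 'value' column names, and the saved filename are illustrative assumptions, not defined by this patch; depending on what add_legend expects from info, additional keys may be needed.

import os

from plot_utils import plot_value_per_run

# Plot one aggregated value per training run for a single experiment.
# Column names and directory layout below are assumptions for illustration.
info = {
    'index': 'moment',    # column used to sort/index each run's CSV (assumed name)
    'column': 'value',    # column reduced to one point per run (assumed name)
    'marker': 'o',
    'title': 'Mean value per run',
    'xlabel': 'Run',
    'ylabel': 'Value',
}

fig = plot_value_per_run(
    path='results/jsp/experiments/dqn_path/marl_direct/dqn_1',  # assumed experiment directory
    info=info,
    # Hypothetical per-run file layout: <experiment>/run_<n>/value.csv
    make_run_path=lambda base, run: os.path.join(base, f'run_{run}', 'value.csv'),
    # Reduce each run's series to a single scalar for the plot
    post_process_fn=lambda series, run: series.mean(),
)

fig.savefig('value_per_run.png')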