diff --git a/diploma_thesis/agents/base/state.py b/diploma_thesis/agents/base/state.py
index c0472414..6bfe1185 100644
--- a/diploma_thesis/agents/base/state.py
+++ b/diploma_thesis/agents/base/state.py
@@ -93,9 +93,11 @@ def to_pyg_graph(self) -> pyg.data.Data | pyg.data.HeteroData:
             if isinstance(key, str):
                 data[key] = self.data[key]
 
+            # Nested
             if len(key) == 2:
                 data[key[0]][key[1]] = self.data[key]
 
+            # Edge
             if len(key) == 4:
                 data[key[:3]][key[3]] = self.data[key]
 
diff --git a/diploma_thesis/agents/machine/state/custom_encoder_v2.py b/diploma_thesis/agents/machine/state/custom_encoder_v2.py
index 9c4ef5e0..284260cd 100644
--- a/diploma_thesis/agents/machine/state/custom_encoder_v2.py
+++ b/diploma_thesis/agents/machine/state/custom_encoder_v2.py
@@ -72,7 +72,7 @@ def __encode__(self, parameters: StateEncoder.Input) -> State:
                 completion_rate.view(-1),
                 critical_ratios.view(-1),
 
-                placeholder + wait_times,
+                placeholder + wait_times / self.norm_factor,
                 placeholder + machine_util_rate,
                 placeholder + arriving_jobs,
                 placeholder + will_arrive_jobs,
diff --git a/diploma_thesis/agents/machine/state/deep_marl_indirect.py b/diploma_thesis/agents/machine/state/deep_marl_indirect.py
index 5ae7ab01..da772b1f 100644
--- a/diploma_thesis/agents/machine/state/deep_marl_indirect.py
+++ b/diploma_thesis/agents/machine/state/deep_marl_indirect.py
@@ -7,7 +7,7 @@
 from environment import JobReductionStrategy
 from .encoder import StateEncoder
 
-# TODO: - Update normalized state
+
 
 class DEEPMARLIndirectStateEncoder(StateEncoder):
 
diff --git a/diploma_thesis/agents/utils/nn/loss.py b/diploma_thesis/agents/utils/nn/loss.py
index f6e1ddcd..71dd6882 100644
--- a/diploma_thesis/agents/utils/nn/loss.py
+++ b/diploma_thesis/agents/utils/nn/loss.py
@@ -13,7 +13,7 @@ class Configuration:
 
     def __init__(self, configuration: Configuration):
         super().__init__()
-        
+
         self.configuration = configuration
         self.loss = self.__make_loss__()
 
diff --git a/diploma_thesis/agents/utils/return_estimator/gae.py b/diploma_thesis/agents/utils/return_estimator/gae.py
index b3858838..a1f33d6c 100644
--- a/diploma_thesis/agents/utils/return_estimator/gae.py
+++ b/diploma_thesis/agents/utils/return_estimator/gae.py
@@ -37,8 +37,6 @@ def update_returns(self, records: List[Record]) -> List[Record]:
             records[i].info[Record.ADVANTAGE_KEY] = coef ** i * advantage + next_advantage
             records[i].info[Record.RETURN_KEY] = records[i].info[Record.ADVANTAGE_KEY] + value
 
-
-
         return records[:-1]
 
     @staticmethod
diff --git a/diploma_thesis/agents/utils/return_estimator/n_step.py b/diploma_thesis/agents/utils/return_estimator/n_step.py
index 93c30207..0d13416e 100644
--- a/diploma_thesis/agents/utils/return_estimator/n_step.py
+++ b/diploma_thesis/agents/utils/return_estimator/n_step.py
@@ -37,6 +37,8 @@ def __init__(self, configuration: Configuration):
 
         self.configuration = configuration
 
+        if self.configuration.n > 1:
+            self.value_fetch_method = ValueFetchMethod.ACTION
 
     @property
     def discount_factor(self) -> float:
@@ -75,7 +77,7 @@ def update_returns(self, records: List[Record]) -> List[Record]:
                 off_policy_weights += [1]
 
         for i in range(len(records)):
-            g = records[i].info[Record.VALUE_KEY]
+            g = self.get_value(records[i])
             n = min(self.configuration.n, len(records) - i)
 
             weights = off_policy_weights[i:i+n]
diff --git a/diploma_thesis/agents/utils/rl/rl.py b/diploma_thesis/agents/utils/rl/rl.py
index ab16ce3d..6644988e 100644
--- a/diploma_thesis/agents/utils/rl/rl.py
+++ b/diploma_thesis/agents/utils/rl/rl.py
@@ -105,7 +105,7 @@ def __train_step__(self, model: Policy):
 
         torch.cuda.empty_cache()
 
-        print(f'Train step: { time.time() - start } Optimizer Step: { self.optimizer.step_count }')
+        print(f'Train step: { time.time() - start } Optimizer Step: { self.optimizer.step_count } Learning Rate: { self.optimizer.learning_rate}')
 
     def clear(self):
         self.loss_cache = []
diff --git a/diploma_thesis/agents/utils/rl/storage.py b/diploma_thesis/agents/utils/rl/storage.py
index 98e3af2a..6f0087f0 100644
--- a/diploma_thesis/agents/utils/rl/storage.py
+++ b/diploma_thesis/agents/utils/rl/storage.py
@@ -27,9 +27,9 @@ def store(self, sample: TrainingSample):
             record.info['episode'] = sample.episode_id
 
             # Remove extra fields for training record
-            if record.state.graph is not None and Graph.JOB_INDEX_MAP in record.state.graph.data.keys():
-                del record.state.graph.data[Graph.JOB_INDEX_MAP]
-                del record.next_state.graph.data[Graph.JOB_INDEX_MAP]
+            for graph in [record.state.graph, record.next_state.graph]:
+                if graph is not None and Graph.JOB_INDEX_MAP in graph.data.keys():
+                    del graph.data[Graph.JOB_INDEX_MAP]
 
         if self.is_episodic:
             self.memory.store([records])
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/0/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/0/experiment.yml
new file mode 100644
index 00000000..f79aaaab
--- /dev/null
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/0/experiment.yml
@@ -0,0 +1,207 @@
+# Evaluate the effectiveness of basic DQNs on the JSP environment
+base_model: &base_model 'configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml'
+
+
+default_mods: &default_mods
+  - 'util/train_schedule/on_store_256.yml'
+  - 'agent/dqn/ddqn.yml'
+#  - 'agent/dqn/dueling.yml'
+  - 'agent/dqn/prioritized.yml'
+#  - 'agent/dqn/steps/3.yml'
+  - 'util/optimizer/adam_stationary.yml'
+  - 'util/optimizer/grad_norm.yml'
+#  - 'util/infrastructure/cuda.yml'
+
+###############################################################################################
+
+dqn_1: &dqn_1
+  base_path: *base_model
+  template: 'custom_gat'
+  mod_dirs:
+    - 'configuration/mods/machine/mods'
+  mods:
+    - *default_mods
+
+
+###############################################################################################
+
+dqn_2: &dqn_2
+  base_path: *base_model
+  template: 'custom_gin'
+  mod_dirs:
+    - 'configuration/mods/machine/mods'
+  mods:
+    - *default_mods
+
+###############################################################################################
+
+dqn_3: &dqn_3
+  base_path: *base_model
+  template: 'hierarchical_gin'
+  mod_dirs:
+    - 'configuration/mods/machine/mods'
+  mods:
+    - *default_mods
+
+###############################################################################################
+
+dqn_4: &dqn_4
+  base_path: *base_model
+  template: 'hierarchical_gat'
+  mod_dirs:
+    - 'configuration/mods/machine/mods'
+  mods:
+    - *default_mods
+
+###############################################################################################
+
+reward: &reward
+  - kind: 'surrogate_tardiness'
+    parameters:
+      winq_factor: 0.2
+      span: 80
+      critical_level_factor: 64
+
+##############################################################################################
+
+# 8 runs
+
+long_single_source_run: &long_single_source_run
+  parameters:
+    mods:
+      __inout_factory__:
+        - [ ['utilization/90.yml' ]]
+    nested:
+      parameters:
+        dispatch:
+          seed:
+            - '__range__': [ 2000, 2200 ]
+
+
+###############################################################################################
+
+
+task:
+  kind: 'multi_task'
+  n_workers: 1
+  n_threads: 30
+  debug: False
+  store_run_statistics: False
+  output_dir: 'results/jsp/experiments/FINAL/BEST/0'
+
+  tasks:
+    - kind: 'multi_value'
+      parameters:
+        base:
+          name: 'model'
+          output_dir: '1'
+          log_stdout: False
+          seed: 123
+
+          machine_agent:
+            kind: 'mod'
+            parameters:
+              base_path: 'configuration/mods/machine_agent/model.yml'
+              mods: [ ]
+
+          work_center_agent:
+            kind: 'static'
+            parameters:
+              model:
+                kind: 'static'
+                parameters:
+                  rule: 'et'
+              encoder:
+                kind: 'plain'
+
+          tape:
+            machine_reward:
+              kind: 'surrogate_tardiness'
+
+            work_center_reward:
+              kind: 'no'
+
+          simulator:
+            kind: 'td'
+
+
+          run:
+            kind: 'mod'
+            parameters:
+              base_path: 'configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml'
+              mod_dirs:
+                - 'configuration/mods/run/mods'
+              mods: []
+              nested:
+                parameters:
+                  simulations:
+                    - name: ''
+                      kind: 'multi_value'
+                      parameters:
+                        base:
+                          kind: 'mod'
+                          parameters:
+                            base_path: 'configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml'
+                            mod_dirs:
+                              - 'configuration/mods/simulation/mods'
+                            mods: [ ]
+                        values:
+        values:
+          __concat__:
+
+#            - output_dir: 'Global'
+#              machine_agent:
+#                parameters:
+#                  - *dqn_1
+#                  - *dqn_2
+#                  - *dqn_3
+#                  - *dqn_4
+#              graph:
+#                transition_model:
+#                  kind: 'base'
+#                  parameters:
+#                    forward:
+#                      kind: 'complete'
+#
+#                    schedule:
+#                      kind: 'complete'
+#
+#                memory: 0
+#                is_machine_set_in_work_center_connected: False
+#                is_work_center_set_in_shop_floor_connected: False
+
+            - output_dir: 'Local'
+              machine_agent:
+                parameters:
+                  - *dqn_1
+#                  - *dqn_2
+#                  - *dqn_3
+#                  - *dqn_4
+              graph:
+                transition_model:
+                  kind: 'base'
+                  parameters:
+                    forward:
+                      kind: 'complete'
+
+                    schedule:
+                      kind: 'machine_compressed'
+
+                memory: 0
+                is_machine_set_in_work_center_connected: True
+                is_work_center_set_in_shop_floor_connected: True
+
+          tape:
+            machine_reward:
+              *reward
+
+          run:
+            parameters:
+              nested:
+                parameters:
+                  simulations:
+                    __0__:
+                      parameters:
+                        values:
+                          __concat__:
+                            - *long_single_source_run
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml
new file mode 100644
index 00000000..b51dc68d
--- /dev/null
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml
@@ -0,0 +1,79 @@
+
+kind: 'rl'
+parameters:
+
+  model:
+    kind: 'deep_rule'
+    parameters:
+      policy:
+        kind: 'flexible_action'
+        parameters:
+          policy_method: 'independent'
+
+          model:
+            __model__: ''
+
+          action_selector:
+            kind: 'phase_selector'
+            parameters:
+              default:
+                 kind: 'greedy'
+              phases:
+                - phase:
+                    kind: 'warm_up'
+                    parameters:
+                      step: 0
+                  action_selector:
+                    kind: 'uniform'
+                - phase:
+                    kind: 'warm_up'
+                    parameters:
+                      step: 1
+                  action_selector:
+                    kind: 'epsilon_greedy'
+                    parameters:
+                      epsilon: 0.4
+                - phase:
+                    kind: 'training'
+                  action_selector:
+                    kind: 'epsilon_greedy'
+                    parameters:
+                      epsilon: 0.4
+                      min_epsilon: 0.05
+                      decay_factor: 0.999
+                      decay_steps: 125
+
+  __encoder__: ''
+
+  trainer:
+    kind: 'dqn'
+    parameters:
+      decay: 1.0
+      update_steps: 50
+      epochs: 5
+
+      memory:
+        kind: 'replay'
+        parameters:
+          size: 16394
+          batch_size: 1024
+
+      loss:
+        kind: 'mse'
+        parameters:
+          reduction: 'none'
+
+      optimizer:
+        model:
+          kind: 'adam'
+          parameters:
+            lr: 0.001
+        scheduler:
+          kind: 'exponential'
+          parameters:
+            gamma: 0.999
+
+      return:
+        kind: 'no'
+        parameters:
+          discount_factor: 0.95
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml
new file mode 100644
index 00000000..683fca9c
--- /dev/null
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml
@@ -0,0 +1,23 @@
+
+# JSP training based on the DEEP-MARL thesis
+
+kind: 'plain'
+parameters:
+  timeline:
+    warmup:
+      - 5000
+      - 5000
+
+    duration: 20000000
+
+  machine_train_schedule:
+    pretrain_steps: 10
+    train_interval: 25
+    max_training_steps: 100000000
+
+  work_center_train_schedule:
+    pretrain_steps: 0
+    train_interval: 100
+    max_training_steps: 0
+
+  n_workers: 10
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml
new file mode 100644
index 00000000..d36d7f86
--- /dev/null
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml
@@ -0,0 +1,51 @@
+
+
+
+kind: 'simulation'
+parameters:
+  configuration:
+    timespan: 5000
+    machines_per_work_center: 1
+    work_center_count: 10
+    deduce_naive_actions: True
+
+  dispatch:
+    initial_job_assignment:
+      kind: 'n_per_machine'
+      parameters:
+        n: 3
+
+    job_sampler:
+      kind: 'dynamic'
+      parameters:
+        n_jobs: 200
+        processing_times:
+          kind: 'uniform'
+          parameters:
+            uniform: [ 1, 100 ]
+            noise: [ 0, 5 ]
+        permutation:
+          uneveness: 5
+        due_time:
+          kind: 'uniform'
+          parameters:
+            uniform: [ 0, 2 ]
+        job_arrival_time_on_machine:
+          kind: 'expected_utilization'
+          parameters:
+            value: 0.0
+
+
+    breakdown:
+      kind: 'no'
+      parameters:
+        breakdown_arrival:
+          kind: 'exponential'
+          parameters:
+            mean: 1000
+        repair_duration:
+          kind: 'uniform'
+          parameters:
+            uniform: [ 200, 300 ]
+
+    seed: 42
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/encoder.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/encoder.yml
new file mode 100644
index 00000000..b661bd9b
--- /dev/null
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/encoder.yml
@@ -0,0 +1,8 @@
+
+encoder:
+  kind: 'custom_v2'
+  parameters:
+    is_homogeneous: True
+    is_undirected: True
+    is_local: True
+    append_target_mask: True
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/model.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/model.yml
new file mode 100644
index 00000000..d9139a63
--- /dev/null
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/model.yml
@@ -0,0 +1,79 @@
+layers:
+
+  # Actor
+
+  - kind: 'graph_model'
+    parameters:
+      layers:
+        - kind: 'linear'
+          parameters:
+            dim: 64
+            activation: 'leaky_relu'
+            signature: 'x -> x'
+        - kind: 'gat'
+          parameters:
+            in_channels: -1
+            out_channels: 64
+            num_layers: 2
+            v2: True
+            heads: 8
+            hidden_channels: 64
+            jk: 'cat'
+        - kind: 'linear'
+          parameters:
+            dim: 64
+            activation: 'leaky_relu'
+            signature: 'x -> x'
+        - kind: 'mask'
+
+      signature: 'graph -> actions_hidden, actions_batch'
+
+  - kind: 'linear'
+    parameters:
+      dim: 64
+      activation: 'tanh'
+      signature: 'actions_hidden -> actions_hidden'
+
+  # Critic
+
+  - kind: 'graph_model'
+    parameters:
+      layers:
+        - kind: 'linear'
+          parameters:
+            dim: 64
+            activation: 'leaky_relu'
+            signature: 'x -> x'
+        - kind: 'gat'
+          parameters:
+            in_channels: -1
+            out_channels: 64
+            num_layers: 3
+            v2: True
+            heads: 4
+            hidden_channels: 64
+            jk: 'cat'
+        - kind: 'linear'
+          parameters:
+            dim: 64
+            activation: 'leaky_relu'
+            signature: 'x -> x'
+        - kind: 'mask'
+
+      signature: 'graph -> values_hidden, values_batch'
+
+  - kind: 'mean_pool'
+    parameters:
+      dim: 64
+      signature: 'values_hidden, values_batch -> values_hidden'
+
+  - kind: 'linear'
+    parameters:
+      dim: 64
+      activation: 'leaky_relu'
+      signature: 'values_hidden -> values_hidden'
+
+  - kind: 'output'
+    parameters:
+      value: 'values_hidden'
+      actions: 'actions_hidden'
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/rules.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/rules.yml
new file mode 100644
index 00000000..f26787ad
--- /dev/null
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/rules.yml
@@ -0,0 +1,9 @@
+
+rules:
+  - 'spt'
+  - 'cr'
+  - 'ms'
+  - 'winq'
+  - 'edd'
+  - 'lwkr'
+  - 'atc'
\ No newline at end of file
diff --git a/diploma_thesis/configuration/experiments/jsp/tournament.yml b/diploma_thesis/configuration/experiments/jsp/tournament.yml
index 9b37861c..41e6f0d7 100644
--- a/diploma_thesis/configuration/experiments/jsp/tournament.yml
+++ b/diploma_thesis/configuration/experiments/jsp/tournament.yml
@@ -70,7 +70,7 @@ task:
         weight: 1
         direction: 'minimize'
         scale: 'linear'
-        at: 5000
+        at: 8000
         limit: 100
 
   reward:
@@ -190,35 +190,37 @@ task:
                 job_sampler:
                   kind: 'dynamic'
                   parameters:
+                    n_jobs: 200
                     processing_times:
                       kind: 'uniform'
                       parameters:
-                        uniform: [ 1, 50 ]
-                        noise: [ 0, 10 ]
+                        uniform: [ 1, 100 ]
+                        noise: [ 0, 5 ]
                     permutation:
                       uneveness: 5
                     due_time:
                       kind: 'uniform'
                       parameters:
-                        uniform: [ 0.0, 2 ]
+                        uniform: [ 0, 2 ]
                     job_arrival_time_on_machine:
                       kind: 'expected_utilization'
                       parameters:
                         value: 0.0
 
+
                 breakdown:
-                  kind: 'dynamic'
+                  kind: 'no'
                   parameters:
                     breakdown_arrival:
                       kind: 'exponential'
                       parameters:
-                        mean: 5000
+                        mean: 1000
                     repair_duration:
                       kind: 'uniform'
                       parameters:
                         uniform: [ 200, 300 ]
 
-                seed: 32
+                seed: 42
 
           values:
 
@@ -235,18 +237,18 @@ task:
                 job_sampler:
                   parameters:
                     __concat__:
-#                      - job_arrival_time_on_machine:
-#                          parameters:
-#                            value:
-#                              - 0.7
-#                        n_jobs: 150
+                      - job_arrival_time_on_machine:
+                          parameters:
+                            value:
+                              - 0.7
+                        n_jobs: 200
                       - job_arrival_time_on_machine:
                           parameters:
                             value:
                               - 0.8
-                        n_jobs: 150
-#                      - job_arrival_time_on_machine:
-#                          parameters:
-#                            value:
-#                              - 0.9
-#                        n_jobs: 150
+                        n_jobs: 200
+                      - job_arrival_time_on_machine:
+                          parameters:
+                            value:
+                              - 0.9
+                        n_jobs: 200
diff --git a/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml b/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml
index bef1063f..00ff86bb 100644
--- a/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml
+++ b/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml
@@ -8,4 +8,4 @@ parameters:
           discount: 0.9
           lambda_factor: 0.9
           n: 3
-          off_policy: true
+          off_policy: False
diff --git a/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml b/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml
index 9f3bda3e..58adeeff 100644
--- a/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml
+++ b/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml
@@ -6,6 +6,6 @@ parameters:
         model:
           kind: 'adam'
           parameters:
-            lr: 0.001
+            lr: 0.0005
             betas: [0.99, 0.99]
             weight_decay: 0.000001
\ No newline at end of file
diff --git a/diploma_thesis/environment/job.py b/diploma_thesis/environment/job.py
index 677700d1..b366e86a 100644
--- a/diploma_thesis/environment/job.py
+++ b/diploma_thesis/environment/job.py
@@ -452,7 +452,7 @@ def with_due_at(self, due_at: torch.FloatTensor):
     def __processing_time_on_work_center__(cls,
                                            steps: torch.LongTensor,
                                            processing_times: torch.LongTensor,
-                                           step_idx: int, 
+                                           step_idx: int,
                                            strategy: ReductionStrategy = ReductionStrategy.mean):
         """
         Returns: The processing time of the operation in work center
@@ -468,7 +468,7 @@ def __processing_time_on_work_center__(cls,
     @lru_cache
     def __processing_time_on_machine__(cls,
                                        steps: torch.LongTensor,
-                                       processing_times: torch.LongTensor, 
+                                       processing_times: torch.LongTensor,
                                        step_idx: int,
                                        machine_idx: int):
         """
@@ -485,7 +485,7 @@ def __processing_time_on_machine__(cls,
     def __next_processing_time__(cls,
                                  steps: torch.LongTensor,
                                  processing_times: torch.LongTensor,
-                                 step_idx: int, 
+                                 step_idx: int,
                                  strategy: ReductionStrategy = ReductionStrategy.mean):
         """
         Returns: The processing time of the next operation
@@ -516,12 +516,12 @@ def __remaining_processing_time__(cls,
         result += cls.__next_remaining_processing_time__(processing_times, step_idx, strategy)
 
         return result
-    
+
     @classmethod
     @lru_cache
     def __next_remaining_processing_time__(cls,
-                                           processing_times: torch.LongTensor, 
-                                           step_idx: int, 
+                                           processing_times: torch.LongTensor,
+                                           step_idx: int,
                                            strategy: ReductionStrategy = ReductionStrategy.mean):
         """
         Returns: The remaining processing time of the next operation
diff --git a/diploma_thesis/simulator/graph/transition/transition.py b/diploma_thesis/simulator/graph/transition/transition.py
index 3b19c14a..db6f89cf 100644
--- a/diploma_thesis/simulator/graph/transition/transition.py
+++ b/diploma_thesis/simulator/graph/transition/transition.py
@@ -95,7 +95,7 @@ def __remove_job__(cls, job: Job, graph: Graph):
         if job_id in graph.data[Graph.JOB_KEY].keys():
             graph.data[Graph.JOB_KEY].pop(job_id)
             return
-        
+
         raise ValueError(f'Job with id {job.id} not found in graph')
 
     # Utils
diff --git a/diploma_thesis/simulator/td.py b/diploma_thesis/simulator/td.py
index fc6abd3b..8fa8384a 100644
--- a/diploma_thesis/simulator/td.py
+++ b/diploma_thesis/simulator/td.py
@@ -91,7 +91,7 @@ def __forward_td__(self, context: Context, queue: TDQueue, agent, key):
 
     @staticmethod
     def from_cli(parameters, *args, **kwargs) -> Simulator:
-        return TDSimulator(parameters.get('memory', 1), 
+        return TDSimulator(parameters.get('memory', 1),
                            parameters.get('emit_trajectory', False),
                            parameters.get('reset_trajectory', True),
                            parameters.get('sliding_window', 1),
diff --git a/notebooks/plot_utils/legend.py b/notebooks/plot_utils/legend.py
index 467e426b..3c3ca112 100644
--- a/notebooks/plot_utils/legend.py
+++ b/notebooks/plot_utils/legend.py
@@ -4,4 +4,5 @@ def add_legend(ax, info):
     ax.legend(ncols=info.get('ncols', 2),
               bbox_to_anchor=info.get('bbox_to_anchor', (-0.08, 1)),
               loc='best',
+              frameon=True,
               fancybox=True)
diff --git a/notebooks/plot_utils/plot_decisions_per_action.py b/notebooks/plot_utils/plot_decisions_per_action.py
index b1993a03..4450a1d8 100644
--- a/notebooks/plot_utils/plot_decisions_per_action.py
+++ b/notebooks/plot_utils/plot_decisions_per_action.py
@@ -4,29 +4,30 @@
 import matplotlib.pyplot as plt
 
 
-def plot_decisions_per_action(data: pd.DataFrame, name: str, figsize=(8, 8)):
+def plot_decisions_per_action(data: pd.DataFrame, name: str, figsize=(8, 8), ax=None):
     data = data[data['reward'] != 0]
 
-    fig, ax = plt.subplots(figsize=figsize)
+    if ax is None:
+        fig, ax = plt.subplots(figsize=figsize)
 
     actions = data["action"].unique()
     actions = np.sort(actions)
 
+    ax.grid(True, zorder=0)
+
     # Create violins for each action
     for i, action in enumerate(actions):
         action_data = data[data["action"] == action]
 
-        ax.bar(x=i, height=len(action_data))
+        ax.bar(x=i, height=len(action_data), zorder=3)
 
     # Set labels and title
-    ax.set_xlabel("Action")
+    # ax.set_xlabel("Action")
     ax.set_ylabel("Count")
-    ax.set_title(f"Selected action count ({name})")
+    ax.set_title(f"Histogram of actions ({name})")
 
     ax.set_xticks(np.arange(len(actions)), actions, rotation=45)
 
     # Add grid and adjust layout
-    ax.grid(True)
     plt.tight_layout()
 
-    return fig
diff --git a/notebooks/plot_utils/plot_normalized_performance.py b/notebooks/plot_utils/plot_normalized_performance.py
index 9979c8ec..9dafcdca 100644
--- a/notebooks/plot_utils/plot_normalized_performance.py
+++ b/notebooks/plot_utils/plot_normalized_performance.py
@@ -2,30 +2,41 @@
 import numpy as np
 
 
-def plot_normalized_performance(df, info):
+def plot_normalized_performance(df, info, ax=None):
     index_column = info['index']
     metric = info['metric']
     candidate_column = info['candidate_column']
     baseline = info['baseline']
 
-    fig, ax = plt.subplots(figsize=info.get('figsize', (12, 6)))
+    if ax is None:
+        fig, ax = plt.subplots(figsize=info.get('figsize', (12, 6)))
 
     candidates = info.get('candidates', np.sort(df[candidate_column].unique()))
     candidates = [candidate for candidate in candidates if candidate != baseline]
 
-    print(candidates)
-
     baseline_info = df[df[candidate_column] == baseline].set_index(index_column)
 
-    for index, candidate in enumerate(candidates):
+    def performance(candidate):
         candidate_info = df[df[candidate_column] == candidate].set_index(index_column)
 
         delta = (baseline_info[metric] - candidate_info[metric]) / (baseline_info[metric] + 1e-10)
         delta = delta[~np.isnan(delta)]
 
+        return delta
+
+    candidates = [(candidate, performance(candidate)) for candidate in candidates if candidate != baseline]
+
+    if info.get('sort', True):
+        candidates = sorted(candidates, key=lambda x: np.mean(x[1]), reverse=True)
+
+        if top_k := info.get('top_k', None):
+            candidates = candidates[:top_k]
+
+    for index, candidate in enumerate(candidates):
+        delta = candidate[1]
         delta *= 100
 
-        ax.boxplot(delta, positions=[index], notch=True, vert=True)
+        ax.boxplot(delta, positions=[index], notch=True, vert=True, showmeans=True, widths=info.get('box_width', 0.4))
 
     ax.yaxis.grid(True)
 
@@ -33,8 +44,16 @@ def plot_normalized_performance(df, info):
     ax.set_ylabel(info['ylabel'])
     ax.set_title(info['title'])
 
-    ax.set_xticks(np.arange(len(candidates)), candidates, rotation=45)
+    ax.set_ylim(bottom=info.get('bottom'), top=info.get('top'))
+
+    start, end = ax.get_ylim()
+
+    ax.yaxis.set_ticks(np.arange(start, end, info.get('y_step', 5)))
+    ax.set_xticks(np.arange(len(candidates)), [candidate[0] for candidate in candidates], rotation=90)
 
     plt.tight_layout()
 
-    return fig
+    if ax is None:
+        return fig, candidates
+    else:
+        return candidates
diff --git a/notebooks/plot_utils/plot_performance_accross_runs.py b/notebooks/plot_utils/plot_performance_accross_runs.py
index 03a8675d..f878e8c9 100644
--- a/notebooks/plot_utils/plot_performance_accross_runs.py
+++ b/notebooks/plot_utils/plot_performance_accross_runs.py
@@ -48,7 +48,7 @@ def plot_performance_across_runs(data, info):
                     label=title)
 
 
-    ax.xaxis.grid(True)
+    ax.xaxis.grid(True, zorder=0)
 
     ax.set_xlabel(info['xlabel'])
     ax.set_ylabel(info['ylabel'])
diff --git a/notebooks/plot_utils/plot_reward_distribution_per_action.py b/notebooks/plot_utils/plot_reward_distribution_per_action.py
index 962498a7..33673ae7 100644
--- a/notebooks/plot_utils/plot_reward_distribution_per_action.py
+++ b/notebooks/plot_utils/plot_reward_distribution_per_action.py
@@ -14,7 +14,7 @@ def plot_reward_distribution_per_action(data: pd.DataFrame, name: str, figsize=(
     # Create violins for each action
     for i, action in enumerate(actions):
         action_data = data[data["action"] == action]["reward"]
-        violin_parts = ax.boxplot(
+        violin_parts = ax.violinplot(
             action_data,
             positions=[i],
             showmeans=True,
@@ -32,7 +32,7 @@ def plot_reward_distribution_per_action(data: pd.DataFrame, name: str, figsize=(
     ax.set_xticks(np.arange(len(actions)), actions, rotation=45)
 
     # Add grid and adjust layout
-    ax.grid(True)
+    ax.grid(True, zorder=0)
     plt.tight_layout()
 
     return fig
diff --git a/notebooks/plot_utils/plot_reward_model_across_runs.py b/notebooks/plot_utils/plot_reward_model_across_runs.py
index 680e6b6a..100749e2 100644
--- a/notebooks/plot_utils/plot_reward_model_across_runs.py
+++ b/notebooks/plot_utils/plot_reward_model_across_runs.py
@@ -31,7 +31,7 @@ def plot_reward_per_model_across_runs(data, info):
 
             ax.plot(filtered[metric], filtered[reward], marker=info['marker'], ms=10, ls='', label=title)
 
-    ax.grid(True)
+    ax.grid(True, zorder=0)
 
     ax.set_xlabel(info['xlabel'])
     ax.set_ylabel(info['ylabel'])
diff --git a/notebooks/plot_utils/plot_value.py b/notebooks/plot_utils/plot_value.py
index 20b3e469..98ec349a 100644
--- a/notebooks/plot_utils/plot_value.py
+++ b/notebooks/plot_utils/plot_value.py
@@ -6,50 +6,64 @@
 from .legend import add_legend
 
 
-def plot_value(path: str | dict, info: dict, figsize=(8, 8), post_process_fn=lambda a: a):
+def plot_value(path: str | dict, info: dict, figsize=(8, 8), ax = None, post_process_fn=lambda a: a, background_process_fn=None):
     if not isinstance(path, dict):
         path = dict(first=path)
 
-    fig, ax = plt.subplots(figsize=figsize)
+    if ax is None:
+        fig, ax = plt.subplots(figsize=figsize)
 
-    for name, data_path in path.items():
-        df = pd.read_csv(data_path)
+    min_value_len = float('inf')
 
-        if info.get('norm_index', False):
-            df[info['index']] -= df[info['index']].min()
+    for name, data_path in path.items():
+        if not isinstance(data_path, list):
+            data_path = [data_path]
 
-        suffix = '' if len(path) == 1 else f'{name}'
+        result = []
 
-        if 'work_center_id' in df.columns and info.get('is_reward_per_unit_visible', False):
-            work_centers = np.sort(df['work_center_id'].unique())
-            machines = np.sort(df['machine_id'].unique())
+        for p in data_path:
+            df = pd.read_csv(p)
 
-            for work_center_id in work_centers:
-                for machine_id in machines:
-                    filtered = df[(df['work_center_id'] == work_center_id) & (df['machine_id'] == machine_id)]
-                    filtered = filtered.sort_values(by=info['index'])
-                    filtered.set_index(info['index'], inplace=True)
+            if info.get('norm_index', False):
+                df[info['index']] -= df[info['index']].min()
 
-                    if len(machines) == 1:
-                        label = f'M_idx: {work_center_id}'
-                    else:
-                        label = f'W_idx: {work_center_id}, M_idx: {machine_id}'
+            if f := info.get('filter'):
+                df = f(df)
 
-                    if len(suffix) > 0:
-                        label += ' ' + suffix
+            suffix = '' if len(path) == 1 else f'{name}'
 
-                    ax.plot(post_process_fn(filtered[info['column']]), label=label)
-        else:
             df = df.sort_values(by=info['index'])
             df.set_index(info['index'], inplace=True)
 
-            ax.plot(post_process_fn(df[info['column']]), label=name)
+            result += [post_process_fn(df[info['column']])]
+
+            min_value_len = min(min_value_len, len(result[-1]))
+
+
+        if len(result) == 1:
+            ax.plot(result[0], label=name)
+        else:
+            result = [v[:min_value_len] for v in result]
+            result = np.vstack(result)
+
+            min_value = result.min(axis=0)
+            mean_value = result.mean(axis=0)
+            max_value = result.max(axis=0)
+
+            ax.plot(np.arange(len(mean_value)), mean_value, marker=info['marker'], label=name)
+
+            ax.fill_between(np.arange(len(mean_value)), min_value, max_value, alpha=0.25)
+
+    ax.grid(True, zorder=0)
+
+    if 'title' in info:
+        ax.set_title(info['title'])
 
-    ax.grid(True)
-    ax.set_title(info['title'])
     ax.set_xlabel(info['xlabel'])
     ax.set_ylabel(info['ylabel'])
 
-    add_legend(ax, info)
+    # if len(path) > 1:
+    #     add_legend(ax, info)
 
-    return fig
+    if ax is None:
+        return fig
diff --git a/notebooks/plot_utils/plot_value_per_run.py b/notebooks/plot_utils/plot_value_per_run.py
index 42b30d27..1cc45902 100644
--- a/notebooks/plot_utils/plot_value_per_run.py
+++ b/notebooks/plot_utils/plot_value_per_run.py
@@ -1,48 +1,86 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+import scipy
 
 from .legend import add_legend
 
+def mean_confidence_interval(data, confidence=0.95):
+    a = 1.0 * np.array(data)
+    n = len(a)
+    m, se = np.mean(a), scipy.stats.sem(a)
+    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
+    return m, m-h, m+h
 
-def plot_value_per_run(path: str | dict, info: dict, make_run_path, post_process_fn=lambda a: a):
+
+def plot_value_per_run(path: str | dict, info: dict, make_run_path, post_process_fn=lambda a: a, ax = None):
     if not isinstance(path, dict):
         path = dict(first=path)
 
-    fig, ax = plt.subplots(figsize=info.get('figsize', (8, 8)))
+    fig = None
 
-    max_values_len = 0
+    if ax is None:
+        fig, ax = plt.subplots(figsize=info.get('figsize', (8, 8)))
 
     for name, data_path in path.items():
-        run = 1
-
-        values = []
+        if not isinstance(data_path, list):
+            data_path = [data_path]
 
-        while True:
-            run_path = make_run_path(data_path, run)
+        result = []
 
-            run += 1
+        min_value_len = float('inf')
 
+        for p in data_path:
             try:
-                df = pd.read_csv(run_path)
+                run = 1
+
+                values = []
+
+                while True:
+                    run_path = make_run_path(p, run)
+
+                    run += 1
+
+                    try:
+                        df = pd.read_csv(run_path)
+                        df = df.sort_values(by=info['index'])
+                        df.set_index(info['index'], inplace=True)
 
-                df = df.sort_values(by=info['index'])
-                df.set_index(info['index'], inplace=True)
+                        values += [post_process_fn(df[info['column']], run)]
+                    except:
+                        break
 
-                values += [post_process_fn(df[info['column']], run)]
+                values = np.array(values)
+
+                if value := info.get('smoothing_value'):
+                    values = np.convolve(values, np.ones(value), 'valid') / value
+
+                result += [values]
+
+                min_value_len = min(min_value_len, len(values))
             except:
-                break
+                pass
+
+        if len(result) == 1:
+            ax.plot(np.arange(len(result[0])), result[0], marker=info['marker'], label=name)
+        else:
+            result = [v[:min_value_len] for v in result]
+            result = np.vstack(result)
+
+            min_value = result.min(axis=0)
+            mean_value = result.mean(axis=0)
+            max_value = result.max(axis=0)
 
-        ax.plot(np.arange(len(values)), np.array(values), marker=info['marker'], label=name)
+            ax.plot(np.arange(len(mean_value)), mean_value, marker=info['marker'], label=name)
 
-        max_values_len = max(max_values_len, len(values))
+            ax.fill_between(np.arange(len(mean_value)), min_value, max_value, alpha=0.25)
 
-    ax.grid(True)
+    ax.grid(True, zorder=0)
     ax.set_title(info['title'])
     ax.set_xlabel(info['xlabel'])
     ax.set_ylabel(info['ylabel'])
-    ax.set_xticks(np.arange(max_values_len))
+    # ax.set_xticks(np.arange(max_values_len))
 
-    add_legend(ax, info)
+    # add_legend(ax, info)
 
     return fig