Change training schedule

yura-hb · Apr 24, 2024 · 8d4a2d4
1 parent 2c11db2
commit 8d4a2d4
Show file tree

Hide file tree

Showing 35 changed files with 164 additions and 126 deletions.
diff --git a/diploma_thesis/agents/utils/action/sample.py b/diploma_thesis/agents/utils/action/sample.py
@@ -1,3 +1,5 @@
+import dis
+
 import torch.distributions
 
 from .action_selector import *
@@ -19,6 +21,8 @@ def __call__(self, distribution: torch.Tensor) -> Tuple[int, torch.Tensor]:
 
         action = distribution.sample().item()
 
+        print("action: ", action, "entropy: ", distribution.entropy().item(), distribution.probs)
+
         return action, distribution.probs
 
     @staticmethod

diff --git a/diploma_thesis/agents/utils/policy/flexible_action.py b/diploma_thesis/agents/utils/policy/flexible_action.py
@@ -38,7 +38,7 @@ def encode(self, state):
                 prev_count += target_nodes_count
 
             actions = torch.nn.utils.rnn.pad_sequence(result, batch_first=True, padding_value=-float('inf'))
-            lengths = torch.tensor(lengths)
+            lengths = torch.tensor(lengths).to(actions.device)
 
             output[Keys.ACTIONS] = (actions, lengths)
 

diff --git a/diploma_thesis/agents/utils/rl/utils/ppo_mixin.py b/diploma_thesis/agents/utils/rl/utils/ppo_mixin.py
@@ -25,7 +25,7 @@ def base_parameters_from_cli(parameters: Dict):
             sample_count=parameters.get('sample_count', 128),
             policy_step_ratio=parameters.get('policy_step_ratio', 1.0),
             entropy_regularization=parameters.get('entropy_regularization', 0.0),
-            rollback_ratio=parameters.get('rollback_ratio', 0.1),
+            rollback_ratio=parameters.get('rollback_ratio', 0.0),
             critic_weight=parameters.get('critic_weight', 1.0),
             epochs=parameters.get('epochs', 1),
             priority_reduction_ratio=parameters.get('priority_reduction_ratio', 1.05)
@@ -124,7 +124,7 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):
         advantages = batch.info[Record.ADVANTAGE_KEY]
 
         # Normalize advantages
-        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
+        # advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
 
         action_probs = batch.info[Record.POLICY_KEY][range, batch.action.view(-1)]
 
@@ -139,6 +139,8 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):
 
         advantages = torch.min(weights * advantages, clipped_weights * advantages)
 
+        print(advantages)
+
         entropy = distribution.entropy().mean()
 
         return torch.mean(advantages) + entropy_regularization * entropy, entropy
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -135,7 +136,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 
 ###############################################################################################
@@ -194,8 +195,7 @@ task:
               base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
               mod_dirs:
                 - 'configuration/mods/run/mods'
-              mods:
-                - 'n_workers/1.yml'
+              mods: []
               nested:
                 parameters:
                   simulations:

diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #        'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 
 ppo_2: &ppo_2
   base_path: *base_model
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #        'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
 #        'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 
 ppo_4: &ppo_4
   base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
 #        'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 ###############################################################################################
 
 reward: &reward
@@ -112,15 +116,15 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 
 ###############################################################################################
 
 
 task:
   kind: 'multi_task'
-  n_workers: 4
+  n_workers: 8
   n_threads: 32
   debug: False
   store_run_statistics: False
@@ -160,7 +164,7 @@ task:
           simulator:
             kind: 'td'
             parameters:
-              memory: 96
+              memory: 64
               emit_trajectory: True
 
           graph:
@@ -172,8 +176,7 @@ task:
               base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
               mod_dirs:
                 - 'configuration/mods/run/mods'
-              mods:
-                - 'n_workers/1.yml'
+              mods: []
               nested:
                 parameters:
                   simulations:

diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (DQN)/experiment.yml
@@ -19,7 +19,8 @@ graph: &graph
     is_work_center_set_in_shop_floor_connected: False
 
 default_mods: &default_mods
-  ['util/infrastructure/cuda.yml']
+  - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -231,7 +232,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 
 ###############################################################################################
@@ -290,8 +291,7 @@ task:
               base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
               mod_dirs:
                 - 'configuration/mods/run/mods'
-              mods:
-                - 'n_workers/1.yml'
+              mods: []
               nested:
                 parameters:
                   simulations:

diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (PPO)/experiment.yml
@@ -30,12 +30,13 @@ ppo_1: &ppo_1
       - [ 'util/optimizer/grad_norm.yml' ]
       - [
           '__none__',
-          'agent/ppo/p3or.yml'
+#          'agent/ppo/p3or.yml'
       ]
       - [
         '__none__',
 #        'util/agent/multi_agent.yml'
       ]
+      - ['util/train_schedule/on_store.yml']
       - ['util/infrastructure/cuda.yml']
 
 ppo_2: &ppo_2
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - ['util/train_schedule/on_store.yml']
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - ['util/train_schedule/on_store.yml']
 
 ppo_4: &ppo_4
   base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 ###############################################################################################
 
 reward: &reward
@@ -112,7 +116,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 
 ###############################################################################################
@@ -132,6 +136,7 @@ task:
         base:
           name: 'model'
           output_dir: '1'
+          seed: 0
           log_stdout: False
 
           machine_agent:
@@ -172,8 +177,7 @@ task:
               base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
               mod_dirs:
                 - 'configuration/mods/run/mods'
-              mods:
-                - 'n_workers/1.yml'
+              mods: []
               nested:
                 parameters:
                   simulations:

diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods 
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -323,7 +324,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 
 ###############################################################################################
@@ -382,8 +383,7 @@ task:
               base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
               mod_dirs:
                 - 'configuration/mods/run/mods'
-              mods:
-                - 'n_workers/1.yml'
+              mods: []
               nested:
                 parameters:
                   simulations:

diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_2: &ppo_2
   base_path: *base_model
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_4: &ppo_4
   base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_5: &ppo_5
   base_path: *base_model
@@ -109,6 +113,7 @@ ppo_5: &ppo_5
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_6: &ppo_6
   base_path: *base_model
@@ -127,6 +132,7 @@ ppo_6: &ppo_6
 #        'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 ###############################################################################################
 
 reward: &reward
@@ -148,7 +154,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 
 ###############################################################################################
@@ -196,7 +202,7 @@ task:
           simulator:
             kind: 'td'
             parameters:
-              memory: 96
+              memory: 64
               emit_trajectory: True
 
           graph:
@@ -208,8 +214,7 @@ task:
               base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
               mod_dirs:
                 - 'configuration/mods/run/mods'
-              mods:
-                - 'n_workers/1.yml'
+              mods: []
               nested:
                 parameters:
                   simulations:

diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 ###############################################################################################
 
 dqn_1: &dqn_1
@@ -182,7 +183,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 
 ###############################################################################################
@@ -241,8 +242,7 @@ task:
               base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
               mod_dirs:
                 - 'configuration/mods/run/mods'
-              mods:
-                - 'n_workers/1.yml'
+              mods: []
               nested:
                 parameters:
                   simulations: