diff --git a/diploma_thesis/agents/utils/action/sample.py b/diploma_thesis/agents/utils/action/sample.py
index a60c2dc..6cdf028 100644
--- a/diploma_thesis/agents/utils/action/sample.py
+++ b/diploma_thesis/agents/utils/action/sample.py
@@ -1,3 +1,5 @@
+import dis
+
 import torch.distributions
 
 from .action_selector import *
@@ -19,6 +21,8 @@ def __call__(self, distribution: torch.Tensor) -> Tuple[int, torch.Tensor]:
 
         action = distribution.sample().item()
 
+        print("action: ", action, "entropy: ", distribution.entropy().item(), distribution.probs)
+
         return action, distribution.probs
 
     @staticmethod
diff --git a/diploma_thesis/agents/utils/policy/flexible_action.py b/diploma_thesis/agents/utils/policy/flexible_action.py
index e25c619..e9b642b 100644
--- a/diploma_thesis/agents/utils/policy/flexible_action.py
+++ b/diploma_thesis/agents/utils/policy/flexible_action.py
@@ -38,7 +38,7 @@ def encode(self, state):
             prev_count += target_nodes_count
 
         actions = torch.nn.utils.rnn.pad_sequence(result, batch_first=True, padding_value=-float('inf'))
-        lengths = torch.tensor(lengths)
+        lengths = torch.tensor(lengths).to(actions.device)
 
         output[Keys.ACTIONS] = (actions, lengths)
 
diff --git a/diploma_thesis/agents/utils/rl/utils/ppo_mixin.py b/diploma_thesis/agents/utils/rl/utils/ppo_mixin.py
index 746b551..1412c35 100644
--- a/diploma_thesis/agents/utils/rl/utils/ppo_mixin.py
+++ b/diploma_thesis/agents/utils/rl/utils/ppo_mixin.py
@@ -25,7 +25,7 @@ def base_parameters_from_cli(parameters: Dict):
             sample_count=parameters.get('sample_count', 128),
             policy_step_ratio=parameters.get('policy_step_ratio', 1.0),
             entropy_regularization=parameters.get('entropy_regularization', 0.0),
-            rollback_ratio=parameters.get('rollback_ratio', 0.1),
+            rollback_ratio=parameters.get('rollback_ratio', 0.0),
             critic_weight=parameters.get('critic_weight', 1.0),
             epochs=parameters.get('epochs', 1),
             priority_reduction_ratio=parameters.get('priority_reduction_ratio', 1.05)
@@ -124,7 +124,7 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):
         advantages = batch.info[Record.ADVANTAGE_KEY]
 
         # Normalize advantages
-        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
+        # advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
 
         action_probs = batch.info[Record.POLICY_KEY][range, batch.action.view(-1)]
@@ -139,6 +139,8 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):
 
         advantages = torch.min(weights * advantages, clipped_weights * advantages)
 
+        print(advantages)
+
         entropy = distribution.entropy().mean()
 
         return torch.mean(advantages) + entropy_regularization * entropy, entropy
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (DQN)/experiment.yml
index ab52937..348ae13 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (DQN)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -135,7 +136,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -194,8 +195,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (PPO)/experiment.yml
index 9e1d9df..e994018 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (PPO)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/1 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 
 ppo_2: &ppo_2
   base_path: *base_model
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 
 ppo_4: &ppo_4
   base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml' ]
+      - [ 'util/train_schedule/on_store.yml' ]
 
 ###############################################################################################
 
 reward: &reward
@@ -112,7 +116,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -120,7 +124,7 @@ task:
   kind: 'multi_task'
 
-  n_workers: 4
+  n_workers: 8
   n_threads: 32
   debug: False
   store_run_statistics: False
@@ -160,7 +164,7 @@ task:
       simulator:
         kind: 'td'
         parameters:
-          memory: 96
+          memory: 64
           emit_trajectory: True
 
       graph:
@@ -172,8 +176,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (DQN)/experiment.yml
index c3a3e6b..fa876f7 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (DQN)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (DQN)/experiment.yml
@@ -19,7 +19,8 @@ graph: &graph
   is_work_center_set_in_shop_floor_connected: False
 
 default_mods: &default_mods
-  - ['util/infrastructure/cuda.yml']
+  - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -231,7 +232,7 @@ long_single_source_run: &long_single_source_run
     nested:
      parameters:
        dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -290,8 +291,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (PPO)/experiment.yml
index 82cd50a..e0b1e92 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (PPO)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/2 (PPO)/experiment.yml
@@ -30,12 +30,13 @@ ppo_1: &ppo_1
       - [ 'util/optimizer/grad_norm.yml' ]
       - [
           '__none__',
-          'agent/ppo/p3or.yml'
+#          'agent/ppo/p3or.yml'
       ]
       - [
           '__none__',
 #          'util/agent/multi_agent.yml'
       ]
+      - ['util/train_schedule/on_store.yml']
       - ['util/infrastructure/cuda.yml']
 
 ppo_2: &ppo_2
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - ['util/train_schedule/on_store.yml']
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - ['util/train_schedule/on_store.yml']
 
 ppo_4: &ppo_4
   base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ###############################################################################################
 
 reward: &reward
@@ -112,7 +116,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -132,6 +136,7 @@ task:
   base:
     name: 'model'
     output_dir: '1'
+    seed: 0
     log_stdout: False
 
     machine_agent:
@@ -172,8 +177,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (DQN)/experiment.yml
index 53a0a2f..7b06f94 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (DQN)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -323,7 +324,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -382,8 +383,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (PPO)/experiment.yml
index f0a62c1..5ab1b43 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (PPO)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/3 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_2: &ppo_2
   base_path: *base_model
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_4: &ppo_4
   base_path: *base_model
@@ -91,6 +94,7 @@ ppo_4: &ppo_4
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_5: &ppo_5
   base_path: *base_model
@@ -109,6 +113,7 @@ ppo_5: &ppo_5
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_6: &ppo_6
   base_path: *base_model
@@ -127,6 +132,7 @@ ppo_6: &ppo_6
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ###############################################################################################
 
 reward: &reward
@@ -148,7 +154,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -196,7 +202,7 @@ task:
       simulator:
         kind: 'td'
         parameters:
-          memory: 96
+          memory: 64
           emit_trajectory: True
 
       graph:
@@ -208,8 +214,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (DQN)/experiment.yml
index 7e87ba1..90ac6af 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (DQN)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
 dqn_1: &dqn_1
@@ -182,7 +183,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -241,8 +242,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (PPO)/experiment.yml
index b473779..380c2d8 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (PPO)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/4 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_2: &ppo_2
   base_path: *base_model
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,7 @@ ppo_3: &ppo_3
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ###############################################################################################
 
@@ -95,7 +98,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -143,7 +146,7 @@ task:
       simulator:
         kind: 'td'
         parameters:
-          memory: 96
+          memory: 64
           emit_trajectory: True
 
       graph:
@@ -155,8 +158,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (DQN)/experiment.yml
index 48ea2b5..3b81d6a 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (DQN)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -230,7 +231,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -289,8 +290,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (PPO)/experiment.yml
index f5e312d..8e95535 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (PPO)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/5 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_2: &ppo_2
   base_path: 'configuration/experiments/jsp/GRAPH-NN/flexible_machine.yml'
@@ -55,6 +56,7 @@ ppo_2: &ppo_2
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ppo_3: &ppo_3
   base_path: *base_model
@@ -73,6 +75,26 @@ ppo_3: &ppo_3
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
+
+ppo_4: &ppo_4
+  base_path: 'configuration/experiments/jsp/GRAPH-NN/flexible_machine.yml'
+  template: '5/flexible_gat'
+  mod_dirs:
+    - ['configuration/mods/machine/mods']
+  mods:
+    __factory__:
+      - [ 'util/optimizer/grad_norm.yml' ]
+      - [
+          '__none__',
+          'agent/ppo/p3or.yml'
+      ]
+      - [
+          '__none__',
+#          'util/agent/multi_agent.yml'
+      ]
+      - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ###############################################################################################
 
@@ -95,7 +117,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -143,7 +165,7 @@ task:
       simulator:
         kind: 'td'
         parameters:
-          memory: 96
+          memory: 64
           emit_trajectory: True
 
       graph:
@@ -155,8 +177,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
@@ -185,6 +206,10 @@ task:
             machine_agent:
               parameters:
                 *ppo_3
+          - output_dir: 'Flexible GAT'
+            machine_agent:
+              parameters:
+                *ppo_4
 
   tape:
     machine_reward:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (DQN)/experiment.yml
index 5703338..88403bf 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (DQN)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -88,7 +89,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -147,8 +148,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (PPO)/experiment.yml
index a63c194..f3b21eb 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (PPO)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/6 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #          'util/agent/multi_agent.yml'
       ]
       - [ 'util/infrastructure/cuda.yml']
+      - [ 'util/train_schedule/on_store.yml']
 
 ###############################################################################################
 
 reward: &reward
@@ -58,7 +59,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -106,7 +107,7 @@ task:
       simulator:
         kind: 'td'
         parameters:
-          memory: 96
+          memory: 64
           emit_trajectory: True
 
       graph:
@@ -118,8 +119,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (DQN)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (DQN)/experiment.yml
index 29e0ff9..d67a2b5 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (DQN)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (DQN)/experiment.yml
@@ -20,6 +20,7 @@ graph: &graph
 
 default_mods: &default_mods
   - 'util/infrastructure/cuda.yml'
+  - 'util/train_schedule/on_store_64.yml'
 
 ###############################################################################################
 
@@ -88,7 +89,7 @@ long_single_source_run: &long_single_source_run
     nested:
      parameters:
        dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -147,8 +148,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (PPO)/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (PPO)/experiment.yml
index 02653fa..83f0abd 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (PPO)/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/experiments/7 (PPO)/experiment.yml
@@ -37,6 +37,7 @@ ppo_1: &ppo_1
 #          'util/agent/multi_agent.yml'
       ]
       - ['util/infrastructure/cuda.yml']
+      - ['util/train_schedule/on_store.yml']
 
 ###############################################################################################
 
@@ -59,7 +60,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
        dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -107,7 +108,7 @@ task:
       simulator:
         kind: 'td'
         parameters:
-          memory: 96
+          memory: 64
           emit_trajectory: True
 
       graph:
@@ -119,8 +120,7 @@ task:
           base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_machine.yml
index ec7dc61..91adde5 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_machine.yml
@@ -58,8 +58,8 @@ parameters:
     memory:
       kind: 'replay'
       parameters:
-        size: 1024
-        batch_size: 96
+        size: 2048
+        batch_size: 128
         prefetch: 8
 
     loss:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_marl_machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_marl_machine.yml
index 66d4ab2..f9491a4 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_marl_machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_marl_machine.yml
@@ -59,8 +59,8 @@ parameters:
     memory:
       kind: 'replay'
       parameters:
-        size: 1024
-        batch_size: 96
+        size: 2048
+        batch_size: 128
         prefetch: 8
 
     loss:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_ppo_machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_ppo_machine.yml
index 13445e4..6cdc015 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_ppo_machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/flexible_ppo_machine.yml
@@ -3,12 +3,12 @@ kind: 'rl'
 
 parameters:
   model:
-    kind: 'deep_rule'
+    kind: 'deep_multi_rule'
     parameters:
       __rules__: ''
 
       policy:
-        kind: 'flexible_action'
+        kind: 'discrete_action'
         parameters:
           policy_method: 'independent'
 
@@ -49,13 +49,13 @@ parameters:
       kind: 'ppo'
       parameters:
         device: 'cpu'
-        sample_count: 96
+        sample_count: 256
         policy_step_ratio: 0.2
-        entropy_regularization: 0.001
+        entropy_regularization: 0.01
         rollback_ratio: 0.00
         critic_weight: 0.5
-        epochs: 1
+        epochs: 5
 
         loss:
           kind: 'cross_entropy'
@@ -67,9 +67,9 @@ parameters:
 
         optimizer:
           model:
-            kind: 'adam'
+            kind: 'adam_w'
             parameters:
-              lr: 0.0005
+              lr: 0.001
 
         memory:
           kind: 'prioritized_replay'
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/machine.yml
index e52bed1..c301c2f 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/machine.yml
@@ -59,7 +59,7 @@ parameters:
       kind: 'replay'
       parameters:
         size: 1024
-        batch_size: 96
+        batch_size: 128
         prefetch: 8
 
     loss:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/marl_machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/marl_machine.yml
index fe787cf..e72ccb1 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/marl_machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/marl_machine.yml
@@ -59,8 +59,8 @@ parameters:
     memory:
       kind: 'replay'
       parameters:
-        size: 1024
-        batch_size: 96
+        size: 2048
+        batch_size: 128
         prefetch: 8
 
     loss:
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/ppo_machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/ppo_machine.yml
index c272c91..6cdc015 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/ppo_machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/ppo_machine.yml
@@ -49,13 +49,13 @@ parameters:
       kind: 'ppo'
      parameters:
         device: 'cpu'
-        sample_count: 96
+        sample_count: 256
         policy_step_ratio: 0.2
-        entropy_regularization: 0.001
+        entropy_regularization: 0.01
         rollback_ratio: 0.00
         critic_weight: 0.5
-        epochs: 1
+        epochs: 5
 
         loss:
           kind: 'cross_entropy'
@@ -67,9 +67,9 @@ parameters:
 
         optimizer:
           model:
-            kind: 'adam'
+            kind: 'adam_w'
             parameters:
-              lr: 0.0005
+              lr: 0.001
 
         memory:
           kind: 'prioritized_replay'
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run.yml
index 2811c5a..12b1ea6 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run.yml
@@ -20,4 +20,4 @@ parameters:
     train_interval: 100
     max_training_steps: 0
 
-  n_workers: 1
+  n_workers: 4
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run_ppo.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run_ppo.yml
index 5915729..5688acb 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run_ppo.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/run_ppo.yml
@@ -17,4 +17,4 @@ parameters:
     train_interval: 100
     max_training_steps: 0
 
-  n_workers: 1
+  n_workers: 4
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/simulation.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/simulation.yml
index ad1450b..139308d 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/simulation.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/simulation.yml
@@ -3,7 +3,7 @@ kind: 'simulation'
 
 parameters:
   configuration:
-    timespan: 100000
+    timespan: 125000
     machines_per_work_center: 1
     work_center_count: 10
     deduce_naive_actions: True
diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/templates/baseline/model.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/templates/baseline/model.yml
index 3dc34d8..6313624 100644
--- a/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/templates/baseline/model.yml
+++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-NN/templates/baseline/model.yml
@@ -62,7 +62,7 @@ layers:
       signature: 'graph -> values_hidden, values_batch'
 
-  - kind: 'mean_pool'
+  - kind: 'max_pool'
     parameters:
       signature: 'values_hidden, values_batch -> values_hidden'
 
diff --git a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/1/experiment.yml b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/1/experiment.yml
index d44feb2..a150639 100644
--- a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/1/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/1/experiment.yml
@@ -212,7 +212,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -272,8 +272,7 @@ task:
           base_path: 'configuration/experiments/jsp/MARL-DQN/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/experiment.yml b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/experiment.yml
index 0831e93..82f263d 100644
--- a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/experiment.yml
+++ b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/experiment.yml
@@ -29,6 +29,7 @@ ppo: &ppo
           '__none__',
 #          'util/agent/multi_agent.yml'
       ]
+      - [ 'util/train_schedule/on_store.yml']
 
 ###############################################################################################
 
@@ -51,7 +52,7 @@ long_single_source_run: &long_single_source_run
     nested:
       parameters:
         dispatch:
-          seed: [ [ 0 ] ]
+          seed: [ [ 0, 1, 2, 3 ] ]
 
 ###############################################################################################
 
@@ -112,8 +113,7 @@ task:
           base_path: 'configuration/experiments/jsp/MARL-DQN/experiment/4/run.yml'
           mod_dirs:
             - 'configuration/mods/run/mods'
-          mods:
-            - 'n_workers/1.yml'
+          mods: []
           nested:
             parameters:
               simulations:
diff --git a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/machine.yml b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/machine.yml
index 4c77832..a9f0dc1 100644
--- a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/machine.yml
@@ -17,29 +17,30 @@ parameters:
 
     action_selector:
       kind: 'phase_selector'
-      default:
-        kind: 'sample'
-        parameters:
-          is_distribution: False
-      phases:
-        - phase:
-            kind: 'warm_up'
-            parameters:
-              step: 0
-          action_selector:
-            kind: 'uniform'
-        - phase:
-            kind: 'warm_up'
-            parameters:
-              step: 1
-          action_selector:
-            kind: 'uniform'
-        - phase:
-            kind: 'training'
-          action_selector:
-            kind: 'sample'
-            parameters:
-              is_distribution: False
+      parameters:
+        default:
+          kind: 'sample'
+          parameters:
+            is_distribution: False
+        phases:
+          - phase:
+              kind: 'warm_up'
+              parameters:
+                step: 0
+            action_selector:
+              kind: 'uniform'
+          - phase:
+              kind: 'warm_up'
+              parameters:
+                step: 1
+            action_selector:
+              kind: 'uniform'
+          - phase:
+              kind: 'training'
+            action_selector:
+              kind: 'sample'
+              parameters:
+                is_distribution: False
 
     __encoder__: ''
 
@@ -48,13 +49,13 @@ parameters:
       kind: 'ppo'
       parameters:
         device: 'cpu'
-        sample_count: 128
+        sample_count: 256
         policy_step_ratio: 0.2
-        entropy_regularization: 0.01
+        entropy_regularization: 0.001
         rollback_ratio: 0.00
         critic_weight: 0.5
-        epochs: 1
+        epochs: 3
 
         loss:
           kind: 'cross_entropy'
@@ -66,14 +67,9 @@ parameters:
 
         optimizer:
           model:
-            kind: 'adam'
+            kind: 'adam_w'
             parameters:
-              lr: 0.0005
-
-#            scheduler:
-#              kind: 'exponential'
-#              parameters:
-#                gamma: 0.9999
+              lr: 0.001
 
         memory:
           kind: 'prioritized_replay'
diff --git a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/run.yml b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/run.yml
index d2545d3..8cb7705 100644
--- a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/run.yml
+++ b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/experiment/4/run.yml
@@ -18,4 +18,4 @@ parameters:
     train_interval: 100
     max_training_steps: 0
 
-  n_workers: 1
+  n_workers: 4
diff --git a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/machine.yml b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/machine.yml
index 35ce23c..10e1cc3 100644
--- a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/machine.yml
@@ -71,7 +71,6 @@ parameters:
           kind: 'sgd'
           parameters:
             lr: 0.001
-            momentum: 0.9
 
     return:
       kind: 'no'
diff --git a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/marl_machine.yml b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/marl_machine.yml
index 1c316f5..45729ed 100644
--- a/diploma_thesis/configuration/experiments/jsp/MARL-DQN/marl_machine.yml
+++ b/diploma_thesis/configuration/experiments/jsp/MARL-DQN/marl_machine.yml
@@ -71,7 +71,6 @@ parameters:
           kind: 'sgd'
           parameters:
             lr: 0.001
-            momentum: 0.9
 
     return:
       kind: 'no'
diff --git a/diploma_thesis/configuration/experiments/jsp/tournament.yml b/diploma_thesis/configuration/experiments/jsp/tournament.yml
index a97b9ab..416f6f6 100644
--- a/diploma_thesis/configuration/experiments/jsp/tournament.yml
+++ b/diploma_thesis/configuration/experiments/jsp/tournament.yml
@@ -4,7 +4,7 @@ task:
   n_workers: 8
   n_threads: 8
   name: 'rules'
-  output_dir: 'results/jsp/experiments/tournaments/jsp/2. GRAPH-NN'
+  output_dir: 'results/jsp/experiments/tournaments/jsp/1. MARL-DQN'
   store_run_statistics: True
   log_run: False
   update: True
@@ -29,7 +29,7 @@ task:
     - kind: 'persisted_agents'
       parameters:
         prefix: ''
-        path: 'results/jsp/experiments/2. GRAPH-NN'
+        path: 'results/jsp/experiments/1. MARL-DQN'
         depth: 5
 #
 #    - kind: 'persisted_agents'
diff --git a/diploma_thesis/simulator/tape/machine/unary.py b/diploma_thesis/simulator/tape/machine/unary.py
index d993335..531d5aa 100644
--- a/diploma_thesis/simulator/tape/machine/unary.py
+++ b/diploma_thesis/simulator/tape/machine/unary.py
@@ -47,15 +47,15 @@ def reward_after_production(self, context: Context) -> torch.FloatTensor | None:
             return torch.FloatTensor(1)
 
         if actual_tardiness_rate > context.actual_tardiness_rate:
-            return torch.FloatTensor(-1)
+            return torch.FloatTensor([-1]).view(-1)
 
         expected_tardiness_rate = context.machine.shop_floor.expected_tardy_rate(now)
 
         if expected_tardiness_rate < context.expected_tardiness_rate:
-            return torch.FloatTensor(1)
+            return torch.FloatTensor([1]).view(-1)
 
         if expected_tardiness_rate > context.expected_tardiness_rate:
-            return torch.FloatTensor(-1)
+            return torch.FloatTensor([-1]).view(-1)
 
         utilization_rate = context.machine.shop_floor.utilization_rate()
 
@@ -63,9 +63,9 @@ def reward_after_production(self, context: Context) -> torch.FloatTensor | None:
             return torch.FloatTensor(1)
 
         if utilization_rate > context.utilization_rate * 0.95:
-            return torch.FloatTensor(0)
+            return torch.FloatTensor([0]).view(-1)
 
-        return torch.FloatTensor(-1)
+        return torch.FloatTensor([-1]).view(-1)
 
     def reward_after_completion(self, contexts: List[Context]):
         return None