Skip to content

Commit

Permalink
Change training schedule
Browse files: browse the repository at this point in the history
  • Loading branch information
yura-hb committed Apr 24, 2024
1 parent 2c11db2 commit 8d4a2d4
Show file tree
Hide file tree
Showing 35 changed files with 164 additions and 126 deletions.
4 changes: 4 additions & 0 deletions diploma_thesis/agents/utils/action/sample.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import dis

import torch.distributions

from .action_selector import *
Expand All @@ -19,6 +21,8 @@ def __call__(self, distribution: torch.Tensor) -> Tuple[int, torch.Tensor]:

action = distribution.sample().item()

print("action: ", action, "entropy: ", distribution.entropy().item(), distribution.probs)

return action, distribution.probs

@staticmethod
Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/utils/policy/flexible_action.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def encode(self, state):
prev_count += target_nodes_count

actions = torch.nn.utils.rnn.pad_sequence(result, batch_first=True, padding_value=-float('inf'))
lengths = torch.tensor(lengths)
lengths = torch.tensor(lengths).to(actions.device)

output[Keys.ACTIONS] = (actions, lengths)

Expand Down
6 changes: 4 additions & 2 deletions diploma_thesis/agents/utils/rl/utils/ppo_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def base_parameters_from_cli(parameters: Dict):
sample_count=parameters.get('sample_count', 128),
policy_step_ratio=parameters.get('policy_step_ratio', 1.0),
entropy_regularization=parameters.get('entropy_regularization', 0.0),
rollback_ratio=parameters.get('rollback_ratio', 0.1),
rollback_ratio=parameters.get('rollback_ratio', 0.0),
critic_weight=parameters.get('critic_weight', 1.0),
epochs=parameters.get('epochs', 1),
priority_reduction_ratio=parameters.get('priority_reduction_ratio', 1.05)
Expand Down Expand Up @@ -124,7 +124,7 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):
advantages = batch.info[Record.ADVANTAGE_KEY]

# Normalize advantages
advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
# advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

action_probs = batch.info[Record.POLICY_KEY][range, batch.action.view(-1)]

Expand All @@ -139,6 +139,8 @@ def actor_loss(batch, logits, configuration: PPOConfiguration, device):

advantages = torch.min(weights * advantages, clipped_weights * advantages)

print(advantages)

entropy = distribution.entropy().mean()

return torch.mean(advantages) + entropy_regularization * entropy, entropy
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ graph: &graph

default_mods: &default_mods
- 'util/infrastructure/cuda.yml'
- 'util/train_schedule/on_store_64.yml'

###############################################################################################

Expand Down Expand Up @@ -135,7 +136,7 @@ long_single_source_run: &long_single_source_run
nested:
parameters:
dispatch:
seed: [ [ 0 ] ]
seed: [ [ 0, 1, 2, 3 ] ]


###############################################################################################
Expand Down Expand Up @@ -194,8 +195,7 @@ task:
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
mod_dirs:
- 'configuration/mods/run/mods'
mods:
- 'n_workers/1.yml'
mods: []
nested:
parameters:
simulations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ ppo_1: &ppo_1
# 'util/agent/multi_agent.yml'
]
- [ 'util/infrastructure/cuda.yml' ]
- [ 'util/train_schedule/on_store.yml' ]

ppo_2: &ppo_2
base_path: *base_model
Expand All @@ -55,6 +56,7 @@ ppo_2: &ppo_2
# 'util/agent/multi_agent.yml'
]
- [ 'util/infrastructure/cuda.yml' ]
- [ 'util/train_schedule/on_store.yml' ]

ppo_3: &ppo_3
base_path: *base_model
Expand All @@ -73,6 +75,7 @@ ppo_3: &ppo_3
# 'util/agent/multi_agent.yml'
]
- [ 'util/infrastructure/cuda.yml' ]
- [ 'util/train_schedule/on_store.yml' ]

ppo_4: &ppo_4
base_path: *base_model
Expand All @@ -91,6 +94,7 @@ ppo_4: &ppo_4
# 'util/agent/multi_agent.yml'
]
- [ 'util/infrastructure/cuda.yml' ]
- [ 'util/train_schedule/on_store.yml' ]
###############################################################################################

reward: &reward
Expand All @@ -112,15 +116,15 @@ long_single_source_run: &long_single_source_run
nested:
parameters:
dispatch:
seed: [ [ 0 ] ]
seed: [ [ 0, 1, 2, 3 ] ]


###############################################################################################


task:
kind: 'multi_task'
n_workers: 4
n_workers: 8
n_threads: 32
debug: False
store_run_statistics: False
Expand Down Expand Up @@ -160,7 +164,7 @@ task:
simulator:
kind: 'td'
parameters:
memory: 96
memory: 64
emit_trajectory: True

graph:
Expand All @@ -172,8 +176,7 @@ task:
base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
mod_dirs:
- 'configuration/mods/run/mods'
mods:
- 'n_workers/1.yml'
mods: []
nested:
parameters:
simulations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ graph: &graph
is_work_center_set_in_shop_floor_connected: False

default_mods: &default_mods
['util/infrastructure/cuda.yml']
- 'util/infrastructure/cuda.yml'
- 'util/train_schedule/on_store_64.yml'

###############################################################################################

Expand Down Expand Up @@ -231,7 +232,7 @@ long_single_source_run: &long_single_source_run
nested:
parameters:
dispatch:
seed: [ [ 0 ] ]
seed: [ [ 0, 1, 2, 3 ] ]


###############################################################################################
Expand Down Expand Up @@ -290,8 +291,7 @@ task:
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
mod_dirs:
- 'configuration/mods/run/mods'
mods:
- 'n_workers/1.yml'
mods: []
nested:
parameters:
simulations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,13 @@ ppo_1: &ppo_1
- [ 'util/optimizer/grad_norm.yml' ]
- [
'__none__',
'agent/ppo/p3or.yml'
# 'agent/ppo/p3or.yml'
]
- [
'__none__',
# 'util/agent/multi_agent.yml'
]
- ['util/train_schedule/on_store.yml']
- ['util/infrastructure/cuda.yml']

ppo_2: &ppo_2
Expand All @@ -55,6 +56,7 @@ ppo_2: &ppo_2
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- ['util/train_schedule/on_store.yml']

ppo_3: &ppo_3
base_path: *base_model
Expand All @@ -73,6 +75,7 @@ ppo_3: &ppo_3
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- ['util/train_schedule/on_store.yml']

ppo_4: &ppo_4
base_path: *base_model
Expand All @@ -91,6 +94,7 @@ ppo_4: &ppo_4
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- [ 'util/train_schedule/on_store.yml']
###############################################################################################

reward: &reward
Expand All @@ -112,7 +116,7 @@ long_single_source_run: &long_single_source_run
nested:
parameters:
dispatch:
seed: [ [ 0 ] ]
seed: [ [ 0, 1, 2, 3 ] ]


###############################################################################################
Expand All @@ -132,6 +136,7 @@ task:
base:
name: 'model'
output_dir: '1'
seed: 0
log_stdout: False

machine_agent:
Expand Down Expand Up @@ -172,8 +177,7 @@ task:
base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
mod_dirs:
- 'configuration/mods/run/mods'
mods:
- 'n_workers/1.yml'
mods: []
nested:
parameters:
simulations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ graph: &graph

default_mods: &default_mods
- 'util/infrastructure/cuda.yml'
- 'util/train_schedule/on_store_64.yml'

###############################################################################################

Expand Down Expand Up @@ -323,7 +324,7 @@ long_single_source_run: &long_single_source_run
nested:
parameters:
dispatch:
seed: [ [ 0 ] ]
seed: [ [ 0, 1, 2, 3 ] ]


###############################################################################################
Expand Down Expand Up @@ -382,8 +383,7 @@ task:
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
mod_dirs:
- 'configuration/mods/run/mods'
mods:
- 'n_workers/1.yml'
mods: []
nested:
parameters:
simulations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ ppo_1: &ppo_1
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- [ 'util/train_schedule/on_store.yml']

ppo_2: &ppo_2
base_path: *base_model
Expand All @@ -55,6 +56,7 @@ ppo_2: &ppo_2
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- [ 'util/train_schedule/on_store.yml']

ppo_3: &ppo_3
base_path: *base_model
Expand All @@ -73,6 +75,7 @@ ppo_3: &ppo_3
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- [ 'util/train_schedule/on_store.yml']

ppo_4: &ppo_4
base_path: *base_model
Expand All @@ -91,6 +94,7 @@ ppo_4: &ppo_4
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- [ 'util/train_schedule/on_store.yml']

ppo_5: &ppo_5
base_path: *base_model
Expand All @@ -109,6 +113,7 @@ ppo_5: &ppo_5
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- [ 'util/train_schedule/on_store.yml']

ppo_6: &ppo_6
base_path: *base_model
Expand All @@ -127,6 +132,7 @@ ppo_6: &ppo_6
# 'util/agent/multi_agent.yml'
]
- ['util/infrastructure/cuda.yml']
- [ 'util/train_schedule/on_store.yml']
###############################################################################################

reward: &reward
Expand All @@ -148,7 +154,7 @@ long_single_source_run: &long_single_source_run
nested:
parameters:
dispatch:
seed: [ [ 0 ] ]
seed: [ [ 0, 1, 2, 3 ] ]


###############################################################################################
Expand Down Expand Up @@ -196,7 +202,7 @@ task:
simulator:
kind: 'td'
parameters:
memory: 96
memory: 64
emit_trajectory: True

graph:
Expand All @@ -208,8 +214,7 @@ task:
base_path: 'configuration/experiments/jsp/GRAPH-NN/run_ppo.yml'
mod_dirs:
- 'configuration/mods/run/mods'
mods:
- 'n_workers/1.yml'
mods: []
nested:
parameters:
simulations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ graph: &graph

default_mods: &default_mods
- 'util/infrastructure/cuda.yml'
- 'util/train_schedule/on_store_64.yml'
###############################################################################################

dqn_1: &dqn_1
Expand Down Expand Up @@ -182,7 +183,7 @@ long_single_source_run: &long_single_source_run
nested:
parameters:
dispatch:
seed: [ [ 0 ] ]
seed: [ [ 0, 1, 2, 3 ] ]


###############################################################################################
Expand Down Expand Up @@ -241,8 +242,7 @@ task:
base_path: 'configuration/experiments/jsp/GRAPH-NN/run.yml'
mod_dirs:
- 'configuration/mods/run/mods'
mods:
- 'n_workers/1.yml'
mods: []
nested:
parameters:
simulations:
Expand Down
Loading

0 comments on commit 8d4a2d4

Please sign in to comment.