From 36963a9f8c28e982cfaac73c5294fe84881245d0 Mon Sep 17 00:00:00 2001 From: Yury Hayeu Date: Tue, 12 Mar 2024 12:31:36 +0100 Subject: [PATCH] Fixes connected with compiled models --- diploma_thesis/agents/base/__init__.py | 4 + diploma_thesis/agents/base/marl_agent.py | 39 +++++++--- diploma_thesis/agents/base/model.py | 5 ++ diploma_thesis/agents/base/rl_agent.py | 43 ++++++++++- diploma_thesis/agents/machine/marl.py | 9 ++- .../agents/machine/model/deep_multi_rule.py | 2 +- .../agents/machine/model/deep_rule.py | 2 +- diploma_thesis/agents/machine/rl.py | 5 +- .../nn/layers/partial_instance_norm_1d.py | 2 +- .../agents/utils/policy/flexible_action.py | 30 ++++---- diploma_thesis/agents/utils/policy/policy.py | 2 +- diploma_thesis/agents/utils/rl/ddqn.py | 4 +- diploma_thesis/agents/utils/rl/dqn.py | 14 +++- diploma_thesis/agents/utils/rl/ppo.py | 2 +- diploma_thesis/agents/utils/rl/reinforce.py | 9 ++- diploma_thesis/agents/utils/rl/rl.py | 3 + diploma_thesis/cli.py | 1 - .../experiments/jsp/dqn_path.yml | 74 ++++++++----------- .../configuration/mods/machine/dqn.yml | 2 +- .../mods/util/infrastructure/compile.yml | 4 +- diploma_thesis/configuration/mods/run/run.yml | 2 +- diploma_thesis/configuration/simulation.yml | 2 +- diploma_thesis/simulator/simulator.py | 1 - diploma_thesis/utils/modified.py | 1 + diploma_thesis/workflow/multi_simulation.py | 5 +- diploma_thesis/workflow/simulation.py | 6 +- notebooks/{utils => plot_utils}/__init__.py | 13 ++-- notebooks/{utils => plot_utils}/legend.py | 0 .../plot_decisions_per_action.py | 0 notebooks/{utils => plot_utils}/plot_gantt.py | 0 .../plot_performance_accross_runs.py | 1 + .../plot_reward_distribution_per_action.py | 0 .../plot_reward_model_across_runs.py | 0 .../plot_reward_per_run.py | 0 notebooks/{utils => plot_utils}/plot_value.py | 0 notebooks/plot_utils/plot_value_per_run.py | 48 ++++++++++++ notebooks/utils/plot_value_per_run.py | 0 37 files changed, 227 insertions(+), 108 deletions(-) rename notebooks/{utils => plot_utils}/__init__.py (89%) rename notebooks/{utils => plot_utils}/legend.py (100%) rename notebooks/{utils => plot_utils}/plot_decisions_per_action.py (100%) rename notebooks/{utils => plot_utils}/plot_gantt.py (100%) rename notebooks/{utils => plot_utils}/plot_performance_accross_runs.py (99%) rename notebooks/{utils => plot_utils}/plot_reward_distribution_per_action.py (100%) rename notebooks/{utils => plot_utils}/plot_reward_model_across_runs.py (100%) rename notebooks/{utils => plot_utils}/plot_reward_per_run.py (100%) rename notebooks/{utils => plot_utils}/plot_value.py (100%) create mode 100644 notebooks/plot_utils/plot_value_per_run.py delete mode 100644 notebooks/utils/plot_value_per_run.py diff --git a/diploma_thesis/agents/base/__init__.py b/diploma_thesis/agents/base/__init__.py index c234374..3162871 100644 --- a/diploma_thesis/agents/base/__init__.py +++ b/diploma_thesis/agents/base/__init__.py @@ -5,3 +5,7 @@ from .state import GraphState, TensorState, Graph from .rl_agent import RLAgent from .marl_agent import MARLAgent + +import torch + +torch._dynamo.config.suppress_errors = True diff --git a/diploma_thesis/agents/base/marl_agent.py b/diploma_thesis/agents/base/marl_agent.py index 68e021b..cb7dfa6 100644 --- a/diploma_thesis/agents/base/marl_agent.py +++ b/diploma_thesis/agents/base/marl_agent.py @@ -8,17 +8,14 @@ class MARLAgent(Generic[Key], RLAgent[Key]): - def __init__(self, - model: DeepPolicyModel, - state_encoder: StateEncoder, - trainer: RLTrainer, - is_model_distributed: bool): - 
super().__init__(model, state_encoder, trainer) - - self.model: DeepPolicyModel | Dict[Key, DeepPolicyModel] = model - self.trainer: RLTrainer | Dict[Key, RLTrainer] = trainer - self.is_model_distributed = is_model_distributed + def __init__(self, is_model_distributed: bool, *args, **kwargs): self.is_configured = False + + super().__init__(*args, **kwargs) + + self.model: DeepPolicyModel | Dict[Key, DeepPolicyModel] = self.model + self.trainer: RLTrainer | Dict[Key, RLTrainer] = self.trainer + self.is_model_distributed = is_model_distributed self.keys = None @property @@ -99,6 +96,9 @@ def schedule(self, key: Key, parameters): if not self.trainer[key].is_configured: self.trainer[key].configure(model.policy) + if not self.is_compiled: + self.compile() + return result def __model_for_key__(self, key: Key): @@ -106,3 +106,22 @@ def __model_for_key__(self, key: Key): return self.model[key] return self.model + + def compile(self): + if not self.is_configured or self.is_compiled: + return + + if not self.configuration.compile: + self.is_compiled = True + return + + for _, value in self.trainer.items(): + value.compile() + + if self.is_model_distributed: + for _, value in self.model.items(): + value.compile() + else: + self.model.compile() + + self.is_compiled = True diff --git a/diploma_thesis/agents/base/model.py b/diploma_thesis/agents/base/model.py index 3d7c8e6..9f03935 100644 --- a/diploma_thesis/agents/base/model.py +++ b/diploma_thesis/agents/base/model.py @@ -26,6 +26,9 @@ class Record: def __call__(self, state: State, parameters: Input) -> Record: pass + def compile(self): + pass + class DeepPolicyModel(Model[Input, State, Action, Result], PhaseUpdatable, metaclass=ABCMeta): @@ -39,3 +42,5 @@ def update(self, phase: Phase): self.policy.update(phase) + def compile(self): + self.policy.compile() diff --git a/diploma_thesis/agents/base/rl_agent.py b/diploma_thesis/agents/base/rl_agent.py index 4323e9d..854d33b 100644 --- a/diploma_thesis/agents/base/rl_agent.py +++ b/diploma_thesis/agents/base/rl_agent.py @@ -6,12 +6,32 @@ from .agent import * from .model import DeepPolicyModel +from dataclasses import dataclass + + +@dataclass +class Configuration: + compile: bool = False + + @staticmethod + def from_cli(parameters): + return Configuration( + compile=parameters.get('compile', False) + ) + class RLAgent(Generic[Key], Agent[Key]): - def __init__(self, model: DeepPolicyModel, state_encoder: StateEncoder, trainer: RLTrainer): + def __init__(self, + model: DeepPolicyModel, + state_encoder: StateEncoder, + trainer: RLTrainer, + configuration: Configuration): super().__init__(model, state_encoder) + self.is_compiled = False + + self.configuration = configuration self.model: DeepPolicyModel = model self.trainer = trainer @@ -52,4 +72,25 @@ def schedule(self, key, parameters): if not self.trainer.is_configured: self.trainer.configure(self.model.policy) + if not self.is_compiled: + self.compile() + return result + + def __setstate__(self, state): + self.__dict__ = state + + self.compile() + + def compile(self): + if not self.configuration.compile: + self.is_compiled = True + return + + if self.is_compiled: + return + + self.trainer.compile() + self.model.compile() + + self.is_compiled = True diff --git a/diploma_thesis/agents/machine/marl.py b/diploma_thesis/agents/machine/marl.py index 09be9f0..e0f8edb 100644 --- a/diploma_thesis/agents/machine/marl.py +++ b/diploma_thesis/agents/machine/marl.py @@ -1,6 +1,6 @@ from typing import Dict -from agents.base.marl_agent import MARLAgent +from 
agents.base.marl_agent import MARLAgent, Configuration from agents.utils.rl import from_cli as rl_trainer_from_cli from environment import MachineKey, ShopFloor from .model import DeepPolicyMachineModel, from_cli as model_from_cli @@ -19,9 +19,14 @@ def from_cli(parameters: Dict): model = model_from_cli(parameters['model']) encoder = state_encoder_from_cli(parameters['encoder']) trainer = rl_trainer_from_cli(parameters['trainer']) + configuration = Configuration.from_cli(parameters) is_model_distributed = parameters.get('is_model_distributed', True) assert isinstance(model, DeepPolicyMachineModel), f"Model must conform to NNModel" - return MARLMachine(model, encoder, trainer, is_model_distributed) + return MARLMachine(is_model_distributed=is_model_distributed, + model=model, + state_encoder=encoder, + trainer=trainer, + configuration=configuration) diff --git a/diploma_thesis/agents/machine/model/deep_multi_rule.py b/diploma_thesis/agents/machine/model/deep_multi_rule.py index 4835be0..e619664 100644 --- a/diploma_thesis/agents/machine/model/deep_multi_rule.py +++ b/diploma_thesis/agents/machine/model/deep_multi_rule.py @@ -18,7 +18,7 @@ def __init__(self, rules: List[SchedulingRule], policy: Policy[MachineInput]): def __call__(self, state: State, parameters: Input) -> DeepPolicyMachineModel.Record: # No gradient descent based on decision on the moment with torch.no_grad(): - record = self.policy(state, parameters) + record = self.policy.select(state, parameters) result = self.rules[record.action.item()](parameters.machine, parameters.now) return DeepPolicyMachineModel.Record(result=result, record=record, batch_size=[]) diff --git a/diploma_thesis/agents/machine/model/deep_rule.py b/diploma_thesis/agents/machine/model/deep_rule.py index 21749cc..45f4b80 100644 --- a/diploma_thesis/agents/machine/model/deep_rule.py +++ b/diploma_thesis/agents/machine/model/deep_rule.py @@ -11,7 +11,7 @@ class DeepRule(DeepPolicyMachineModel): def __call__(self, state: State, parameters: Input) -> DeepPolicyMachineModel.Record: # No gradient descent based on decision on the moment with torch.no_grad(): - record = self.policy(state, parameters) + record = self.policy.select(state, parameters) result = parameters.machine.queue[record.action.item()] return DeepPolicyMachineModel.Record(result=result, record=record, batch_size=[]) diff --git a/diploma_thesis/agents/machine/rl.py b/diploma_thesis/agents/machine/rl.py index d50ee06..921cbc1 100644 --- a/diploma_thesis/agents/machine/rl.py +++ b/diploma_thesis/agents/machine/rl.py @@ -1,6 +1,6 @@ from typing import Dict -from agents.base.rl_agent import RLAgent +from agents.base.rl_agent import RLAgent, Configuration from agents.utils.rl import from_cli as rl_trainer_from_cli from environment import MachineKey from .model import DeepPolicyMachineModel, from_cli as model_from_cli @@ -14,7 +14,8 @@ def from_cli(parameters: Dict): model = model_from_cli(parameters['model']) encoder = state_encoder_from_cli(parameters['encoder']) trainer = rl_trainer_from_cli(parameters['trainer']) + configuration = Configuration.from_cli(parameters) assert isinstance(model, DeepPolicyMachineModel), f"Model must conform to NNModel" - return RLMachine(model, encoder, trainer) + return RLMachine(model, encoder, trainer, configuration) diff --git a/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py b/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py index 9711a09..89a86a7 100644 --- a/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py +++ 
b/diploma_thesis/agents/utils/nn/layers/partial_instance_norm_1d.py @@ -11,7 +11,7 @@ def __init__(self, channels: int): super().__init__() self.channels = channels - self.norm = nn.InstanceNorm1d(num_features=channels) + self.norm = nn.InstanceNorm1d(num_features=1) def forward(self, batch): normalized = batch[:, :self.channels] diff --git a/diploma_thesis/agents/utils/policy/flexible_action.py b/diploma_thesis/agents/utils/policy/flexible_action.py index b050916..180b8fa 100644 --- a/diploma_thesis/agents/utils/policy/flexible_action.py +++ b/diploma_thesis/agents/utils/policy/flexible_action.py @@ -44,21 +44,7 @@ def __get_values__(self, state): def __get_actions__(self, state): return self.action_model(state) - def forward(self, state: State, parameters: Input) -> Record: - values, actions = self.predict(state) - values, actions = values.squeeze(), actions.squeeze() - action, policy = self.action_selector(actions) - action = action if torch.is_tensor(action) else torch.tensor(action, dtype=torch.long) - - info = TensorDict({ - "policy": policy, - "values": values.detach().clone(), - "actions": actions.detach().clone() - }, batch_size=[]) - - return Record(state, action, info, batch_size=[]) - - def predict(self, state: State): + def forward(self, state: State): actions = torch.tensor(0, dtype=torch.long) if self.action_model is not None: @@ -77,6 +63,20 @@ def predict(self, state: State): case _: raise ValueError(f"Policy estimation method {self.policy_estimation_method} is not supported") + def select(self, state: State, parameters: Input) -> Record: + values, actions = self.__call__(state) + values, actions = values.squeeze(), actions.squeeze() + action, policy = self.action_selector(actions) + action = action if torch.is_tensor(action) else torch.tensor(action, dtype=torch.long) + + info = TensorDict({ + "policy": policy, + "values": values.detach().clone(), + "actions": actions.detach().clone() + }, batch_size=[]) + + return Record(state, action, info, batch_size=[]) + def __configure__(self): if self.noise_parameters is not None: self.action_model.to_noisy(self.noise_parameters) diff --git a/diploma_thesis/agents/utils/policy/policy.py b/diploma_thesis/agents/utils/policy/policy.py index 8384f3b..13d394b 100644 --- a/diploma_thesis/agents/utils/policy/policy.py +++ b/diploma_thesis/agents/utils/policy/policy.py @@ -28,7 +28,7 @@ class Record: class Policy(Generic[Input], nn.Module, PhaseUpdatable, metaclass=ABCMeta): @abstractmethod - def predict(self, state: State) -> Tuple[torch.FloatTensor, torch.FloatTensor]: + def select(self, state, parameters): pass def clone(self): diff --git a/diploma_thesis/agents/utils/rl/ddqn.py b/diploma_thesis/agents/utils/rl/ddqn.py index 53e5ce3..0b20f37 100644 --- a/diploma_thesis/agents/utils/rl/ddqn.py +++ b/diploma_thesis/agents/utils/rl/ddqn.py @@ -9,12 +9,12 @@ class DoubleDeepQTrainer(DeepQTrainer): def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase): - _, actions = model.predict(batch.next_state) + _, actions = model(batch.next_state) orig_q = actions[range(batch.shape[0]), batch.action] best_actions = actions.max(dim=-1).indices - target = self.target_model.predict(batch.next_state)[1][range(batch.shape[0]), best_actions] + target = self.target_model(batch.next_state)[1][range(batch.shape[0]), best_actions] q = batch.reward + self.return_estimator.discount_factor * target * (1 - batch.done.int()) actions[range(batch.shape[0]), batch.action] = q diff --git a/diploma_thesis/agents/utils/rl/dqn.py 
b/diploma_thesis/agents/utils/rl/dqn.py index d3ba1ec..529478c 100644 --- a/diploma_thesis/agents/utils/rl/dqn.py +++ b/diploma_thesis/agents/utils/rl/dqn.py @@ -19,7 +19,7 @@ class Configuration: def from_cli(parameters: Dict): return DeepQTrainer.Configuration( decay=parameters.get('decay', 0.99), - update_steps=parameters.get('update_steps', 100), + update_steps=parameters.get('update_steps', 20), prior_eps=parameters.get('prior_eps', 1e-6) ) @@ -43,7 +43,7 @@ def __train__(self, model: Policy): with torch.no_grad(): q_values, td_error = self.estimate_q(model, batch) - _, actions = model.predict(batch.state) + _, actions = model(batch.state) loss = self.loss(actions, q_values) self.optimizer.zero_grad() @@ -64,10 +64,10 @@ def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase): # Note: # The idea is that we compute the Q-values only for performed actions. Other actions wouldn't be updated, # because there will be zero loss and so zero gradient - _, actions = model.predict(batch.next_state) + _, actions = model(batch.next_state) orig_q = actions.clone()[range(batch.shape[0]), batch.action] - _, target = self.target_model.predict(batch.next_state) + _, target = self.target_model(batch.next_state) target = target.max(dim=1).values q = batch.reward + self.return_estimator.discount_factor * target * (1 - batch.done.int()) @@ -81,6 +81,12 @@ def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase): def target_model(self): return self._target_model.module + def compile(self): + if not self.is_configured: + return + + self.target_model.compile() + @classmethod def from_cli(cls, parameters, diff --git a/diploma_thesis/agents/utils/rl/ppo.py b/diploma_thesis/agents/utils/rl/ppo.py index 713c3e8..239aa51 100644 --- a/diploma_thesis/agents/utils/rl/ppo.py +++ b/diploma_thesis/agents/utils/rl/ppo.py @@ -64,7 +64,7 @@ def __train__(self, model: Policy): def __step__(self, batch: Record, model: Policy): advantages = batch.info[Record.ADVANTAGE_KEY] - value, logits = model.predict(batch.state) + value, logits = model(batch.state) value = value[torch.arange(batch.shape[0]), batch.action] distribution = torch.distributions.Categorical(logits=logits) diff --git a/diploma_thesis/agents/utils/rl/reinforce.py b/diploma_thesis/agents/utils/rl/reinforce.py index 2266a4c..890c1a3 100644 --- a/diploma_thesis/agents/utils/rl/reinforce.py +++ b/diploma_thesis/agents/utils/rl/reinforce.py @@ -64,7 +64,7 @@ def __train__(self, model: Policy): baseline = torch.squeeze(baseline) # Perform policy step - loss = self.loss(model.predict(batch.state)[1], batch.action) + loss = self.loss(model(batch.state)[1], batch.action) if loss.numel() == 1: raise ValueError('Loss should not have reduction to single value') @@ -90,6 +90,13 @@ def __train__(self, model: Policy): critic.optimizer.step() self.record_loss(critic_loss, key=f'critic_{index}') + def compile(self): + if not self.is_configured: + return + + for critic in self.critics: + critic.neural_network.compile() + @property def critics(self): return self.configuration.critics diff --git a/diploma_thesis/agents/utils/rl/rl.py b/diploma_thesis/agents/utils/rl/rl.py index 195a167..606c8c0 100644 --- a/diploma_thesis/agents/utils/rl/rl.py +++ b/diploma_thesis/agents/utils/rl/rl.py @@ -90,6 +90,9 @@ def store(self, sample: TrainingSample, model: Policy): self.__train__(model) + def compile(self): + pass + def clear(self): self.loss_cache = [] self.storage.clear() diff --git a/diploma_thesis/cli.py b/diploma_thesis/cli.py index 
ea5efcc..49536bc 100644 --- a/diploma_thesis/cli.py +++ b/diploma_thesis/cli.py @@ -2,7 +2,6 @@ import argparse from typing import Dict -import torch._dynamo import yaml from workflow import Workflow, Simulation, Tournament, MultiSimulation diff --git a/diploma_thesis/configuration/experiments/jsp/dqn_path.yml b/diploma_thesis/configuration/experiments/jsp/dqn_path.yml index 4814d5e..8b271e6 100644 --- a/diploma_thesis/configuration/experiments/jsp/dqn_path.yml +++ b/diploma_thesis/configuration/experiments/jsp/dqn_path.yml @@ -1,6 +1,11 @@ # Evaluate the effectivenes of basic DQNs on the JSP environment -template: &template 'reference/marl_indirect' +template: &template 'reference/marl_direct' + +default_mods: &default_mods + - 'util/optimizer/grad_norm.yml' + - 'util/rules/all_rules.yml' +# - 'util/infrastructure/compile.yml' ############################################################################################### @@ -8,16 +13,14 @@ dqn_1: &dqn_1 base_path: 'configuration/mods/machine/dqn.yml' template: *template mods: - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + *default_mods marl_dqn_1: &marl_dqn_1 base_path: 'configuration/mods/machine/dqn.yml' template: *template mods: - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_1: ¢ralized_dqn_1 base_path: 'configuration/mods/machine/dqn.yml' @@ -25,8 +28,7 @@ marl_dqn_centralized_1: ¢ralized_dqn_1 mods: - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -35,8 +37,7 @@ dqn_2: &dqn_2 template: *template mods: - 'agent/dqn/ddqn.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_2: &marl_dqn_2 base_path: 'configuration/mods/machine/dqn.yml' @@ -44,8 +45,7 @@ marl_dqn_2: &marl_dqn_2 mods: - 'agent/dqn/ddqn.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_2: ¢ralized_dqn_2 base_path: 'configuration/mods/machine/dqn.yml' @@ -54,8 +54,7 @@ marl_dqn_centralized_2: ¢ralized_dqn_2 - 'agent/dqn/ddqn.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -64,8 +63,7 @@ dqn_3: &dqn_3 template: *template mods: - 'agent/dqn/dueling.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_3: &marl_dqn_3 base_path: 'configuration/mods/machine/dqn.yml' @@ -73,8 +71,7 @@ marl_dqn_3: &marl_dqn_3 mods: - 'agent/dqn/dueling.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_3: ¢ralized_dqn_3 base_path: 'configuration/mods/machine/dqn.yml' @@ -83,8 +80,7 @@ marl_dqn_centralized_3: ¢ralized_dqn_3 - 'agent/dqn/dueling.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -93,8 +89,7 @@ dqn_4: &dqn_4 template: *template mods: - 'agent/dqn/steps/3.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + 
- *default_mods marl_dqn_4: &marl_dqn_4 base_path: 'configuration/mods/machine/dqn.yml' @@ -102,8 +97,7 @@ marl_dqn_4: &marl_dqn_4 mods: - 'agent/dqn/steps/3.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_4: ¢ralized_dqn_4 base_path: 'configuration/mods/machine/dqn.yml' @@ -112,8 +106,7 @@ marl_dqn_centralized_4: ¢ralized_dqn_4 - 'agent/dqn/steps/3.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -122,8 +115,7 @@ dqn_5: &dqn_5 template: *template mods: - 'agent/dqn/prioritized.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_5: &marl_dqn_5 base_path: 'configuration/mods/machine/dqn.yml' @@ -131,8 +123,7 @@ marl_dqn_5: &marl_dqn_5 mods: - 'agent/dqn/prioritized.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods marl_dqn_centralized_5: ¢ralized_dqn_5 base_path: 'configuration/mods/machine/dqn.yml' @@ -141,8 +132,7 @@ marl_dqn_centralized_5: ¢ralized_dqn_5 - 'agent/dqn/prioritized.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods ############################################################################################### @@ -151,9 +141,8 @@ dqn_6: &dqn_6 template: *template mods: - 'agent/dqn/noisy.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' - 'util/action_selector/sample.yml' + - *default_mods marl_dqn_6: &marl_dqn_6 base_path: 'configuration/mods/machine/dqn.yml' @@ -161,8 +150,7 @@ marl_dqn_6: &marl_dqn_6 mods: - 'agent/dqn/noisy.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' marl_dqn_centralized_6: ¢ralized_dqn_6 @@ -172,8 +160,7 @@ marl_dqn_centralized_6: ¢ralized_dqn_6 - 'agent/dqn/noisy.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' ############################################################################################### @@ -187,8 +174,7 @@ dqn_7: &dqn_7 - 'agent/dqn/steps/3.yml' - 'agent/dqn/dueling.yml' - 'agent/dqn/noisy.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' marl_dqn_7: &marl_dqn_7 @@ -201,8 +187,7 @@ marl_dqn_7: &marl_dqn_7 - 'agent/dqn/dueling.yml' - 'agent/dqn/noisy.yml' - 'util/agent/multi_agent.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' marl_dqn_centralized_7: ¢ralized_dqn_7 @@ -216,8 +201,7 @@ marl_dqn_centralized_7: ¢ralized_dqn_7 - 'agent/dqn/noisy.yml' - 'util/agent/centralized.yml' - 'util/optimizer/lr_0001.yml' - - 'util/optimizer/grad_norm.yml' - - 'util/rules/all_rules.yml' + - *default_mods - 'util/action_selector/sample.yml' ############################################################################################### @@ -287,10 +271,10 @@ short_single_source_run: &short_single_source_run task: kind: 'multi_task' - n_workers: 4 + n_workers: 8 debug: False store_run_statistics: False - output_dir: 
'results/jsp/experiments/dqn_path/marl_indirect' + output_dir: 'results/jsp/experiments/dqn_path/marl_direct' tasks: # TD - n-step short single source diff --git a/diploma_thesis/configuration/mods/machine/dqn.yml b/diploma_thesis/configuration/mods/machine/dqn.yml index 5156dfd..5dab3b5 100644 --- a/diploma_thesis/configuration/mods/machine/dqn.yml +++ b/diploma_thesis/configuration/mods/machine/dqn.yml @@ -54,7 +54,7 @@ parameters: kind: 'replay' parameters: size: 8192 - batch_size: 256 + batch_size: 128 prefetch: 8 loss: diff --git a/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml b/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml index 9ded62b..26421c1 100644 --- a/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml +++ b/diploma_thesis/configuration/mods/machine/mods/util/infrastructure/compile.yml @@ -1,5 +1,3 @@ parameters: - model: - parameters: - compile: True \ No newline at end of file + compile: True diff --git a/diploma_thesis/configuration/mods/run/run.yml b/diploma_thesis/configuration/mods/run/run.yml index 058ad8c..fbadbb4 100644 --- a/diploma_thesis/configuration/mods/run/run.yml +++ b/diploma_thesis/configuration/mods/run/run.yml @@ -11,7 +11,7 @@ parameters: machine_train_schedule: pretrain_steps: 10 - train_interval: 100 + train_interval: 50 max_training_steps: 100000000 work_center_train_schedule: diff --git a/diploma_thesis/configuration/simulation.yml b/diploma_thesis/configuration/simulation.yml index 455eec8..ec47108 100644 --- a/diploma_thesis/configuration/simulation.yml +++ b/diploma_thesis/configuration/simulation.yml @@ -18,7 +18,7 @@ task: # - 'util/agent/multi_agent.yml' - 'util/rules/all_rules.yml' - 'util/optimizer/grad_norm.yml' -# - 'util/agent/multi_agent.yml' + - 'util/agent/multi_agent.yml' - 'util/infrastructure/compile.yml' work_center_agent: diff --git a/diploma_thesis/simulator/simulator.py b/diploma_thesis/simulator/simulator.py index 7275b03..c70b53f 100644 --- a/diploma_thesis/simulator/simulator.py +++ b/diploma_thesis/simulator/simulator.py @@ -3,7 +3,6 @@ from dataclasses import field from typing import Callable, List -import gc import simpy import torch from tensordict.prototype import tensorclass diff --git a/diploma_thesis/utils/modified.py b/diploma_thesis/utils/modified.py index 2778b0d..48114ea 100644 --- a/diploma_thesis/utils/modified.py +++ b/diploma_thesis/utils/modified.py @@ -16,6 +16,7 @@ def modified(parameters): template = __load_template__(parameters, base_path) mods = parameters['mods'] + mods = reduce(lambda x, y: x + y if isinstance(y, list) else x + [y], mods, []) mods_dir = os.path.dirname(base_path) mods_dir = os.path.join(mods_dir, 'mods') diff --git a/diploma_thesis/workflow/multi_simulation.py b/diploma_thesis/workflow/multi_simulation.py index 82ba080..d111bc3 100644 --- a/diploma_thesis/workflow/multi_simulation.py +++ b/diploma_thesis/workflow/multi_simulation.py @@ -45,7 +45,10 @@ def __merge__(key, lhs, rhs): n_workers = self.parameters.get('n_workers', -1) - Parallel(n_jobs=n_workers)(delayed(__run__)(s) for s in parameters) + torch.set_num_threads(n_workers) + torch.set_num_interop_threads(n_workers) + + Parallel(n_jobs=n_workers, backend='loky')(delayed(__run__)(s) for s in parameters) def __fetch_tasks__(self): result: [Dict] = [] diff --git a/diploma_thesis/workflow/simulation.py b/diploma_thesis/workflow/simulation.py index 377a95c..b8f3c6f 100644 --- a/diploma_thesis/workflow/simulation.py +++ 
b/diploma_thesis/workflow/simulation.py @@ -11,16 +11,12 @@ from agents.base.rl_agent import RLAgent from simulator import from_cli as simulator_from_cli, Simulator, RewardCache from simulator import run_configuration_from_cli, evaluate_configuration_from_cli -from simulator.tape import TapeModel from simulator.graph import GraphModel +from simulator.tape import TapeModel from utils import save from .workflow import Workflow -torch.set_num_threads(1) -torch._dynamo.config.suppress_errors = True - - class Simulation(Workflow): def __init__(self, parameters: Dict): diff --git a/notebooks/utils/__init__.py b/notebooks/plot_utils/__init__.py similarity index 89% rename from notebooks/utils/__init__.py rename to notebooks/plot_utils/__init__.py index 1d8bd36..12e45c5 100644 --- a/notebooks/utils/__init__.py +++ b/notebooks/plot_utils/__init__.py @@ -1,9 +1,8 @@ - - -from .plot_value import plot_value -from .plot_reward_per_run import plot_reward_per_run -from .plot_reward_distribution_per_action import plot_reward_distribution_per_action -from .plot_reward_model_across_runs import plot_reward_per_model_across_runs +from .plot_decisions_per_action import plot_decisions_per_action from .plot_gantt import plot_gantt from .plot_performance_accross_runs import plot_performance_across_runs -from .plot_decisions_per_action import plot_decisions_per_action +from .plot_reward_distribution_per_action import plot_reward_distribution_per_action +from .plot_reward_model_across_runs import plot_reward_per_model_across_runs +from .plot_reward_per_run import plot_reward_per_run +from .plot_value import plot_value +from .plot_value_per_run import plot_value_per_run diff --git a/notebooks/utils/legend.py b/notebooks/plot_utils/legend.py similarity index 100% rename from notebooks/utils/legend.py rename to notebooks/plot_utils/legend.py diff --git a/notebooks/utils/plot_decisions_per_action.py b/notebooks/plot_utils/plot_decisions_per_action.py similarity index 100% rename from notebooks/utils/plot_decisions_per_action.py rename to notebooks/plot_utils/plot_decisions_per_action.py diff --git a/notebooks/utils/plot_gantt.py b/notebooks/plot_utils/plot_gantt.py similarity index 100% rename from notebooks/utils/plot_gantt.py rename to notebooks/plot_utils/plot_gantt.py diff --git a/notebooks/utils/plot_performance_accross_runs.py b/notebooks/plot_utils/plot_performance_accross_runs.py similarity index 99% rename from notebooks/utils/plot_performance_accross_runs.py rename to notebooks/plot_utils/plot_performance_accross_runs.py index 256619b..d2910b5 100644 --- a/notebooks/utils/plot_performance_accross_runs.py +++ b/notebooks/plot_utils/plot_performance_accross_runs.py @@ -4,6 +4,7 @@ from .legend import add_legend + def plot_performance_across_runs(data, info): metric = info['metric'] group = info['group'] diff --git a/notebooks/utils/plot_reward_distribution_per_action.py b/notebooks/plot_utils/plot_reward_distribution_per_action.py similarity index 100% rename from notebooks/utils/plot_reward_distribution_per_action.py rename to notebooks/plot_utils/plot_reward_distribution_per_action.py diff --git a/notebooks/utils/plot_reward_model_across_runs.py b/notebooks/plot_utils/plot_reward_model_across_runs.py similarity index 100% rename from notebooks/utils/plot_reward_model_across_runs.py rename to notebooks/plot_utils/plot_reward_model_across_runs.py diff --git a/notebooks/utils/plot_reward_per_run.py b/notebooks/plot_utils/plot_reward_per_run.py similarity index 100% rename from 
notebooks/utils/plot_reward_per_run.py
rename to notebooks/plot_utils/plot_reward_per_run.py
diff --git a/notebooks/utils/plot_value.py b/notebooks/plot_utils/plot_value.py
similarity index 100%
rename from notebooks/utils/plot_value.py
rename to notebooks/plot_utils/plot_value.py
diff --git a/notebooks/plot_utils/plot_value_per_run.py b/notebooks/plot_utils/plot_value_per_run.py
new file mode 100644
index 0000000..42b30d2
--- /dev/null
+++ b/notebooks/plot_utils/plot_value_per_run.py
@@ -0,0 +1,48 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+from .legend import add_legend
+
+
+def plot_value_per_run(path: str | dict, info: dict, make_run_path, post_process_fn=lambda a, run: a):
+    if not isinstance(path, dict):
+        path = dict(first=path)
+
+    fig, ax = plt.subplots(figsize=info.get('figsize', (8, 8)))
+
+    max_values_len = 0
+
+    for name, data_path in path.items():
+        run = 1
+
+        values = []
+
+        while True:
+            run_path = make_run_path(data_path, run)
+
+            run += 1
+
+            try:
+                df = pd.read_csv(run_path)
+
+                df = df.sort_values(by=info['index'])
+                df.set_index(info['index'], inplace=True)
+
+                values += [post_process_fn(df[info['column']], run)]
+            except:
+                break  # Stop once the next run's CSV can't be read or lacks the expected columns
+
+        ax.plot(np.arange(len(values)), np.array(values), marker=info['marker'], label=name)
+
+        max_values_len = max(max_values_len, len(values))
+
+    ax.grid(True)
+    ax.set_title(info['title'])
+    ax.set_xlabel(info['xlabel'])
+    ax.set_ylabel(info['ylabel'])
+    ax.set_xticks(np.arange(max_values_len))
+
+    add_legend(ax, info)
+
+    return fig
diff --git a/notebooks/utils/plot_value_per_run.py b/notebooks/utils/plot_value_per_run.py
deleted file mode 100644
index e69de29..0000000
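
Usage note (not part of the diff): a minimal sketch of how the new plot_value_per_run helper might be called from a notebook, assuming the notebook runs from the notebooks/ directory so that plot_utils is importable. The result directory layout ('results/.../dqn_1/run_<n>/value.csv'), the 'moment' and 'value' column names, and the saved filename are illustrative assumptions, not defined by this patch; depending on what add_legend expects from info, additional keys may be needed.

import os

from plot_utils import plot_value_per_run

# Plot one aggregated value per training run for a single experiment.
# Column names and directory layout below are assumptions for illustration.
info = {
    'index': 'moment',    # column used to sort/index each run's CSV (assumed name)
    'column': 'value',    # column reduced to one point per run (assumed name)
    'marker': 'o',
    'title': 'Mean value per run',
    'xlabel': 'Run',
    'ylabel': 'Value',
}

fig = plot_value_per_run(
    path='results/jsp/experiments/dqn_path/marl_direct/dqn_1',  # assumed experiment directory
    info=info,
    # Hypothetical per-run file layout: <experiment>/run_<n>/value.csv
    make_run_path=lambda base, run: os.path.join(base, f'run_{run}', 'value.csv'),
    # Reduce each run's series to a single scalar for the plot
    post_process_fn=lambda series, run: series.mean(),
)

fig.savefig('value_per_run.png')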