diff --git a/diploma_thesis/agents/base/agent.py b/diploma_thesis/agents/base/agent.py index 667b326..20a5551 100644 --- a/diploma_thesis/agents/base/agent.py +++ b/diploma_thesis/agents/base/agent.py @@ -17,16 +17,17 @@ @dataclass class TrainingSample: episode_id: int + records: List[Record] @dataclass class Slice(TrainingSample): - records: List[Record] + pass @dataclass class Trajectory(TrainingSample): - records: List[Record] + pass class Agent(Generic[Key], Loggable, PhaseUpdatable, metaclass=ABCMeta): diff --git a/diploma_thesis/agents/utils/policy/discrete_action.py b/diploma_thesis/agents/utils/policy/discrete_action.py index 4e687c5..e46a90a 100644 --- a/diploma_thesis/agents/utils/policy/discrete_action.py +++ b/diploma_thesis/agents/utils/policy/discrete_action.py @@ -56,12 +56,15 @@ def predict(self, state: State): values = torch.tensor(0, dtype=torch.long) actions = torch.tensor(0, dtype=torch.long) - if self.value_model is not None: - values = self.value_model(state) - if self.action_model is not None: actions = self.action_model(state) + if self.value_model is not None: + values = self.value_model(state) + values = values.expand(-1, self.n_actions) + else: + values = actions + match self.policy_estimation_method: case PolicyEstimationMethod.INDEPENDENT: return values, actions diff --git a/diploma_thesis/agents/utils/return_estimator/gae.py b/diploma_thesis/agents/utils/return_estimator/gae.py index 85d69c4..7b7fc21 100644 --- a/diploma_thesis/agents/utils/return_estimator/gae.py +++ b/diploma_thesis/agents/utils/return_estimator/gae.py @@ -17,7 +17,7 @@ def discount_factor(self) -> float: def update_returns(self, records: List[Record]) -> List[Record]: coef = self._discount_factor * self._lambda - for i in reversed(range(records.batch_size[0])): + for i in reversed(range(len(records))): next_value = 0 if i == len(records) - 1 else records[i + 1].info[Record.ADVANTAGE_KEY] value = records[i].info[Record.VALUES_KEY] advantage = records[i].reward + self._discount_factor * next_value - value diff --git a/diploma_thesis/agents/utils/return_estimator/n_step.py b/diploma_thesis/agents/utils/return_estimator/n_step.py index 56a6070..7ba7cd2 100644 --- a/diploma_thesis/agents/utils/return_estimator/n_step.py +++ b/diploma_thesis/agents/utils/return_estimator/n_step.py @@ -53,13 +53,13 @@ def update_returns(self, records: List[Record]) -> List[Record]: lambdas = torch.cumprod(lambdas, dim=0) - for i in range(records.batch_size[0]): + for i in range(len(records)): action = records[i].action - next_state_value = records[i + 1].info[Record.VALUES_KEY] if i + 1 < len(records) else 0 + next_state_value = records[i + 1].info[Record.VALUES_KEY][action] if i + 1 < len(records) else 0 next_state_value *= self.configuration.discount_factor - td_errors += [records[i].reward + next_state_value - records[i].info[Record.VALUES_KEY]] + td_errors += [records[i].reward + next_state_value - records[i].info[Record.VALUES_KEY][action]] if self.configuration.off_policy: action_probs = torch.nn.functional.softmax(records[i].info[Record.ACTION_KEY], dim=0) @@ -73,7 +73,7 @@ def update_returns(self, records: List[Record]) -> List[Record]: else: off_policy_weights += [1] - for i in range(records.batch_size[0]): + for i in range(len(records)): g = records[i].info[Record.VALUES_KEY][records[i].action] n = min(self.configuration.n, len(records) - i) diff --git a/diploma_thesis/agents/utils/rl/rl.py b/diploma_thesis/agents/utils/rl/rl.py index 5079e05..a5d9d24 100644 --- a/diploma_thesis/agents/utils/rl/rl.py +++ 
b/diploma_thesis/agents/utils/rl/rl.py @@ -73,14 +73,13 @@ def store(self, sample: TrainingSample): records = self.__prepare__(sample) records.info['episode'] = torch.full(records.reward.shape, sample.episode_id, device=records.reward.device) - self.memory.store(records.view(-1)) + self.memory.store(records) if self.train_schedule != TrainSchedule.ON_STORE: return self.__train__(sample.model) - def clear(self): self.loss_cache = [] self.memory.clear() @@ -102,6 +101,7 @@ def __prepare__(self, sample: TrainingSample) -> Record: match sample: case Trajectory(_, records): updated = self.return_estimator.update_returns(records) + updated = [record.view(-1) for record in updated] updated = torch.cat(updated, dim=0) return updated @@ -111,6 +111,6 @@ def __prepare__(self, sample: TrainingSample) -> Record: updated = self.return_estimator.update_returns(records) - return updated[0] + return updated[0].view(-1) case _: raise ValueError(f'Unknown sample type: {type(sample)}') diff --git a/diploma_thesis/configuration/jsp_stream_experiment.yml b/diploma_thesis/configuration/jsp_stream_experiment.yml index 89abcac..99fe70d 100644 --- a/diploma_thesis/configuration/jsp_stream_experiment.yml +++ b/diploma_thesis/configuration/jsp_stream_experiment.yml @@ -1,6 +1,6 @@ # Task to determine the best run configuration + state encoding + reward for the model -template: &template 'marl_indirect' +template: &template 'marl_direct' ############################################################################################### @@ -38,7 +38,7 @@ marl_ddqn: &marl_ddqn template: *template mods: - 'agent/dqn/ddqn.yml' - - 'util/agent/multi_agent.yml' + - 'util/agent/centralized.yml' marl_dueling_ddqn: &marl_dueling_ddqn base_path: 'configuration/mods/machine/dqn.yml' @@ -46,7 +46,7 @@ marl_dueling_ddqn: &marl_dueling_ddqn mods: - 'agent/dqn/ddqn.yml' - 'agent/dqn/dueling.yml' - - 'util/agent/multi_agent.yml' + - 'util/agent/centralized.yml' marl_dueling_ddqn_pr: &marl_dueling_ddqn_pr base_path: 'configuration/mods/machine/dqn.yml' @@ -55,7 +55,7 @@ marl_dueling_ddqn_pr: &marl_dueling_ddqn_pr - 'agent/dqn/ddqn.yml' - 'agent/dqn/dueling.yml' - 'agent/dqn/prioritized.yml' - - 'util/agent/multi_agent.yml' + - 'util/agent/centralized.yml' marl_dueling_ddqn_n_step: &marl_dueling_ddqn_n_step base_path: 'configuration/mods/machine/dqn.yml' @@ -63,7 +63,7 @@ marl_dueling_ddqn_n_step: &marl_dueling_ddqn_n_step mods: - 'agent/dqn/ddqn.yml' - 'agent/dqn/dueling.yml' - - 'util/agent/multi_agent.yml' + - 'util/agent/centralized.yml' - 'agent/dqn/3_step.yml' reinforce: &reinforce @@ -85,10 +85,15 @@ reward: &reward - kind: 'global_decomposed_tardiness' parameters: span: 256 + - kind: 'global_mdpi' + parameters: + span: 256 + - kind: 'surrogate_slack' - kind: 'surrogate_tardiness' intermediate_reward: &intermediate_reward - kind: 'surrogate_tardiness' + - kind: 'surrogate_slack' ############################################################################################## @@ -165,9 +170,9 @@ short_multiple_source_run: &short_multiple_source_run task: kind: 'multi_task' - n_workers: 4 - debug: True - output_dir: 'results/jsp' + n_workers: 10 + debug: False + output_dir: 'results/jsp/marl_direct' tasks: # TD @@ -176,7 +181,7 @@ task: parameters: base: name: 'model' - output_dir: 'td' + output_dir: 'td_single_source' log_stdout: False machine_agent: @@ -219,6 +224,7 @@ task: base_path: 'configuration/mods/run/run.yml' mods: - 'n_workers/2.yml' + - 'timeline/warmup.yml' nested: parameters: simulations: @@ -257,18 +263,101 @@ task: 
values: __concat__: - *long_single_source_run - - *long_multiple_source_run - *short_single_source_run + + # TD - multiple source + - kind: 'multi_value' + parameters: + base: + name: 'model' + output_dir: 'td_multiple_source' + log_stdout: False + + machine_agent: + kind: 'mod' + parameters: + base_path: 'configuration/mods/machine_agent/model.yml' + mods: [ ] + + work_center_agent: + kind: 'static' + parameters: + model: + kind: 'static' + parameters: + rule: 'et' + encoder: + kind: 'plain' + + tape: + machine_reward: + kind: 'global_tardiness' + parameters: + span: 256 + + work_center_reward: + kind: 'no' + + simulator: + kind: 'td' + parameters: + memory: 1 + + graph: + transition_model: + kind: 'no' + + run: + kind: 'mod' + parameters: + base_path: 'configuration/mods/run/run.yml' + mods: + - 'n_workers/2.yml' + - 'timeline/warmup.yml' + nested: + parameters: + simulations: + - name: '' + kind: 'multi_value' + parameters: + base: + kind: 'mod' + parameters: + base_path: 'configuration/mods/simulation/simulation.yml' + mods: [ ] + values: + + + values: + machine_agent: + parameters: + - *dqn + - *dueling_ddqn + - *dueling_ddqn_pr + + machine_reward: + *reward + + run: + parameters: + nested: + parameters: + simulations: + __0__: + parameters: + values: + __concat__: + - *long_multiple_source_run - *short_multiple_source_run - # TD - n-step short + # TD - n-step short single source # We can evaluate all models, which performs n-step return estimation with intermediate rewards - kind: 'multi_value' parameters: base: name: 'model' - output_dir: 'td_n' + output_dir: 'td_n_single_source' log_stdout: False machine_agent: @@ -311,6 +400,7 @@ task: base_path: 'configuration/mods/run/run.yml' mods: - 'n_workers/4.yml' + - 'timeline/warmup.yml' nested: parameters: simulations: @@ -343,15 +433,97 @@ task: values: __concat__: - *short_single_source_run + + # TD - n-step short multiple source + # We can evaluate all models, which performs n-step return estimation with intermediate rewards + + - kind: 'multi_value' + parameters: + base: + name: 'model' + output_dir: 'td_n_multiple_source' + log_stdout: False + + machine_agent: + kind: 'mod' + parameters: + base_path: 'configuration/mods/machine_agent/model.yml' + mods: [ ] + + work_center_agent: + kind: 'static' + parameters: + model: + kind: 'static' + parameters: + rule: 'et' + encoder: + kind: 'plain' + + tape: + machine_reward: + kind: 'global_tardiness' + parameters: + span: 256 + + work_center_reward: + kind: 'no' + + simulator: + kind: 'td' + parameters: + memory: 10 + + graph: + transition_model: + kind: 'no' + + run: + kind: 'mod' + parameters: + base_path: 'configuration/mods/run/run.yml' + mods: + - 'n_workers/4.yml' + - 'timeline/warmup.yml' + nested: + parameters: + simulations: + - name: '' + kind: 'multi_value' + parameters: + base: + kind: 'mod' + parameters: + base_path: 'configuration/mods/simulation/simulation.yml' + mods: [ ] + values: + values: + machine_agent: + parameters: + - *dueling_ddqn_pr_n_step + + tape: + machine_reward: + *intermediate_reward + + run: + parameters: + nested: + parameters: + simulations: + __0__: + parameters: + values: + __concat__: - *short_multiple_source_run - # TD - n-step long + # TD - n-step long single source # The memory is pretty large to consider it as episode - kind: 'multi_value' parameters: base: name: 'model' - output_dir: 'td_long' + output_dir: 'td_long_single_source' log_stdout: False machine_agent: @@ -396,6 +568,7 @@ task: base_path: 'configuration/mods/run/run.yml' mods: - 
'n_workers/2.yml' + - 'timeline/warmup.yml' nested: parameters: simulations: @@ -433,8 +606,95 @@ task: - *long_single_source_run - *long_multiple_source_run -# Episodic + # TD - n-step long multiple source + # The memory is pretty large to consider it as episode + - kind: 'multi_value' + parameters: + base: + name: 'model' + output_dir: 'td_long_multiple_source' + log_stdout: False + + machine_agent: + kind: 'mod' + parameters: + base_path: 'configuration/mods/machine_agent/model.yml' + mods: [ ] + + work_center_agent: + kind: 'static' + parameters: + model: + kind: 'static' + parameters: + rule: 'et' + encoder: + kind: 'plain' + + tape: + machine_reward: + kind: 'global_tardiness' + parameters: + span: 256 + + work_center_reward: + kind: 'no' + simulator: + kind: 'td' + parameters: + memory: 32 + send_as_trajectory: True + next_state_record_mode: 'on_next_action' + + graph: + transition_model: + kind: 'no' + + run: + kind: 'mod' + parameters: + base_path: 'configuration/mods/run/run.yml' + mods: + - 'n_workers/2.yml' + - 'timeline/warmup.yml' + nested: + parameters: + simulations: + - name: '' + kind: 'multi_value' + parameters: + base: + kind: 'mod' + parameters: + base_path: 'configuration/mods/simulation/simulation.yml' + mods: [ ] + values: + + values: + machine_agent: + parameters: + - *dueling_ddqn_pr_n_step + - *reinforce + - *ppo + + tape: + machine_reward: + *intermediate_reward + + run: + parameters: + nested: + parameters: + simulations: + __0__: + parameters: + values: + __concat__: + - *long_single_source_run + - *long_multiple_source_run + + # Episodic single source - kind: 'multi_value' parameters: base: @@ -482,6 +742,7 @@ task: base_path: 'configuration/mods/run/run.yml' mods: - 'n_workers/4.yml' + - 'timeline/warmup.yml' nested: parameters: simulations: @@ -523,5 +784,92 @@ task: values: __concat__: - *short_single_source_run + + # Episodic multiple source + - kind: 'multi_value' + parameters: + base: + name: 'model' + output_dir: 'episodic' + log_stdout: False + + machine_agent: + kind: 'mod' + parameters: + base_path: 'configuration/mods/machine_agent/model.yml' + mods: [ ] + + work_center_agent: + kind: 'static' + parameters: + model: + kind: 'static' + parameters: + rule: 'et' + encoder: + kind: 'plain' + + tape: + next_state_record_mode: 'on_next_action' + + machine_reward: + kind: 'global_tardiness' + parameters: + span: 256 + + work_center_reward: + kind: 'no' + + simulator: + kind: 'episodic' + + graph: + transition_model: + kind: 'no' + + run: + kind: 'mod' + parameters: + base_path: 'configuration/mods/run/run.yml' + mods: + - 'n_workers/4.yml' + - 'timeline/warmup.yml' + nested: + parameters: + simulations: + - name: '' + kind: 'multi_value' + parameters: + base: + kind: 'mod' + parameters: + base_path: 'configuration/mods/simulation/simulation.yml' + mods: [ ] + values: + + values: + machine_agent: + parameters: + - *dqn + - *dueling_ddqn + - *dueling_ddqn_pr + - *reinforce + - *ppo + - *marl_dueling_ddqn_n_step + - *dueling_ddqn_pr_n_step + + tape: + machine_reward: + *reward + + run: + parameters: + nested: + parameters: + simulations: + __0__: + parameters: + values: + __concat__: - *short_multiple_source_run diff --git a/diploma_thesis/configuration/mods/machine/mods/util/rules/all_rules.yml b/diploma_thesis/configuration/mods/machine/mods/util/rules/all_rules.yml index 21f159c..dd83cfe 100644 --- a/diploma_thesis/configuration/mods/machine/mods/util/rules/all_rules.yml +++ b/diploma_thesis/configuration/mods/machine/mods/util/rules/all_rules.yml @@ -1,2 
+1,3 @@ -rules: 'all' \ No newline at end of file +__rules__: + rules: 'all' \ No newline at end of file diff --git a/diploma_thesis/configuration/mods/machine/mods/util/rules/idle.yml b/diploma_thesis/configuration/mods/machine/mods/util/rules/idle.yml index 54c2d7f..9d0f6f0 100644 --- a/diploma_thesis/configuration/mods/machine/mods/util/rules/idle.yml +++ b/diploma_thesis/configuration/mods/machine/mods/util/rules/idle.yml @@ -1,2 +1,3 @@ -idle: True \ No newline at end of file +__rules__: + idle: True \ No newline at end of file diff --git a/diploma_thesis/configuration/mods/run/mods/timeline/warmup.yml b/diploma_thesis/configuration/mods/run/mods/timeline/warmup.yml index f30f41e..67c90ba 100644 --- a/diploma_thesis/configuration/mods/run/mods/timeline/warmup.yml +++ b/diploma_thesis/configuration/mods/run/mods/timeline/warmup.yml @@ -2,6 +2,6 @@ parameters: timeline: warmup: - - 5000 - - 5000 + - 1000 + - 1000 diff --git a/diploma_thesis/configuration/mods/run/run.yml b/diploma_thesis/configuration/mods/run/run.yml index 563ec70..2b99186 100644 --- a/diploma_thesis/configuration/mods/run/run.yml +++ b/diploma_thesis/configuration/mods/run/run.yml @@ -4,8 +4,8 @@ kind: 'plain' parameters: timeline: warmup: - - 5000 - - 5000 + - 500 + - 500 duration: 100000 diff --git a/diploma_thesis/configuration/mods/simulation/simulation.yml b/diploma_thesis/configuration/mods/simulation/simulation.yml index c6f48cd..ce39255 100644 --- a/diploma_thesis/configuration/mods/simulation/simulation.yml +++ b/diploma_thesis/configuration/mods/simulation/simulation.yml @@ -21,7 +21,7 @@ parameters: uniform: [ 1, 50 ] noise: [ 0, 10 ] permutation: - uneveness: 2 + uneveness: 0 due_time: kind: 'uniform' parameters: diff --git a/diploma_thesis/configuration/simulation.yml b/diploma_thesis/configuration/simulation.yml index 4b5e871..9c0edfc 100644 --- a/diploma_thesis/configuration/simulation.yml +++ b/diploma_thesis/configuration/simulation.yml @@ -11,9 +11,11 @@ task: parameters: base_path: 'configuration/mods/machine/dqn.yml' template: 'marl_direct' - mods: [ - - ] + mods: + - 'agent/dqn/ddqn.yml' + - 'agent/dqn/dueling.yml' + - 'util/agent/centralized.yml' + - 'util/rules/all_rules.yml' work_center_agent: kind: 'static' @@ -27,7 +29,7 @@ task: tape: machine_reward: - kind: 'surrogate_tardiness' + kind: 'global_mdpi' parameters: span: 256 @@ -51,12 +53,27 @@ task: is_work_center_set_in_shop_floor_connected: True simulator: - kind: 'td' + kind: 'episodic' parameters: memory: 5 run: kind: 'mod' parameters: - base_path: 'configuration/mods/run/td_jsp/run.yml' + base_path: 'configuration/mods/run/run.yml' mods: [] + nested: + parameters: + simulations: + - kind: 'mod' + parameters: + base_path: 'configuration/mods/simulation/simulation.yml' + mods: + - 'duration/256.yml' + - 'size/jsp/5.yml' + - kind: 'mod' + parameters: + base_path: 'configuration/mods/simulation/simulation.yml' + mods: + - 'duration/256.yml' + - 'size/jsp/5.yml' \ No newline at end of file diff --git a/diploma_thesis/simulator/graph/graph_model.py b/diploma_thesis/simulator/graph/graph_model.py index 1fc7088..0fa9dad 100644 --- a/diploma_thesis/simulator/graph/graph_model.py +++ b/diploma_thesis/simulator/graph/graph_model.py @@ -114,7 +114,7 @@ def did_complete(self, context: Context, job: Job): job_id = self.cache[sid].completed_job_ids.pop(0) job = context.shop_floor.job(job_id) - self.cache[sid] = self.transition_model.remove(job, context.shop_floor, self.cache[sid]) + self.cache[sid].graph = self.transition_model.remove(job, 
context.shop_floor, self.cache[sid]) self.cache[sid].did_change_jobs = True self.__remove_job_from_operation_map__(context, job) diff --git a/diploma_thesis/simulator/graph/util/encoder.py b/diploma_thesis/simulator/graph/util/encoder.py index a4171b0..522c99c 100644 --- a/diploma_thesis/simulator/graph/util/encoder.py +++ b/diploma_thesis/simulator/graph/util/encoder.py @@ -27,7 +27,10 @@ def encode(self, did_change_jobs: bool, graph: Graph, shop_floor: ShopFloor, - job_operation_map: JOB_OPERATION_MAP_TYPE) -> Graph: + job_operation_map: JOB_OPERATION_MAP_TYPE) -> Graph | None: + if graph is None: + return None + result = self.__construct_initial_graph__() if previous is None else previous if previous is None: diff --git a/diploma_thesis/simulator/tape/machine/__init__.py b/diploma_thesis/simulator/tape/machine/__init__.py index 4203f0d..70cebd0 100644 --- a/diploma_thesis/simulator/tape/machine/__init__.py +++ b/diploma_thesis/simulator/tape/machine/__init__.py @@ -7,14 +7,18 @@ from .no import No from .global_tardiness import GlobalTardiness from .global_decomposed_tardiness import GlobalDecomposedTardiness -from .surrogate_tardiness import SurrogateTardinessReward +from .surrogate_tardiness import SurrogateTardiness +from .global_MDPI_reward import GlobalMDPI +from .surrogate_slack import SurrogateSlack key_to_cls = { 'no': No, 'global_tardiness': GlobalTardiness, 'global_decomposed_tardiness': GlobalDecomposedTardiness, - 'surrogate_tardiness': SurrogateTardinessReward + 'global_mdpi': GlobalMDPI, + 'surrogate_tardiness': SurrogateTardiness, + 'surrogate_slack': SurrogateSlack } diff --git a/diploma_thesis/simulator/tape/machine/global_MDPI_reward.py b/diploma_thesis/simulator/tape/machine/global_MDPI_reward.py index b872c91..35a9fd7 100644 --- a/diploma_thesis/simulator/tape/machine/global_MDPI_reward.py +++ b/diploma_thesis/simulator/tape/machine/global_MDPI_reward.py @@ -1,4 +1,65 @@ +from dataclasses import dataclass +from typing import List, Dict + +import torch + +from environment import Job, Machine +from .reward import MachineReward, RewardList # Reward from paper # Combining Reinforcement Learning Algorithms with Graph Neural Networks to Solve Dynamic Job Shop Scheduling Problems + + +class GlobalMDPI(MachineReward): + """ + Reward from Deep-MARL external/PhD-Thesis-Projects/JSP/machine.py:693 + """ + + @dataclass + class Context: + step_idx: int + job: Job + machine: Machine + + @dataclass + class Configuration: + span: int = 256 + + @staticmethod + def from_cli(parameters: Dict) -> 'GlobalMDPI.Configuration': + return GlobalMDPI.Configuration(span=parameters.get('span', 256)) + + def __init__(self, configuration: Configuration): + self.configuration = configuration + + def record_job_action(self, job: Job, machine: Machine, moment: float) -> Context: + return self.Context(job.current_step_idx, job, machine) + + def reward_after_production(self, context: Context) -> torch.FloatTensor | None: + return None + + def reward_after_completion(self, contexts: List[Context]): + job = contexts[0].job + work_center_idx = torch.tensor([job.step_idx[c.step_idx] for c in contexts]) + machine_idx = torch.tensor([c.job.history.arrived_machine_idx[c.step_idx] for c in contexts]) + reward = torch.zeros_like(work_center_idx, dtype=torch.float) + + if contexts[0].job.is_tardy_upon_completion: + tardy_rate = torch.clip(contexts[0].job.tardiness_upon_completion / self.configuration.span, 0, 1) + + if tardy_rate < 0.05: + reward += tardy_rate * job.step_idx.shape[0] + else: + reward -= 5 + else: 
+ reward += job.step_idx.shape[0] + + return RewardList(indices=torch.arange(len(contexts)), + units=torch.vstack([work_center_idx, machine_idx]), + reward=reward, + batch_size=[]) + + @staticmethod + def from_cli(parameters) -> MachineReward: + return GlobalMDPI(GlobalMDPI.Configuration.from_cli(parameters)) diff --git a/diploma_thesis/simulator/tape/machine/global_tardiness.py b/diploma_thesis/simulator/tape/machine/global_tardiness.py index 3e6ed13..b22cf09 100644 --- a/diploma_thesis/simulator/tape/machine/global_tardiness.py +++ b/diploma_thesis/simulator/tape/machine/global_tardiness.py @@ -49,7 +49,7 @@ def reward_after_completion(self, contexts: List[Context]): return RewardList(indices=torch.arange(len(contexts)), units=torch.vstack([work_center_idx, machine_idx]), reward=reward, - batch_size=work_center_idx.shape) + batch_size=[]) @staticmethod def from_cli(parameters) -> MachineReward: diff --git a/diploma_thesis/simulator/tape/machine/surrogate_slack.py b/diploma_thesis/simulator/tape/machine/surrogate_slack.py index 08ccc22..458f7ed 100644 --- a/diploma_thesis/simulator/tape/machine/surrogate_slack.py +++ b/diploma_thesis/simulator/tape/machine/surrogate_slack.py @@ -11,7 +11,7 @@ # Dynamic jobshop scheduling algorithm based on deep q network -class SurrogateSlackReward(MachineReward): +class SurrogateSlack(MachineReward): @dataclass class Context: @@ -25,8 +25,8 @@ class Configuration: release_reward_after_completion: bool = False @staticmethod - def from_cli(parameters: Dict) -> 'SurrogateSlackReward.Configuration': - return SurrogateSlackReward.Configuration( + def from_cli(parameters: Dict) -> 'SurrogateSlack.Configuration': + return SurrogateSlack.Configuration( release_reward_after_completion=parameters.get('release_reward_after_completion', False) ) @@ -67,8 +67,8 @@ def reward_after_completion(self, contexts: List[Context]): ) def __compute_reward__(self, context: Context): - return context.slack.mean() / (context.remaining_processing_time.mean() + 0.01) + return - context.slack.mean() / (context.remaining_processing_time.mean() + 0.01) @staticmethod def from_cli(parameters: Dict) -> MachineReward: - return SurrogateSlackReward(SurrogateSlackReward.Configuration.from_cli(parameters)) + return SurrogateSlack(SurrogateSlack.Configuration.from_cli(parameters)) diff --git a/diploma_thesis/simulator/tape/machine/surrogate_tardiness.py b/diploma_thesis/simulator/tape/machine/surrogate_tardiness.py index 21b525c..a85718d 100644 --- a/diploma_thesis/simulator/tape/machine/surrogate_tardiness.py +++ b/diploma_thesis/simulator/tape/machine/surrogate_tardiness.py @@ -7,7 +7,7 @@ from .reward import MachineReward, RewardList -class SurrogateTardinessReward(MachineReward): +class SurrogateTardiness(MachineReward): """ Reward from Deep-MARL external/PhD-Thesis-Projects/JSP/machine.py:693 """ @@ -29,8 +29,8 @@ class Configuration: release_reward_after_completion: bool = False @staticmethod - def from_cli(parameters: dict) -> 'SurrogateTardinessReward.Configuration': - return SurrogateTardinessReward.Configuration( + def from_cli(parameters: dict) -> 'SurrogateTardiness.Configuration': + return SurrogateTardiness.Configuration( critical_level_factor=parameters.get('critical_level_factor', 64), winq_factor=parameters.get('winq_factor', 0.2), span=parameters.get('span', 20), @@ -69,10 +69,14 @@ def reward_after_completion(self, contexts: List[Context]): if not self.configuration.release_reward_after_completion: return None + work_center_idx = torch.tensor([c.work_center_idx for c in 
contexts]) + machine_idx = torch.tensor([c.machine_idx for c in contexts]) + return RewardList( indices=torch.arange(len(contexts)), - units=torch.vstack([context.work_center_idx for context in contexts]), + units=torch.vstack([work_center_idx, machine_idx]), reward=torch.stack([self.__compute_reward__(context) for context in contexts]), + batch_size=[] ) def __compute_reward__(self, context: Context): @@ -104,4 +108,4 @@ def __compute_reward__(self, context: Context): @staticmethod def from_cli(parameters) -> MachineReward: - return SurrogateTardinessReward(configuration=SurrogateTardinessReward.Configuration.from_cli(parameters)) + return SurrogateTardiness(configuration=SurrogateTardiness.Configuration.from_cli(parameters)) diff --git a/diploma_thesis/simulator/td.py b/diploma_thesis/simulator/td.py index 5d57a0e..0ab4fb0 100644 --- a/diploma_thesis/simulator/td.py +++ b/diploma_thesis/simulator/td.py @@ -49,7 +49,7 @@ def __store_or_forward_td__(self, context: Context, queue: Queue, agent, key, re else: agent.store(key, Slice(episode_id=context.shop_floor.id, records=records)) - queue.store_group(context.shop_floor.id, key, records[1:]) + queue.store_group(context.shop_floor.id, key, records[1:]) return diff --git a/diploma_thesis/utils/modified.py b/diploma_thesis/utils/modified.py index ae2d190..2778b0d 100644 --- a/diploma_thesis/utils/modified.py +++ b/diploma_thesis/utils/modified.py @@ -25,6 +25,10 @@ def modified(parameters): mod = yaml.safe_load(file) base_parameters = merge_dicts(base_parameters, mod) + for key, value in template.items(): + if key in base_parameters: + template[key] = base_parameters[key] + base_parameters = __apply_template__(base_parameters, template) if 'nested' in parameters:
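
Reviewer notes on selected hunks (reference sketches only, not part of the patch):

The discrete_action.py hunk evaluates the action head first and, when a separate value head exists, broadcasts its per-state output across all n_actions before the two tensors are combined according to policy_estimation_method. The combination itself is outside the hunk; the sketch below shows the standard dueling aggregation Q = V + A - mean(A), which is an assumption about that branch, not code taken from the repository.

import torch

def dueling_q_values(value: torch.Tensor, advantages: torch.Tensor) -> torch.Tensor:
    """Combine a state value V(s) of shape [B, 1] with advantages A(s, a) of shape
    [B, n_actions] into Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)."""
    value = value.expand(-1, advantages.shape[-1])              # broadcast V(s) to every action
    return value + advantages - advantages.mean(dim=-1, keepdim=True)

# usage: batch of 2 states, 3 actions
v = torch.tensor([[1.0], [0.5]])
a = torch.tensor([[0.2, -0.1, 0.4], [0.0, 0.3, -0.3]])
print(dueling_q_values(v, a))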
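
The gae.py change only replaces records.batch_size[0] with len(records); the loop around it is the usual backward GAE recursion. For reference, a self-contained version over plain Python lists, with the repository's Record/info API abstracted away and a terminal bootstrap value of zero assumed:

from typing import List

def gae_advantages(rewards: List[float], values: List[float],
                   gamma: float, lam: float) -> List[float]:
    """Generalized Advantage Estimation:
    delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    A_t     = delta_t + gamma * lam * A_{t+1}, computed backwards in time."""
    advantages = [0.0] * len(rewards)
    next_value, next_advantage = 0.0, 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * next_value - values[t]
        advantages[t] = delta + gamma * lam * next_advantage
        next_value, next_advantage = values[t], advantages[t]
    return advantages

# usage
print(gae_advantages([1.0, 0.0, 1.0], [0.5, 0.4, 0.2], gamma=0.99, lam=0.95))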
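
In n_step.py the stored VALUES_KEY entry is now treated as a per-action tensor, so both the bootstrap term and the TD error index it with an action, i.e. they read Q(s, a) rather than a state value. The sketch below computes one-step TD errors in the textbook SARSA form, bootstrapping with the next record's own action; it is illustrative only and not a drop-in for the repository's Record structures or its off-policy weighting.

from typing import List
import torch

def td_errors(rewards: List[float], q_values: List[torch.Tensor],
              actions: List[int], gamma: float) -> List[float]:
    """delta_t = r_t + gamma * Q(s_{t+1}, a_{t+1}) - Q(s_t, a_t), with a zero bootstrap
    at the final transition."""
    errors = []
    for t in range(len(rewards)):
        bootstrap = q_values[t + 1][actions[t + 1]].item() if t + 1 < len(rewards) else 0.0
        errors.append(rewards[t] + gamma * bootstrap - q_values[t][actions[t]].item())
    return errors

# usage: 3 steps, 2 actions
q = [torch.tensor([0.2, 0.5]), torch.tensor([0.1, 0.3]), torch.tensor([0.0, 0.4])]
print(td_errors([1.0, 0.0, 1.0], q, [1, 0, 1], gamma=0.99))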
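
The new GlobalMDPI reward is released only on job completion: every operation of an on-time job is credited with the job's operation count, a slightly tardy job (clipped tardiness/span below 0.05) earns tardy_rate times that count per operation, and anything tardier gets a flat -5 per operation. A scalar stand-alone version of that rule, with the 0.05 threshold, the -5 penalty and the span default taken from the hunk and the Record/RewardList plumbing omitted:

def mdpi_completion_reward(is_tardy: bool, tardiness: float, n_operations: int,
                           span: float = 256.0) -> float:
    """Per-operation reward shared by all operations of a completed job:
    - not tardy: each operation gets the job's operation count,
    - slightly tardy (clipped tardiness/span < 0.05): tardy_rate * operation count,
    - otherwise: a flat -5."""
    if not is_tardy:
        return float(n_operations)
    tardy_rate = min(max(tardiness / span, 0.0), 1.0)
    if tardy_rate < 0.05:
        return tardy_rate * n_operations
    return -5.0

# usage
print(mdpi_completion_reward(False, 0.0, 5))     # 5.0
print(mdpi_completion_reward(True, 10.0, 5))     # 10/256 ~ 0.039 -> ~0.195
print(mdpi_completion_reward(True, 100.0, 5))    # -5.0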
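
The modified.py hunk copies any template key that is also present in the merged base parameters back into the template before __apply_template__ runs, so values supplied by mod files take precedence over the template's defaults. A small dict-level illustration of that precedence; the function name here is a placeholder, not one of the repository's helpers:

def override_template(template: dict, base_parameters: dict) -> dict:
    """Keys already present in base_parameters win over the template's defaults."""
    return {key: base_parameters.get(key, value) for key, value in template.items()}

# usage
template = {'optimizer': 'adam', 'lr': 1e-3}
base_parameters = {'lr': 5e-4, 'batch_size': 128}
print(override_template(template, base_parameters))   # {'optimizer': 'adam', 'lr': 0.0005}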