diff --git a/diploma_thesis/agents/machine/model/model.py b/diploma_thesis/agents/machine/model/model.py
index 1cd1744..8b0e90c 100644
--- a/diploma_thesis/agents/machine/model/model.py
+++ b/diploma_thesis/agents/machine/model/model.py
@@ -1,14 +1,14 @@
-from environment import Job, WaitInfo
+from environment import Job
 
 from agents.base.model import *
 from agents.machine import MachineInput
 
 
-class MachineModel(Model[MachineInput, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class MachineModel(Model[MachineInput, State, Action, Job | None], metaclass=ABCMeta):
     Input = MachineInput
 
 
-class NNMachineModel(Model[MachineInput, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class NNMachineModel(Model[MachineInput, State, Action, Job | None], metaclass=ABCMeta):
     Input = MachineInput
diff --git a/diploma_thesis/agents/machine/model/multi_rule_linear.py b/diploma_thesis/agents/machine/model/multi_rule_linear.py
index 8e0872c..9143ccc 100644
--- a/diploma_thesis/agents/machine/model/multi_rule_linear.py
+++ b/diploma_thesis/agents/machine/model/multi_rule_linear.py
@@ -1,10 +1,16 @@
-from agents.utils import DeepRule
+from typing import List
+
+from agents.utils import DeepRule, NNCLI, ActionSelector
 from .model import *
 from .rule import ALL_SCHEDULING_RULES, SchedulingRule
 
 
 class MultiRuleLinear(NNMachineModel, DeepRule):
 
+    def __init__(self, rules: List[SchedulingRule], model: NNCLI, action_selector: ActionSelector):
+        NNMachineModel.__init__(self)
+        DeepRule.__init__(self, rules, model, action_selector)
+
     def __call__(self, state: State, parameters: MachineModel.Input) -> MachineModel.Record:
         return DeepRule.__call__(self, state, parameters)
 
@@ -15,4 +21,14 @@ def all_rules(cls):
     def make_result(
         self, rule: SchedulingRule, parameters: MachineModel.Input, state: State, action: Action
     ) -> MachineModel.Record:
-        return MachineModel.Record(result=rule(state, parameters), state=state, action=action)
+        return MachineModel.Record(
+            result=rule(machine=parameters.machine, now=parameters.now),
+            state=state,
+            action=action
+        )
+
+    def clone(self):
+        new_model = self.model.clone()
+
+        return MultiRuleLinear(rules=self.rules, model=new_model, action_selector=self.action_selector)
+
diff --git a/diploma_thesis/agents/machine/model/rule/scheduling_rule.py b/diploma_thesis/agents/machine/model/rule/scheduling_rule.py
index 3482ef2..cad8c27 100644
--- a/diploma_thesis/agents/machine/model/rule/scheduling_rule.py
+++ b/diploma_thesis/agents/machine/model/rule/scheduling_rule.py
@@ -2,7 +2,7 @@
 import torch
 
 from abc import ABCMeta, abstractmethod
-from environment import Machine, Job, WaitInfo, JobReductionStrategy
+from environment import Machine, Job, JobReductionStrategy
 
 
 class SchedulingRule(metaclass=ABCMeta):
diff --git a/diploma_thesis/agents/machine/rl.py b/diploma_thesis/agents/machine/rl.py
index 152dd9f..4f04ed1 100644
--- a/diploma_thesis/agents/machine/rl.py
+++ b/diploma_thesis/agents/machine/rl.py
@@ -19,12 +19,11 @@ def is_trainable(self):
         return True
 
     @filter(lambda self: self.phase == TrainingPhase())
-    @filter(lambda self: len(self.memory) > 0)
     def train_step(self):
         self.trainer.train_step(self.model)
 
-    @filter(lambda self: self.phase == TrainingPhase())
-    def store(self, record: Record):
+    @filter(lambda self, *args: self.phase == TrainingPhase())
+    def store(self, key: MachineKey, record: Record):
         self.trainer.store(record)
 
     def schedule(self, parameters):
diff --git a/diploma_thesis/agents/machine/static.py b/diploma_thesis/agents/machine/static.py
index 1d690c3..0040871 100644
--- a/diploma_thesis/agents/machine/static.py
+++ b/diploma_thesis/agents/machine/static.py
@@ -1,6 +1,8 @@
 from typing import Dict
 
 from .machine import *
+from ..base.agent import Key
+from ..utils.memory import Record
 
 
 class StaticMachine(Machine):
@@ -15,6 +17,9 @@ def is_trainable(self):
     def train_step(self):
         pass
 
+    def store(self, key: Key, record: Record):
+        pass
+
     @staticmethod
     def from_cli(parameters: Dict):
         model = model_from_cli(parameters['model'])
diff --git a/diploma_thesis/agents/utils/model/deep_rule.py b/diploma_thesis/agents/utils/model/deep_rule.py
index 4fbec0a..9e155e5 100644
--- a/diploma_thesis/agents/utils/model/deep_rule.py
+++ b/diploma_thesis/agents/utils/model/deep_rule.py
@@ -35,6 +35,10 @@ def all_rules(cls):
     def make_result(self, rule: Rule, parameters: Input, state: State, action) -> Record:
         pass
 
+    @abstractmethod
+    def clone(self):
+        pass
+
     # Main
 
     def update(self, phase: Phase):
@@ -68,11 +72,6 @@ def parameters(self, recurse: bool = True):
     def copy_parameters(self, other: 'DeepRule', decay: float = 1.0):
         self.model.copy_parameters(other.model, decay)
 
-    def clone(self):
-        new_model = self.model.clone()
-
-        return DeepRule(rules=self.rules, model=new_model, action_selector=self.action_selector)
-
     # Utilities
 
     @staticmethod
diff --git a/diploma_thesis/agents/utils/rl/__init__.py b/diploma_thesis/agents/utils/rl/__init__.py
index 389ffbc..d31d7eb 100644
--- a/diploma_thesis/agents/utils/rl/__init__.py
+++ b/diploma_thesis/agents/utils/rl/__init__.py
@@ -16,10 +16,10 @@
 def from_cli(parameters):
-    _parameters = parameters
+    _parameters = parameters['parameters']
 
-    memory = memory_from_cli(parameters['memory'])
-    loss = LossCLI.from_cli(parameters['loss'])
-    optimizer = OptimizerCLI.from_cli(parameters['optimizer'])
+    memory = memory_from_cli(_parameters['memory'])
+    loss = LossCLI.from_cli(_parameters['loss'])
+    optimizer = OptimizerCLI.from_cli(_parameters['optimizer'])
 
     return partial(_from_cli, key_to_class=key_to_class, memory=memory, loss=loss, optimizer=optimizer)(parameters)
diff --git a/diploma_thesis/agents/utils/rl/dqn.py b/diploma_thesis/agents/utils/rl/dqn.py
index ec96d88..50e9314 100644
--- a/diploma_thesis/agents/utils/rl/dqn.py
+++ b/diploma_thesis/agents/utils/rl/dqn.py
@@ -4,6 +4,7 @@
 import tensordict
 import torch
 
+from utils import filter
 from agents.utils.rl.rl import *
 
 
@@ -37,8 +38,9 @@ def __init__(self,
     def configure(self, model: NNModel):
         super().configure(model)
 
-        self.target_model = model.copy()
+        self.target_model = model.clone()
 
+    @filter(lambda self, *args: len(self.memory) > 0)
     def train_step(self, model: NNModel):
         batch, info = self.memory.sample(return_info=True)
         batch: Record | torch.Tensor = torch.squeeze(batch)
@@ -46,9 +48,6 @@ def train_step(self, model: NNModel):
         with torch.no_grad():
             q_values, td_error = self.estimate_q(model, batch)
 
-        if not self.optimizer.is_connected:
-            self.optimizer.connect(model.parameters())
-
         values = model.values(batch.state)
         loss = self.loss(values, q_values)
 
@@ -83,6 +82,6 @@ def estimate_q(self, model: NNModel, batch: Record | tensordict.TensorDictBase):
 
         return q_values, td_error
 
-    @staticmethod
-    def from_cli(parameters, memory: Memory, loss: LossCLI, optimizer: OptimizerCLI):
-        return DeepQTrainer(memory, optimizer, loss, DeepQTrainer.Configuration.from_cli(parameters))
+    @classmethod
+    def from_cli(cls, parameters, memory: Memory, loss: LossCLI, optimizer: OptimizerCLI):
+        return cls(memory, optimizer, loss, DeepQTrainer.Configuration.from_cli(parameters))
diff --git a/diploma_thesis/agents/utils/rl/rl.py b/diploma_thesis/agents/utils/rl/rl.py
index 58b3ce9..cbd7014 100644
--- a/diploma_thesis/agents/utils/rl/rl.py
+++ b/diploma_thesis/agents/utils/rl/rl.py
@@ -18,6 +18,9 @@ def __init__(self, memory: Memory, loss: LossCLI, optimizer: OptimizerCLI):
     def configure(self, model: NNModel):
         self._is_configured = True
 
+        if not self.optimizer.is_connected:
+            self.optimizer.connect(model.parameters())
+
     @property
     def is_configured(self):
         return self._is_configured
diff --git a/diploma_thesis/agents/workcenter/model/model.py b/diploma_thesis/agents/workcenter/model/model.py
index 61b34b5..fdcd096 100644
--- a/diploma_thesis/agents/workcenter/model/model.py
+++ b/diploma_thesis/agents/workcenter/model/model.py
@@ -1,13 +1,13 @@
-from environment import Job, WaitInfo
+from environment import Job, Machine
 
 from agents.base.model import *
 from agents.workcenter import WorkCenterInput
 
 
-class WorkCenterModel(Model[WorkCenterInput, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class WorkCenterModel(Model[WorkCenterInput, State, Action, Machine | None], metaclass=ABCMeta):
     Input = WorkCenterInput
 
 
-class NNWorkCenterModel(Model[NNModel, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class NNWorkCenterModel(Model[NNModel, State, Action, Machine | None], metaclass=ABCMeta):
     Input = WorkCenterInput
diff --git a/diploma_thesis/agents/workcenter/model/multi_rule_linear.py b/diploma_thesis/agents/workcenter/model/multi_rule_linear.py
index 3047937..b4c67b6 100644
--- a/diploma_thesis/agents/workcenter/model/multi_rule_linear.py
+++ b/diploma_thesis/agents/workcenter/model/multi_rule_linear.py
@@ -1,10 +1,16 @@
-from agents.utils import DeepRule
+from typing import List
+
+from agents.utils import DeepRule, NNCLI, ActionSelector
 from .model import *
 from .rule import ALL_ROUTING_RULES, RoutingRule
 
 
 class MultiRuleLinear(NNWorkCenterModel, DeepRule):
 
+    def __init__(self, rules: List[RoutingRule], model: NNCLI, action_selector: ActionSelector):
+        NNWorkCenterModel.__init__(self)
+        DeepRule.__init__(self, rules, model, action_selector)
+
     def __call__(self, state: State, parameters: WorkCenterModel.Input) -> WorkCenterModel.Record:
         return DeepRule.__call__(self, state, parameters)
 
@@ -15,4 +21,13 @@ def all_rules(cls):
     def make_result(
         self, rule: RoutingRule, parameters: WorkCenterModel.Input, state: State, action: Action
     ) -> WorkCenterModel.Record:
-        return WorkCenterModel.Record(result=rule(state, parameters), state=state, action=action)
+        return WorkCenterModel.Record(
+            result=rule(job=parameters.job, work_center=parameters.work_center),
+            state=state,
+            action=action
+        )
+
+    def clone(self):
+        new_model = self.model.clone()
+
+        return MultiRuleLinear(rules=self.rules, model=new_model, action_selector=self.action_selector)
diff --git a/diploma_thesis/agents/workcenter/rl.py b/diploma_thesis/agents/workcenter/rl.py
index ab6fecd..01a762e 100644
--- a/diploma_thesis/agents/workcenter/rl.py
+++ b/diploma_thesis/agents/workcenter/rl.py
@@ -23,8 +23,8 @@ def is_trainable(self):
     def train_step(self):
         self.trainer.train_step(self.model)
 
-    @filter(lambda self: self.phase == TrainingPhase())
-    def store(self, record: Record):
+    @filter(lambda self, *args: self.phase == TrainingPhase())
+    def store(self, key: WorkCenterKey, record: Record):
         self.trainer.store(record)
 
     def schedule(self, parameters):
diff --git a/diploma_thesis/agents/workcenter/static.py b/diploma_thesis/agents/workcenter/static.py
index b6ef438..9c0921e 100644
--- a/diploma_thesis/agents/workcenter/static.py
+++ b/diploma_thesis/agents/workcenter/static.py
@@ -2,6 +2,8 @@
 
 from .model import StaticWorkCenterModel
 from .work_center import *
+from ..base.agent import Key
+from ..utils.memory import Record
 
 
 class Static(WorkCenter):
@@ -16,6 +18,9 @@ def is_trainable(self):
     def train_step(self):
         pass
 
+    def store(self, key: Key, record: Record):
+        pass
+
     @staticmethod
     def from_cli(parameters: Dict):
         model = model_from_cli(parameters['model'])
diff --git a/diploma_thesis/configuration/fjsp.yml b/diploma_thesis/configuration/fjsp.yml
new file mode 100644
index 0000000..e69de29
diff --git a/diploma_thesis/environment/__init__.py b/diploma_thesis/environment/__init__.py
index 3f3e7e6..51b3404 100644
--- a/diploma_thesis/environment/__init__.py
+++ b/diploma_thesis/environment/__init__.py
@@ -2,7 +2,7 @@
 from .job import Job, JobEvent, ReductionStrategy as JobReductionStrategy
 from .configuration import Configuration
 from .context import Context
-from .agent import Agent, WaitInfo
+from .agent import Agent
 from .delegate import Delegate
 from .machine import Machine, Key as MachineKey, History as MachineHistory
 from .work_center import WorkCenter, Key as WorkCenterKey, History as WorkCenterHistory
diff --git a/diploma_thesis/environment/job.py b/diploma_thesis/environment/job.py
index c2c8cad..ebe4daa 100644
--- a/diploma_thesis/environment/job.py
+++ b/diploma_thesis/environment/job.py
@@ -355,7 +355,8 @@ def wait_time_on_machine(self, step_idx: int):
         Returns: The time that the operation has been waiting for processing on machine
         """
-        assert step_idx < self.current_step_idx, "Operation must be started on machine to compute wait time"
+        assert step_idx < self.current_step_idx or self.is_completed,\
+            "Operation must be started on machine to compute wait time"
 
         return self.history.started_at[step_idx] - self.history.arrived_at_machine[step_idx]
 
@@ -393,14 +394,15 @@ def time_completion_rate(self, strategy: ReductionStrategy = ReductionStrategy.m
     # State Update
 
     def with_event(self, event: Event):
+        # Clone tensors to avoid in-place modification
         match event.kind:
             case JobEvent.Kind.dispatch:
                 self.current_step_idx = torch.tensor(0)
             case JobEvent.Kind.forward:
-                self.current_step_idx += 1
+                self.current_step_idx = self.current_step_idx.clone() + 1
                 self.current_machine_idx = torch.tensor(-1)
             case JobEvent.Kind.arrival_on_machine:
-                self.current_machine_idx = event.machine_idx
+                self.current_machine_idx = event.machine_idx.clone()
             case _:
                 pass
@@ -465,7 +467,7 @@ def __next_remaining_processing_time__(self, step_idx: int, strategy: ReductionS
         Returns: The remaining processing time of the next operation
         """
         result = torch.tensor(0.0, dtype=torch.float)
-        expected_processing_time = self.processing_times[max(self.step_idx + 1, 0):]
+        expected_processing_time = self.processing_times[max(step_idx + 1, 0):]
 
         if expected_processing_time.numel() == 0:
             return result
diff --git a/diploma_thesis/environment/shop_floor.py b/diploma_thesis/environment/shop_floor.py
index 518baa9..e2eab54 100644
--- a/diploma_thesis/environment/shop_floor.py
+++ b/diploma_thesis/environment/shop_floor.py
@@ -255,7 +255,7 @@ def forward(self, job: environment.Job, from_: environment.Machine):
 
     # Decision methods
 
-    def schedule(self, machine: environment.Machine) -> 'environment.Job | environment.WaitInfo':
+    def schedule(self, machine: environment.Machine) -> 'environment.Job | None':
         return self.agent.schedule(self.__make_context__(), machine)
 
     def route(self, work_center: 'environment.WorkCenter', job: environment.Job) -> 'environment.Machine | None':
diff --git a/diploma_thesis/environment/work_center.py b/diploma_thesis/environment/work_center.py
index 45901c1..606ef2c 100644
--- a/diploma_thesis/environment/work_center.py
+++ b/diploma_thesis/environment/work_center.py
@@ -146,10 +146,9 @@ def __dispatch__(self):
             self.shop_floor.did_dispatch(job, self, machine)
 
         self.state.with_flushed_queue()
-
         self.shop_floor.did_finish_dispatch(self)
 
-        self.__starve__()
+        yield self.environment.process(self.__starve__())
 
     def __starve__(self):
         self.on_route = self.environment.event()
diff --git a/diploma_thesis/simulator/simulator.py b/diploma_thesis/simulator/simulator.py
index b9d39b4..e8fc0ce 100644
--- a/diploma_thesis/simulator/simulator.py
+++ b/diploma_thesis/simulator/simulator.py
@@ -11,7 +11,7 @@
 from agents import MachineInput, WorkCenterInput
 from agents import TrainingPhase, EvaluationPhase, WarmUpPhase, Phase
 from agents.utils.memory import Record
-from environment import Agent, ShopFloor, Job, WaitInfo, Machine, WorkCenter, Context
+from environment import Agent, ShopFloor, Job, Machine, WorkCenter, Context
 from tape import TapeModel, SimulatorInterface
 from utils import Loggable
 from .configuration import RunConfiguration, EvaluateConfiguration
@@ -197,7 +197,7 @@ def did_prepare_work_center_record(
 
     # Agent
 
-    def schedule(self, context: Context, machine: Machine) -> Job | WaitInfo:
+    def schedule(self, context: Context, machine: Machine) -> Job | None:
         parameters = MachineInput(machine, context.moment)
 
         result = self.machine.schedule(parameters)
diff --git a/diploma_thesis/tape/machine/global_decomposed_tardiness_reward.py b/diploma_thesis/tape/machine/global_decomposed_tardiness_reward.py
index fb0cc40..63cac9d 100644
--- a/diploma_thesis/tape/machine/global_decomposed_tardiness_reward.py
+++ b/diploma_thesis/tape/machine/global_decomposed_tardiness_reward.py
@@ -44,10 +44,10 @@ def reward_after_production(self, context: Context) -> torch.FloatTensor | None:
         return None
 
     def reward_after_completion(self, contexts: List[Context]):
-        work_center_idx = torch.tensor([c.step_idx for c in contexts])
+        job = contexts[0].job
+        work_center_idx = torch.tensor([job.step_idx[c.step_idx] for c in contexts])
         machine_idx = torch.tensor([c.job.history.arrived_machine_idx[c.step_idx] for c in contexts])
         reward = torch.zeros_like(work_center_idx, dtype=torch.float)
-        job = contexts[0].job
 
         if job.is_tardy_upon_completion:
             wait_time = torch.FloatTensor([job.wait_time_on_machine(c.step_idx) for c in contexts])
diff --git a/diploma_thesis/tape/machine/global_tardiness_reward.py b/diploma_thesis/tape/machine/global_tardiness_reward.py
index 79a4ae9..eeae4bc 100644
--- a/diploma_thesis/tape/machine/global_tardiness_reward.py
+++ b/diploma_thesis/tape/machine/global_tardiness_reward.py
@@ -39,7 +39,8 @@ def reward_after_production(self, context: Context) -> torch.FloatTensor | None:
         return None
 
     def reward_after_completion(self, contexts: List[Context]):
-        work_center_idx = torch.tensor([c.step_idx for c in contexts])
+        job = contexts[0].job
+        work_center_idx = torch.tensor([job.step_idx[c.step_idx] for c in contexts])
         machine_idx = torch.tensor([c.job.history.arrived_machine_idx[c.step_idx] for c in contexts])
         reward = torch.zeros_like(work_center_idx, dtype=torch.float)
diff --git a/diploma_thesis/tape/machine/surrogate_tardiness_reward.py b/diploma_thesis/tape/machine/surrogate_tardiness_reward.py
index c966e86..451c916 100644
--- a/diploma_thesis/tape/machine/surrogate_tardiness_reward.py
+++ b/diploma_thesis/tape/machine/surrogate_tardiness_reward.py
@@ -84,7 +84,7 @@ def __compute_reward__(self, context: Context):
         critical_level_loser = critical_level[~context.chosen]
 
         earned_slack_chosen = torch.mean(context.processing_time[~context.chosen])
-        earned_slack_chosen *= critical_level_chosen
+        earned_slack_chosen = earned_slack_chosen * critical_level_chosen
 
         consumed_slack_loser = context.processing_time[context.chosen] * critical_level_loser.mean()
 
@@ -95,8 +95,8 @@ def __compute_reward__(self, context: Context):
 
         reward = ((reward_slack + reward_winq) / self.configuration.span).clip(-1, 1)
 
-        return reward
+        return reward.view([])
 
     @staticmethod
     def from_cli(parameters) -> MachineReward:
-        return SurrogateTardinessReward(SurrogateTardinessReward.Configuration.from_cli(parameters))
+        return SurrogateTardinessReward(configuration=SurrogateTardinessReward.Configuration.from_cli(parameters))
diff --git a/diploma_thesis/utils/dict.py b/diploma_thesis/utils/dict.py
index c7f9eb7..1a3a7f5 100644
--- a/diploma_thesis/utils/dict.py
+++ b/diploma_thesis/utils/dict.py
@@ -14,6 +14,19 @@ def merge_dicts(lhs: Dict, rhs: Dict) -> Dict:
     for key, value in rhs.items():
         if key in result and isinstance(result[key], dict) and isinstance(value, dict):
             result[key] = merge_dicts(result[key], value)
+        elif key in result and isinstance(result[key], list) and isinstance(value, list):
+            lhs = result[key]
+            rhs = value
+
+            # Merge existing elements
+            for i in range(min(len(lhs), len(rhs))):
+                lhs[i] = merge_dicts(lhs[i], rhs[i])
+
+            # Pad with not presented elements
+            if len(lhs) < len(rhs):
+                lhs += rhs[len(lhs):]
+
+            result[key] = lhs
         else:
             result[key] = value