Implement final rewards for JSP

yura-hb · Feb 23, 2024 · ac63708 · ac63708
1 parent 566eb3d
commit ac63708
Show file tree

Hide file tree

Showing 19 changed files with 347 additions and 159 deletions.
diff --git a/diploma_thesis/agents/__init__.py b/diploma_thesis/agents/__init__.py
@@ -3,5 +3,5 @@
 from .workcenter import WorkCenter, WorkCenterInput, from_cli as work_center_from_cli
 from .machine import Machine, MachineInput, from_cli as machine_from_cli
 
-from .workcenter import StaticWorkCenter
+from .workcenter import Static
 from .machine import StaticMachine
diff --git a/diploma_thesis/agents/machine/model/multi_rule_linear.py b/diploma_thesis/agents/machine/model/multi_rule_linear.py
@@ -15,8 +15,4 @@ def all_rules(cls):
     def make_result(
         self, rule: SchedulingRule, parameters: MachineModel.Input, state: State, action: Action
     ) -> MachineModel.Record:
-        return MachineModel.Record(
-            result=rule(state, parameters),
-            state=state,
-            action=action
-        )
+        return MachineModel.Record(result=rule(state, parameters), state=state, action=action)
diff --git a/diploma_thesis/agents/workcenter/__init__.py b/diploma_thesis/agents/workcenter/__init__.py
@@ -3,12 +3,12 @@
 from utils import from_cli
 from .utils import Input as WorkCenterInput
 from .work_center import WorkCenter
-from .static import StaticWorkCenter
-from .rl import RLWorkCenter
+from .static import Static
+from .rl import RLAgent
 
 key_to_class = {
-    "static": StaticWorkCenter,
-    'rl': RLWorkCenter
+    "static": Static,
+    'rl': RLAgent
 }
 
 from_cli = partial(from_cli, key_to_class=key_to_class)
diff --git a/diploma_thesis/agents/workcenter/model/multi_rule_linear.py b/diploma_thesis/agents/workcenter/model/multi_rule_linear.py
@@ -15,8 +15,4 @@ def all_rules(cls):
     def make_result(
          self, rule: RoutingRule, parameters: WorkCenterModel.Input, state: State, action: Action
     ) -> WorkCenterModel.Record:
-        return WorkCenterModel.Record(
-            result=rule(state, parameters),
-            state=state,
-            action=action
-        )
+        return WorkCenterModel.Record(result=rule(state, parameters), state=state, action=action)
diff --git a/diploma_thesis/agents/workcenter/rl.py b/diploma_thesis/agents/workcenter/rl.py
@@ -7,7 +7,7 @@
 from .work_center import *
 
 
-class RLWorkCenter(WorkCenter):
+class RLAgent(WorkCenter):
 
     def __init__(self, model: NNWorkCenterModel, state_encoder: StateEncoder, trainer: RLTrainer):
         super().__init__(model, state_encoder)

diff --git a/diploma_thesis/agents/workcenter/static.py b/diploma_thesis/agents/workcenter/static.py
@@ -4,7 +4,7 @@
 from .work_center import *
 
 
-class StaticWorkCenter(WorkCenter):
+class Static(WorkCenter):
 
     def __init__(self, model: StaticWorkCenterModel, state_encoder: StateEncoder):
         super().__init__(model=model, state_encoder=state_encoder)
@@ -21,4 +21,4 @@ def from_cli(parameters: Dict):
         model = model_from_cli(parameters['model'])
         encoder = state_encoder_from_cli(parameters['encoder'])
 
-        return StaticWorkCenter(model=model, state_encoder=encoder)
+        return Static(model=model, state_encoder=encoder)
diff --git a/diploma_thesis/agents/workcenter/utils/parameters.py b/diploma_thesis/agents/workcenter/utils/parameters.py
@@ -6,5 +6,5 @@
 
 @dataclass
 class Input:
-    work_center: WorkCenter
     job: Job
+    work_center: WorkCenter
diff --git a/diploma_thesis/environment/agent.py b/diploma_thesis/environment/agent.py
@@ -6,23 +6,15 @@
 from environment import Job, Context
 
 
-@dataclass
-class WaitInfo:
-    """
-    Information about scheduling decision
-    """
-    wait_time: int = 0
-
-
 class Agent(metaclass=ABCMeta):
     """
     Support Class to handle the events in the shop-floor
     """
 
     @abstractmethod
-    def schedule(self, context: Context, machine: 'environment.Machine') -> Job | WaitInfo:
+    def schedule(self, context: Context, machine: 'environment.Machine') -> Job | None:
         pass
 
     @abstractmethod
-    def route(self, context: Context, work_center: 'environment.WorkCenter', job: Job) -> 'Machine | None':
+    def route(self, context: Context, job: Job, work_center: 'environment.WorkCenter') -> 'Machine | None':
         pass
diff --git a/diploma_thesis/environment/job.py b/diploma_thesis/environment/job.py
@@ -6,7 +6,6 @@
 from tensordict.prototype import tensorclass
 
 
-
 class ReductionStrategy(Enum):
     """
     Job doesn't know in advance on which machine it will be processed inside work-center. ReductionStrategy
@@ -159,61 +158,35 @@ def current_operation_processing_time_on_machine(self):
         """
         Returns: The processing time of the current operation in machine
         """
-        return self.processing_times[self.current_step_idx][self.current_machine_idx]
+        return self.__processing_time_on_machine__(self.current_step_idx, self.current_machine_idx)
 
     def current_operation_processing_time_in_work_center(self, strategy: ReductionStrategy = ReductionStrategy.none):
         """
         Returns: Returns the processing time of the current operation in work center
         """
-        processing_times = self.processing_times[self.current_step_idx].float()
-
-        return reduce(processing_times, strategy)
+        return self.__processing_time_on_work_center__(self.current_step_idx, strategy)
 
     def operation_processing_time_in_work_center(
-        self,
-        work_center_idx: int,
-        strategy: ReductionStrategy = ReductionStrategy.mean
+        self, work_center_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean
     ):
         """
         Returns: Returns the processing time of the current operation in workcenter
         """
-        work_center_idx = torch.argwhere(self.step_idx == work_center_idx).item()
-        processing_times = self.processing_times[work_center_idx].float()
+        step_idx = torch.argwhere(self.step_idx == work_center_idx).item()
 
-        return reduce(processing_times, strategy)
+        return self.__processing_time_on_work_center__(step_idx, strategy)
 
     def remaining_processing_time(self, strategy: ReductionStrategy = ReductionStrategy.mean):
         """
         Returns: The total processing time of the remaining operations
         """
-        result = torch.FloatTensor([0.0])
-
-        if self.is_completed:
-            return result
-
-        if self.current_machine_idx >= 0:
-            result += self.current_operation_processing_time_on_machine
-        else:
-            result += self.current_operation_processing_time_in_work_center(strategy)
-
-        result += self.next_remaining_processing_time(strategy)
-
-        return result
+        return self.__remaining_processing_time__(self.current_step_idx, self.current_machine_idx, strategy)
 
     def next_remaining_processing_time(self, strategy: ReductionStrategy = ReductionStrategy.mean):
         """
         Returns: The remaining processing time of the operation excluding processing time on current machine
         """
-        result = torch.tensor(0.0, dtype=torch.float)
-        expected_processing_time = self.processing_times[max(self.current_step_idx + 1, 0):]
-
-        if expected_processing_time.numel() == 0:
-            return result
-
-        expected_processing_time = reduce(expected_processing_time.float(), strategy)
-        result += expected_processing_time.sum()
-
-        return result
+        return self.__next_remaining_processing_time__(self.current_step_idx, strategy)
 
     def total_processing_time(self, strategy: ReductionStrategy = ReductionStrategy.mean):
         """
@@ -317,14 +290,7 @@ def next_operation_processing_time(self, strategy: ReductionStrategy = Reduction
         """
         Returns: The processing time of the next operation
         """
-        next_idx = self.current_step_idx + 1
-
-        if next_idx >= len(self.step_idx):
-            return torch.tensor(0.0, dtype=torch.float)
-
-        pt = self.processing_times[next_idx]
-
-        return reduce(pt.float(), strategy)
+        return self.__next_processing_time__(self.current_step_idx, strategy)
 
     def slack_upon_moment(self, now: torch.FloatTensor, strategy: ReductionStrategy = ReductionStrategy.mean):
         """
@@ -382,6 +348,36 @@ def current_operation_waiting_time_on_machine(self, now: torch.FloatTensor):
         """
         return now - self.history.arrived_at_machine[self.current_step_idx]
 
+    def wait_time_on_machine(self, step_idx: int):
+        """
+        Args:
+            step_idx: Index of an operation that precedes the current one
+
+        Returns: Start time minus machine-arrival time recorded in the history for that operation
+        """
+        assert self.current_step_idx > step_idx, "Operation must be started on machine to compute wait time"
+        started_at, arrived_at = self.history.started_at[step_idx], self.history.arrived_at_machine[step_idx]
+        return started_at - arrived_at
+
+    def slack_upon_arrival_on_machine(self, step_idx):
+        """
+        Args:
+            step_idx: Index of the operation; must not exceed the current step
+
+        Returns: Due time minus arrival time on machine minus remaining processing time from that step
+        """
+        assert step_idx <= self.current_step_idx, "Operation must be started on machine to compute slack time"
+
+        if step_idx == self.current_step_idx:
+            assert (self.current_machine_idx >= 0 or self.is_completed), (
+                "Job must be processed on machine to compute slack")
+
+        history = self.history
+        arrived_on, arrived_at = history.arrived_machine_idx[step_idx], history.arrived_at_machine[step_idx]
+        work_left = self.__remaining_processing_time__(step_idx, arrived_on)
+
+        return self.due_at - arrived_at - work_left
+
     def operation_completion_rate(self):
         """
         The completion rate of the job based on the number of completed operations
@@ -416,3 +412,65 @@ def with_due_at(self, due_at: torch.FloatTensor):
         self.due_at = torch.FloatTensor([due_at])
 
         return self
+
+    # Utils
+
+    def __processing_time_on_work_center__(self, step_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean):
+        """
+        Returns: Processing times of the operation at step_idx reduced by strategy; zero when out of range
+        """
+        if 0 <= step_idx < len(self.step_idx):
+            per_machine_times = self.processing_times[step_idx].float()
+
+            return reduce(per_machine_times, strategy)
+
+        return torch.tensor(0.0, dtype=torch.float)
+
+    def __processing_time_on_machine__(self, step_idx: int, machine_idx: int):
+        """
+        Returns: The processing_times entry for (step_idx, machine_idx); zero when either index is out of range
+        """
+        step_known = 0 <= step_idx < len(self.step_idx)
+        machine_known = step_known and 0 <= machine_idx < self.processing_times.shape[1]
+        if not machine_known:
+            return torch.tensor(0.0, dtype=torch.float)
+        return self.processing_times[step_idx][machine_idx]
+
+    def __next_processing_time__(self, step_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean):
+        """
+        Returns: Work-center processing time of the operation that follows step_idx
+        """
+        return self.__processing_time_on_work_center__(strategy=strategy, step_idx=step_idx + 1)
+
+    def __remaining_processing_time__(self, step_idx: int, machine_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean):
+        """
+        Returns: Time of the operation at step_idx plus __next_remaining_processing_time__ for the steps after it
+        """
+        total = torch.FloatTensor([0.0])
+
+        if step_idx >= len(self.step_idx):
+            return total
+
+        # A non-negative machine index selects the per-machine time; otherwise reduce over the work center
+        total += (
+            self.__processing_time_on_machine__(step_idx, machine_idx) if machine_idx >= 0
+            else self.__processing_time_on_work_center__(step_idx, strategy)
+        )
+        total += self.__next_remaining_processing_time__(step_idx, strategy)
+
+        return total
+
+    def __next_remaining_processing_time__(self, step_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean):
+        """
+        Returns: The summed processing time of all operations that follow step_idx
+        """
+        result = torch.tensor(0.0, dtype=torch.float)
+        # Slice from the step_idx argument; self.step_idx is the job's whole step sequence, not a position
+        expected_processing_time = self.processing_times[max(step_idx + 1, 0):]
+        if expected_processing_time.numel() == 0:
+            return result
+
+        expected_processing_time = reduce(expected_processing_time.float(), strategy)
+        result += expected_processing_time.sum()
+
+        return result
diff --git a/diploma_thesis/environment/machine.py b/diploma_thesis/environment/machine.py
@@ -278,8 +278,8 @@ def __produce__(self):
 
             job = self.__select_job__()
 
-            if isinstance(job, environment.WaitInfo):
-                yield self.environment.timeout(job.wait_time)
+            if job is None:
+                yield self.environment.process(self.__starve__())
                 continue
 
             self.__notify_job_about_production__(job, production_start=True)
@@ -324,7 +324,7 @@ def __starve__(self):
 
     def __select_job__(self):
         if self.state.is_empty:
-            return environment.WaitInfo(wait_time=1)
+            return None
 
         if len(self.state.queue) == 1:
             return self.state.queue[0]

diff --git a/diploma_thesis/environment/utils/report_factory.py b/diploma_thesis/environment/utils/report_factory.py
@@ -1,7 +1,6 @@
 from collections import OrderedDict
 from dataclasses import dataclass
 
-import torch
 import pandas as pd
 from tabulate import tabulate
 

diff --git a/diploma_thesis/environment/work_center.py b/diploma_thesis/environment/work_center.py
@@ -125,8 +125,6 @@ def __dispatch__(self):
         assert self.shop_floor is not None, "Work center is not connected to the shop floor"
 
         while True:
-            yield self.on_route
-
             self.history.with_decision_time(self.environment.now)
 
             for job in self.state.queue:
@@ -137,8 +135,12 @@ def __dispatch__(self):
 
                 self.shop_floor.will_dispatch(job, self)
 
-                # TODO: React on None
                 machine = self.shop_floor.route(work_center=self, job=job)
+
+                # TODO: Implement correct idleness
+                if machine is None:
+                    continue
+
                 machine.receive(job)
 
                 self.shop_floor.did_dispatch(job, self, machine)
@@ -147,7 +149,12 @@ def __dispatch__(self):
 
             self.shop_floor.did_finish_dispatch(self)
 
-            self.on_route = self.environment.event()
+            yield self.environment.process(self.__starve__())  # wait here; a bare call only builds a generator
+
+    def __starve__(self):
+        self.on_route = self.environment.event()  # fresh event for the next wake-up of the dispatch loop
+
+        yield self.on_route
 
     # Utility
 

diff --git a/diploma_thesis/tape/machine/__init__.py b/diploma_thesis/tape/machine/__init__.py
@@ -2,13 +2,19 @@
 from functools import partial
 
 from utils import from_cli
-from .global_tardiness_reward import GlobalTardiness
-from .no import No
+
 from .reward import MachineReward
+from .no import No
+from .global_tardiness_reward import GlobalTardiness
+from .global_decomposed_tardiness_reward import GlobalDecomposedTardiness
+from .surrogate_tardiness_reward import SurrogateTardinessReward
+
 
 key_to_cls = {
+    'no': No,
     'global_tardiness': GlobalTardiness,
-    'no': No
+    'global_decomposed_tardiness': GlobalDecomposedTardiness,
+    'surrogate_tardiness': SurrogateTardinessReward
 }