Improvements in simulation runtime

yura-hb · Feb 22, 2024 · 6c0bb98 · 6c0bb98
1 parent f01792a
commit 6c0bb98
Show file tree

Hide file tree

Showing 17 changed files with 160 additions and 66 deletions.
diff --git a/diploma_thesis/agents/machine/model/multi_rule_linear.py b/diploma_thesis/agents/machine/model/multi_rule_linear.py
@@ -30,11 +30,7 @@ def __call__(self, state: State, parameters: MachineModel.Input) -> MachineModel
         action = torch.tensor(action, dtype=torch.long)
         rule = self.rules[action]
 
-        return MachineModel.Record(
-            result=rule(parameters.machine, parameters.now),
-            state=state,
-            action=action
-        )
+        return MachineModel.Record(result=rule(parameters.machine, parameters.now), state=state, action=action)
 
     def values(self, state: State) -> torch.FloatTensor:
         assert isinstance(state, TensorState), f"State must conform to TensorState"
@@ -61,11 +57,7 @@ def clone(self):
 
     @staticmethod
     def __connect__(n_rules: int, model: NNCLI, input_shape: torch.Size):
-        output_layer = NNCLI.Configuration.Linear(
-            dim=n_rules,
-            activation='none',
-            dropout=0
-        )
+        output_layer = NNCLI.Configuration.Linear(dim=n_rules, activation='none', dropout=0)
 
         model.connect(input_shape, output_layer)
 

diff --git a/diploma_thesis/agents/machine/state/deep_marl_mr.py b/diploma_thesis/agents/machine/state/deep_marl_mr.py
@@ -20,7 +20,7 @@ class DEEPMARLMinimumRepetitionStateEncoder(StateEncoder):
        Depending on the number of jobs in queue, the state is represented in the following way:
         1. If there are 0 jobs, then the state is a tensor of zeros
         2. If there is 1 job, then the state is a tensor of shape (4, 5) where the first row repeated
-        3. If there are more than 1 job, then the information of job minimum values of first 4 criterias are stored
+        3. If there is more than 1 job, then the information on the jobs' minimum values of the first 4 criteria is stored
     2. Arriving job info represents information of the job that is about to arrive at the machine
     """
 
@@ -38,6 +38,7 @@ def encode(self, parameters: StateEncoder.Input) -> State:
         arriving_job_state, _ = self.__make_arriving_job_state__(parameters.machine, parameters.now)
 
         state = torch.vstack([state, arriving_job_state])
+        state = torch.unsqueeze(state, 0)
 
         return self.State(state, job_idx, batch_size=[])
 

diff --git a/diploma_thesis/agents/utils/nn/nn_cli.py b/diploma_thesis/agents/utils/nn/nn_cli.py
@@ -39,6 +39,13 @@ def from_cli(parameters: dict):
                     dropout=parameters.get('dropout', 0.0)
                 )
 
+        @dataclass
+        class Flatten:
+            """Field-less layer description, registered under the 'flatten' kind and built as `nn.Flatten()`."""
+
+            @staticmethod
+            def from_cli(parameters: dict):
+                """Return a new `Flatten` configuration; `parameters` is accepted but not read."""
+                return NNCLI.Configuration.Flatten()
+
         layers: list[Layer]
 
         optimizer_parameters: dict
@@ -47,7 +54,8 @@ def from_cli(parameters: dict):
         def from_cli(parameters: dict):
             key_to_cls = {
                 'linear': NNCLI.Configuration.Linear,
-                'instance_norm': NNCLI.Configuration.InstanceNorm
+                'instance_norm': NNCLI.Configuration.InstanceNorm,
+                'flatten': NNCLI.Configuration.Flatten
             }
 
             return NNCLI.Configuration(
@@ -106,6 +114,8 @@ def __make_layer__(self, input_dim, layer: Configuration.Layer):
         match layer:
             case NNCLI.Configuration.InstanceNorm():
                 return nn.InstanceNorm1d(input_dim), input_dim
+            case NNCLI.Configuration.Flatten():
+                return nn.Flatten(), input_dim
             case NNCLI.Configuration.Linear(output_dim, activation, dropout):
                 return self.__make_linear_layer__(input_dim, output_dim, activation, dropout), output_dim
             case _:

diff --git a/diploma_thesis/configuration/mods/machine_agent/model.yml b/diploma_thesis/configuration/mods/machine_agent/model.yml
@@ -9,20 +9,28 @@ parameters:
           - kind: 'instance_norm'
           - kind: 'linear'
             parameters:
-              dim: 16
+              dim: 64
               activation: 'tanh'
           - kind: 'linear'
             parameters:
-              dim: 16
+              dim: 48
               activation: 'tanh'
           - kind: 'linear'
             parameters:
-              dim: 16
+              dim: 48
+              activation: 'tanh'
+          - kind: 'linear'
+            parameters:
+              dim: 36
               activation: 'tanh'
           - kind: 'linear'
             parameters:
               dim: 24
               activation: 'tanh'
+          - kind: 'linear'
+            parameters:
+              dim: 12
+              activation: 'tanh'
       action_selector:
         kind: 'phase_selector'
         parameters:
@@ -34,7 +42,7 @@ parameters:
               action_selector:
                 kind: 'epsilon_greedy'
                 parameters:
-                  epsilon: 0.95
+                  epsilon: 0.1
             - phase:
                 kind: 'warm_up'
                 parameters:
@@ -46,7 +54,9 @@ parameters:
                 parameters:
                   step: 1
               action_selector:
-                kind: 'uniform'
+                kind: 'sample'
+                parameters:
+                  is_distribution: false
 
   encoder:
     kind: 'deep_marl_indirect'
@@ -58,12 +68,12 @@ parameters:
       batch_size: 128
 
   loss:
-    kind: 'mse'
+    kind: 'smooth_l1'
 
   optimizer:
     kind: 'adam'
     parameters:
       lr: 0.001
 
   parameters:
-    gamma: 0.4
+    gamma: 0.95
diff --git a/diploma_thesis/configuration/mods/run/jsp/run.yml b/diploma_thesis/configuration/mods/run/jsp/run.yml
@@ -17,7 +17,7 @@ simulation: &simulation
           processing_times: # Sampler
             kind: 'uniform'
             parameters:
-              uniform: [ 1, 50 ]
+              uniform: [ 1, 5 ]
               noise: [ 0, 10 ]
           permutation:
             uneveness: 5
@@ -36,11 +36,11 @@ simulation: &simulation
           breakdown_arrival:
             kind: 'exponential'
             parameters:
-              mean: 1000
+              mean: 10000
           repair_duration:
             kind: 'uniform'
             parameters:
-              uniform: [ 200, 500 ]
+              uniform: [ 10, 200 ]
 
       seed: 42
 
@@ -64,14 +64,14 @@ parameters:
 
   machine_train_schedule:
     pretrain_steps: 10
-    train_interval: 5
+    train_interval: 20
     max_training_steps: 100000000
 
   work_center_train_schedule:
     pretrain_steps: 0
     train_interval: 100
     max_training_steps: 0
 
-  n_workers: 1
+  n_workers: 3
   simulations:
     - <<: *simulation
diff --git a/diploma_thesis/configuration/simulation.yml b/diploma_thesis/configuration/simulation.yml
@@ -3,13 +3,13 @@ task:
 
   name: 'test'
   output_dir: 'tmp'
+  log_stdout: False
 
   machine_agent:
     kind: 'mod'
     parameters:
       base_path: 'configuration/mods/machine_agent/model.yml'
-      mods:
-        - 'encoding/mr.yml'
+      mods: []
 
 
   work_center_agent:
@@ -18,13 +18,15 @@ task:
       model:
         kind: 'static'
         parameters:
-          rule: 'random'
+          rule: 'et'
       encoder:
         kind: 'plain'
 
   tape:
     machine_reward:
       kind: 'global_tardiness'
+      parameters:
+        span: 512
 
     work_center_reward:
       kind: 'no'
@@ -36,5 +38,4 @@ task:
     kind: 'mod'
     parameters:
       base_path: 'configuration/mods/run/jsp/run.yml'
-      mods:
-        - 'test.yml'
+      mods: []
diff --git a/diploma_thesis/environment/job.py b/diploma_thesis/environment/job.py
@@ -6,6 +6,7 @@
 from tensordict.prototype import tensorclass
 
 
+
 class ReductionStrategy(Enum):
     """
     Job doesn't know in advance on which machine it will be processed inside work-center. ReductionStrategy
@@ -403,7 +404,7 @@ def with_event(self, event: Event):
                 self.current_step_idx += 1
                 self.current_machine_idx = torch.tensor(-1)
             case JobEvent.Kind.arrival_on_machine:
-                self.current_machine_idx = torch.tensor(event.machine_idx)
+                self.current_machine_idx = event.machine_idx
             case _:
                 pass
 

diff --git a/diploma_thesis/environment/shop_floor.py b/diploma_thesis/environment/shop_floor.py
@@ -1,14 +1,14 @@
 import logging
 from dataclasses import dataclass, field
 from typing import List, Dict
-from tensordict.prototype import tensorclass
+from typing import Set
 
 import simpy
 import torch
+from tensordict.prototype import tensorclass
 
 import environment
 from .utils import ShopFloorFactory
-from typing import Set
 
 
 @tensorclass
@@ -152,7 +152,8 @@ def map(self) -> Map:
             work_centers=[
                 Map.WorkCenter(
                     idx=work_center.work_center_idx,
-                    machines=torch.cat([machine.state.machine_idx for machine in work_center.machines]),
+                    machines=torch.cat([machine.state.machine_idx for machine in work_center.machines])
+                    if len(work_center.machines) > 1 else torch.atleast_1d(work_center.machines[0].state.machine_idx),
                     batch_size=[]
                 )
                 for work_center in self.work_centers
@@ -210,7 +211,7 @@ def tardy_rate(self, now: int) -> torch.FloatTensor:
         return tardy_jobs / len(in_system_jobs)
 
     def expected_tardy_rate(
-        self, now: float, reduction_strategy: environment.JobReductionStrategy
+            self, now: float, reduction_strategy: environment.JobReductionStrategy
     ) -> torch.FloatTensor:
         in_system_jobs = self.in_system_jobs
 
@@ -262,7 +263,7 @@ def route(self, work_center: 'environment.WorkCenter', job: environment.Job) ->
 
     # Events from subcomponents (WorkCenter, Machine)
     def will_produce(self, job: environment.Job, machine: environment.Machine):
-        self.delegate.will_produce(context=self.__make_context__(), job=job,  machine=machine)
+        self.delegate.will_produce(context=self.__make_context__(), job=job, machine=machine)
 
     def did_produce(self, job: environment.Job, machine: environment.Machine):
         self.delegate.did_produce(context=self.__make_context__(), job=job, machine=machine)
@@ -271,7 +272,7 @@ def will_dispatch(self, job: environment.Job, work_center: environment.WorkCente
         self.delegate.will_dispatch(context=self.__make_context__(), job=job, work_center=work_center)
 
     def did_dispatch(self, job: environment.Job, work_center: environment.WorkCenter, machine: environment.Machine):
-        self.delegate.did_dispatch(context=self.__make_context__(), job=job,  work_center=work_center, machine=machine)
+        self.delegate.did_dispatch(context=self.__make_context__(), job=job, work_center=work_center, machine=machine)
 
     def did_finish_dispatch(self, work_center: environment.WorkCenter):
         self.delegate.did_finish_dispatch(context=self.__make_context__(), work_center=work_center)

diff --git a/diploma_thesis/environment/utils/report_factory.py b/diploma_thesis/environment/utils/report_factory.py
@@ -1,6 +1,7 @@
 from collections import OrderedDict
 from dataclasses import dataclass
 
+import torch
 import pandas as pd
 from tabulate import tabulate
 
@@ -108,10 +109,13 @@ def __make_shopfloor_stats__(self):
 
         shop_floor = []
 
+        jobs = self.statistics.jobs(predicate=predicate)
+
         for weighted_by_priority in [True, False]:
             shop_floor += [dict(
                 weighted_by_priority=weighted_by_priority,
-                total_jobs=len(self.statistics.jobs(predicate=predicate)),
+                total_jobs=len(jobs),
+                completed_jobs=len([job for job in jobs if job.is_completed]),
                 makespan=self.statistics.total_make_span(predicate=predicate),
                 flow_time=self.statistics.total_flow_time(
                     weighted_by_priority=weighted_by_priority, predicate=predicate

diff --git a/diploma_thesis/job_sampler/dynamic/cli.py b/diploma_thesis/job_sampler/dynamic/cli.py
@@ -81,8 +81,7 @@ def arrival_time_sampler_from_cli(
             case 'sampler':
                 sampler = numeric_sampler_from_cli(parameters['sampler'])
             case 'expected_utilization':
-                arrival_time = mean_processing_time * parameters['value']
-                arrival_time /= number_of_machines
+                arrival_time = mean_processing_time / (number_of_machines * parameters['value'])
 
                 sampler = Exponential(arrival_time)
             case 'even_arrival_time':

diff --git a/diploma_thesis/simulator/__init__.py b/diploma_thesis/simulator/__init__.py
@@ -5,7 +5,7 @@
 from .configuration import run_configuration_from_cli, evaluate_configuration_from_cli
 from .episodic import EpisodicSimulator
 from .simulation import Simulation
-from .simulator import Simulator
+from .simulator import Simulator, RewardCache
 from .td import TDSimulator
 
 key_to_class = {

diff --git a/diploma_thesis/simulator/configuration.py b/diploma_thesis/simulator/configuration.py
@@ -40,7 +40,7 @@ def from_cli(parameters: Dict):
     timeline: TimelineSchedule
     machine_train_schedule: TrainSchedule
     work_center_train_schedule: TrainSchedule
-    n_workers: int = 4
+    n_workers: int = 1
     simulations: List[Simulation] = field(default_factory=list)
 
     @classmethod

diff --git a/diploma_thesis/simulator/simulation/simulation.py b/diploma_thesis/simulator/simulation/simulation.py
@@ -26,6 +26,10 @@ def __init__(self, index: int, name: str, logger: Logger, configuration: Configu
         self.shop_floor: ShopFloor = None
         self.configuration = configuration
 
+    @property
+    def simulation_index(self):
+        """Return `self.index`, exposed read-only next to `simulation_id`."""
+        return self.index
+
     @property
     def simulation_id(self):
         return self.name

diff --git a/diploma_thesis/simulator/simulator.py b/diploma_thesis/simulator/simulator.py
@@ -199,6 +199,7 @@ def did_prepare_work_center_record(
 
     def schedule(self, context: Context, machine: Machine) -> Job | WaitInfo:
         parameters = MachineInput(machine, context.moment)
+
         result = self.machine.schedule(parameters)
 
         if self.machine.is_trainable:

diff --git a/diploma_thesis/tape/machine/global_tardiness_reward.py b/diploma_thesis/tape/machine/global_tardiness_reward.py
@@ -13,6 +13,9 @@ class GlobalTardiness(MachineReward):
     Reward from Deep-MARL external/PhD-Thesis-Projects/JSP/machine.py:693
     """
 
+    def __init__(self, span: int = 256):
+        self.span = span
+
     def record_job_action(self, job: Job, machine: Machine) -> Context:
         return Context(job.current_step_idx, job, machine)
 
@@ -25,7 +28,7 @@ def reward_after_completion(self, contexts: List[Context]):
         reward = torch.zeros_like(work_center_idx, dtype=torch.float)
 
         if contexts[0].job.is_tardy_upon_completion:
-            tardy_rate = - torch.clip(contexts[0].job.tardiness_upon_completion / 256, 0, 1)
+            tardy_rate = - torch.clip(contexts[0].job.tardiness_upon_completion / self.span, 0, 1)
 
             reward += tardy_rate
 
@@ -36,4 +39,4 @@ def reward_after_completion(self, contexts: List[Context]):
 
     @staticmethod
     def from_cli(parameters) -> MachineReward:
-        return GlobalTardiness()
+        return GlobalTardiness(span=parameters.get('span', 256))