Implement tape to record reward and state

yura-hb · Feb 14, 2024 · c31bae9
1 parent 9a77d11
commit c31bae9
Show file tree

Hide file tree

Showing 80 changed files with 834 additions and 293 deletions.
diff --git a/diploma_thesis/agents/base/agent.py b/diploma_thesis/agents/base/agent.py
@@ -6,9 +6,12 @@
 from agents.utils.memory import Memory, Record
 from .encoder import Encoder as StateEncoder, Input, State
 from .model import Model, Action, Result
+from typing import TypeVar, Generic
 
+Key = TypeVar('Key')
 
-class Agent(Loggable, PhaseUpdatable, metaclass=ABCMeta):
+
+class Agent(Generic[Key], Loggable, PhaseUpdatable, metaclass=ABCMeta):
 
     def __init__(self,
                  model: Model[Input, State, Action, Result],
@@ -39,13 +42,13 @@ def update(self, phase: Phase):
     @property
     @abstractmethod
     def is_trainable(self):
-        pass
+        return self.phase != EvaluationPhase()
 
     @abstractmethod
     def train_step(self):
         pass
 
-    def store(self, record: Record):
+    def store(self, key: Key, record: Record):
         pass
 
     def schedule(self, parameters: Input) -> Model.Record:

diff --git a/diploma_thesis/agents/machine/__init__.py b/diploma_thesis/agents/machine/__init__.py
@@ -1,7 +1,7 @@
+from .utils import Input as MachineInput
 from .dqn import DeepQAgent
 from .machine import Machine
 from .static import StaticMachine
-from .utils import Input as MachineInput
 
 key_to_class = {
     "static": StaticMachine,

diff --git a/diploma_thesis/agents/machine/dqn.py b/diploma_thesis/agents/machine/dqn.py
@@ -7,7 +7,9 @@
 class DeepQAgent(Machine):
 
+    @property
     def is_trainable(self):
-        return True
+        # `Agent.is_trainable` is a property: read it via super(), do not call it.
+        return super().is_trainable
 
     def train_step(self):
         pass
@@ -18,5 +20,5 @@ def from_cli(parameters: Dict):
         encoder = state_encoder_from_cli(parameters['encoder'])
         memory = memory_from_cli(parameters['memory'])
 
-        return DeepQAgent(model=model, state_encoder=encoder, memory=None)
+        return DeepQAgent(model=model, state_encoder=encoder, memory=memory)
 
diff --git a/diploma_thesis/agents/machine/machine.py b/diploma_thesis/agents/machine/machine.py
@@ -1,11 +1,11 @@
 from abc import ABCMeta
 
 from agents.base.agent import Agent
-
 from .model import MachineModel, from_cli as model_from_cli
 from .state import StateEncoder, from_cli as state_encoder_from_cli
 from agents.utils.memory import Memory, from_cli as memory_from_cli
+from environment import MachineKey
 
 
-class Machine(Agent, metaclass=ABCMeta):
+class Machine(Agent[MachineKey], metaclass=ABCMeta):
     pass
diff --git a/diploma_thesis/reward/machine/__init__.py b/diploma_thesis/agents/machine/mddqn.py
rename from diploma_thesis/reward/machine/__init__.py
rename to diploma_thesis/agents/machine/mddqn.py
diff --git a/diploma_thesis/agents/machine/model/rule/atc.py b/diploma_thesis/agents/machine/model/rule/atc.py
@@ -1,5 +1,3 @@
-import torch
-
 from .scheduling_rule import *
 
 
@@ -10,6 +8,7 @@ class ATCSchedulingRule(SchedulingRule):
     Source: https://www.jstor.org/stable/2632177
     """
 
+    @property
     def selector(self):
         return torch.argmax
 
@@ -27,4 +26,4 @@ def criterion(self, machine: Machine, now: float) -> torch.FloatTensor:
         priority = torch.exp(-slack / (0.05 * torch.mean(processing_times)))
         priority /= processing_times
 
-        return priority
+        return priority
diff --git a/diploma_thesis/agents/machine/model/rule/avpro.py b/diploma_thesis/agents/machine/model/rule/avpro.py
@@ -6,6 +6,7 @@ class AVPROSchedulingRule(SchedulingRule):
     Average Processing Time per Operation scheduling rule
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/covert.py b/diploma_thesis/agents/machine/model/rule/covert.py
@@ -7,6 +7,7 @@ class COVERTSchedulingRule(SchedulingRule):
     We assume that the cost is
     """
 
+    @property
     def selector(self):
         return torch.argmax
 

diff --git a/diploma_thesis/agents/machine/model/rule/cr.py b/diploma_thesis/agents/machine/model/rule/cr.py
@@ -7,6 +7,7 @@ class CRSchedulingRule(SchedulingRule):
     processing time
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/crspt.py b/diploma_thesis/agents/machine/model/rule/crspt.py
@@ -7,6 +7,7 @@ class CRSPTSchedulingRule(SchedulingRule):
     the rule selects jobs with the lowest ratio of due time to remaining processing time and current operation time
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/dptlwkr.py b/diploma_thesis/agents/machine/model/rule/dptlwkr.py
@@ -6,6 +6,7 @@ class DPTLWKRSchedulingRule(SchedulingRule):
     Double Processing Time + Least Work Remaining
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/dptlwkrs.py b/diploma_thesis/agents/machine/model/rule/dptlwkrs.py
@@ -6,6 +6,7 @@ class DPTLWKRSSchedulingRule(SchedulingRule):
     Double Processing Time + Least Work Remaining + Slack
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/dptwinqnpt.py b/diploma_thesis/agents/machine/model/rule/dptwinqnpt.py
@@ -6,6 +6,7 @@ class DPTWINQNPTSchedulingRule(SchedulingRule):
     Double Processing Time + Work In Next Queue + Next Processing Time
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/edd.py b/diploma_thesis/agents/machine/model/rule/edd.py
@@ -6,6 +6,7 @@ class EDDSchedulingRule(SchedulingRule):
     Earliest Due Date rule, i.e. selects jobs, in which due at is the shortest
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/fifo.py b/diploma_thesis/agents/machine/model/rule/fifo.py
@@ -2,7 +2,7 @@
 
 
 class FIFOSchedulingRule(SchedulingRule):
-
+    @property
     def selector(self):
         return lambda _: 0
 

diff --git a/diploma_thesis/agents/machine/model/rule/gp_1.py b/diploma_thesis/agents/machine/model/rule/gp_1.py
@@ -6,6 +6,7 @@ class GP1SchedulingRule(SchedulingRule):
     Genetic Programming 1 scheduling rule. Taken from external/PhD-Thesis-Projects/FJSP/sequencing.py
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/gp_2.py b/diploma_thesis/agents/machine/model/rule/gp_2.py
@@ -6,6 +6,7 @@ class GP2SchedulingRule(SchedulingRule):
     Genetic Programming 2 scheduling rule. Taken from external/PhD-Thesis-Projects/FJSP/sequencing.py
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/lifo.py b/diploma_thesis/agents/machine/model/rule/lifo.py
@@ -2,7 +2,7 @@
 
 
 class LIFOSchedulingRule(SchedulingRule):
-
+    @property
     def selector(self):
         return lambda _: -1
 

diff --git a/diploma_thesis/agents/machine/model/rule/lpt.py b/diploma_thesis/agents/machine/model/rule/lpt.py
@@ -6,6 +6,7 @@ class LPTSchedulingRule(SchedulingRule):
     Longest Processing Time rule, i.e. selects jobs, in which current operation has the largest operation time
     """
 
+    @property
     def selector(self):
         return torch.argmax
 

diff --git a/diploma_thesis/agents/machine/model/rule/lro.py b/diploma_thesis/agents/machine/model/rule/lro.py
@@ -6,6 +6,7 @@ class LROSchedulingRule(SchedulingRule):
     Least Remaining Operations rule, i.e. selects jobs, which has the smallest number of remaining operations
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/lwkr.py b/diploma_thesis/agents/machine/model/rule/lwkr.py
@@ -6,6 +6,7 @@ class LWRKSchedulingRule(SchedulingRule):
     Least Work Remaining rule, i.e. selects jobs, in which the remaining time of the job is the smallest.
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/lwkrmod.py b/diploma_thesis/agents/machine/model/rule/lwkrmod.py
@@ -6,6 +6,7 @@ class LWRKMODSchedulingRule(SchedulingRule):
     Least Work Remaining + Modified Operational Due date rule. Check implementation of the rules separately.
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/lwkrspt.py b/diploma_thesis/agents/machine/model/rule/lwkrspt.py
@@ -7,6 +7,7 @@ class LWRKSPTSchedulingRule(SchedulingRule):
         i.e. selects jobs, in which satisfy both criteria (for reference check lwrk.py and spt.py)
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/mdd.py b/diploma_thesis/agents/machine/model/rule/mdd.py
@@ -7,6 +7,7 @@ class MDDSchedulingRule(SchedulingRule):
         selects job with the smallest value of max(due_at, operation_completed_at)
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/mod.py b/diploma_thesis/agents/machine/model/rule/mod.py
@@ -6,6 +6,7 @@ class MODSchedulingRule(SchedulingRule):
     Modified operational due date
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/mon.py b/diploma_thesis/agents/machine/model/rule/mon.py
@@ -7,6 +7,7 @@ class MONSchedulingRule(SchedulingRule):
         i.e. SPT + additional slack factor
     """
 
+    @property
     def selector(self):
         return torch.argmax
 

diff --git a/diploma_thesis/agents/machine/model/rule/ms.py b/diploma_thesis/agents/machine/model/rule/ms.py
@@ -6,6 +6,7 @@ class MSSchedulingRule(SchedulingRule):
     Minimum Slack scheduling rule, i.e. the rule selects jobs with the minimum slack
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/npt.py b/diploma_thesis/agents/machine/model/rule/npt.py
@@ -7,6 +7,7 @@ class NPTSchedulingRule(SchedulingRule):
     the smallest
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/ptwinq.py b/diploma_thesis/agents/machine/model/rule/ptwinq.py
@@ -6,6 +6,7 @@ class PTWINQSchedulingRule(SchedulingRule):
     Processing Time + Work In Next Queue
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/ptwinqs.py b/diploma_thesis/agents/machine/model/rule/ptwinqs.py
@@ -6,6 +6,7 @@ class PTWINQSSchedulingRule(SchedulingRule):
     Processing Time + Work In Next Queue + Slack
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/random.py b/diploma_thesis/agents/machine/model/rule/random.py
@@ -4,7 +4,7 @@
 
 
 class RandomSchedulingRule(SchedulingRule):
-
+    @property
     def selector(self):
         return lambda x: torch.randint(0, len(x), (1,))
 

diff --git a/diploma_thesis/agents/machine/model/rule/scheduling_rule.py b/diploma_thesis/agents/machine/model/rule/scheduling_rule.py
@@ -12,7 +12,8 @@ def __init__(self, reduction_strategy: JobReductionStrategy = JobReductionStrate
 
     def __call__(self, machine: 'Machine', now: float) -> Job | WaitInfo:
         value = self.criterion(machine, now)
-        idx = self.selector(value)
+        selector = self.selector
+        idx = selector(value)
 
         return machine.queue[idx]
 

diff --git a/diploma_thesis/agents/machine/model/rule/spmwk.py b/diploma_thesis/agents/machine/model/rule/spmwk.py
@@ -6,6 +6,7 @@ class SPMWKSchedulingRule(SchedulingRule):
     Slack per Remaining Work scheduling rule
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/spmwkspt.py b/diploma_thesis/agents/machine/model/rule/spmwkspt.py
@@ -6,6 +6,7 @@ class SPMWKSPTSchedulingRule(SchedulingRule):
     Slack per Remaining Work + Shortest Processing Time scheduling rule
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/spt.py b/diploma_thesis/agents/machine/model/rule/spt.py
@@ -6,6 +6,7 @@ class SPTSchedulingRule(SchedulingRule):
     Shortest Processing Time rule, i.e. selects jobs, in which current operation has the smallest operation time
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/model/rule/winq.py b/diploma_thesis/agents/machine/model/rule/winq.py
@@ -6,6 +6,7 @@ class WINQSchedulingRule(SchedulingRule):
     Work In Next Queue scheduling rule
     """
 
+    @property
     def selector(self):
         return torch.argmin
 

diff --git a/diploma_thesis/agents/machine/state/deep_marl_indirect.py b/diploma_thesis/agents/machine/state/deep_marl_indirect.py
@@ -35,7 +35,7 @@ def encode(self, parameters: StateEncoder.Input) -> State:
         state = torch.hstack(state).reshape(-1)
         state = torch.nan_to_num(state, nan=0.0, posinf=1, neginf=-1)
 
-        return self.State(state)
+        return self.State(state, batch_size=[])
 
     def __make_job_number_state__(self, parameters: StateEncoder.Input):
         state = [

diff --git a/diploma_thesis/agents/machine/state/deep_marl_mr.py b/diploma_thesis/agents/machine/state/deep_marl_mr.py
@@ -39,7 +39,7 @@ def encode(self, parameters: StateEncoder.Input) -> State:
 
         state = torch.vstack([state, arriving_job_state])
 
-        return self.State(state, job_idx)
+        return self.State(state, job_idx, batch_size=[])
 
     def __make_initial_state(self, parameters: StateEncoder.Input) -> torch.FloatTensor:
         machine = parameters.machine

diff --git a/diploma_thesis/agents/machine/static.py b/diploma_thesis/agents/machine/static.py
@@ -20,4 +20,4 @@ def from_cli(parameters: Dict):
         model = model_from_cli(parameters['model'])
         encoder = state_encoder_from_cli(parameters['encoder'])
 
-        return StaticMachine(model=model, state_encoder=encoder)
+        return StaticMachine(model=model, state_encoder=encoder)
diff --git a/diploma_thesis/agents/utils/action/thompson_sampling.py b/diploma_thesis/agents/utils/action/thompson_sampling.py
diff --git a/diploma_thesis/agents/utils/memory/memory.py b/diploma_thesis/agents/utils/memory/memory.py
@@ -1,11 +1,10 @@
 
-import torch
-
-from torchrl.data import TensorDictReplayBuffer
-from abc import ABCMeta, abstractmethod
-from tensordict.prototype import tensorclass
+from abc import ABCMeta
 from typing import TypeVar
 
+import torch
+from tensordict.prototype import tensorclass
+from torchrl.data import TensorDictReplayBuffer
 
 State = TypeVar('State')
 Action = TypeVar('Action')
@@ -34,6 +33,5 @@ def sample(self) -> Record:
     def sample_n(self, batch_size: int) -> Record:
         return self.buffer.sample(batch_size=batch_size)
 
-    @abstractmethod
     def __len__(self) -> int:
-        pass
+        return len(self.buffer)
diff --git a/diploma_thesis/agents/utils/memory/replay_memory.py b/diploma_thesis/agents/utils/memory/replay_memory.py
@@ -19,4 +19,4 @@ def __init__(self,
     def from_cli(parameters: Dict) -> 'ReplayMemory':
         return ReplayMemory(size=parameters['size'],
                             batch_size=parameters['batch_size'],
-                            prefetch=parameters['prefetch'])
+                            prefetch=parameters.get('prefetch', 1))