Skip to content

Commit

Permalink
Implement tape to record reward and state
Browse files Browse the repository at this point in the history
  • Loading branch information
yura-hb committed Feb 14, 2024
1 parent 9a77d11 commit c31bae9
Show file tree
Hide file tree
Showing 80 changed files with 834 additions and 293 deletions.
9 changes: 6 additions & 3 deletions diploma_thesis/agents/base/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
from agents.utils.memory import Memory, Record
from .encoder import Encoder as StateEncoder, Input, State
from .model import Model, Action, Result
from typing import TypeVar, Generic

Key = TypeVar('Key')

class Agent(Loggable, PhaseUpdatable, metaclass=ABCMeta):

class Agent(Generic[Key], Loggable, PhaseUpdatable, metaclass=ABCMeta):

def __init__(self,
model: Model[Input, State, Action, Result],
Expand Down Expand Up @@ -39,13 +42,13 @@ def update(self, phase: Phase):
@property
@abstractmethod
def is_trainable(self):
pass
return self.phase != EvaluationPhase()

@abstractmethod
def train_step(self):
pass

def store(self, record: Record):
def store(self, key: Key, record: Record):
pass

def schedule(self, parameters: Input) -> Model.Record:
Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/machine/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from .utils import Input as MachineInput
from .dqn import DeepQAgent
from .machine import Machine
from .static import StaticMachine
from .utils import Input as MachineInput

key_to_class = {
"static": StaticMachine,
Expand Down
4 changes: 2 additions & 2 deletions diploma_thesis/agents/machine/dqn.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
class DeepQAgent(Machine):

def is_trainable(self):
return True
return super().is_trainable()

def train_step(self):
pass
Expand All @@ -18,5 +18,5 @@ def from_cli(parameters: Dict):
encoder = state_encoder_from_cli(parameters['encoder'])
memory = memory_from_cli(parameters['memory'])

return DeepQAgent(model=model, state_encoder=encoder, memory=None)
return DeepQAgent(model=model, state_encoder=encoder, memory=memory)

4 changes: 2 additions & 2 deletions diploma_thesis/agents/machine/machine.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from abc import ABCMeta

from agents.base.agent import Agent

from .model import MachineModel, from_cli as model_from_cli
from .state import StateEncoder, from_cli as state_encoder_from_cli
from agents.utils.memory import Memory, from_cli as memory_from_cli
from environment import MachineKey


class Machine(Agent, metaclass=ABCMeta):
class Machine(Agent[MachineKey], metaclass=ABCMeta):
pass
File renamed without changes.
5 changes: 2 additions & 3 deletions diploma_thesis/agents/machine/model/rule/atc.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import torch

from .scheduling_rule import *


Expand All @@ -10,6 +8,7 @@ class ATCSchedulingRule(SchedulingRule):
Source: https://www.jstor.org/stable/2632177
"""

@property
def selector(self):
return torch.argmax

Expand All @@ -27,4 +26,4 @@ def criterion(self, machine: Machine, now: float) -> torch.FloatTensor:
priority = torch.exp(-slack / (0.05 * torch.mean(processing_times)))
priority /= processing_times

return priority
return priority
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/avpro.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class AVPROSchedulingRule(SchedulingRule):
Average Processing Time per Operation scheduling rule
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/covert.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class COVERTSchedulingRule(SchedulingRule):
We assume that the cost is
"""

@property
def selector(self):
return torch.argmax

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/cr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class CRSchedulingRule(SchedulingRule):
processing time
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/crspt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class CRSPTSchedulingRule(SchedulingRule):
the rule selects jobs with the lowest ratio of due time to remaining processing time and current operation time
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/dptlwkr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class DPTLWKRSchedulingRule(SchedulingRule):
Double Processing Time + Least Work Remaining
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/dptlwkrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class DPTLWKRSSchedulingRule(SchedulingRule):
Double Processing Time + Least Work Remaining + Slack
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/dptwinqnpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class DPTWINQNPTSchedulingRule(SchedulingRule):
Double Processing Time + Work In Next Queue + Next Processing Time
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/edd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class EDDSchedulingRule(SchedulingRule):
Earliest Due Date rule, i.e. selects the job whose due date is the earliest
"""

@property
def selector(self):
return torch.argmin

Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/machine/model/rule/fifo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


class FIFOSchedulingRule(SchedulingRule):

@property
def selector(self):
return lambda _: 0

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/gp_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class GP1SchedulingRule(SchedulingRule):
Genetic Programming 1 scheduling rule. Taken from external/PhD-Thesis-Projects/FJSP/sequencing.py
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/gp_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class GP2SchedulingRule(SchedulingRule):
Genetic Programming 2 scheduling rule. Taken from external/PhD-Thesis-Projects/FJSP/sequencing.py
"""

@property
def selector(self):
return torch.argmin

Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/machine/model/rule/lifo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


class LIFOSchedulingRule(SchedulingRule):

@property
def selector(self):
return lambda _: -1

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/lpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class LPTSchedulingRule(SchedulingRule):
Longest Processing Time rule, i.e. selects the job whose current operation has the largest operation time
"""

@property
def selector(self):
return torch.argmax

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/lro.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class LROSchedulingRule(SchedulingRule):
Least Remaining Operations rule, i.e. selects the job that has the smallest number of remaining operations
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/lwkr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class LWRKSchedulingRule(SchedulingRule):
Least Work Remaining rule, i.e. selects the job whose remaining time is the smallest.
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/lwkrmod.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class LWRKMODSchedulingRule(SchedulingRule):
Least Work Remaining + Modified Operational Due date rule. Check implementation of the rules separately.
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/lwkrspt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class LWRKSPTSchedulingRule(SchedulingRule):
i.e. selects jobs that satisfy both criteria (for reference, see lwkr.py and spt.py)
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/mdd.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class MDDSchedulingRule(SchedulingRule):
selects job with the smallest value of max(due_at, operation_completed_at)
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/mod.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class MODSchedulingRule(SchedulingRule):
Modified operational due date
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/mon.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class MONSchedulingRule(SchedulingRule):
i.e. SPT + additional slack factor
"""

@property
def selector(self):
return torch.argmax

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/ms.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class MSSchedulingRule(SchedulingRule):
Minimum Slack scheduling rule, i.e. the rule selects jobs with the minimum slack
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/npt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class NPTSchedulingRule(SchedulingRule):
the smallest
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/ptwinq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class PTWINQSchedulingRule(SchedulingRule):
Processing Time + Work In Next Queue
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/ptwinqs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class PTWINQSSchedulingRule(SchedulingRule):
Processing Time + Work In Next Queue + Slack
"""

@property
def selector(self):
return torch.argmin

Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/machine/model/rule/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class RandomSchedulingRule(SchedulingRule):

@property
def selector(self):
return lambda x: torch.randint(0, len(x), (1,))

Expand Down
3 changes: 2 additions & 1 deletion diploma_thesis/agents/machine/model/rule/scheduling_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ def __init__(self, reduction_strategy: JobReductionStrategy = JobReductionStrate

def __call__(self, machine: 'Machine', now: float) -> Job | WaitInfo:
value = self.criterion(machine, now)
idx = self.selector(value)
selector = self.selector
idx = selector(value)

return machine.queue[idx]

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/spmwk.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class SPMWKSchedulingRule(SchedulingRule):
Slack per Remaining Work scheduling rule
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/spmwkspt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class SPMWKSPTSchedulingRule(SchedulingRule):
Slack per Remaining Work + Shortest Processing Time scheduling rule
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/spt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class SPTSchedulingRule(SchedulingRule):
Shortest Processing Time rule, i.e. selects the job whose current operation has the smallest operation time
"""

@property
def selector(self):
return torch.argmin

Expand Down
1 change: 1 addition & 0 deletions diploma_thesis/agents/machine/model/rule/winq.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ class WINQSchedulingRule(SchedulingRule):
Work In Next Queue scheduling rule
"""

@property
def selector(self):
return torch.argmin

Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/machine/state/deep_marl_indirect.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def encode(self, parameters: StateEncoder.Input) -> State:
state = torch.hstack(state).reshape(-1)
state = torch.nan_to_num(state, nan=0.0, posinf=1, neginf=-1)

return self.State(state)
return self.State(state, batch_size=[])

def __make_job_number_state__(self, parameters: StateEncoder.Input):
state = [
Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/machine/state/deep_marl_mr.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def encode(self, parameters: StateEncoder.Input) -> State:

state = torch.vstack([state, arriving_job_state])

return self.State(state, job_idx)
return self.State(state, job_idx, batch_size=[])

def __make_initial_state(self, parameters: StateEncoder.Input) -> torch.FloatTensor:
machine = parameters.machine
Expand Down
2 changes: 1 addition & 1 deletion diploma_thesis/agents/machine/static.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ def from_cli(parameters: Dict):
model = model_from_cli(parameters['model'])
encoder = state_encoder_from_cli(parameters['encoder'])

return StaticMachine(model=model, state_encoder=encoder)
return StaticMachine(model=model, state_encoder=encoder)
2 changes: 0 additions & 2 deletions diploma_thesis/agents/utils/action/thompson_sampling.py

This file was deleted.

12 changes: 5 additions & 7 deletions diploma_thesis/agents/utils/memory/memory.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@

import torch

from torchrl.data import TensorDictReplayBuffer
from abc import ABCMeta, abstractmethod
from tensordict.prototype import tensorclass
from abc import ABCMeta
from typing import TypeVar

import torch
from tensordict.prototype import tensorclass
from torchrl.data import TensorDictReplayBuffer

State = TypeVar('State')
Action = TypeVar('Action')
Expand Down Expand Up @@ -34,6 +33,5 @@ def sample(self) -> Record:
def sample_n(self, batch_size: int) -> Record:
return self.buffer.sample(batch_size=batch_size)

@abstractmethod
def __len__(self) -> int:
pass
return len(self.buffer)
2 changes: 1 addition & 1 deletion diploma_thesis/agents/utils/memory/replay_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@ def __init__(self,
def from_cli(parameters: Dict) -> 'ReplayMemory':
return ReplayMemory(size=parameters['size'],
batch_size=parameters['batch_size'],
prefetch=parameters['prefetch'])
prefetch=parameters.get('prefetch', 1))
Loading

0 comments on commit c31bae9

Please sign in to comment.