Fixes for new reward & rl models
yura-hb committed Feb 23, 2024
1 parent ac63708 · commit 7156a83
Showing 23 changed files with 104 additions and 48 deletions.
6 changes: 3 additions & 3 deletions diploma_thesis/agents/machine/model/model.py
@@ -1,14 +1,14 @@
 
-from environment import Job, WaitInfo
+from environment import Job
 from agents.base.model import *
 from agents.machine import MachineInput
 
 
-class MachineModel(Model[MachineInput, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class MachineModel(Model[MachineInput, State, Action, Job | None], metaclass=ABCMeta):
 
     Input = MachineInput
 
 
-class NNMachineModel(Model[MachineInput, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class NNMachineModel(Model[MachineInput, State, Action, Job | None], metaclass=ABCMeta):
 
     Input = MachineInput

20 changes: 18 additions & 2 deletions diploma_thesis/agents/machine/model/multi_rule_linear.py
@@ -1,10 +1,16 @@
-from agents.utils import DeepRule
+from typing import List
+
+from agents.utils import DeepRule, NNCLI, ActionSelector
 from .model import *
 from .rule import ALL_SCHEDULING_RULES, SchedulingRule
 
 
 class MultiRuleLinear(NNMachineModel, DeepRule):
 
+    def __init__(self, rules: List[SchedulingRule], model: NNCLI, action_selector: ActionSelector):
+        NNMachineModel.__init__(self)
+        DeepRule.__init__(self, rules, model, action_selector)
+
     def __call__(self, state: State, parameters: MachineModel.Input) -> MachineModel.Record:
         return DeepRule.__call__(self, state, parameters)
 
@@ -15,4 +21,14 @@ def all_rules(cls):
     def make_result(
         self, rule: SchedulingRule, parameters: MachineModel.Input, state: State, action: Action
     ) -> MachineModel.Record:
-        return MachineModel.Record(result=rule(state, parameters), state=state, action=action)
+        return MachineModel.Record(
+            result=rule(machine=parameters.machine, now=parameters.now),
+            state=state,
+            action=action
+        )
+
+    def clone(self):
+        new_model = self.model.clone()
+
+        return MultiRuleLinear(rules=self.rules, model=new_model, action_selector=self.action_selector)

(changed file, name not shown in the capture)
@@ -2,7 +2,7 @@
 import torch
 
 from abc import ABCMeta, abstractmethod
-from environment import Machine, Job, WaitInfo, JobReductionStrategy
+from environment import Machine, Job, JobReductionStrategy
 
 
 class SchedulingRule(metaclass=ABCMeta):

5 changes: 2 additions & 3 deletions diploma_thesis/agents/machine/rl.py
@@ -19,12 +19,11 @@ def is_trainable(self):
         return True
 
     @filter(lambda self: self.phase == TrainingPhase())
-    @filter(lambda self: len(self.memory) > 0)
    def train_step(self):
         self.trainer.train_step(self.model)
 
-    @filter(lambda self: self.phase == TrainingPhase())
-    def store(self, record: Record):
+    @filter(lambda self, *args: self.phase == TrainingPhase())
+    def store(self, key: MachineKey, record: Record):
         self.trainer.store(record)
 
     def schedule(self, parameters):

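Note on the decorator shape above: `store` now takes a `key` as well as a `record`, so the predicate handed to `filter` is written as `lambda self, *args: ...` to absorb the extra positional argument. The project's `utils.filter` is not shown in this commit; a minimal sketch of a predicate-gating decorator with that calling convention (names and behaviour assumed, not taken from the repository):

import functools

def filter(predicate):
    # Hypothetical stand-in for utils.filter: run the wrapped method only when
    # predicate(self, *args) holds, otherwise skip it and return None.
    def decorator(method):
        @functools.wraps(method)
        def wrapper(self, *args, **kwargs):
            if predicate(self, *args):
                return method(self, *args, **kwargs)
            return None
        return wrapper
    return decorator

With a decorator of this shape, `@filter(lambda self, *args: self.phase == TrainingPhase())` simply turns `store(key, record)` into a no-op outside the training phase.
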
5 changes: 5 additions & 0 deletions diploma_thesis/agents/machine/static.py
@@ -1,6 +1,8 @@
 from typing import Dict
 
 from .machine import *
+from ..base.agent import Key
+from ..utils.memory import Record
 
 
 class StaticMachine(Machine):
@@ -15,6 +17,9 @@ def is_trainable(self):
     def train_step(self):
         pass
 
+    def store(self, key: Key, record: Record):
+        pass
+
     @staticmethod
     def from_cli(parameters: Dict):
         model = model_from_cli(parameters['model'])

9 changes: 4 additions & 5 deletions diploma_thesis/agents/utils/model/deep_rule.py
@@ -35,6 +35,10 @@ def all_rules(cls):
     def make_result(self, rule: Rule, parameters: Input, state: State, action) -> Record:
         pass
 
+    @abstractmethod
+    def clone(self):
+        pass
+
     # Main
 
     def update(self, phase: Phase):
@@ -68,11 +72,6 @@ def parameters(self, recurse: bool = True):
     def copy_parameters(self, other: 'DeepRule', decay: float = 1.0):
         self.model.copy_parameters(other.model, decay)
 
-    def clone(self):
-        new_model = self.model.clone()
-
-        return DeepRule(rules=self.rules, model=new_model, action_selector=self.action_selector)
-
     # Utilities
 
     @staticmethod

8 changes: 4 additions & 4 deletions diploma_thesis/agents/utils/rl/__init__.py
@@ -16,10 +16,10 @@
 
 
 def from_cli(parameters):
-    _parameters = parameters
+    _parameters = parameters['parameters']
 
-    memory = memory_from_cli(parameters['memory'])
-    loss = LossCLI.from_cli(parameters['loss'])
-    optimizer = OptimizerCLI.from_cli(parameters['optimizer'])
+    memory = memory_from_cli(_parameters['memory'])
+    loss = LossCLI.from_cli(_parameters['loss'])
+    optimizer = OptimizerCLI.from_cli(_parameters['optimizer'])
 
     return partial(_from_cli, key_to_class=key_to_class, memory=memory, loss=loss, optimizer=optimizer)(parameters)

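`from_cli` now reads the memory, loss and optimizer settings from a nested 'parameters' key while still handing the full dictionary to `_from_cli`. The actual configuration files are not part of this commit; a purely illustrative dictionary that would satisfy the new lookup (all keys and inner values other than 'parameters', 'memory', 'loss' and 'optimizer' are invented):

# Hypothetical configuration shape, for illustration only.
trainer_config = {
    'kind': 'dqn',                      # consumed elsewhere when picking the trainer class
    'parameters': {
        'memory': {'kind': 'replay'},   # handed to memory_from_cli
        'loss': {'kind': 'huber'},      # handed to LossCLI.from_cli
        'optimizer': {'kind': 'adam'},  # handed to OptimizerCLI.from_cli
    },
}
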
13 changes: 6 additions & 7 deletions diploma_thesis/agents/utils/rl/dqn.py
@@ -4,6 +4,7 @@
 import tensordict
 import torch
 
+from utils import filter
 from agents.utils.rl.rl import *
 
 
@@ -37,18 +38,16 @@ def __init__(self,
     def configure(self, model: NNModel):
         super().configure(model)
 
-        self.target_model = model.copy()
+        self.target_model = model.clone()
 
+    @filter(lambda self, *args: len(self.memory) > 0)
     def train_step(self, model: NNModel):
         batch, info = self.memory.sample(return_info=True)
         batch: Record | torch.Tensor = torch.squeeze(batch)
 
-        q_values, td_error = self.estimate_q(model, batch)
-
-        if not self.optimizer.is_connected:
-            self.optimizer.connect(model.parameters())
+        with torch.no_grad():
+            q_values, td_error = self.estimate_q(model, batch)
 
         values = model.values(batch.state)
         loss = self.loss(values, q_values)
 
@@ -83,6 +82,6 @@ def estimate_q(self, model: NNModel, batch: Record | tensordict.TensorDictBase):
 
         return q_values, td_error
 
-    @staticmethod
-    def from_cli(parameters, memory: Memory, loss: LossCLI, optimizer: OptimizerCLI):
-        return DeepQTrainer(memory, optimizer, loss, DeepQTrainer.Configuration.from_cli(parameters))
+    @classmethod
+    def from_cli(cls, parameters, memory: Memory, loss: LossCLI, optimizer: OptimizerCLI):
+        return cls(memory, optimizer, loss, DeepQTrainer.Configuration.from_cli(parameters))

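The `torch.no_grad()` block keeps the bootstrapped Q-targets out of the autograd graph, so the loss only backpropagates through the online network's predictions. A generic, self-contained illustration of that pattern (toy networks and shapes; this is not the repository's `estimate_q`):

import torch
import torch.nn as nn

online = nn.Linear(4, 2)                 # stand-in for the online Q-network
target = nn.Linear(4, 2)                 # stand-in for the target Q-network

states = torch.randn(8, 4)
actions = torch.randint(0, 2, (8, 1))
rewards = torch.randn(8)
gamma = 0.99

with torch.no_grad():                    # targets are treated as constants
    next_q = target(states).max(dim=1).values
    q_target = rewards + gamma * next_q

q_pred = online(states).gather(1, actions).squeeze(1)
loss = nn.functional.smooth_l1_loss(q_pred, q_target)
loss.backward()                          # gradients reach only the online network
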
3 changes: 3 additions & 0 deletions diploma_thesis/agents/utils/rl/rl.py
@@ -18,6 +18,9 @@ def __init__(self, memory: Memory, loss: LossCLI, optimizer: OptimizerCLI):
     def configure(self, model: NNModel):
         self._is_configured = True
 
+        if not self.optimizer.is_connected:
+            self.optimizer.connect(model.parameters())
+
     @property
     def is_configured(self):
         return self._is_configured

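Connecting the optimizer inside `configure` suggests the optimizer wrapper is built lazily, since the model's parameters are not available when the trainer is constructed from the CLI. `OptimizerCLI` itself is not part of this diff; a hypothetical sketch of such a lazily connected wrapper, with the interface assumed from the `is_connected`/`connect` calls above:

import torch

class LazyOptimizer:
    # Hypothetical stand-in: the real OptimizerCLI is not shown in this commit.
    def __init__(self, factory=lambda params: torch.optim.Adam(params, lr=1e-3)):
        self.factory = factory
        self.optimizer = None

    @property
    def is_connected(self):
        return self.optimizer is not None

    def connect(self, parameters):
        self.optimizer = self.factory(parameters)

    def step(self):
        self.optimizer.step()
        self.optimizer.zero_grad()
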
6 changes: 3 additions & 3 deletions diploma_thesis/agents/workcenter/model/model.py
@@ -1,13 +1,13 @@
-from environment import Job, WaitInfo
+from environment import Job, Machine
 from agents.base.model import *
 from agents.workcenter import WorkCenterInput
 
 
-class WorkCenterModel(Model[WorkCenterInput, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class WorkCenterModel(Model[WorkCenterInput, State, Action, Machine | None], metaclass=ABCMeta):
 
     Input = WorkCenterInput
 
 
-class NNWorkCenterModel(Model[NNModel, State, Action, Job | WaitInfo], metaclass=ABCMeta):
+class NNWorkCenterModel(Model[NNModel, State, Action, Machine | None], metaclass=ABCMeta):
 
     Input = WorkCenterInput

19 changes: 17 additions & 2 deletions diploma_thesis/agents/workcenter/model/multi_rule_linear.py
@@ -1,10 +1,16 @@
-from agents.utils import DeepRule
+from typing import List
+
+from agents.utils import DeepRule, NNCLI, ActionSelector
 from .model import *
 from .rule import ALL_ROUTING_RULES, RoutingRule
 
 
 class MultiRuleLinear(NNWorkCenterModel, DeepRule):
 
+    def __init__(self, rules: List[RoutingRule], model: NNCLI, action_selector: ActionSelector):
+        NNWorkCenterModel.__init__(self)
+        DeepRule.__init__(self, rules, model, action_selector)
+
     def __call__(self, state: State, parameters: WorkCenterModel.Input) -> WorkCenterModel.Record:
         return DeepRule.__call__(self, state, parameters)
 
@@ -15,4 +21,13 @@ def all_rules(cls):
     def make_result(
         self, rule: RoutingRule, parameters: WorkCenterModel.Input, state: State, action: Action
     ) -> WorkCenterModel.Record:
-        return WorkCenterModel.Record(result=rule(state, parameters), state=state, action=action)
+        return WorkCenterModel.Record(
+            result=rule(job=parameters.job, work_center=parameters.work_center),
+            state=state,
+            action=action
+        )
+
+    def clone(self):
+        new_model = self.model.clone()
+
+        return MultiRuleLinear(rules=self.rules, model=new_model, action_selector=self.action_selector)

4 changes: 2 additions & 2 deletions diploma_thesis/agents/workcenter/rl.py
@@ -23,8 +23,8 @@ def is_trainable(self):
     def train_step(self):
         self.trainer.train_step(self.model)
 
-    @filter(lambda self: self.phase == TrainingPhase())
-    def store(self, record: Record):
+    @filter(lambda self, *args: self.phase == TrainingPhase())
+    def store(self, key: WorkCenterKey, record: Record):
         self.trainer.store(record)
 
     def schedule(self, parameters):

5 changes: 5 additions & 0 deletions diploma_thesis/agents/workcenter/static.py
@@ -2,6 +2,8 @@
 
 from .model import StaticWorkCenterModel
 from .work_center import *
+from ..base.agent import Key
+from ..utils.memory import Record
 
 
 class Static(WorkCenter):
@@ -16,6 +18,9 @@ def is_trainable(self):
     def train_step(self):
         pass
 
+    def store(self, key: Key, record: Record):
+        pass
+
     @staticmethod
     def from_cli(parameters: Dict):
         model = model_from_cli(parameters['model'])

Empty file.
2 changes: 1 addition & 1 deletion diploma_thesis/environment/__init__.py
@@ -2,7 +2,7 @@
 from .job import Job, JobEvent, ReductionStrategy as JobReductionStrategy
 from .configuration import Configuration
 from .context import Context
-from .agent import Agent, WaitInfo
+from .agent import Agent
 from .delegate import Delegate
 from .machine import Machine, Key as MachineKey, History as MachineHistory
 from .work_center import WorkCenter, Key as WorkCenterKey, History as WorkCenterHistory

10 changes: 6 additions & 4 deletions diploma_thesis/environment/job.py
@@ -355,7 +355,8 @@ def wait_time_on_machine(self, step_idx: int):
         Returns: The time that the operation has been waiting for processing on machine
         """
-        assert step_idx < self.current_step_idx, "Operation must be started on machine to compute wait time"
+        assert step_idx < self.current_step_idx or self.is_completed,\
+            "Operation must be started on machine to compute wait time"
 
         return self.history.started_at[step_idx] - self.history.arrived_at_machine[step_idx]
 
@@ -393,14 +394,15 @@ def time_completion_rate(self, strategy: ReductionStrategy = ReductionStrategy.m
     # State Update
 
     def with_event(self, event: Event):
+        # Clone tensors to avoid in-place modification
         match event.kind:
             case JobEvent.Kind.dispatch:
                 self.current_step_idx = torch.tensor(0)
             case JobEvent.Kind.forward:
-                self.current_step_idx += 1
+                self.current_step_idx = self.current_step_idx.clone() + 1
                 self.current_machine_idx = torch.tensor(-1)
             case JobEvent.Kind.arrival_on_machine:
-                self.current_machine_idx = event.machine_idx
+                self.current_machine_idx = event.machine_idx.clone()
             case _:
                 pass
 
@@ -465,7 +467,7 @@ def __next_remaining_processing_time__(self, step_idx: int, strategy: ReductionS
         Returns: The remaining processing time of the next operation
         """
         result = torch.tensor(0.0, dtype=torch.float)
-        expected_processing_time = self.processing_times[max(self.step_idx + 1, 0):]
+        expected_processing_time = self.processing_times[max(step_idx + 1, 0):]
 
         if expected_processing_time.numel() == 0:
             return result

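The `forward` branch now builds a fresh tensor instead of incrementing in place: `+=` on a tensor mutates the same storage that any previously captured reference (for example, a value kept in a history record or a stored state) still points to. A small standalone illustration of the difference:

import torch

shared = torch.tensor(3)
alias = shared                  # e.g. an index captured earlier in a record

alias += 1                      # in-place add: the captured reference changes too
print(shared)                   # tensor(4)

shared = torch.tensor(3)
alias = shared
alias = alias.clone() + 1       # out-of-place: the original value is preserved
print(shared, alias)            # tensor(3) tensor(4)
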
2 changes: 1 addition & 1 deletion diploma_thesis/environment/shop_floor.py
@@ -255,7 +255,7 @@ def forward(self, job: environment.Job, from_: environment.Machine):
 
     # Decision methods
 
-    def schedule(self, machine: environment.Machine) -> 'environment.Job | environment.WaitInfo':
+    def schedule(self, machine: environment.Machine) -> 'environment.Job | None':
         return self.agent.schedule(self.__make_context__(), machine)
 
     def route(self, work_center: 'environment.WorkCenter', job: environment.Job) -> 'environment.Machine | None':

3 changes: 1 addition & 2 deletions diploma_thesis/environment/work_center.py
@@ -146,10 +146,9 @@ def __dispatch__(self):
             self.shop_floor.did_dispatch(job, self, machine)
 
         self.state.with_flushed_queue()
-
         self.shop_floor.did_finish_dispatch(self)
 
-        self.__starve__()
+        yield self.environment.process(self.__starve__())
 
     def __starve__(self):
         self.on_route = self.environment.event()

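Calling `self.__starve__()` on its own only creates a generator object; under SimPy the generator has to be registered with `environment.process(...)` (and here also yielded, so dispatching waits for it) before any of its events run. A minimal SimPy sketch of that pattern, with toy functions rather than the repository's classes:

import simpy

def starve(env):
    yield env.timeout(1)                  # stand-in for waiting on the next routing event
    print('starve finished at', env.now)

def dispatch(env):
    yield env.timeout(5)                  # stand-in for dispatching work
    yield env.process(starve(env))        # register the generator and wait for it
    print('dispatch resumed at', env.now)

env = simpy.Environment()
env.process(dispatch(env))
env.run()
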
4 changes: 2 additions & 2 deletions diploma_thesis/simulator/simulator.py
@@ -11,7 +11,7 @@
 from agents import MachineInput, WorkCenterInput
 from agents import TrainingPhase, EvaluationPhase, WarmUpPhase, Phase
 from agents.utils.memory import Record
-from environment import Agent, ShopFloor, Job, WaitInfo, Machine, WorkCenter, Context
+from environment import Agent, ShopFloor, Job, Machine, WorkCenter, Context
 from tape import TapeModel, SimulatorInterface
 from utils import Loggable
 from .configuration import RunConfiguration, EvaluateConfiguration
@@ -197,7 +197,7 @@ def did_prepare_work_center_record(
 
     # Agent
 
-    def schedule(self, context: Context, machine: Machine) -> Job | WaitInfo:
+    def schedule(self, context: Context, machine: Machine) -> Job | None:
         parameters = MachineInput(machine, context.moment)
 
         result = self.machine.schedule(parameters)

(changed file, name not shown in the capture)
@@ -44,10 +44,10 @@ def reward_after_production(self, context: Context) -> torch.FloatTensor | None:
         return None
 
     def reward_after_completion(self, contexts: List[Context]):
-        work_center_idx = torch.tensor([c.step_idx for c in contexts])
+        job = contexts[0].job
+        work_center_idx = torch.tensor([job.step_idx[c.step_idx] for c in contexts])
         machine_idx = torch.tensor([c.job.history.arrived_machine_idx[c.step_idx] for c in contexts])
         reward = torch.zeros_like(work_center_idx, dtype=torch.float)
-        job = contexts[0].job
 
         if job.is_tardy_upon_completion:
             wait_time = torch.FloatTensor([job.wait_time_on_machine(c.step_idx) for c in contexts])

3 changes: 2 additions & 1 deletion diploma_thesis/tape/machine/global_tardiness_reward.py
@@ -39,7 +39,8 @@ def reward_after_production(self, context: Context) -> torch.FloatTensor | None:
         return None
 
     def reward_after_completion(self, contexts: List[Context]):
-        work_center_idx = torch.tensor([c.step_idx for c in contexts])
+        job = contexts[0].job
+        work_center_idx = torch.tensor([job.step_idx[c.step_idx] for c in contexts])
         machine_idx = torch.tensor([c.job.history.arrived_machine_idx[c.step_idx] for c in contexts])
         reward = torch.zeros_like(work_center_idx, dtype=torch.float)
 

6 changes: 3 additions & 3 deletions diploma_thesis/tape/machine/surrogate_tardiness_reward.py
@@ -84,7 +84,7 @@ def __compute_reward__(self, context: Context):
         critical_level_loser = critical_level[~context.chosen]
 
         earned_slack_chosen = torch.mean(context.processing_time[~context.chosen])
-        earned_slack_chosen *= critical_level_chosen
+        earned_slack_chosen = earned_slack_chosen * critical_level_chosen
 
         consumed_slack_loser = context.processing_time[context.chosen] * critical_level_loser.mean()
 
@@ -95,8 +95,8 @@ def __compute_reward__(self, context: Context):
 
         reward = ((reward_slack + reward_winq) / self.configuration.span).clip(-1, 1)
 
-        return reward
+        return reward.view([])
 
     @staticmethod
     def from_cli(parameters) -> MachineReward:
-        return SurrogateTardinessReward(SurrogateTardinessReward.Configuration.from_cli(parameters))
+        return SurrogateTardinessReward(configuration=SurrogateTardinessReward.Configuration.from_cli(parameters))

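Two of the changes above are shape-related: an in-place `*=` writes into the left-hand tensor's existing storage, so it cannot broadcast a 0-d mean up to the shape of the right-hand operand, while `.view([])` reshapes a one-element tensor into a 0-d scalar. A small illustration with generic tensors (not the reward's real inputs):

import torch

a = torch.tensor(2.0)           # 0-d tensor, like the result of torch.mean(...)
b = torch.tensor([0.5, 1.5])    # 1-d tensor

# a *= b                        # would raise: output with shape [] doesn't match
                                # the broadcast shape [2]
a = a * b                       # out-of-place multiply broadcasts to shape [2]
print(a.shape)                  # torch.Size([2])

reward = torch.tensor([0.7])    # one-element tensor
print(reward.view([]).shape)    # torch.Size([]), i.e. a 0-d scalar
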