Implement fixes in MARL agent
yura-hb committed Feb 24, 2024
1 parent 6174dcf commit a38b620
Showing 18 changed files with 72 additions and 27 deletions.
27 changes: 19 additions & 8 deletions diploma_thesis/agents/base/marl_agent.py
@@ -1,17 +1,19 @@
import copy
from typing import Dict

import pandas as pd

from agents.utils import TrainingPhase
from agents.utils.rl import RLTrainer
from utils import filter
from .agent import *
from .rl_agent import *
from .model import NNModel


-class MARLAgent(Generic[Key], Agent[Key]):
+class MARLAgent(Generic[Key], RLAgent[Key]):

     def __init__(self, model: NNModel, state_encoder: StateEncoder, trainer: RLTrainer, is_model_distributed: bool):
-        super().__init__(model, state_encoder)
+        super().__init__(model, state_encoder, trainer)

         self.trainer: RLTrainer | Dict[Key, RLTrainer] = trainer
         self.is_model_distributed = is_model_distributed
@@ -34,6 +36,7 @@ def setup(self, shop_floor: ShopFloor):
assert is_key_set_equal or is_evaluating_with_centralized_model, \
("Multi-Agent model should be configured for the same shop floor architecture "
"or have centralized action network")

return

self.is_configured = True
@@ -61,21 +64,29 @@ def store(self, key: Key, record: Record):
self.trainer[key].store(record)

     def loss_record(self):
-        result = [self.trainer[key].loss_record() for key in self.keys]
+        result = []

-        return result
+        for key in self.keys:
+            loss_record = self.trainer[key].loss_record()
+
+            for k, v in key.__dict__.items():
+                loss_record[k] = int(v)
+
+            result += [loss_record]
+
+        return pd.concat(result)

     def clear_memory(self):
         for key in self.keys:
             self.trainer[key].clear()

     def schedule(self, key: Key, parameters):
         state = self.encode_state(parameters)

-        result = self.__model_for_key__(key)(state, parameters)
+        model = self.__model_for_key__(key)
+        result = model(state, parameters)

         if not self.trainer[key].is_configured:
-            self.trainer[key].configure(self.model)
+            self.trainer[key].configure(model)

         return result

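A minimal sketch (not part of this commit) of what the reworked loss_record above produces: each trainer's loss table is annotated with the fields of its key and the pieces are concatenated into a single frame. The MachineKey dataclass, its field names and the loss values are hypothetical; only the key.__dict__ annotation and pd.concat usage mirror the change.

from dataclasses import dataclass

import pandas as pd


@dataclass(frozen=True)
class MachineKey:  # hypothetical key type; real keys come from the agent's shop-floor setup
    work_center_idx: int
    machine_idx: int


# stand-ins for self.trainer[key].loss_record()
trainers = {
    MachineKey(0, 0): pd.DataFrame({'loss': [0.91, 0.47]}),
    MachineKey(0, 1): pd.DataFrame({'loss': [1.12, 0.63]}),
}

result = []

for key, loss_record in trainers.items():
    # annotate each record with the key fields, as MARLAgent.loss_record now does
    for k, v in key.__dict__.items():
        loss_record[k] = int(v)

    result += [loss_record]

print(pd.concat(result))  # one table with loss, work_center_idx and machine_idx columns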
7 changes: 7 additions & 0 deletions diploma_thesis/agents/utils/graph/graph_encoder.py
@@ -0,0 +1,7 @@


class GraphEncoder:

    def __init__(self):
        pass

2 changes: 1 addition & 1 deletion diploma_thesis/agents/utils/memory/__init__.py
@@ -2,7 +2,7 @@
from functools import partial

from utils import from_cli
-from .memory import Record, Memory
+from .memory import Record, Memory, NotReadyException
from .prioritized_replay_memory import PrioritizedReplayMemory
from .replay_memory import ReplayMemory

10 changes: 9 additions & 1 deletion diploma_thesis/agents/utils/memory/memory.py
@@ -20,16 +20,23 @@ class Record:
     done: torch.BoolTensor


+class NotReadyException(BaseException):
+    pass
+
+
 class Memory(metaclass=ABCMeta):

     def __init__(self, configuration: Configuration):
         self.configuration = configuration
-        self.buffer = self.__make_buffer__()
+        self.buffer: TensorDictReplayBuffer = self.__make_buffer__()

     def store(self, record: Record):
         self.buffer.extend(record)

     def sample(self, return_info: bool = False) -> Record:
+        if len(self.buffer) < self.buffer._batch_size:
+            raise NotReadyException()
+
         return self.buffer.sample(return_info=return_info)

     def sample_n(self, batch_size: int) -> Record:
@@ -55,3 +62,4 @@ def __getstate__(self):

     def __setstate__(self, state):
         self.configuration = state
+        self.buffer = self.__make_buffer__()
8 changes: 5 additions & 3 deletions diploma_thesis/agents/utils/rl/dqn.py
@@ -40,10 +40,12 @@ def configure(self, model: NNModel):

         self.target_model = model.clone()

-    @filter(lambda self, *args: len(self.memory) > 0)
     def train_step(self, model: NNModel):
-        batch, info = self.memory.sample(return_info=True)
-        batch: Record | torch.Tensor = torch.squeeze(batch)
+        try:
+            batch, info = self.memory.sample(return_info=True)
+            batch: Record | torch.Tensor = torch.squeeze(batch)
+        except NotReadyException:
+            return

         with torch.no_grad():
             q_values, td_error = self.estimate_q(model, batch)
2 changes: 1 addition & 1 deletion diploma_thesis/agents/utils/rl/rl.py
@@ -5,7 +5,7 @@
import torch

from agents.base.model import NNModel
-from agents.utils.memory import Record, Memory
+from agents.utils.memory import Record, Memory, NotReadyException
from agents.utils.nn import LossCLI, OptimizerCLI


14 changes: 10 additions & 4 deletions diploma_thesis/configuration/jsp.yml
@@ -1,7 +1,7 @@

task:
kind: 'multi_task'
-n_workers: 10
+n_workers: 8
debug: False

tasks:
@@ -52,10 +52,16 @@ task:
mods:
# Reserved keywords to build GridFactory resulting a list
__factory__:
-# RL Agents
+# RL Trainers
 - [
-'rl/dqn.yml',
-'rl/ddqn.yml'
+'rl/trainer/dqn.yml',
+'rl/trainer/ddqn.yml'
 ]
+# RL Agent Kind
+- [
+'rl/agent/single.yml',
+# 'rl/agent/multi_agent.yml',
+# 'rl/agent/centralized.yml'
+]
# Memory
- [
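For intuition only: the __factory__ lists above feed a grid expansion, so every trainer mod is combined with every enabled agent mod (and, further down, with each memory mod). The repository's GridFactory may differ in detail; this sketch just shows the cross-product idea with itertools.

from itertools import product

# assumption: each inner __factory__ list holds alternative mod files and the factory
# emits one run configuration per combination across the lists
trainer_mods = ['rl/trainer/dqn.yml', 'rl/trainer/ddqn.yml']
agent_mods = ['rl/agent/single.yml']  # multi_agent / centralized variants are commented out above

for combination in product(trainer_mods, agent_mods):
    print(list(combination))
# ['rl/trainer/dqn.yml', 'rl/agent/single.yml']
# ['rl/trainer/ddqn.yml', 'rl/agent/single.yml']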
@@ -0,0 +1,3 @@
kind: 'marl'
parameters:
is_model_distributed: False
@@ -0,0 +1 @@
kind: 'marl'
@@ -0,0 +1 @@
kind: 'rl'
5 changes: 3 additions & 2 deletions diploma_thesis/configuration/simulation.yml
@@ -11,7 +11,8 @@ task:
base_path: 'configuration/mods/machine_agent/model.yml'
mods: [
'encoding/mr.yml',
-'model/model/marl_mr.yml'
+'model/model/marl_mr.yml',
+'rl/agent/multi_agent.yml'
]


@@ -27,7 +28,7 @@ task:

tape:
machine_reward:
-kind: 'surrogate_tardiness'
+kind: 'global_decomposed_tardiness'
parameters:
span: 256

11 changes: 8 additions & 3 deletions diploma_thesis/tape/machine/surrogate_tardiness_reward.py
@@ -76,6 +76,11 @@ def reward_after_completion(self, contexts: List[Context]):
)

     def __compute_reward__(self, context: Context):
+        # Note: Potentially, it may happen that there was only one job in the queue. In this case the machine doesn't
+        # perform any decision.
+        if context.chosen.numel() == 1:
+            return torch.tensor(0.0)
+
         slack = context.slack

         critical_level = 1 - slack / (torch.abs(slack) + self.configuration.critical_level_factor)
@@ -89,9 +94,9 @@ def __compute_reward__(self, context: Context):
         consumed_slack_loser = context.processing_time[context.chosen] * critical_level_loser.mean()

         reward_slack = earned_slack_chosen - consumed_slack_loser
-        reward_winq = (
-            context.winq[~context.chosen].mean() - context.winq[context.chosen]
-        ) * self.configuration.winq_factor
+
+        reward_winq = (context.winq[~context.chosen].mean() - context.winq[context.chosen])
+        reward_winq = reward_winq * self.configuration.winq_factor

         reward = ((reward_slack + reward_winq) / self.configuration.span).clip(-1, 1)

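A toy illustration of the two behaviours touched above, with made-up numbers and a stand-in Context (this is not the full reward, which also involves the slack and critical-level terms): the early exit when only one job was queued, and the WINQ term that favours choosing the job with the least work in its next queue.

from types import SimpleNamespace

import torch

configuration = SimpleNamespace(winq_factor=0.2)  # made-up factor

# Only one job in the queue: the machine made no real decision, so the reward is neutral.
context = SimpleNamespace(chosen=torch.tensor([True]))

if context.chosen.numel() == 1:
    print(torch.tensor(0.0))  # tensor(0.)

# Three candidate jobs with the first one chosen: compare the mean WINQ of the skipped
# jobs against the WINQ of the chosen one, scaled by winq_factor.
context = SimpleNamespace(chosen=torch.tensor([True, False, False]),
                          winq=torch.tensor([2.0, 5.0, 7.0]))

reward_winq = (context.winq[~context.chosen].mean() - context.winq[context.chosen])
reward_winq = reward_winq * configuration.winq_factor

print(reward_winq)  # tensor([0.8000]): picking the low-WINQ job earns a positive term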
2 changes: 1 addition & 1 deletion diploma_thesis/tape/queue/machine_queue.py
@@ -36,7 +36,7 @@ def clear_all(self):
@filter(lambda self, context, *args, **kwargs: context.shop_floor.id in self.queue)
@filter(lambda self, _, __, record: isinstance(record.result, Job))
def register(self, context: Context, machine: Machine, record: MachineModel.Record):
-if record.result:
+if record.result is not None:
self.queue[context.shop_floor.id][machine.key][record.result.id] = TapeRecord(
record=Record(
state=record.state,
2 changes: 1 addition & 1 deletion diploma_thesis/workflow/__init__.py
@@ -2,4 +2,4 @@
from .workflow import Workflow
from .simulation import Simulation
from .tournament import Tournament
-from .multi_simulation import MultiSimulation
+from .multi_simulation import MultiSimulation
2 changes: 1 addition & 1 deletion diploma_thesis/workflow/simulation.py
@@ -187,7 +187,7 @@ def __process_rl_agents__(simulator: Simulator, output_dir: str):

for model, loss_path in zip(models, loss_paths):
if isinstance(model, RLAgent):
-loss = model.trainer.loss_record()
+loss = model.loss_record()
path = os.path.join(output_dir, loss_path)
loss.to_csv(path, index=True)
model.clear_memory()
2 changes: 1 addition & 1 deletion diploma_thesis/workflow/workflow.py
@@ -74,7 +74,7 @@ def __add_handlers__(self, logger, formatter, filename: str, log_stdout: bool):
def __get_logger__(self, name):
workflow_id = self.workflow_id

-logger = logging.Logger(name + '_' + self.workflow_id if len(workflow_id) > 0 else name)
+logger = logging.getLogger(name + '_' + self.workflow_id if len(workflow_id) > 0 else name)
logger.setLevel(logging.INFO)

return logger
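A quick illustration of the standard-library behaviour behind the switch above (this snippet is not from the repository): instantiating logging.Logger directly creates a fresh object the logging manager never sees, so repeated calls yield unrelated loggers, while logging.getLogger returns the cached, managed instance for a given name.

import logging

a = logging.Logger('simulation')
b = logging.Logger('simulation')
print(a is b)  # False: two unrelated logger objects

c = logging.getLogger('simulation')
d = logging.getLogger('simulation')
print(c is d)  # True: the same managed instance is reused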
