diff --git a/diploma_thesis/agents/base/state.py b/diploma_thesis/agents/base/state.py index c0472414..6bfe1185 100644 --- a/diploma_thesis/agents/base/state.py +++ b/diploma_thesis/agents/base/state.py @@ -93,9 +93,11 @@ def to_pyg_graph(self) -> pyg.data.Data | pyg.data.HeteroData: if isinstance(key, str): data[key] = self.data[key] + # Nested if len(key) == 2: data[key[0]][key[1]] = self.data[key] + # Edge if len(key) == 4: data[key[:3]][key[3]] = self.data[key] diff --git a/diploma_thesis/agents/machine/state/custom_encoder_v2.py b/diploma_thesis/agents/machine/state/custom_encoder_v2.py index 9c4ef5e0..284260cd 100644 --- a/diploma_thesis/agents/machine/state/custom_encoder_v2.py +++ b/diploma_thesis/agents/machine/state/custom_encoder_v2.py @@ -72,7 +72,7 @@ def __encode__(self, parameters: StateEncoder.Input) -> State: completion_rate.view(-1), critical_ratios.view(-1), - placeholder + wait_times, + placeholder + wait_times / self.norm_factor, placeholder + machine_util_rate, placeholder + arriving_jobs, placeholder + will_arrive_jobs, diff --git a/diploma_thesis/agents/machine/state/deep_marl_indirect.py b/diploma_thesis/agents/machine/state/deep_marl_indirect.py index 5ae7ab01..da772b1f 100644 --- a/diploma_thesis/agents/machine/state/deep_marl_indirect.py +++ b/diploma_thesis/agents/machine/state/deep_marl_indirect.py @@ -7,7 +7,7 @@ from environment import JobReductionStrategy from .encoder import StateEncoder -# TODO: - Update normalized state + class DEEPMARLIndirectStateEncoder(StateEncoder): diff --git a/diploma_thesis/agents/utils/nn/loss.py b/diploma_thesis/agents/utils/nn/loss.py index f6e1ddcd..71dd6882 100644 --- a/diploma_thesis/agents/utils/nn/loss.py +++ b/diploma_thesis/agents/utils/nn/loss.py @@ -13,7 +13,7 @@ class Configuration: def __init__(self, configuration: Configuration): super().__init__() - + self.configuration = configuration self.loss = self.__make_loss__() diff --git a/diploma_thesis/agents/utils/return_estimator/gae.py 
b/diploma_thesis/agents/utils/return_estimator/gae.py index b3858838..a1f33d6c 100644 --- a/diploma_thesis/agents/utils/return_estimator/gae.py +++ b/diploma_thesis/agents/utils/return_estimator/gae.py @@ -37,8 +37,6 @@ def update_returns(self, records: List[Record]) -> List[Record]: records[i].info[Record.ADVANTAGE_KEY] = coef ** i * advantage + next_advantage records[i].info[Record.RETURN_KEY] = records[i].info[Record.ADVANTAGE_KEY] + value - - return records[:-1] @staticmethod diff --git a/diploma_thesis/agents/utils/return_estimator/n_step.py b/diploma_thesis/agents/utils/return_estimator/n_step.py index 93c30207..0d13416e 100644 --- a/diploma_thesis/agents/utils/return_estimator/n_step.py +++ b/diploma_thesis/agents/utils/return_estimator/n_step.py @@ -37,6 +37,8 @@ def __init__(self, configuration: Configuration): self.configuration = configuration + if self.configuration.n > 1: + self.value_fetch_method = ValueFetchMethod.ACTION @property def discount_factor(self) -> float: @@ -75,7 +77,7 @@ def update_returns(self, records: List[Record]) -> List[Record]: off_policy_weights += [1] for i in range(len(records)): - g = records[i].info[Record.VALUE_KEY] + g = self.get_value(records[i]) n = min(self.configuration.n, len(records) - i) weights = off_policy_weights[i:i+n] diff --git a/diploma_thesis/agents/utils/rl/rl.py b/diploma_thesis/agents/utils/rl/rl.py index ab16ce3d..6644988e 100644 --- a/diploma_thesis/agents/utils/rl/rl.py +++ b/diploma_thesis/agents/utils/rl/rl.py @@ -105,7 +105,7 @@ def __train_step__(self, model: Policy): torch.cuda.empty_cache() - print(f'Train step: { time.time() - start } Optimizer Step: { self.optimizer.step_count }') + print(f'Train step: { time.time() - start } Optimizer Step: { self.optimizer.step_count } Learning Rate: { self.optimizer.learning_rate}') def clear(self): self.loss_cache = [] diff --git a/diploma_thesis/agents/utils/rl/storage.py b/diploma_thesis/agents/utils/rl/storage.py index 98e3af2a..6f0087f0 100644 --- 
a/diploma_thesis/agents/utils/rl/storage.py +++ b/diploma_thesis/agents/utils/rl/storage.py @@ -27,9 +27,9 @@ def store(self, sample: TrainingSample): record.info['episode'] = sample.episode_id # Remove extra fields for training record - if record.state.graph is not None and Graph.JOB_INDEX_MAP in record.state.graph.data.keys(): - del record.state.graph.data[Graph.JOB_INDEX_MAP] - del record.next_state.graph.data[Graph.JOB_INDEX_MAP] + for graph in [record.state.graph, record.next_state.graph]: + if graph is not None and Graph.JOB_INDEX_MAP in graph.data.keys(): + del graph.data[Graph.JOB_INDEX_MAP] if self.is_episodic: self.memory.store([records]) diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/0/experiment.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/0/experiment.yml new file mode 100644 index 00000000..f79aaaab --- /dev/null +++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/0/experiment.yml @@ -0,0 +1,207 @@ +# Evaluate the effectiveness of basic DQNs on the JSP environment +base_model: &base_model 'configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml' + + +default_mods: &default_mods + - 'util/train_schedule/on_store_256.yml' + - 'agent/dqn/ddqn.yml' +# - 'agent/dqn/dueling.yml' + - 'agent/dqn/prioritized.yml' +# - 'agent/dqn/steps/3.yml' + - 'util/optimizer/adam_stationary.yml' + - 'util/optimizer/grad_norm.yml' +# - 'util/infrastructure/cuda.yml' + +############################################################################################### + +dqn_1: &dqn_1 + base_path: *base_model + template: 'custom_gat' + mod_dirs: + - 'configuration/mods/machine/mods' + mods: + - *default_mods + + +############################################################################################### + +dqn_2: &dqn_2 + base_path: *base_model + template: 'custom_gin' + mod_dirs: + - 'configuration/mods/machine/mods' + mods: + - *default_mods 
+############################################################################################### + +dqn_3: &dqn_3 + base_path: *base_model + template: 'hierarchical_gin' + mod_dirs: + - 'configuration/mods/machine/mods' + mods: + - *default_mods + +############################################################################################### + +dqn_4: &dqn_4 + base_path: *base_model + template: 'hierarchical_gat' + mod_dirs: + - 'configuration/mods/machine/mods' + mods: + - *default_mods + +############################################################################################### + +reward: &reward + - kind: 'surrogate_tardiness' + parameters: + winq_factor: 0.2 + span: 80 + critical_level_factor: 64 + +############################################################################################## + +# 8 runs + +long_single_source_run: &long_single_source_run + parameters: + mods: + __inout_factory__: + - [ ['utilization/90.yml' ]] + nested: + parameters: + dispatch: + seed: + - '__range__': [ 2000, 2200 ] + + +############################################################################################### + + +task: + kind: 'multi_task' + n_workers: 1 + n_threads: 30 + debug: False + store_run_statistics: False + output_dir: 'results/jsp/experiments/FINAL/BEST/0' + + tasks: + - kind: 'multi_value' + parameters: + base: + name: 'model' + output_dir: '1' + log_stdout: False + seed: 123 + + machine_agent: + kind: 'mod' + parameters: + base_path: 'configuration/mods/machine_agent/model.yml' + mods: [ ] + + work_center_agent: + kind: 'static' + parameters: + model: + kind: 'static' + parameters: + rule: 'et' + encoder: + kind: 'plain' + + tape: + machine_reward: + kind: 'surrogate_tardiness' + + work_center_reward: + kind: 'no' + + simulator: + kind: 'td' + + + run: + kind: 'mod' + parameters: + base_path: 'configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml' + mod_dirs: + - 'configuration/mods/run/mods' + mods: [] + nested: + parameters: + simulations: 
+ - name: '' + kind: 'multi_value' + parameters: + base: + kind: 'mod' + parameters: + base_path: 'configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml' + mod_dirs: + - 'configuration/mods/simulation/mods' + mods: [ ] + values: + values: + __concat__: + +# - output_dir: 'Global' +# machine_agent: +# parameters: +# - *dqn_1 +# - *dqn_2 +# - *dqn_3 +# - *dqn_4 +# graph: +# transition_model: +# kind: 'base' +# parameters: +# forward: +# kind: 'complete' +# +# schedule: +# kind: 'complete' +# +# memory: 0 +# is_machine_set_in_work_center_connected: False +# is_work_center_set_in_shop_floor_connected: False + + - output_dir: 'Local' + machine_agent: + parameters: + - *dqn_1 +# - *dqn_2 +# - *dqn_3 +# - *dqn_4 + graph: + transition_model: + kind: 'base' + parameters: + forward: + kind: 'complete' + + schedule: + kind: 'machine_compressed' + + memory: 0 + is_machine_set_in_work_center_connected: True + is_work_center_set_in_shop_floor_connected: True + + tape: + machine_reward: + *reward + + run: + parameters: + nested: + parameters: + simulations: + __0__: + parameters: + values: + __concat__: + - *long_single_source_run diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml new file mode 100644 index 00000000..b51dc68d --- /dev/null +++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/flexible_machine.yml @@ -0,0 +1,79 @@ + +kind: 'rl' +parameters: + + model: + kind: 'deep_rule' + parameters: + policy: + kind: 'flexible_action' + parameters: + policy_method: 'independent' + + model: + __model__: '' + + action_selector: + kind: 'phase_selector' + parameters: + default: + kind: 'greedy' + phases: + - phase: + kind: 'warm_up' + parameters: + step: 0 + action_selector: + kind: 'uniform' + - phase: + kind: 'warm_up' + parameters: + step: 1 + action_selector: + kind: 'epsilon_greedy' + parameters: 
+ epsilon: 0.4 + - phase: + kind: 'training' + action_selector: + kind: 'epsilon_greedy' + parameters: + epsilon: 0.4 + min_epsilon: 0.05 + decay_factor: 0.999 + decay_steps: 125 + + __encoder__: '' + + trainer: + kind: 'dqn' + parameters: + decay: 1.0 + update_steps: 50 + epochs: 5 + + memory: + kind: 'replay' + parameters: + size: 16394 + batch_size: 1024 + + loss: + kind: 'mse' + parameters: + reduction: 'none' + + optimizer: + model: + kind: 'adam' + parameters: + lr: 0.001 + scheduler: + kind: 'exponential' + parameters: + gamma: 0.999 + + return: + kind: 'no' + parameters: + discount_factor: 0.95 diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml new file mode 100644 index 00000000..683fca9c --- /dev/null +++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/run.yml @@ -0,0 +1,23 @@ + +# JSP training based on the DEEP-MARL thesis + +kind: 'plain' +parameters: + timeline: + warmup: + - 5000 + - 5000 + + duration: 20000000 + + machine_train_schedule: + pretrain_steps: 10 + train_interval: 25 + max_training_steps: 100000000 + + work_center_train_schedule: + pretrain_steps: 0 + train_interval: 100 + max_training_steps: 0 + + n_workers: 10 diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml new file mode 100644 index 00000000..d36d7f86 --- /dev/null +++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/simulation.yml @@ -0,0 +1,51 @@ + + + +kind: 'simulation' +parameters: + configuration: + timespan: 5000 + machines_per_work_center: 1 + work_center_count: 10 + deduce_naive_actions: True + + dispatch: + initial_job_assignment: + kind: 'n_per_machine' + parameters: + n: 3 + + job_sampler: + kind: 'dynamic' + parameters: + n_jobs: 200 + processing_times: + kind: 'uniform' + 
parameters: + uniform: [ 1, 100 ] + noise: [ 0, 5 ] + permutation: + uneveness: 5 + due_time: + kind: 'uniform' + parameters: + uniform: [ 0, 2 ] + job_arrival_time_on_machine: + kind: 'expected_utilization' + parameters: + value: 0.0 + + + breakdown: + kind: 'no' + parameters: + breakdown_arrival: + kind: 'exponential' + parameters: + mean: 1000 + repair_duration: + kind: 'uniform' + parameters: + uniform: [ 200, 300 ] + + seed: 42 diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/encoder.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/encoder.yml new file mode 100644 index 00000000..b661bd9b --- /dev/null +++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/encoder.yml @@ -0,0 +1,8 @@ + +encoder: + kind: 'custom_v2' + parameters: + is_homogeneous: True + is_undirected: True + is_local: True + append_target_mask: True diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/model.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/model.yml new file mode 100644 index 00000000..d9139a63 --- /dev/null +++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/model.yml @@ -0,0 +1,79 @@ +layers: + + # Actor + + - kind: 'graph_model' + parameters: + layers: + - kind: 'linear' + parameters: + dim: 64 + activation: 'leaky_relu' + signature: 'x -> x' + - kind: 'gat' + parameters: + in_channels: -1 + out_channels: 64 + num_layers: 2 + v2: True + heads: 8 + hidden_channels: 64 + jk: 'cat' + - kind: 'linear' + parameters: + dim: 64 + activation: 'leaky_relu' + signature: 'x -> x' + - kind: 'mask' + + signature: 'graph -> actions_hidden, actions_batch' + + - kind: 'linear' + parameters: + dim: 64 + activation: 'tanh' + signature: 'actions_hidden -> actions_hidden' + + # Critic + + - kind: 'graph_model' 
+ parameters: + layers: + - kind: 'linear' + parameters: + dim: 64 + activation: 'leaky_relu' + signature: 'x -> x' + - kind: 'gat' + parameters: + in_channels: -1 + out_channels: 64 + num_layers: 3 + v2: True + heads: 4 + hidden_channels: 64 + jk: 'cat' + - kind: 'linear' + parameters: + dim: 64 + activation: 'leaky_relu' + signature: 'x -> x' + - kind: 'mask' + + signature: 'graph -> values_hidden, values_batch' + + - kind: 'mean_pool' + parameters: + dim: 64 + signature: 'values_hidden, values_batch -> values_hidden' + + - kind: 'linear' + parameters: + dim: 64 + activation: 'leaky_relu' + signature: 'values_hidden -> values_hidden' + + - kind: 'output' + parameters: + value: 'values_hidden' + actions: 'actions_hidden' diff --git a/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/rules.yml b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/rules.yml new file mode 100644 index 00000000..f26787ad --- /dev/null +++ b/diploma_thesis/configuration/experiments/jsp/GRAPH-BEST/experiments/1/templates/custom_gat/rules.yml @@ -0,0 +1,9 @@ + +rules: + - 'spt' + - 'cr' + - 'ms' + - 'winq' + - 'edd' + - 'lwkr' + - 'atc' \ No newline at end of file diff --git a/diploma_thesis/configuration/experiments/jsp/tournament.yml b/diploma_thesis/configuration/experiments/jsp/tournament.yml index 9b37861c..41e6f0d7 100644 --- a/diploma_thesis/configuration/experiments/jsp/tournament.yml +++ b/diploma_thesis/configuration/experiments/jsp/tournament.yml @@ -70,7 +70,7 @@ task: weight: 1 direction: 'minimize' scale: 'linear' - at: 5000 + at: 8000 limit: 100 reward: @@ -190,35 +190,37 @@ task: job_sampler: kind: 'dynamic' parameters: + n_jobs: 200 processing_times: kind: 'uniform' parameters: - uniform: [ 1, 50 ] - noise: [ 0, 10 ] + uniform: [ 1, 100 ] + noise: [ 0, 5 ] permutation: uneveness: 5 due_time: kind: 'uniform' parameters: - uniform: [ 0.0, 2 ] + uniform: [ 0, 2 ] job_arrival_time_on_machine: 
kind: 'expected_utilization' parameters: value: 0.0 + breakdown: - kind: 'dynamic' + kind: 'no' parameters: breakdown_arrival: kind: 'exponential' parameters: - mean: 5000 + mean: 1000 repair_duration: kind: 'uniform' parameters: uniform: [ 200, 300 ] - seed: 32 + seed: 42 values: @@ -235,18 +237,18 @@ task: job_sampler: parameters: __concat__: -# - job_arrival_time_on_machine: -# parameters: -# value: -# - 0.7 -# n_jobs: 150 + - job_arrival_time_on_machine: + parameters: + value: + - 0.7 + n_jobs: 200 - job_arrival_time_on_machine: parameters: value: - 0.8 - n_jobs: 150 -# - job_arrival_time_on_machine: -# parameters: -# value: -# - 0.9 -# n_jobs: 150 + n_jobs: 200 + - job_arrival_time_on_machine: + parameters: + value: + - 0.9 + n_jobs: 200 diff --git a/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml b/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml index bef1063f..00ff86bb 100644 --- a/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml +++ b/diploma_thesis/configuration/mods/machine/mods/agent/dqn/steps/3.yml @@ -8,4 +8,4 @@ parameters: discount: 0.9 lambda_factor: 0.9 n: 3 - off_policy: true + off_policy: False diff --git a/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml b/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml index 9f3bda3e..58adeeff 100644 --- a/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml +++ b/diploma_thesis/configuration/mods/machine/mods/util/optimizer/adam_stationary.yml @@ -6,6 +6,6 @@ parameters: model: kind: 'adam' parameters: - lr: 0.001 + lr: 0.0005 betas: [0.99, 0.99] weight_decay: 0.000001 \ No newline at end of file diff --git a/diploma_thesis/environment/job.py b/diploma_thesis/environment/job.py index 677700d1..b366e86a 100644 --- a/diploma_thesis/environment/job.py +++ b/diploma_thesis/environment/job.py @@ -452,7 +452,7 @@ def with_due_at(self, due_at: 
torch.FloatTensor): def __processing_time_on_work_center__(cls, steps: torch.LongTensor, processing_times: torch.LongTensor, - step_idx: int, + step_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean): """ Returns: The processing time of the operation in work center @@ -468,7 +468,7 @@ def __processing_time_on_work_center__(cls, @lru_cache def __processing_time_on_machine__(cls, steps: torch.LongTensor, - processing_times: torch.LongTensor, + processing_times: torch.LongTensor, step_idx: int, machine_idx: int): """ @@ -485,7 +485,7 @@ def __processing_time_on_machine__(cls, def __next_processing_time__(cls, steps: torch.LongTensor, processing_times: torch.LongTensor, - step_idx: int, + step_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean): """ Returns: The processing time of the next operation @@ -516,12 +516,12 @@ def __remaining_processing_time__(cls, result += cls.__next_remaining_processing_time__(processing_times, step_idx, strategy) return result - + @classmethod @lru_cache def __next_remaining_processing_time__(cls, - processing_times: torch.LongTensor, - step_idx: int, + processing_times: torch.LongTensor, + step_idx: int, strategy: ReductionStrategy = ReductionStrategy.mean): """ Returns: The remaining processing time of the next operation diff --git a/diploma_thesis/simulator/graph/transition/transition.py b/diploma_thesis/simulator/graph/transition/transition.py index 3b19c14a..db6f89cf 100644 --- a/diploma_thesis/simulator/graph/transition/transition.py +++ b/diploma_thesis/simulator/graph/transition/transition.py @@ -95,7 +95,7 @@ def __remove_job__(cls, job: Job, graph: Graph): if job_id in graph.data[Graph.JOB_KEY].keys(): graph.data[Graph.JOB_KEY].pop(job_id) return - + raise ValueError(f'Job with id {job.id} not found in graph') # Utils diff --git a/diploma_thesis/simulator/td.py b/diploma_thesis/simulator/td.py index fc6abd3b..8fa8384a 100644 --- a/diploma_thesis/simulator/td.py +++ b/diploma_thesis/simulator/td.py @@ 
-91,7 +91,7 @@ def __forward_td__(self, context: Context, queue: TDQueue, agent, key): @staticmethod def from_cli(parameters, *args, **kwargs) -> Simulator: - return TDSimulator(parameters.get('memory', 1), + return TDSimulator(parameters.get('memory', 1), parameters.get('emit_trajectory', False), parameters.get('reset_trajectory', True), parameters.get('sliding_window', 1), diff --git a/notebooks/plot_utils/legend.py b/notebooks/plot_utils/legend.py index 467e426b..3c3ca112 100644 --- a/notebooks/plot_utils/legend.py +++ b/notebooks/plot_utils/legend.py @@ -4,4 +4,5 @@ def add_legend(ax, info): ax.legend(ncols=info.get('ncols', 2), bbox_to_anchor=info.get('bbox_to_anchor', (-0.08, 1)), loc='best', + frameon=True, fancybox=True) diff --git a/notebooks/plot_utils/plot_decisions_per_action.py b/notebooks/plot_utils/plot_decisions_per_action.py index b1993a03..4450a1d8 100644 --- a/notebooks/plot_utils/plot_decisions_per_action.py +++ b/notebooks/plot_utils/plot_decisions_per_action.py @@ -4,29 +4,30 @@ import matplotlib.pyplot as plt -def plot_decisions_per_action(data: pd.DataFrame, name: str, figsize=(8, 8)): +def plot_decisions_per_action(data: pd.DataFrame, name: str, figsize=(8, 8), ax=None): data = data[data['reward'] != 0] - fig, ax = plt.subplots(figsize=figsize) + if ax is None: + fig, ax = plt.subplots(figsize=figsize) actions = data["action"].unique() actions = np.sort(actions) + ax.grid(True, zorder=0) + # Create violins for each action for i, action in enumerate(actions): action_data = data[data["action"] == action] - ax.bar(x=i, height=len(action_data)) + ax.bar(x=i, height=len(action_data), zorder=3) # Set labels and title - ax.set_xlabel("Action") + # ax.set_xlabel("Action") ax.set_ylabel("Count") - ax.set_title(f"Selected action count ({name})") + ax.set_title(f"Histogram of actions ({name})") ax.set_xticks(np.arange(len(actions)), actions, rotation=45) # Add grid and adjust layout - ax.grid(True) plt.tight_layout() - return fig diff --git 
a/notebooks/plot_utils/plot_normalized_performance.py b/notebooks/plot_utils/plot_normalized_performance.py index 9979c8ec..9dafcdca 100644 --- a/notebooks/plot_utils/plot_normalized_performance.py +++ b/notebooks/plot_utils/plot_normalized_performance.py @@ -2,30 +2,41 @@ import numpy as np -def plot_normalized_performance(df, info): +def plot_normalized_performance(df, info, ax=None): index_column = info['index'] metric = info['metric'] candidate_column = info['candidate_column'] baseline = info['baseline'] - fig, ax = plt.subplots(figsize=info.get('figsize', (12, 6))) + if ax is None: + fig, ax = plt.subplots(figsize=info.get('figsize', (12, 6))) candidates = info.get('candidates', np.sort(df[candidate_column].unique())) candidates = [candidate for candidate in candidates if candidate != baseline] - print(candidates) - baseline_info = df[df[candidate_column] == baseline].set_index(index_column) - for index, candidate in enumerate(candidates): + def performance(candidate): candidate_info = df[df[candidate_column] == candidate].set_index(index_column) delta = (baseline_info[metric] - candidate_info[metric]) / (baseline_info[metric] + 1e-10) delta = delta[~np.isnan(delta)] + return delta + + candidates = [(candidate, performance(candidate)) for candidate in candidates if candidate != baseline] + + if info.get('sort', True): + candidates = sorted(candidates, key=lambda x: np.mean(x[1]), reverse=True) + + if top_k := info.get('top_k', None): + candidates = candidates[:top_k] + + for index, candidate in enumerate(candidates): + delta = candidate[1] delta *= 100 - ax.boxplot(delta, positions=[index], notch=True, vert=True) + ax.boxplot(delta, positions=[index], notch=True, vert=True, showmeans=True, widths=info.get('box_width', 0.4)) ax.yaxis.grid(True) @@ -33,8 +44,16 @@ def plot_normalized_performance(df, info): ax.set_ylabel(info['ylabel']) ax.set_title(info['title']) - ax.set_xticks(np.arange(len(candidates)), candidates, rotation=45) + 
ax.set_ylim(bottom=info.get('bottom'), top=info.get('top')) + + start, end = ax.get_ylim() + + ax.yaxis.set_ticks(np.arange(start, end, info.get('y_step', 5))) + ax.set_xticks(np.arange(len(candidates)), [candidate[0] for candidate in candidates], rotation=90) plt.tight_layout() - return fig + if ax is None: + return fig, candidates + else: + return candidates diff --git a/notebooks/plot_utils/plot_performance_accross_runs.py b/notebooks/plot_utils/plot_performance_accross_runs.py index 03a8675d..f878e8c9 100644 --- a/notebooks/plot_utils/plot_performance_accross_runs.py +++ b/notebooks/plot_utils/plot_performance_accross_runs.py @@ -48,7 +48,7 @@ def plot_performance_across_runs(data, info): label=title) - ax.xaxis.grid(True) + ax.xaxis.grid(True, zorder=0) ax.set_xlabel(info['xlabel']) ax.set_ylabel(info['ylabel']) diff --git a/notebooks/plot_utils/plot_reward_distribution_per_action.py b/notebooks/plot_utils/plot_reward_distribution_per_action.py index 962498a7..33673ae7 100644 --- a/notebooks/plot_utils/plot_reward_distribution_per_action.py +++ b/notebooks/plot_utils/plot_reward_distribution_per_action.py @@ -14,7 +14,7 @@ def plot_reward_distribution_per_action(data: pd.DataFrame, name: str, figsize=( # Create violins for each action for i, action in enumerate(actions): action_data = data[data["action"] == action]["reward"] - violin_parts = ax.boxplot( + violin_parts = ax.violinplot( action_data, positions=[i], showmeans=True, @@ -32,7 +32,7 @@ def plot_reward_distribution_per_action(data: pd.DataFrame, name: str, figsize=( ax.set_xticks(np.arange(len(actions)), actions, rotation=45) # Add grid and adjust layout - ax.grid(True) + ax.grid(True, zorder=0) plt.tight_layout() return fig diff --git a/notebooks/plot_utils/plot_reward_model_across_runs.py b/notebooks/plot_utils/plot_reward_model_across_runs.py index 680e6b6a..100749e2 100644 --- a/notebooks/plot_utils/plot_reward_model_across_runs.py +++ b/notebooks/plot_utils/plot_reward_model_across_runs.py @@ 
-31,7 +31,7 @@ def plot_reward_per_model_across_runs(data, info): ax.plot(filtered[metric], filtered[reward], marker=info['marker'], ms=10, ls='', label=title) - ax.grid(True) + ax.grid(True, zorder=0) ax.set_xlabel(info['xlabel']) ax.set_ylabel(info['ylabel']) diff --git a/notebooks/plot_utils/plot_value.py b/notebooks/plot_utils/plot_value.py index 20b3e469..98ec349a 100644 --- a/notebooks/plot_utils/plot_value.py +++ b/notebooks/plot_utils/plot_value.py @@ -6,50 +6,64 @@ from .legend import add_legend -def plot_value(path: str | dict, info: dict, figsize=(8, 8), post_process_fn=lambda a: a): +def plot_value(path: str | dict, info: dict, figsize=(8, 8), ax = None, post_process_fn=lambda a: a, background_process_fn=None): if not isinstance(path, dict): path = dict(first=path) - fig, ax = plt.subplots(figsize=figsize) + if ax is None: + fig, ax = plt.subplots(figsize=figsize) - for name, data_path in path.items(): - df = pd.read_csv(data_path) + min_value_len = float('inf') - if info.get('norm_index', False): - df[info['index']] -= df[info['index']].min() + for name, data_path in path.items(): + if not isinstance(data_path, list): + data_path = [data_path] - suffix = '' if len(path) == 1 else f'{name}' + result = [] - if 'work_center_id' in df.columns and info.get('is_reward_per_unit_visible', False): - work_centers = np.sort(df['work_center_id'].unique()) - machines = np.sort(df['machine_id'].unique()) + for p in data_path: + df = pd.read_csv(p) - for work_center_id in work_centers: - for machine_id in machines: - filtered = df[(df['work_center_id'] == work_center_id) & (df['machine_id'] == machine_id)] - filtered = filtered.sort_values(by=info['index']) - filtered.set_index(info['index'], inplace=True) + if info.get('norm_index', False): + df[info['index']] -= df[info['index']].min() - if len(machines) == 1: - label = f'M_idx: {work_center_id}' - else: - label = f'W_idx: {work_center_id}, M_idx: {machine_id}' + if f := info.get('filter'): + df = f(df) - if 
len(suffix) > 0: - label += ' ' + suffix + suffix = '' if len(path) == 1 else f'{name}' - ax.plot(post_process_fn(filtered[info['column']]), label=label) - else: df = df.sort_values(by=info['index']) df.set_index(info['index'], inplace=True) - ax.plot(post_process_fn(df[info['column']]), label=name) + result += [post_process_fn(df[info['column']])] + + min_value_len = min(min_value_len, len(result[-1])) + + + if len(result) == 1: + ax.plot(result[0], label=name) + else: + result = [v[:min_value_len] for v in result] + result = np.vstack(result) + + min_value = result.min(axis=0) + mean_value = result.mean(axis=0) + max_value = result.max(axis=0) + + ax.plot(np.arange(len(mean_value)), mean_value, marker=info['marker'], label=name) + + ax.fill_between(np.arange(len(mean_value)), min_value, max_value, alpha=0.25) + + ax.grid(True, zorder=0) + + if 'title' in info: + ax.set_title(info['title']) - ax.grid(True) - ax.set_title(info['title']) ax.set_xlabel(info['xlabel']) ax.set_ylabel(info['ylabel']) - add_legend(ax, info) + # if len(path) > 1: + # add_legend(ax, info) - return fig + if ax is None: + return fig diff --git a/notebooks/plot_utils/plot_value_per_run.py b/notebooks/plot_utils/plot_value_per_run.py index 42b30d27..1cc45902 100644 --- a/notebooks/plot_utils/plot_value_per_run.py +++ b/notebooks/plot_utils/plot_value_per_run.py @@ -1,48 +1,86 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +import scipy from .legend import add_legend +def mean_confidence_interval(data, confidence=0.95): + a = 1.0 * np.array(data) + n = len(a) + m, se = np.mean(a), scipy.stats.sem(a) + h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1) + return m, m-h, m+h -def plot_value_per_run(path: str | dict, info: dict, make_run_path, post_process_fn=lambda a: a): + +def plot_value_per_run(path: str | dict, info: dict, make_run_path, post_process_fn=lambda a: a, ax = None): if not isinstance(path, dict): path = dict(first=path) - fig, ax = 
plt.subplots(figsize=info.get('figsize', (8, 8))) + fig = None - max_values_len = 0 + if ax is None: + fig, ax = plt.subplots(figsize=info.get('figsize', (8, 8))) for name, data_path in path.items(): - run = 1 - - values = [] + if not isinstance(data_path, list): + data_path = [data_path] - while True: - run_path = make_run_path(data_path, run) + result = [] - run += 1 + min_value_len = float('inf') + for p in data_path: try: - df = pd.read_csv(run_path) + run = 1 + + values = [] + + while True: + run_path = make_run_path(p, run) + + run += 1 + + try: + df = pd.read_csv(run_path) + df = df.sort_values(by=info['index']) + df.set_index(info['index'], inplace=True) - df = df.sort_values(by=info['index']) - df.set_index(info['index'], inplace=True) + values += [post_process_fn(df[info['column']], run)] + except: + break - values += [post_process_fn(df[info['column']], run)] + values = np.array(values) + + if value := info.get('smoothing_value'): + values = np.convolve(values, np.ones(value), 'valid') / value + + result += [values] + + min_value_len = min(min_value_len, len(values)) except: - break + pass + + if len(result) == 1: + ax.plot(np.arange(len(result[0])), result[0], marker=info['marker'], label=name) + else: + result = [v[:min_value_len] for v in result] + result = np.vstack(result) + + min_value = result.min(axis=0) + mean_value = result.mean(axis=0) + max_value = result.max(axis=0) - ax.plot(np.arange(len(values)), np.array(values), marker=info['marker'], label=name) + ax.plot(np.arange(len(mean_value)), mean_value, marker=info['marker'], label=name) - max_values_len = max(max_values_len, len(values)) + ax.fill_between(np.arange(len(mean_value)), min_value, max_value, alpha=0.25) - ax.grid(True) + ax.grid(True, zorder=0) ax.set_title(info['title']) ax.set_xlabel(info['xlabel']) ax.set_ylabel(info['ylabel']) - ax.set_xticks(np.arange(max_values_len)) + # ax.set_xticks(np.arange(max_values_len)) - add_legend(ax, info) + # add_legend(ax, info) return fig