Work on experiments

yura-hb committed Apr 2, 2024
1 parent 0d2531d commit 04fd3b6

Showing 56 changed files with 2,459 additions and 286 deletions.
6 changes: 5 additions & 1 deletion diploma_thesis/agents/utils/policy/action_policy.py
@@ -132,6 +132,7 @@ def select(self, state: State) -> Record:
output = self.__call__(state)
value, actions, memory = self.__fetch_values__(output)
value, actions = value.squeeze(), actions.squeeze()
value, actions = torch.atleast_1d(value), torch.atleast_1d(actions)

if memory is not None:
for key, item in memory.items(include_nested=True, leaves_only=True):
@@ -175,9 +176,12 @@ def __estimate_policy__(self, output):

if isinstance(actions, tuple):
actions, lengths = actions
mask = torch.isfinite(actions)
means = torch.nan_to_num(actions, neginf=0.0).sum(dim=-1, keepdim=True) / torch.atleast_2d(lengths).T

# Here we suppose that the pad value is neginf
output[Keys.ACTIONS] = value + actions - torch.nan_to_num(actions, neginf=0.0).sum(dim=-1) / lengths
output[Keys.ACTIONS] = value + actions - means
output[Keys.ACTIONS][~mask] = min_value
output[Keys.ACTIONS] = torch.nan_to_num(output[Keys.ACTIONS], neginf=min_value)
else:
output[Keys.ACTIONS] = value + actions - actions.mean(dim=-1, keepdim=True)
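For context on the hunk above: a minimal standalone sketch of masked advantage centering over variable-length action sets. It assumes, as the comment in the diff states, that padded action slots are filled with -inf; the tensors and the min_value floor are illustrative, not the repository's actual shapes.

import torch

# Illustrative inputs: two states, at most three actions each.
min_value = torch.finfo(torch.float32).min
value = torch.tensor([[1.0], [2.0]])                        # state values, shape (B, 1)
actions = torch.tensor([[0.5, 1.5, float('-inf')],          # advantages, -inf-padded
                        [1.0, float('-inf'), float('-inf')]])
lengths = torch.tensor([2.0, 1.0])                          # valid actions per row

mask = torch.isfinite(actions)
# Mean advantage over valid slots only: zero out the -inf padding before summing.
means = torch.nan_to_num(actions, neginf=0.0).sum(dim=-1, keepdim=True) / lengths.unsqueeze(-1)

q = value + actions - means            # dueling combination: Q = V + (A - mean(A))
q[~mask] = min_value                   # padded slots are pushed to the floor value
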
10 changes: 9 additions & 1 deletion diploma_thesis/agents/utils/return_estimator/n_step.py
@@ -83,7 +83,15 @@ def update_returns(self, records: List[Record]) -> List[Record]:
for j in range(n):
g += td_errors[i + j] * lambdas[j] * weights[j] * self.configuration.discount_factor ** j

records[i].reward = g
next_value_idx = i + n
previous_value_idx = next_value_idx - 1

# Preprocess trajectory for n-step learning
records[i].reward = g - (self.get_value(records[next_value_idx]) if next_value_idx < len(records) else 0)
records[i].next_state = records[previous_value_idx].next_state
records[i].done = records[next_value_idx].done \
if next_value_idx < len(records) \
else records[previous_value_idx].done

return records

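To make the intent of the new lines concrete, here is a simplified sketch of n-step record preprocessing. It uses plain discounted rewards, whereas the diff accumulates lambda-weighted TD errors, and Record/value below are stand-ins for the repository's types; only the index shifting mirrors the hunk above.

from dataclasses import dataclass, replace
from typing import Any, List

@dataclass
class Record:                 # simplified stand-in for the repository's Record
    reward: float
    next_state: Any
    done: bool
    value: float              # V(s) estimate attached to the record

def update_returns(records: List[Record], n: int, gamma: float) -> List[Record]:
    out = []
    for i in range(len(records)):
        steps = min(n, len(records) - i)
        # Plain discounted n-step sum; the repository uses lambda-weighted TD errors here.
        g = sum(gamma ** j * records[i + j].reward for j in range(steps))
        next_idx = i + steps
        prev_idx = next_idx - 1
        out.append(replace(
            records[i],
            # Subtract the bootstrap that the learner will re-add from the shifted next_state.
            reward=g - (records[next_idx].value if next_idx < len(records) else 0.0),
            next_state=records[prev_idx].next_state,
            done=records[next_idx].done if next_idx < len(records) else records[prev_idx].done,
        ))
    return out
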
2 changes: 1 addition & 1 deletion diploma_thesis/agents/utils/rl/ddqn.py
@@ -14,6 +14,6 @@ def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase):

target = self.__get_action_values__(self.target_model, batch.next_state, best_actions)

q = batch.reward + self.return_estimator.discount_factor * target * (1 - batch.done.int())
q = batch.reward.squeeze() + self.return_estimator.discount_factor * target * (1 - batch.done.squeeze().int())

return q
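
For reference, a compact sketch of the double-DQN target computed here, with plain tensors standing in for the repository's Policy and Record types: the online network selects the greedy next action, and the target network evaluates it.

import torch

def ddqn_target(reward, done, gamma, online_q_next, target_q_next):
    # online_q_next / target_q_next: Q-values for next_state, shape (B, A).
    best_actions = online_q_next.argmax(dim=-1, keepdim=True)      # selection: online net
    target = target_q_next.gather(-1, best_actions).squeeze(-1)    # evaluation: target net
    return reward.squeeze() + gamma * target * (1 - done.squeeze().int())

q = ddqn_target(torch.rand(4, 1), torch.zeros(4, 1, dtype=torch.bool), 0.99,
                torch.rand(4, 5), torch.rand(4, 5))                # -> shape (4,)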
4 changes: 2 additions & 2 deletions diploma_thesis/agents/utils/rl/dqn.py
@@ -62,9 +62,9 @@ def __train__(self, model: Policy):

def estimate_q(self, model: Policy, batch: Record | tensordict.TensorDictBase):
target = self.__get_action_values__(self.target_model, batch.next_state, None)
target = target.max(dim=1).values
target = target.max(dim=-1).values

q = batch.reward + self.return_estimator.discount_factor * target * (1 - batch.done.int())
q = batch.reward.squeeze() + self.return_estimator.discount_factor * target * (1 - batch.done.squeeze().int())

return q

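The added .squeeze() calls here and in ddqn.py read as a guard against a shape-broadcasting bug; below is a small demonstration of the failure mode this avoids when the reward is stored as (B, 1) while the bootstrapped target comes out as (B,). The motivation is an assumption; only the broadcasting behavior itself is shown.

import torch

reward = torch.ones(4, 1)                   # reward stored as shape (B, 1)
target = torch.ones(4)                      # max target values, shape (B,)

broken = reward + 0.99 * target             # silently broadcasts to (4, 4)
fixed = reward.squeeze() + 0.99 * target    # shape (4,): one target per sample

print(broken.shape, fixed.shape)            # torch.Size([4, 4]) torch.Size([4])
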
@@ -283,7 +283,9 @@ dqn_8: &dqn_8
reward: &reward
- kind: 'surrogate_tardiness'
parameters:
span: 196
winq_factor: 0.2
span: 20
critical_level_factor: 64

##############################################################################################

@@ -2,6 +2,7 @@

template: &template 'djsp'
base_model: &base_model 'configuration/experiments/jsp/djsp_flexible/machine.yml'
marl_model: &marl_model 'configuration/experiments/jsp/djsp_flexible/marl_machine.yml'

graph: &graph
transition_model:
@@ -13,13 +14,13 @@ graph: &graph
schedule:
kind: 'compressed'

memory: 0
memory: 10
is_machine_set_in_work_center_connected: True
is_work_center_set_in_shop_floor_connected: True

default_mods: &default_mods []
default_mods: &default_mods
# - 'util/infrastructure/mps.yml'
# - 'util/optimizer/grad_norm.yml'
- 'util/optimizer/grad_norm.yml'
# - 'util/rules/all_rules.yml'
# - 'util/infrastructure/compile.yml'
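
A note on the anchor/alias pattern used throughout this file: in YAML, aliasing a sequence (*default_mods) from inside another sequence nests it rather than splicing it in, so the experiment loader presumably flattens nested mod lists. A quick check, assuming PyYAML is available:

import yaml

doc = """
default_mods: &default_mods
  - 'util/optimizer/grad_norm.yml'

dqn_1:
  mods:
    - 'agent/dqn/ddqn.yml'
    - *default_mods
"""

# The alias expands to the whole default list, nested one level deep.
print(yaml.safe_load(doc)['dqn_1']['mods'])
# ['agent/dqn/ddqn.yml', ['util/optimizer/grad_norm.yml']]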

@@ -33,14 +34,14 @@ dqn_1: &dqn_1
mods:
*default_mods
#
#marl_dqn_1: &marl_dqn_1
# base_path: *base_model
# template: *template
# mod_dirs:
# - 'configuration/mods/machine/mods'
# mods:
# - 'util/agent/multi_agent.yml'
# - *default_mods
marl_dqn_1: &marl_dqn_1
base_path: *marl_model
template: *template
mod_dirs:
- 'configuration/mods/machine/mods'
mods:
- 'util/agent/multi_agent.yml'
- *default_mods
#
#marl_dqn_centralized_1: &centralized_dqn_1
# base_path: *base_model
@@ -62,15 +63,15 @@ dqn_2: &dqn_2
- 'agent/dqn/ddqn.yml'
- *default_mods
#
#marl_dqn_2: &marl_dqn_2
# base_path: *base_model
# template: *template
# mod_dirs:
# - 'configuration/mods/machine/mods'
# mods:
# - 'agent/dqn/ddqn.yml'
# - 'util/agent/multi_agent.yml'
# - *default_mods
marl_dqn_2: &marl_dqn_2
base_path: *marl_model
template: *template
mod_dirs:
- 'configuration/mods/machine/mods'
mods:
- 'agent/dqn/ddqn.yml'
- 'util/agent/multi_agent.yml'
- *default_mods
#
#marl_dqn_centralized_2: &centralized_dqn_2
# base_path: *base_model
@@ -93,15 +94,15 @@ dqn_3: &dqn_3
- 'agent/dqn/dueling.yml'
- *default_mods

#marl_dqn_3: &marl_dqn_3
# base_path: *base_model
# template: *template
# mod_dirs:
# - 'configuration/mods/machine/mods'
# mods:
# - 'agent/dqn/dueling.yml'
# - 'util/agent/multi_agent.yml'
# - *default_mods
marl_dqn_3: &marl_dqn_3
base_path: *marl_model
template: *template
mod_dirs:
- 'configuration/mods/machine/mods'
mods:
- 'agent/dqn/dueling.yml'
- 'util/agent/multi_agent.yml'
- *default_mods
#
#marl_dqn_centralized_3: &centralized_dqn_3
# base_path: *base_model
@@ -122,11 +123,11 @@ dqn_4: &dqn_4
- 'configuration/mods/machine/mods'
mods:
- 'agent/dqn/steps/3.yml'
# - 'agent/dqn/steps/off_policy.yml'
- 'agent/dqn/steps/off_policy.yml'
- *default_mods
#
#marl_dqn_4: &marl_dqn_4
# base_path: *base_model
# base_path: *marl_model
# template: *template
# mod_dirs:
# - 'configuration/mods/machine/mods'
@@ -158,15 +159,15 @@ dqn_5: &dqn_5
- 'agent/dqn/prioritized.yml'
- *default_mods

#marl_dqn_5: &marl_dqn_5
# base_path: *base_model
# template: *template
# mod_dirs:
# - 'configuration/mods/machine/mods'
# mods:
# - 'agent/dqn/prioritized.yml'
# - 'util/agent/multi_agent.yml'
# - *default_mods
marl_dqn_5: &marl_dqn_5
base_path: *marl_model
template: *template
mod_dirs:
- 'configuration/mods/machine/mods'
mods:
- 'agent/dqn/prioritized.yml'
- 'util/agent/multi_agent.yml'
- *default_mods
#
#marl_dqn_centralized_5: &centralized_dqn_5
# base_path: *base_model
@@ -190,16 +191,16 @@ dqn_6: &dqn_6
- 'util/action_selector/sample.yml'
- *default_mods

#marl_dqn_6: &marl_dqn_6
# base_path: *base_model
# template: *template
# mod_dirs:
# - 'configuration/mods/machine/mods'
# mods:
# - 'agent/dqn/noisy.yml'
# - 'util/agent/multi_agent.yml'
# - *default_mods
# - 'util/action_selector/sample.yml'
marl_dqn_6: &marl_dqn_6
base_path: *marl_model
template: *template
mod_dirs:
- 'configuration/mods/machine/mods'
mods:
- 'agent/dqn/noisy.yml'
- 'util/agent/multi_agent.yml'
- *default_mods
- 'util/action_selector/sample.yml'
#
#marl_dqn_centralized_6: &centralized_dqn_6
# base_path: *base_model
@@ -223,26 +224,26 @@ dqn_7: &dqn_7
- 'agent/dqn/ddqn.yml'
- 'agent/dqn/prioritized.yml'
- 'agent/dqn/steps/3.yml'
# - 'agent/dqn/steps/off_policy.yml'
- 'agent/dqn/steps/off_policy.yml'
- 'agent/dqn/dueling.yml'
- 'agent/dqn/noisy.yml'
- *default_mods
- 'util/action_selector/sample.yml'
#
#marl_dqn_7: &marl_dqn_7
# base_path: *base_model
# template: *template
# mod_dirs:
# - 'configuration/mods/machine/mods'
# mods:
# - 'agent/dqn/ddqn.yml'
# - 'agent/dqn/prioritized.yml'
## - 'agent/dqn/steps/3.yml'
## - 'agent/dqn/steps/off_policy.yml'
# - 'agent/dqn/dueling.yml'
# - 'agent/dqn/noisy.yml'
# - 'util/agent/multi_agent.yml'
# - *default_mods
marl_dqn_7: &marl_dqn_7
base_path: *marl_model
template: *template
mod_dirs:
- 'configuration/mods/machine/mods'
mods:
- 'agent/dqn/ddqn.yml'
- 'agent/dqn/prioritized.yml'
- 'agent/dqn/steps/3.yml'
- 'agent/dqn/steps/off_policy.yml'
- 'agent/dqn/dueling.yml'
- 'agent/dqn/noisy.yml'
- 'util/agent/multi_agent.yml'
- *default_mods
# - 'util/action_selector/sample.yml'
#
#marl_dqn_centralized_7: &centralized_dqn_7
@@ -272,7 +273,7 @@ dqn_8: &dqn_8
- 'agent/dqn/ddqn.yml'
- 'agent/dqn/prioritized.yml'
- 'agent/dqn/steps/3.yml'
# - 'agent/dqn/steps/off_policy.yml'
- 'agent/dqn/steps/off_policy.yml'
- 'agent/dqn/dueling.yml'
- 'agent/dqn/noisy.yml'
- *default_mods
@@ -283,7 +284,9 @@ dqn_8: &dqn_8
reward: &reward
- kind: 'surrogate_tardiness'
parameters:
span: 196
winq_factor: 0.2
span: 20
critical_level_factor: 64

##############################################################################################

@@ -293,19 +296,19 @@ long_single_source_run: &long_single_source_run
parameters:
mods:
__inout_factory__:
- [ ['utilization/70.yml'], ['utilization/80.yml'], ['utilization/90.yml'] ]
- [ ['utilization/80.yml'] ]
nested:
parameters:
dispatch:
seed: [ [ 0, 1, 2, 3, 4, 5 ] ]
seed: [ [ 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ] ]


###############################################################################################


task:
kind: 'multi_task'
n_workers: 4
n_workers: 2
n_threads: 8
debug: False
store_run_statistics: False
@@ -380,16 +383,16 @@ task:
- *dqn_1
# - *marl_dqn_1
# - *centralized_dqn_1
- output_dir: '2'
machine_agent:
parameters:
- *dqn_2
# - output_dir: '2'
# machine_agent:
# parameters:
# - *dqn_2
# - *marl_dqn_2
# - *centralized_dqn_2
- output_dir: '3'
machine_agent:
parameters:
- *dqn_3
# - output_dir: '3'
# machine_agent:
# parameters:
# - *dqn_3
# - *marl_dqn_3
# - *centralized_dqn_3
- output_dir: '4'
@@ -398,29 +401,30 @@ task:
- *dqn_4
# - *marl_dqn_4
# - *centralized_dqn_4
- output_dir: '5'
machine_agent:
parameters:
- *dqn_5
# - output_dir: '5'
# machine_agent:
# parameters:
# - *dqn_5
# - *marl_dqn_5
# - *centralized_dqn_5
- output_dir: '6'
machine_agent:
parameters:
- *dqn_6
# - output_dir: '6'
# machine_agent:
# parameters:
# - *dqn_6
# - *marl_dqn_6
# - *centralized_dqn_6
- output_dir: '7'
machine_agent:
parameters:
- *dqn_7
# - *marl_dqn_7
- *marl_dqn_7
# - *centralized_dqn_7
- output_dir: '8'
machine_agent:
parameters:
- *dqn_8


tape:
machine_reward:
*reward