From 756245edc429a46a28e6a0540e826f23e3e90fc2 Mon Sep 17 00:00:00 2001 From: Fangzhou Yu Date: Mon, 7 Oct 2024 22:33:23 -0400 Subject: [PATCH 1/5] edit loss param --- .../velocity/config/anymal_d/__init__.py | 8 +- .../config/anymal_d/agents/torchrl_ppo_cfg.py | 181 ++++++++++++++++++ 2 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py index 0f6b8a4776..d24059c2fd 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py @@ -19,6 +19,8 @@ "env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg, "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDFlatPPORunnerCfg", + }, ) @@ -30,6 +32,8 @@ "env_cfg_entry_point": flat_env_cfg.AnymalDFlatEnvCfg_PLAY, "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", + "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDFlatPPORunnerCfg", + }, ) @@ -41,6 +45,8 @@ "env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg, "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", + "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDRoughPPORunnerCfg", + }, ) @@ -51,6 +57,6 @@ kwargs={ "env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg_PLAY, "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", - "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", + "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDRoughPPORunnerCfg", }, ) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py new file mode 100644 index 0000000000..0789cb5f32 --- /dev/null +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py @@ -0,0 +1,181 @@ + +from dataclasses import MISSING + +from omni.isaac.lab.utils import configclass +import torch.nn as nn + +from omni.isaac.lab_tasks.utils.wrappers.torchrl.torchrl_ppo_runner_cfg import ( + ProbabilisticActorCfg, + ValueOperatorCfg, + ClipPPOLossCfg, + CollectorCfg, + OnPolicyPPORunnerCfg) + +class AnymalDActorNN(nn.Module): + def __init__(self): + super().__init__() + self.model = nn.Sequential( + nn.Linear(in_features=48, out_features=512, bias=True), + nn.ELU(alpha=1.0), + nn.Linear(in_features=512, out_features=256, bias=True), + nn.ELU(alpha=1.0), + nn.Linear(in_features=256, out_features=128, bias=True), + nn.ELU(alpha=1.0), + nn.Linear(in_features=128, out_features=12*2, bias=True) + ) + + def forward(self, x): + return self.model(x) + +class AnymalDCriticNN(nn.Module): + def __init__(self): + super().__init__() + self.model = nn.Sequential( + nn.Linear(in_features=48, out_features=512, bias=True), + nn.ELU(alpha=1.0), + nn.Linear(in_features=512, out_features=256, bias=True), + nn.ELU(alpha=1.0), + nn.Linear(in_features=256, out_features=128, bias=True), + nn.ELU(alpha=1.0), + nn.Linear(in_features=128, out_features=1, bias=True) + ) + + def forward(self, x): + return self.model(x) + +@configclass +class AnymalDActorModule(ProbabilisticActorCfg): + + actor_network = AnymalDActorNN + + init_noise_std = 1.0 + + in_keys = ["policy"] + + out_keys: list[str] = ["loc", "scale"] + + +@configclass +class AnymalDCriticModule(ValueOperatorCfg): + + critic_network = AnymalDCriticNN + + in_keys = ["policy"] + + out_keys = ["state_value"] + + + +""" +Collector Module Definition +""" + +@configclass +class AnymalDCollectorModule(CollectorCfg): + + actor_network = AnymalDActorModule() + + split_trajs = False + + + +""" +Loss Module Definition +""" + +@configclass +class AnymalDPPOLossModule(ClipPPOLossCfg): + + actor_network = AnymalDActorModule() + + value_network = AnymalDCriticModule() + + value_key = "state_value" + + desired_kl = 0.0012 + + beta = 1.0 + + decrement = 0.5 + + increment = 2.0 + + value_loss_coef = 0.5 + + clip_param = 0.2 + + entropy_coef = 0.02 + + entropy_bonus = True + + loss_critic_type = "l2" + + normalize_advantage = True + + learning_rate = 1e-3 + + gamma = 0.99 + + lam = 0.95 + + max_grad_norm = 1.0 + + +""" +Trainer Module Definition +""" + +@configclass +class AnymalDPPORunnerCfg(OnPolicyPPORunnerCfg): + + loss_module = AnymalDPPOLossModule() + + collector_module = AnymalDCollectorModule() + + seed = 42 + + num_steps_per_env = 24 + + num_epochs = 5 + + num_mini_batches = 4 + + lr_schedule = "adaptive" + + max_iterations = 25000 + + save_interval = 50 + + save_trainer_interval = 100 + + experiment_name = MISSING + + wandb_project = MISSING + + logger = "wandb" + + +@configclass +class AnymalDFlatPPORunnerCfg(AnymalDPPORunnerCfg): + def __post_init__(self): + """Post initialization.""" + + # change experiment name + self.experiment_name = "anymal_d_flat" + + # change wandb project + self.wandb_project = "anymal_d_flat" + + +@configclass +class AnymalDRoughPPORunnerCfg(AnymalDPPORunnerCfg): + def __post_init__(self): + """Post initialization.""" + + # change experiment name + self.experiment_name = "anymal_d_rough" + + # change wandb project + self.wandb_project = "anymal_d_rough" + + From 1dcb99e5a95f56c4aeba1210b89269c821c19345 Mon Sep 17 00:00:00 2001 From: Fangzhou Yu Date: Mon, 7 Oct 2024 22:35:52 -0400 Subject: [PATCH 2/5] run formatter --- .../velocity/config/anymal_d/__init__.py | 3 -- .../config/anymal_d/agents/torchrl_ppo_cfg.py | 47 +++++++++++-------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py index d24059c2fd..74ae92d80f 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py @@ -20,7 +20,6 @@ "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDFlatPPORunnerCfg", - }, ) @@ -33,7 +32,6 @@ "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDFlatPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_flat_ppo_cfg.yaml", "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDFlatPPORunnerCfg", - }, ) @@ -46,7 +44,6 @@ "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDRoughPPORunnerCfg", - }, ) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py index 0789cb5f32..3fe3093722 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/agents/torchrl_ppo_cfg.py @@ -1,15 +1,21 @@ +# Copyright (c) 2022-2024, The Isaac Lab Project Developers. +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +import torch.nn as nn from dataclasses import MISSING from omni.isaac.lab.utils import configclass -import torch.nn as nn from omni.isaac.lab_tasks.utils.wrappers.torchrl.torchrl_ppo_runner_cfg import ( - ProbabilisticActorCfg, - ValueOperatorCfg, - ClipPPOLossCfg, - CollectorCfg, - OnPolicyPPORunnerCfg) + ClipPPOLossCfg, + CollectorCfg, + OnPolicyPPORunnerCfg, + ProbabilisticActorCfg, + ValueOperatorCfg, +) + class AnymalDActorNN(nn.Module): def __init__(self): @@ -21,12 +27,13 @@ def __init__(self): nn.ELU(alpha=1.0), nn.Linear(in_features=256, out_features=128, bias=True), nn.ELU(alpha=1.0), - nn.Linear(in_features=128, out_features=12*2, bias=True) + nn.Linear(in_features=128, out_features=12 * 2, bias=True), ) def forward(self, x): return self.model(x) + class AnymalDCriticNN(nn.Module): def __init__(self): super().__init__() @@ -37,12 +44,13 @@ def __init__(self): nn.ELU(alpha=1.0), nn.Linear(in_features=256, out_features=128, bias=True), nn.ELU(alpha=1.0), - nn.Linear(in_features=128, out_features=1, bias=True) + nn.Linear(in_features=128, out_features=1, bias=True), ) def forward(self, x): return self.model(x) - + + @configclass class AnymalDActorModule(ProbabilisticActorCfg): @@ -62,30 +70,30 @@ class AnymalDCriticModule(ValueOperatorCfg): in_keys = ["policy"] - out_keys = ["state_value"] - + out_keys = ["state_value"] """ Collector Module Definition """ + @configclass class AnymalDCollectorModule(CollectorCfg): actor_network = AnymalDActorModule() - split_trajs = False - + split_trajs = False """ Loss Module Definition """ + @configclass class AnymalDPPOLossModule(ClipPPOLossCfg): - + actor_network = AnymalDActorModule() value_network = AnymalDCriticModule() @@ -100,9 +108,9 @@ class AnymalDPPOLossModule(ClipPPOLossCfg): increment = 2.0 - value_loss_coef = 0.5 + value_loss_coef = 0.5 - clip_param = 0.2 + clip_param = 0.2 entropy_coef = 0.02 @@ -125,9 +133,10 @@ class AnymalDPPOLossModule(ClipPPOLossCfg): Trainer Module Definition """ + @configclass class AnymalDPPORunnerCfg(OnPolicyPPORunnerCfg): - + loss_module = AnymalDPPOLossModule() collector_module = AnymalDCollectorModule() @@ -147,7 +156,7 @@ class AnymalDPPORunnerCfg(OnPolicyPPORunnerCfg): save_interval = 50 save_trainer_interval = 100 - + experiment_name = MISSING wandb_project = MISSING @@ -177,5 +186,3 @@ def __post_init__(self): # change wandb project self.wandb_project = "anymal_d_rough" - - From 04b32aa56368eac5853ed830be1f3c69515464f8 Mon Sep 17 00:00:00 2001 From: Fangzhou Yu Date: Mon, 7 Oct 2024 22:40:30 -0400 Subject: [PATCH 3/5] update changelog --- source/extensions/omni.isaac.lab/docs/CHANGELOG.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst index 2e67c3708c..fec38fb9b4 100644 --- a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst @@ -1,6 +1,15 @@ Changelog --------- +0.24.20 (2024-10-07) +~~~~~~~~~~~~~~~~~~~~ + +Added +^^^^^ + +* Added torchrl ppo training configuration to Anymal-D velocity environment + + 0.24.19 (2024-10-05) ~~~~~~~~~~~~~~~~~~~~ From ac27d97473d9d76515ff3276ccce9d92d24a1d84 Mon Sep 17 00:00:00 2001 From: Fangzhou Yu Date: Mon, 7 Oct 2024 22:40:54 -0400 Subject: [PATCH 4/5] run formatter --- source/extensions/omni.isaac.lab/docs/CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst index fec38fb9b4..e517d4a43c 100644 --- a/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst +++ b/source/extensions/omni.isaac.lab/docs/CHANGELOG.rst @@ -7,7 +7,7 @@ Changelog Added ^^^^^ -* Added torchrl ppo training configuration to Anymal-D velocity environment +* Added torchrl ppo training configuration to Anymal-D velocity environment 0.24.19 (2024-10-05) From 8bed3878509eb2347c7185443614363f801063ea Mon Sep 17 00:00:00 2001 From: Fangzhou Yu Date: Tue, 8 Oct 2024 11:45:38 -0400 Subject: [PATCH 5/5] fix skrl registration --- .../locomotion/velocity/config/anymal_d/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py index 74ae92d80f..a082cc8a2d 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/locomotion/velocity/config/anymal_d/__init__.py @@ -54,6 +54,7 @@ kwargs={ "env_cfg_entry_point": rough_env_cfg.AnymalDRoughEnvCfg_PLAY, "rsl_rl_cfg_entry_point": f"{agents.__name__}.rsl_rl_ppo_cfg:AnymalDRoughPPORunnerCfg", + "skrl_cfg_entry_point": f"{agents.__name__}:skrl_rough_ppo_cfg.yaml", "torchrl_cfg_entry_point": f"{agents.__name__}.torchrl_ppo_cfg:AnymalDRoughPPORunnerCfg", }, )