diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py index 2a4d330e..c1568d24 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py @@ -44,6 +44,7 @@ class HumanoidEnvCfg(DirectRLEnvCfg): debug_vis=False, ) + # scene scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True) diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/humanoid_env_cfg.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/humanoid_env_cfg.py index e02dd94e..add630cb 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/humanoid_env_cfg.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/humanoid_env_cfg.py @@ -17,6 +17,7 @@ from omni.isaac.lab.terrains import TerrainImporterCfg from omni.isaac.lab.utils import configclass from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR +from omni.isaac.lab.assets.rigid_object.rigid_object_cfg import RigidObjectCfg import omni.isaac.lab_tasks.manager_based.classic.humanoid.mdp as mdp @@ -24,7 +25,6 @@ # Scene definition ## - @configclass class MySceneCfg(InteractiveSceneCfg): """Configuration for the terrain scene with a humanoid robot.""" @@ -90,6 +90,32 @@ class MySceneCfg(InteractiveSceneCfg): }, ) + ball = RigidObjectCfg( + prim_path="{ENV_REGEX_NS}/ball", + spawn=sim_utils.SphereCfg( + radius=0.15, # radius of the sphere in meters + rigid_props=sim_utils.RigidBodyPropertiesCfg( + disable_gravity=False, + ), + mass_props=sim_utils.MassPropertiesCfg( + mass=1.0 + ), + collision_props=sim_utils.CollisionPropertiesCfg(), + 
physics_material=sim_utils.RigidBodyMaterialCfg( + static_friction=0.5, + dynamic_friction=0.5, + restitution=0.5, + ), + ), + init_state=RigidObjectCfg.InitialStateCfg( + pos=(2.0, 0.0, 0.4), + rot=(1.0, 0.0, 0.0, 0.0), + lin_vel=(0.0, 0.0, 0.0), + ang_vel=(0.0, 0.0, 0.0), + ), +) + # lights light = AssetBaseCfg( prim_path="/World/light", @@ -174,6 +200,16 @@ class EventCfg: }, ) + reset_ball = EventTerm( + func=mdp.reset_root_state_uniform, # Same function used for object reset in inhand example + mode="reset", + params={ + "pose_range": {}, + "velocity_range": {}, + "asset_cfg": SceneEntityCfg("ball") # Reference to your ball object + }, + ) + @configclass class RewardsCfg: diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/mdp/observations.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/mdp/observations.py index 1a65c381..fd6723b2 100644 --- a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/mdp/observations.py +++ b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/mdp/observations.py @@ -73,3 +73,18 @@ def base_angle_to_target( angle_to_target = torch.atan2(torch.sin(angle_to_target), torch.cos(angle_to_target)) return angle_to_target.unsqueeze(-1) + +def ball_position_(env: ManagerBasedEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("ball")) -> torch.Tensor: + """Returns the x,y ball position in the simulation world frame. + + Args: + env: The environment instance. + asset_cfg: Configuration for the ball entity. + + Returns: + torch.Tensor: Ball x,y position in world frame with shape (num_envs, 2). 
+ """ + # look up the ball asset in the scene by its configured name + ball = env.scene[asset_cfg.name] + # keep only the x,y components of the ball's world-frame root position + return ball.data.root_pos_w[:, :2] diff --git a/source/standalone/demos/bipeds_rl.py b/source/standalone/demos/bipeds_rl.py new file mode 100644 index 00000000..1f1acac3 --- /dev/null +++ b/source/standalone/demos/bipeds_rl.py @@ -0,0 +1,106 @@ +#!/usr/bin/env python3 + +"""Train the Isaac Humanoid environment using Stable-Baselines3 PPO.""" + +import argparse +from omni.isaac.lab.app import AppLauncher +from stable_baselines3 import PPO +from stable_baselines3.common.env_checker import check_env +from stable_baselines3.common.vec_env import DummyVecEnv +from stable_baselines3.common.callbacks import EvalCallback +import torch + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Isaac Humanoid SB3 PPO example script") + AppLauncher.add_app_launcher_args(parser) + args = parser.parse_args() + return args + +def main(): + # Parse arguments + args = parse_args() + + # Launch the simulator + launcher = AppLauncher(args) + + # Import environment-related modules after simulator is launched + from omni.isaac.lab_tasks.direct.humanoid.humanoid_env import HumanoidEnv, HumanoidEnvCfg + from omni.isaac.lab.scene import InteractiveSceneCfg + + # Configure environment + cfg = HumanoidEnvCfg( + # Environment settings + episode_length_s=30.0, + decimation=2, + action_scale=1.0, + action_space=21, + observation_space=75, + + # Scene settings + scene=InteractiveSceneCfg( + num_envs=1, + env_spacing=8.0, + replicate_physics=True + ), + ) + + # Create environment + env = HumanoidEnv(cfg) + print("Created Humanoid environment with:") + print(f"\tObservation space: {env.observation_space}") + print(f"\tAction space: {env.action_space}") + + # Verify environment compatibility with SB3 + check_env(env) + + # Wrap environment with DummyVecEnv for SB3 compatibility + vec_env = DummyVecEnv([lambda: 
env]) + + # Define PPO model + model = PPO( + policy="MlpPolicy", + env=vec_env, + learning_rate=3e-4, + n_steps=30, + batch_size=64, + n_epochs=10, + gamma=0.99, + gae_lambda=0.95, + clip_range=0.2, + ent_coef=0.01, + vf_coef=0.5, + max_grad_norm=0.5, + verbose=1, + tensorboard_log="./ppo_humanoid_tensorboard/" + ) + + # Set up evaluation callback + eval_callback = EvalCallback( + vec_env, best_model_save_path="./logs/best_model/", + log_path="./logs/results/", eval_freq=5000, + deterministic=True, render=False + ) + + # Train the model + print("Starting training...") + model.learn(total_timesteps=1000000, callback=eval_callback) + + # Save the trained model + model.save("ppo_humanoid") + print("Model saved as 'ppo_humanoid'.") + + # Test the trained model + print("Testing the trained model...") + obs = vec_env.reset() + for _ in range(1000): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, dones, info = vec_env.step(action) + vec_env.render() + + # Cleanup + env.close() + launcher.app.close() + +if __name__ == "__main__": + main()