added basic humanoid env changes - ball and observation of ball

jacobtho314 · Nov 29, 2024 · 1ba21e0 · 1ba21e0
1 parent b9a49ca
commit 1ba21e0
Show file tree

Hide file tree

Showing 4 changed files with 159 additions and 1 deletion.
diff --git a/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py b/source/extensions/omni.isaac.lab_tasks/omni/isaac/lab_tasks/direct/humanoid/humanoid_env.py
@@ -44,6 +44,7 @@ class HumanoidEnvCfg(DirectRLEnvCfg):
         debug_vis=False,
     )
 
+
     # scene
     scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
 

diff --git a/...i.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/humanoid_env_cfg.py b/...i.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/humanoid_env_cfg.py
@@ -17,14 +17,14 @@
 from omni.isaac.lab.terrains import TerrainImporterCfg
 from omni.isaac.lab.utils import configclass
 from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR
+from omni.isaac.lab.assets.rigid_object.rigid_object_cfg import RigidObjectCfg
 
 import omni.isaac.lab_tasks.manager_based.classic.humanoid.mdp as mdp
 
 ##
 # Scene definition
 ##
 
-
 @configclass
 class MySceneCfg(InteractiveSceneCfg):
     """Configuration for the terrain scene with a humanoid robot."""
@@ -90,6 +90,32 @@ class MySceneCfg(InteractiveSceneCfg):
         },
     )
 
+    ball = RigidObjectCfg(
+    prim_path="{ENV_REGEX_NS}/ball",
+    spawn=sim_utils.SphereCfg(
+        radius=0.15,  # radius of the sphere in meters
+        rigid_props=sim_utils.RigidBodyPropertiesCfg(
+            disable_gravity=False,
+        ),
+        mass_props=sim_utils.MassPropertiesCfg(
+            mass=1.0
+        ),
+        collision_props=sim_utils.CollisionPropertiesCfg(),
+
+        physics_material=sim_utils.RigidBodyMaterialCfg(
+            static_friction=0.5,
+            dynamic_friction=0.5,
+            restitution=0.5,
+        ),
+    ),
+    init_state=RigidObjectCfg.InitialStateCfg(
+        pos=(2.0, 0.0, 0.4),
+        rot=(1.0, 0.0, 0.0, 0.0),
+        lin_vel=(0.0, 0.0, 0.0),
+        ang_vel=(0.0, 0.0, 0.0),
+    ),
+)
+
     # lights
     light = AssetBaseCfg(
         prim_path="/World/light",
@@ -174,6 +200,16 @@ class EventCfg:
         },
     )
 
+    reset_ball = EventTerm(
+        func=mdp.reset_root_state_uniform,  # Same function used for object reset in inhand example
+        mode="reset",
+        params={
+            "pose_range": {},
+            "velocity_range": {},
+            "asset_cfg": SceneEntityCfg("ball")  # Reference to your ball object
+        },
+    )
+
 
 @configclass
 class RewardsCfg:

diff --git a/...i.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/mdp/observations.py b/...i.isaac.lab_tasks/omni/isaac/lab_tasks/manager_based/classic/humanoid/mdp/observations.py
@@ -73,3 +73,18 @@ def base_angle_to_target(
     angle_to_target = torch.atan2(torch.sin(angle_to_target), torch.cos(angle_to_target))
 
     return angle_to_target.unsqueeze(-1)
+
+def ball_position_(env: ManagerBasedEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("ball")) -> torch.Tensor:
+    """Returns the x,y ball position in the simulation world frame.
+    
+    Args:
+        env: The environment instance.
+        asset_cfg: Configuration for the ball entity.
+        
+    Returns:
+        torch.Tensor: Ball (x, y) position in world frame with shape (num_envs, 2).
+    """
+    # look up the ball asset in the scene by its configured name
+    ball = env.scene[asset_cfg.name]
+    # return only the x, y components of the ball position in world frame
+    return ball.data.root_pos_w[:, :2]
diff --git a/source/standalone/demos/bipeds_rl.py b/source/standalone/demos/bipeds_rl.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python3
+
+"""Train the Isaac Humanoid environment using Stable-Baselines3 PPO."""
+
+import argparse
+from omni.isaac.lab.app import AppLauncher
+from stable_baselines3 import PPO
+from stable_baselines3.common.env_checker import check_env
+from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3.common.callbacks import EvalCallback
+import torch
+
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(description="Isaac Humanoid SB3 PPO example script")
+    AppLauncher.add_app_launcher_args(parser)
+    args = parser.parse_args()
+    return args
+
+def main():
+    # Parse arguments
+    args = parse_args()
+
+    # Launch the simulator
+    launcher = AppLauncher(args)
+
+    # Import environment-related modules after simulator is launched
+    from omni.isaac.lab_tasks.direct.humanoid.humanoid_env import HumanoidEnv, HumanoidEnvCfg
+    from omni.isaac.lab.scene import InteractiveSceneCfg
+
+    # Configure environment
+    cfg = HumanoidEnvCfg(
+        # Environment settings
+        episode_length_s=30.0,
+        decimation=2,
+        action_scale=1.0,
+        action_space=21,
+        observation_space=75,
+
+        # Scene settings
+        scene=InteractiveSceneCfg(
+            num_envs=1,
+            env_spacing=8.0,
+            replicate_physics=True
+        ),
+    )   
+
+    # Create environment
+    env = HumanoidEnv(cfg)
+    print("Created Humanoid environment with:")
+    print(f"\tObservation space: {env.observation_space}")
+    print(f"\tAction space: {env.action_space}")
+
+    # Verify environment compatibility with SB3
+    check_env(env)
+
+    # Wrap environment with DummyVecEnv for SB3 compatibility
+    vec_env = DummyVecEnv([lambda: env])
+
+    # Define PPO model
+    model = PPO(
+        policy="MlpPolicy",
+        env=vec_env,
+        learning_rate=3e-4,
+        n_steps=30,
+        batch_size=64,
+        n_epochs=10,
+        gamma=0.99,
+        gae_lambda=0.95,
+        clip_range=0.2,
+        ent_coef=0.01,
+        vf_coef=0.5,
+        max_grad_norm=0.5,
+        verbose=1,
+        tensorboard_log="./ppo_humanoid_tensorboard/"
+    )
+
+    # Set up evaluation callback
+    eval_callback = EvalCallback(
+        vec_env, best_model_save_path="./logs/best_model/",
+        log_path="./logs/results/", eval_freq=5000,
+        deterministic=True, render=False
+    )
+
+    # Train the model
+    print("Starting training...")
+    model.learn(total_timesteps=1000000, callback=eval_callback)
+
+    # Save the trained model
+    model.save("ppo_humanoid")
+    print("Model saved as 'ppo_humanoid'.")
+
+    # Test the trained model
+    print("Testing the trained model...")
+    obs = vec_env.reset()
+    for _ in range(1000):
+        action, _states = model.predict(obs, deterministic=True)
+        obs, rewards, dones, info = vec_env.step(action)
+        vec_env.render()
+
+    # Cleanup
+    env.close()
+    launcher.app.close()
+
+if __name__ == "__main__":
+    main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -44,6 +44,7 @@ class HumanoidEnvCfg(DirectRLEnvCfg): @@
             debug_vis=False,
         )
         # scene
         scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)
@@ Expand Down @@