Skip to content

Commit

Permalink
added basic humanoid env changes - ball and observation of ball
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobtho314 committed Nov 29, 2024
1 parent b9a49ca commit 1ba21e0
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class HumanoidEnvCfg(DirectRLEnvCfg):
debug_vis=False,
)


# scene
scene: InteractiveSceneCfg = InteractiveSceneCfg(num_envs=4096, env_spacing=4.0, replicate_physics=True)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
from omni.isaac.lab.terrains import TerrainImporterCfg
from omni.isaac.lab.utils import configclass
from omni.isaac.lab.utils.assets import ISAAC_NUCLEUS_DIR
from omni.isaac.lab.assets.rigid_object.rigid_object_cfg import RigidObjectCfg

import omni.isaac.lab_tasks.manager_based.classic.humanoid.mdp as mdp

##
# Scene definition
##


@configclass
class MySceneCfg(InteractiveSceneCfg):
"""Configuration for the terrain scene with a humanoid robot."""
Expand Down Expand Up @@ -90,6 +90,32 @@ class MySceneCfg(InteractiveSceneCfg):
},
)

# Rigid sphere registered in the scene under the name "ball"; the prim path is
# templated with {ENV_REGEX_NS}.
ball = RigidObjectCfg(
    prim_path="{ENV_REGEX_NS}/ball",
    spawn=sim_utils.SphereCfg(
        radius=0.15,  # radius of the sphere in meters
        # gravity stays enabled for the ball
        rigid_props=sim_utils.RigidBodyPropertiesCfg(disable_gravity=False),
        mass_props=sim_utils.MassPropertiesCfg(mass=1.0),
        # collision properties are left at the config's defaults
        collision_props=sim_utils.CollisionPropertiesCfg(),
        physics_material=sim_utils.RigidBodyMaterialCfg(
            static_friction=0.5,
            dynamic_friction=0.5,
            restitution=0.5,
        ),
    ),
    # Initial root state: offset along x, zero linear and angular velocity.
    # NOTE(review): rot is presumably a (w, x, y, z) identity quaternion - confirm.
    init_state=RigidObjectCfg.InitialStateCfg(
        pos=(2.0, 0.0, 0.4),
        rot=(1.0, 0.0, 0.0, 0.0),
        lin_vel=(0.0, 0.0, 0.0),
        ang_vel=(0.0, 0.0, 0.0),
    ),
)

# lights
light = AssetBaseCfg(
prim_path="/World/light",
Expand Down Expand Up @@ -174,6 +200,16 @@ class EventCfg:
},
)

# Reset-mode event applied to the scene entity named "ball".
# Both range dicts are empty, so no sampling ranges are supplied here.
# NOTE(review): presumably this restores the ball's default root state on reset -
# confirm against mdp.reset_root_state_uniform.
reset_ball = EventTerm(
    func=mdp.reset_root_state_uniform,
    mode="reset",
    params={
        "pose_range": {},
        "velocity_range": {},
        "asset_cfg": SceneEntityCfg("ball"),
    },
)


@configclass
class RewardsCfg:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,18 @@ def base_angle_to_target(
angle_to_target = torch.atan2(torch.sin(angle_to_target), torch.cos(angle_to_target))

return angle_to_target.unsqueeze(-1)

def ball_position_(env: ManagerBasedEnv, asset_cfg: SceneEntityCfg = SceneEntityCfg("ball")) -> torch.Tensor:
    """Return the planar (x, y) position of the ball in the simulation world frame.

    Only the first two columns of the ball's ``root_pos_w`` are returned; the
    remaining column(s) are dropped.

    Args:
        env: The environment instance.
        asset_cfg: Configuration naming the ball entity in ``env.scene``.
            Defaults to the entity called ``"ball"``.

    Returns:
        torch.Tensor: The ``[:, :2]`` slice of ``root_pos_w``, i.e. two values
        per row (presumably shape ``(num_envs, 2)`` - confirm the leading
        dimension against the asset's data layout).
    """
    # look the asset up by the name carried in the entity config
    ball = env.scene[asset_cfg.name]
    # NOTE(review): root_pos_w is a world-frame quantity; if per-environment
    # coordinates are wanted the caller likely needs to subtract the env origin.
    return ball.data.root_pos_w[:, :2]
106 changes: 106 additions & 0 deletions source/standalone/demos/bipeds_rl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3

"""Train the Isaac Humanoid environment using Stable-Baselines3 PPO."""

import argparse
from omni.isaac.lab.app import AppLauncher
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import EvalCallback
import torch

def parse_args():
    """Build the CLI parser and return the parsed arguments.

    The parser is handed to ``AppLauncher.add_app_launcher_args`` before
    parsing so that the launcher can add its own arguments to it.
    """
    cli = argparse.ArgumentParser(description="Isaac Humanoid SB3 PPO example script")
    AppLauncher.add_app_launcher_args(cli)
    return cli.parse_args()

def main():
    """Train, save, and roll out a PPO policy on the direct humanoid environment.

    Steps: parse CLI arguments, launch the simulator app, build a single-env
    ``HumanoidEnv``, adapt it to the Stable-Baselines3 ``VecEnv`` API, train
    PPO, save the model, and run a 1000-step deterministic rollout. The
    environment and the simulator app are closed even if a step fails.
    """
    # Parse arguments
    args = parse_args()

    # Launch the simulator
    launcher = AppLauncher(args)

    env = None
    try:
        # Import environment-related modules after simulator is launched
        from omni.isaac.lab.scene import InteractiveSceneCfg
        from omni.isaac.lab_tasks.direct.humanoid.humanoid_env import HumanoidEnv, HumanoidEnvCfg
        from omni.isaac.lab_tasks.utils.wrappers.sb3 import Sb3VecEnvWrapper

        # Configure environment
        cfg = HumanoidEnvCfg(
            # Environment settings
            episode_length_s=30.0,
            decimation=2,
            action_scale=1.0,
            action_space=21,
            observation_space=75,
            # Scene settings
            scene=InteractiveSceneCfg(
                num_envs=1,
                env_spacing=8.0,
                replicate_physics=True,
            ),
        )

        # Create environment
        env = HumanoidEnv(cfg)
        print("Created Humanoid environment with:")
        print(f"\tObservation space: {env.observation_space}")
        print(f"\tAction space: {env.action_space}")

        # The Isaac Lab env is already batched and works on torch tensors, so it
        # is neither a single gym.Env (what check_env / DummyVecEnv expect) nor a
        # numpy-based VecEnv. Sb3VecEnvWrapper is the adapter Isaac Lab ships
        # for exposing such an env through the SB3 VecEnv interface.
        vec_env = Sb3VecEnvWrapper(env)

        # Define PPO model
        # NOTE(review): with one env and n_steps=30 the rollout holds 30 samples,
        # fewer than batch_size=64 - confirm these hyperparameters are intended.
        model = PPO(
            policy="MlpPolicy",
            env=vec_env,
            learning_rate=3e-4,
            n_steps=30,
            batch_size=64,
            n_epochs=10,
            gamma=0.99,
            gae_lambda=0.95,
            clip_range=0.2,
            ent_coef=0.01,
            vf_coef=0.5,
            max_grad_norm=0.5,
            verbose=1,
            tensorboard_log="./ppo_humanoid_tensorboard/",
        )

        # Set up evaluation callback
        # NOTE(review): evaluation runs on the same env instance that is being
        # trained on - confirm that interleaving the two is acceptable.
        eval_callback = EvalCallback(
            vec_env,
            best_model_save_path="./logs/best_model/",
            log_path="./logs/results/",
            eval_freq=5000,
            deterministic=True,
            render=False,
        )

        # Train the model
        print("Starting training...")
        model.learn(total_timesteps=1000000, callback=eval_callback)

        # Save the trained model
        model.save("ppo_humanoid")
        print("Model saved as 'ppo_humanoid'.")

        # Test the trained model
        print("Testing the trained model...")
        obs = vec_env.reset()
        for _ in range(1000):
            action, _ = model.predict(obs, deterministic=True)
            # No explicit render() call: the env was created without a render
            # mode, so the VecEnv render path has nothing to draw.
            obs, _, _, _ = vec_env.step(action)
    finally:
        # Cleanup runs on success and on failure alike
        if env is not None:
            env.close()
        launcher.app.close()

if __name__ == "__main__":
main()

0 comments on commit 1ba21e0

Please sign in to comment.