Source code for rofunc.learning.RofuncRL.tasks.isaacgymenv.hands.bi_qbhand_synergy_grasp

import os
import random

import torch
from PIL import Image as Im
from isaacgym import gymapi
from isaacgym import gymtorch

from rofunc.learning.RofuncRL.tasks.isaacgymenv.base.vec_task import VecTask
from rofunc.learning.RofuncRL.tasks.utils.torch_jit_utils import *
from rofunc.utils.logger.beauty_logger import beauty_print
from rofunc.utils.oslab import get_rofunc_path


[docs]class BiQbSoftHandSynergyGraspTask(VecTask):
    """
    This class corresponds to the GraspAndPlace task. This environment consists of dual-hands, an
    object and a bucket that requires us to pick up the object and put it into the bucket.
    """

    def __init__(self, cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture,
                 force_render, agent_index=[[[0, 1, 2, 3, 4, 5]], [[0, 1, 2, 3, 4, 5]]], is_multi_agent=False):
        self.cfg = cfg
        self.agent_index = agent_index

        self.is_multi_agent = is_multi_agent

        self.randomize = self.cfg["task"]["randomize"]
        self.randomization_params = self.cfg["task"]["randomization_params"]
        self.aggregate_mode = self.cfg["env"]["aggregateMode"]

        self.dist_reward_scale = self.cfg["env"]["distRewardScale"]
        self.rot_reward_scale = self.cfg["env"]["rotRewardScale"]
        self.action_penalty_scale = self.cfg["env"]["actionPenaltyScale"]
        self.success_tolerance = self.cfg["env"]["successTolerance"]
        self.reach_goal_bonus = self.cfg["env"]["reachGoalBonus"]
        self.fall_dist = self.cfg["env"]["fallDistance"]
        self.fall_penalty = self.cfg["env"]["fallPenalty"]
        self.rot_eps = self.cfg["env"]["rotEps"]

        self.vel_obs_scale = 0.2  # scale factor of velocity based observations
        self.force_torque_obs_scale = 10.0  # scale factor of velocity based observations

        self.reset_position_noise = self.cfg["env"]["resetPositionNoise"]
        self.reset_rotation_noise = self.cfg["env"]["resetRotationNoise"]
        self.reset_dof_pos_noise = self.cfg["env"]["resetDofPosRandomInterval"]
        self.reset_dof_vel_noise = self.cfg["env"]["resetDofVelRandomInterval"]

        self.shadow_hand_dof_speed_scale = self.cfg["env"]["dofSpeedScale"]
        self.use_relative_control = self.cfg["env"]["useRelativeControl"]
        self.act_moving_average = self.cfg["env"]["actionsMovingAverage"]

        self.debug_viz = self.cfg["env"]["enableDebugVis"]

        self.max_episode_length = self.cfg["env"]["episodeLength"]
        self.reset_time = self.cfg["env"].get("resetTime", -1.0)
        self.print_success_stat = self.cfg["env"]["printNumSuccesses"]
        self.max_consecutive_successes = self.cfg["env"]["maxConsecutiveSuccesses"]
        self.av_factor = self.cfg["env"].get("averFactor", 0.01)
        print("Averaging factor: ", self.av_factor)

        self.transition_scale = self.cfg["env"]["transition_scale"]
        self.orientation_scale = self.cfg["env"]["orientation_scale"]

        control_freq_inv = self.cfg["env"].get("controlFrequencyInv", 1)
        if self.reset_time > 0.0:
            self.max_episode_length = int(round(self.reset_time / (control_freq_inv * self.sim_params.dt)))
            print("Reset time: ", self.reset_time)
            print("New episode length: ", self.max_episode_length)

        self.object_type = self.cfg["env"]["objectType"]
        # assert self.object_type in ["block", "egg", "pen"]

        self.ignore_z = (self.object_type == "pen")

        self.asset_files_dict = {
            "block": "urdf/objects/cube_multicolor.urdf",
            "egg": "mjcf/open_ai_assets/hand/egg.xml",
            "pen": "mjcf/open_ai_assets/hand/pen.xml",
            # "pot": "mjcf/pot.xml",
            "pot": "mjcf/bucket/100454/mobility.urdf"
        }

        if "asset" in self.cfg["env"]:
            self.asset_files_dict["block"] = self.cfg["env"]["asset"].get("assetFileNameBlock",
                                                                          self.asset_files_dict["block"])
            self.asset_files_dict["egg"] = self.cfg["env"]["asset"].get("assetFileNameEgg",
                                                                        self.asset_files_dict["egg"])
            self.asset_files_dict["pen"] = self.cfg["env"]["asset"].get("assetFileNamePen",
                                                                        self.asset_files_dict["pen"])

        # can be "openai", "full_no_vel", "full", "full_state"
        self.obs_type = self.cfg["env"]["observationType"]

        if not (self.obs_type in ["point_cloud", "full_state"]):
            raise Exception(
                "Unknown type of observations!\nobservationType should be one of: [point_cloud, full_state]")

        print("Obs type:", self.obs_type)

        # <editor-fold desc="obs space">
        self.num_point_cloud_feature_dim = 768

        self.num_hand_obs = 33 * 3 + 95 + 21 + 6
        num = 27 + self.num_hand_obs * 2
        self.num_obs_dict = {
            "point_cloud": num + self.num_point_cloud_feature_dim * 3,
            "point_cloud_for_distill": num + self.num_point_cloud_feature_dim * 3,
            "full_state": num
        }
        self.up_axis = 'z'
        # </editor-fold>

        self.right_fingertips = ["qbhand_thumb_distal_link", "qbhand_index_distal_link",
                                 "qbhand_middle_distal_link", "qbhand_ring_distal_link",
                                 "qbhand_little_distal_link"]
        self.left_fingertips = ["qbhand_thumb_distal_link", "qbhand_index_distal_link",
                                "qbhand_middle_distal_link", "qbhand_ring_distal_link",
                                "qbhand_little_distal_link"]
        # self.right_fingertips = ["right_qbhand_thumb_distal_link", "right_qbhand_index_distal_link",
        #                          "right_qbhand_middle_distal_link", "right_qbhand_ring_distal_link",
        #                          "right_qbhand_little_distal_link"]
        # self.left_fingertips = ["left_qbhand_thumb_distal_link", "left_qbhand_index_distal_link",
        #                         "left_qbhand_middle_distal_link", "left_qbhand_ring_distal_link",
        #                         "left_qbhand_little_distal_link"]

        self.num_fingertips = len(self.right_fingertips) * 2

        self.use_vel_obs = False
        self.fingertip_obs = True
        self.asymmetric_obs = self.cfg["env"]["asymmetric_observations"]

        num_states = 0
        if self.asymmetric_obs:
            num_states = 211

        self.cfg["env"]["numObservations"] = self.num_obs_dict[self.obs_type]
        self.cfg["env"]["numStates"] = num_states

        self.num_agents = 1
        self.cfg["env"]["numActions"] = (2 + 6) * 2  # 2-dim synergy for controlling each hand
        self.num_action = self.cfg["env"]["numActions"]

        super().__init__(cfg, rl_device, sim_device, graphics_device_id, headless, virtual_screen_capture, force_render)

        # <editor-fold desc="dof reduction">
        actor_joint_dict = self.gym.get_actor_joint_dict(self.envs[0], 0)
        self.right_useful_joint_index = sorted(
            [value for key, value in actor_joint_dict.items() if
             ("virtual" not in key) and ("index_knuckle" not in key) and ("middle_knuckle" not in key) and
             ("ring_knuckle" not in key) and ("little_knuckle" not in key)])
        self.left_useful_joint_index = [i + self.num_hand_dofs for i in self.right_useful_joint_index]
        self.right_real_virtual_joint_index_map_dict = {value: actor_joint_dict[key.replace("_virtual", "")] for
                                                        key, value in actor_joint_dict.items() if "virtual" in key}
        self.left_real_virtual_joint_index_map_dict = {key + self.num_hand_dofs: value + self.num_hand_dofs for
                                                       key, value in
                                                       self.right_real_virtual_joint_index_map_dict.items()}
        # </editor-fold>
        actor_rigid_body_dict = self.gym.get_actor_rigid_body_dict(self.envs[0], 0)
        self.right_fingertips_index = sorted([actor_rigid_body_dict[fingertip] for fingertip in self.right_fingertips])
        self.left_fingertips_index = sorted(
            [actor_rigid_body_dict[fingertip] + self.num_hand_dofs for fingertip in self.right_fingertips])

        if self.obs_type in ["point_cloud"]:
            from PIL import Image as Im
            # from pointnet2_ops import pointnet2_utils

        self.camera_debug = self.cfg["env"].get("cameraDebug", False)
        self.point_cloud_debug = self.cfg["env"].get("pointCloudDebug", False)

        if self.viewer != None:
            cam_pos = gymapi.Vec3(10.0, 5.0, 1.0)
            cam_target = gymapi.Vec3(6.0, 5.0, 0.0)
            self.gym.viewer_camera_look_at(self.viewer, None, cam_pos, cam_target)

        # get gym GPU state tensors
        actor_root_state_tensor = self.gym.acquire_actor_root_state_tensor(self.sim)
        dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim)
        rigid_body_tensor = self.gym.acquire_rigid_body_state_tensor(
            self.sim)  # 38 = self.num_hand_bodies * 2 + 2 + 1 + 2 + 1

        sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim)
        self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, self.num_fingertips * 6)

        dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim)
        self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs,
                                                                            self.num_hand_dofs * 2 + self.num_object_dofs * 2)

        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_rigid_body_state_tensor(self.sim)

        # create some wrapper tensors for different slices
        self.shadow_hand_default_dof_pos = torch.zeros(self.num_hand_dofs, dtype=torch.float, device=self.device)
        # self.shadow_hand_default_dof_pos = to_torch([0.0, 0.0, -0,  -0,  -0,  -0, -0, -0,
        #                                     -0,  -0, -0,  -0,  -0,  -0, -0, -0,
        #                                     -0,  -0, -0,  -1.04,  1.2,  0., 0, -1.57], dtype=torch.float, device=self.device)

        self.dof_state = gymtorch.wrap_tensor(dof_state_tensor)
        self.right_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:, :self.num_hand_dofs]
        self.right_hand_dof_pos = self.right_hand_dof_state[..., 0]
        self.right_hand_dof_vel = self.right_hand_dof_state[..., 1]

        self.left_hand_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:,
                                   self.num_hand_dofs:self.num_hand_dofs * 2]
        self.left_hand_dof_pos = self.left_hand_dof_state[..., 0]
        self.left_hand_dof_vel = self.left_hand_dof_state[..., 1]

        self.object_dof_state = self.dof_state.view(self.num_envs, -1, 2)[:,
                                self.num_hand_dofs * 2:self.num_hand_dofs * 2 + self.num_object_dofs]
        self.object_dof_pos = self.object_dof_state[..., 0]

        self.rigid_body_states = gymtorch.wrap_tensor(rigid_body_tensor).view(self.num_envs, -1, 13)
        self.num_bodies = self.rigid_body_states.shape[1]

        self.root_state_tensor = gymtorch.wrap_tensor(actor_root_state_tensor).view(-1, 13)
        self.hand_positions = self.root_state_tensor[:, 0:3]
        self.hand_orientations = self.root_state_tensor[:, 3:7]
        self.hand_linvels = self.root_state_tensor[:, 7:10]
        self.hand_angvels = self.root_state_tensor[:, 10:13]
        self.saved_root_tensor = self.root_state_tensor.clone()

        self.num_dofs = self.gym.get_sim_dof_count(self.sim) // self.num_envs
        self.prev_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device)
        self.cur_targets = torch.zeros((self.num_envs, self.num_dofs), dtype=torch.float, device=self.device)

        self.global_indices = torch.arange(self.num_envs * 3, dtype=torch.int32, device=self.device).view(self.num_envs,
                                                                                                          -1)
        self.x_unit_tensor = to_torch([1, 0, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
        self.y_unit_tensor = to_torch([0, 1, 0], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
        self.z_unit_tensor = to_torch([0, 0, 1], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))

        self.reset_goal_buf = self.reset_buf.clone()
        self.successes = torch.zeros(self.num_envs, dtype=torch.float, device=self.device)
        self.consecutive_successes = torch.zeros(1, dtype=torch.float, device=self.device)

        self.av_factor = to_torch(self.av_factor, dtype=torch.float, device=self.device)
        self.apply_forces = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float)
        self.apply_torque = torch.zeros((self.num_envs, self.num_bodies, 3), device=self.device, dtype=torch.float)

        self.total_successes = 0
        self.total_resets = 0

[docs]    def create_sim(self):
        """
        Allocates which device will simulate and which device will render the scene. Defines the simulation type to be used
        """
        self.dt = self.sim_params.dt
        self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, self.up_axis)

        self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params)
        self._create_ground_plane()
        self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs)))

    def _create_ground_plane(self):
        """
        Adds ground plane to simulation
        """
        plane_params = gymapi.PlaneParams()
        plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0)
        self.gym.add_ground(self.sim, plane_params)

    def _create_envs(self, num_envs, spacing, num_per_row):
        """
        Create multiple parallel isaacgym environments

        :param num_envs: The total number of environment
        :param spacing: Specifies half the side length of the square area occupied by each environment
        :param num_per_row: Specify how many environments in a row
        :return:
        """
        lower = gymapi.Vec3(-spacing, -spacing, 0.0)
        upper = gymapi.Vec3(spacing, spacing, spacing)

        # get rofunc path from rofunc package metadata
        rofunc_path = get_rofunc_path()
        asset_root = os.path.join(rofunc_path, "simulator/assets")
        # right_hand_asset_file = "mjcf/qbhand_no_virtual_right.xml"
        # left_hand_asset_file = "mjcf/qbhand_no_virtual_left.xml"
        right_hand_asset_file = "mjcf/qbhand_full_right.xml"
        left_hand_asset_file = "mjcf/qbhand_full_left.xml"
        # right_hand_asset_file = "mjcf/qbhand_free_joint_right.xml"
        # left_hand_asset_file = "mjcf/qbhand_free_joint_left.xml"
        # right_hand_asset_file = "urdf/curi/urdf/qbhand_right.urdf"
        # left_hand_asset_file = "urdf/curi/urdf/qbhand_left.urdf"
        table_texture_files = os.path.join(asset_root, "textures/texture_stone_stone_texture_0.jpg")
        table_texture_handle = self.gym.create_texture_from_file(self.sim, table_texture_files)

        if "asset" in self.cfg["env"]:
            asset_root = self.cfg["env"]["asset"].get("assetRoot", asset_root)

        object_asset_file = self.asset_files_dict[self.object_type]

        # <editor-fold desc="load hand asset and set hand dof properties">
        asset_options = gymapi.AssetOptions()
        asset_options.flip_visual_attachments = False
        asset_options.fix_base_link = False
        asset_options.collapse_fixed_joints = True
        asset_options.disable_gravity = True
        asset_options.thickness = 0.001
        asset_options.angular_damping = 100
        asset_options.linear_damping = 100
        if self.physics_engine == gymapi.SIM_PHYSX:
            asset_options.use_physx_armature = True
        asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE
        right_hand_asset = self.gym.load_asset(self.sim, asset_root, right_hand_asset_file, asset_options)
        left_hand_asset = self.gym.load_asset(self.sim, asset_root, left_hand_asset_file, asset_options)

        # print asset info
        self.num_hand_bodies = self.gym.get_asset_rigid_body_count(right_hand_asset)
        self.num_shadow_hand_shapes = self.gym.get_asset_rigid_shape_count(right_hand_asset)
        self.num_hand_dofs = self.gym.get_asset_dof_count(right_hand_asset)
        self.num_shadow_hand_actuators = self.gym.get_asset_actuator_count(right_hand_asset)
        self.num_shadow_hand_tendons = self.gym.get_asset_tendon_count(right_hand_asset)
        beauty_print(f"self.num_hand_bodies: {self.num_hand_bodies}")
        beauty_print(f"self.num_shadow_hand_shapes: {self.num_shadow_hand_shapes}")
        beauty_print(f"self.num_hand_dofs: {self.num_hand_dofs}")
        beauty_print(f"self.num_shadow_hand_actuators: {self.num_shadow_hand_actuators}")
        beauty_print(f"self.num_shadow_hand_tendons: {self.num_shadow_hand_tendons}")

        actuated_dof_names = [self.gym.get_asset_actuator_joint_name(right_hand_asset, i) for i in
                              range(self.num_shadow_hand_actuators)]
        self.actuated_dof_indices = [self.gym.find_asset_dof_index(right_hand_asset, name) for name in
                                     actuated_dof_names]

        # set shadow_hand dof properties
        right_hand_dof_props = self.gym.get_asset_dof_properties(right_hand_asset)
        left_hand_dof_props = self.gym.get_asset_dof_properties(left_hand_asset)
        right_hand_dof_props['driveMode'] = gymapi.DOF_MODE_POS
        right_hand_dof_props['stiffness'] = 1000.0
        right_hand_dof_props['damping'] = 100.0
        left_hand_dof_props['driveMode'] = gymapi.DOF_MODE_POS
        left_hand_dof_props['stiffness'] = 1000.0
        left_hand_dof_props['damping'] = 100.0

        self.shadow_hand_dof_lower_limits = []
        self.shadow_hand_dof_upper_limits = []
        self.shadow_hand_dof_default_pos = []
        self.hand_dof_default_vel = []
        self.sensors = []

        for i in range(self.num_hand_dofs):
            self.shadow_hand_dof_lower_limits.append(right_hand_dof_props['lower'][i])
            self.shadow_hand_dof_upper_limits.append(right_hand_dof_props['upper'][i])
            self.shadow_hand_dof_default_pos.append(0.0)
            self.hand_dof_default_vel.append(0.0)

        self.actuated_dof_indices = to_torch(self.actuated_dof_indices, dtype=torch.long, device=self.device)
        self.shadow_hand_dof_lower_limits = to_torch(self.shadow_hand_dof_lower_limits, device=self.device)
        self.shadow_hand_dof_upper_limits = to_torch(self.shadow_hand_dof_upper_limits, device=self.device)
        self.shadow_hand_dof_default_pos = to_torch(self.shadow_hand_dof_default_pos, device=self.device)
        self.hand_dof_default_vel = to_torch(self.hand_dof_default_vel, device=self.device)
        # </editor-fold>

        # <editor-fold desc="load manipulated object and goal assets">
        object_asset_options = gymapi.AssetOptions()
        object_asset_options.density = 500
        object_asset_options.fix_base_link = False
        # object_asset_options.collapse_fixed_joints = True
        # object_asset_options.disable_gravity = True
        object_asset_options.use_mesh_materials = True
        object_asset_options.mesh_normal_mode = gymapi.COMPUTE_PER_VERTEX
        object_asset_options.override_com = True
        object_asset_options.override_inertia = True
        object_asset_options.vhacd_enabled = True
        object_asset_options.vhacd_params = gymapi.VhacdParams()
        object_asset_options.vhacd_params.resolution = 100000
        object_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE

        object_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options)
        block_asset_file = "urdf/objects/cube_multicolor1.urdf"
        block_asset = self.gym.load_asset(self.sim, asset_root, block_asset_file, object_asset_options)

        object_asset_options.disable_gravity = True
        goal_asset = self.gym.load_asset(self.sim, asset_root, object_asset_file, object_asset_options)

        self.num_object_bodies = self.gym.get_asset_rigid_body_count(object_asset)
        self.num_object_shapes = self.gym.get_asset_rigid_shape_count(object_asset)

        # set object dof properties
        self.num_object_dofs = self.gym.get_asset_dof_count(object_asset)
        object_dof_props = self.gym.get_asset_dof_properties(object_asset)

        self.object_dof_lower_limits = []
        self.object_dof_upper_limits = []

        for i in range(self.num_object_dofs):
            self.object_dof_lower_limits.append(object_dof_props['lower'][i])
            self.object_dof_upper_limits.append(object_dof_props['upper'][i])

        self.object_dof_lower_limits = to_torch(self.object_dof_lower_limits, device=self.device)
        self.object_dof_upper_limits = to_torch(self.object_dof_upper_limits, device=self.device)
        # </editor-fold>

        # <editor-fold desc="create table asset">
        table_dims = gymapi.Vec3(1.0, 1.0, 0.6)
        asset_options = gymapi.AssetOptions()
        asset_options.fix_base_link = True
        asset_options.flip_visual_attachments = True
        asset_options.collapse_fixed_joints = True
        asset_options.disable_gravity = True
        asset_options.thickness = 0.001

        table_asset = self.gym.create_box(self.sim, table_dims.x, table_dims.y, table_dims.z, gymapi.AssetOptions())
        # </editor-fold>

        # <editor-fold desc="set initial poses">
        right_hand_start_pose = gymapi.Transform()
        right_hand_start_pose.p = gymapi.Vec3(0.25, 0.2, 0.8)
        right_hand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, -1.57)

        left_hand_start_pose = gymapi.Transform()
        left_hand_start_pose.p = gymapi.Vec3(0.25, -0.2, 0.8)
        left_hand_start_pose.r = gymapi.Quat().from_euler_zyx(-1.57, 0, 1.57)

        object_start_pose = gymapi.Transform()
        object_start_pose.p = gymapi.Vec3(0.0, 0.2, 0.6)
        object_start_pose.r = gymapi.Quat().from_euler_zyx(0, 0, 0)
        pose_dx, pose_dy, pose_dz = -1.0, 0.0, -0.0

        block_start_pose = gymapi.Transform()
        block_start_pose.p = gymapi.Vec3(0.0, -0.2, 0.6)
        block_start_pose.r = gymapi.Quat().from_euler_zyx(1.57, 1.57, 0)
        # object_start_pose.p.x = right_hand_start_pose.p.x + pose_dx
        # object_start_pose.p.y = right_hand_start_pose.p.y + pose_dy
        # object_start_pose.p.z = right_hand_start_pose.p.z + pose_dz

        if self.object_type == "pen":
            object_start_pose.p.z = right_hand_start_pose.p.z + 0.02

        self.goal_displacement = gymapi.Vec3(-0., 0.0, 10)
        self.goal_displacement_tensor = to_torch(
            [self.goal_displacement.x, self.goal_displacement.y, self.goal_displacement.z], device=self.device)
        goal_start_pose = gymapi.Transform()
        goal_start_pose.p = object_start_pose.p + self.goal_displacement

        goal_start_pose.p.z -= 0.0

        table_pose = gymapi.Transform()
        table_pose.p = gymapi.Vec3(0.0, 0.0, 0.5 * table_dims.z)
        table_pose.r = gymapi.Quat().from_euler_zyx(-0., 0, 0)
        # </editor-fold>

        # compute aggregate size
        max_agg_bodies = self.num_hand_bodies * 2 + 3 * self.num_object_bodies + 1
        max_agg_shapes = self.num_shadow_hand_shapes * 2 + 3 * self.num_object_shapes + 1

        self.hands = []
        self.envs = []

        self.object_init_state = []
        self.hand_start_states = []

        self.right_hand_indices = []
        self.left_hand_indices = []
        self.fingertip_indices = []
        self.object_indices = []
        self.goal_object_indices = []
        self.table_indices = []
        self.block_indices = []

        self.right_fingertip_handles = [self.gym.find_asset_rigid_body_index(right_hand_asset, name) for name in
                                        self.right_fingertips]
        self.left_fingertip_handles = [self.gym.find_asset_rigid_body_index(left_hand_asset, name) for name
                                       in self.left_fingertips]

        # create fingertip force sensors, if needed
        sensor_pose = gymapi.Transform()
        for ft_handle in self.right_fingertip_handles:
            self.gym.create_asset_force_sensor(right_hand_asset, ft_handle, sensor_pose)
        for ft_a_handle in self.left_fingertip_handles:
            self.gym.create_asset_force_sensor(left_hand_asset, ft_a_handle, sensor_pose)

        if self.obs_type in ["point_cloud"]:
            self.cameras = []
            self.camera_tensors = []
            self.camera_view_matrixs = []
            self.camera_proj_matrixs = []

            self.camera_props = gymapi.CameraProperties()
            self.camera_props.width = 256
            self.camera_props.height = 256
            self.camera_props.enable_tensors = True

            self.env_origin = torch.zeros((self.num_envs, 3), device=self.device, dtype=torch.float)
            self.pointCloudDownsampleNum = 768
            self.camera_u = torch.arange(0, self.camera_props.width, device=self.device)
            self.camera_v = torch.arange(0, self.camera_props.height, device=self.device)

            self.camera_v2, self.camera_u2 = torch.meshgrid(self.camera_v, self.camera_u, indexing='ij')

            if self.point_cloud_debug:
                import open3d as o3d
                from bidexhands.utils.o3dviewer import PointcloudVisualizer
                self.pointCloudVisualizer = PointcloudVisualizer()
                self.pointCloudVisualizerInitialized = False
                self.o3d_pc = o3d.geometry.PointCloud()
            else:
                self.pointCloudVisualizer = None

        for i in range(self.num_envs):
            # create env instance
            env_ptr = self.gym.create_env(
                self.sim, lower, upper, num_per_row
            )

            if self.aggregate_mode >= 1:
                self.gym.begin_aggregate(env_ptr, max_agg_bodies, max_agg_shapes, True)

            # add hand - collision filter = -1 to use asset collision filters set in mjcf loader
            right_hand_actor = self.gym.create_actor(env_ptr, right_hand_asset, right_hand_start_pose, "right_hand", i,
                                                     1, 0)
            left_hand_actor = self.gym.create_actor(env_ptr, left_hand_asset,
                                                    left_hand_start_pose, "left_hand", i, 1, 0)

            self.hand_start_states.append(
                [right_hand_start_pose.p.x, right_hand_start_pose.p.y, right_hand_start_pose.p.z,
                 right_hand_start_pose.r.x, right_hand_start_pose.r.y, right_hand_start_pose.r.z,
                 right_hand_start_pose.r.w,
                 0, 0, 0, 0, 0, 0])

            self.gym.set_actor_dof_properties(env_ptr, right_hand_actor, right_hand_dof_props)
            right_hand_idx = self.gym.get_actor_index(env_ptr, right_hand_actor, gymapi.DOMAIN_SIM)
            self.right_hand_indices.append(right_hand_idx)
            self.gym.set_actor_dof_properties(env_ptr, left_hand_actor, left_hand_dof_props)
            left_right_hand_idx = self.gym.get_actor_index(env_ptr, left_hand_actor, gymapi.DOMAIN_SIM)
            self.left_hand_indices.append(left_right_hand_idx)

            # <editor-fold desc="randomize colors and textures for rigid body">
            num_bodies = self.gym.get_actor_rigid_body_count(env_ptr, right_hand_actor)
            hand_rigid_body_index = [[0], [i for i in range(1, 6)], [i for i in range(6, 13)],
                                     [i for i in range(13, 20)],
                                     [i for i in range(20, 27)], [i for i in range(27, 34)]]

            for n in self.agent_index[0]:
                colorx = random.uniform(0, 1)
                colory = random.uniform(0, 1)
                colorz = random.uniform(0, 1)
                for m in n:
                    for o in hand_rigid_body_index[m]:
                        self.gym.set_rigid_body_color(env_ptr, right_hand_actor, o, gymapi.MESH_VISUAL,
                                                      gymapi.Vec3(colorx, colory, colorz))
            for n in self.agent_index[1]:
                colorx = random.uniform(0, 1)
                colory = random.uniform(0, 1)
                colorz = random.uniform(0, 1)
                for m in n:
                    for o in hand_rigid_body_index[m]:
                        self.gym.set_rigid_body_color(env_ptr, left_hand_actor, o, gymapi.MESH_VISUAL,
                                                      gymapi.Vec3(colorx, colory, colorz))
                # gym.set_rigid_body_texture(env, actor_handles[-1], n, gymapi.MESH_VISUAL,
                #                            loaded_texture_handle_list[random.randint(0, len(loaded_texture_handle_list)-1)])
            # </editor-fold>

            # create fingertip force-torque sensors
            self.gym.enable_actor_dof_force_sensors(env_ptr, right_hand_actor)
            self.gym.enable_actor_dof_force_sensors(env_ptr, left_hand_actor)

            # add object
            object_handle = self.gym.create_actor(env_ptr, object_asset, object_start_pose, "object", i, 0, 0)
            self.object_init_state.append([object_start_pose.p.x, object_start_pose.p.y, object_start_pose.p.z,
                                           object_start_pose.r.x, object_start_pose.r.y, object_start_pose.r.z,
                                           object_start_pose.r.w,
                                           0, 0, 0, 0, 0, 0])
            object_idx = self.gym.get_actor_index(env_ptr, object_handle, gymapi.DOMAIN_SIM)
            self.object_indices.append(object_idx)
            # self.gym.set_actor_scale(env_ptr, object_handle, 0.3)

            block_handle = self.gym.create_actor(env_ptr, block_asset, block_start_pose, "block", i, 0, 0)
            block_idx = self.gym.get_actor_index(env_ptr, block_handle, gymapi.DOMAIN_SIM)
            self.block_indices.append(block_idx)

            # add goal object
            goal_handle = self.gym.create_actor(env_ptr, goal_asset, goal_start_pose, "goal_object", i + self.num_envs,
                                                0, 0)
            goal_object_idx = self.gym.get_actor_index(env_ptr, goal_handle, gymapi.DOMAIN_SIM)
            self.goal_object_indices.append(goal_object_idx)
            # self.gym.set_actor_scale(env_ptr, goal_handle, 0.3)

            # add table
            table_handle = self.gym.create_actor(env_ptr, table_asset, table_pose, "table", i, -1, 0)
            self.gym.set_rigid_body_texture(env_ptr, table_handle, 0, gymapi.MESH_VISUAL, table_texture_handle)
            table_idx = self.gym.get_actor_index(env_ptr, table_handle, gymapi.DOMAIN_SIM)
            self.table_indices.append(table_idx)

            if self.object_type != "block":
                self.gym.set_rigid_body_color(
                    env_ptr, object_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98))
                self.gym.set_rigid_body_color(
                    env_ptr, goal_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.6, 0.72, 0.98))

            if self.obs_type in ["point_cloud"]:
                camera_handle = self.gym.create_camera_sensor(env_ptr, self.camera_props)
                self.gym.set_camera_location(camera_handle, env_ptr, gymapi.Vec3(0.25, -0., 1.0),
                                             gymapi.Vec3(-0.24, -0., 0))
                camera_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, env_ptr, camera_handle,
                                                                     gymapi.IMAGE_DEPTH)
                torch_cam_tensor = gymtorch.wrap_tensor(camera_tensor)
                cam_vinv = torch.inverse(
                    (torch.tensor(self.gym.get_camera_view_matrix(self.sim, env_ptr, camera_handle)))).to(self.device)
                cam_proj = torch.tensor(self.gym.get_camera_proj_matrix(self.sim, env_ptr, camera_handle),
                                        device=self.device)

                origin = self.gym.get_env_origin(env_ptr)
                self.env_origin[i][0] = origin.x
                self.env_origin[i][1] = origin.y
                self.env_origin[i][2] = origin.z
                self.camera_tensors.append(torch_cam_tensor)
                self.camera_view_matrixs.append(cam_vinv)
                self.camera_proj_matrixs.append(cam_proj)
                self.cameras.append(camera_handle)

            if self.aggregate_mode > 0:
                self.gym.end_aggregate(env_ptr)

            self.envs.append(env_ptr)
            self.hands.append(right_hand_actor)

        self.object_init_state = to_torch(self.object_init_state, device=self.device, dtype=torch.float).view(
            self.num_envs, 13)
        self.goal_states = self.object_init_state.clone()
        # self.goal_pose = self.goal_states[:, 0:7]
        # self.goal_pos = self.goal_states[:, 0:3]
        # self.goal_rot = self.goal_states[:, 3:7]
        # self.goal_states[:, self.up_axis_idx] -= 0.04
        self.goal_init_state = self.goal_states.clone()
        self.hand_start_states = to_torch(self.hand_start_states, device=self.device).view(self.num_envs, 13)

        self.right_fingertip_handles = to_torch(self.right_fingertip_handles, dtype=torch.long, device=self.device)
        self.left_fingertip_handles = to_torch(self.left_fingertip_handles, dtype=torch.long, device=self.device)

        self.right_hand_indices = to_torch(self.right_hand_indices, dtype=torch.long, device=self.device)
        self.left_hand_indices = to_torch(self.left_hand_indices, dtype=torch.long, device=self.device)

        self.object_indices = to_torch(self.object_indices, dtype=torch.long, device=self.device)
        self.goal_object_indices = to_torch(self.goal_object_indices, dtype=torch.long, device=self.device)
        self.table_indices = to_torch(self.table_indices, dtype=torch.long, device=self.device)
        self.block_indices = to_torch(self.block_indices, dtype=torch.long, device=self.device)

[docs]    def compute_reward(self, actions):
        """
        Compute the reward of all environment. The core function is compute_hand_reward(
            self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes, self.consecutive_successes,
            self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot, self.block_right_handle_pos, self.block_left_handle_pos, 
            self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos, self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos, 
            self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos, self.left_hand_th_pos, 
            self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale,
            self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty,
            self.max_consecutive_successes, self.av_factor, (self.object_type == "pen")
        )
        , which we will introduce in detail there

        Args:
            actions (tensor): Actions of agents in the all environment 
        """
        self.rew_buf[:], self.reset_buf[:], self.reset_goal_buf[:], self.progress_buf[:], self.successes[
                                                                                          :], self.consecutive_successes[
                                                                                              :] = compute_hand_reward(
            self.rew_buf, self.reset_buf, self.reset_goal_buf, self.progress_buf, self.successes,
            self.consecutive_successes,
            self.max_episode_length, self.object_pos, self.object_rot, self.goal_pos, self.goal_rot,
            self.block_right_handle_pos, self.block_left_handle_pos,
            self.left_hand_pos, self.right_hand_pos, self.right_hand_ff_pos, self.right_hand_mf_pos,
            self.right_hand_rf_pos, self.right_hand_lf_pos, self.right_hand_th_pos,
            self.left_hand_ff_pos, self.left_hand_mf_pos, self.left_hand_rf_pos, self.left_hand_lf_pos,
            self.left_hand_th_pos,
            self.dist_reward_scale, self.rot_reward_scale, self.rot_eps, self.actions, self.action_penalty_scale,
            self.success_tolerance, self.reach_goal_bonus, self.fall_dist, self.fall_penalty,
            self.max_consecutive_successes, self.av_factor, (self.object_type == "pen")
        )

        self.extras['successes'] = self.successes
        self.extras['consecutive_successes'] = self.consecutive_successes

        if self.print_success_stat:
            self.total_resets = self.total_resets + self.reset_buf.sum()
            direct_average_successes = self.total_successes + self.successes.sum()
            self.total_successes = self.total_successes + (self.successes * self.reset_buf).sum()

            # The direct average shows the overall result more quickly, but slightly undershoots long term
            # policy performance.
            print("Direct average consecutive successes = {:.1f}".format(
                direct_average_successes / (self.total_resets + self.num_envs)))
            if self.total_resets > 0:
                print("Post-Reset average consecutive successes = {:.1f}".format(
                    self.total_successes / self.total_resets))

[docs]    def compute_observations(self):
        """
        Compute the observations of all environment. The core function is self.compute_full_state(True), 
        which we will introduce in detail there

        """
        self.gym.refresh_dof_state_tensor(self.sim)
        self.gym.refresh_actor_root_state_tensor(self.sim)
        self.gym.refresh_rigid_body_state_tensor(self.sim)
        self.gym.refresh_force_sensor_tensor(self.sim)
        self.gym.refresh_dof_force_tensor(self.sim)

        if self.obs_type in ["point_cloud"]:
            self.gym.render_all_camera_sensors(self.sim)
            self.gym.start_access_image_tensors(self.sim)

        self.object_pose = self.root_state_tensor[self.object_indices, 0:7]
        self.object_pos = self.root_state_tensor[self.object_indices, 0:3]
        self.object_rot = self.root_state_tensor[self.object_indices, 3:7]
        self.object_linvel = self.root_state_tensor[self.object_indices, 7:10]
        self.object_angvel = self.root_state_tensor[self.object_indices, 10:13]

        self.block_right_handle_pos = self.rigid_body_states[:, self.num_hand_bodies * 2 + 1, 0:3]  # (256, ?, 13)
        self.block_right_handle_rot = self.rigid_body_states[:, self.num_hand_bodies * 2 + 1, 3:7]
        self.block_right_handle_pos = (self.block_right_handle_pos
                                       + quat_apply(self.block_right_handle_rot,
                                                    to_torch([0, 1, 0], device=self.device).repeat(self.num_envs,
                                                                                                   1) * 0.))
        self.block_right_handle_pos = (self.block_right_handle_pos
                                       + quat_apply(self.block_right_handle_rot,
                                                    to_torch([1, 0, 0], device=self.device).repeat(self.num_envs,
                                                                                                   1) * 0.0))
        self.block_right_handle_pos = (self.block_right_handle_pos
                                       + quat_apply(self.block_right_handle_rot,
                                                    to_torch([0, 0, 1], device=self.device).repeat(self.num_envs,
                                                                                                   1) * 0.0))

        self.block_left_handle_pos = self.rigid_body_states[:, self.num_hand_bodies * 2 + 2, 0:3]
        self.block_left_handle_rot = self.rigid_body_states[:, self.num_hand_bodies * 2 + 2, 3:7]
        self.block_left_handle_pos = (self.block_left_handle_pos
                                      + quat_apply(self.block_left_handle_rot,
                                                   to_torch([0, 1, 0], device=self.device).repeat(self.num_envs,
                                                                                                  1) * 0.0))
        self.block_left_handle_pos = (self.block_left_handle_pos
                                      + quat_apply(self.block_left_handle_rot,
                                                   to_torch([1, 0, 0], device=self.device).repeat(self.num_envs,
                                                                                                  1) * 0.0))
        self.block_left_handle_pos = (self.block_left_handle_pos
                                      + quat_apply(self.block_left_handle_rot,
                                                   to_torch([0, 0, 1], device=self.device).repeat(self.num_envs,
                                                                                                  1) * 0.0))

        # <editor-fold desc="hand poses">
        self.left_hand_pos = self.rigid_body_states[:, 0 + self.num_hand_bodies, 0:3]
        self.left_hand_rot = self.rigid_body_states[:, 0 + self.num_hand_bodies, 3:7]
        self.left_hand_pos = self.left_hand_pos + quat_apply(self.left_hand_rot,
                                                             to_torch([0, 0, 1], device=self.device).repeat(
                                                                 self.num_envs, 1) * 0.08)
        self.left_hand_pos = self.left_hand_pos + quat_apply(self.left_hand_rot,
                                                             to_torch([0, 1, 0], device=self.device).repeat(
                                                                 self.num_envs, 1) * -0.02)

        self.right_hand_pos = self.rigid_body_states[:, 0, 0:3]
        self.right_hand_rot = self.rigid_body_states[:, 0, 3:7]
        self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
                                                               to_torch([0, 0, 1], device=self.device).repeat(
                                                                   self.num_envs, 1) * 0.08)
        self.right_hand_pos = self.right_hand_pos + quat_apply(self.right_hand_rot,
                                                               to_torch([0, 1, 0], device=self.device).repeat(
                                                                   self.num_envs, 1) * -0.02)
        # </editor-fold>

        # <editor-fold desc="right hand finger">
        self.right_hand_th_pos = self.rigid_body_states[:, self.right_fingertips_index[0], 0:3]
        self.right_hand_th_rot = self.rigid_body_states[:, self.right_fingertips_index[0], 3:7]
        self.right_hand_th_pos = self.right_hand_th_pos + quat_apply(self.right_hand_th_rot,
                                                                     to_torch([0, 0, 1], device=self.device).repeat(
                                                                         self.num_envs, 1) * 0.02)
        self.right_hand_ff_pos = self.rigid_body_states[:, self.right_fingertips_index[1], 0:3]
        self.right_hand_ff_rot = self.rigid_body_states[:, self.right_fingertips_index[1], 3:7]
        self.right_hand_ff_pos = self.right_hand_ff_pos + quat_apply(self.right_hand_ff_rot,
                                                                     to_torch([0, 0, 1], device=self.device).repeat(
                                                                         self.num_envs, 1) * 0.02)
        self.right_hand_mf_pos = self.rigid_body_states[:, self.right_fingertips_index[2], 0:3]
        self.right_hand_mf_rot = self.rigid_body_states[:, self.right_fingertips_index[2], 3:7]
        self.right_hand_mf_pos = self.right_hand_mf_pos + quat_apply(self.right_hand_mf_rot,
                                                                     to_torch([0, 0, 1], device=self.device).repeat(
                                                                         self.num_envs, 1) * 0.02)
        self.right_hand_rf_pos = self.rigid_body_states[:, self.right_fingertips_index[3], 0:3]
        self.right_hand_rf_rot = self.rigid_body_states[:, self.right_fingertips_index[3], 3:7]
        self.right_hand_rf_pos = self.right_hand_rf_pos + quat_apply(self.right_hand_rf_rot,
                                                                     to_torch([0, 0, 1], device=self.device).repeat(
                                                                         self.num_envs, 1) * 0.02)
        self.right_hand_lf_pos = self.rigid_body_states[:, self.right_fingertips_index[4], 0:3]
        self.right_hand_lf_rot = self.rigid_body_states[:, self.right_fingertips_index[4], 3:7]
        self.right_hand_lf_pos = self.right_hand_lf_pos + quat_apply(self.right_hand_lf_rot,
                                                                     to_torch([0, 0, 1], device=self.device).repeat(
                                                                         self.num_envs, 1) * 0.02)

        # </editor-fold>

        # <editor-fold desc="left hand finger">
        self.left_hand_th_pos = self.rigid_body_states[:, self.left_fingertips_index[0], 0:3]
        self.left_hand_th_rot = self.rigid_body_states[:, self.left_fingertips_index[0], 3:7]
        self.left_hand_th_pos = self.left_hand_th_pos + quat_apply(self.left_hand_th_rot,
                                                                   to_torch([0, 0, 1], device=self.device).repeat(
                                                                       self.num_envs, 1) * 0.02)
        self.left_hand_ff_pos = self.rigid_body_states[:, self.left_fingertips_index[1], 0:3]
        self.left_hand_ff_rot = self.rigid_body_states[:, self.left_fingertips_index[1], 3:7]
        self.left_hand_ff_pos = self.left_hand_ff_pos + quat_apply(self.left_hand_ff_rot,
                                                                   to_torch([0, 0, 1], device=self.device).repeat(
                                                                       self.num_envs, 1) * 0.02)
        self.left_hand_mf_pos = self.rigid_body_states[:, self.left_fingertips_index[2], 0:3]
        self.left_hand_mf_rot = self.rigid_body_states[:, self.left_fingertips_index[2], 3:7]
        self.left_hand_mf_pos = self.left_hand_mf_pos + quat_apply(self.left_hand_mf_rot,
                                                                   to_torch([0, 0, 1], device=self.device).repeat(
                                                                       self.num_envs, 1) * 0.02)
        self.left_hand_rf_pos = self.rigid_body_states[:, self.left_fingertips_index[3], 0:3]
        self.left_hand_rf_rot = self.rigid_body_states[:, self.left_fingertips_index[3], 3:7]
        self.left_hand_rf_pos = self.left_hand_rf_pos + quat_apply(self.left_hand_rf_rot,
                                                                   to_torch([0, 0, 1], device=self.device).repeat(
                                                                       self.num_envs, 1) * 0.02)
        self.left_hand_lf_pos = self.rigid_body_states[:, self.left_fingertips_index[4], 0:3]
        self.left_hand_lf_rot = self.rigid_body_states[:, self.left_fingertips_index[4], 3:7]
        self.left_hand_lf_pos = self.left_hand_lf_pos + quat_apply(self.left_hand_lf_rot,
                                                                   to_torch([0, 0, 1], device=self.device).repeat(
                                                                       self.num_envs, 1) * 0.02)

        # </editor-fold>

        self.goal_pos = to_torch([-0.3, 0, 0.6], dtype=torch.float, device=self.device).repeat((self.num_envs, 1))
        self.goal_rot = self.goal_states[:, 3:7]

        self.right_fingertip_state = self.rigid_body_states[:, self.right_fingertip_handles][:, :, 0:13]
        self.right_fingertip_pos = self.rigid_body_states[:, self.right_fingertip_handles][:, :, 0:3]
        self.left_fingertip_state = self.rigid_body_states[:, self.left_fingertip_handles][:, :, 0:13]
        self.left_fingertip_pos = self.rigid_body_states[:, self.left_fingertip_handles][:, :, 0:3]

        if self.obs_type == "full_state":
            self.compute_full_state()
        elif self.obs_type == "point_cloud":
            self.compute_point_cloud_observation()

        if self.asymmetric_obs:
            self.compute_full_state(True)

[docs]    def compute_full_state(self, asymm_obs=False):
        """
        Compute the observations of all environment. The observation is composed of three parts:
        the state values of the left and right hands, and the information of objects and target.
        The state values of the left and right hands were the same for each task, including hand
        joint and finger positions, velocity, and force information. The detail 361-dimensional
        observational space as shown in below:

        Index       Description
        0 - 14	    right shadow hand dof position
        15 - 29	    right shadow hand dof velocity
        30 - 44	    right shadow hand dof force
        45 - 109	right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
        110 - 139	right shadow hand fingertip force, torque (5 x 6)
        140 - 142	right shadow hand base position
        143 - 145	right shadow hand base rotation
        146 - 166	right shadow hand actions
        167 - 181	left shadow hand dof position
        182 - 196	left shadow hand dof velocity
        197 - 211	left shadow hand dof force
        212 - 276	left shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
        277 - 306	left shadow hand fingertip force, torque (5 x 6)
        307 - 309	left shadow hand base position
        310 - 312	left shadow hand base rotation
        313 - 333	left shadow hand actions
        334 - 340	object pose
        341 - 343	object linear velocity
        344 - 346	object angle velocity
        347 - 349	block right handle position
        350 - 353   block right handle rotation
        354 - 356	block left handle position
        357 - 360	block left handle rotation
        """
        num_ft_states = 13 * int(self.num_fingertips / 2)  # 65 = 13 * (10 / 2)
        num_ft_force_torques = 6 * int(self.num_fingertips / 2)  # 30
        num_act_per_hand = int(self.num_action / 2)  # 8

        # 0 -14 right hand dof position
        self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.right_hand_dof_pos,
                                                        self.shadow_hand_dof_lower_limits,
                                                        self.shadow_hand_dof_upper_limits)
        # 15 - 29 right hand dof velocity
        self.obs_buf[:, self.num_hand_dofs:2 * self.num_hand_dofs] = self.vel_obs_scale * self.right_hand_dof_vel
        # 30 - 44 right hand dof force
        self.obs_buf[:, 2 * self.num_hand_dofs:3 * self.num_hand_dofs] \
            = self.force_torque_obs_scale * self.dof_force_tensor[:, :self.num_hand_dofs]

        fingertip_obs_start = 3 * self.num_hand_dofs
        # 45 - 109 right hand fingertip pose, linear velocity, angle velocity (5 x 13)
        self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.right_fingertip_state.reshape(
            self.num_envs, num_ft_states)
        # 110 - 139 right hand fingertip force, torque (5 x 6)
        self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states +
                                                            num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
                                                                                                                  :,
                                                                                                                  :30]
        # 140 - 142 right hand base position
        right_hand_pose_start = fingertip_obs_start + 95  # 140 = 45 + 95
        self.obs_buf[:, right_hand_pose_start:right_hand_pose_start + 3] = self.right_hand_pos
        # 143 - 145 right hand base rotation
        self.obs_buf[:, right_hand_pose_start + 3:right_hand_pose_start + 4] = \
            get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[0].unsqueeze(-1)
        self.obs_buf[:, right_hand_pose_start + 4:right_hand_pose_start + 5] = \
            get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[1].unsqueeze(-1)
        self.obs_buf[:, right_hand_pose_start + 5:right_hand_pose_start + 6] = \
            get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[2].unsqueeze(-1)

        # 146 - 167 right hand actions
        right_action_obs_start = right_hand_pose_start + 6  # 146 = 140 + 6
        self.obs_buf[:, right_action_obs_start:right_action_obs_start + num_act_per_hand] = self.actions[:,
                                                                                            :num_act_per_hand]

        # left_hand
        # 167 - 181 left hand dof position
        left_hand_start = right_action_obs_start + num_act_per_hand  # 167 = 146 + 21
        self.obs_buf[:, left_hand_start:self.num_hand_dofs + left_hand_start] = unscale(
            self.left_hand_dof_pos,
            self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits)
        # 182 - 196 left hand dof velocity
        self.obs_buf[:,
        self.num_hand_dofs + left_hand_start:2 * self.num_hand_dofs + left_hand_start] = self.vel_obs_scale * self.left_hand_dof_vel
        # 197 - 211 left hand dof force
        self.obs_buf[:,
        2 * self.num_hand_dofs + left_hand_start:3 * self.num_hand_dofs + left_hand_start] = (
                self.force_torque_obs_scale * self.dof_force_tensor[:, self.num_hand_dofs:2 * self.num_hand_dofs])

        left_fingertip_obs_start = left_hand_start + 3 * self.num_hand_dofs  # 212 = 167 + 45
        # 212 - 276 left hand fingertip pose, linear velocity, angle velocity (5 x 13)
        self.obs_buf[:,
        left_fingertip_obs_start:left_fingertip_obs_start + num_ft_states] = self.left_fingertip_state.reshape(
            self.num_envs, num_ft_states)
        # 277 - 306 left hand fingertip force, torque (5 x 6)
        self.obs_buf[:, left_fingertip_obs_start + num_ft_states:left_fingertip_obs_start + num_ft_states +
                                                                 num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
                                                                                                                       :,
                                                                                                                       30:]

        left_hand_pose_start = left_fingertip_obs_start + 95  # 307 = 212 + 95
        # 307 - 309 left hand base position
        self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos
        # 310 - 312 left hand base rotation
        self.obs_buf[:, left_hand_pose_start + 3:left_hand_pose_start + 4] = \
            get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[0].unsqueeze(-1)
        self.obs_buf[:, left_hand_pose_start + 4:left_hand_pose_start + 5] = \
            get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[1].unsqueeze(-1)
        self.obs_buf[:, left_hand_pose_start + 5:left_hand_pose_start + 6] = \
            get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[2].unsqueeze(-1)

        left_right_action_obs_start = left_hand_pose_start + 6
        # 313 - 333 left hand actions
        self.obs_buf[:, left_right_action_obs_start:left_right_action_obs_start + num_act_per_hand] = self.actions[:,
                                                                                                      num_act_per_hand:]

        obj_obs_start = left_right_action_obs_start + num_act_per_hand  # 334
        # 334 - 340 object pose
        self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose
        # 341 - 343 object linear velocity
        self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel
        # 344 - 346 object angle velocity
        self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel
        # 347 - 349 block right handle position
        self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos
        # 350 - 353 block right handle rotation
        self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot
        # 354 - 356 block left handle position
        self.obs_buf[:, obj_obs_start + 20:obj_obs_start + 23] = self.block_left_handle_pos
        # 357 - 360 block left handle rotation
        self.obs_buf[:, obj_obs_start + 23:obj_obs_start + 27] = self.block_left_handle_rot
        # goal_obs_start = obj_obs_start + 13  # 157 = 144 + 13
        # self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose
        # self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))

    # def compute_point_cloud_observation(self, collect_demonstration=False):
    #     """
    #     Compute the observations of all environment. The observation is composed of three parts:
    #     the state values of the left and right hands, and the information of objects and target.
    #     The state values of the left and right hands were the same for each task, including hand
    #     joint and finger positions, velocity, and force information. The detail 361-dimensional
    #     observational space as shown in below:
    #
    #     Index       Description
    #     0 - 23	    right shadow hand dof position
    #     24 - 47	    right shadow hand dof velocity
    #     48 - 71	    right shadow hand dof force
    #     72 - 136	right shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
    #     137 - 166	right shadow hand fingertip force, torque (5 x 6)
    #     167 - 169	right shadow hand base position
    #     170 - 172	right shadow hand base rotation
    #     173 - 198	right shadow hand actions
    #     199 - 222	left shadow hand dof position
    #     223 - 246	left shadow hand dof velocity
    #     247 - 270	left shadow hand dof force
    #     271 - 335	left shadow hand fingertip pose, linear velocity, angle velocity (5 x 13)
    #     336 - 365	left shadow hand fingertip force, torque (5 x 6)
    #     366 - 368	left shadow hand base position
    #     369 - 371	left shadow hand base rotation
    #     372 - 397	left shadow hand actions
    #     398 - 404	object pose
    #     405 - 407	object linear velocity
    #     408 - 410	object angle velocity
    #     411 - 413	block right handle position
    #     414 - 417	block right handle rotation
    #     418 - 420	block left handle position
    #     421 - 424	block left handle rotation
    #     """
    #     num_ft_states = 13 * int(self.num_fingertips / 2)  # 65
    #     num_ft_force_torques = 6 * int(self.num_fingertips / 2)  # 30
    #
    #     self.obs_buf[:, 0:self.num_hand_dofs] = unscale(self.right_hand_dof_pos,
    #                                                     self.shadow_hand_dof_lower_limits,
    #                                                     self.shadow_hand_dof_upper_limits)
    #     self.obs_buf[:,
    #     self.num_hand_dofs:2 * self.num_hand_dofs] = self.vel_obs_scale * self.right_hand_dof_vel
    #     self.obs_buf[:,
    #     2 * self.num_hand_dofs:3 * self.num_hand_dofs] = self.force_torque_obs_scale * self.dof_force_tensor[
    #                                                                                    :, :24]
    #
    #     fingertip_obs_start = 72  # 168 = 157 + 11
    #     self.obs_buf[:, fingertip_obs_start:fingertip_obs_start + num_ft_states] = self.right_fingertip_state.reshape(
    #         self.num_envs, num_ft_states)
    #     self.obs_buf[:, fingertip_obs_start + num_ft_states:fingertip_obs_start + num_ft_states +
    #                                                         num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
    #                                                                                                               :,
    #                                                                                                               :30]
    #
    #     right_hand_pose_start = fingertip_obs_start + 95
    #     self.obs_buf[:, right_hand_pose_start:right_hand_pose_start + 3] = self.right_hand_pos
    #     self.obs_buf[:, right_hand_pose_start + 3:right_hand_pose_start + 4] = \
    #         get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[0].unsqueeze(-1)
    #     self.obs_buf[:, right_hand_pose_start + 4:right_hand_pose_start + 5] = \
    #         get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[1].unsqueeze(-1)
    #     self.obs_buf[:, right_hand_pose_start + 5:right_hand_pose_start + 6] = \
    #         get_euler_xyz(self.hand_orientations[self.right_hand_indices, :])[2].unsqueeze(-1)
    #
    #     right_action_obs_start = right_hand_pose_start + 6
    #     self.obs_buf[:, right_action_obs_start:right_action_obs_start + 26] = self.actions[:, :26]
    #
    #     # left_hand
    #     left_hand_start = right_action_obs_start + 26
    #     self.obs_buf[:, left_hand_start:self.num_hand_dofs + left_hand_start] = unscale(
    #         self.left_hand_dof_pos,
    #         self.shadow_hand_dof_lower_limits, self.shadow_hand_dof_upper_limits)
    #     self.obs_buf[:,
    #     self.num_hand_dofs + left_hand_start:2 * self.num_hand_dofs + left_hand_start] = self.vel_obs_scale * self.left_hand_dof_vel
    #     self.obs_buf[:,
    #     2 * self.num_hand_dofs + left_hand_start:3 * self.num_hand_dofs + left_hand_start] = self.force_torque_obs_scale * self.dof_force_tensor[
    #                                                                                                                        :,
    #                                                                                                                        24:48]
    #
    #     left_fingertip_obs_start = left_hand_start + 72
    #     self.obs_buf[:,
    #     left_fingertip_obs_start:left_fingertip_obs_start + num_ft_states] = self.left_fingertip_state.reshape(
    #         self.num_envs, num_ft_states)
    #     self.obs_buf[:, left_fingertip_obs_start + num_ft_states:left_fingertip_obs_start + num_ft_states +
    #                                                              num_ft_force_torques] = self.force_torque_obs_scale * self.vec_sensor_tensor[
    #                                                                                                                    :,
    #                                                                                                                    30:]
    #
    #     left_hand_pose_start = left_fingertip_obs_start + 95
    #     self.obs_buf[:, left_hand_pose_start:left_hand_pose_start + 3] = self.left_hand_pos
    #     self.obs_buf[:, left_hand_pose_start + 3:left_hand_pose_start + 4] = \
    #         get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[0].unsqueeze(-1)
    #     self.obs_buf[:, left_hand_pose_start + 4:left_hand_pose_start + 5] = \
    #         get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[1].unsqueeze(-1)
    #     self.obs_buf[:, left_hand_pose_start + 5:left_hand_pose_start + 6] = \
    #         get_euler_xyz(self.hand_orientations[self.left_hand_indices, :])[2].unsqueeze(-1)
    #
    #     left_right_action_obs_start = left_hand_pose_start + 6
    #     self.obs_buf[:, left_right_action_obs_start:left_right_action_obs_start + 26] = self.actions[:, 26:]
    #
    #     obj_obs_start = left_right_action_obs_start + 26  # 144
    #     self.obs_buf[:, obj_obs_start:obj_obs_start + 7] = self.object_pose
    #     self.obs_buf[:, obj_obs_start + 7:obj_obs_start + 10] = self.object_linvel
    #     self.obs_buf[:, obj_obs_start + 10:obj_obs_start + 13] = self.vel_obs_scale * self.object_angvel
    #     self.obs_buf[:, obj_obs_start + 13:obj_obs_start + 16] = self.block_right_handle_pos
    #     self.obs_buf[:, obj_obs_start + 16:obj_obs_start + 20] = self.block_right_handle_rot
    #     self.obs_buf[:, obj_obs_start + 20:obj_obs_start + 23] = self.block_left_handle_pos
    #     self.obs_buf[:, obj_obs_start + 23:obj_obs_start + 27] = self.block_left_handle_rot
    #     # goal_obs_start = obj_obs_start + 13  # 157 = 144 + 13
    #     # self.obs_buf[:, goal_obs_start:goal_obs_start + 7] = self.goal_pose
    #     # self.obs_buf[:, goal_obs_start + 7:goal_obs_start + 11] = quat_mul(self.object_rot, quat_conjugate(self.goal_rot))
    #     point_clouds = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device)
    #
    #     if self.camera_debug:
    #         import matplotlib.pyplot as plt
    #         self.camera_rgba_debug_fig = plt.figure("CAMERA_RGBD_DEBUG")
    #         camera_rgba_image = self.camera_visulization(is_depth_image=False)
    #         plt.imshow(camera_rgba_image)
    #         plt.pause(1e-9)
    #
    #     for i in range(self.num_envs):
    #         # Here is an example. In practice, it's better not to convert tensor from GPU to CPU
    #         points = depth_image_to_point_cloud_GPU(self.camera_tensors[i], self.camera_view_matrixs[i],
    #                                                 self.camera_proj_matrixs[i], self.camera_u2, self.camera_v2,
    #                                                 self.camera_props.width, self.camera_props.height, 10, self.device)
    #
    #         if points.shape[0] > 0:
    #             selected_points = self.sample_points(points, sample_num=self.pointCloudDownsampleNum,
    #                                                  sample_mathed='random')
    #         else:
    #             selected_points = torch.zeros((self.num_envs, self.pointCloudDownsampleNum, 3), device=self.device)
    #
    #         point_clouds[i] = selected_points
    #
    #     if self.pointCloudVisualizer != None:
    #         import open3d as o3d
    #         points = point_clouds[0, :, :3].cpu().numpy()
    #         # colors = plt.get_cmap()(point_clouds[0, :, 3].cpu().numpy())
    #         self.o3d_pc.points = o3d.utility.Vector3dVector(points)
    #         # self.o3d_pc.colors = o3d.utility.Vector3dVector(colors[..., :3])
    #
    #         if self.pointCloudVisualizerInitialized == False:
    #             self.pointCloudVisualizer.add_geometry(self.o3d_pc)
    #             self.pointCloudVisualizerInitialized = True
    #         else:
    #             self.pointCloudVisualizer.update(self.o3d_pc)
    #
    #     self.gym.end_access_image_tensors(self.sim)
    #     point_clouds -= self.env_origin.view(self.num_envs, 1, 3)
    #
    #     point_clouds_start = obj_obs_start + 27
    #     self.obs_buf[:, point_clouds_start:].copy_(point_clouds.view(self.num_envs, self.pointCloudDownsampleNum * 3))

[docs]    def reset_target_pose(self, env_ids, apply_reset=False):
        """
        Reset and randomize the goal pose

        Args:
            env_ids (tensor): The index of the environment that needs to reset goal pose

            apply_reset (bool): Whether to reset the goal directly here, usually used
            when the same task wants to complete multiple goals

        """
        rand_floats = torch_rand_float(-1.0, 1.0, (len(env_ids), 4), device=self.device)

        new_rot = randomize_rotation(rand_floats[:, 0], rand_floats[:, 1], self.x_unit_tensor[env_ids],
                                     self.y_unit_tensor[env_ids])

        self.goal_states[env_ids, 0:3] = self.goal_init_state[env_ids, 0:3]
        # self.goal_states[env_ids, 1] -= 0.25
        self.goal_states[env_ids, 2] += 10.0

        # self.goal_states[env_ids, 3:7] = new_rot
        self.root_state_tensor[self.goal_object_indices[env_ids], 0:3] = self.goal_states[env_ids,
                                                                         0:3] + self.goal_displacement_tensor
        self.root_state_tensor[self.goal_object_indices[env_ids], 3:7] = self.goal_states[env_ids, 3:7]
        self.root_state_tensor[self.goal_object_indices[env_ids], 7:13] = torch.zeros_like(
            self.root_state_tensor[self.goal_object_indices[env_ids], 7:13])

        if apply_reset:
            goal_object_indices = self.goal_object_indices[env_ids].to(torch.int32)
            self.gym.set_actor_root_state_tensor_indexed(self.sim,
                                                         gymtorch.unwrap_tensor(self.root_state_tensor),
                                                         gymtorch.unwrap_tensor(goal_object_indices), len(env_ids))
        self.reset_goal_buf[env_ids] = 0

[docs]    def reset_idx(self, env_ids, goal_env_ids):
        """
        Reset and randomize the environment

        Args:
            env_ids (tensor): The index of the environment that needs to reset

            goal_env_ids (tensor): The index of the environment that only goals need reset

        """
        # randomization can happen only at reset time, since it can reset actor positions on GPU
        if self.randomize:
            self.apply_randomizations(self.randomization_params)

        # generate random values
        rand_floats = torch_rand_float(0, 0, (len(env_ids), self.num_hand_dofs * 2 + 5), device=self.device)

        # randomize start object poses
        self.reset_target_pose(env_ids)

        # reset object
        self.root_state_tensor[self.object_indices[env_ids]] = self.object_init_state[env_ids].clone()

        new_object_rot = randomize_rotation(rand_floats[:, 3], rand_floats[:, 4], self.x_unit_tensor[env_ids],
                                            self.y_unit_tensor[env_ids])
        if self.object_type == "pen":
            rand_angle_y = torch.tensor(0.3)
            new_object_rot = randomize_rotation_pen(rand_floats[:, 3], rand_floats[:, 4], rand_angle_y,
                                                    self.x_unit_tensor[env_ids], self.y_unit_tensor[env_ids],
                                                    self.z_unit_tensor[env_ids])

        # self.root_state_tensor[self.object_indices[env_ids], 3:7] = new_object_rot
        self.root_state_tensor[self.object_indices[env_ids], 7:13] = torch.zeros_like(
            self.root_state_tensor[self.object_indices[env_ids], 7:13])

        object_indices = torch.unique(torch.cat([self.object_indices[env_ids],
                                                 self.goal_object_indices[env_ids],
                                                 self.goal_object_indices[goal_env_ids]]).to(torch.int32))
        # self.gym.set_actor_root_state_tensor_indexed(self.sim,
        #                                              gymtorch.unwrap_tensor(self.root_state_tensor),
        #                                              gymtorch.unwrap_tensor(object_indices), len(object_indices))

        # reset shadow hand
        delta_max = self.shadow_hand_dof_upper_limits - self.shadow_hand_dof_default_pos
        delta_min = self.shadow_hand_dof_lower_limits - self.shadow_hand_dof_default_pos
        rand_delta = delta_min + (delta_max - delta_min) * rand_floats[:, 5:5 + self.num_hand_dofs]

        pos = self.shadow_hand_default_dof_pos + self.reset_dof_pos_noise * rand_delta

        self.right_hand_dof_pos[env_ids, :] = pos
        self.left_hand_dof_pos[env_ids, :] = pos

        self.right_hand_dof_vel[env_ids, :] = self.hand_dof_default_vel + \
                                              self.reset_dof_vel_noise * rand_floats[:, 5:5 + self.num_hand_dofs]

        self.left_hand_dof_vel[env_ids, :] = self.hand_dof_default_vel + \
                                             self.reset_dof_vel_noise * rand_floats[:,
                                                                        5 + self.num_hand_dofs:5 + self.num_hand_dofs * 2]

        self.prev_targets[env_ids, :self.num_hand_dofs] = pos
        self.cur_targets[env_ids, :self.num_hand_dofs] = pos

        self.prev_targets[env_ids, self.num_hand_dofs:self.num_hand_dofs * 2] = pos
        self.cur_targets[env_ids, self.num_hand_dofs:self.num_hand_dofs * 2] = pos

        right_hand_indices = self.right_hand_indices[env_ids].to(torch.int32)
        left_hand_indices = self.left_hand_indices[env_ids].to(torch.int32)

        all_hand_indices = torch.unique(torch.cat([right_hand_indices,
                                                   left_hand_indices]).to(torch.int32))

        self.gym.set_dof_position_target_tensor_indexed(self.sim,
                                                        gymtorch.unwrap_tensor(self.prev_targets),
                                                        gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))

        all_indices = torch.unique(torch.cat([all_hand_indices,
                                              self.object_indices[env_ids],
                                              self.table_indices[env_ids],
                                              self.block_indices[env_ids]]).to(torch.int32))

        self.hand_positions[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 0:3]
        self.hand_orientations[all_indices.to(torch.long), :] = self.saved_root_tensor[all_indices.to(torch.long), 3:7]

        self.gym.set_dof_state_tensor_indexed(self.sim,
                                              gymtorch.unwrap_tensor(self.dof_state),
                                              gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))

        self.gym.set_actor_root_state_tensor_indexed(self.sim,
                                                     gymtorch.unwrap_tensor(self.root_state_tensor),
                                                     gymtorch.unwrap_tensor(all_indices), len(all_indices))
        self.progress_buf[env_ids] = 0
        self.reset_buf[env_ids] = 0
        self.successes[env_ids] = 0

[docs]    def pre_physics_step(self, actions):
        """
        The pre-processing of the physics step. Determine whether the reset environment is needed,
        and calculate the next movement of Shadowhand through the given action. The 52-dimensional
        action space as shown in below:

        Index   Description
        0 - 2	right shadow hand base translation
        3 - 5	right shadow hand base rotation
        6 - 20 	right shadow hand actuated joint
        21 - 23 left shadow hand base translation
        24 - 26 left shadow hand base rotation
        27 - 41 left shadow hand actuated joint

        Args:
            actions (tensor): Actions of agents in the all environment
        """
        env_ids = self.reset_buf.nonzero(as_tuple=False).squeeze(-1)
        goal_env_ids = self.reset_goal_buf.nonzero(as_tuple=False).squeeze(-1)

        # if only goals need reset, then call set API
        if len(goal_env_ids) > 0 and len(env_ids) == 0:
            self.reset_target_pose(goal_env_ids, apply_reset=True)
        # if goals need reset in addition to other envs, call set API in reset()
        elif len(goal_env_ids) > 0:
            self.reset_target_pose(goal_env_ids)

        if len(env_ids) > 0:
            self.reset_idx(env_ids, goal_env_ids)

        self.actions = actions.clone().to(self.device)
        if self.use_relative_control:
            targets = self.prev_targets[:,
                      self.actuated_dof_indices] + self.shadow_hand_dof_speed_scale * self.dt * self.actions
            self.cur_targets[:, self.actuated_dof_indices] = tensor_clamp(targets,
                                                                          self.shadow_hand_dof_lower_limits[
                                                                              self.actuated_dof_indices],
                                                                          self.shadow_hand_dof_upper_limits[
                                                                              self.actuated_dof_indices])
        else:
            # assert list(self.actuated_dof_indices) == self.right_useful_joint_index
            right_synergy_action = self.actions[:, 6:8]
            right_synergy_action[:, 0] = torch.abs(right_synergy_action[:, 0])
            left_synergy_action = self.actions[:, 14:16]
            left_synergy_action[:, 0] = torch.abs(left_synergy_action[:, 0])
            synergy_action_matrix = torch.tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                                                  [2, 2, 2, 1, 1, 1, 0, 0, 0, -1, -1, -1, -2, -2, -2]],
                                                 device=self.device, dtype=torch.float32)
            right_15dof_action = torch.matmul(right_synergy_action, synergy_action_matrix)
            left_15dof_action = torch.matmul(left_synergy_action, synergy_action_matrix)

            self.cur_targets[:, self.right_useful_joint_index] = scale(right_15dof_action,
                                                                       self.shadow_hand_dof_lower_limits[
                                                                           self.right_useful_joint_index],
                                                                       self.shadow_hand_dof_upper_limits[
                                                                           self.right_useful_joint_index]) / 180.0 * np.pi
            self.cur_targets[:, self.right_useful_joint_index] = self.act_moving_average * self.cur_targets[:,
                                                                                           self.right_useful_joint_index] + (
                                                                         1.0 - self.act_moving_average) * self.prev_targets[
                                                                                                          :,
                                                                                                          self.right_useful_joint_index]
            self.cur_targets[:, self.right_useful_joint_index] = tensor_clamp(
                self.cur_targets[:, self.right_useful_joint_index],
                self.shadow_hand_dof_lower_limits[self.right_useful_joint_index],
                self.shadow_hand_dof_upper_limits[self.right_useful_joint_index])
            for key, value in self.right_real_virtual_joint_index_map_dict.items():
                self.cur_targets[:, key] = self.cur_targets[:, value]

            self.cur_targets[:, self.left_useful_joint_index] = scale(left_15dof_action,
                                                                      self.shadow_hand_dof_lower_limits[
                                                                          self.right_useful_joint_index],
                                                                      self.shadow_hand_dof_upper_limits[
                                                                          self.right_useful_joint_index]) / 180.0 * np.pi
            self.cur_targets[:,
            self.left_useful_joint_index] = (self.act_moving_average
                                             * self.cur_targets[:,
                                               self.left_useful_joint_index] + (
                                                     1.0 - self.act_moving_average)
                                             * self.prev_targets[:, self.left_useful_joint_index])
            self.cur_targets[:, self.left_useful_joint_index] = tensor_clamp(
                self.cur_targets[:, self.left_useful_joint_index],
                self.shadow_hand_dof_lower_limits[self.right_useful_joint_index],
                self.shadow_hand_dof_upper_limits[self.right_useful_joint_index])
            for key, value in self.left_real_virtual_joint_index_map_dict.items():
                self.cur_targets[:, key] = self.cur_targets[:, value]
            # self.cur_targets[:, 49] = scale(self.actions[:, 0],
            #                                 self.object_dof_lower_limits[1], self.object_dof_upper_limits[1])
            # angle_offsets = self.actions[:, 26:32] * self.dt * self.orientation_scale

            self.apply_forces[:, 0, :] = actions[:, 0:3] * self.dt * self.transition_scale * 100000
            self.apply_forces[:, 0 + self.num_hand_bodies, :] = actions[:,
                                                                8:11] * self.dt * self.transition_scale * 100000
            self.apply_torque[:, 0, :] = self.actions[:, 3:6] * self.dt * self.orientation_scale * 1000
            self.apply_torque[:, 0 + self.num_hand_bodies, :] = self.actions[:,
                                                                11:14] * self.dt * self.orientation_scale * 1000

            # self.apply_forces = torch.zeros_like(self.apply_forces)
            # self.apply_torque = torch.zeros_like(self.apply_torque)
            self.gym.apply_rigid_body_force_tensors(self.sim, gymtorch.unwrap_tensor(self.apply_forces),
                                                    gymtorch.unwrap_tensor(self.apply_torque), gymapi.ENV_SPACE)

        self.prev_targets[:, self.actuated_dof_indices] = self.cur_targets[:, self.actuated_dof_indices]
        self.prev_targets[:, self.actuated_dof_indices + self.num_hand_dofs] = self.cur_targets[:,
                                                                               self.actuated_dof_indices + self.num_hand_dofs]

        # self.prev_targets = torch.zeros_like(self.prev_targets)

        # self.prev_targets[:, 49] = self.cur_targets[:, 49]
        # self.gym.set_dof_position_target_tensor(self.sim, gymtorch.unwrap_tensor(self.cur_targets))
        all_hand_indices = torch.unique(torch.cat([self.right_hand_indices,
                                                   self.left_hand_indices]).to(torch.int32))
        self.gym.set_dof_position_target_tensor_indexed(self.sim,
                                                        gymtorch.unwrap_tensor(self.prev_targets),
                                                        gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))
        # self.prev_targets_vel = torch.zeros_like(self.prev_targets)
        # dof_state = torch.cat([self.prev_targets, self.prev_targets_vel], dim=-1)
        # self.gym.set_dof_state_tensor_indexed(self.sim,
        #                                       gymtorch.unwrap_tensor(dof_state),
        #                                       gymtorch.unwrap_tensor(all_hand_indices), len(all_hand_indices))

[docs]    def post_physics_step(self):
        """
        The post-processing of the physics step. Compute the observation and reward, and visualize auxiliary
        lines for debug when needed

        """
        self.progress_buf += 1
        self.randomize_buf += 1

        self.compute_observations()
        self.compute_reward(self.actions)

        if self.viewer and self.debug_viz:
            # draw axes on target object
            self.gym.clear_lines(self.viewer)
            self.gym.refresh_rigid_body_state_tensor(self.sim)

            for i in range(self.num_envs):
                self.add_debug_lines(self.envs[i], self.block_right_handle_pos[i], self.block_right_handle_rot[i])
                self.add_debug_lines(self.envs[i], self.block_left_handle_pos[i], self.block_left_handle_rot[i])
                # self.add_debug_lines(self.envs[i], self.goal_pos[i], self.block_left_handle_rot[i])
                # self.add_debug_lines(self.envs[i], self.right_hand_ff_pos[i], self.right_hand_ff_rot[i])
                # self.add_debug_lines(self.envs[i], self.right_hand_mf_pos[i], self.right_hand_mf_rot[i])
                # self.add_debug_lines(self.envs[i], self.right_hand_rf_pos[i], self.right_hand_rf_rot[i])
                # self.add_debug_lines(self.envs[i], self.right_hand_lf_pos[i], self.right_hand_lf_rot[i])
                # self.add_debug_lines(self.envs[i], self.right_hand_th_pos[i], self.right_hand_th_rot[i])

                # self.add_debug_lines(self.envs[i], self.left_hand_ff_pos[i], self.right_hand_ff_rot[i])
                # self.add_debug_lines(self.envs[i], self.left_hand_mf_pos[i], self.right_hand_mf_rot[i])
                # self.add_debug_lines(self.envs[i], self.left_hand_rf_pos[i], self.right_hand_rf_rot[i])
                # self.add_debug_lines(self.envs[i], self.left_hand_lf_pos[i], self.right_hand_lf_rot[i])
                # self.add_debug_lines(self.envs[i], self.left_hand_th_pos[i], self.right_hand_th_rot[i])

[docs]    def add_debug_lines(self, env, pos, rot):
        posx = (pos + quat_apply(rot, to_torch([1, 0, 0], device=self.device) * 0.2)).cpu().numpy()
        posy = (pos + quat_apply(rot, to_torch([0, 1, 0], device=self.device) * 0.2)).cpu().numpy()
        posz = (pos + quat_apply(rot, to_torch([0, 0, 1], device=self.device) * 0.2)).cpu().numpy()

        p0 = pos.cpu().numpy()
        self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posx[0], posx[1], posx[2]], [0.85, 0.1, 0.1])
        self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posy[0], posy[1], posy[2]], [0.1, 0.85, 0.1])
        self.gym.add_lines(self.viewer, env, 1, [p0[0], p0[1], p0[2], posz[0], posz[1], posz[2]], [0.1, 0.1, 0.85])

[docs]    def rand_row(self, tensor, dim_needed):
        row_total = tensor.shape[0]
        return tensor[torch.randint(low=0, high=row_total, size=(dim_needed,)), :]

[docs]    def sample_points(self, points, sample_num=1000, sample_mathed='furthest'):
        eff_points = points[points[:, 2] > 0.04]
        if eff_points.shape[0] < sample_num:
            eff_points = points
        if sample_mathed == 'random':
            sampled_points = self.rand_row(eff_points, sample_num)
        elif sample_mathed == 'furthest':
            sampled_points_id = pointnet2_utils.furthest_point_sample(eff_points.reshape(1, *eff_points.shape),
                                                                      sample_num)
            sampled_points = eff_points.index_select(0, sampled_points_id[0].long())
        return sampled_points

[docs]    def camera_visulization(self, is_depth_image=False):
        if is_depth_image:
            camera_depth_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0],
                                                                       gymapi.IMAGE_DEPTH)
            torch_depth_tensor = gymtorch.wrap_tensor(camera_depth_tensor)
            torch_depth_tensor = torch.clamp(torch_depth_tensor, -1, 1)
            torch_depth_tensor = scale(torch_depth_tensor, to_torch([0], dtype=torch.float, device=self.device),
                                       to_torch([256], dtype=torch.float, device=self.device))
            camera_image = torch_depth_tensor.cpu().numpy()
            camera_image = Im.fromarray(camera_image)

        else:
            camera_rgba_tensor = self.gym.get_camera_image_gpu_tensor(self.sim, self.envs[0], self.cameras[0],
                                                                      gymapi.IMAGE_COLOR)
            torch_rgba_tensor = gymtorch.wrap_tensor(camera_rgba_tensor)
            camera_image = torch_rgba_tensor.cpu().numpy()
            camera_image = Im.fromarray(camera_image)

        return camera_image


#####################################################################
###=========================jit functions=========================###
#####################################################################

@torch.jit.script
def depth_image_to_point_cloud_GPU(camera_tensor, camera_view_matrix_inv, camera_proj_matrix, u, v, width: float,
                                   height: float, depth_bar: float, device: torch.device):
    # time1 = time.time()
    depth_buffer = camera_tensor.to(device)

    # Get the camera view matrix and invert it to transform points from camera to world space
    vinv = camera_view_matrix_inv

    # Get the camera projection matrix and get the necessary scaling
    # coefficients for deprojection

    proj = camera_proj_matrix
    fu = 2 / proj[0, 0]
    fv = 2 / proj[1, 1]

    centerU = width / 2
    centerV = height / 2

    Z = depth_buffer
    X = -(u - centerU) / width * Z * fu
    Y = (v - centerV) / height * Z * fv

    Z = Z.view(-1)
    valid = Z > -depth_bar
    X = X.view(-1)
    Y = Y.view(-1)

    position = torch.vstack((X, Y, Z, torch.ones(len(X), device=device)))[:, valid]
    position = position.permute(1, 0)
    position = position @ vinv

    points = position[:, 0:3]

    return points


@torch.jit.script
def compute_hand_reward(
        rew_buf, reset_buf, reset_goal_buf, progress_buf, successes, consecutive_successes,
        max_episode_length: float, object_pos, object_rot, target_pos, target_rot, block_right_handle_pos,
        block_left_handle_pos,
        left_hand_pos, right_hand_pos, right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos,
        right_hand_th_pos,
        left_hand_ff_pos, left_hand_mf_pos, left_hand_rf_pos, left_hand_lf_pos, left_hand_th_pos,
        dist_reward_scale: float, rot_reward_scale: float, rot_eps: float,
        actions, action_penalty_scale: float,
        success_tolerance: float, reach_goal_bonus: float, fall_dist: float,
        fall_penalty: float, max_consecutive_successes: int, av_factor: float, ignore_z_rot: bool
):
    """
    Compute the reward of all environment.

    Args:
        rew_buf (tensor): The reward buffer of all environments at this time

        reset_buf (tensor): The reset buffer of all environments at this time

        reset_goal_buf (tensor): The only-goal reset buffer of all environments at this time

        progress_buf (tensor): The porgress buffer of all environments at this time

        successes (tensor): The successes buffer of all environments at this time

        consecutive_successes (tensor): The consecutive successes buffer of all environments at this time

        max_episode_length (float): The max episode length in this environment

        object_pos (tensor): The position of the object

        object_rot (tensor): The rotation of the object

        target_pos (tensor): The position of the target

        target_rot (tensor): The rotate of the target

        block_right_handle_pos (tensor): The position of the right block handle

        block_left_handle_pos (tensor): The position of the left block handle

        left_hand_pos, right_hand_pos (tensor): The position of the bimanual hands
        
        right_hand_ff_pos, right_hand_mf_pos, right_hand_rf_pos, right_hand_lf_pos, right_hand_th_pos (tensor): The position of the five fingers 
            of the right hand

        left_hand_ff_pos, left_hand_mf_pos, left_hand_rf_pos, left_hand_lf_pos, left_hand_th_pos (tensor): The position of the five fingers 
            of the left hand

        dist_reward_scale (float): The scale of the distance reward

        rot_reward_scale (float): The scale of the rotation reward

        rot_eps (float): The epsilon of the rotation calculate

        actions (tensor): The action buffer of all environments at this time

        action_penalty_scale (float): The scale of the action penalty reward

        success_tolerance (float): The tolerance of the success determined

        reach_goal_bonus (float): The reward given when the object reaches the goal

        fall_dist (float): When the object is far from the Shadowhand, it is judged as falling

        fall_penalty (float): The reward given when the object is fell

        max_consecutive_successes (float): The maximum of the consecutive successes

        av_factor (float): The average factor for calculate the consecutive successes

        ignore_z_rot (bool): Is it necessary to ignore the rot of the z-axis, which is usually used 
            for some specific objects (e.g. pen)
    """
    # Distance from the hand to the object
    left_goal_dist = torch.norm(target_pos - block_left_handle_pos, p=2, dim=-1)
    right_goal_dist = torch.norm(target_pos - block_right_handle_pos, p=2, dim=-1)
    # goal_dist = target_pos[:, 2] - object_pos[:, 2]

    right_hand_dist = torch.norm(block_right_handle_pos - right_hand_pos, p=2, dim=-1)
    left_hand_dist = torch.norm(block_left_handle_pos - left_hand_pos, p=2, dim=-1)

    right_hand_finger_dist = (torch.norm(block_right_handle_pos - right_hand_ff_pos, p=2, dim=-1) + torch.norm(
        block_right_handle_pos - right_hand_mf_pos, p=2, dim=-1)
                              + torch.norm(block_right_handle_pos - right_hand_rf_pos, p=2, dim=-1) + torch.norm(
                block_right_handle_pos - right_hand_lf_pos, p=2, dim=-1)
                              + torch.norm(block_right_handle_pos - right_hand_th_pos, p=2, dim=-1))
    left_hand_finger_dist = (torch.norm(block_left_handle_pos - left_hand_ff_pos, p=2, dim=-1) + torch.norm(
        block_left_handle_pos - left_hand_mf_pos, p=2, dim=-1)
                             + torch.norm(block_left_handle_pos - left_hand_rf_pos, p=2, dim=-1) + torch.norm(
                block_left_handle_pos - left_hand_lf_pos, p=2, dim=-1)
                             + torch.norm(block_left_handle_pos - left_hand_th_pos, p=2, dim=-1))
    # Orientation alignment for the cube in hand and goal cube
    # quat_diff = quat_mul(object_rot, quat_conjugate(target_rot))
    # rot_dist = 2.0 * torch.asin(torch.clamp(torch.norm(quat_diff[:, 0:3], p=2, dim=-1), max=1.0))

    right_hand_dist_rew = torch.exp(-10 * right_hand_finger_dist)
    left_hand_dist_rew = torch.exp(-10 * left_hand_finger_dist)

    # rot_rew = 1.0/(torch.abs(rot_dist) + rot_eps) * rot_reward_scale

    action_penalty = torch.sum(actions ** 2, dim=-1)

    # # Total reward is: position distance + orientation alignment + action regularization + success bonus + fall penalty
    # # reward = torch.exp(-0.05*(up_rew * dist_reward_scale)) + torch.exp(-0.05*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.05*(left_hand_dist_rew * dist_reward_scale))
    # # up_rew = torch.zeros_like(right_hand_dist_rew)
    # # up_rew = torch.where(right_hand_finger_dist < 0.6,
    # #                 torch.where(left_hand_finger_dist < 0.4,
    # up_rew = torch.zeros_like(right_hand_dist_rew)
    # up_rew = torch.exp(-10 * torch.norm(block_right_handle_pos - block_left_handle_pos, p=2, dim=-1)) * 2
    # # up_rew =  torch.where(right_hand_finger_dist <= 0.3, torch.norm(bottle_cap_up - bottle_pos, p=2, dim=-1) * 30, up_rew)
    #
    # # reward = torch.exp(-0.1*(right_hand_dist_rew * dist_reward_scale)) + torch.exp(-0.1*(left_hand_dist_rew * dist_reward_scale))

    up_rew = block_left_handle_pos[:, 2] - 0.6
    reward = left_hand_dist_rew + up_rew

    # resets = torch.where(right_hand_dist_rew <= 0, torch.ones_like(reset_buf), reset_buf)
    # resets = torch.where(right_hand_finger_dist >= 1.5, torch.ones_like(resets), resets)
    resets = torch.where(left_hand_finger_dist >= 1.5, torch.ones_like(reset_buf), reset_buf)

    # Find out which envs hit the goal and update successes count
    successes = torch.where(successes == 0,
                            torch.where(block_left_handle_pos[:, 2] > 0.8,
                                        torch.ones_like(successes), successes), successes)

    resets = torch.where(progress_buf >= max_episode_length, torch.ones_like(resets), resets)

    goal_resets = torch.zeros_like(resets)

    num_resets = torch.sum(resets)
    finished_cons_successes = torch.sum(successes * resets.float())

    cons_successes = torch.where(resets > 0, successes * resets, consecutive_successes).mean()

    return reward, resets, goal_resets, progress_buf, successes, cons_successes


@torch.jit.script
def randomize_rotation(rand0, rand1, x_unit_tensor, y_unit_tensor):
    return quat_mul(quat_from_angle_axis(rand0 * np.pi, x_unit_tensor),
                    quat_from_angle_axis(rand1 * np.pi, y_unit_tensor))


@torch.jit.script
def randomize_rotation_pen(rand0, rand1, max_angle, x_unit_tensor, y_unit_tensor, z_unit_tensor):
    rot = quat_mul(quat_from_angle_axis(0.5 * np.pi + rand0 * max_angle, x_unit_tensor),
                   quat_from_angle_axis(rand0 * np.pi, z_unit_tensor))
    return rot