import cv2
import numpy as np
import open3d as o3d
import sys, os
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
from utils.control_util import ControlUtil
from utils.communicate_util import CommunicateUtil
import pickle
from tqdm import tqdm
import json
import pyrealsense2 as rs

# marker parameters
MARKER_TYPE = "aruco"
MARKER_INFO = {"id": 582, "size": 0.1}

# view parameters
VIEW_NUM = 6
LOOK_AT_PT = np.array([0.5, 0, 0.2])
TOP_DOWN_DIST = [0.3, 0.45, 0.55]
TOP_DOWN_DIRECTION = np.array([0., 0., -1.])
RADIUS = [0.25, 0.2, 0.1]
INIT_GRIPPER_POSE = np.array([
    [1, 0, 0, 0.56],
    [0, -1, 0, 0],
    [0, 0, -1, 0.6],
    [0, 0, 0, 1]
])

DEFAULT_EE_TO_BASE = np.array([
    [0, -1, 0, 0],
    [1, 0, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1]
])

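# Editor's sketch (assumption: OpenCV >= 4.7, which this file already implies by
# constructing cv2.aruco.DetectorParameters() below): the physical target
# described by MARKER_INFO can be rendered for printing with
# generateImageMarker. The output file name is illustrative.
def save_marker_image(marker_id=582, side_pixels=800, path="aruco_582.png"):
    aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_ARUCO_ORIGINAL)
    marker_img = cv2.aruco.generateImageMarker(aruco_dict, marker_id, side_pixels)
    cv2.imwrite(path, marker_img)
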
class HandEyeCalibration:
    """
    A class to perform hand-eye calibration for a robotic system using various camera types and markers.

    Attributes:
        marker_to_cam (list): List of marker-to-camera transformation matrices.
        end_effector_to_base (list): List of end-effector-to-base transformation matrices.
        marker_type (str): Type of marker used for calibration (default is "aruco").
        marker_info (dict): Information about the marker, including its ID and size.
        camera (object): Camera object used for capturing images.
        intrinsics (np.ndarray): 3x3 intrinsic matrix of the camera.
        dist (optional): Distortion coefficients of the camera.
        marker_solver (object): Solver object for detecting and solving marker poses.
        camera_type (str): Type of camera used (default is "RealSense"). "RealSense" and "Inspire" are supported.

    Methods:
        get_marker_to_cam_pose(color_image, visualize=False):
            Gets the pose of the marker relative to the camera from a color image.
        capture_single_frame():
            Captures a single frame from the camera and gets the current end-effector pose.
        capture_frames(view_num, look_at_pt, top_down_dist, top_down_direction, radius, visualize=False):
            Captures multiple frames by moving the end-effector to different poses and records the marker and end-effector poses.
        get_hand_eye(marker_to_cam_poses, end_effector_to_base_poses):
            Computes the hand-eye calibration matrix from the captured marker and end-effector poses.
        calibrate(view_num, look_at_pt, top_down_dist, top_down_direction, radius):
            Performs the full calibration process by capturing frames and computing the hand-eye calibration matrix.
    """
    def __init__(self, dist=None, marker_type="aruco", marker_info={"id": 0, "size": 0.1}, camera_type="RealSense"):
        self.marker_to_cam = []
        self.end_effector_to_base = []
        self.marker_type = marker_type
        self.marker_info = marker_info
        ControlUtil.connect_robot()
        ControlUtil.franka_reset()

        if camera_type == "RealSense":
            self.camera = RealSenseCamera()
        elif camera_type == "Inspire":
            self.camera = InspireCamera()
        else:
            raise NotImplementedError(f"Unsupported camera type: {camera_type}")

        self.intrinsics = self.camera.get_color_intrinsics()
        self.dist = dist
        if self.marker_type == "aruco":
            self.marker_solver = ArucoMarkerSolver(self.intrinsics, dist, marker_info)
        self.camera_type = camera_type

    def get_marker_to_cam_pose(self, color_image, visualize=False):
        res = self.marker_solver.get_marker_to_cam_pose(color_image, visualize)
        return res

    def capture_single_frame(self):
        color_image = self.camera.get_color_image()
        end_effector_pose = ControlUtil.get_curr_gripper_to_base_pose()
        return color_image, end_effector_pose

    def capture_frames(self, view_num, look_at_pt, top_down_dist, top_down_direction, radius, visualize=False):
        # Generate one ring of candidate gripper poses per radius, alternating
        # the traversal order between rings to shorten the robot's travel.
        circle_num = len(radius)
        poses = []
        for i in range(circle_num):
            reverse_order = i % 2 == 1
            pose_generator = EndEffectorPoseGenerator(view_num, look_at_pt, top_down_dist[i], top_down_direction, radius[i], reverse_order)
            poses += [pose @ DEFAULT_EE_TO_BASE for pose in pose_generator.poses]
        if visualize:
            pose_visualizer = PoseVisualizer(poses)
            pose_visualizer.visualize()
        pbar = tqdm(total=len(poses), desc="Capturing frames")
        for pose in poses:
            pbar.update(1)
            ControlUtil.set_gripper_pose(pose)
            color_image, end_effector_pose = self.capture_single_frame()
            if color_image is None:
                print("Failed to capture the frame")
                continue
            res = self.get_marker_to_cam_pose(color_image, visualize=True)
            if res is None:
                print("Failed to get marker pose")
                continue
            # estimatePoseSingleMarkers returns rvec/tvec with shape (1, 1, 3);
            # flatten them before building the 4x4 marker-to-camera transform.
            marker_to_cam_pose = np.eye(4)
            rvec = np.asarray(res["rvec"]).flatten()
            tvec = np.asarray(res["tvec"]).flatten()
            rot_mat = cv2.Rodrigues(rvec)[0]
            marker_to_cam_pose[:3, :3] = rot_mat
            marker_to_cam_pose[:3, 3] = tvec
            self.marker_to_cam.append(marker_to_cam_pose)
            self.end_effector_to_base.append(end_effector_pose)

    def get_hand_eye(self, marker_to_cam_poses, end_effector_to_base_poses):
        ############################## Debug ################################
        # with open("marker_to_cam_poses.pkl", "wb") as f:
        #     pickle.dump(marker_to_cam_poses, f)
        # with open("end_effector_to_base_poses.pkl", "wb") as f:
        #     pickle.dump(end_effector_to_base_poses, f)
        # with open("marker_to_cam_poses.pkl", "rb") as f:
        #     marker_to_cam_poses = pickle.load(f)
        # with open("end_effector_to_base_poses.pkl", "rb") as f:
        #     end_effector_to_base_poses = pickle.load(f)
        #####################################################################
        R_marker_to_cam = []
        t_marker_to_cam = []
        R_end_effector_to_base = []
        t_end_effector_to_base = []
        for index, marker_to_cam_pose in enumerate(marker_to_cam_poses):
            if marker_to_cam_pose is None:
                continue
            R_marker_to_cam.append(marker_to_cam_pose[:3, :3])
            t_marker_to_cam.append(marker_to_cam_pose[:3, 3])
            R_end_effector_to_base.append(end_effector_to_base_poses[index][:3, :3])
            t_end_effector_to_base.append(end_effector_to_base_poses[index][:3, 3])
        # calibrateHandEye needs at least three pose pairs to be well posed.
        if len(R_marker_to_cam) < 3:
            print("Not enough data to calibrate")
            return None
        R_marker_to_cam = np.array(R_marker_to_cam)
        t_marker_to_cam = np.array(t_marker_to_cam)
        R_end_effector_to_base = np.array(R_end_effector_to_base)
        t_end_effector_to_base = np.array(t_end_effector_to_base)
        # Tsai's method; `method` must be passed as a keyword argument, since a
        # fifth positional argument would be taken as an output array.
        hand_eye = cv2.calibrateHandEye(R_end_effector_to_base, t_end_effector_to_base, R_marker_to_cam, t_marker_to_cam, method=cv2.CALIB_HAND_EYE_TSAI)
        cv2.destroyAllWindows()
        return hand_eye

    def calibrate(self, view_num, look_at_pt, top_down_dist, top_down_direction, radius):
        self.capture_frames(view_num, look_at_pt, top_down_dist, top_down_direction, radius)
        marker_to_cam = np.array(self.marker_to_cam)
        end_effector_to_base = np.array(self.end_effector_to_base)
        hand_eye = self.get_hand_eye(marker_to_cam, end_effector_to_base)
        return hand_eye

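# Editor's note on what get_hand_eye computes: with the camera rigidly mounted
# on the gripper (eye-in-hand), cv2.calibrateHandEye solves A @ X = X @ B for
# the camera-to-gripper transform X, where A is the relative motion between two
# gripper poses and B the relative motion between the corresponding marker
# observations. The helper below is an illustrative sketch, not part of the
# original pipeline: it scores a candidate X (as a 4x4 matrix) against recorded
# pose lists; a small residual suggests a consistent calibration.
def hand_eye_residual(X, end_effector_to_base_poses, marker_to_cam_poses):
    """Mean Frobenius residual of A @ X - X @ B over consecutive view pairs."""
    residuals = []
    for i in range(len(end_effector_to_base_poses) - 1):
        A = np.linalg.inv(end_effector_to_base_poses[i + 1]) @ end_effector_to_base_poses[i]
        B = marker_to_cam_poses[i + 1] @ np.linalg.inv(marker_to_cam_poses[i])
        residuals.append(np.linalg.norm(A @ X - X @ B))
    return float(np.mean(residuals))
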
class ArucoMarkerSolver:
    def __init__(self, intrinsics, dist=None, marker_info={"id": 0, "size": 0.1}):
        self.intrinsics = intrinsics
        self.dist = dist
        self.marker_info = marker_info

    def solve_aruco_marker_pose(self, color_image):
        self.res = []
        aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_ARUCO_ORIGINAL)
        aruco_params = cv2.aruco.DetectorParameters()
        cv2.imshow("color_image", color_image)
        cv2.waitKey(500)
        corners, ids, _rejected = cv2.aruco.detectMarkers(color_image, aruco_dict, parameters=aruco_params)
        if ids is None:
            return None
        for i in range(len(ids)):
            rvec, tvec, _ = cv2.aruco.estimatePoseSingleMarkers(corners[i], self.marker_info["size"], self.intrinsics, self.dist)
            self.res.append({
                'rvec': rvec,
                'tvec': tvec,
                'corners': corners[i],
                'ids': ids[i]
            })
        # Return the detection that matches the calibration marker's ID, if any.
        for res in self.res:
            if res["ids"][0] == self.marker_info["id"]:
                return res
        return None

    def get_marker_to_cam_pose(self, color_image, visualize=False):
        res = self.solve_aruco_marker_pose(color_image)
        if visualize and res is not None:
            self.visualize_res(color_image, res)
        return res

    def visualize_res(self, color_image, res):
        rvec = res["rvec"]
        tvec = res["tvec"]
        corners = res["corners"]
        ids = res["ids"]
        aruco_frame = cv2.drawFrameAxes(color_image, self.intrinsics, self.dist, rvec, tvec, 0.05)
        aruco_frame = cv2.aruco.drawDetectedMarkers(aruco_frame, (corners, ), ids)
        cv2.imshow("aruco_frame", aruco_frame)
        cv2.waitKey(500)

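# Editor's sketch for gauging detection quality (assumption: corners follow
# OpenCV's top-left, top-right, bottom-right, bottom-left ordering): reproject
# the marker's 3-D corners with the estimated pose and compare against the
# detected image corners; a mean error of a few pixels or less is typical of a
# good detection.
def marker_reprojection_error(res, marker_size, K, dist=None):
    half = marker_size / 2.0
    obj_pts = np.array([[-half, half, 0], [half, half, 0],
                        [half, -half, 0], [-half, -half, 0]], dtype=np.float32)
    img_pts, _ = cv2.projectPoints(obj_pts, res["rvec"], res["tvec"], K, dist)
    err = np.linalg.norm(img_pts.reshape(-1, 2) - res["corners"].reshape(-1, 2), axis=1)
    return float(err.mean())
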
class EndEffectorPoseGenerator:
    def __init__(self, view_num, look_at_pt, top_down_dist, top_down_direction, radius, reverse_order=False):
        self.view_num = view_num
        self.look_at_pt = look_at_pt
        self.top_down_dist = top_down_dist
        self.top_down_direction = top_down_direction
        self.radius = radius
        self.poses = self.generate_poses(reverse_order)

    def generate_poses(self, reverse_order=False):
        poses = []
        for i in range(self.view_num):
            order = i if not reverse_order else self.view_num - i - 1
            pose = self.get_pose(order)
            poses.append(pose)
        return poses

    def get_pose(self, i):
        # Place the camera on an arc around the look-at point, spanning 0.8*pi
        # starting at 0.7*pi, at a fixed height above the target.
        angle = 0.8 * np.pi * i / self.view_num + 0.7 * np.pi
        start_point_x = self.look_at_pt[0] + self.radius * np.cos(angle)
        start_point_y = self.look_at_pt[1] + self.radius * np.sin(angle)
        start_point_z = self.look_at_pt[2] + self.top_down_dist

        # Build a look-at frame. A small random perturbation of the up vector
        # avoids a degenerate cross product when the viewing direction is
        # (anti)parallel to the top-down direction.
        look_at_vector = self.look_at_pt - np.array([start_point_x, start_point_y, start_point_z])
        look_at_vector = look_at_vector / np.linalg.norm(look_at_vector)
        up_vector = self.top_down_direction
        noise = np.random.uniform(0, 0.1, up_vector.shape)
        right_vector = np.cross(look_at_vector, up_vector + noise)
        right_vector = right_vector / np.linalg.norm(right_vector)
        up_vector = np.cross(look_at_vector, right_vector)
        up_vector = up_vector / np.linalg.norm(up_vector)

        pose = np.eye(4)
        pose[:3, :3] = self.get_rotation(right_vector, up_vector, look_at_vector)
        pose[:3, 3] = np.array([start_point_x, start_point_y, start_point_z])
        return pose

    def get_rotation(self, right_vector, up_vector, look_at_vector):
        # Columns are the camera's right, up, and viewing axes expressed in the
        # base frame; the three vectors are orthonormal by construction.
        rotation = np.eye(3)
        rotation[:, 0] = right_vector
        rotation[:, 1] = up_vector
        rotation[:, 2] = look_at_vector
        return rotation

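# Editor's sketch: a quick debugging check that get_rotation produced a proper
# rotation matrix (orthonormal columns, determinant +1).
def assert_rotation(R, tol=1e-6):
    assert np.allclose(R.T @ R, np.eye(3), atol=tol), "columns are not orthonormal"
    assert abs(np.linalg.det(R) - 1.0) < tol, "basis is not right-handed"
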
class PoseVisualizer:
    def __init__(self, poses):
        self.poses = poses

    def visualize(self):
        vis = o3d.visualization.Visualizer()
        vis.create_window()
        for pose in self.poses:
            mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.05)
            mesh_frame.transform(pose)
            vis.add_geometry(mesh_frame)
        print("Press q to close the window")
        vis.run()

class CAMERA:
    def __init__(self):
        self.color_intrinsics = None
        self.depth_intrinsics = None
        self.color = None
        self.depth = None

    def get_color_intrinsics(self):
        pass

    def get_depth_intrinsics(self):
        pass

    def get_color_image(self):
        pass

    def get_depth_image(self):
        pass

class RealSenseCamera(CAMERA):
    def __init__(self):
        super().__init__()
        self.pipeline = rs.pipeline()
        config = rs.config()
        config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
        config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
        # Keep the align object so depth frames can be mapped onto the color
        # stream when they are fetched.
        self.align = rs.align(rs.stream.color)
        self.pipeline.start(config)
        self.color_intrinsics = self.get_intrinsics("color")
        self.depth_intrinsics = self.get_intrinsics("depth")

    def get_color_intrinsics(self):
        return self.color_intrinsics

    def get_depth_intrinsics(self):
        return self.depth_intrinsics

    def get_color_image(self):
        frames = self.pipeline.wait_for_frames()
        color_frame = frames.get_color_frame()
        color_image = np.asanyarray(color_frame.get_data())
        return color_image

    def get_depth_image(self):
        frames = self.pipeline.wait_for_frames()
        # Align depth to the color frame so both images share a viewpoint.
        frames = self.align.process(frames)
        depth_frame = frames.get_depth_frame()
        depth_image = np.asanyarray(depth_frame.get_data())
        return depth_image

    def get_intrinsics(self, camera_type):
        if camera_type == "color":
            profile = self.pipeline.get_active_profile()
            color_profile = rs.video_stream_profile(profile.get_stream(rs.stream.color))
            color_intrinsics = color_profile.get_intrinsics()
            K = [[color_intrinsics.fx, 0, color_intrinsics.ppx], [0, color_intrinsics.fy, color_intrinsics.ppy], [0, 0, 1]]
            return np.array(K)
        elif camera_type == "depth":
            profile = self.pipeline.get_active_profile()
            depth_profile = rs.video_stream_profile(profile.get_stream(rs.stream.depth))
            depth_intrinsics = depth_profile.get_intrinsics()
            K = [[depth_intrinsics.fx, 0, depth_intrinsics.ppx], [0, depth_intrinsics.fy, depth_intrinsics.ppy], [0, 0, 1]]
            return np.array(K)

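# Editor's sketch of how the 3x3 K returned above is typically consumed: a
# pixel (u, v) with depth z (in meters, after applying the device depth scale)
# back-projects to a camera-frame point via the pinhole model.
def deproject_pixel(K, u, v, z):
    x = (u - K[0, 2]) / K[0, 0] * z
    y = (v - K[1, 2]) / K[1, 1] * z
    return np.array([x, y, z])
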
class InspireCamera(CAMERA):
    def __init__(self):
        super().__init__()
        # Move the gripper to the initial pose before requesting the first view.
        ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
        view_data = CommunicateUtil.get_view_data(init=True)
        self.color_intrinsics = self.get_intrinsics(view_data["color_intrinsics"], view_data["color_image"])
        self.depth_intrinsics = self.get_intrinsics(view_data["depth_intrinsics"], view_data["depth_image"])

    def get_color_intrinsics(self):
        return self.color_intrinsics

    def get_depth_intrinsics(self):
        return self.depth_intrinsics

    def get_color_image(self):
        view_data = CommunicateUtil.get_view_data()
        if view_data is None:
            return None
        return view_data["color_image"]

    def get_depth_image(self):
        view_data = CommunicateUtil.get_view_data()
        if view_data is None:
            return None
        return view_data["depth_image"]

    def get_intrinsics(self, intrinsics, image):
        # The reported intrinsics refer to the sensor's native resolution;
        # rescale them to match the resolution of the images actually streamed.
        height = image.shape[0]
        width = image.shape[1]
        scale_h = height / intrinsics["height"]
        scale_w = width / intrinsics["width"]
        fx = intrinsics["fx"] * scale_w
        fy = intrinsics["fy"] * scale_h
        cx = intrinsics["cx"] * scale_w
        cy = intrinsics["cy"] * scale_h
        K = np.array([
            [fx, 0, cx],
            [ 0, fy, cy],
            [ 0,  0,  1]
        ])
        return K

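# Worked example for get_intrinsics above (numbers are illustrative): if the
# device reports fx = 600 and cx = 640 at a native 1280x720 but streams 640x360
# images, then scale_w = 0.5, so the effective fx becomes 300 and cx becomes
# 320; fy and cy scale by scale_h in the same way.
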
def example_view_generator():
    # EndEffectorPoseGenerator expects a single distance and radius per ring,
    # so pick one entry from the per-ring lists here.
    pose_generator = EndEffectorPoseGenerator(VIEW_NUM, LOOK_AT_PT, TOP_DOWN_DIST[0], TOP_DOWN_DIRECTION, RADIUS[0])
    base_pose = np.eye(4)
    test_pose = np.array([
        [1, 0, 0, 0],
        [0, -1, 0, 0],
        [0, 0, -1, 0.2],
        [0, 0, 0, 1]
    ])
    default_ee2base = np.array([
        [0, -1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, 0, 0, 1]
    ])
    poses = [pose @ default_ee2base for pose in pose_generator.poses] + [base_pose, test_pose]
    visualizer = PoseVisualizer(poses)
    visualizer.visualize()

def example_calibration():
    # ControlUtil.connect_robot()
    # ControlUtil.franka_reset()
    # ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
    # view_data = CommunicateUtil.get_view_data(init=True)
    # intrinsics = get_intrinsics(view_data["color_intrinsics"], view_data["color_image"])
    calibration = HandEyeCalibration(
        # intrinsics=intrinsics,
        dist=None,
        marker_type=MARKER_TYPE,
        marker_info=MARKER_INFO,
        camera_type="Inspire"
    )
    hand_eye = calibration.calibrate(
        view_num=VIEW_NUM,
        look_at_pt=LOOK_AT_PT,
        top_down_dist=TOP_DOWN_DIST,
        top_down_direction=TOP_DOWN_DIRECTION,
        radius=RADIUS
    )
    print(hand_eye)
    if hand_eye is None:
        raise RuntimeError("Calibration failed: not enough valid marker detections")

    res = {
        'rotation': hand_eye[0].tolist(),
        'translation': hand_eye[1].tolist()
    }
    with open("hand_eye.json", "w") as f:
        json.dump(res, f, indent=4)
    print("Hand-eye calibration finished")
    return res

def get_depth_to_end_effector_pose(calibration_res_path):
    with open(calibration_res_path, "r") as f:
        data = json.load(f)
    rotation = np.array(data["rotation"])
    translation = np.array(data["translation"])
    # Assemble the color-camera-to-end-effector transform saved by
    # example_calibration; reshape handles both (3,) and (3, 1) translations.
    color_to_ee = np.eye(4)
    color_to_ee[:3, :3] = rotation
    color_to_ee[:3, 3] = translation.reshape(3)
    ControlUtil.connect_robot()
    ControlUtil.franka_reset()
    ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
    view_data = CommunicateUtil.get_view_data(init=True)
    # Chain the device-reported depth-to-color extrinsics with the calibrated
    # color-to-end-effector transform.
    depth_to_color = np.array(view_data["depth_to_color"])
    depth_to_ee = color_to_ee @ depth_to_color
    print(depth_to_ee)
    with open("depth_to_ee.json", "w") as f:
        json.dump(depth_to_ee.tolist(), f, indent=4)

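# Editor's sketch of how the saved transform is typically consumed downstream
# (function name and frames are illustrative): a point observed in the depth
# camera frame maps into the robot base frame through the calibrated
# depth-to-end-effector transform and the current gripper pose.
def camera_point_to_base(pt_cam, depth_to_ee, ee_to_base):
    pt_h = np.append(pt_cam, 1.0)  # homogeneous coordinates
    return (ee_to_base @ depth_to_ee @ pt_h)[:3]
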
if __name__ == "__main__":
    res = example_calibration()
    get_depth_to_end_effector_pose("hand_eye.json")
    # example_view_generator()