add pointcloud memory feature for gsnet input

2024-11-03 02:54:33 -06:00
parent 28bfe0e552
commit e1a23cdde6
8 changed files with 15157 additions and 1177 deletions
--- a/cfg/active_grasp.rviz
+++ b/cfg/active_grasp.rviz
@@ -256,7 +256,9 @@ Visualization Manager:
      Name: Markers
      Namespaces:
        bbox: true
+        grasp: true
        path: true
+        roi: true
        views: true
      Queue Size: 100
      Value: true
@@ -272,7 +274,7 @@ Visualization Manager:
      Color: 255; 255; 255
      Color Transformer: Intensity
      Decay Time: 0
-      Enabled: true
+      Enabled: false
      Invert Rainbow: false
      Max Color: 255; 255; 255
      Min Color: 0; 0; 0
@@ -287,7 +289,7 @@ Visualization Manager:
      Unreliable: false
      Use Fixed Frame: true
      Use rainbow: true
-      Value: true
+      Value: false
    - Alpha: 0.20000000298023224
      Autocompute Intensity Bounds: false
      Autocompute Value Bounds:
@@ -424,7 +426,7 @@ Visualization Manager:
  Views:
    Current:
      Class: rviz/Orbit
-      Distance: 2.0648767948150635
+      Distance: 1.2382971048355103
      Enable Stereo Rendering:
        Stereo Eye Separation: 0.05999999865889549
        Stereo Focal Distance: 1
@@ -432,17 +434,17 @@ Visualization Manager:
        Value: false
      Field of View: 0.7853981852531433
      Focal Point:
-        X: -0.054327137768268585
-        Y: -0.14501728117465973
-        Z: 0.2944410443305969
+        X: 0.27311334013938904
+        Y: -0.15430450439453125
+        Z: 0.29310858249664307
      Focal Shape Fixed Size: false
      Focal Shape Size: 0.05000000074505806
      Invert Z Axis: false
      Name: Current View
      Near Clip Distance: 0.009999999776482582
-      Pitch: 0.3053985834121704
+      Pitch: 0.4653984010219574
      Target Frame: <Fixed Frame>
-      Yaw: 0.7053982615470886
+      Yaw: 1.5403975248336792
    Saved:
      - Class: rviz/Orbit
        Distance: 1.2000000476837158
@@ -483,4 +485,4 @@ Window Geometry:
    collapsed: true
  Width: 1095
  X: 1273
-  Y: 127
+  Y: 90
--- a/cfg/active_grasp.yaml
+++ b/cfg/active_grasp.yaml
@@ -1,9 +1,9 @@
 bt_sim:
  gui: True
-  gripper_force: 10
+  gripper_force: 100
  # scene: random
  # scene: manual
-  scene: $(find active_grasp)/cfg/sim/challenging_scene_6.yaml
+  scene: $(find active_grasp)/cfg/sim/challenging_scene_1.yaml

 hw:
  roi_calib_file: $(find active_grasp)/cfg/hw/T_base_tag.txt
@@ -43,3 +43,8 @@ ap_grasp:
  max_views: 80
  ap_config_path: $(find active_grasp)/src/active_grasp/active_perception/configs/local_inference_config.yaml
  max_inference_num: 50
+  crop_min_radius: 0.2
+  crop_radius_step: 0.05
+  crop_max_radius: 0.5
+  num_knn_neighbours: 5
+
--- a/cfg/sim/challenging_scene_1.yaml
+++ b/cfg/sim/challenging_scene_1.yaml
@@ -2,10 +2,10 @@ center: [0.5, 0.0, 0.25]
 q:  [0.0, -1.39, 0.0, -2.36, 0.0, 1.57, 0.79]
 objects:
 - object_id: ycb/006_mustard_bottle
-  xyz: [0.0, 0.0, 0.03]
+  xyz: [0.1, 0.0, 0.03]
  rpy: [0, 0, 0]
  scale: 0.8
 - object_id: active_perception/box
-  xyz: [0.0, 0.0, 0.0]
+  xyz: [0.1, 0.0, 0.0]
  rpy: [0, 0, 0]
-  scale: 0.8
+  scale: 0.9
--- a/cfg/sim/challenging_scene_6.yaml
+++ b/cfg/sim/challenging_scene_6.yaml
@@ -6,6 +6,6 @@ objects:
  rpy: [0, 0, -45]
  scale: 0.5
 - object_id: active_perception/cabinet2
-  xyz: [0.1, 0.0, 0.0]
-  rpy: [0, 0, 45]
+  xyz: [0.0, 0.0, 0.0]
+  rpy: [0, 0, 135]
  scale: 0.25
--- a/gsnet_input_points.txt
+++ b/gsnet_input_points.txt
--- a/src/active_grasp/active_perception_policy.py
+++ b/src/active_grasp/active_perception_policy.py
@@ -15,6 +15,7 @@ import time
 from visualization_msgs.msg import Marker, MarkerArray
 from geometry_msgs.msg import Pose
 import tf
+from robot_helpers.ros import tf as rhtf
 from sklearn.neighbors import NearestNeighbors

 import sensor_msgs.point_cloud2 as pc2
@@ -197,6 +198,10 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
        super().__init__()
        self.max_views = rospy.get_param("ap_grasp/max_views")
        self.ap_config_path = rospy.get_param("ap_grasp/ap_config_path")
+        self.crop_min_radius = rospy.get_param("ap_grasp/crop_min_radius")
+        self.crop_radius_step = rospy.get_param("ap_grasp/crop_radius_step")
+        self.crop_max_radius = rospy.get_param("ap_grasp/crop_max_radius")
+        self.num_knn_neighbours = rospy.get_param("ap_grasp/num_knn_neighbours")
        self.ap_inference_engine = APInferenceEngine(self.ap_config_path)
        self.pcdvis = RealTime3DVisualizer()
        self.updated = False
@@ -218,7 +223,6 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):

        # Visualize Initial Camera Pose
        self.vis_cam_pose(x)
-        self.publish_pointcloud([[0,0,0]])

        # When policy hasn't produced an available grasp
        while(self.updated == False):
@@ -226,16 +230,20 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
            self.publish_pointcloud([[0,0,0]])

            # Inference with our model
-            self.target_points, self.scene_points = self.depth_image_to_ap_input(img, seg, target_id, support_id,
-                                                                                 scene_sample_num=16384, 
-                                                                                 target_sample_num=1024)
+            self.target_points, self.scene_points, _ = self.depth_image_to_ap_input(img, 
+                                                                                    seg, 
+                                                                                    target_id, 
+                                                                                    support_id,
+                                                                                    scene_sample_num=16384, 
+                                                                                    target_sample_num=1024)
            ap_input = {'target_pts': self.target_points,
                        'scene_pts': self.scene_points}
            # save point cloud
            # target_points = self.target_points.cpu().numpy()[0,:,:]
-            scene_points = self.scene_points.cpu().numpy()[0,:,:]
-
-            self.publish_pointcloud(scene_points)
+            self.scene_points_cache = self.scene_points.cpu().numpy()[0]
+            self.cam_pose_cache = rhtf.lookup(self.base_frame, self.cam_frame, rospy.Time.now()).as_matrix()
+            
+            self.publish_pointcloud(self.scene_points_cache)

            # time.sleep(10000000)
            # np.savetxt("target_points.txt", target_points, delimiter=",")
@@ -274,18 +282,72 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
        # Policy has produced an available nbv and moved to that camera pose
        if(self.updated == True):
            # Request grasping poses from GSNet
-            self.target_points, self.scene_points = self.depth_image_to_ap_input(img, seg, target_id, support_id)
+            self.target_points, self.scene_points, self.all_points = \
+                self.depth_image_to_ap_input(img, seg, target_id, support_id)
            target_points_list = np.asarray(self.target_points.cpu().numpy())[0].tolist()
            central_point_of_target = np.mean(target_points_list, axis=0)
-            target_points_radius = np.max(np.linalg.norm(target_points_list - central_point_of_target, axis=1))
-            scene_points_list = np.asarray(self.scene_points.cpu().numpy())[0].tolist()
-            merged_points_list = target_points_list + scene_points_list
-            gsnet_input_points = self.crop_pts_sphere(np.asarray(merged_points_list), 
-                                                      central_point_of_target, 
-                                                      radius=target_points_radius)
+
+            # Crop points to desired number of points
+            num_points_valid = False
+            target_points_radius = self.crop_min_radius
+            required_num_points = 15000
+            # Merge all points and scene points
+            self.all_points = np.asarray(self.all_points.cpu().numpy())[0]
+            all_points_list = self.all_points.tolist()
+            scene_points_list = self.scene_points_cache.tolist()
+
+            # convert scene_points to current frame
+            self.cam_pose = rhtf.lookup(self.base_frame, self.cam_frame, rospy.Time.now()).as_matrix()
+            for idx, point in enumerate(scene_points_list):
+                point = np.asarray(point)
+                T_point_cam_old = np.array([[1, 0, 0, point[0]],
+                                            [0, 1, 0, point[1]],
+                                            [0, 0, 1, point[2]],
+                                            [0, 0, 0, 1]])
+                # point in arm frame
+                T_point_arm = np.matmul(self.cam_pose_cache, T_point_cam_old)
+                # point in camera frame
+                T_point_cam_new = np.matmul(np.linalg.inv(self.cam_pose),
+                                            T_point_arm)
+                point = T_point_cam_new[:3, 3].T
+                point = point.tolist()
+                scene_points_list[idx] = point
+
+            merged_points_list = scene_points_list + all_points_list
+            merged_points_list = np.asarray(merged_points_list)
+            print("merged_points_list shape: "+str(merged_points_list.shape))
+
+            while not num_points_valid:
+                gsnet_input_points = self.crop_pts_sphere(merged_points_list, 
+                                                          central_point_of_target, 
+                                                          radius=target_points_radius)
+                if(len(gsnet_input_points) >= required_num_points):
+                    num_points_valid = True
+                    # downsample points
+                    gsnet_input_points = np.asarray(gsnet_input_points)
+                    gsnet_input_points = gsnet_input_points[np.random.choice(gsnet_input_points.shape[0], required_num_points, replace=False)]
+                else:
+                    target_points_radius += self.crop_radius_step
+                    if(target_points_radius > self.crop_max_radius):
+                        print("Target points radius exceeds maximum radius")
+                        print("Number of points: "+str(len(gsnet_input_points)))
+                        print("Interpolating points")
+                        # Interpolate points
+                        if(len(gsnet_input_points) < self.num_knn_neighbours+1):
+                            self.best_grasp = None
+                            self.vis.clear_grasp()
+                            self.done = True
+                            return
+                        else:
+                            gsnet_input_points = np.asarray(gsnet_input_points)
+                            num_interpolation = required_num_points - len(gsnet_input_points)
+                            gsnet_input_points = self.interpolate_point_cloud(gsnet_input_points, num_interpolation)
+                            num_points_valid = True
+            gsnet_input_points = gsnet_input_points.tolist()
+
+
            # gsnet_input_points = target_points_list
            # gsnet_input_points = merged_points_list
-            self.publish_pointcloud([[0,0,0]])
            self.publish_pointcloud(gsnet_input_points)
            
            # save point cloud as .txt
@@ -320,7 +382,7 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
                qualities.append(gg['score'])
            
            # Visualize grasps
-            self.vis.grasps(self.base_frame, grasps, qualities)
+            # self.vis.grasps(self.base_frame, grasps, qualities)
            
            # Filter grasps
            filtered_grasps = []
@@ -337,12 +399,13 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
                        filtered_qualities.append(quality)
            if len(filtered_grasps) > 0:
                self.best_grasp, quality = self.select_best_grasp(filtered_grasps, filtered_qualities)
+                # self.vis.clear_grasps()
                self.vis.grasp(self.base_frame, self.best_grasp, quality)
            else:
                self.best_grasp = None
                self.vis.clear_grasp()
            self.done = True
-            self.publish_pointcloud([[0,0,0]])
+            # self.publish_pointcloud([[0,0,0]])
    
    def publish_grasps(self, gg):
        marker_array = MarkerArray()
@@ -432,6 +495,20 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
    def select_best_grasp(self, grasps, qualities):
        i = np.argmax(qualities)
        return grasps[i], qualities[i]
+    
+        # pose = rhtf.lookup(self.base_frame, "panda_link8", rospy.Time.now())  
+        # ee_matrix = pose.as_matrix()
+        # minimum_difference = np.inf
+        # for i in range(len(grasps)-1, -1, -1):
+        #     grasp = grasps[i]
+        #     g_matrix = grasp.pose.as_matrix()
+        #     # calculatr the Frobenius norm (rotation difference)
+        #     rotation_difference = np.linalg.norm(ee_matrix[:3, :3] - g_matrix[:3, :3])
+        #     if rotation_difference < minimum_difference:
+        #         minimum_difference = rotation_difference
+        #         best_grasp = grasp
+        #         best_quality = qualities[i]
+        # return best_grasp, best_quality

    def rotation_6d_to_matrix_tensor_batch(self, d6: torch.Tensor) -> torch.Tensor:
        a1, a2 = d6[..., :3], d6[..., 3:]
@@ -457,6 +534,7 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
                                scene_sample_num=-1, target_sample_num=-1):
        target_points = []
        scene_points = []
+        all_points = []

        K = self.intrinsic.K
        depth_shape = depth_img.shape
@@ -481,15 +559,17 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
                y = y_mat[i][j]
                z = z_mat[i][j]
                
-                # no background and no plane
-                if(int(seg_id) != int(255) and int(seg_id) != int(support_id)):
-                    # This pixel belongs to the scene
-                    scene_points.append([x,y,z])
-                    if(int(seg_id) == int(target_id)):
-                        # This pixel belongs to the target object to be grasped
-                        target_points.append([x,y,z])
+                if(int(seg_id) != int(255)): # no background points
+                    all_points.append([x,y,z])
+                    if(int(seg_id) != int(support_id)): # no support points
+                        # This pixel belongs to the scene
+                        scene_points.append([x,y,z])
+                        if(int(seg_id) == int(target_id)):
+                            # This pixel belongs to the target object to be grasped
+                            target_points.append([x,y,z])
        
        # Sample points
+        all_points = np.asarray(all_points)
        target_points = np.asarray(target_points)
        scene_points = np.asarray(scene_points)
        if scene_sample_num > 0:
@@ -517,12 +597,15 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
        
        scene_points = scene_points.reshape(1, scene_points.shape[0], 3)
        scene_points = torch.from_numpy(scene_points).float().to("cuda:0")
+
+        all_points = all_points.reshape(1, all_points.shape[0], 3)
+        all_points = torch.from_numpy(all_points).float().to("cuda:0")
        
-        return target_points, scene_points
+        return target_points, scene_points, all_points

    def interpolate_point_cloud(self, points, num_new_points):
        # Fit NearestNeighbors on existing points
-        nbrs = NearestNeighbors(n_neighbors=5).fit(points)
+        nbrs = NearestNeighbors(n_neighbors=self.num_knn_neighbours).fit(points)
        interpolated_points = []
        
        for _ in range(num_new_points):
--- a/src/active_grasp/rviz.py
+++ b/src/active_grasp/rviz.py
@@ -11,12 +11,31 @@ red = [1.0, 0.0, 0.0]
 blue = [0, 0.6, 1.0]
 grey = [0.9, 0.9, 0.9]

+def create_grasp_marker(frame, grasp, color, ns, id=0, depth=0.05, radius=0.005):
+    # Faster grasp marker using Marker.LINE_LIST
+    pose, w, d, scale = grasp.pose, grasp.width, depth, [radius, 0.0, 0.0]
+    points = [[0, -w / 2, d], [0, -w / 2, 0], [0, w / 2, 0], [0, w / 2, d]]
+    return create_line_strip_marker(frame, pose, scale, color, points, ns, id)

 class Visualizer(vgn.rviz.Visualizer):
    def clear_ig_views(self):
        markers = [Marker(action=Marker.DELETE, ns="ig_views", id=i) for i in range(24)]
        self.draw(markers)

+    def clear_grasps(self):
+        markers = [Marker(action=Marker.DELETE, ns="grasps", id=i) for i in range(self.num_grasps)]
+        self.draw(markers)
+        self.num_grasps = 0
+
+    def grasps(self, frame, grasps, qualities, vmin=0.5, vmax=1.0):
+        markers = []
+        self.num_grasps = 0
+        for i, (grasp, quality) in enumerate(zip(grasps, qualities)):
+            color = cm((quality - vmin) / (vmax - vmin))
+            markers.append(create_grasp_marker(frame, grasp, color, "grasps", i))
+            self.num_grasps += 1
+        self.draw(markers)
+
    def bbox(self, frame, bbox):
        pose = Transform.identity()
        scale = [0.004, 0.0, 0.0]
--- a/src/active_grasp/simulation.py
+++ b/src/active_grasp/simulation.py
@@ -103,6 +103,12 @@ class Simulation:

    def get_target_bbox(self, uid):
        aabb_min, aabb_max = p.getAABB(uid)
+        # enlarge the bounding box
+        aabb_min = np.asarray(aabb_min)
+        aabb_max = np.asarray(aabb_max)
+        aabb_min -= 0.0
+        aabb_max += 0.0
+
        return AABBox(aabb_min, aabb_max)

    def check_for_grasps(self, bbox):