From 0267aed6e5d092012d5a61d098587b4369e29c45 Mon Sep 17 00:00:00 2001
From: hofee <64160135+GitHofee@users.noreply.github.com>
Date: Thu, 17 Oct 2024 06:13:18 -0500
Subject: [PATCH] add normal and visualize util

---
 .gitignore                              |   3 +-
 configs/local/view_generate_config.yaml |  12 +--
 preprocess/preprocessor.py              |  34 ++++--
 utils/data_load.py                      |  67 ++----------
 utils/pts.py                            |  31 +++---
 utils/vis.py                            | 132 ++++++++++++++++++++++++
 6 files changed, 191 insertions(+), 88 deletions(-)
 create mode 100644 utils/vis.py

diff --git a/.gitignore b/.gitignore
index 17fc0b6..252ae4c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,4 +11,5 @@ test/
 *.log
 /data_generation/data/*
 /data_generation/output/*
-test/
\ No newline at end of file
+test/
+temp*
\ No newline at end of file
diff --git a/configs/local/view_generate_config.yaml b/configs/local/view_generate_config.yaml
index b607470..0b42b9a 100644
--- a/configs/local/view_generate_config.yaml
+++ b/configs/local/view_generate_config.yaml
@@ -8,16 +8,16 @@ runner:
     root_dir: experiments
   generate:
     port: 5004
-    from: 590
-    to: 2000 # -1 means all
-    object_dir: /media/hofee/data/data/scaled_object_meshes
-    table_model_path: /media/hofee/data/data/others/table.obj
-    output_dir: /media/hofee/repository/full_data_output
+    from: 0
+    to: 2 # -1 means all
+    object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes
+    table_model_path: "H:\\AI\\Datasets\\table.obj"
+    output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp
     binocular_vision: true
     plane_size: 10
     max_views: 512
     min_views: 128
-    random_view_ratio: 0.1
+    random_view_ratio: 0.02
     min_cam_table_included_degree: 20
     max_diag: 0.7
     min_diag: 0.01
diff --git a/preprocess/preprocessor.py b/preprocess/preprocessor.py
index 1fa0acd..eff5ceb 100644
--- a/preprocess/preprocessor.py
+++ b/preprocess/preprocessor.py
@@ -46,6 +46,26 @@ def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsampl
     
     return points_camera_world
 
+def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
+    z = depth[mask]
+    i, j = np.nonzero(mask)
+    x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+    y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+    normal_camera = normal[mask].reshape(-1, 3)
+    sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
+                points_camera, random_downsample_N, require_idx=True
+            )
+    if len(sampled_target_points) == 0:
+        return np.zeros((0, 3)), np.zeros((0, 3))
+    sampled_normal_camera  = normal_camera[idx]
+
+    points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
+    points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+    
+    return points_camera_world, sampled_normal_camera
+
 def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
     scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
     points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
@@ -90,7 +110,7 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
                 binocular=True
             )
         mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
-        
+        normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
         ''' target points '''
         mask_img_L = mask_L
         mask_img_R = mask_R
@@ -99,23 +119,23 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
         target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
 
         
-        sampled_target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], random_downsample_N)
+        sampled_target_points_L, sampled_target_normal_L = get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], random_downsample_N)
         sampled_target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], random_downsample_N)
 
 
         has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
         if has_points:
-            target_points = PtsUtil.get_overlapping_points(
-                    sampled_target_points_L, sampled_target_points_R, voxel_size
+            target_points, overlap_idx = PtsUtil.get_overlapping_points(
+                    sampled_target_points_L, sampled_target_points_R, voxel_size, require_idx=True
                 )
+            sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
 
         if has_points:
             has_points = target_points.shape[0] > 0
 
         if has_points:
-            points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
             target_points = PtsUtil.filter_points(
-                target_points, points_normals, cam_info["cam_to_world"],voxel_size=0.002, theta = filter_degree, z_range=(min_z, max_z)
+                target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(min_z, max_z)
                 )
 
 
@@ -135,7 +155,7 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
 
 if __name__ == "__main__":
     #root = "/media/hofee/repository/new_data_with_normal"
-    root = r"/media/hofee/data/data/box_output"
+    root = r"C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp"
     # list_path = r"/media/hofee/repository/full_list.txt"
     # scene_list = []
     
diff --git a/utils/data_load.py b/utils/data_load.py
index 4ec8936..95b5368 100644
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -44,11 +44,6 @@ class DataLoadUtil:
         path = os.path.join(label_dir, f"{seq_idx}.json")
         return path
 
-    @staticmethod
-    def get_label_path_old(root, scene_name):
-        path = os.path.join(root, scene_name, "label.json")
-        return path
-
     @staticmethod
     def get_scene_seq_length(root, scene_name):
         camera_params_path = os.path.join(root, scene_name, "camera_params")
@@ -69,36 +64,6 @@ class DataLoadUtil:
         diagonal_length = np.linalg.norm(bbox)
         return diagonal_length
 
-    @staticmethod
-    def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
-        mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
-        model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
-        mesh.export(model_path)
-
-    @staticmethod
-    def save_target_mesh_at_world_space(
-        root, model_dir, scene_name, display_table_as_world_space_origin=True
-    ):
-        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
-        target_name = scene_info["target_name"]
-        transformation = scene_info[target_name]
-        if display_table_as_world_space_origin:
-            location = transformation["location"] - DataLoadUtil.get_display_table_top(
-                root, scene_name
-            )
-        else:
-            location = transformation["location"]
-        rotation_euler = transformation["rotation_euler"]
-        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
-        pose_mat[:3, 3] = location
-
-        mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
-        mesh_dir = os.path.join(root, scene_name, "mesh")
-        if not os.path.exists(mesh_dir):
-            os.makedirs(mesh_dir)
-        model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
-        mesh.export(model_path)
-
     @staticmethod
     def load_scene_info(root, scene_name):
         scene_info_path = os.path.join(root, scene_name, "scene_info.json")
@@ -113,17 +78,6 @@ class DataLoadUtil:
             target_pts_num_dict = json.load(f)
         return target_pts_num_dict
 
-    @staticmethod
-    def load_target_object_pose(root, scene_name):
-        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
-        target_name = scene_info["target_name"]
-        transformation = scene_info[target_name]
-        location = transformation["location"]
-        rotation_euler = transformation["rotation_euler"]
-        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
-        pose_mat[:3, 3] = location
-        return pose_mat
-
     @staticmethod
     def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
 
@@ -200,10 +154,13 @@ class DataLoadUtil:
                 os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
             )
             normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
+            normal_image_L = cv2.cvtColor(normal_image_L, cv2.COLOR_BGR2RGB)
             normal_path_R = os.path.join(
                 os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
             )
             normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
+            normal_image_R = cv2.cvtColor(normal_image_R, cv2.COLOR_BGR2RGB)
+            
             normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
             normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
             return normalized_normal_image_L, normalized_normal_image_R
@@ -217,6 +174,7 @@ class DataLoadUtil:
                     os.path.dirname(path), "normal", os.path.basename(path) + ".png"
                 )
             normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
+            normal_image = cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB)
             normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
             return normalized_normal_image
 
@@ -227,19 +185,14 @@ class DataLoadUtil:
         return label_data
 
     @staticmethod
-    def load_rgb(path):
-        rgb_path = os.path.join(
-            os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
-        )
-        rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
-        return rgb_image
-
-    @staticmethod
-    def load_from_preprocessed_pts(path):
+    def load_from_preprocessed_pts(path, file_type="npy"):
         npy_path = os.path.join(
-            os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
+            os.path.dirname(path), "pts", os.path.basename(path) + "." + file_type
         )
-        pts = np.load(npy_path)
+        if file_type == "txt":
+            pts = np.loadtxt(npy_path)
+        else:
+            pts = np.load(npy_path)
         return pts
 
     @staticmethod
diff --git a/utils/pts.py b/utils/pts.py
index 87818cb..0bdc26c 100644
--- a/utils/pts.py
+++ b/utils/pts.py
@@ -78,24 +78,21 @@ class PtsUtil:
         return overlapping_points
     
     @staticmethod
-    def filter_points(points, points_normals, cam_pose,  voxel_size=0.002, theta=45, z_range=(0.2, 0.45)):
+    def filter_points(points, normals, cam_pose, theta_limit=45, z_range=(0.2, 0.45)):
+        
+        """ filter with normal """ 
+        normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
+        cos_theta = np.dot(normals_normalized, np.array([0, 0, 1]))
+        theta = np.arccos(cos_theta) * 180 / np.pi
+        idx = theta < theta_limit
+        filtered_sampled_points = points[idx]
+        
         
         """ filter with z range """
-        points_cam = PtsUtil.transform_point_cloud(points, np.linalg.inv(cam_pose))
+        points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
         idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
-        z_filtered_points = points[idx]
+        z_filtered_points = filtered_sampled_points[idx]
         
-        """ filter with normal """
-        sampled_points = PtsUtil.voxel_downsample_point_cloud(z_filtered_points, voxel_size)
-        kdtree = cKDTree(points_normals[:,:3])
-        _, indices = kdtree.query(sampled_points)
-        nearest_points = points_normals[indices]
-
-        normals = nearest_points[:, 3:]
-        camera_axis = -cam_pose[:3, 2] 
-        normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
-        cos_theta = np.dot(normals_normalized, camera_axis)
-        theta_rad = np.deg2rad(theta)
-        idx = cos_theta > np.cos(theta_rad)
-        filtered_sampled_points= sampled_points[idx] 
-        return filtered_sampled_points[:, :3]
\ No newline at end of file
+        return z_filtered_points[:, :3]
+        
+        
\ No newline at end of file
diff --git a/utils/vis.py b/utils/vis.py
new file mode 100644
index 0000000..78992e2
--- /dev/null
+++ b/utils/vis.py
@@ -0,0 +1,132 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import sys
+import os
+import trimesh
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from utils.data_load import DataLoadUtil
+from utils.pts import PtsUtil
+
+class visualizeUtil:
+    
+    @staticmethod
+    def save_all_cam_pos_and_cam_axis(root, scene, output_dir):
+        length = DataLoadUtil.get_scene_seq_length(root, scene)
+        all_cam_pos = []
+        all_cam_axis = []
+        for i in range(length):
+            path = DataLoadUtil.get_path(root, scene, i)
+            cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+            cam_pose = cam_info["cam_to_world"]
+            cam_pos = cam_pose[:3, 3]
+            cam_axis = cam_pose[:3, 2] 
+            
+            num_samples = 10
+            sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
+            sample_points = np.array(sample_points)
+        
+            all_cam_pos.append(cam_pos)
+            all_cam_axis.append(sample_points)
+        
+        all_cam_pos = np.array(all_cam_pos)
+        all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
+        np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
+        np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
+        
+    @staticmethod
+    def save_all_combined_pts(root, scene, output_dir):
+        length = DataLoadUtil.get_scene_seq_length(root, scene)
+        all_combined_pts = []   
+        for i in range(length):
+            path = DataLoadUtil.get_path(root, scene, i)
+            pts = DataLoadUtil.load_from_preprocessed_pts(path,"txt")
+            if pts.shape[0] == 0:
+                continue
+            all_combined_pts.append(pts)
+        all_combined_pts = np.vstack(all_combined_pts)
+        downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
+        np.savetxt(os.path.join(output_dir, "all_combined_pts.txt"), downsampled_all_pts)
+        
+    @staticmethod
+    def save_target_mesh_at_world_space(
+        root, model_dir, scene_name, display_table_as_world_space_origin=True
+    ):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        target_name = scene_info["target_name"]
+        transformation = scene_info[target_name]
+        if display_table_as_world_space_origin:
+            location = transformation["location"] - DataLoadUtil.get_display_table_top(
+                root, scene_name
+            )
+        else:
+            location = transformation["location"]
+        rotation_euler = transformation["rotation_euler"]
+        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
+        pose_mat[:3, 3] = location
+
+        mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
+        mesh_dir = os.path.join(root, scene_name, "mesh")
+        if not os.path.exists(mesh_dir):
+            os.makedirs(mesh_dir)
+        model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
+        mesh.export(model_path)
+
+    @staticmethod
+    def save_points_and_normals(root, scene, frame_idx, output_dir):
+        target_mask_label = (0, 255, 0, 255)
+        path = DataLoadUtil.get_path(root, scene, frame_idx)
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+        depth_L,_ = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], 
+                cam_info["far_plane"], 
+                binocular=True,
+            )
+        mask_L = DataLoadUtil.load_seg(path, binocular=True, left_only=True)
+        normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
+        ''' target points '''
+        target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
+        cam_intrinsic = cam_info["cam_intrinsic"]
+        z = depth_L[target_mask_img_L]
+        i, j = np.nonzero(target_mask_img_L)
+        x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+        
+        random_downsample_N = 1000
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        normal_camera = normal_L[target_mask_img_L].reshape(-1, 3)
+        sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
+                    points_camera, random_downsample_N, require_idx=True
+                )
+        if len(sampled_target_points) == 0:
+            print("No target points")
+            
+        offset = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
+        sampled_normal_camera  = normal_camera[idx]
+        sampled_normal_camera = np.dot(sampled_normal_camera, offset)
+        sampled_visualized_normal = []
+        
+        
+        num_samples = 10
+        for i in range(len(sampled_target_points)):
+            sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
+            
+        sampled_visualized_normal = np.array(sampled_visualized_normal).reshape(-1, 3)
+        np.savetxt(os.path.join(output_dir, "target_pts.txt"), sampled_target_points)
+        np.savetxt(os.path.join(output_dir, "target_normal.txt"), sampled_visualized_normal)
+        
+
+
+# ------ Debug ------
+
+if __name__ == "__main__":
+    root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
+    model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
+    scene = "omniobject3d-box_030"
+    output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
+    
+    # visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
+    # visualizeUtil.save_all_combined_pts(root, scene, output_dir)
+    # visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
+    visualizeUtil.save_points_and_normals(root, scene, 0, output_dir)
\ No newline at end of file