Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction

2024-10-21 07:33:40 +00:00
parent 9ca0851bf7 be67be95e9
commit 0f61e1d64d
4 changed files with 73 additions and 44 deletions
--- a/configs/local/view_generate_config.yaml
+++ b/configs/local/view_generate_config.yaml
@@ -9,7 +9,7 @@ runner:
  generate:
    port: 5004
    from: 0
-    to: 2 # -1 means all
+    to: 1 # -1 means all
    object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes
    table_model_path: "H:\\AI\\Datasets\\table.obj"
    output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp
--- a/preprocess/preprocessor.py
+++ b/preprocess/preprocessor.py
@@ -167,7 +167,7 @@ if __name__ == "__main__":
    #         scene_list.append(line.strip())
    scene_list = os.listdir(root)
    from_idx = 0 # 1000
-    to_idx = 700 # 1500
+    to_idx = 1 # 1500
    print(scene_list)
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -4,12 +4,38 @@ import json
 import cv2
 import trimesh
 import torch
 import OpenEXR
 import Imath
 from utils.pts import PtsUtil
 class DataLoadUtil:
    TABLE_POSITION = np.asarray([0, 0, 0.8215])
    @staticmethod
    def load_exr_image(file_path):
        """Read the R, G and B channels of an OpenEXR file into one array.

        Args:
            file_path: Path of the EXR file to open.

        Returns:
            A float32 array of shape (height, width, 3) with the channels
            stacked in R, G, B order along the last axis.
        """
        # Open the EXR file.
        exr_file = OpenEXR.InputFile(file_path)
        try:
            # Image size comes from the data window stored in the header
            # (bounds are inclusive, hence the +1).
            header = exr_file.header()
            dw = header['dataWindow']
            width = dw.max.x - dw.min.x + 1
            height = dw.max.y - dw.min.y + 1
            # Channels to read; normal images are typically stored as RGB.
            float_channels = ['R', 'G', 'B']
            pixel_type = Imath.PixelType(Imath.PixelType.FLOAT)
            # Decode each channel's raw bytes into a (height, width) float array.
            img_data = []
            for channel in float_channels:
                channel_data = exr_file.channel(channel, pixel_type)
                img_data.append(
                    np.frombuffer(channel_data, dtype=np.float32).reshape((height, width))
                )
        finally:
            # Release the file handle even when a channel read or reshape fails.
            exr_file.close()
        # Combine the channels into a single (height, width, 3) image.
        img = np.stack(img_data, axis=-1)
        return img
    @staticmethod
    def get_display_table_info(root, scene_name):
        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
@@ -150,34 +176,31 @@ class DataLoadUtil:
            return mask_image
    @staticmethod
-    def load_normal(path, binocular=False, left_only=False):
+    def load_normal(path, binocular=False, left_only=False, file_type="exr"):
        if binocular and not left_only:
            normal_path_L = os.path.join(
-                os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
            )
-            normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
+            normal_image_L = DataLoadUtil.load_exr_image(normal_path_L)
            normal_image_L = cv2.cvtColor(normal_image_L, cv2.COLOR_BGR2RGB)
            normal_path_R = os.path.join(
                os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
            )
            normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
            normal_image_R = cv2.cvtColor(normal_image_R, cv2.COLOR_BGR2RGB)
-            normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
+            normal_path_R = os.path.join(
-            normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}"
            )
            normal_image_R = DataLoadUtil.load_exr_image(normal_path_R)
            normalized_normal_image_L = normal_image_L * 2.0 - 1.0
            normalized_normal_image_R = normal_image_R * 2.0 - 1.0
            return normalized_normal_image_L, normalized_normal_image_R
        else:
            if binocular and left_only:
                normal_path = os.path.join(
-                    os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                    os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
                )
            else:
                normal_path = os.path.join(
-                    os.path.dirname(path), "normal", os.path.basename(path) + ".png"
+                    os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}"
                )
-            normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
+            normal_image = DataLoadUtil.load_exr_image(normal_path)
-            normal_image = cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB)
+            normalized_normal_image = normal_image * 2.0 - 1.0
            normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
            return normalized_normal_image
    @staticmethod
@@ -215,11 +238,12 @@ class DataLoadUtil:
            label_data = json.load(f)
        cam_to_world = np.asarray(label_data["extrinsic"])
        cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
        if display_table_as_world_space_origin:
            world_to_display_table = np.eye(4)
            world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
                root_dir, scene_name
            )
        if display_table_as_world_space_origin:
            cam_to_world = np.dot(world_to_display_table, cam_to_world)
        cam_intrinsic = np.asarray(label_data["intrinsic"])
        cam_info = {
--- a/utils/vis.py
+++ b/utils/vis.py
@@ -40,7 +40,7 @@ class visualizeUtil:
        all_combined_pts = []   
        for i in range(length):
            path = DataLoadUtil.get_path(root, scene, i)
-            pts = DataLoadUtil.load_from_preprocessed_pts(path,"txt")
+            pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
            if pts.shape[0] == 0:
                continue
            all_combined_pts.append(pts)
@@ -73,41 +73,46 @@ class visualizeUtil:
        mesh.export(model_path)
    @staticmethod
-    def save_points_and_normals(root, scene, frame_idx, output_dir):
+    def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
        target_mask_label = (0, 255, 0, 255)
        path = DataLoadUtil.get_path(root, scene, frame_idx)
-        cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
-        depth_L,_ = DataLoadUtil.load_depth(
+        depth = DataLoadUtil.load_depth(
                path, cam_info["near_plane"], 
                cam_info["far_plane"], 
-                binocular=True,
+                binocular=binocular,
            )
-        mask_L = DataLoadUtil.load_seg(path, binocular=True, left_only=True)
+        if isinstance(depth, tuple):
-        normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
+            depth = depth[0]
        mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
        normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
        ''' target points '''
-        target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
+        if mask is None:
            target_mask_img = np.ones_like(depth, dtype=bool)
        else:
            target_mask_img = (mask == target_mask_label).all(axis=-1)
        cam_intrinsic = cam_info["cam_intrinsic"]
-        z = depth_L[target_mask_img_L]
+        z = depth[target_mask_img]
-        i, j = np.nonzero(target_mask_img_L)
+        i, j = np.nonzero(target_mask_img)
        x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
        y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
        random_downsample_N = 1000
        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
-        normal_camera = normal_L[target_mask_img_L].reshape(-1, 3)
+        normal_camera = normal[target_mask_img].reshape(-1, 3)
        sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
                    points_camera, random_downsample_N, require_idx=True
                )
        if len(sampled_target_points) == 0:
            print("No target points")
-        offset = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
+
        sampled_normal_camera  = normal_camera[idx]
        sampled_normal_camera = np.dot(sampled_normal_camera, offset)
        sampled_visualized_normal = []
-        
+        sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
-        
+        sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
        num_samples = 10
        for i in range(len(sampled_target_points)):
            sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
@@ -121,12 +126,12 @@ class visualizeUtil:
 # ------ Debug ------
 if __name__ == "__main__":
-    root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
+    root = r"/home/yan20/nbv_rec/project/franka_control/temp"
    model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
-    scene = "omniobject3d-box_030"
+    scene = "cad_model_world"
-    output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
+    output_dir = r"/home/yan20/nbv_rec/project/franka_control/temp/output"
-    # visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
+    visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
-    # visualizeUtil.save_all_combined_pts(root, scene, output_dir)
+    visualizeUtil.save_all_combined_pts(root, scene, output_dir)
-    # visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
+    visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
-    visualizeUtil.save_points_and_normals(root, scene, 0, output_dir)
+    #visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)