update

2024-10-19 19:06:09 +08:00
parent 5dae3c53db
commit be7ec1a433
4 changed files with 71 additions and 42 deletions
--- a/configs/local/view_generate_config.yaml
+++ b/configs/local/view_generate_config.yaml
@@ -9,7 +9,7 @@ runner:
  generate:
    port: 5004
    from: 0
-    to: 2 # -1 means all
+    to: 1 # -1 means all
    object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes
    table_model_path: "H:\\AI\\Datasets\\table.obj"
    output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp
--- a/preprocess/preprocessor.py
+++ b/preprocess/preprocessor.py
@@ -167,7 +167,7 @@ if __name__ == "__main__":
    #         scene_list.append(line.strip())
    scene_list = os.listdir(root)
    from_idx = 0 # 1000
-    to_idx = 700 # 1500
+    to_idx = 1 # 1500
    print(scene_list)


--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -4,12 +4,38 @@ import json
 import cv2
 import trimesh
 import torch
+import OpenEXR
+import Imath
 from utils.pts import PtsUtil


 class DataLoadUtil:
    TABLE_POSITION = np.asarray([0, 0, 0.8215])
    
+    @staticmethod
+    def load_exr_image(file_path):
+        # Open the EXR file (NOTE(review): the handle is never explicitly closed here)
+        exr_file = OpenEXR.InputFile(file_path)
+        
+        # Read the EXR header, which includes the image dimensions
+        header = exr_file.header()
+        dw = header['dataWindow']
+        width = dw.max.x - dw.min.x + 1
+        height = dw.max.y - dw.min.y + 1
+
+        # Channels to read; normal images are usually stored as RGB
+        float_channels = ['R', 'G', 'B']
+
+        # Read each channel from the EXR file and convert it to a float32 array
+        img_data = []
+        for channel in float_channels:
+            channel_data = exr_file.channel(channel, Imath.PixelType(Imath.PixelType.FLOAT))
+            img_data.append(np.frombuffer(channel_data, dtype=np.float32).reshape((height, width)))
+
+        # Stack the channels into a single (height, width, 3) RGB image
+        img = np.stack(img_data, axis=-1)
+        return img
+
    @staticmethod
    def get_display_table_info(root, scene_name):
        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
@@ -148,34 +174,31 @@ class DataLoadUtil:
            return mask_image
        
    @staticmethod
-    def load_normal(path, binocular=False, left_only=False):
+    def load_normal(path, binocular=False, left_only=False, file_type="exr"):
        if binocular and not left_only:
            normal_path_L = os.path.join(
-                os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
            )
-            normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
-            normal_image_L = cv2.cvtColor(normal_image_L, cv2.COLOR_BGR2RGB)
-            normal_path_R = os.path.join(
-                os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
-            )
-            normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
-            normal_image_R = cv2.cvtColor(normal_image_R, cv2.COLOR_BGR2RGB)
+            normal_image_L = DataLoadUtil.load_exr_image(normal_path_L)
            
-            normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
-            normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
+            normal_path_R = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}"
+            )
+            normal_image_R = DataLoadUtil.load_exr_image(normal_path_R)
+            normalized_normal_image_L = normal_image_L * 2.0 - 1.0
+            normalized_normal_image_R = normal_image_R * 2.0 - 1.0
            return normalized_normal_image_L, normalized_normal_image_R
        else:
            if binocular and left_only:
                normal_path = os.path.join(
-                    os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                    os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
                )
            else:
                normal_path = os.path.join(
-                    os.path.dirname(path), "normal", os.path.basename(path) + ".png"
+                    os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}"
                )
-            normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
-            normal_image = cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB)
-            normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
+            normal_image = DataLoadUtil.load_exr_image(normal_path)
+            normalized_normal_image = normal_image * 2.0 - 1.0
            return normalized_normal_image

    @staticmethod
@@ -213,11 +236,12 @@ class DataLoadUtil:
            label_data = json.load(f)
        cam_to_world = np.asarray(label_data["extrinsic"])
        cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
-        world_to_display_table = np.eye(4)
-        world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
-            root_dir, scene_name
-        )
+        
        if display_table_as_world_space_origin:
+            world_to_display_table = np.eye(4)
+            world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
+                root_dir, scene_name
+            )
            cam_to_world = np.dot(world_to_display_table, cam_to_world)
        cam_intrinsic = np.asarray(label_data["intrinsic"])
        cam_info = {
--- a/utils/vis.py
+++ b/utils/vis.py
@@ -40,7 +40,7 @@ class visualizeUtil:
        all_combined_pts = []   
        for i in range(length):
            path = DataLoadUtil.get_path(root, scene, i)
-            pts = DataLoadUtil.load_from_preprocessed_pts(path,"txt")
+            pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
            if pts.shape[0] == 0:
                continue
            all_combined_pts.append(pts)
@@ -73,41 +73,46 @@ class visualizeUtil:
        mesh.export(model_path)

    @staticmethod
-    def save_points_and_normals(root, scene, frame_idx, output_dir):
+    def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
        target_mask_label = (0, 255, 0, 255)
        path = DataLoadUtil.get_path(root, scene, frame_idx)
-        cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
-        depth_L,_ = DataLoadUtil.load_depth(
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
+        depth = DataLoadUtil.load_depth(
                path, cam_info["near_plane"], 
                cam_info["far_plane"], 
-                binocular=True,
+                binocular=binocular,
            )
-        mask_L = DataLoadUtil.load_seg(path, binocular=True, left_only=True)
-        normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
+        if isinstance(depth, tuple):
+            depth = depth[0]
+            
+        mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
+        normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
        ''' target points '''
-        target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
+        if mask is None:
+            target_mask_img = np.ones_like(depth, dtype=bool)
+        else:
+            target_mask_img = (mask == target_mask_label).all(axis=-1)
        cam_intrinsic = cam_info["cam_intrinsic"]
-        z = depth_L[target_mask_img_L]
-        i, j = np.nonzero(target_mask_img_L)
+        z = depth[target_mask_img]
+        i, j = np.nonzero(target_mask_img)
        x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
        y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
        
        random_downsample_N = 1000

        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
-        normal_camera = normal_L[target_mask_img_L].reshape(-1, 3)
+        normal_camera = normal[target_mask_img].reshape(-1, 3)
        sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
                    points_camera, random_downsample_N, require_idx=True
                )
        if len(sampled_target_points) == 0:
            print("No target points")
            
-        offset = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
+
        sampled_normal_camera  = normal_camera[idx]
-        sampled_normal_camera = np.dot(sampled_normal_camera, offset)
        sampled_visualized_normal = []
-        
-        
+        sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
+        sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
        num_samples = 10
        for i in range(len(sampled_target_points)):
            sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
@@ -123,10 +128,10 @@ class visualizeUtil:
 if __name__ == "__main__":
    root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
    model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
-    scene = "omniobject3d-box_030"
+    scene = "test_obj"
    output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
    
-    # visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
-    # visualizeUtil.save_all_combined_pts(root, scene, output_dir)
-    # visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
-    visualizeUtil.save_points_and_normals(root, scene, 0, output_dir)
+    visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
+    visualizeUtil.save_all_combined_pts(root, scene, output_dir)
+    visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
+    #visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)