From be7ec1a43313ff6d2c43d15e3577d2a9331c4dd2 Mon Sep 17 00:00:00 2001 From: hofee <64160135+GitHofee@users.noreply.github.com> Date: Sat, 19 Oct 2024 19:06:09 +0800 Subject: [PATCH] update --- configs/local/view_generate_config.yaml | 2 +- preprocess/preprocessor.py | 2 +- utils/data_load.py | 64 +++++++++++++++++-------- utils/vis.py | 45 +++++++++-------- 4 files changed, 71 insertions(+), 42 deletions(-) diff --git a/configs/local/view_generate_config.yaml b/configs/local/view_generate_config.yaml index 0b42b9a..e2f8779 100644 --- a/configs/local/view_generate_config.yaml +++ b/configs/local/view_generate_config.yaml @@ -9,7 +9,7 @@ runner: generate: port: 5004 from: 0 - to: 2 # -1 means all + to: 1 # -1 means all object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes table_model_path: "H:\\AI\\Datasets\\table.obj" output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp diff --git a/preprocess/preprocessor.py b/preprocess/preprocessor.py index c6a9338..ad8b166 100644 --- a/preprocess/preprocessor.py +++ b/preprocess/preprocessor.py @@ -167,7 +167,7 @@ if __name__ == "__main__": # scene_list.append(line.strip()) scene_list = os.listdir(root) from_idx = 0 # 1000 - to_idx = 700 # 1500 + to_idx = 1 # 1500 print(scene_list) diff --git a/utils/data_load.py b/utils/data_load.py index 95b5368..c98347e 100644 --- a/utils/data_load.py +++ b/utils/data_load.py @@ -4,11 +4,37 @@ import json import cv2 import trimesh import torch +import OpenEXR +import Imath from utils.pts import PtsUtil class DataLoadUtil: TABLE_POSITION = np.asarray([0, 0, 0.8215]) + + @staticmethod + def load_exr_image(file_path): + # 打开 EXR 文件 + exr_file = OpenEXR.InputFile(file_path) + + # 获取 EXR 文件的头部信息,包括尺寸 + header = exr_file.header() + dw = header['dataWindow'] + width = dw.max.x - dw.min.x + 1 + height = dw.max.y - dw.min.y + 1 + + # 定义通道,通常法线图像是 RGB + float_channels = ['R', 'G', 'B'] + + # 读取 EXR 文件中的每个通道并转化为浮点数数组 + img_data = [] + for channel in float_channels: + channel_data = exr_file.channel(channel, Imath.PixelType(Imath.PixelType.FLOAT)) + img_data.append(np.frombuffer(channel_data, dtype=np.float32).reshape((height, width))) + + # 将各通道组合成一个 (height, width, 3) 的 RGB 图像 + img = np.stack(img_data, axis=-1) + return img @staticmethod def get_display_table_info(root, scene_name): @@ -148,34 +174,31 @@ class DataLoadUtil: return mask_image @staticmethod - def load_normal(path, binocular=False, left_only=False): + def load_normal(path, binocular=False, left_only=False, file_type="exr"): if binocular and not left_only: normal_path_L = os.path.join( - os.path.dirname(path), "normal", os.path.basename(path) + "_L.png" + os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}" ) - normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR) - normal_image_L = cv2.cvtColor(normal_image_L, cv2.COLOR_BGR2RGB) - normal_path_R = os.path.join( - os.path.dirname(path), "normal", os.path.basename(path) + "_R.png" - ) - normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR) - normal_image_R = cv2.cvtColor(normal_image_R, cv2.COLOR_BGR2RGB) + normal_image_L = DataLoadUtil.load_exr_image(normal_path_L) - normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0 - normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0 + normal_path_R = os.path.join( + os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}" + ) + normal_image_R = DataLoadUtil.load_exr_image(normal_path_R) + normalized_normal_image_L = normal_image_L * 2.0 - 1.0 + normalized_normal_image_R = normal_image_R * 2.0 - 1.0 return normalized_normal_image_L, normalized_normal_image_R else: if binocular and left_only: normal_path = os.path.join( - os.path.dirname(path), "normal", os.path.basename(path) + "_L.png" + os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}" ) else: normal_path = os.path.join( - os.path.dirname(path), "normal", os.path.basename(path) + ".png" + os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}" ) - normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR) - normal_image = cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB) - normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0 + normal_image = DataLoadUtil.load_exr_image(normal_path) + normalized_normal_image = normal_image * 2.0 - 1.0 return normalized_normal_image @staticmethod @@ -213,11 +236,12 @@ class DataLoadUtil: label_data = json.load(f) cam_to_world = np.asarray(label_data["extrinsic"]) cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world) - world_to_display_table = np.eye(4) - world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top( - root_dir, scene_name - ) + if display_table_as_world_space_origin: + world_to_display_table = np.eye(4) + world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top( + root_dir, scene_name + ) cam_to_world = np.dot(world_to_display_table, cam_to_world) cam_intrinsic = np.asarray(label_data["intrinsic"]) cam_info = { diff --git a/utils/vis.py b/utils/vis.py index 78992e2..831137f 100644 --- a/utils/vis.py +++ b/utils/vis.py @@ -40,7 +40,7 @@ class visualizeUtil: all_combined_pts = [] for i in range(length): path = DataLoadUtil.get_path(root, scene, i) - pts = DataLoadUtil.load_from_preprocessed_pts(path,"txt") + pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy") if pts.shape[0] == 0: continue all_combined_pts.append(pts) @@ -73,41 +73,46 @@ class visualizeUtil: mesh.export(model_path) @staticmethod - def save_points_and_normals(root, scene, frame_idx, output_dir): + def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False): target_mask_label = (0, 255, 0, 255) path = DataLoadUtil.get_path(root, scene, frame_idx) - cam_info = DataLoadUtil.load_cam_info(path, binocular=True) - depth_L,_ = DataLoadUtil.load_depth( + cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False) + depth = DataLoadUtil.load_depth( path, cam_info["near_plane"], cam_info["far_plane"], - binocular=True, + binocular=binocular, ) - mask_L = DataLoadUtil.load_seg(path, binocular=True, left_only=True) - normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True) + if isinstance(depth, tuple): + depth = depth[0] + + mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True) + normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True) ''' target points ''' - target_mask_img_L = (mask_L == target_mask_label).all(axis=-1) + if mask is None: + target_mask_img = np.ones_like(depth, dtype=bool) + else: + target_mask_img = (mask == target_mask_label).all(axis=-1) cam_intrinsic = cam_info["cam_intrinsic"] - z = depth_L[target_mask_img_L] - i, j = np.nonzero(target_mask_img_L) + z = depth[target_mask_img] + i, j = np.nonzero(target_mask_img) x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] random_downsample_N = 1000 points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) - normal_camera = normal_L[target_mask_img_L].reshape(-1, 3) + normal_camera = normal[target_mask_img].reshape(-1, 3) sampled_target_points, idx = PtsUtil.random_downsample_point_cloud( points_camera, random_downsample_N, require_idx=True ) if len(sampled_target_points) == 0: print("No target points") - offset = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]]) + sampled_normal_camera = normal_camera[idx] - sampled_normal_camera = np.dot(sampled_normal_camera, offset) sampled_visualized_normal = [] - - + sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2] + sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1] num_samples = 10 for i in range(len(sampled_target_points)): sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)]) @@ -123,10 +128,10 @@ class visualizeUtil: if __name__ == "__main__": root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp" model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes" - scene = "omniobject3d-box_030" + scene = "test_obj" output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test" - # visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir) - # visualizeUtil.save_all_combined_pts(root, scene, output_dir) - # visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene) - visualizeUtil.save_points_and_normals(root, scene, 0, output_dir) \ No newline at end of file + visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir) + visualizeUtil.save_all_combined_pts(root, scene, output_dir) + visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene) + #visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True) \ No newline at end of file