update

2024-10-08 21:28:30 +08:00
parent d9d2716ba7
commit 3ab046b134
11 changed files with 773 additions and 354 deletions
--- a/utils/communicate_util.py
+++ b/utils/communicate_util.py
@@ -1,10 +1,20 @@
+import requests
+import numpy as np
+import cv2
+

 class CommunicateUtil:
    VIEW_HOST = "127.0.0.1:5000"
    INFERENCE_HOST = "127.0.0.1:5000"
    
-    def get_view_data() -> dict:
-        data = {}
+    def get_view_data(init = False) -> dict:
+        params = {}
+        params["create_scanner"] = init
+        response = requests.get(f"http://{CommunicateUtil.VIEW_HOST}/api/data", json=params)
+        data = response.json()
+        if not data["success"]:
+            print(f"Failed to get view data")
+            return None
        return data
    
    def get_inference_data(view_data:dict) -> dict:
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -0,0 +1,410 @@
+import os
+import numpy as np
+import json
+import cv2
+import trimesh
+import torch
+from utils.pts_util import PtsUtil
+
+
+class DataLoadUtil:
+    TABLE_POSITION = np.asarray([0, 0, 0.8215])
+
+    @staticmethod
+    def get_display_table_info(root, scene_name):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        display_table_info = scene_info["display_table"]
+        return display_table_info
+
+    @staticmethod
+    def get_display_table_top(root, scene_name):
+        display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
+            "height"
+        ]
+        display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
+            [0, 0, display_table_height]
+        )
+        return display_table_top
+
+    @staticmethod
+    def get_path(root, scene_name, frame_idx):
+        path = os.path.join(root, scene_name, f"{frame_idx}")
+        return path
+
+    @staticmethod
+    def get_label_num(root, scene_name):
+        label_dir = os.path.join(root, scene_name, "label")
+        return len(os.listdir(label_dir))
+
+    @staticmethod
+    def get_label_path(root, scene_name, seq_idx):
+        label_dir = os.path.join(root, scene_name, "label")
+        if not os.path.exists(label_dir):
+            os.makedirs(label_dir)
+        path = os.path.join(label_dir, f"{seq_idx}.json")
+        return path
+
+    @staticmethod
+    def get_label_path_old(root, scene_name):
+        path = os.path.join(root, scene_name, "label.json")
+        return path
+
+    @staticmethod
+    def get_scene_seq_length(root, scene_name):
+        camera_params_path = os.path.join(root, scene_name, "camera_params")
+        return len(os.listdir(camera_params_path))
+
+    @staticmethod
+    def load_mesh_at(model_dir, object_name, world_object_pose):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        mesh.apply_transform(world_object_pose)
+        return mesh
+
+    @staticmethod
+    def get_bbox_diag(model_dir, object_name):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        bbox = mesh.bounding_box.extents
+        diagonal_length = np.linalg.norm(bbox)
+        return diagonal_length
+
+    @staticmethod
+    def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
+        mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
+        model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
+        mesh.export(model_path)
+
+    @staticmethod
+    def save_target_mesh_at_world_space(
+        root, model_dir, scene_name, display_table_as_world_space_origin=True
+    ):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        target_name = scene_info["target_name"]
+        transformation = scene_info[target_name]
+        if display_table_as_world_space_origin:
+            location = transformation["location"] - DataLoadUtil.get_display_table_top(
+                root, scene_name
+            )
+        else:
+            location = transformation["location"]
+        rotation_euler = transformation["rotation_euler"]
+        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
+        pose_mat[:3, 3] = location
+
+        mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
+        mesh_dir = os.path.join(root, scene_name, "mesh")
+        if not os.path.exists(mesh_dir):
+            os.makedirs(mesh_dir)
+        model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
+        mesh.export(model_path)
+
+    @staticmethod
+    def load_scene_info(root, scene_name):
+        scene_info_path = os.path.join(root, scene_name, "scene_info.json")
+        with open(scene_info_path, "r") as f:
+            scene_info = json.load(f)
+        return scene_info
+
+    @staticmethod
+    def load_target_pts_num_dict(root, scene_name):
+        target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
+        with open(target_pts_num_path, "r") as f:
+            target_pts_num_dict = json.load(f)
+        return target_pts_num_dict
+
+    @staticmethod
+    def load_target_object_pose(root, scene_name):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        target_name = scene_info["target_name"]
+        transformation = scene_info[target_name]
+        location = transformation["location"]
+        rotation_euler = transformation["rotation_euler"]
+        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
+        pose_mat[:3, 3] = location
+        return pose_mat
+
+    @staticmethod
+    def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
+
+        def load_depth_from_real_path(real_path, min_depth, max_depth):
+            depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
+            depth = depth.astype(np.float32) / 65535.0
+            min_depth = min_depth
+            max_depth = max_depth
+            depth_meters = min_depth + (max_depth - min_depth) * depth
+            return depth_meters
+
+        if binocular:
+            depth_path_L = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
+            )
+            depth_path_R = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
+            )
+            depth_meters_L = load_depth_from_real_path(
+                depth_path_L, min_depth, max_depth
+            )
+            depth_meters_R = load_depth_from_real_path(
+                depth_path_R, min_depth, max_depth
+            )
+            return depth_meters_L, depth_meters_R
+        else:
+            depth_path = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + ".png"
+            )
+            depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
+            return depth_meters
+
+    @staticmethod
+    def load_seg(path, binocular=False, left_only=False):
+        if binocular and not left_only:
+
+            def clean_mask(mask_image):
+                green = [0, 255, 0, 255]
+                red = [255, 0, 0, 255]
+                threshold = 2
+                mask_image = np.where(
+                    np.abs(mask_image - green) <= threshold, green, mask_image
+                )
+                mask_image = np.where(
+                    np.abs(mask_image - red) <= threshold, red, mask_image
+                )
+                return mask_image
+
+            mask_path_L = os.path.join(
+                os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
+            )
+            mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
+            mask_path_R = os.path.join(
+                os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
+            )
+            mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
+            return mask_image_L, mask_image_R
+        else:
+            if binocular and left_only:
+                mask_path = os.path.join(
+                    os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
+                )
+            else:
+                mask_path = os.path.join(
+                    os.path.dirname(path), "mask", os.path.basename(path) + ".png"
+                )
+            mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
+            return mask_image
+        
+    @staticmethod
+    def load_normal(path, binocular=False, left_only=False):
+        if binocular and not left_only:
+            normal_path_L = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+            )
+            normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_COLOR)
+            normal_path_R = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
+            )
+            normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_COLOR)
+            normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
+            normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
+            return normalized_normal_image_L, normalized_normal_image_R
+        else:
+            if binocular and left_only:
+                normal_path = os.path.join(
+                    os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+                )
+            else:
+                normal_path = os.path.join(
+                    os.path.dirname(path), "normal", os.path.basename(path) + ".png"
+                )
+            normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
+            normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
+            return normalized_normal_image
+
+    @staticmethod
+    def load_label(path):
+        with open(path, "r") as f:
+            label_data = json.load(f)
+        return label_data
+
+    @staticmethod
+    def load_rgb(path):
+        rgb_path = os.path.join(
+            os.path.dirname(path), "rgb", os.path.basename(path) + ".png"
+        )
+        rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
+        return rgb_image
+
+    @staticmethod
+    def load_from_preprocessed_pts(path):
+        npy_path = os.path.join(
+            os.path.dirname(path), "pts", os.path.basename(path) + ".npy"
+        )
+        pts = np.load(npy_path)
+        return pts
+
+    @staticmethod
+    def cam_pose_transformation(cam_pose_before):
+        offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+        cam_pose_after = cam_pose_before @ offset
+        return cam_pose_after
+
+    @staticmethod
+    def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
+        scene_dir = os.path.dirname(path)
+        root_dir = os.path.dirname(scene_dir)
+        scene_name = os.path.basename(scene_dir)
+        camera_params_path = os.path.join(
+            os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
+        )
+        with open(camera_params_path, "r") as f:
+            label_data = json.load(f)
+        cam_to_world = np.asarray(label_data["extrinsic"])
+        cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
+        world_to_display_table = np.eye(4)
+        world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
+            root_dir, scene_name
+        )
+        if display_table_as_world_space_origin:
+            cam_to_world = np.dot(world_to_display_table, cam_to_world)
+        cam_intrinsic = np.asarray(label_data["intrinsic"])
+        cam_info = {
+            "cam_to_world": cam_to_world,
+            "cam_intrinsic": cam_intrinsic,
+            "far_plane": label_data["far_plane"],
+            "near_plane": label_data["near_plane"],
+        }
+        if binocular:
+            cam_to_world_R = np.asarray(label_data["extrinsic_R"])
+            cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
+            cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
+            cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
+            if display_table_as_world_space_origin:
+                cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
+                cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
+            cam_info["cam_to_world_O"] = cam_to_world_O
+            cam_info["cam_to_world_R"] = cam_to_world_R
+        return cam_info
+
+    @staticmethod
+    def get_real_cam_O_from_cam_L(
+        cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
+    ):
+        root_dir = os.path.dirname(scene_path)
+        scene_name = os.path.basename(scene_path)
+        if isinstance(cam_L, torch.Tensor):
+            cam_L = cam_L.cpu().numpy()
+        nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
+        if display_table_as_world_space_origin:
+            display_table_to_world = np.eye(4)
+            display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
+                root_dir, scene_name
+            )
+            nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
+        nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
+        return nO_to_world_pose
+
+    @staticmethod
+    def get_target_point_cloud(
+        depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
+    ):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        mask = mask.reshape(-1, 4)
+
+        target_mask = (mask == target_mask_label).all(axis=-1)
+
+        target_points_camera = points_camera[target_mask]
+        target_points_camera_aug = np.concatenate(
+            [target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
+        )
+
+        target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
+        data = {
+            "points_world": target_points_world,
+            "points_camera": target_points_camera,
+        }
+        return data
+
+    @staticmethod
+    def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        points_camera_aug = np.concatenate(
+            [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
+        )
+
+        points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        return {"points_world": points_world, "points_camera": points_camera}
+
+    @staticmethod
+    def get_target_point_cloud_world_from_path(
+        path,
+        binocular=False,
+        random_downsample_N=65536,
+        voxel_size=0.005,
+        target_mask_label=(0, 255, 0, 255),
+        display_table_mask_label=(0, 0, 255, 255),
+        get_display_table_pts=False,
+        require_normal=False,
+    ):
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
+        if binocular:
+            depth_L, depth_R = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
+            )
+            mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+            point_cloud_L = DataLoadUtil.get_target_point_cloud(
+                depth_L,
+                cam_info["cam_intrinsic"],
+                cam_info["cam_to_world"],
+                mask_L,
+                target_mask_label,
+            )["points_world"]
+            point_cloud_R = DataLoadUtil.get_target_point_cloud(
+                depth_R,
+                cam_info["cam_intrinsic"],
+                cam_info["cam_to_world_R"],
+                mask_R,
+                target_mask_label,
+            )["points_world"]
+            point_cloud_L = PtsUtil.random_downsample_point_cloud(
+                point_cloud_L, random_downsample_N
+            )
+            point_cloud_R = PtsUtil.random_downsample_point_cloud(
+                point_cloud_R, random_downsample_N
+            )
+            overlap_points = PtsUtil.get_overlapping_points(
+                point_cloud_L, point_cloud_R, voxel_size
+            )
+            return overlap_points
+        else:
+            depth = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], cam_info["far_plane"]
+            )
+            mask = DataLoadUtil.load_seg(path)
+            point_cloud = DataLoadUtil.get_target_point_cloud(
+                depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
+            )["points_world"]
+            return point_cloud
+
+    @staticmethod
+    def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
+        points_path = os.path.join(root, scene_name, "points_and_normals.txt")
+        points_normals = np.loadtxt(points_path)
+        if display_table_as_world_space_origin:
+            points_normals[:, :3] = points_normals[
+                :, :3
+            ] - DataLoadUtil.get_display_table_top(root, scene_name)
+        return points_normals
--- a/utils/preprocess_util.py
+++ b/utils/preprocess_util.py
@@ -0,0 +1,158 @@
+import os
+import numpy as np
+import time
+import sys
+np.random.seed(0)
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from utils.reconstruction_util import ReconstructionUtil
+from utils.data_load import DataLoadUtil
+from utils.pts_util import PtsUtil
+
+def save_np_pts(path, pts: np.ndarray, file_type="txt"):
+    if file_type == "txt":
+        np.savetxt(path, pts)
+    else:
+        np.save(path, pts)
+
+def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"):
+    pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}")
+    if not os.path.exists(os.path.join(root,scene, "pts")):
+        os.makedirs(os.path.join(root,scene, "pts"))
+    save_np_pts(pts_path, target_points, file_type)
+    
+def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
+    indices_path = os.path.join(root,scene, "scan_points_indices", f"{frame_idx}.{file_type}")
+    if not os.path.exists(os.path.join(root,scene, "scan_points_indices")):
+        os.makedirs(os.path.join(root,scene, "scan_points_indices"))
+    save_np_pts(indices_path, scan_points_indices, file_type)
+
+def save_scan_points(root, scene, scan_points: np.ndarray):
+    scan_points_path = os.path.join(root,scene, "scan_points.txt")
+    save_np_pts(scan_points_path, scan_points)
+
+def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic):
+    z = depth[mask]
+    i, j = np.nonzero(mask)
+    x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+    y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+    points_camera_aug = np.concatenate((points_camera, np.ones((points_camera.shape[0], 1))), axis=-1)
+    points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+    
+    return points_camera_world
+
+def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
+    scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
+    points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
+    points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
+    points_image_homogeneous /= points_image_homogeneous[:, 2:]
+    pixel_x = points_image_homogeneous[:, 0].astype(int)
+    pixel_y = points_image_homogeneous[:, 1].astype(int)
+    h, w = mask.shape[:2]
+    valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
+    mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
+    selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
+    selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
+    return selected_points_indices
+    
+
+def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
+    
+    ''' configuration '''
+    target_mask_label = (0, 255, 0, 255)
+    display_table_mask_label=(0, 0, 255, 255)
+    random_downsample_N = 32768
+    voxel_size=0.002
+    filter_degree = 75
+    min_z = 0.2
+    max_z = 0.5
+    
+    ''' scan points '''
+    scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=0.25))
+    
+    ''' read frame data(depth|mask|normal) '''
+    frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
+    for frame_id in range(frame_num):
+        print(f"[scene({scene_idx}/{scene_total})|frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
+        path = DataLoadUtil.get_path(root, scene, frame_id)
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+        depth_L, depth_R = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], 
+                cam_info["far_plane"], 
+                binocular=True
+            )
+        mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+        
+        ''' target points '''
+        mask_img_L = mask_L
+        mask_img_R = mask_R
+
+        target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
+        target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
+
+        
+        target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
+        target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
+
+        sampled_target_points_L = PtsUtil.random_downsample_point_cloud(
+                target_points_L, random_downsample_N
+            )
+        sampled_target_points_R = PtsUtil.random_downsample_point_cloud(
+                target_points_R, random_downsample_N
+            )
+
+        has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
+        if has_points:
+            target_points = PtsUtil.get_overlapping_points(
+                    sampled_target_points_L, sampled_target_points_R, voxel_size
+                )
+
+        if has_points:
+            has_points = target_points.shape[0] > 0
+
+        if has_points:
+            points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
+            target_points = PtsUtil.filter_points(
+                target_points, points_normals, cam_info["cam_to_world"],voxel_size=0.002, theta = filter_degree, z_range=(min_z, max_z)
+                )
+
+
+        ''' scan points indices '''
+        scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"]) 
+        scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
+        scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
+        
+        if not has_points:
+            target_points = np.zeros((0, 3))
+            
+        save_target_points(root, scene, frame_id, target_points, file_type=file_type)
+        save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
+    
+    save_scan_points(root, scene, scan_points) # The "done" flag of scene preprocess
+
+
+if __name__ == "__main__":
+    #root = "/media/hofee/repository/new_data_with_normal"
+    root = r"/media/hofee/data/tempdir/test_real_output"
+    # list_path = r"/media/hofee/repository/full_list.txt"
+    # scene_list = []
+    
+    # with open(list_path, "r") as f:
+    #     for line in f:
+    #         scene_list.append(line.strip())
+    scene_list = os.listdir(root)
+    from_idx = 0 # 1000
+    to_idx = 1 # 1500
+
+
+    cnt = 0
+    import time
+    total = to_idx - from_idx
+    for scene in scene_list[from_idx:to_idx]:
+        start = time.time()
+        save_scene_data(root, scene, cnt, total, file_type="npy")
+        cnt+=1
+        end = time.time()
+        print(f"Time cost: {end-start}")
--- a/utils/pts_util.py
+++ b/utils/pts_util.py
@@ -169,3 +169,25 @@ class PtsUtil:
        )
        
        return reg_icp.transformation
+
+    @staticmethod
+    def get_pts_from_depth(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        mask = mask.reshape(-1, 4)
+        points_camera = np.concatenate(
+            [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
+        )
+
+        points_world = np.dot(cam_extrinsic, points_camera.T).T[:, :3]
+        data = {
+            "points_world": points_world,
+            "points_camera": points_camera,
+        }
+        return data
--- a/utils/reconstruction_util.py
+++ b/utils/reconstruction_util.py
@@ -152,9 +152,15 @@ class ReconstructionUtil:
    
    @staticmethod
    def check_scan_points_overlap(history_indices, indices2, threshold=5):
-        for indices1 in history_indices:
-            if len(set(indices1).intersection(set(indices2))) >= threshold:
-                return True
+        try:
+            if len(indices2) == 0:
+                return False
+            for indices1 in history_indices:
+                if len(set(indices1).intersection(set(indices2))) >= threshold:
+                    return True
+        except Exception as e:
+            print(e)
+            import ipdb; ipdb.set_trace()
        return False

        
--- a/utils/render_util.py
+++ b/utils/render_util.py
@@ -0,0 +1,45 @@
+
+import os
+import json
+import subprocess
+import tempfile
+import shutil
+from utils.data_load import DataLoadUtil
+from utils.pts_util import PtsUtil
+class RenderUtil:
+
+    @staticmethod
+    def render_pts(cam_pose, object_name, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
+        nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            params = {
+                "cam_pose": nO_to_world_pose.tolist(),
+                "object_name": scene_path
+            }
+            params_data_path = os.path.join(temp_dir, "params.json")
+            with open(params_data_path, 'w') as f:
+                json.dump(params, f)
+            result = subprocess.run([
+                'blender', '-b', '-P', script_path, '--', temp_dir
+            ], capture_output=True, text=True)
+            if result.returncode != 0:
+                print("Blender script failed:")
+                print(result.stderr)
+                return None
+            path = os.path.join(temp_dir, "tmp")
+            point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
+            cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
+            
+            filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
+            full_scene_point_cloud = None
+            if require_full_scene:
+                depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
+                point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
+                point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
+            
+                point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
+                point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
+                full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
+
+            
+            return filtered_point_cloud, full_scene_point_cloud
--- a/utils/view_sample_util.py
+++ b/utils/view_sample_util.py
@@ -1,162 +0,0 @@
-
-import numpy as np
-from utils.pose_util import PoseUtil
-import trimesh
-from collections import defaultdict
-from scipy.spatial.transform import Rotation as R
-import random
-
-class ViewSampleUtil:
-    @staticmethod
-    def farthest_point_sampling(points, num_samples):
-        num_points = points.shape[0]
-        if num_samples >= num_points:
-            return points, np.arange(num_points)
-        sampled_indices = np.zeros(num_samples, dtype=int)
-        sampled_indices[0] = np.random.randint(num_points)
-        min_distances = np.full(num_points, np.inf)
-        for i in range(1, num_samples):
-            current_point = points[sampled_indices[i - 1]]
-            dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
-            min_distances = np.minimum(min_distances, dist_to_current_point)
-            sampled_indices[i] = np.argmax(min_distances)
-        downsampled_points = points[sampled_indices]
-        return downsampled_points, sampled_indices
-    
-    @staticmethod
-    def voxel_downsample(points, voxel_size):
-        voxel_grid = defaultdict(list)
-        for i, point in enumerate(points):
-            voxel_index = tuple((point // voxel_size).astype(int))
-            voxel_grid[voxel_index].append(i)
-
-        downsampled_points = []
-        downsampled_indices = []
-        for indices in voxel_grid.values():
-            selected_index = indices[0]
-            downsampled_points.append(points[selected_index])
-            downsampled_indices.append(selected_index)
-
-        return np.array(downsampled_points), downsampled_indices
-
-    @staticmethod
-    def sample_view_data(mesh: trimesh.Trimesh, distance_range: tuple = (0.25, 0.5), voxel_size: float = 0.005, max_views: int = 1, pertube_repeat: int = 1) -> dict:
-        view_data = {
-            "look_at_points": [],
-            "cam_positions": [],
-        }
-
-        vertices = mesh.vertices
-        look_at_points = []
-        cam_positions = []
-        normals = []
-        vertex_normals = mesh.vertex_normals
-
-        for i, vertex in enumerate(vertices):
-            look_at_point = vertex
-            
-            view_data["look_at_points"].append(look_at_point)
-
-            normal = vertex_normals[i]
-            if np.isnan(normal).any():
-                continue
-            if np.dot(normal, look_at_point) < 0:
-                normal = -normal
-            
-            normals.append(normal)
-
-            for _ in range(pertube_repeat):
-                perturb_angle = np.radians(np.random.uniform(0, 30))
-                perturb_axis = np.random.normal(size=3)
-                perturb_axis /= np.linalg.norm(perturb_axis)
-                rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
-                perturbed_normal = np.dot(rotation_matrix, normal)
-
-                distance = np.random.uniform(*distance_range)
-                cam_position = look_at_point + distance * perturbed_normal
-                look_at_points.append(look_at_point)
-                cam_positions.append(cam_position)
-
-        look_at_points = np.array(look_at_points)
-        cam_positions = np.array(cam_positions)
-
-        voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
-        voxel_downsampled_cam_positions = cam_positions[selected_indices]
-        voxel_downsampled_normals = np.array(normals)[selected_indices]
-
-        fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views * 2)
-        fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]
-
-        view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
-        view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
-        view_data["normals"] = voxel_downsampled_normals.tolist()
-        view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points 
-
-        return view_data
-    
-    @staticmethod
-    def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
-        world_points = []
-        world_normals = []
-        for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
-            voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
-            normal_world = obj_world_pose[:3, :3] @ normal
-            world_points.append(voxel_down_sampled_points_world[:3])
-            world_normals.append(normal_world)
-        return np.array(world_points), np.array(world_normals)
-    
-    @staticmethod
-    def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray:
-        cam_poses = []
-        min_height_z = 1000
-        for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
-            look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
-            cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
-            if look_at_point_world[2] < min_height_z:
-                min_height_z = look_at_point_world[2]
-            look_at_point_world = look_at_point_world[:3]  
-            cam_position_world = cam_position_world[:3]    
-            
-            forward_vector = cam_position_world - look_at_point_world
-            forward_vector /= np.linalg.norm(forward_vector)
-            
-            up_vector = np.array([0, 0, 1])
-
-            right_vector = np.cross(up_vector, forward_vector)
-            right_vector /= np.linalg.norm(right_vector)
-
-            corrected_up_vector = np.cross(forward_vector, right_vector)
-            rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
-
-            cam_pose = np.eye(4)
-            cam_pose[:3, :3] = rotation_matrix
-            cam_pose[:3, 3] = cam_position_world
-            cam_poses.append(cam_pose)
-
-        filtered_cam_poses = []
-        for cam_pose in cam_poses:
-            if cam_pose[2, 3] > min_height_z:
-                direction_vector = cam_pose[:3, 2]
-                horizontal_normal = np.array([0, 0, 1])  
-                cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal))
-                angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))  
-                angle_degree = np.degrees(angle)
-                if angle_degree < 90 - min_cam_table_included_degree:
-                    filtered_cam_poses.append(cam_pose)
-                if random.random() < random_view_ratio:
-                    pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm")
-                    filtered_cam_poses.append(pertube_pose @ cam_pose)
-                    
-        if len(filtered_cam_poses) > max_views:
-            indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False)
-            filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
-            
-        return np.array(filtered_cam_poses)
-        
-    @staticmethod
-    def sample_view_data_world_space(mesh: trimesh.Trimesh, cad_to_world: np.ndarray, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
-        view_data = ViewSampleUtil.sample_view_data(mesh, distance_range, voxel_size, max_views)
-        view_data["cam_to_world_poses"] = ViewSampleUtil.get_cam_pose(view_data, cad_to_world, max_views, min_cam_table_included_degree, random_view_ratio)
-        view_data["voxel_down_sampled_points"], view_data["normals"] =  ViewSampleUtil.get_world_points_and_normals(view_data, cad_to_world)
-        return view_data
-