inference finish

2025-01-15 14:42:54 +08:00
commit 6bb8835d00
30 changed files with 1158013 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,168 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+test/
+output/
+__pycache__/
+*.py[cod]
+*$py.class
+test_output/
+test*
+temp*
+# C extensions
+*.so
+*.txt
+experiments/
+temp_output/
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
--- a/README.md
+++ b/README.md
@@ -0,0 +1,5 @@
+nbv reconstruction controll franka
+cd ../..
+cd frankapy
+bash bash_scripts/start_yan1.sh -i localhost
+ptb run cad_cl
--- a/app_cad.py
+++ b/app_cad.py
@@ -0,0 +1,36 @@
+from PytorchBoot.application import PytorchBootApplication
+from runners.cad_close_loop_online_reg_strategy import CADCloseLoopOnlineRegStrategyRunner
+
+@PytorchBootApplication("cad_register")
+class AppCADRegister:
+    @staticmethod
+    def start():
+        CADCloseLoopOnlineRegStrategyRunner("configs/cad_close_loop_config.yaml").register()
+        
+@PytorchBootApplication("cad_render")
+class AppCADRender:
+    @staticmethod
+    def start():
+        CADCloseLoopOnlineRegStrategyRunner("configs/cad_close_loop_config.yaml").render_data()
+        
+@PytorchBootApplication("cad_preprocess")
+class AppCADPreprocess:
+    @staticmethod
+    def start():
+        CADCloseLoopOnlineRegStrategyRunner("configs/cad_close_loop_config.yaml").preprocess_data() 
+
+@PytorchBootApplication("cad_init_obj")
+class AppCADPreprocess:
+    @staticmethod
+    def start():
+        runner = CADCloseLoopOnlineRegStrategyRunner("configs/cad_close_loop_config.yaml")
+        runner.register()
+        runner.render_data()
+        runner.preprocess_data()
+
+@PytorchBootApplication("cad_cl")
+class AppCADCloseLoopOnlineRegStrategy:
+    @staticmethod
+    def start():
+        CADCloseLoopOnlineRegStrategyRunner("configs/cad_close_loop_config.yaml").run()
+        
--- a/app_inference.py
+++ b/app_inference.py
@@ -0,0 +1,8 @@
+from PytorchBoot.application import PytorchBootApplication
+from runners.inference import InferenceRunner
+
+@PytorchBootApplication("inference")
+class AppInference:
+    @staticmethod
+    def start():
+        InferenceRunner("configs/inference_config.yaml").run()
--- a/beans/pointcloud.py
+++ b/beans/pointcloud.py
@@ -0,0 +1,327 @@
+import numpy as np
+import torch
+
+
+def rotation_6d_to_matrix_numpy(d6):
+        a1, a2 = d6[:3], d6[3:]
+        b1 = a1 / np.linalg.norm(a1)
+        b2 = a2 - np.dot(b1, a2) * b1
+        b2 = b2 / np.linalg.norm(b2)
+        b3 = np.cross(b1, b2)
+        return np.stack((b1, b2, b3), axis=-2)
+
+class Pointcloud:
+    def __init__(self, points, camera=None, name="unknown", parent=None, from_parent_index=None):
+        self.points = points # Nx3 numpy array
+        if camera is not None and camera.shape == (9,):
+            rotation_matrix = rotation_6d_to_matrix_numpy(camera)
+            translation = camera[6:]
+            camera = np.eye(4)
+            camera[:3, :3] = rotation_matrix
+            camera[:3, 3] = translation
+        self.camera = camera # 4x4 numpy array
+        self.current_transform = np.eye(4)
+        self.transform_history = {}
+        self.original_points = points.copy()
+        self.name = name
+        self.parent = parent
+        self.from_parent_index = from_parent_index
+
+    # --------- basic ---------
+    def __getitem__(self, item):
+        return self.points[item]
+
+    def __len__(self):
+        return len(self.points)
+
+    def __repr__(self):
+        return f"Pointcloud({self.points})"
+
+    def __str__(self):
+        return f"Pointcloud with {len(self.points)} points. \n\tCenter: {np.mean(self.points, axis=0)} \n\tTransform history: {list(self.transform_history.keys())}"
+
+    def __eq__(self, other):
+        return np.array_equal(self.points, other.points)
+
+    def __ne__(self, other):
+        return not np.array_equal(self.points, other.points)
+
+    def __contains__(self, item):
+        return item in self.points
+
+    def __iter__(self):
+        return iter
+    
+    def concat(self, other):
+        return Pointcloud(np.concatenate([self.points, other.points], axis=0))
+    
+    # --------- downsample ---------
+    def voxel_downsample(self, voxel_size):
+        voxel_indices = np.floor(self.points / voxel_size).astype(np.int32)
+        _, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
+        idx_sort = np.argsort(inverse)
+        idx_unique = idx_sort[np.cumsum(counts)-counts]
+        downsampled_points = self.points[idx_unique]
+        return Pointcloud(
+                          downsampled_points, 
+                          name=self.name+'(voxel downsampled with voxel_size='+str(voxel_size)+')',
+                          parent=self.parent,
+                          from_parent_index=idx_unique
+                          )
+    
+    def random_downsample(self, num_points):
+        if self.points.shape[0] == 0:
+            downsampled_points = self.points
+            idx = np.array([])
+        else:
+            idx = np.random.choice(len(self.points), num_points, replace=True)
+            downsampled_points = self.points[idx]
+        return Pointcloud(
+            downsampled_points, 
+            name=self.name+'(random downsampled with num_points='+str(num_points)+')',
+            parent=self.parent,
+            from_parent_index=idx
+        )
+    
+    def fps_downsample(self, num_points):
+        N = self.points.shape[0]
+        mask = np.zeros(N, dtype=bool)
+
+        sampled_indices = np.zeros(num_points, dtype=int)
+        sampled_indices[0] = np.random.randint(0, N)
+        distances = np.linalg.norm(self.points - self.points[sampled_indices[0]], axis=1)
+        for i in range(1, num_points):
+            farthest_index = np.argmax(distances)
+            sampled_indices[i] = farthest_index
+            mask[farthest_index] = True
+
+            new_distances = np.linalg.norm(self.points - self.points[farthest_index], axis=1)
+            distances = np.minimum(distances, new_distances)
+        
+        sampled_points = self.points[sampled_indices]
+        return Pointcloud(
+            sampled_points, 
+            name=self.name+'(fps downsampled with num_points='+str(num_points)+')',
+            parent=self.parent,
+            from_parent_index=sampled_indices
+        )
+    # --------- transform ---------
+    def transform(self, transform_matrix, name=None):
+        self.points = np.dot(self.points, transform_matrix[:3, :3].T) + transform_matrix[:3, 3]
+        self.current_transform = np.dot(self.current_transform, transform_matrix)
+        if name is None:
+            name = f"transform_{len(self.transform_history)}"
+        self.transform_history[name] = transform_matrix
+        return self
+
+    def translate(self, translation, name=None):
+        transform_matrix = np.eye(4)
+        transform_matrix[:3, 3] = translation
+        self.transform(transform_matrix, name)
+        return self
+    
+    def rotate(self, rotation, name=None):
+        transform_matrix = np.eye(4)
+        if rotation.shape == (3, 3):
+            rotation_matrix = rotation
+        elif rotation.shape == (6,):
+            rotation_matrix = rotation_6d_to_matrix_numpy(rotation)
+        transform_matrix[:3, :3] = rotation_matrix
+        self.transform(transform_matrix, name)
+        return self
+    
+    def scale(self, scale_factor, name=None):
+        transform_matrix = np.eye(4)
+        transform_matrix[:3, :3] = scale_factor * np.eye(3)
+        self.transform(transform_matrix, name)
+        return self
+        
+    def same_transform(self, other):
+        return np.allclose(self.current_transform, other.current_transform)
+    
+    def print_transform_history(self):
+        print(f"Transform history of {self.name}:")
+        for name, transform_matrix in self.transform_history.items():
+            print(f"\t-{name}:")
+            for i in range(4):
+                print(f"\t\t{transform_matrix[i]}")
+    
+    # --------- tensor ---------
+    def get_batchlized_points(self, batch_size=1):
+        return torch.tensor(self.points).unsqueeze(0).repeat(batch_size, 1, 1)
+    
+    # --------- visualize ---------
+    def visualize(self, point_size=1, color=None):
+        import plotly.graph_objects as go
+        fig = go.Figure()
+        if color is None:
+            if self.name is not None:
+                hash_value = hash(self.name)
+                r = (hash_value & 0xFF) / 255.0
+                g = ((hash_value >> 8) & 0xFF) / 255.0
+                b = ((hash_value >> 16) & 0xFF) / 255.0
+                color = f'rgb({int(r*255)},{int(g*255)},{int(b*255)})'
+            else:
+                color = "gray"
+            
+
+        if self.points is not None:
+            fig.add_trace(go.Scatter3d(
+                x=self.points[:, 0], y=self.points[:, 1], z=self.points[:, 2],
+                mode='markers', marker=dict(size=point_size, color=color, opacity=0.5), name=self.name
+            ))
+        
+        if self.camera is not None:
+            origin = self.camera[:3, 3]
+            z_axis = self.camera[:3, 2]
+            fig.add_trace(go.Cone(
+                x=[origin[0]], y=[origin[1]], z=[origin[2]],
+                u=[z_axis[0]], v=[z_axis[1]], w=[z_axis[2]],
+                colorscale="blues",
+                sizemode="absolute", sizeref=0.05, anchor="tail", showscale=False
+            ))
+
+        title = self.name
+        fig.update_layout(
+            title=title,
+            scene=dict(
+                xaxis_title='X',
+                yaxis_title='Y',
+                zaxis_title='Z'
+            ),
+            margin=dict(l=0, r=0, b=0, t=40),
+            scene_camera=dict(eye=dict(x=1.25, y=1.25, z=1.25))
+        )
+        fig.show()
+
+    # --------- save and load ---------
+    def save(self, file_path):
+        np.save(file_path, self.points)
+    
+    def savetxt(self, file_path):
+        np.savetxt(file_path, self.points)
+
+    def load(self, file_path):
+        self.points = np.load(file_path)
+
+    def loadtxt(self, file_path):
+        self.points = np.loadtxt(file_path)
+
+
+class PointcloudGroup:
+    def __init__(self, pointclouds: list[Pointcloud] = [], name="unknown"):
+        self.pointclouds = pointclouds
+        self.name = name
+        
+    # --------- basic ---------
+    def __getitem__(self, item):
+        return self.pointclouds[item]
+
+    def __len__(self):
+        return len(self.pointclouds)
+
+    def __repr__(self):
+        return f"PointcloudGroup({self.pointclouds})"
+
+    def __str__(self):
+        return f"PointcloudGroup with {len(self.pointclouds)} pointclouds."
+
+    def __eq__(self, other):
+        return np.array_equal(self.pointclouds, other.pointclouds)
+
+    def __ne__(self, other):
+        return not np.array_equal(self.pointclouds, other.pointclouds)
+
+    def __contains__(self, item):
+        return item in self.pointclouds
+
+    def __iter__(self):
+        return iter
+    
+    def __add__(self, pointcloud: Pointcloud):
+        new_group = PointcloudGroup(self.name)
+        new_group.pointclouds = self.pointclouds.copy()
+        new_group.pointclouds.append(pointcloud)
+        return new_group
+    
+    def __iadd__(self, pointcloud: Pointcloud):
+        self.pointclouds.append(pointcloud)
+        return self
+    
+    def add(self, pointcloud: Pointcloud):
+        self.pointclouds.append(pointcloud)
+    
+    def concat(self, other):
+        new_group = PointcloudGroup(self.name)
+        new_group.pointclouds = self.pointclouds.copy()
+        new_group.pointclouds.extend(other.pointclouds)
+        return new_group
+    
+    # --------- merge ---------
+    def merge_pointclouds(self, name="unknown"):
+        points = np.concatenate([pointcloud.points for pointcloud in self.pointclouds], axis=0)
+        return Pointcloud(points, name=name)
+    
+    # --------- transform ---------
+    def transform(self, transform_matrix, name="unknown_transform"):
+        for pointcloud in self.pointclouds:
+            pointcloud.transform(transform_matrix, name)
+    
+    def translate(self, translation, name="unknown_translate"):
+        transform_matrix = np.eye(4)
+        transform_matrix[:3, 3] = translation
+        self.transform(transform_matrix, name)
+    
+    def rotate(self, rotation_matrix, name="unknown_ratate"):
+        transform_matrix = np.eye(4)
+        transform_matrix[:3, :3] = rotation_matrix
+        self.transform(transform_matrix, name)
+    
+    def scale(self, scale_factor, name="unknown_scale"):
+        transform_matrix = np.eye(4)
+        transform_matrix[:3, :3] = scale_factor * np.eye(3)
+        self.transform(transform_matrix, name)
+        
+    # --------- visualize ---------
+    def visualize(self, point_size=1, color=None):
+        import plotly.graph_objects as go
+        fig = go.Figure()
+        for pointcloud in self.pointclouds:
+            if color is None:
+                if pointcloud.name is not None:
+                    hash_value = hash(pointcloud.name)
+                r = (hash_value & 0xFF) / 255.0
+                g = ((hash_value >> 8) & 0xFF) / 255.0
+                b = ((hash_value >> 16) & 0xFF) / 255.0
+                color = f'rgb({int(r*255)},{int(g*255)},{int(b*255)})'
+            else:
+                color = "gray"
+            
+            if pointcloud.points is not None:
+                fig.add_trace(go.Scatter3d(
+                    x=pointcloud.points[:, 0], y=pointcloud.points[:, 1], z=pointcloud.points[:, 2],
+                    mode='markers', marker=dict(size=point_size, color=color, opacity=0.5), name=pointcloud.name
+                ))
+        
+            if pointcloud.camera is not None:
+                origin = pointcloud.camera[:3, 3]
+                z_axis = pointcloud.camera[:3, 2]
+                fig.add_trace(go.Cone(
+                    x=[origin[0]], y=[origin[1]], z=[origin[2]],
+                    u=[z_axis[0]], v=[z_axis[1]], w=[z_axis[2]],
+                    colorscale="blues",
+                    sizemode="absolute", sizeref=0.05, anchor="tail", showscale=False
+                ))
+
+        title = self.name
+        fig.update_layout(
+            title=title,
+            scene=dict(
+                xaxis_title='X',
+                yaxis_title='Y',
+                zaxis_title='Z'
+            ),
+            margin=dict(l=0, r=0, b=0, t=40),
+            scene_camera=dict(eye=dict(x=1.25, y=1.25, z=1.25))
+        )
+        fig.show()
--- a/combine_all_pts.py
+++ b/combine_all_pts.py
@@ -0,0 +1,18 @@
+import numpy as np
+import os
+from utils.vis import visualizeUtil
+if __name__ == "__main__":
+    root = r"output"
+    visualizeUtil.save_all_cam_pos_and_cam_axis(root,"2box", "/home/yan20/nbv_rec/project/franka_control/output/temp")
+    # pts_dir_path = "/home/yan20/nbv_rec/project/franka_control/output/2box/pts"
+    # pts_dir = os.listdir(pts_dir_path)
+    # pts_list = []
+    # for i in range(len(pts_dir)):
+    #     pts_path = os.path.join(pts_dir_path, pts_dir[i])
+    #     pts = np.load(pts_path)
+    #     if pts.shape[0] != 0:
+    #         pts_list.append(pts)
+    # combined_pts = np.vstack(pts_list)
+    # path = "/home/yan20/nbv_rec/project/franka_control"
+    # np.savetxt(os.path.join(path, "combined_pts2.txt"), combined_pts)
+    
--- a/configs/cad_close_loop_config.yaml
+++ b/configs/cad_close_loop_config.yaml
@@ -0,0 +1,28 @@
+
+runner:
+  general:
+    seed: 1
+    device: cpu
+    cuda_visible_devices: "0,1,2,3,4,5,6,7"
+    
+  experiment:
+    name: debug
+    root_dir: "experiments"
+  
+  generate:
+    blender_bin_path: /home/yan20/nbv_rec/project/blender_app/blender-4.0.2-linux-x64/blender
+    generator_script_path: /home/yan20/Desktop/nbv_rec/project/blender_app/new_data_generator.py
+    model_dir: "/home/yan20/Desktop/nbv_rec/data/models"
+    output_dir: /home/yan20/nbv_rec/project/franka_control/output
+    table_model_path: "/home/yan20/Desktop/nbv_rec/data/table.obj"
+    model_start_idx: 0
+    voxel_size: 0.005
+    object_name: "2box"
+  
+
+  reconstruct:
+    scan_points_threshold: 10
+    max_shot_view_num: 50
+    min_shot_new_pts_num: 10
+    min_coverage_increase: 0.001  
+ 
--- a/configs/cad_open_loop_config.yaml
+++ b/configs/cad_open_loop_config.yaml
@@ -0,0 +1,43 @@
+
+runner:
+  general:
+    seed: 1
+    device: cpu
+    cuda_visible_devices: "0,1,2,3,4,5,6,7"
+    
+  experiment:
+    name: debug
+    root_dir: "experiments"
+  
+  generate:
+    blender_bin_path: /home/yan20/Desktop/nbv_rec/project/blender_app/blender-4.2.2-linux-x64/blender
+    generator_script_path: /home/yan20/Desktop/nbv_rec/project/blender_app/data_generator.py
+    model_dir: "/home/yan20/Desktop/nbv_rec/data/models"
+    table_model_path: "/home/yan20/Desktop/nbv_rec/data/table.obj"
+    model_start_idx: 0
+    voxel_size: 0.002
+    max_view: 512
+    min_view: 128
+    max_diag: 0.7
+    min_diag: 0.01
+    random_view_ratio: 0
+    min_cam_table_included_degree: 20
+    obj_name: "bear"
+    light_and_camera_config:
+      Camera:
+        near_plane: 0.01
+        far_plane: 5
+        fov_vertical: 25
+        resolution: [640,400]
+        eye_distance: 0.15
+        eye_angle: 25
+      Light:
+        location: [0,0,3.5]
+        orientation: [0,0,0]
+        power: 150
+
+  reconstruct:
+    soft_overlap_threshold: 0.3
+    hard_overlap_threshold: 0.6
+    scan_points_threshold: 10
+ 
--- a/configs/inference_config.yaml
+++ b/configs/inference_config.yaml
@@ -0,0 +1,18 @@
+
+runner:
+  general:
+    seed: 0
+    device: cuda
+    cuda_visible_devices: "0,1,2,3,4,5,6,7"
+    
+  experiment:
+    name: debug
+    root_dir: "experiments"
+  
+  inference:
+    server_url: "http://localhost:17748/inference"
+    max_iter: 50
+    max_fail: 5
+    max_no_new_pts: 5
+    min_delta_pts_num: 10
+ 
--- a/debug/cad_for_init_reg.obj
+++ b/debug/cad_for_init_reg.obj
--- a/depth_to_ee.json
+++ b/depth_to_ee.json
@@ -0,0 +1,26 @@
+[
+    [
+        -0.007570863688227585,
+        -0.9998419281744321,
+        0.016085687548450713,
+        0.026445456727876612
+    ],
+    [
+        0.9999336226554764,
+        -0.007709353551098916,
+        -0.008565016580406759,
+        -0.04860572455833323
+    ],
+    [
+        0.008687669832192671,
+        0.01601977456164321,
+        0.9998339079342421,
+        -0.02169476997180904
+    ],
+    [
+        0.0,
+        0.0,
+        0.0,
+        1.0
+    ]
+]
--- a/hand_eye.json
+++ b/hand_eye.json
@@ -0,0 +1,30 @@
+{
+    "rotation": [
+        [
+            -0.018963923592112186,
+            -0.9996228744230772,
+            0.019861483633843654
+        ],
+        [
+            0.9967174555711164,
+            -0.017337556373989038,
+            0.07908048367845485
+        ],
+        [
+            -0.07870631081325144,
+            0.02129596368149753,
+            0.9966703560200054
+        ]
+    ],
+    "translation": [
+        [
+            0.026586589276512175
+        ],
+        [
+            -0.03359567525195615
+        ],
+        [
+            -0.020136829941901187
+        ]
+    ]
+}
--- a/load_normal.py
+++ b/load_normal.py
@@ -0,0 +1,49 @@
+import cv2
+import os
+import numpy as np
+
+def load_normal(path, binocular=False, left_only=False):
+    if binocular and not left_only:
+        normal_path_L = os.path.join(
+            os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+        )
+        normal_image_L = cv2.imread(normal_path_L, cv2.IMREAD_UNCHANGED)
+        normal_path_R = os.path.join(
+            os.path.dirname(path), "normal", os.path.basename(path) + "_R.png"
+        )
+        normal_image_R = cv2.imread(normal_path_R, cv2.IMREAD_UNCHANGED)
+        normalized_normal_image_L = normal_image_L / 255.0 * 2.0 - 1.0
+        normalized_normal_image_R = normal_image_R / 255.0 * 2.0 - 1.0
+        return normalized_normal_image_L, normalized_normal_image_R
+    else:
+        if binocular and left_only:
+            normal_path = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + "_L.png"
+            )
+        else:
+            normal_path = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + ".png"
+            )
+        normal_image = cv2.imread(normal_path, cv2.IMREAD_UNCHANGED)
+        normalized_normal_image = normal_image / 255.0 * 2.0 - 1.0
+        return normalized_normal_image
+
+def show_rgb(event, x, y, flags, param):
+    if event == cv2.EVENT_MOUSEMOVE:
+        pixel_value = param[y, x]
+        print(f"RGB at ({x},{y}): {pixel_value}")
+
+if __name__ == "__main__":
+    path = "/Users/hofee/temp/1"
+    normal_image = load_normal(path, binocular=True, left_only=True)
+    display_image = ((normal_image + 1.0) / 2.0 * 255).astype(np.uint8)
+
+    cv2.namedWindow("Normal Image")
+    cv2.setMouseCallback("Normal Image", show_rgb, param=display_image)
+
+    while True:
+        cv2.imshow("Normal Image", display_image)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+
+    cv2.destroyAllWindows()
--- a/runners/cad_close_loop_new.py
+++ b/runners/cad_close_loop_new.py
@@ -0,0 +1,340 @@
+import os
+import time
+import trimesh
+import tempfile
+import subprocess
+import numpy as np
+from PytorchBoot.runners.runner import Runner
+from PytorchBoot.config import ConfigManager
+import PytorchBoot.stereotype as stereotype
+from PytorchBoot.utils.log_util import Log
+from PytorchBoot.status import status_manager
+
+from utils.control_util import ControlUtil
+from utils.communicate_util import CommunicateUtil
+from utils.pts_util import PtsUtil
+from utils.reconstruction_util import ReconstructionUtil
+from utils.preprocess_util import save_scene_data
+from utils.data_load import DataLoadUtil
+from utils.view_util import ViewUtil
+
+class PointCloud:
+    def __init__(points, camera, name):
+        pass
+    
+class PointCloudGroup:
+    def __init__(point_clouds, name):
+        pass
+
+@stereotype.runner("temp")
+class CADCloseLoopOnlineRegStrategyRunner(Runner):
+
+    def __init__(self, config_path: str):
+        super().__init__(config_path)
+        self.load_experiment("cad_strategy")
+        
+        self.generate_config = ConfigManager.get("runner", "generate")
+        self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
+        
+        self.output_dir = self.generate_config["output_dir"]
+        self.model_dir = self.generate_config["model_dir"]
+        self.object_name = self.generate_config["object_name"]
+        self.blender_bin_path = self.generate_config["blender_bin_path"]
+        self.generator_script_path = self.generate_config["generator_script_path"]
+        self.voxel_size = self.generate_config["voxel_size"]
+        
+        self.max_shot_view_num = self.reconstruct_config["max_shot_view_num"]
+        self.min_shot_new_pts_num = self.reconstruct_config["min_shot_new_pts_num"]
+        self.min_coverage_increase = self.reconstruct_config["min_coverage_increase"]
+        self.scan_points_threshold = self.reconstruct_config["scan_points_threshold"]
+
+    def create_experiment(self, backup_name=None):
+        super().create_experiment(backup_name)
+
+    def load_experiment(self, backup_name=None):
+        super().load_experiment(backup_name)
+
+    def split_scan_pts_and_obj_pts(self, world_pts, z_threshold=0):
+        scan_pts = world_pts[world_pts[:, 2] < z_threshold]
+        obj_pts = world_pts[world_pts[:, 2] >= z_threshold]
+        return scan_pts, obj_pts
+
+    def loop_scan(self, first_cam_to_real_world):
+        view_pts_list = []
+        first_view_data = CommunicateUtil.get_view_data(init=True)
+        ControlUtil.absolute_rotate_display_table(90)
+        first_pts = ViewUtil.get_pts(first_view_data)
+        first_real_world_pts = PtsUtil.transform_point_cloud(
+            first_pts, first_cam_to_real_world
+        )
+        _, first_splitted_real_world_pts = self.split_scan_pts_and_obj_pts(
+            first_real_world_pts
+        )
+        view_pts_list.append(first_splitted_real_world_pts)
+        shot_num = 4
+        for i in range(shot_num-1):
+            view_data = CommunicateUtil.get_view_data()
+            if i != shot_num - 2:
+                ControlUtil.absolute_rotate_display_table(90)
+            time.sleep(0.5)
+            if view_data is None:
+                Log.error("No view data received")
+                continue
+            view_pts = ViewUtil.get_pts(view_data)
+            
+            real_world_pts = PtsUtil.transform_point_cloud(
+                view_pts, first_cam_to_real_world
+            )
+            _, splitted_real_world_pts = self.split_scan_pts_and_obj_pts(
+                real_world_pts
+            )
+            view_pts_list.append(splitted_real_world_pts)
+        
+        return view_pts_list
+    
+    
+    def register(self):
+        ControlUtil.connect_robot()
+        """ init robot """
+        Log.info("start init")
+        ControlUtil.init()
+        first_cam_to_real_world = ControlUtil.get_pose()
+        """ loop shooting """
+        Log.info("start loop shooting")
+        view_pts_list = self.loop_scan(first_cam_to_real_world) 
+        """ register """
+        Log.info("start register")
+        pts = np.vstack(view_pts_list)
+        if not os.path.exists(self.output_dir):
+            os.makedirs(self.output_dir)
+        if not os.path.exists(os.path.join(self.output_dir, self.object_name)):
+            os.makedirs(os.path.join(self.output_dir, self.object_name))
+        scene_dir = os.path.join(self.output_dir, self.object_name)
+        model_path = os.path.join(self.model_dir, self.object_name, "mesh.obj")
+        cad_model = trimesh.load(model_path)
+        real_world_to_cad = PtsUtil.register(pts, cad_model)
+        cad_to_real_world = np.linalg.inv(real_world_to_cad)
+        Log.success("finish init and register")
+        real_world_to_blender_world = np.eye(4)
+        real_world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
+        cad_model_real_world: trimesh.Trimesh = cad_model.apply_transform(
+            cad_to_real_world
+        )
+        cad_model_real_world.export(os.path.join(scene_dir, "mesh.obj"))
+        #downsampled_pts = PtsUtil.voxel_downsample_point_cloud(pts, self.voxel_size)
+        np.savetxt(os.path.join(scene_dir, "pts_for_init_reg.txt"), pts)
+        return cad_to_real_world
+        
+    def render_data(self):
+        scene_dir = os.path.join(self.output_dir, self.object_name)
+        result = subprocess.run(
+            [
+                self.blender_bin_path,
+                "-b",
+                "-P",
+                self.generator_script_path,
+                "--",
+                scene_dir,
+            ],
+            capture_output=True,
+            text=True,
+        )
+        print(result)
+        
+    def preprocess_data(self):
+        save_scene_data(self.output_dir, self.object_name, file_type="npy")
+        
+    def get_scan_points_indices(self, scan_points, mask, object_mask_label= (0, 255, 0, 255), cam_intrinsic = None, cam_extrinsic = None):
+        scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
+        points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
+        points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
+        points_image_homogeneous /= points_image_homogeneous[:, 2:]
+        pixel_x = points_image_homogeneous[:, 0].astype(int)
+        pixel_y = points_image_homogeneous[:, 1].astype(int)
+        h, w = mask.shape[:2]
+        valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
+        mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
+        selected_points_indices = np.where((mask_colors != object_mask_label).any(axis=-1))[0]
+        selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
+        return selected_points_indices
+        
+    def run_one_model(self, model_name):
+        scene_dir = os.path.join(self.output_dir, model_name)
+        ControlUtil.connect_robot()
+        """ init robot """
+        Log.info("start init")
+        ControlUtil.init()
+        first_cam_to_real_world = ControlUtil.get_pose()
+        """ loop shooting """
+        Log.info("start loop shooting")
+        view_pts_list = self.loop_scan(first_cam_to_real_world)
+        """ register """
+        cad_path = os.path.join(scene_dir, "mesh.obj")
+        cad_model = trimesh.load(cad_path)
+        Log.info("start register")
+        init_pts = np.vstack(view_pts_list)
+        real_world_to_cad = PtsUtil.register(init_pts, cad_model)
+        curr_cad_to_real_world = np.linalg.inv(real_world_to_cad)
+        
+        # np.savetxt(os.path.join("/home/yan20/nbv_rec/project/franka_control/debug", "pts_for_init_reg.txt"), init_pts)
+        # debug_cad = cad_model.apply_transform(curr_cad_to_real_world)
+        # debug_cad.export(os.path.join("/home/yan20/nbv_rec/project/franka_control/debug", "cad_for_init_reg.obj"))
+        
+        
+        pts_dir = os.path.join(scene_dir, "pts")
+        sample_view_pts_list = []
+        
+        frame_num = len(os.listdir(pts_dir))
+        
+        for frame_idx in range(frame_num):
+            pts_path = os.path.join(scene_dir, "pts", f"{frame_idx}.npy")
+ 
+            point_cloud = np.load(pts_path)
+            if point_cloud.shape[0] != 0:
+                sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(
+                    point_cloud, self.voxel_size
+                )
+                sample_view_pts_list.append(sampled_point_cloud)
+            else:
+                sample_view_pts_list.append(np.zeros((0, 3)))
+            
+        
+        """ close-loop online registery strategy """
+        scanned_pts = PtsUtil.voxel_downsample_point_cloud(init_pts, voxel_size=self.voxel_size)
+        shot_pts_list = []
+        
+        last_coverage = 0
+        Log.info("start close-loop control")
+        cnt = 0
+        mask_list = []
+        cam_to_cad_list = []
+        cam_R_to_cad_list = []
+        shot_view_idx_list = []
+        scan_points_path = os.path.join(self.output_dir, self.object_name, "scan_points.txt")
+        display_table_scan_points = np.loadtxt(scan_points_path)
+        for i in range(frame_num):
+            path = DataLoadUtil.get_path(self.output_dir, self.object_name, i)
+            mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+            mask_list.append((mask_L, mask_R))
+            cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+            cam_to_cad = cam_info["cam_to_world"]
+            cam_to_cad_list.append(cam_to_cad)
+            cam_R_to_cad = cam_info["cam_to_world_R"]
+            cam_R_to_cad_list.append(cam_R_to_cad)
+            
+        selected_view = []
+        while True:
+            import ipdb; ipdb.set_trace()
+            history_indices = []
+            scan_points_idx_list = []
+            
+            for i in range(frame_num):
+                cam_to_cad = cam_to_cad_list[i]
+                cam_R_to_cad = cam_R_to_cad_list[i]
+                curr_cam_L_to_world = curr_cad_to_real_world @ cam_to_cad
+                curr_cam_R_to_world = curr_cad_to_real_world @ cam_R_to_cad
+                scan_points_indices_L = self.get_scan_points_indices(display_table_scan_points, mask_list[i][0], cam_intrinsic=cam_info["cam_intrinsic"], cam_extrinsic=curr_cam_L_to_world)
+                scan_points_indices_R = self.get_scan_points_indices(display_table_scan_points, mask_list[i][1], cam_intrinsic=cam_info["cam_intrinsic"], cam_extrinsic=curr_cam_R_to_world)
+                scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
+                scan_points_idx_list.append(scan_points_indices)
+            
+            for shot_view_idx in shot_view_idx_list:
+                history_indices.append(scan_points_idx_list[shot_view_idx])
+            cad_scanned_pts = PtsUtil.transform_point_cloud(scanned_pts, np.linalg.inv(curr_cad_to_real_world))
+            next_best_view, next_best_coverage, next_best_covered_num = (
+                ReconstructionUtil.compute_next_best_view_with_overlap(
+                    cad_scanned_pts,
+                    sample_view_pts_list,
+                    selected_view,
+                    history_indices,
+                    scan_points_idx_list,
+                    threshold=self.voxel_size,
+                    overlap_area_threshold=25,
+                    scan_points_threshold=self.scan_points_threshold,
+                )
+            )
+            if next_best_view is None:
+                Log.warning("No next best view found")
+            selected_view.append(next_best_view)
+            nbv_path = DataLoadUtil.get_path(self.output_dir, self.object_name, next_best_view)
+            nbv_cam_info = DataLoadUtil.load_cam_info(nbv_path, binocular=True)
+            nbv_cam_to_cad = nbv_cam_info["cam_to_world_O"]
+            nbv_cam_to_world = curr_cad_to_real_world @ nbv_cam_to_cad
+            target_camera_pose = nbv_cam_to_world @ ControlUtil.CAMERA_CORRECTION
+            ControlUtil.move_to(target_camera_pose)
+            ''' get world pts '''
+            time.sleep(0.5)
+            view_data = CommunicateUtil.get_view_data()
+            if view_data is None:
+                Log.error("No view data received")
+                continue
+            shot_view_idx_list.append(next_best_view)
+            cam_shot_pts = ViewUtil.get_pts(view_data)
+            world_shot_pts = PtsUtil.transform_point_cloud(
+                cam_shot_pts, first_cam_to_real_world
+            )
+            _, world_splitted_shot_pts = self.split_scan_pts_and_obj_pts(
+                world_shot_pts
+            )
+            shot_pts_list.append(world_splitted_shot_pts)
+            
+            debug_dir = os.path.join(scene_dir, "debug")
+            if not os.path.exists(debug_dir):
+                os.makedirs(debug_dir)
+            
+            last_scanned_pts_num = scanned_pts.shape[0]
+            import ipdb;ipdb.set_trace()
+            new_scanned_pts = PtsUtil.voxel_downsample_point_cloud(
+                np.vstack([scanned_pts, world_splitted_shot_pts]), self.voxel_size
+            )
+            # last_real_world_to_cad = real_world_to_cad
+            # real_world_to_cad = PtsUtil.register_fine(new_scanned_pts, cad_model)
+            # # rot distance of two rotation matrix
+            # rot_dist = np.arccos(
+            #     (np.trace(real_world_to_cad[:3, :3].T @ last_real_world_to_cad[:3, :3]) - 1) / 2
+            # )
+            # print(f"-----rot dist: {rot_dist}") 
+            curr_cad_to_real_world = np.linalg.inv(real_world_to_cad)
+            
+            cad_splitted_shot_pts = PtsUtil.transform_point_cloud(world_splitted_shot_pts, real_world_to_cad)
+            np.savetxt(os.path.join(debug_dir,  f"shot_pts_{cnt}.txt"), world_splitted_shot_pts)
+            np.savetxt(os.path.join(debug_dir,  f"render_pts_{cnt}.txt"), sample_view_pts_list[next_best_view])
+            
+            np.savetxt(os.path.join(debug_dir,  f"reg_scanned_pts_{cnt}.txt"), new_scanned_pts)
+            cad_pts = cad_model.vertices
+            world_cad_pts = PtsUtil.transform_point_cloud(cad_pts, curr_cad_to_real_world)
+            np.savetxt(os.path.join(debug_dir,  f"world_cad_pts_{cnt}.txt"), world_cad_pts)
+            #import ipdb; ipdb.set_trace()
+            
+            new_scanned_pts_num = new_scanned_pts.shape[0]
+            scanned_pts = new_scanned_pts
+            Log.info(
+                f"Next Best cover pts: {next_best_covered_num}, Best coverage: {next_best_coverage}"
+            )
+            
+            coverage_rate_increase = next_best_coverage - last_coverage
+            if coverage_rate_increase < self.min_coverage_increase:
+                Log.info(f"Coverage rate = {coverage_rate_increase} < {self.min_coverage_increase}, stop scanning")
+                # break
+            last_coverage = next_best_coverage
+            
+            new_added_pts_num = new_scanned_pts_num - last_scanned_pts_num
+            if new_added_pts_num < self.min_shot_new_pts_num:
+                Log.info(f"New added pts num = {new_added_pts_num} < {self.min_shot_new_pts_num}")
+                #ipdb.set_trace()
+            if len(shot_pts_list) >= self.max_shot_view_num:
+                Log.info(f"Scanned view num = {len(shot_pts_list)} >= {self.max_shot_view_num}, stop scanning")
+                #break
+            cnt += 1
+                
+        Log.success("[Part 4/4] finish close-loop control")
+
+
+    def run(self):
+        self.run_one_model(self.object_name)
+
+# ---------------------------- test ---------------------------- #
+if __name__ == "__main__":
+
+    model_path = r"/home/yan20/nbv_rec/data/models/workpiece_1/mesh.obj"
+    model = trimesh.load(model_path)
--- a/runners/cad_close_loop_online_reg_strategy.py
+++ b/runners/cad_close_loop_online_reg_strategy.py
@@ -0,0 +1,347 @@
+import os
+import time
+import trimesh
+import tempfile
+import subprocess
+import numpy as np
+from PytorchBoot.runners.runner import Runner
+from PytorchBoot.config import ConfigManager
+import PytorchBoot.stereotype as stereotype
+from PytorchBoot.utils.log_util import Log
+from PytorchBoot.status import status_manager
+
+from utils.control_util import ControlUtil
+from utils.communicate_util import CommunicateUtil
+from utils.pts_util import PtsUtil
+from utils.reconstruction_util import ReconstructionUtil
+from utils.preprocess_util import save_scene_data
+from utils.data_load import DataLoadUtil
+from utils.view_util import ViewUtil
+
+from beans.pointcloud import Pointcloud,PointcloudGroup
+
+
+@stereotype.runner("CAD_close_loop_online_reg_strategy_runner")
+class CADCloseLoopOnlineRegStrategyRunner(Runner):
+
+    def __init__(self, config_path: str):
+        super().__init__(config_path)
+        self.load_experiment("cad_strategy")
+        
+        self.generate_config = ConfigManager.get("runner", "generate")
+        self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
+        
+        self.output_dir = self.generate_config["output_dir"]
+        self.model_dir = self.generate_config["model_dir"]
+        self.object_name = self.generate_config["object_name"]
+        self.blender_bin_path = self.generate_config["blender_bin_path"]
+        self.generator_script_path = self.generate_config["generator_script_path"]
+        self.voxel_size = self.generate_config["voxel_size"]
+        
+        self.max_shot_view_num = self.reconstruct_config["max_shot_view_num"]
+        self.min_shot_new_pts_num = self.reconstruct_config["min_shot_new_pts_num"]
+        self.min_coverage_increase = self.reconstruct_config["min_coverage_increase"]
+        self.scan_points_threshold = self.reconstruct_config["scan_points_threshold"]
+
+    def create_experiment(self, backup_name=None):
+        super().create_experiment(backup_name)
+
+    def load_experiment(self, backup_name=None):
+        super().load_experiment(backup_name)
+
+    def split_scan_pts_and_obj_pts(self, world_pts, z_threshold=0):
+        scan_pts = world_pts[world_pts[:, 2] < z_threshold]
+        obj_pts = world_pts[world_pts[:, 2] >= z_threshold]
+        return scan_pts, obj_pts
+
+    def loop_scan(self, first_cam_to_real_world):
+        loop_scan_pcl_group = PointcloudGroup(name="loop_scan_pcl_group")
+        first_view_data = CommunicateUtil.get_view_data(init=True)
+        ControlUtil.absolute_rotate_display_table(90)
+        first_pts = ViewUtil.get_pts(first_view_data)
+        
+        first_real_world_pts = PtsUtil.transform_point_cloud(
+            first_pts, first_cam_to_real_world
+        )
+        _, first_splitted_real_world_pts = self.split_scan_pts_and_obj_pts(
+            first_real_world_pts
+        )
+        first_real_pcl = Pointcloud(first_splitted_real_world_pts, first_cam_to_real_world, name='first_real_world_pts')
+        loop_scan_pcl_group.add(first_real_pcl)
+        shot_num = 4
+        for i in range(shot_num-1):
+            view_data = CommunicateUtil.get_view_data()
+            if i != shot_num - 2:
+                ControlUtil.absolute_rotate_display_table(90)
+            time.sleep(0.5)
+            if view_data is None:
+                Log.error("No view data received")
+                continue
+            view_pts = ViewUtil.get_pts(view_data)
+            
+            real_world_pts = PtsUtil.transform_point_cloud(
+                view_pts, first_cam_to_real_world
+            )
+            _, splitted_real_world_pts = self.split_scan_pts_and_obj_pts(
+                real_world_pts
+            )
+            loop_scan_real_world_pcl = Pointcloud(splitted_real_world_pts, None, name=f'loop_scan_real_world_pcl_{(i+1)*90} degree')
+            loop_scan_pcl_group.add(loop_scan_real_world_pcl)
+        
+        #loop_scan_pcl_group.visualize()
+        ControlUtil.absolute_rotate_display_table(90)
+        return loop_scan_pcl_group
+    
+    
+    def register(self):
+        ControlUtil.connect_robot()
+        """ init robot """
+        Log.info("start init")
+        ControlUtil.init()
+        first_cam_to_real_world = ControlUtil.get_pose()
+        """ loop shooting """
+        Log.info("start loop shooting")
+        loop_scan_pcl_group = self.loop_scan(first_cam_to_real_world) 
+        """ register """
+        Log.info("start register")
+        merged_world_scan_loop_pcl = loop_scan_pcl_group.merge_pointclouds(name="merged_world_scan_loop_pts")
+        pts = merged_world_scan_loop_pcl.points
+        if not os.path.exists(self.output_dir):
+            os.makedirs(self.output_dir)
+        if not os.path.exists(os.path.join(self.output_dir, self.object_name)):
+            os.makedirs(os.path.join(self.output_dir, self.object_name))
+        scene_dir = os.path.join(self.output_dir, self.object_name)
+        model_path = os.path.join(self.model_dir, self.object_name, "mesh.obj")
+        cad_model = trimesh.load(model_path)
+        real_world_to_cad = PtsUtil.register(pts, cad_model)
+        cad_to_real_world = np.linalg.inv(real_world_to_cad)
+        Log.success("finish init and register")
+        real_world_to_blender_world = np.eye(4)
+        real_world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
+        cad_model_real_world: trimesh.Trimesh = cad_model.apply_transform(
+            cad_to_real_world
+        )
+        cad_model_real_world.export(os.path.join(scene_dir, "mesh.obj"))
+        #downsampled_pts = PtsUtil.voxel_downsample_point_cloud(pts, self.voxel_size)
+        np.savetxt(os.path.join(scene_dir, "pts_for_init_reg.txt"), pts)
+        return cad_to_real_world
+        
+    def render_data(self):
+        scene_dir = os.path.join(self.output_dir, self.object_name)
+        result = subprocess.run(
+            [
+                self.blender_bin_path,
+                "-b",
+                "-P",
+                self.generator_script_path,
+                "--",
+                scene_dir,
+            ],
+            capture_output=True,
+            text=True,
+        )
+        print(result)
+        
+    def preprocess_data(self):
+        save_scene_data(self.output_dir, self.object_name, file_type="npy")
+        
+    def get_scan_points_indices(self, scan_points, mask, object_mask_label= (0, 255, 0, 255), cam_intrinsic = None, cam_extrinsic = None):
+        scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
+        points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
+        points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
+        points_image_homogeneous /= points_image_homogeneous[:, 2:]
+        pixel_x = points_image_homogeneous[:, 0].astype(int)
+        pixel_y = points_image_homogeneous[:, 1].astype(int)
+        h, w = mask.shape[:2]
+        valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
+        mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
+        selected_points_indices = np.where((mask_colors != object_mask_label).any(axis=-1))[0]
+        selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
+        return selected_points_indices
+        
+    def run_one_model(self, model_name):
+        scene_dir = os.path.join(self.output_dir, model_name)
+        ControlUtil.connect_robot()
+        """ init robot """
+        Log.info("start init")
+        ControlUtil.init()
+        first_cam_to_real_world = ControlUtil.get_pose()
+        """ loop shooting """
+        Log.info("start loop shooting")
+        loop_scan_pcl_group = self.loop_scan(first_cam_to_real_world)
+        """ register """
+        cad_path = os.path.join(scene_dir, "mesh.obj")
+        cad_model = trimesh.load(cad_path)
+        Log.info("start register")
+        merged_world_scan_loop_pcl = loop_scan_pcl_group.merge_pointclouds(name="merged_world_scan_loop_pts")
+        real_world_to_cad = PtsUtil.register(merged_world_scan_loop_pcl.points, cad_model)
+        curr_cad_to_real_world = np.linalg.inv(real_world_to_cad)
+        
+        # np.savetxt(os.path.join("/home/yan20/nbv_rec/project/franka_control/debug", "pts_for_init_reg.txt"), init_pts)
+        # debug_cad = cad_model.apply_transform(curr_cad_to_real_world)
+        # debug_cad.export(os.path.join("/home/yan20/nbv_rec/project/franka_control/debug", "cad_for_init_reg.obj"))
+        
+        
+        pts_dir = os.path.join(scene_dir, "pts")
+        sample_view_pts_list = []
+        
+        frame_num = len(os.listdir(pts_dir))
+        
+        for frame_idx in range(frame_num):
+            pts_path = os.path.join(scene_dir, "pts", f"{frame_idx}.npy")
+ 
+            point_cloud = np.load(pts_path)
+            if point_cloud.shape[0] != 0:
+                sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(
+                    point_cloud, self.voxel_size
+                )
+                sample_view_pts_list.append(sampled_point_cloud)
+            else:
+                sample_view_pts_list.append(np.zeros((0, 3)))
+            
+        
+        """ close-loop online registery strategy """
+        
+        scanned_pcl = merged_world_scan_loop_pcl.voxel_downsample(self.voxel_size)
+        scanned_pts = scanned_pcl.points
+        shot_pcl_group = PointcloudGroup(name="world_shot_pcl_group")
+        
+        last_coverage = 0
+        Log.info("start close-loop control")
+        cnt = 0
+        mask_list = []
+        cam_to_cad_list = []
+        cam_R_to_cad_list = []
+        shot_view_idx_list = []
+        scan_points_path = os.path.join(self.output_dir, self.object_name, "scan_points.txt")
+        display_table_scan_points = np.loadtxt(scan_points_path)
+        for i in range(frame_num):
+            path = DataLoadUtil.get_path(self.output_dir, self.object_name, i)
+            mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+            mask_list.append((mask_L, mask_R))
+            cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+            cam_to_cad = cam_info["cam_to_world"]
+            cam_to_cad_list.append(cam_to_cad)
+            cam_R_to_cad = cam_info["cam_to_world_R"]
+            cam_R_to_cad_list.append(cam_R_to_cad)
+            
+        selected_view = []
+        while True:
+            #import ipdb; ipdb.set_trace()
+            history_indices = []
+            scan_points_idx_list = []
+            
+            for i in range(frame_num):
+                cam_to_cad = cam_to_cad_list[i]
+                cam_R_to_cad = cam_R_to_cad_list[i]
+                curr_cam_L_to_world = curr_cad_to_real_world @ cam_to_cad
+                curr_cam_R_to_world = curr_cad_to_real_world @ cam_R_to_cad
+                scan_points_indices_L = self.get_scan_points_indices(display_table_scan_points, mask_list[i][0], cam_intrinsic=cam_info["cam_intrinsic"], cam_extrinsic=curr_cam_L_to_world)
+                scan_points_indices_R = self.get_scan_points_indices(display_table_scan_points, mask_list[i][1], cam_intrinsic=cam_info["cam_intrinsic"], cam_extrinsic=curr_cam_R_to_world)
+                scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
+                scan_points_idx_list.append(scan_points_indices)
+            
+            for shot_view_idx in shot_view_idx_list:
+                history_indices.append(scan_points_idx_list[shot_view_idx])
+            cad_scanned_pts = PtsUtil.transform_point_cloud(scanned_pts, np.linalg.inv(curr_cad_to_real_world))
+            next_best_view, next_best_coverage, next_best_covered_num = (
+                ReconstructionUtil.compute_next_best_view_with_overlap(
+                    cad_scanned_pts,
+                    sample_view_pts_list,
+                    selected_view,
+                    history_indices,
+                    scan_points_idx_list,
+                    threshold=self.voxel_size,
+                    overlap_area_threshold=10,
+                    scan_points_threshold=self.scan_points_threshold,
+                )
+            )
+            if next_best_view is None:
+                Log.warning("No next best view found")
+            selected_view.append(next_best_view)
+            nbv_path = DataLoadUtil.get_path(self.output_dir, self.object_name, next_best_view)
+            nbv_cam_info = DataLoadUtil.load_cam_info(nbv_path, binocular=True)
+            nbv_cam_to_cad = nbv_cam_info["cam_to_world_O"]
+            nbv_cam_to_world = curr_cad_to_real_world @ nbv_cam_to_cad
+            target_camera_pose = nbv_cam_to_world @ ControlUtil.CAMERA_CORRECTION
+            ControlUtil.move_to(target_camera_pose)
+            ''' get world pts '''
+            time.sleep(0.5)
+            view_data = CommunicateUtil.get_view_data()
+            if view_data is None:
+                Log.error("No view data received")
+                continue
+            shot_view_idx_list.append(next_best_view)
+            cam_shot_pts = ViewUtil.get_pts(view_data)
+            world_shot_pts = PtsUtil.transform_point_cloud(
+                cam_shot_pts, first_cam_to_real_world
+            )
+            _, world_splitted_shot_pts = self.split_scan_pts_and_obj_pts(
+                world_shot_pts
+            )
+            world_shot_pcl = Pointcloud(world_splitted_shot_pts, nbv_cam_to_world, name=f"world_shot_pts_{cnt}")
+            cad_render_pcl = Pointcloud(sample_view_pts_list[next_best_view], nbv_cam_to_world, name=f"cad_render_pts_{cnt}")
+            world_render_pcl = cad_render_pcl.transform(curr_cad_to_real_world)
+            shot_pcl_group.add(world_shot_pcl)
+            render_shot_group = PointcloudGroup(pointclouds=[world_render_pcl, world_shot_pcl],name="render_shot_group")
+            import ipdb; ipdb.set_trace()
+            render_shot_group.visualize()
+            debug_dir = os.path.join(scene_dir, "debug")
+            if not os.path.exists(debug_dir):
+                os.makedirs(debug_dir)
+            
+            last_scanned_pts_num = scanned_pts.shape[0]
+            new_scanned_pts = PtsUtil.voxel_downsample_point_cloud(
+                np.vstack([scanned_pts, world_splitted_shot_pts]), self.voxel_size
+            )
+            # last_real_world_to_cad = real_world_to_cad
+            # real_world_to_cad = PtsUtil.register_fine(new_scanned_pts, cad_model)
+            # # rot distance of two rotation matrix
+            # rot_dist = np.arccos(
+            #     (np.trace(real_world_to_cad[:3, :3].T @ last_real_world_to_cad[:3, :3]) - 1) / 2
+            # )
+            # print(f"-----rot dist: {rot_dist}") 
+            curr_cad_to_real_world = np.linalg.inv(real_world_to_cad)
+            
+            cad_splitted_shot_pts = PtsUtil.transform_point_cloud(world_splitted_shot_pts, real_world_to_cad)
+            np.savetxt(os.path.join(debug_dir,  f"shot_pts_{cnt}.txt"), cad_splitted_shot_pts)
+            np.savetxt(os.path.join(debug_dir,  f"render_pts_{cnt}.txt"), sample_view_pts_list[next_best_view])
+            
+            np.savetxt(os.path.join(debug_dir,  f"reg_scanned_pts_{cnt}.txt"), new_scanned_pts)
+            cad_pts = cad_model.vertices
+            world_cad_pts = PtsUtil.transform_point_cloud(cad_pts, curr_cad_to_real_world)
+            np.savetxt(os.path.join(debug_dir,  f"world_cad_pts_{cnt}.txt"), world_cad_pts)
+            #import ipdb; ipdb.set_trace()
+            
+            new_scanned_pts_num = new_scanned_pts.shape[0]
+            scanned_pts = new_scanned_pts
+            Log.info(
+                f"Next Best cover pts: {next_best_covered_num}, Best coverage: {next_best_coverage}"
+            )
+            
+            coverage_rate_increase = next_best_coverage - last_coverage
+            if coverage_rate_increase < self.min_coverage_increase:
+                Log.info(f"Coverage rate = {coverage_rate_increase} < {self.min_coverage_increase}, stop scanning")
+                # break
+            last_coverage = next_best_coverage
+            
+            new_added_pts_num = new_scanned_pts_num - last_scanned_pts_num
+            if new_added_pts_num < self.min_shot_new_pts_num:
+                Log.info(f"New added pts num = {new_added_pts_num} < {self.min_shot_new_pts_num}")
+                #ipdb.set_trace()
+            if len(shot_pcl_group) >= self.max_shot_view_num:
+                Log.info(f"Scanned view num = {len(shot_pcl_group)} >= {self.max_shot_view_num}, stop scanning")
+                #break
+            cnt += 1
+                
+        Log.success("[Part 4/4] finish close-loop control")
+
+
+    def run(self):
+        self.run_one_model(self.object_name)
+
+# ---------------------------- test ---------------------------- #
+if __name__ == "__main__":
+
+    model_path = r"/home/yan20/nbv_rec/data/models/workpiece_1/mesh.obj"
+    model = trimesh.load(model_path)
--- a/runners/cad_close_loop_strategy.py
+++ b/runners/cad_close_loop_strategy.py
@@ -0,0 +1,242 @@
+import os
+import time
+import trimesh
+import tempfile
+import subprocess
+import numpy as np
+from PytorchBoot.runners.runner import Runner
+from PytorchBoot.config import ConfigManager
+import PytorchBoot.stereotype as stereotype
+from PytorchBoot.utils.log_util import Log
+from PytorchBoot.status import status_manager
+
+from utils.control_util import ControlUtil
+from utils.communicate_util import CommunicateUtil
+from utils.pts_util import PtsUtil
+from utils.reconstruction_util import ReconstructionUtil
+from utils.preprocess_util import save_scene_data
+from utils.data_load import DataLoadUtil
+from utils.view_util import ViewUtil
+
+
+@stereotype.runner("CAD_close_loop_strategy_runner")
+class CADCloseLoopStrategyRunner(Runner):
+
+    def __init__(self, config_path: str):
+        super().__init__(config_path)
+        self.load_experiment("cad_strategy")
+        self.generate_config = ConfigManager.get("runner", "generate")
+        self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
+        self.blender_bin_path = self.generate_config["blender_bin_path"]
+        self.generator_script_path = self.generate_config["generator_script_path"]
+        self.model_dir = self.generate_config["model_dir"]
+        self.voxel_size = self.generate_config["voxel_size"]
+        
+        self.max_shot_view_num = self.reconstruct_config["max_shot_view_num"]
+        self.min_shot_new_pts_num = self.reconstruct_config["min_shot_new_pts_num"]
+        self.min_coverage_increase = self.reconstruct_config["min_coverage_increase"]
+        self.scan_points_threshold = self.reconstruct_config["scan_points_threshold"]
+
+    def create_experiment(self, backup_name=None):
+        super().create_experiment(backup_name)
+
+    def load_experiment(self, backup_name=None):
+        super().load_experiment(backup_name)
+
+    def split_scan_pts_and_obj_pts(self, world_pts, z_threshold=0):
+        scan_pts = world_pts[world_pts[:, 2] < z_threshold]
+        obj_pts = world_pts[world_pts[:, 2] >= z_threshold]
+        return scan_pts, obj_pts
+
+    def run_one_model(self, model_name):
+        time_code = time.strftime("%Y%m%d%H%M%S", time.localtime())
+        temp_dir = "/home/yan20/nbv_rec/project/franka_control/temp_output_" + time_code 
+        if not os.path.exists(temp_dir):
+            os.makedirs(temp_dir)
+        ControlUtil.connect_robot()
+        """ init robot """
+        Log.info("[Part 1/5] start init and register")
+        ControlUtil.init()
+
+        """ load CAD model """
+        model_path = os.path.join(self.model_dir, model_name, "mesh.obj")
+        temp_name = "cad_model_world"
+        cad_model = trimesh.load(model_path)
+        """ take first view """
+        Log.info("[Part 1/5] take first view data")
+        view_data = CommunicateUtil.get_view_data(init=True)
+        first_cam_pts = ViewUtil.get_pts(view_data)
+        first_cam_to_real_world = ControlUtil.get_pose()
+        first_real_world_pts = PtsUtil.transform_point_cloud(
+            first_cam_pts, first_cam_to_real_world
+        )
+        _, first_splitted_real_world_pts = self.split_scan_pts_and_obj_pts(
+            first_real_world_pts
+        )
+        
+        debug_dir = os.path.join(temp_dir, "debug")
+        if not os.path.exists(debug_dir):
+            os.makedirs(debug_dir)
+            
+        np.savetxt(os.path.join(debug_dir,"FIRST_shot.txt"), first_splitted_real_world_pts)
+        # np.savetxt(f"first_real_pts_{model_name}.txt", first_splitted_real_world_pts)
+        """ register """
+        Log.info("[Part 1/4] do registeration")
+        real_world_to_cad = PtsUtil.register(first_splitted_real_world_pts, cad_model)
+        cad_to_real_world = np.linalg.inv(real_world_to_cad)
+        Log.success("[Part 1/4] finish init and register")
+        real_world_to_blender_world = np.eye(4)
+        real_world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
+        cad_model_real_world: trimesh.Trimesh = cad_model.apply_transform(
+            cad_to_real_world
+        )
+        cad_model_real_world.export(
+            os.path.join(temp_dir, f"real_world_{temp_name}.obj")
+        )
+        cad_model_blender_world: trimesh.Trimesh = cad_model.apply_transform(
+            real_world_to_blender_world
+        )
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_dir =  "/home/yan20/nbv_rec/project/franka_control/temp_output_" + time_code 
+            cad_model_blender_world.export(os.path.join(temp_dir, f"{temp_name}.obj"))
+            """ sample view """
+            Log.info("[Part 2/4] start running renderer")
+            subprocess.run(
+                [
+                    self.blender_bin_path,
+                    "-b",
+                    "-P",
+                    self.generator_script_path,
+                    "--",
+                    temp_dir,
+                ],
+                capture_output=True,
+                text=True,
+            )
+            Log.success("[Part 2/4] finish running renderer")
+            
+            """ preprocess """
+            Log.info("[Part 3/4] start preprocessing data")
+            save_scene_data(temp_dir, temp_name)
+            Log.success("[Part 3/4] finish preprocessing data")
+
+            pts_dir = os.path.join(temp_dir, temp_name, "pts")
+            sample_view_pts_list = []
+            scan_points_idx_list = []
+            frame_num = len(os.listdir(pts_dir))
+            
+            for frame_idx in range(frame_num):
+                pts_path = os.path.join(temp_dir, temp_name, "pts", f"{frame_idx}.txt")
+                idx_path = os.path.join(
+                    temp_dir, temp_name, "scan_points_indices", f"{frame_idx}.npy"
+                )
+                point_cloud = np.loadtxt(pts_path)
+                if point_cloud.shape[0] != 0:
+                    sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(
+                        point_cloud, self.voxel_size
+                    )
+                indices = np.load(idx_path)
+                try:
+                    len(indices)
+                except:
+                    indices = np.array([indices])
+                sample_view_pts_list.append(sampled_point_cloud)
+                scan_points_idx_list.append(indices)
+
+            """ close-loop strategy """
+            scanned_pts = PtsUtil.voxel_downsample_point_cloud(
+                first_splitted_real_world_pts, self.voxel_size
+            )
+            shot_pts_list = [first_splitted_real_world_pts]
+            history_indices = []
+            last_coverage = 0
+            
+            
+            
+            Log.info("[Part 4/4] start close-loop control")
+            cnt = 0
+            selected_view = []
+            while True:
+                #import ipdb; ipdb.set_trace()
+                next_best_view, next_best_coverage, next_best_covered_num = (
+                    ReconstructionUtil.compute_next_best_view_with_overlap(
+                        scanned_pts,
+                        sample_view_pts_list,
+                        selected_view, 
+                        history_indices,
+                        scan_points_idx_list,
+                        threshold=self.voxel_size,
+                        overlap_area_threshold=50,
+                        scan_points_threshold=self.scan_points_threshold,
+                    )
+                )
+                selected_view.append(next_best_view)
+                nbv_path = DataLoadUtil.get_path(temp_dir, temp_name, next_best_view)
+                nbv_cam_info = DataLoadUtil.load_cam_info(nbv_path, binocular=True)
+                nbv_cam_to_world = nbv_cam_info["cam_to_world_O"]
+                ControlUtil.move_to(nbv_cam_to_world)
+                ''' get world pts '''
+                time.sleep(0.5)
+                view_data = CommunicateUtil.get_view_data()
+                if view_data is None:
+                    Log.error("No view data received")
+                    continue
+                cam_shot_pts = ViewUtil.get_pts(view_data)
+                world_shot_pts = PtsUtil.transform_point_cloud(
+                    cam_shot_pts, first_cam_to_real_world
+                )
+                _, world_splitted_shot_pts = self.split_scan_pts_and_obj_pts(
+                    world_shot_pts
+                )
+                shot_pts_list.append(world_splitted_shot_pts)
+                
+                
+                np.savetxt(os.path.join(debug_dir,  f"shot_pts_{cnt}.txt"), world_splitted_shot_pts)
+                np.savetxt(os.path.join(debug_dir,  f"render_pts_{cnt}.txt"), sample_view_pts_list[next_best_view])
+                #real_world_to_cad = PtsUtil.register(first_splitted_real_world_pts, cad_model)
+                #import ipdb; ipdb.set_trace()
+                last_scanned_pts_num = scanned_pts.shape[0]
+                new_scanned_pts = PtsUtil.voxel_downsample_point_cloud(
+                    np.vstack([scanned_pts, world_splitted_shot_pts]), self.voxel_size
+                )
+                new_scanned_pts_num = new_scanned_pts.shape[0]
+                history_indices.append(scan_points_idx_list[next_best_view])
+                scanned_pts = new_scanned_pts
+                Log.info(
+                    f"Next Best cover pts: {next_best_covered_num}, Best coverage: {next_best_coverage}"
+                )
+                
+                coverage_rate_increase = next_best_coverage - last_coverage
+                if coverage_rate_increase < self.min_coverage_increase:
+                    Log.info(f"Coverage rate = {coverage_rate_increase} < {self.min_coverage_increase}, stop scanning")
+                    # break
+                last_coverage = next_best_coverage
+                
+                new_added_pts_num = new_scanned_pts_num - last_scanned_pts_num
+                if new_added_pts_num < self.min_shot_new_pts_num:
+                    Log.info(f"New added pts num = {new_added_pts_num} < {self.min_shot_new_pts_num}")
+                    #ipdb.set_trace()
+                if len(shot_pts_list) >= self.max_shot_view_num:
+                    Log.info(f"Scanned view num = {len(shot_pts_list)} >= {self.max_shot_view_num}, stop scanning")
+                    #break
+                cnt += 1
+                
+            Log.success("[Part 4/4] finish close-loop control")
+
+
+    def run(self):
+        total = len(os.listdir(self.model_dir))
+        model_start_idx = self.generate_config["model_start_idx"]
+        count_object = model_start_idx
+        # for model_name in os.listdir(self.model_dir[model_start_idx:]):
+        #     Log.info(f"[{count_object}/{total}]Processing {model_name}")
+        #     self.run_one_model(model_name)
+        #     Log.success(f"[{count_object}/{total}]Finished processing {model_name}")
+        self.run_one_model("box_1")
+
+# ---------------------------- test ---------------------------- #
+if __name__ == "__main__":
+
+    model_path = r"C:\Users\hofee\Downloads\mesh.obj"
+    model = trimesh.load(model_path)
--- a/runners/cad_open_loop_strategy.py
+++ b/runners/cad_open_loop_strategy.py
@@ -0,0 +1,224 @@
+import os
+import time
+import trimesh
+import tempfile
+import subprocess
+import numpy as np
+from PytorchBoot.runners.runner import Runner
+from PytorchBoot.config import ConfigManager
+import PytorchBoot.stereotype as stereotype
+from PytorchBoot.utils.log_util import Log
+from PytorchBoot.status import status_manager
+
+from utils.control_util import ControlUtil
+from utils.communicate_util import CommunicateUtil
+from utils.pts_util import PtsUtil
+from utils.reconstruction_util import ReconstructionUtil
+from utils.preprocess_util import save_scene_data
+from utils.data_load import DataLoadUtil
+from utils.view_util import ViewUtil
+
+
+@stereotype.runner("CAD_open_loop_strategy_runner")
+class CADOpenLoopStrategyRunner(Runner):
+
+    def __init__(self, config_path: str):
+        super().__init__(config_path)
+        self.load_experiment("cad_open_loop_strategy")
+        self.status_info = {
+            "status_manager": status_manager,
+            "app_name": "cad",
+            "runner_name": "CAD_open_loop_strategy_runner"
+        }
+        self.generate_config = ConfigManager.get("runner", "generate")
+        self.reconstruct_config = ConfigManager.get("runner", "reconstruct")
+        self.blender_bin_path = self.generate_config["blender_bin_path"]
+        self.generator_script_path = self.generate_config["generator_script_path"]
+        self.model_dir = self.generate_config["model_dir"]
+        self.voxel_size = self.generate_config["voxel_size"]
+        self.max_view = self.generate_config["max_view"]
+        self.min_view = self.generate_config["min_view"]
+        self.max_diag = self.generate_config["max_diag"]
+        self.min_diag = self.generate_config["min_diag"]
+        self.min_cam_table_included_degree = self.generate_config["min_cam_table_included_degree"]
+        self.random_view_ratio = self.generate_config["random_view_ratio"]
+        
+        self.soft_overlap_threshold = self.reconstruct_config["soft_overlap_threshold"]
+        self.hard_overlap_threshold = self.reconstruct_config["hard_overlap_threshold"]
+        self.scan_points_threshold = self.reconstruct_config["scan_points_threshold"]
+        
+    def create_experiment(self, backup_name=None):
+        super().create_experiment(backup_name)
+    
+    def load_experiment(self, backup_name=None):
+        super().load_experiment(backup_name)
+    
+    def split_scan_pts_and_obj_pts(self, world_pts, z_threshold = 0):
+        scan_pts = world_pts[world_pts[:,2] < z_threshold]
+        obj_pts = world_pts[world_pts[:,2] >= z_threshold]
+        return scan_pts, obj_pts
+
+    def run_one_model(self, model_name):
+        temp_dir = "/home/yan20/nbv_rec/project/franka_control/temp_output"
+        result = dict()
+        
+        shot_pts_list = []
+        ControlUtil.connect_robot()
+        ''' init robot '''
+        Log.info("[Part 1/5] start init and register")
+        ControlUtil.init()
+        
+        ''' load CAD model '''
+        model_path = os.path.join(self.model_dir, model_name,"mesh.ply")
+        temp_name = "cad_model_world"
+        cad_model = trimesh.load(model_path)
+        ''' take first view '''
+        Log.info("[Part 1/5] take first view data")
+        view_data = CommunicateUtil.get_view_data(init=True)
+        first_cam_pts = ViewUtil.get_pts(view_data)
+        first_cam_to_real_world = ControlUtil.get_pose()
+        first_real_world_pts = PtsUtil.transform_point_cloud(first_cam_pts, first_cam_to_real_world)
+        _, first_splitted_real_world_pts = self.split_scan_pts_and_obj_pts(first_real_world_pts)
+        np.savetxt(f"first_real_pts_{model_name}.txt", first_splitted_real_world_pts)
+        ''' register '''
+        Log.info("[Part 1/5] do registeration")
+        real_world_to_cad = PtsUtil.register(first_splitted_real_world_pts, cad_model) 
+        cad_to_real_world = np.linalg.inv(real_world_to_cad)
+        Log.success("[Part 1/5] finish init and register")
+        real_world_to_blender_world = np.eye(4)
+        real_world_to_blender_world[:3, 3] = np.asarray([0, 0, 0.9215])
+        cad_model_real_world:trimesh.Trimesh = cad_model.apply_transform(cad_to_real_world)
+        cad_model_real_world.export(os.path.join(temp_dir, f"real_world_{temp_name}.obj"))
+        cad_model_blender_world:trimesh.Trimesh = cad_model.apply_transform(real_world_to_blender_world)
+        
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_dir = "/home/yan20/nbv_rec/project/franka_control/temp_output"
+            cad_model_blender_world.export(os.path.join(temp_dir, f"{temp_name}.obj"))
+            scene_dir = os.path.join(temp_dir, temp_name)
+            ''' sample view '''
+            Log.info("[Part 2/5] start running renderer")
+            subprocess.run([
+                self.blender_bin_path, '-b', '-P', self.generator_script_path, '--', temp_dir
+            ], capture_output=True, text=True)
+            Log.success("[Part 2/5] finish running renderer")
+            
+        
+            world_model_points = np.loadtxt(os.path.join(scene_dir, "points_and_normals.txt"))[:,:3]
+            ''' preprocess '''
+            Log.info("[Part 3/5] start preprocessing data")
+            save_scene_data(temp_dir, temp_name)
+            Log.success("[Part 3/5] finish preprocessing data")
+            
+            pts_dir = os.path.join(temp_dir,temp_name,"pts")
+            sample_view_pts_list = []
+            scan_points_idx_list = []
+            frame_num = len(os.listdir(pts_dir))
+            
+            for frame_idx in range(frame_num):
+                pts_path = os.path.join(temp_dir,temp_name, "pts", f"{frame_idx}.txt")
+                idx_path = os.path.join(temp_dir,temp_name, "scan_points_indices", f"{frame_idx}.npy")
+                point_cloud = np.loadtxt(pts_path)
+                if point_cloud.shape[0] != 0:
+                    sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud, self.voxel_size)
+                indices = np.load(idx_path) 
+                try:
+                    len(indices)
+                except:
+                    indices = np.array([indices])
+                sample_view_pts_list.append(sampled_point_cloud)
+                scan_points_idx_list.append(indices)
+
+            ''' generate strategy '''
+            
+            Log.info("[Part 4/5] start generating strategy")
+            limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(
+                world_model_points, sample_view_pts_list, 
+                scan_points_indices_list = scan_points_idx_list,
+                init_view=0,
+                threshold=self.voxel_size, 
+                soft_overlap_threshold = self.soft_overlap_threshold,
+                hard_overlap_threshold = self.hard_overlap_threshold,
+                scan_points_threshold = self.scan_points_threshold, 
+                status_info=self.status_info
+            )
+            Log.success("[Part 4/5] finish generating strategy")
+        
+            ''' extract cam_to_world sequence  '''
+            cam_to_world_seq = []
+            coveraget_rate_seq = []
+            render_pts = []
+            idx_seq = []
+            for idx, coverage_rate in limited_useful_view:
+                path = DataLoadUtil.get_path(temp_dir, temp_name, idx)
+                cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+                cam_to_world_seq.append(cam_info["cam_to_world_O"])
+                coveraget_rate_seq.append(coverage_rate)
+                idx_seq.append(idx)
+                render_pts.append(sample_view_pts_list[idx])
+            
+        Log.info("[Part 5/5] start running robot")
+        ''' take best seq view '''
+        #import ipdb; ipdb.set_trace()
+        target_scanned_pts = np.concatenate(sample_view_pts_list)
+        voxel_downsampled_target_scanned_pts = PtsUtil.voxel_downsample_point_cloud(target_scanned_pts, self.voxel_size)
+        result = dict()
+        gt_scanned_pts = np.concatenate(render_pts, axis=0)
+        voxel_down_sampled_gt_scanned_pts = PtsUtil.voxel_downsample_point_cloud(gt_scanned_pts, self.voxel_size)
+        result["gt_final_coverage_rate_cad"] = ReconstructionUtil.compute_coverage_rate(voxel_downsampled_target_scanned_pts, voxel_down_sampled_gt_scanned_pts, self.voxel_size)
+        step = 1
+        result["real_coverage_rate_seq"] = []
+        for cam_to_world in cam_to_world_seq:
+            try:
+                ControlUtil.move_to(cam_to_world)
+                ''' get world pts '''
+                time.sleep(0.5)
+                view_data = CommunicateUtil.get_view_data()
+                if view_data is None:
+                    Log.error("Failed to get view data")
+                    continue
+                cam_pts = ViewUtil.get_pts(view_data)
+                shot_pts_list.append(cam_pts)
+                scanned_pts = np.concatenate(shot_pts_list, axis=0)
+                voxel_down_sampled_scanned_pts = PtsUtil.voxel_downsample_point_cloud(scanned_pts, self.voxel_size)
+                voxel_down_sampled_scanned_pts_world = PtsUtil.transform_point_cloud(voxel_down_sampled_scanned_pts, first_cam_to_real_world)
+                curr_CR = ReconstructionUtil.compute_coverage_rate(voxel_downsampled_target_scanned_pts, voxel_down_sampled_scanned_pts_world, self.voxel_size)
+                Log.success(f"(step {step}/{len(cam_to_world_seq)}) current coverage: {curr_CR} | gt coverage: {result['gt_final_coverage_rate_cad']}")
+                result["real_final_coverage_rate"] = curr_CR
+                result["real_coverage_rate_seq"].append(curr_CR)
+                step += 1
+            except Exception as e:
+                Log.error(f"Failed to move to {cam_to_world}")
+                Log.error(e)
+            
+            #import ipdb;ipdb.set_trace()
+
+        for idx in range(len(shot_pts_list)):
+            if not os.path.exists(os.path.join(temp_dir, temp_name, "shot_pts")):
+                os.makedirs(os.path.join(temp_dir, temp_name, "shot_pts"))
+            if not os.path.exists(os.path.join(temp_dir, temp_name, "render_pts")):
+                os.makedirs(os.path.join(temp_dir, temp_name, "render_pts"))
+            shot_pts = PtsUtil.transform_point_cloud(shot_pts_list[idx], first_cam_to_real_world)
+            np.savetxt(os.path.join(temp_dir, temp_name, "shot_pts", f"{idx}.txt"), shot_pts)
+            np.savetxt(os.path.join(temp_dir, temp_name, "render_pts", f"{idx}.txt"), render_pts[idx])
+        
+            
+        Log.success("[Part 5/5] finish running robot")
+        
+        Log.debug(result)
+    
+    def run(self):
+        total = len(os.listdir(self.model_dir))
+        model_start_idx = self.generate_config["model_start_idx"]
+        count_object = model_start_idx
+        for model_name in os.listdir(self.model_dir[model_start_idx:]):
+            Log.info(f"[{count_object}/{total}]Processing {model_name}")
+            self.run_one_model(model_name)
+            Log.success(f"[{count_object}/{total}]Finished processing {model_name}")
+        
+            
+# ---------------------------- test ---------------------------- #
+if __name__ == "__main__":
+    
+    model_path = r"C:\Users\hofee\Downloads\mesh.obj"
+    model = trimesh.load(model_path)
+    
--- a/runners/calibration.py
+++ b/runners/calibration.py
@@ -0,0 +1,475 @@
+import cv2
+import numpy as np
+import open3d as o3d
+import sys, os
+sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
+from utils.control_util import ControlUtil
+from utils.communicate_util import CommunicateUtil
+import pickle
+from tqdm import tqdm
+import json
+import pyrealsense2 as rs
+
+# marker parameters
+MARKER_TYPE = "aruco"
+MARKER_INFO = {"id": 582, "size": 0.1}
+
+# view parameters
+VIEW_NUM = 6
+LOOK_AT_PT = np.array([0.5, 0, 0.2])
+TOP_DOWN_DIST = [0.3, 0.45, 0.55]
+TOP_DOWN_DIRECTION = np.array([0., 0., -1.])
+RADIUS = [0.25, 0.2, 0.1]
+INIT_GRIPPER_POSE = np.array([
+    [1, 0, 0, 0.56], 
+    [0, -1, 0, 0], 
+    [0, 0, -1, 0.6], 
+    [0, 0, 0, 1]    
+])
+
+DEFAULT_EE_TO_BASE = np.array([
+    [0, -1, 0, 0], 
+    [1, 0, 0, 0], 
+    [0, 0, 1, 0], 
+    [0, 0, 0, 1]
+])
+
+
+
+class HandEyeCalibration:
+    """
+    A class to perform hand-eye calibration for a robotic system using various camera types and markers.
+    Attributes:
+        marker_to_cam (list): List to store marker to camera transformation matrices.
+        end_effector_to_base (list): List to store end-effector to base transformation matrices.
+        marker_type (str): Type of marker used for calibration (default is "aruco").
+        marker_info (dict): Information about the marker, including its ID and size.
+        camera (object): Camera object used for capturing images.
+        intrinsics (dict): Intrinsic parameters of the camera.
+        dist (optional): Distortion coefficients of the camera.
+        marker_solver (object): Solver object for detecting and solving marker poses.
+        camera_type (str): Type of camera used (default is "RealSense"). "RealSense" and "Inspire" are supported.
+    Methods:
+        get_marker_to_cam_pose(color_image, visualize=False):
+            Gets the pose of the marker relative to the camera from a color image.
+        capture_single_frame():
+            Captures a single frame from the camera and gets the current end-effector pose.
+        capture_frames(view_num, look_at_pt, top_down_dist, top_down_direrction, radius, visualize=False):
+            Captures multiple frames by moving the end-effector to different poses and records the marker and end-effector poses.
+        get_hand_eye(marker_to_cam_poses, end_effector_to_base_poses):
+            Computes the hand-eye calibration matrix using the captured marker and end-effector poses.
+        calibrate(view_num, look_at_pt, top_down_dist, top_down_direrction, radius):
+            Performs the full calibration process by capturing frames and computing the hand-eye calibration matrix.
+    """
+    def __init__(self, dist=None, marker_type="aruco", marker_info={"id": 0, "size": 0.1}, camera_type="RealSense"):
+        self.marker_to_cam = []
+        self.end_effector_to_base = []
+        self.marker_type = marker_type
+        self.marker_info = marker_info
+        ControlUtil.connect_robot()
+        ControlUtil.franka_reset()
+
+        if camera_type == "RealSense":
+            self.camera = RealSenseCamera()
+        elif camera_type == "Inspire":
+            self.camera = InspireCamera()
+        else:
+            assert False, "Not implemented yet"
+        
+        self.intrinsics = self.camera.get_color_intrinsics()
+        self.dist = dist
+        self.marker_type = marker_type
+        if self.marker_type == "aruco":
+            self.marker_solver = ArcuoMarkerSolver(self.intrinsics, dist, marker_info)
+        self.camera_type = camera_type
+        
+
+
+
+    def get_marker_to_cam_pose(self, color_image, visualize=False):
+        res = self.marker_solver.get_marker_to_cam_pose(color_image, visualize)
+        return res
+    
+    
+    def capture_single_frame(self):
+        color_image = self.camera.get_color_image()
+        end_effector_pose = ControlUtil.get_curr_gripper_to_base_pose()
+        return color_image, end_effector_pose
+    
+    
+    def capture_frames(self, view_num, look_at_pt, top_down_dist, top_down_direrction, radius, visualize=False):
+        circle_num = len(radius)
+        poses = []
+        for i in range(circle_num):
+            reverse_order = i % 2 == 1
+            pose_generator = EndEffectorPoseGenerator(view_num, look_at_pt, top_down_dist[i], top_down_direrction, radius[i], reverse_order)
+            poses += [pose @ DEFAULT_EE_TO_BASE for pose in pose_generator.poses]
+        if visualize == True:
+            pose_visualizer = PoseVisualizer(poses)
+            pose_visualizer.visualize()
+        pbar = tqdm(total=len(poses), desc="Capturing frames")
+        for pose in poses:
+            pbar.update(1)
+            ControlUtil.set_gripper_pose(pose)
+            color_image, end_effector_pose = self.capture_single_frame()
+            if color_image is None:
+                print("Failed to capture the frame")
+                continue
+            res = self.get_marker_to_cam_pose(color_image, visualize=True)
+            if res is None:
+                print("Failed to get marker pose")
+                continue
+            else:
+                marker_to_cam_pose = np.eye(4)
+                rvec = res["rvec"]
+                tvec = res["tvec"]
+                rot_mat = cv2.Rodrigues(rvec)[0]
+                marker_to_cam_pose[:3, :3] = rot_mat
+                marker_to_cam_pose[:3, 3] = tvec
+            self.marker_to_cam.append(marker_to_cam_pose)
+            self.end_effector_to_base.append(end_effector_pose)
+
+
+    def get_hand_eye(self, marker_to_cam_poses, end_effector_to_base_poses):
+        ############################## Debug ################################
+        # with open("marker_to_cam_poses.pkl", "wb") as f:
+        #     pickle.dump(marker_to_cam_poses, f)
+        # with open("end_effector_to_base_poses.pkl", "wb") as f:
+        #     pickle.dump(end_effector_to_base_poses, f)
+        # with open("marker_to_cam_poses.pkl", "rb") as f:
+        #     marker_to_cam_poses = pickle.load(f)
+        # with open("end_effector_to_base_poses.pkl", "rb") as f:
+        #     end_effector_to_base_poses = pickle.load(f)
+        #####################################################################
+        R_marker_to_cam = []
+        t_marker_to_cam = []
+        R_end_effector_to_base = []
+        t_end_effector_to_base = []
+        for index, marker_to_cam_pose in enumerate(marker_to_cam_poses):
+            if marker_to_cam_pose is None:
+                continue
+            R_marker_to_cam.append(marker_to_cam_pose[:3, :3])
+            t_marker_to_cam.append(marker_to_cam_pose[:3, 3])
+            R_end_effector_to_base.append(end_effector_to_base_poses[index][:3, :3])
+            t_end_effector_to_base.append(end_effector_to_base_poses[index][:3, 3])
+        if len(R_marker_to_cam) < 3:
+            print("Not enough data to calibrate")
+            return None
+        R_marker_to_cam = np.array(R_marker_to_cam)
+        t_marker_to_cam = np.array(t_marker_to_cam)
+        R_end_effector_to_base = np.array(R_end_effector_to_base)
+        t_end_effector_to_base = np.array(t_end_effector_to_base)
+        from ipdb import set_trace; set_trace()
+        hand_eye = cv2.calibrateHandEye(R_end_effector_to_base, t_end_effector_to_base, R_marker_to_cam, t_marker_to_cam, cv2.CALIB_HAND_EYE_TSAI)
+        cv2.destroyAllWindows()
+        return hand_eye
+    
+    
+    def calibrate(self, view_num, look_at_pt, top_down_dist, top_down_direrction, radius):
+        self.capture_frames(view_num, look_at_pt, top_down_dist, top_down_direrction, radius)
+        marker_to_cam = np.array(self.marker_to_cam)
+        end_effector_to_base = np.array(self.end_effector_to_base)
+        hand_eye = self.get_hand_eye(marker_to_cam, end_effector_to_base)
+        # hand_eye = self.get_hand_eye(None, None)
+        return hand_eye
+
+
+class ArcuoMarkerSolver:
+    def __init__(self, intrinsics, dist=None, marker_info={"id": 0, "size": 0.1}):
+        self.intrinsics = intrinsics
+        self.dist = dist
+        self.marker_info = marker_info
+
+    def solve_arcuo_marker_pose(self, color_image):
+        self.res = []
+        aruco_dict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_ARUCO_ORIGINAL)
+        aruco_params = cv2.aruco.DetectorParameters()
+        cv2.imshow("color_image", color_image)
+        cv2.waitKey(500)
+        corners, ids, rejectedImgPoints = cv2.aruco.detectMarkers(color_image, aruco_dict, parameters=aruco_params)
+        if ids is None:
+            return None
+        for i in range(len(ids)):
+            rvec, tvec, _ = cv2.aruco.estimatePoseSingleMarkers(corners[i], self.marker_info["size"], self.intrinsics, self.dist)
+            self.res.append({
+                'rvec': rvec,
+                'tvec': tvec,
+                'corners': corners[i],
+                'ids': ids[i]
+            })
+        for res in self.res:
+            if res["ids"][0] == self.marker_info["id"]:
+                return res
+        return None
+
+
+    def get_marker_to_cam_pose(self, color_image, visualize=False):
+        res = self.solve_arcuo_marker_pose(color_image)
+        if visualize and res is not None:
+            self.visualize_res(color_image, res)
+        return res
+    
+    
+    def visualize_res(self, color_image, res):
+        rvec = res["rvec"]
+        tvec = res["tvec"]
+        corners = res["corners"]
+        ids = res["ids"]
+        aruco_frame = cv2.drawFrameAxes(color_image, self.intrinsics, self.dist, rvec, tvec, 0.05)
+        aruco_frame = cv2.aruco.drawDetectedMarkers(aruco_frame, (corners, ), ids)
+        cv2.imshow("aruco_frame", aruco_frame)
+        cv2.waitKey(500)
+
+
+class EndEffectorPoseGenerator:
+    def __init__(self, view_num, look_at_pt, top_down_dist, top_down_direrction, radius, reverse_order=False):
+        self.view_num = view_num
+        self.look_at_pt = look_at_pt
+        self.top_down_dist = top_down_dist
+        self.top_down_direrction = top_down_direrction
+        self.radius = radius
+        self.poses = self.generate_poses(reverse_order)
+        
+    def generate_poses(self, reverse_order=False):
+        poses = []
+        for i in range(self.view_num):
+            order = i if not reverse_order else self.view_num - i - 1
+            pose = self.get_pose(order)
+            poses.append(pose)
+        return poses
+    
+    
+    def get_pose(self, i):
+        angle = 0.8 * np.pi * i / self.view_num + 0.7 * np.pi
+        start_point_x = self.look_at_pt[0] + self.radius * np.cos(angle)
+        start_point_y = self.look_at_pt[1] + self.radius * np.sin(angle)
+        start_point_z = self.look_at_pt[2] + self.top_down_dist
+        
+        look_at_vector = self.look_at_pt - np.array([start_point_x, start_point_y, start_point_z])
+        look_at_vector = look_at_vector / np.linalg.norm(look_at_vector)
+        up_vector = self.top_down_direrction
+        noise = np.random.uniform(0, 0.1, up_vector.shape)
+        right_vector = np.cross(look_at_vector, up_vector + noise)
+        right_vector = right_vector / np.linalg.norm(right_vector)
+        up_vector = np.cross(look_at_vector, right_vector)
+        up_vector = up_vector / np.linalg.norm(up_vector)
+        
+        pose = np.eye(4)
+        pose[:3, :3] = self.get_rotation(right_vector, up_vector, look_at_vector)
+        pose[:3, 3] = np.array([start_point_x, start_point_y, start_point_z])
+        return pose
+        
+        
+    def get_rotation(self, right_vector, up_vector, look_at_vector):
+        rotation = np.eye(3)
+        rotation[:, 0] = right_vector
+        rotation[:, 1] = up_vector
+        rotation[:, 2] = look_at_vector
+        return rotation
+        
+         
+class PoseVisualizer:
+    def __init__(self, poses):
+        self.poses = poses
+        
+    def visualize(self):
+
+        vis = o3d.visualization.Visualizer()
+        vis.create_window()
+
+        for pose in self.poses:
+            mesh_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.05)
+            mesh_frame.transform(pose)
+            vis.add_geometry(mesh_frame)
+        print("Press q to close the window")
+        vis.run()
+
+
+class CAMERA:
+    def __init__(self):
+        self.color_intrinsics = None
+        self.depth_intrinsics = None
+        self.color = None
+        self.depth = None
+        
+    def get_color_intrinsics(self):
+        pass
+    
+    def get_depth_intrinsics(self):
+        pass
+    
+    def get_color_image(self):
+        pass
+    
+    def get_depth_image(self):
+        pass
+    
+
+class RealSenseCamera(CAMERA):
+    def __init__(self):
+        super().__init__()
+        self.pipeline = rs.pipeline()
+        config = rs.config()
+        config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)
+        config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
+        align = rs.align(rs.stream.color)
+        self.pipeline.start(config)
+        self.color_intrinsics = self.get_intrinsics("color")
+        self.depth_intrinsics = self.get_intrinsics("depth")
+        
+    def get_color_intrinsics(self):
+        return self.color_intrinsics
+    
+    def get_depth_intrinsics(self):
+        return self.depth_intrinsics
+    
+    def get_color_image(self):
+        frames = self.pipeline.wait_for_frames()
+        color_frame = frames.get_color_frame()
+        color_image = np.asanyarray(color_frame.get_data())
+        return color_image
+    
+    def get_depth_image(self):
+        frames = self.pipeline.wait_for_frames()
+        depth_frame = frames.get_depth_frame()
+        depth_image = np.asanyarray(depth_frame.get_data())
+        return depth_image
+    
+    def get_intrinsics(self, camra_type):
+        if camra_type == "color":
+            profile = self.pipeline.get_active_profile()
+            color_profile = rs.video_stream_profile(profile.get_stream(rs.stream.color))
+            color_intrinsics = color_profile.get_intrinsics()
+            K = [[color_intrinsics.fx, 0, color_intrinsics.ppx], [0, color_intrinsics.fy, color_intrinsics.ppy], [0, 0, 1]]
+            return np.array(K)
+        elif camra_type == "depth":
+            profile = self.pipeline.get_active_profile()
+            depth_profile = rs.video_stream_profile(profile.get_stream(rs.stream.depth))
+            depth_intrinsics = depth_profile.get_intrinsics()
+            K = [[depth_intrinsics.fx, 0, depth_intrinsics.ppx], [0, depth_intrinsics.fy, depth_intrinsics.ppy], [0, 0, 1]]
+            return np.array(K)
+        
+
+class InspireCamera(CAMERA):
+    def __init__(self):
+        super().__init__()
+        ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
+        view_data = CommunicateUtil.get_view_data(init=True)
+        self.color_intrinsics = self.get_intrinsics(view_data["color_intrinsics"], view_data["color_image"])
+        self.depth_intrinsics = self.get_intrinsics(view_data["depth_intrinsics"], view_data["depth_image"])
+        
+    def get_color_intrinsics(self):
+        return self.color_intrinsics
+    
+    def get_depth_intrinsics(self):
+        return self.depth_intrinsics
+    
+    def get_color_image(self):
+        view_data = CommunicateUtil.get_view_data()
+        if view_data is None:
+            return None
+        else:
+            color_image = view_data["color_image"]
+            return color_image
+    
+    def get_depth_image(self):
+        view_data = CommunicateUtil.get_view_data()
+        if view_data is None:
+            return None
+        else:
+            depth_image = view_data["depth_image"]
+            return depth_image
+    
+    def get_intrinsics(self, intrinsics, image):    
+        height = image.shape[0]
+        width = image.shape[1]
+        scale_h = height / intrinsics["height"]
+        scale_w = width / intrinsics["width"]
+        fx = intrinsics["fx"] * scale_w
+        fy = intrinsics["fy"] * scale_h
+        cx = intrinsics["cx"] * scale_w
+        cy = intrinsics["cy"] * scale_h
+        K = np.array([
+            [fx,  0, cx],
+            [ 0, fy, cy],
+            [ 0,  0,  1]
+        ])
+        return K
+    
+
+def example_view_generator():
+    pose_generator = EndEffectorPoseGenerator(VIEW_NUM, LOOK_AT_PT, TOP_DOWN_DIST, TOP_DOWN_DIRECTION, RADIUS)
+    base_pose = np.eye(4)
+    test_pose = np.array([
+        [1, 0, 0, 0], 
+        [0, -1, 0, 0], 
+        [0, 0, -1, 0.2], 
+        [0, 0, 0, 1]    
+    ])
+    defalut_ee2base = np.array([
+        [0, -1, 0, 0], 
+        [1, 0, 0, 0], 
+        [0, 0, 1, 0], 
+        [0, 0, 0, 1]
+    ])
+    poses = [pose @ defalut_ee2base for pose in pose_generator.poses] + [base_pose, test_pose]
+    visualizer = PoseVisualizer(poses)
+    visualizer.visualize()
+
+
+def example_calibration():
+    # ControlUtil.connect_robot()
+    # ControlUtil.franka_reset()
+    # ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
+    # view_data = CommunicateUtil.get_view_data(init=True)
+    # intrinsics = get_intrinsics(view_data["color_intrinsics"], view_data["color_image"])
+    calibration = HandEyeCalibration(
+        # intrinsics=intrinsics, 
+        dist=None, 
+        marker_type=MARKER_TYPE, 
+        marker_info=MARKER_INFO,
+        camera_type="Inspire"
+    )
+    hand_eye = calibration.calibrate(
+        view_num=VIEW_NUM, 
+        look_at_pt=LOOK_AT_PT, 
+        top_down_dist=TOP_DOWN_DIST, 
+        top_down_direrction=TOP_DOWN_DIRECTION, 
+        radius=RADIUS
+    )
+    print(hand_eye)
+    
+    res = {
+        'rotation': hand_eye[0].tolist(),
+        'translation': hand_eye[1].tolist()
+    }
+    with open("hand_eye.json", "w") as f:
+        json.dump(res, f, indent=4)
+    print("Hand eye calibration finished")
+    return res
+
+
+def get_depth_to_end_effector_pose(calibration_res_path):
+    with open(calibration_res_path, "r") as f:
+        data = json.load(f)
+    rotation = np.array(data["rotation"])
+    translation = np.array(data["translation"])
+    color_to_ee = np.eye(4)
+    color_to_ee[:3, :3] = rotation
+    color_to_ee[:3, 3:] = translation
+    ControlUtil.connect_robot()
+    ControlUtil.franka_reset()
+    ControlUtil.set_gripper_pose(INIT_GRIPPER_POSE)
+    view_data = CommunicateUtil.get_view_data(init=True)
+    depth_to_color = np.array(view_data["depth_to_color"])
+    depth_to_ee = color_to_ee @ depth_to_color
+    print(depth_to_ee)
+    with open("depth_to_ee.json", "w") as f:
+        json.dump(depth_to_ee.tolist(), f, indent=4)
+
+
+if __name__ == "__main__":
+    res = example_calibration()
+    get_depth_to_end_effector_pose("hand_eye.json")
+    # example_view_generator()
+    
--- a/runners/inference.py
+++ b/runners/inference.py
@@ -0,0 +1,182 @@
+import os
+import numpy as np
+import requests
+
+from PytorchBoot.runners import Runner
+from PytorchBoot.config import ConfigManager
+import PytorchBoot.stereotype as stereotype
+from PytorchBoot.utils.log_util import Log
+
+from utils.pose_util import PoseUtil
+from utils.control_util import ControlUtil
+from utils.communicate_util import CommunicateUtil
+from utils.pts_util import PtsUtil
+from utils.view_util import ViewUtil
+from scipy.spatial.transform import Rotation as R
+
+@stereotype.runner("inference_runner")
+class InferenceRunner(Runner):
+
+    def __init__(self, config_path: str):
+        super().__init__(config_path)
+        self.load_experiment("inference")
+        self.inference_config = ConfigManager.get("runner", "inference")
+        self.server_url = self.inference_config["server_url"]
+        self.max_iter = self.inference_config["max_iter"]
+        self.max_fail = self.inference_config["max_fail"]
+        self.max_no_new_pts = self.inference_config["max_no_new_pts"]
+        self.min_delta_pts_num = self.inference_config["min_delta_pts_num"]
+    
+    def check_stop(self, cnt, fail, no_new_pts):
+        if cnt > self.max_iter:
+            return True
+        if fail > self.max_fail:
+            return True
+        if no_new_pts > self.max_no_new_pts:
+            return True
+        return False
+    
+    def split_scan_pts_and_obj_pts(self, world_pts, z_threshold=0.005):
+        scan_pts = world_pts[world_pts[:, 2] < z_threshold]
+        obj_pts = world_pts[world_pts[:, 2] >= z_threshold]
+        return scan_pts, obj_pts
+
+    def run(self):
+        ControlUtil.connect_robot()
+        ControlUtil.init()
+        scanned_pts_list = []
+        scanned_n_to_world_pose = []
+        cnt = 0
+        fail = 0
+        no_new_pts = 0
+
+        view_data = CommunicateUtil.get_view_data(init=True)
+        first_cam_to_real_world = ControlUtil.get_pose()
+        if view_data is None:
+            Log.error("No view data received")
+            fail += 1
+            return
+        cam_shot_pts = ViewUtil.get_pts(view_data)
+        
+        # ########################################### DEBUG ###########################################
+        # sensor_pts = PtsUtil.transform_point_cloud(cam_shot_pts, np.linalg.inv(ControlUtil.CAMERA_TO_LEFT_CAMERA))
+        # np.savetxt('/home/yan20/Downloads/left_pts_0.txt', cam_shot_pts)
+        # np.savetxt('/home/yan20/Downloads/sensor_pts_0.txt', sensor_pts)
+        # #############################################################################################
+        
+        world_shot_pts = PtsUtil.transform_point_cloud(
+            cam_shot_pts, first_cam_to_real_world
+        )
+        #import ipdb; ipdb.set_trace()
+        _, world_splitted_shot_pts = self.split_scan_pts_and_obj_pts(
+            world_shot_pts
+        )
+        curr_pts = world_splitted_shot_pts
+        curr_pose = first_cam_to_real_world
+        curr_pose_6d = PoseUtil.matrix_to_rotation_6d_numpy(curr_pose[:3,:3])
+        curr_pose_9d = np.concatenate([curr_pose_6d, curr_pose[:3, 3]], axis=0)
+        scanned_pts_list.append(curr_pts.tolist())
+        scanned_n_to_world_pose.append(curr_pose_9d.tolist())
+        combined_pts = np.concatenate(scanned_pts_list, axis=0)
+        downsampled_combined_pts = PtsUtil.voxel_downsample_point_cloud(combined_pts, 0.003)
+        last_downsampled_combined_pts_num = downsampled_combined_pts.shape[0]
+        Log.info(f"First downsampled combined pts: {last_downsampled_combined_pts_num}")
+        
+        ####################################### DEBUG #######################################
+        # scan_count = 0
+        # save_path = "/home/yan20/Downloads/pts"
+        # if not os.path.exists(save_path):
+        #     os.makedirs(save_path)
+        #####################################################################################
+
+        while not self.check_stop(cnt, fail, no_new_pts):
+            
+            data = {
+                "scanned_pts": scanned_pts_list,
+                "scanned_n_to_world_pose_9d": scanned_n_to_world_pose
+            }
+            # pts = np.array(data['scanned_pts'][-1])
+            # np.savetxt(f'{save_path}/pts_{scan_count}.txt', pts)
+            # scan_count += 1
+            response = requests.post(self.server_url, json=data)
+            result = response.json()
+            pred_pose_9d = np.array(result["pred_pose_9d"])
+            pred_rot_6d = pred_pose_9d[0, :6]
+            pred_trans = pred_pose_9d[0, 6:]
+            pred_rot_mat = PoseUtil.rotation_6d_to_matrix_numpy(pred_rot_6d)
+            pred_pose = np.eye(4)
+            pred_pose[:3, :3] = pred_rot_mat
+            pred_pose[:3, 3] = pred_trans
+            target_camera_pose = pred_pose @ ControlUtil.CAMERA_CORRECTION
+                        
+            ControlUtil.move_to(target_camera_pose)
+            cnt += 1
+
+            view_data = CommunicateUtil.get_view_data()
+            if view_data is None:
+                Log.error("No view data received")
+                fail += 1
+                continue
+            cam_shot_pts = ViewUtil.get_pts(view_data)
+            left_cam_to_first_left_cam = ViewUtil.get_camera_pose(view_data)
+            curr_pose = first_cam_to_real_world @ left_cam_to_first_left_cam @ np.linalg.inv(ControlUtil.CAMERA_CORRECTION)
+            # curr_pose = pred_pose
+            # curr_pose = first_cam_to_real_world @ ViewUtil.get_camera_pose(view_data)
+            print('pred_pose:', pred_pose)
+            print('curr_pose:', curr_pose)
+
+            ##################################### DEBUG #####################################
+            # print(curr_pose)
+            # rot = R.from_matrix(curr_pose[:3, :3])
+            # quat_xyzw = rot.as_quat()
+            # translation = curr_pose[:3, 3]
+            # print(quat_xyzw, translation)
+            # # from ipdb import set_trace; set_trace()
+            #################################################################################           
+            
+            world_shot_pts = PtsUtil.transform_point_cloud(
+                cam_shot_pts, first_cam_to_real_world
+            )
+            _, world_splitted_shot_pts = self.split_scan_pts_and_obj_pts(
+                world_shot_pts
+            )
+            curr_pts = world_splitted_shot_pts
+            import ipdb; ipdb.set_trace()
+            from utils.vis import visualizeUtil
+            visualizeUtil.visualize_pts_and_camera(world_splitted_shot_pts,pred_pose)
+            curr_pose_6d = PoseUtil.matrix_to_rotation_6d_numpy(curr_pose[:3,:3])
+            curr_pose_9d = np.concatenate([curr_pose_6d, curr_pose[:3, 3]], axis=0)
+            scanned_pts_list.append(curr_pts.tolist())
+            scanned_n_to_world_pose.append(curr_pose_9d.tolist())
+            combined_pts = np.concatenate(scanned_pts_list, axis=0)
+            downsampled_combined_pts = PtsUtil.voxel_downsample_point_cloud(combined_pts, 0.003)
+
+            curr_downsampled_combined_pts_num = downsampled_combined_pts.shape[0]
+            Log.info(f"Downsampled combined pts: {curr_downsampled_combined_pts_num}")
+            if curr_downsampled_combined_pts_num < last_downsampled_combined_pts_num + self.min_delta_pts_num:
+                no_new_pts += 1
+                Log.info(f"No new points, cnt: {cnt}, fail: {fail}, no_new_pts: {no_new_pts}")
+                continue
+        Log.success("Inference finished")
+        # self.save_inference_result(scanned_pts_list, downsampled_combined_pts)
+        
+    def create_experiment(self, backup_name=None):
+        super().create_experiment(backup_name)
+        self.inference_result_dir = os.path.join(self.experiment_path, "inference_result")
+        os.makedirs(self.inference_result_dir)
+        
+    def load_experiment(self, backup_name=None):
+        super().load_experiment(backup_name)
+        self.inference_result_dir = os.path.join(self.experiment_path, "inference_result")
+    
+    def save_inference_result(self, scanned_pts_list, downsampled_combined_pts):
+        import time
+        dir_name = time.strftime("inference_result_%Y_%m_%d_%Hh%Mm%Ss", time.localtime())
+        dir_path = os.path.join(self.inference_result_dir, dir_name)
+        for i in range(len(scanned_pts_list)):
+            np.savetxt(os.path.join(dir_path, f"{i}.txt"), np.array(scanned_pts_list[i]))
+            
+        np.savetxt(os.path.join(dir_path, "downsampled_combined_pts.txt"), np.array(downsampled_combined_pts))
+        
+        Log.success("Inference result saved")
+        
--- a/utils/communicate_util.py
+++ b/utils/communicate_util.py
@@ -0,0 +1,41 @@
+import requests
+import numpy as np
+
+class CommunicateUtil:
+    VIEW_HOST = "192.168.1.2:7999"  #"10.7.250.52:7999" ##
+    INFERENCE_HOST = "127.0.0.1"
+    INFERENCE_PORT = 5000
+    
+    def get_view_data(init = False) -> dict:
+        params = {}
+        params["create_scanner"] = init
+        response = requests.get(f"http://{CommunicateUtil.VIEW_HOST}/api/data", json=params)
+        data = response.json()
+        
+        if not data["success"]:
+            print(f"Failed to get view data")
+            return None
+        
+        image_id = data["image_id"]
+        depth_image = np.array(data["depth_image"], dtype=np.uint16)
+        color_image = np.array(data["color_image"]).astype(np.uint8)
+        depth_intrinsics = data["depth_intrinsics"]
+        depth_extrinsics = np.array(data["depth_extrinsics"])
+        color_intrinsics = data["color_intrinsics"]
+        depth_to_color = np.array(data["depth_to_color"])
+        view_data = {
+            "image_id": image_id,
+            "depth_image": depth_image,
+            "depth_intrinsics": depth_intrinsics,
+            "depth_extrinsics": depth_extrinsics,
+            "color_image": color_image,
+            "color_intrinsics": color_intrinsics,
+            "depth_to_color": depth_to_color
+        }
+        return view_data
+    
+    def get_inference_data(view_data:dict) -> dict:
+        data = {}
+        return data
+        
+        
--- a/utils/control_util.py
+++ b/utils/control_util.py
@@ -0,0 +1,284 @@
+import numpy as np
+from frankapy import FrankaArm
+from autolab_core import RigidTransform
+from scipy.spatial.transform import Rotation as R
+import serial
+import time
+
+class ControlUtil:
+    
+    __fa:FrankaArm = None
+    __ser: serial.Serial = None
+    curr_rotation = 0
+    
+    # BASE_TO_WORLD:np.ndarray = np.asarray([
+    #     [1, 0, 0, -0.68834619],
+    #     [0, 1, 0, -0.01785319],
+    #     [0, 0, 1, -0.11924244],
+    #     [0, 0, 0, 1]
+    #     ])
+    
+    # BASELINE_DIS = 0.1
+    # LEFT_TO_RIGHT_ANGLE = -25.0
+    # CAMERA_TO_LEFT_CAMERA:np.ndarray = np.asarray([
+    #     [1, 0, 0, BASELINE_DIS/2 * np.cos(np.radians(-LEFT_TO_RIGHT_ANGLE/2))],
+    #     [0, 1, 0, 0],
+    #     [0, 0, 1, BASELINE_DIS/2 * np.sin(np.radians(-LEFT_TO_RIGHT_ANGLE/2))],
+    #     [0, 0, 0, 1]
+    #     ])
+    # CAMERA_TO_LEFT_CAMERA[:3, :3] = R.from_euler('y', LEFT_TO_RIGHT_ANGLE/2, degrees=True).as_matrix()
+    
+    CAMERA_CORRECTION_ANGLE = -13
+    CAMERA_CORRECTION:np.ndarray = np.eye(4)
+    CAMERA_CORRECTION[:3, :3] = R.from_euler('y', CAMERA_CORRECTION_ANGLE, degrees=True).as_matrix()
+    
+    BASE_TO_WORLD:np.ndarray = np.asarray([
+        [1, 0, 0, -0.49602172],
+        [0, 1, 0, 0.06688724],
+        [0, 0, 1, -0.2488704 ],
+        [0, 0, 0, 1]
+        ])
+    
+
+    CAMERA_TO_GRIPPER:np.ndarray = np.asarray(
+            [[-0.00757086, -0.99984193,  0.01608569,  0.02644546],
+            [ 0.99993362, -0.00770935, -0.00856502, -0.04860572],
+            [ 0.00868767,  0.01601977,  0.99983391, -0.02169477],
+            [ 0.,          0.   ,       0.     ,     1.      ,  ]
+        ])
+
+    INIT_GRIPPER_POSE:np.ndarray = np.asarray(
+        [[0.41869126  ,0.87596275 , 0.23951774 , 0.36005292],
+        [ 0.70787907 ,-0.4800251  , 0.51813998 ,-0.26499909],
+        [ 0.56884584, -0.04739109 ,-0.82107382  ,0.66881103],
+        [ 0.         , 0.    ,      0.      ,    1.        ]])
+    
+    # INIT_GRIPPER_POSE:np.ndarray = np.asarray(
+    # [[1  ,0 , 0 , 0.5],
+    # [ 0 ,-1  , 0 ,0],
+    # [ 0, 0 ,-1  ,0.6],
+    # [ 0.         , 0.    ,      0.      ,    1.        ]])
+    
+    INIT_JOINTS:np.ndarray = np.asarray([-1.64987928,-0.73557812,-0.02676473,-2.08365335,0.01794927,1.5182423,0.73276058])
+    
+    @staticmethod
+    def connect_robot():
+        if ControlUtil.__fa is None:
+            ControlUtil.__fa = FrankaArm(robot_num=1)
+        if ControlUtil.__ser is None:
+            ControlUtil.__ser = serial.Serial(port="/dev/ttyUSB0", baudrate=115200) 
+        
+    @staticmethod
+    def franka_reset() -> None:
+        #ControlUtil.__fa.goto_joints(ControlUtil.INIT_JOINTS, duration=5, use_impedance=False, ignore_errors=False)
+        ControlUtil.__fa.reset_joints()
+        
+    @staticmethod
+    def init() -> None:
+        ControlUtil.franka_reset()
+        ControlUtil.set_gripper_pose(ControlUtil.INIT_GRIPPER_POSE)
+        
+    @staticmethod
+    def get_pose() -> np.ndarray:
+        gripper_to_base = ControlUtil.get_curr_gripper_to_base_pose()
+        cam_to_world = ControlUtil.BASE_TO_WORLD @ gripper_to_base @ ControlUtil.CAMERA_TO_GRIPPER
+        return cam_to_world
+    
+    @staticmethod
+    def set_pose(cam_to_world: np.ndarray) -> None:
+        gripper_to_base = ControlUtil.solve_gripper_to_base(cam_to_world)
+        ControlUtil.set_gripper_pose(gripper_to_base)
+        
+    @staticmethod
+    def rotate_display_table(degree):
+        turn_directions = {
+            "left": 1,
+            "right": 0
+        }
+        delta_degree = degree - ControlUtil.curr_rotation
+        ControlUtil.curr_rotation = degree
+        print(f"Table rotated {ControlUtil.curr_rotation} degree")
+        
+        if delta_degree >= 0:
+            turn_angle = delta_degree
+            turn_direction = turn_directions["left"]
+        else:
+            turn_angle = -delta_degree
+            turn_direction = turn_directions["right"]
+        write_len = ControlUtil.__ser.write(f"CT+TRUNSINGLE({turn_direction},{turn_angle});".encode('utf-8'))
+        print(f"send command: CT+TRUNSINGLE({turn_direction},{turn_angle});")
+        
+        return delta_degree
+        
+    
+    @staticmethod
+    def get_curr_gripper_to_base_pose() -> np.ndarray:
+        return ControlUtil.__fa.get_pose().matrix
+    
+    @staticmethod
+    def get_curr_joints() -> np.ndarray:
+        return ControlUtil.__fa.get_joints()
+    
+    @staticmethod
+    def set_gripper_pose(gripper_to_base: np.ndarray) -> None:
+        gripper_to_base = RigidTransform(rotation=gripper_to_base[:3, :3], translation=gripper_to_base[:3, 3], from_frame="franka_tool", to_frame="world")
+        ControlUtil.__fa.goto_pose(gripper_to_base, duration=6, use_impedance=False, ignore_errors=False, ignore_virtual_walls=True)
+        
+    @staticmethod
+    def solve_gripper_to_base(cam_to_world: np.ndarray) -> np.ndarray:
+        return np.linalg.inv(ControlUtil.BASE_TO_WORLD) @ cam_to_world @ np.linalg.inv(ControlUtil.CAMERA_TO_GRIPPER)
+    
+    @staticmethod
+    def sovle_cam_to_world(gripper_to_base: np.ndarray) -> np.ndarray:
+        return ControlUtil.BASE_TO_WORLD @ gripper_to_base @ ControlUtil.CAMERA_TO_GRIPPER
+    
+    @staticmethod
+    def check_limit(new_cam_to_world):
+        if new_cam_to_world[0,3] > 0 or new_cam_to_world[1,3] > 0:
+        # if new_cam_to_world[0,3] > 0:
+            return False
+        x = abs(new_cam_to_world[0,3])
+        y = abs(new_cam_to_world[1,3])
+        
+        tan_y_x = y/x
+        min_angle = 20 / 180 * np.pi
+        max_angle = 50 / 180 * np.pi
+        if tan_y_x < np.tan(min_angle) or tan_y_x > np.tan(max_angle):
+            return False
+         
+        return True
+    
+    @staticmethod
+    def solve_display_table_rot_and_cam_to_world(cam_to_world: np.ndarray) -> tuple:   
+        if ControlUtil.check_limit(cam_to_world):
+            return 0, cam_to_world
+        else:
+            min_display_table_rot = 180
+            min_new_cam_to_world = None
+            for display_table_rot in np.linspace(0.1,360, 1800):
+                new_world_to_world = ControlUtil.get_z_axis_rot_mat(display_table_rot)
+                new_cam_to_new_world = cam_to_world
+                new_cam_to_world = new_world_to_world @ new_cam_to_new_world
+                
+                if ControlUtil.check_limit(new_cam_to_world):
+                    if display_table_rot < min_display_table_rot:
+                        min_display_table_rot, min_new_cam_to_world = display_table_rot, new_cam_to_world
+                    if abs(display_table_rot - 360) < min_display_table_rot:
+                        min_display_table_rot, min_new_cam_to_world = display_table_rot - 360, new_cam_to_world
+        
+        if min_new_cam_to_world is None:
+            raise ValueError("No valid display table rotation found")
+        
+        return min_display_table_rot, min_new_cam_to_world
+                
+    @staticmethod
+    def get_z_axis_rot_mat(degree):
+        radian = np.radians(degree)
+        return np.array([
+            [np.cos(radian), -np.sin(radian), 0, 0],
+            [np.sin(radian), np.cos(radian), 0, 0],
+            [0, 0, 1, 0],
+            [0, 0, 0, 1]
+            ])
+    
+    @staticmethod
+    def get_gripper_to_base_axis_angle(gripper_to_base: np.ndarray) -> bool:
+        rot_mat = gripper_to_base[:3,:3]
+        gripper_z_axis = rot_mat[:,2]
+        base_x_axis = np.array([1,0,0])
+        angle = np.arccos(np.dot(gripper_z_axis, base_x_axis))
+        return angle
+    
+    @staticmethod
+    def move_to(pose: np.ndarray):
+        rot_degree, cam_to_world = ControlUtil.solve_display_table_rot_and_cam_to_world(pose)
+        
+        delta_degree = ControlUtil.rotate_display_table(rot_degree)
+        exec_time = abs(delta_degree)/9
+        start_time = time.time()
+        ControlUtil.set_pose(cam_to_world)
+        end_time = time.time()
+        print(f"Move to pose with rotation {rot_degree} degree, exec time: {end_time - start_time}|exec time: {exec_time}")
+        if end_time - start_time < exec_time:
+            time.sleep(exec_time - (end_time - start_time))
+            
+    @staticmethod
+    def absolute_rotate_display_table(degree):
+        exec_time = abs(degree)/9
+        write_len = ControlUtil.__ser.write(f"CT+TRUNSINGLE({0},{degree});".encode('utf-8'))
+        print(f"send command: CT+TRUNSINGLE({0},{degree});")
+        time.sleep(exec_time)
+            
+# ----------- Debug Test -------------
+
+if __name__ == "__main__":
+    ControlUtil.connect_robot()
+    #ControlUtil.franka_reset()
+    print(ControlUtil.get_curr_gripper_to_base_pose())
+    # ControlUtil.rotate_display_table(180)
+    # ControlUtil.init()
+    # ControlUtil.franka_reset()
+    
+    # def main_test():
+    #     print(ControlUtil.get_curr_gripper_to_base_pose())
+    #     ControlUtil.init()
+    
+    # def rotate_back(rotation):
+    #     ControlUtil.rotate_display_table(-rotation)
+    
+    # #main_test()
+    # import sys; sys.path.append("/home/yan20/nbv_rec/project/franka_control")
+    # from utils.communicate_util import CommunicateUtil
+    # import ipdb
+    # ControlUtil.init()
+    # view_data_0 = CommunicateUtil.get_view_data(init=True)
+    # depth_extrinsics_0 = view_data_0["depth_extrinsics"]
+    # cam_to_world_0 = ControlUtil.get_pose()
+    # print("cam_extrinsics_0")
+    # print(depth_extrinsics_0)
+    # print("cam_to_world_0")
+    # print(cam_to_world_0)
+    
+    # ipdb.set_trace()
+    # TEST_POSE:np.ndarray = np.asarray([
+    #     [ 0.46532393,  0.62171798,  0.63002284,  0.30230963],
+    #     [ 0.43205618, -0.78075723,  0.45136491, -0.29127173],
+    #     [ 0.77251656,  0.06217437, -0.63193429,  0.559957  ],
+    #     [ 0.        ,  0.        ,  0.        ,  1.        ],
+    #     ])
+    # TEST_POSE_CAM_TO_WORLD = ControlUtil.BASE_TO_WORLD @ TEST_POSE @ ControlUtil.CAMERA_TO_GRIPPER
+    # ControlUtil.move_to(TEST_POSE_CAM_TO_WORLD)
+    # view_data_1 = CommunicateUtil.get_view_data()
+    # depth_extrinsics_1 = view_data_1["depth_extrinsics"]
+    # depth_extrinsics_1[:3,3] = depth_extrinsics_1[:3,3] / 1000
+    # cam_to_world_1 = ControlUtil.get_pose()
+    # print("cam_extrinsics_1")
+    # print(depth_extrinsics_1)
+    # print("cam_to_world_1")
+    # print(TEST_POSE_CAM_TO_WORLD)
+    # actual_cam_to_world_1 = cam_to_world_0 @ depth_extrinsics_1
+    # print("actual_cam_to_world_1")
+    # print(actual_cam_to_world_1)
+    # ipdb.set_trace()
+    # TEST_POSE_2:np.ndarray = np.asarray(
+    #     [[ 0.74398544, -0.61922696,  0.251049,    0.47000935],
+    #     [-0.47287207, -0.75338888, -0.45692666,  0.20843903],
+    #     [ 0.47207883 , 0.22123272, -0.85334192,  0.57863381],
+    #     [ 0. ,         0.   ,       0.  ,        1.  ,      ]]
+    #     )
+    # TEST_POSE_CAM_TO_WORLD_2 = ControlUtil.BASE_TO_WORLD @ TEST_POSE_2 @ ControlUtil.CAMERA_TO_GRIPPER
+    
+    # #ControlUtil.move_to(TEST_POSE_CAM_TO_WORLD_2)
+    # ControlUtil.set_pose(TEST_POSE_CAM_TO_WORLD_2)
+    # view_data_2 = CommunicateUtil.get_view_data()
+    # depth_extrinsics_2 = view_data_2["depth_extrinsics"]
+    # depth_extrinsics_2[:3,3] = depth_extrinsics_2[:3,3] / 1000
+    # cam_to_world_2 = ControlUtil.get_pose()
+    # print("cam_extrinsics_2")
+    # print(depth_extrinsics_2)
+    # print("cam_to_world_2")
+    # print(TEST_POSE_CAM_TO_WORLD_2)
+    # actual_cam_to_world_2 = cam_to_world_0 @ depth_extrinsics_2
+    # print("actual_cam_to_world_2")
+    # print(actual_cam_to_world_2)
+    # ipdb.set_trace()
--- a/utils/data_load.py
+++ b/utils/data_load.py
@@ -0,0 +1,384 @@
+import os
+import numpy as np
+import json
+import cv2
+import trimesh
+import torch
+import OpenEXR
+import Imath
+from utils.pts_util import PtsUtil
+
+
+class DataLoadUtil:
+    TABLE_POSITION = np.asarray([0, 0, -0.1])
+
+    @staticmethod
+    def get_display_table_info(root, scene_name):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        display_table_info = scene_info["display_table"]
+        return display_table_info
+
+    @staticmethod
+    def get_display_table_top(root, scene_name):
+        # display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
+        #     "height"
+        # ]
+        display_table_height = 0.1 #FIXME
+        display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
+            [0, 0, display_table_height]
+        )
+        return display_table_top
+
+    @staticmethod
+    def get_path(root, scene_name, frame_idx):
+        path = os.path.join(root, scene_name, f"{frame_idx}")
+        return path
+
+    @staticmethod
+    def get_label_num(root, scene_name):
+        label_dir = os.path.join(root, scene_name, "label")
+        return len(os.listdir(label_dir))
+
+    @staticmethod
+    def get_label_path(root, scene_name, seq_idx):
+        label_dir = os.path.join(root, scene_name, "label")
+        if not os.path.exists(label_dir):
+            os.makedirs(label_dir)
+        path = os.path.join(label_dir, f"{seq_idx}.json")
+        return path
+
+    @staticmethod
+    def get_label_path_old(root, scene_name):
+        path = os.path.join(root, scene_name, "label.json")
+        return path
+
+    @staticmethod
+    def get_scene_seq_length(root, scene_name):
+        camera_params_path = os.path.join(root, scene_name, "camera_params")
+        return len(os.listdir(camera_params_path))
+
+    @staticmethod
+    def load_mesh_at(model_dir, object_name, world_object_pose):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        mesh.apply_transform(world_object_pose)
+        return mesh
+
+    @staticmethod
+    def get_bbox_diag(model_dir, object_name):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        bbox = mesh.bounding_box.extents
+        diagonal_length = np.linalg.norm(bbox)
+        return diagonal_length
+
+    @staticmethod
+    def load_scene_info(root, scene_name):
+        scene_info_path = os.path.join(root, scene_name, "scene_info.json")
+        with open(scene_info_path, "r") as f:
+            scene_info = json.load(f)
+        return scene_info
+
+    @staticmethod
+    def load_target_pts_num_dict(root, scene_name):
+        target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
+        with open(target_pts_num_path, "r") as f:
+            target_pts_num_dict = json.load(f)
+        return target_pts_num_dict
+
+    @staticmethod
+    def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
+
+        def load_depth_from_real_path(real_path, min_depth, max_depth):
+            depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
+            depth = depth.astype(np.float32) / 65535.0
+            min_depth = min_depth
+            max_depth = max_depth
+            depth_meters = min_depth + (max_depth - min_depth) * depth
+            return depth_meters
+
+        if binocular:
+            depth_path_L = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
+            )
+            depth_path_R = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
+            )
+            depth_meters_L = load_depth_from_real_path(
+                depth_path_L, min_depth, max_depth
+            )
+            depth_meters_R = load_depth_from_real_path(
+                depth_path_R, min_depth, max_depth
+            )
+            return depth_meters_L, depth_meters_R
+        else:
+            depth_path = os.path.join(
+                os.path.dirname(path), "depth", os.path.basename(path) + ".png"
+            )
+            depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
+            return depth_meters
+
+    @staticmethod
+    def load_seg(path, binocular=False, left_only=False):
+        if binocular and not left_only:
+
+            def clean_mask(mask_image):
+                green = [0, 255, 0, 255]
+                red = [255, 0, 0, 255]
+                threshold = 2
+                mask_image = np.where(
+                    np.abs(mask_image - green) <= threshold, green, mask_image
+                )
+                mask_image = np.where(
+                    np.abs(mask_image - red) <= threshold, red, mask_image
+                )
+                return mask_image
+
+            mask_path_L = os.path.join(
+                os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
+            )
+            mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
+            mask_path_R = os.path.join(
+                os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
+            )
+            mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
+            return mask_image_L, mask_image_R
+        else:
+            if binocular and left_only:
+                mask_path = os.path.join(
+                    os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
+                )
+            else:
+                mask_path = os.path.join(
+                    os.path.dirname(path), "mask", os.path.basename(path) + ".png"
+                )
+            mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
+            return mask_image
+        
+    @staticmethod
+    def load_exr_image(file_path):
+        exr_file = OpenEXR.InputFile(file_path)
+        header = exr_file.header()
+        dw = header['dataWindow']
+        width = dw.max.x - dw.min.x + 1
+        height = dw.max.y - dw.min.y + 1
+        float_channels = ['R', 'G', 'B']
+        img_data = []
+        for channel in float_channels:
+            channel_data = exr_file.channel(channel)
+            img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width)))
+        img = np.stack(img_data, axis=-1)
+        return img    
+    
+    @staticmethod
+    def load_normal(path, binocular=False, left_only=False, file_type="exr"):
+        if binocular and not left_only:
+            normal_path_L = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
+            )
+            normal_image_L = DataLoadUtil.load_exr_image(normal_path_L)
+            
+            normal_path_R = os.path.join(
+                os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}"
+            )
+            normal_image_R = DataLoadUtil.load_exr_image(normal_path_R)
+            normalized_normal_image_L = normal_image_L * 2.0 - 1.0
+            normalized_normal_image_R = normal_image_R * 2.0 - 1.0
+            return normalized_normal_image_L, normalized_normal_image_R
+        else:
+            if binocular and left_only:
+                normal_path = os.path.join(
+                    os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
+                )
+            else:
+                normal_path = os.path.join(
+                    os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}"
+                )
+            normal_image = DataLoadUtil.load_exr_image(normal_path)
+            normalized_normal_image = normal_image * 2.0 - 1.0
+            return normalized_normal_image
+
+    @staticmethod
+    def load_label(path):
+        with open(path, "r") as f:
+            label_data = json.load(f)
+        return label_data
+
+    @staticmethod
+    def load_from_preprocessed_pts(path, file_type="npy"):
+        npy_path = os.path.join(
+            os.path.dirname(path), "pts", os.path.basename(path) + "." + file_type
+        )
+        if file_type == "txt":
+            pts = np.loadtxt(npy_path)
+        else:
+            pts = np.load(npy_path)
+        return pts
+
+    @staticmethod
+    def cam_pose_transformation(cam_pose_before):
+        offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
+        cam_pose_after = cam_pose_before @ offset
+        return cam_pose_after
+
+    @staticmethod
+    def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
+        scene_dir = os.path.dirname(path)
+        root_dir = os.path.dirname(scene_dir)
+        scene_name = os.path.basename(scene_dir)
+        camera_params_path = os.path.join(
+            os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
+        )
+        with open(camera_params_path, "r") as f:
+            label_data = json.load(f)
+        cam_to_world = np.asarray(label_data["extrinsic"])
+        cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
+        
+        if display_table_as_world_space_origin:
+            world_to_display_table = np.eye(4)
+            world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
+                root_dir, scene_name
+            )
+            cam_to_world = np.dot(world_to_display_table, cam_to_world)
+        cam_intrinsic = np.asarray(label_data["intrinsic"])
+        cam_info = {
+            "cam_to_world": cam_to_world,
+            "cam_intrinsic": cam_intrinsic,
+            "far_plane": label_data["far_plane"],
+            "near_plane": label_data["near_plane"],
+        }
+        if binocular:
+            cam_to_world_R = np.asarray(label_data["extrinsic_R"])
+            cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
+            cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
+            cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
+            if display_table_as_world_space_origin:
+                cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
+                cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
+            cam_info["cam_to_world_O"] = cam_to_world_O
+            cam_info["cam_to_world_R"] = cam_to_world_R
+        return cam_info
+
+    @staticmethod
+    def get_real_cam_O_from_cam_L(
+        cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
+    ):
+        root_dir = os.path.dirname(scene_path)
+        scene_name = os.path.basename(scene_path)
+        if isinstance(cam_L, torch.Tensor):
+            cam_L = cam_L.cpu().numpy()
+        nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
+        if display_table_as_world_space_origin:
+            display_table_to_world = np.eye(4)
+            display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
+                root_dir, scene_name
+            )
+            nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
+        nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
+        return nO_to_world_pose
+
+    @staticmethod
+    def get_target_point_cloud(
+        depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
+    ):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        mask = mask.reshape(-1, 4)
+
+        target_mask = (mask == target_mask_label).all(axis=-1)
+
+        target_points_camera = points_camera[target_mask]
+        target_points_camera_aug = np.concatenate(
+            [target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
+        )
+
+        target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
+        data = {
+            "points_world": target_points_world,
+            "points_camera": target_points_camera,
+        }
+        return data
+
+    @staticmethod
+    def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        points_camera_aug = np.concatenate(
+            [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
+        )
+
+        points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        return {"points_world": points_world, "points_camera": points_camera}
+
+    @staticmethod
+    def get_target_point_cloud_world_from_path(
+        path,
+        binocular=False,
+        random_downsample_N=65536,
+        voxel_size=0.005,
+        target_mask_label=(0, 255, 0, 255),
+        display_table_mask_label=(0, 0, 255, 255),
+        get_display_table_pts=False,
+        require_normal=False,
+    ):
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
+        if binocular:
+            depth_L, depth_R = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
+            )
+            mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+            point_cloud_L = DataLoadUtil.get_target_point_cloud(
+                depth_L,
+                cam_info["cam_intrinsic"],
+                cam_info["cam_to_world"],
+                mask_L,
+                target_mask_label,
+            )["points_world"]
+            point_cloud_R = DataLoadUtil.get_target_point_cloud(
+                depth_R,
+                cam_info["cam_intrinsic"],
+                cam_info["cam_to_world_R"],
+                mask_R,
+                target_mask_label,
+            )["points_world"]
+            point_cloud_L = PtsUtil.random_downsample_point_cloud(
+                point_cloud_L, random_downsample_N
+            )
+            point_cloud_R = PtsUtil.random_downsample_point_cloud(
+                point_cloud_R, random_downsample_N
+            )
+            overlap_points = PtsUtil.get_overlapping_points(
+                point_cloud_L, point_cloud_R, voxel_size
+            )
+            return overlap_points
+        else:
+            depth = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], cam_info["far_plane"]
+            )
+            mask = DataLoadUtil.load_seg(path)
+            point_cloud = DataLoadUtil.get_target_point_cloud(
+                depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
+            )["points_world"]
+            return point_cloud
+
+    @staticmethod
+    def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
+        points_path = os.path.join(root, scene_name, "points_and_normals.txt")
+        points_normals = np.loadtxt(points_path)
+        if display_table_as_world_space_origin:
+            points_normals[:, :3] = points_normals[
+                :, :3
+            ] - DataLoadUtil.get_display_table_top(root, scene_name)
+        return points_normals
--- a/utils/inference_util.py
+++ b/utils/inference_util.py
--- a/utils/pose_util.py
+++ b/utils/pose_util.py
@@ -0,0 +1,164 @@
+import numpy as np
+
+class PoseUtil:
+    ROTATION = 1
+    TRANSLATION = 2
+    SCALE = 3
+
+    @staticmethod
+    def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False):
+        if isinstance(trans_m_min, list):
+            x_min, y_min, z_min = trans_m_min
+            x_max, y_max, z_max = trans_m_max
+        else:
+            x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min
+            x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max
+
+        x = np.random.uniform(x_min, x_max)
+        y = np.random.uniform(y_min, y_max)
+        z = np.random.uniform(z_min, z_max)
+        translation = np.array([x, y, z])
+        if trans_unit == "cm":
+            translation = translation / 100
+        if debug:
+            print("uniform translation:", translation)
+        return translation
+
+    @staticmethod
+    def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False):
+        axis = np.random.randn(3)
+        axis /= np.linalg.norm(axis)
+        theta = np.random.uniform(
+            rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi
+        )
+
+        K = np.array(
+            [[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]]
+        )
+        R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
+        if debug:
+            print("uniform rotation:", theta * 180 / np.pi)
+        return R
+
+    @staticmethod
+    def get_uniform_pose(
+        trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False
+    ):
+        translation = PoseUtil.get_uniform_translation(
+            trans_min, trans_max, trans_unit, debug
+        )
+        rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
+        pose = np.eye(4)
+        pose[:3, :3] = rotation
+        pose[:3, 3] = translation
+        return pose
+
+    @staticmethod
+    def get_n_uniform_pose(
+        trans_min,
+        trans_max,
+        rot_min=0,
+        rot_max=180,
+        n=1,
+        trans_unit="cm",
+        fix=None,
+        contain_canonical=True,
+        debug=False,
+    ):
+        if fix == PoseUtil.ROTATION:
+            translations = np.zeros((n, 3))
+            for i in range(n):
+                translations[i] = PoseUtil.get_uniform_translation(
+                    trans_min, trans_max, trans_unit, debug
+                )
+            if contain_canonical:
+                translations[0] = np.zeros(3)
+            rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
+        elif fix == PoseUtil.TRANSLATION:
+            rotations = np.zeros((n, 3, 3))
+            for i in range(n):
+                rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
+            if contain_canonical:
+                rotations[0] = np.eye(3)
+            translations = PoseUtil.get_uniform_translation(
+                trans_min, trans_max, trans_unit, debug
+            )
+        else:
+            translations = np.zeros((n, 3))
+            rotations = np.zeros((n, 3, 3))
+            for i in range(n):
+                translations[i] = PoseUtil.get_uniform_translation(
+                    trans_min, trans_max, trans_unit, debug
+                )
+            for i in range(n):
+                rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
+            if contain_canonical:
+                translations[0] = np.zeros(3)
+                rotations[0] = np.eye(3)
+
+        pose = np.eye(4, 4, k=0)[np.newaxis, :].repeat(n, axis=0)
+        pose[:, :3, :3] = rotations
+        pose[:, :3, 3] = translations
+
+        return pose
+
+    @staticmethod
+    def get_n_uniform_pose_batch(
+        trans_min,
+        trans_max,
+        rot_min=0,
+        rot_max=180,
+        n=1,
+        batch_size=1,
+        trans_unit="cm",
+        fix=None,
+        contain_canonical=False,
+        debug=False,
+    ):
+
+        batch_poses = []
+        for i in range(batch_size):
+            pose = PoseUtil.get_n_uniform_pose(
+                trans_min,
+                trans_max,
+                rot_min,
+                rot_max,
+                n,
+                trans_unit,
+                fix,
+                contain_canonical,
+                debug,
+            )
+            batch_poses.append(pose)
+        pose_batch = np.stack(batch_poses, axis=0)
+        return pose_batch
+
+    @staticmethod
+    def get_uniform_scale(scale_min, scale_max, debug=False):
+        if isinstance(scale_min, list):
+            x_min, y_min, z_min = scale_min
+            x_max, y_max, z_max = scale_max
+        else:
+            x_min, y_min, z_min = scale_min, scale_min, scale_min
+            x_max, y_max, z_max = scale_max, scale_max, scale_max
+
+        x = np.random.uniform(x_min, x_max)
+        y = np.random.uniform(y_min, y_max)
+        z = np.random.uniform(z_min, z_max)
+        scale = np.array([x, y, z])
+        if debug:
+            print("uniform scale:", scale)
+        return scale
+
+    @staticmethod
+    def rotation_6d_to_matrix_numpy(d6):
+        a1, a2 = d6[:3], d6[3:]
+        b1 = a1 / np.linalg.norm(a1)
+        b2 = a2 - np.dot(b1, a2) * b1
+        b2 = b2 / np.linalg.norm(b2)
+        b3 = np.cross(b1, b2)
+        return np.stack((b1, b2, b3), axis=-2)
+
+    @staticmethod
+    def matrix_to_rotation_6d_numpy(matrix):
+        return np.copy(matrix[:2, :]).reshape((6,))
--- a/utils/preprocess_util.py
+++ b/utils/preprocess_util.py
@@ -0,0 +1,247 @@
+import os
+import numpy as np
+import time
+import sys
+np.random.seed(0)
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from utils.reconstruction_util import ReconstructionUtil
+from utils.data_load import DataLoadUtil
+from utils.pts_util import PtsUtil
+from PytorchBoot.utils.log_util import Log
+
+def save_np_pts(path, pts: np.ndarray, file_type="txt"):
+    if file_type == "txt":
+        np.savetxt(path, pts)
+    else:
+        np.save(path, pts)
+
+def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"):
+    #import ipdb;ipdb.set_trace()
+    pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}")
+    if not os.path.exists(os.path.join(root,scene, "pts")):
+        os.makedirs(os.path.join(root,scene, "pts"))
+    save_np_pts(pts_path, target_points, file_type)
+    
+def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
+    file_type="npy"
+    indices_path = os.path.join(root,scene, "scan_points_indices", f"{frame_idx}.{file_type}")
+    if not os.path.exists(os.path.join(root,scene, "scan_points_indices")):
+        os.makedirs(os.path.join(root,scene, "scan_points_indices"))
+    save_np_pts(indices_path, scan_points_indices, file_type)
+
+def save_scan_points(root, scene, scan_points: np.ndarray):
+    scan_points_path = os.path.join(root,scene, "scan_points.txt")
+    save_np_pts(scan_points_path, scan_points)
+
+def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
+    z = depth[mask]
+    i, j = np.nonzero(mask)
+    x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+    y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+    sampled_target_points = PtsUtil.random_downsample_point_cloud(
+                points_camera, random_downsample_N
+            )
+    points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
+    points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+    
+    return points_world
+
+def get_world_points_and_normals(depth, normal, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
+    z = depth[mask]
+    i, j = np.nonzero(mask)
+    x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+    y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+    points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+    normals_camera = normal[mask].reshape(-1, 3)
+    sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
+                points_camera, random_downsample_N, require_idx=True
+            )
+    if len(sampled_target_points) == 0:
+        return np.zeros((0, 3)), np.zeros((0, 3))
+    sampled_target_normals = normals_camera[idx]
+    
+    points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
+    points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+    
+    
+    return points_world, sampled_target_normals
+
+def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
+    scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
+    points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
+    points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
+    points_image_homogeneous /= points_image_homogeneous[:, 2:]
+    pixel_x = points_image_homogeneous[:, 0].astype(int)
+    pixel_y = points_image_homogeneous[:, 1].astype(int)
+    h, w = mask.shape[:2]
+    valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
+    mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
+    selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
+    selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
+    return selected_points_indices
+
+def save_scene_data(root, scene, file_type="npy"):
+    
+    ''' configuration '''
+    target_mask_label = (0, 255, 0, 255)
+    display_table_mask_label=(0, 0, 255, 255)
+    random_downsample_N = 32768
+    voxel_size=0.002
+    filter_degree = 90
+    min_z = 0.25
+    max_z = 0.5
+    
+    ''' scan points '''
+    display_table_info = DataLoadUtil.get_display_table_info(root, scene)
+    # radius = display_table_info["radius"]     #FIXME
+    radius = 0.25
+    
+    scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=radius))
+    
+    ''' read frame data(depth|mask|normal) '''
+    frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
+    for frame_id in range(frame_num):
+        Log.info(f"frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
+        path = DataLoadUtil.get_path(root, scene, frame_id)
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+        depth_L, depth_R = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], 
+                cam_info["far_plane"], 
+                binocular=True
+            )
+        mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+        normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
+        
+        ''' target points '''
+        mask_img_L = mask_L
+        mask_img_R = mask_R
+
+        target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
+        target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
+        
+        
+        sampled_target_points_L, sampled_target_normals_L = get_world_points_and_normals(depth_L, normal_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], random_downsample_N)
+        sampled_target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], random_downsample_N)
+
+
+        has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
+        if has_points:
+            target_points, overlap_idx = PtsUtil.get_overlapping_points(
+                    sampled_target_points_L, sampled_target_points_R, voxel_size, require_idx=True
+                )
+            target_normals = sampled_target_normals_L[overlap_idx]
+
+        if has_points:
+            has_points = target_points.shape[0] > 0
+
+        if has_points:
+            target_points, target_normals = PtsUtil.filter_points(
+                target_points, target_normals, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(min_z, max_z)
+                )
+
+
+        ''' scan points indices '''
+        # scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"]) 
+        # scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
+        # scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
+        
+        if not has_points:
+            target_points = np.zeros((0, 3))
+            target_normals = np.zeros((0, 3))
+            
+        save_target_points(root, scene, frame_id, target_points, file_type=file_type)
+        #save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
+    
+    save_scan_points(root, scene, scan_points) # The "done" flag of scene preprocess
+
+# def process_frame(frame_id, root, scene, scan_points, file_type, target_mask_label, display_table_mask_label, random_downsample_N, voxel_size, filter_degree, min_z, max_z):
+#     Log.info(f"[frame({frame_id})]Processing {scene} frame {frame_id}")
+#     path = DataLoadUtil.get_path(root, scene, frame_id)
+#     cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+#     depth_L, depth_R = DataLoadUtil.load_depth(
+#             path, cam_info["near_plane"], 
+#             cam_info["far_plane"], 
+#             binocular=True
+#         )
+#     mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+
+#     target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
+#     target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
+
+#     sampled_target_points_L = get_world_points(depth_L, target_mask_img_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], random_downsample_N)
+#     sampled_target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], random_downsample_N)
+
+#     has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
+#     target_points = np.zeros((0, 3))
+
+#     if has_points:
+#         target_points = PtsUtil.get_overlapping_points(sampled_target_points_L, sampled_target_points_R, voxel_size)
+
+#     if has_points and target_points.shape[0] > 0:
+#         points_normals = DataLoadUtil.load_points_normals(root, scene, display_table_as_world_space_origin=True)
+#         target_points = PtsUtil.filter_points(
+#             target_points, points_normals, cam_info["cam_to_world"], voxel_size=0.002, theta=filter_degree, z_range=(min_z, max_z)
+#         )
+
+#     scan_points_indices_L = get_scan_points_indices(scan_points, mask_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"]) 
+#     scan_points_indices_R = get_scan_points_indices(scan_points, mask_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
+#     scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
+
+#     save_target_points(root, scene, frame_id, target_points, file_type=file_type)
+#     save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
+
+# def save_scene_data_multithread(root, scene, file_type="txt"):
+#     target_mask_label = (0, 255, 0, 255)
+#     display_table_mask_label = (0, 0, 255, 255)
+#     random_downsample_N = 32768
+#     voxel_size = 0.002
+#     filter_degree = 75
+#     min_z = 0.2
+#     max_z = 0.5
+
+#     display_table_info = DataLoadUtil.get_display_table_info(root, scene)
+#     radius = display_table_info["radius"]
+#     scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0, display_table_radius=radius))
+#     frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
+
+#     with ThreadPoolExecutor() as executor:
+#         futures = {executor.submit(process_frame, frame_id, root, scene, scan_points, file_type, target_mask_label, display_table_mask_label, random_downsample_N, voxel_size, filter_degree, min_z, max_z): frame_id for frame_id in range(frame_num)}
+        
+#         for future in as_completed(futures):
+#             frame_id = futures[future]
+#             try:
+#                 future.result()
+#             except Exception as e:
+#                 Log.error(f"Error processing frame {frame_id}: {e}")
+
+#     save_scan_points(root, scene, scan_points)  # The "done" flag of scene preprocess
+
+
+
+if __name__ == "__main__":
+    #root = "/media/hofee/repository/new_data_with_normal"
+    root = r"/home/yan20/nbv_rec/project/franka_control/temp_debug"
+    # list_path = r"/media/hofee/repository/full_list.txt"
+    # scene_list = []
+    
+    # with open(list_path, "r") as f:
+    #     for line in f:
+    #         scene_list.append(line.strip())
+    scene_list = os.listdir(root)
+    from_idx = 0 # 1000
+    to_idx = 1 # 1500
+
+
+    cnt = 0
+    import time
+    total = to_idx - from_idx
+    for scene in scene_list[from_idx:to_idx]:
+        start = time.time()
+        save_scene_data(root, scene, file_type="npy")
+        cnt+=1
+        end = time.time()
+        print(f"Time cost: {end-start}")
--- a/utils/pts_util.py
+++ b/utils/pts_util.py
@@ -0,0 +1,308 @@
+import numpy as np
+import open3d as o3d
+import torch
+import trimesh
+from scipy.spatial import cKDTree
+from utils.pose_util import PoseUtil
+
+
+class PtsUtil:
+
+    @staticmethod
+    def voxel_downsample_point_cloud_old(point_cloud, voxel_size=0.005):
+        o3d_pc = o3d.geometry.PointCloud()
+        o3d_pc.points = o3d.utility.Vector3dVector(point_cloud)
+        downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
+        return np.asarray(downsampled_pc.points)
+    
+    @staticmethod
+    def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005, require_idx=False):
+        voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
+        if require_idx:
+            _, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
+            idx_sort = np.argsort(inverse)
+            idx_unique = idx_sort[np.cumsum(counts)-counts]
+            downsampled_points = point_cloud[idx_unique]
+            return downsampled_points, idx_unique
+        else:
+            unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
+            return unique_voxels[0]*voxel_size
+
+    @staticmethod
+    def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
+        if point_cloud.shape[0] == 0:
+            if require_idx:
+                return point_cloud, np.array([])
+            return point_cloud
+        idx = np.random.choice(len(point_cloud), num_points, replace=True)
+        if require_idx:
+            return point_cloud[idx], idx
+        return point_cloud[idx]
+
+    @staticmethod
+    def fps_downsample_point_cloud(point_cloud, num_points, require_idx=False):
+        N = point_cloud.shape[0]
+        mask = np.zeros(N, dtype=bool)
+
+        sampled_indices = np.zeros(num_points, dtype=int)
+        sampled_indices[0] = np.random.randint(0, N)
+        distances = np.linalg.norm(
+            point_cloud - point_cloud[sampled_indices[0]], axis=1
+        )
+        for i in range(1, num_points):
+            farthest_index = np.argmax(distances)
+            sampled_indices[i] = farthest_index
+            mask[farthest_index] = True
+
+            new_distances = np.linalg.norm(
+                point_cloud - point_cloud[farthest_index], axis=1
+            )
+            distances = np.minimum(distances, new_distances)
+
+        sampled_points = point_cloud[sampled_indices]
+        if require_idx:
+            return sampled_points, sampled_indices
+        return sampled_points
+
+    @staticmethod
+    def random_downsample_point_cloud_tensor(point_cloud, num_points):
+        idx = torch.randint(0, len(point_cloud), (num_points,))
+        return point_cloud[idx]
+
+    @staticmethod
+    def voxelize_points(points, voxel_size):
+        voxel_indices = np.floor(points / voxel_size).astype(np.int32)
+        unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
+        return unique_voxels
+
+    @staticmethod
+    def transform_point_cloud(points, pose_mat):
+        points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
+        points_h = np.dot(pose_mat, points_h.T).T
+        return points_h[:, :3]
+
+    @staticmethod
+    def get_overlapping_points(
+        point_cloud_L, point_cloud_R, voxel_size=0.005, require_idx=False
+    ):
+        voxels_L, indices_L = PtsUtil.voxelize_points(point_cloud_L, voxel_size)
+        voxels_R, _ = PtsUtil.voxelize_points(point_cloud_R, voxel_size)
+
+        voxel_indices_L = voxels_L.view([("", voxels_L.dtype)] * 3)
+        voxel_indices_R = voxels_R.view([("", voxels_R.dtype)] * 3)
+        overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R)
+        mask_L = np.isin(
+            indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0]
+        )
+        overlapping_points = point_cloud_L[mask_L]
+        if require_idx:
+            return overlapping_points, mask_L
+        return overlapping_points
+
+    @staticmethod
+    def filter_points(points, normals, cam_pose, theta_limit=45, z_range=(0.2, 0.45)):
+        
+        """ filter with normal """ 
+        normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
+        cos_theta = np.dot(normals_normalized, np.array([0, 0, 1]))
+        theta = np.arccos(cos_theta) * 180 / np.pi
+        idx = theta < theta_limit
+        filtered_sampled_points = points[idx]
+        filtered_normals = normals[idx]
+        
+        """ filter with z range """
+        points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
+        idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
+        z_filtered_points = filtered_sampled_points[idx]
+        z_filtered_normals = filtered_normals[idx]
+        return z_filtered_points[:, :3], z_filtered_normals
+
+
+    @staticmethod
+    def multi_scale_icp(
+        source, target, voxel_size_range, init_transformation=None, steps=20
+    ):
+        pipreg = o3d.pipelines.registration
+
+        if init_transformation is not None:
+            current_transformation = init_transformation
+        else:
+            current_transformation = np.identity(4)
+        cnt = 0
+        best_score = 1e10
+        best_reg = None
+        voxel_sizes = []
+        for i in range(steps):
+            voxel_sizes.append(
+                voxel_size_range[0]
+                + i * (voxel_size_range[1] - voxel_size_range[0]) / steps
+            )
+
+        for voxel_size in voxel_sizes:
+            radius_normal = voxel_size * 2
+            source_downsampled = source.voxel_down_sample(voxel_size)
+            source_downsampled.estimate_normals(
+                search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                    radius=radius_normal, max_nn=30
+                )
+            )
+            target_downsampled = target.voxel_down_sample(voxel_size)
+            target_downsampled.estimate_normals(
+                search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                    radius=radius_normal, max_nn=30
+                )
+            )
+
+            reg_icp = pipreg.registration_icp(
+                source_downsampled,
+                target_downsampled,
+                voxel_size * 2,
+                current_transformation,
+                pipreg.TransformationEstimationPointToPlane(),
+                pipreg.ICPConvergenceCriteria(max_iteration=500),
+            )
+            cnt += 1
+            if reg_icp.fitness == 0:
+                score = 1e10
+            else:
+                score = reg_icp.inlier_rmse / reg_icp.fitness
+            
+            if score < best_score:
+                best_score = score
+                best_reg = reg_icp
+
+        return best_reg, best_score
+
+    @staticmethod
+    def multi_scale_ransac(source_downsampled, target_downsampled, source_fpfh, model_fpfh, voxel_size_range, steps=20):
+        pipreg = o3d.pipelines.registration
+        cnt = 0
+        best_score = 1e10
+        best_reg = None
+        voxel_sizes = []
+        for i in range(steps):
+            voxel_sizes.append(
+                voxel_size_range[0]
+                + i * (voxel_size_range[1] - voxel_size_range[0]) / steps
+            )
+
+        for voxel_size in voxel_sizes:
+            reg_ransac = pipreg.registration_ransac_based_on_feature_matching(
+                source_downsampled,
+                target_downsampled,
+                source_fpfh,
+                model_fpfh,
+                mutual_filter=True,
+                max_correspondence_distance=voxel_size*2,
+                estimation_method=pipreg.TransformationEstimationPointToPoint(False),
+                ransac_n=4,
+                checkers=[pipreg.CorrespondenceCheckerBasedOnEdgeLength(0.9)],
+                criteria=pipreg.RANSACConvergenceCriteria(8000000, 500),
+            )
+            cnt += 1
+            if reg_ransac.fitness == 0:
+                score = 1e10
+            else:
+                score = reg_ransac.inlier_rmse / reg_ransac.fitness
+            if score < best_score:
+                best_score = score
+                best_reg = reg_ransac
+
+        return best_reg, best_score
+
+    @staticmethod
+    def register(pcl: np.ndarray, model: trimesh.Trimesh, voxel_size=0.01):
+        radius_normal = voxel_size * 2
+        pipreg = o3d.pipelines.registration
+        model_pcd = o3d.geometry.PointCloud()
+        model_pcd.points = o3d.utility.Vector3dVector(model.vertices)
+        model_downsampled = model_pcd.voxel_down_sample(voxel_size)
+        model_downsampled.estimate_normals(
+            search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                radius=radius_normal, max_nn=30
+            )
+        )
+        model_fpfh = pipreg.compute_fpfh_feature(
+            model_downsampled,
+            o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=100),
+        )
+
+        source_pcd = o3d.geometry.PointCloud()
+        source_pcd.points = o3d.utility.Vector3dVector(pcl)
+        source_downsampled = source_pcd.voxel_down_sample(voxel_size)
+        source_downsampled.estimate_normals(
+            search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                radius=radius_normal, max_nn=30
+            )
+        )
+        source_fpfh = pipreg.compute_fpfh_feature(
+            source_downsampled,
+            o3d.geometry.KDTreeSearchParamHybrid(radius=radius_normal, max_nn=100),
+        )
+
+        reg_ransac, ransac_best_score = PtsUtil.multi_scale_ransac(
+            source_downsampled,
+            model_downsampled,
+            source_fpfh,
+            model_fpfh,
+            voxel_size_range=(0.03, 0.005),
+            steps=3,
+        )
+        reg_icp, icp_best_score = PtsUtil.multi_scale_icp(
+            source_downsampled,
+            model_downsampled,
+            voxel_size_range=(0.02, 0.001),
+            init_transformation=reg_ransac.transformation,
+            steps=50,
+        )
+        return reg_icp.transformation
+    
+    @staticmethod
+    def register_fine(pcl: np.ndarray, model: trimesh.Trimesh, voxel_size=0.01):
+        radius_normal = voxel_size * 2
+        pipreg = o3d.pipelines.registration
+        model_pcd = o3d.geometry.PointCloud()
+        model_pcd.points = o3d.utility.Vector3dVector(model.vertices)
+        model_downsampled = model_pcd.voxel_down_sample(voxel_size)
+        model_downsampled.estimate_normals(
+            search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                radius=radius_normal, max_nn=30
+            )
+        )
+        source_pcd = o3d.geometry.PointCloud()
+        source_pcd.points = o3d.utility.Vector3dVector(pcl)
+        source_downsampled = source_pcd.voxel_down_sample(voxel_size)
+        source_downsampled.estimate_normals(
+            search_param=o3d.geometry.KDTreeSearchParamHybrid(
+                radius=radius_normal, max_nn=30
+            )
+        )
+        reg_icp, icp_best_score = PtsUtil.multi_scale_icp(
+            source_downsampled,
+            model_downsampled,
+            voxel_size_range=(0.02, 0.001),
+            init_transformation=np.eye(4),
+            steps=50,
+        )
+        return reg_icp.transformation
+    @staticmethod
+    def get_pts_from_depth(depth, cam_intrinsic, cam_extrinsic):
+        h, w = depth.shape
+        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
+
+        z = depth
+        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        mask = mask.reshape(-1, 4)
+        points_camera = np.concatenate(
+            [points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
+        )
+
+        points_world = np.dot(cam_extrinsic, points_camera.T).T[:, :3]
+        data = {
+            "points_world": points_world,
+            "points_camera": points_camera,
+        }
+        return data
--- a/utils/reconstruction_util.py
+++ b/utils/reconstruction_util.py
@@ -0,0 +1,205 @@
+import numpy as np
+from scipy.spatial import cKDTree
+from utils.pts_util import PtsUtil
+
+class ReconstructionUtil:
+    
+    @staticmethod
+    def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
+        kdtree = cKDTree(combined_point_cloud)
+        distances, _ = kdtree.query(target_point_cloud)
+        covered_points_num = np.sum(distances < threshold*2)
+        coverage_rate = covered_points_num / target_point_cloud.shape[0]
+        return coverage_rate, covered_points_num
+    
+    @staticmethod
+    def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01):
+        kdtree = cKDTree(combined_point_cloud)
+        distances, _ = kdtree.query(new_point_cloud)
+        overlapping_points = np.sum(distances < voxel_size*2)
+        cm = 0.01
+        voxel_size_cm = voxel_size / cm
+        overlap_area = overlapping_points * voxel_size_cm * voxel_size_cm
+        return overlap_area > overlap_area_threshold
+
+
+    @staticmethod
+    def get_new_added_points(old_combined_pts, new_pts, threshold=0.005):
+        if old_combined_pts.size == 0:
+            return new_pts
+        if new_pts.size == 0:
+            return np.array([])
+
+        tree = cKDTree(old_combined_pts)
+        distances, _ = tree.query(new_pts, k=1)
+        new_added_points = new_pts[distances > threshold]
+        return new_added_points
+    
+    @staticmethod
+    def compute_next_best_view_sequence_with_overlap(target_point_cloud, point_cloud_list, scan_points_indices_list, threshold=0.01, soft_overlap_threshold=0.5, hard_overlap_threshold=0.7, init_view = 0, scan_points_threshold=5, status_info=None):
+        selected_views = [init_view]
+        combined_point_cloud = point_cloud_list[init_view]
+        history_indices = [scan_points_indices_list[init_view]]
+        
+        max_rec_pts = np.vstack(point_cloud_list)
+        downsampled_max_rec_pts = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold)
+        
+        max_rec_pts_num = downsampled_max_rec_pts.shape[0]
+        max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate(target_point_cloud, downsampled_max_rec_pts, threshold)
+        
+        new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, combined_point_cloud, threshold)
+        current_coverage = new_coverage
+        current_covered_num = new_covered_num
+        
+        remaining_views = list(range(len(point_cloud_list)))
+        view_sequence = [(init_view, current_coverage)]
+        cnt_processed_view = 0
+        remaining_views.remove(init_view)
+        curr_rec_pts_num = combined_point_cloud.shape[0]
+        
+        while remaining_views:
+            best_view = None
+            best_coverage_increase = -1
+            best_combined_point_cloud = None
+            best_covered_num = 0
+        
+            for view_index in remaining_views:
+                if point_cloud_list[view_index].shape[0] == 0:
+                    continue
+                if selected_views:
+                    new_scan_points_indices = scan_points_indices_list[view_index]
+                    if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
+                        overlap_threshold = hard_overlap_threshold
+                    else:
+                        overlap_threshold = soft_overlap_threshold
+                    overlap_rate = ReconstructionUtil.compute_overlap_rate(point_cloud_list[view_index],combined_point_cloud, threshold)
+                    if overlap_rate < overlap_threshold:
+                        continue
+                new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
+                new_downsampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold)
+                new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, threshold)
+
+                coverage_increase = new_coverage - current_coverage
+                if coverage_increase > best_coverage_increase:
+                    best_coverage_increase = coverage_increase
+                    best_view = view_index
+                    best_covered_num = new_covered_num
+                    best_combined_point_cloud = new_downsampled_combined_point_cloud
+                    
+            
+            if best_view is not None:
+                if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
+                    break
+                
+                selected_views.append(best_view)
+                best_rec_pts_num = best_combined_point_cloud.shape[0]
+                print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
+                print(f"Current coverage: {current_coverage}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
+                current_covered_num = best_covered_num
+                curr_rec_pts_num = best_rec_pts_num
+                combined_point_cloud = best_combined_point_cloud
+                remaining_views.remove(best_view)
+                history_indices.append(scan_points_indices_list[best_view])
+                current_coverage += best_coverage_increase
+                cnt_processed_view += 1
+                if status_info is not None:
+                    sm = status_info["status_manager"]
+                    app_name = status_info["app_name"]
+                    runner_name = status_info["runner_name"]
+                    sm.set_status(app_name, runner_name, "current coverage", current_coverage)
+                    sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))
+            
+                view_sequence.append((best_view, current_coverage))
+                
+            else:
+                break
+        if status_info is not None:
+            sm = status_info["status_manager"]
+            app_name = status_info["app_name"]
+            runner_name = status_info["runner_name"]
+            sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
+        return view_sequence, remaining_views, combined_point_cloud
+
+    @staticmethod
+    def compute_next_best_view_with_overlap(scanned_pts, point_cloud_list, selected_view, history_indices, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, scan_points_threshold=5):
+        max_rec_pts = np.vstack(point_cloud_list)
+        downsampled_max_rec_pts = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold)
+        best_view = None
+        new_downsampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(scanned_pts,threshold)
+        best_coverage,_ = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, threshold)
+        best_covered_num = new_downsampled_combined_point_cloud.shape[0]
+        #import ipdb; ipdb.set_trace()
+        for view in range(len(point_cloud_list)):
+            print(f"Processing view {view}/{len(point_cloud_list)}: {best_coverage},{best_covered_num}")
+            if point_cloud_list[view].shape[0] == 0:
+                continue
+            if view in selected_view:
+                continue
+            
+            new_scan_points_indices = scan_points_indices_list[view]
+            
+            # if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
+            #     curr_overlap_area_threshold = overlap_area_threshold
+            # else:
+            #     curr_overlap_area_threshold = overlap_area_threshold * 0.8
+                
+            # if not ReconstructionUtil.check_overlap(point_cloud_list[view], scanned_pts, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
+            #     continue
+            
+            
+            new_combined_point_cloud = np.vstack([scanned_pts ,point_cloud_list[view]])
+            new_downsampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold)
+           
+            new_coverage, _ = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, threshold)
+            new_covered_num = new_downsampled_combined_point_cloud.shape[0]
+            print("the new coverage_rate: ", new_coverage, " the new covered num: ", new_covered_num, " the fake covered num:" , _)
+            if new_coverage > best_coverage :
+                #import ipdb; ipdb.set_trace()
+                best_coverage = new_coverage
+                best_covered_num = new_covered_num
+                best_view = view
+                
+        return best_view, best_coverage, best_covered_num
+        
+    @staticmethod
+    def generate_scan_points(display_table_top, display_table_radius, min_distance=0.03, max_points_num = 500, max_attempts = 1000):
+        points = []
+        attempts = 0
+        while len(points) < max_points_num and attempts < max_attempts:
+            angle = np.random.uniform(0, 2 * np.pi)
+            r = np.random.uniform(0, display_table_radius)
+            x = r * np.cos(angle)
+            y = r * np.sin(angle)
+            z = display_table_top
+            new_point = (x, y, z)
+            if all(np.linalg.norm(np.array(new_point) - np.array(existing_point)) >= min_distance for existing_point in points):
+                points.append(new_point)
+            attempts += 1
+        return points
+    
+    @staticmethod
+    def compute_covered_scan_points(scan_points, point_cloud, threshold=0.01):
+        
+        tree = cKDTree(point_cloud)
+        covered_points = []
+        indices = []
+        for i, scan_point in enumerate(scan_points):
+            if tree.query_ball_point(scan_point, threshold):
+                covered_points.append(scan_point)
+                indices.append(i)
+        return covered_points, indices
+    
+    @staticmethod
+    def check_scan_points_overlap(history_indices, indices2, threshold=5):
+        try:
+            if len(indices2) == 0:
+                return False
+            for indices1 in history_indices:
+                if len(set(indices1).intersection(set(indices2))) >= threshold:
+                    return True
+        except Exception as e:
+            print(e)
+            import ipdb; ipdb.set_trace()
+        return False
+
+        
--- a/utils/render_util.py
+++ b/utils/render_util.py
@@ -0,0 +1,45 @@
+
+import os
+import json
+import subprocess
+import tempfile
+import shutil
+from utils.data_load import DataLoadUtil
+from utils.pts_util import PtsUtil
+class RenderUtil:
+
+    @staticmethod
+    def render_pts(cam_pose, object_name, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
+        nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
+        with tempfile.TemporaryDirectory() as temp_dir:
+            params = {
+                "cam_pose": nO_to_world_pose.tolist(),
+                "object_name": scene_path
+            }
+            params_data_path = os.path.join(temp_dir, "params.json")
+            with open(params_data_path, 'w') as f:
+                json.dump(params, f)
+            result = subprocess.run([
+                'blender', '-b', '-P', script_path, '--', temp_dir
+            ], capture_output=True, text=True)
+            if result.returncode != 0:
+                print("Blender script failed:")
+                print(result.stderr)
+                return None
+            path = os.path.join(temp_dir, "tmp")
+            point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
+            cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
+            
+            filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
+            full_scene_point_cloud = None
+            if require_full_scene:
+                depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
+                point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
+                point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
+            
+                point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
+                point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
+                full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
+
+            
+            return filtered_point_cloud, full_scene_point_cloud
--- a/utils/view_util.py
+++ b/utils/view_util.py
@@ -0,0 +1,132 @@
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+from dataclasses import dataclass
+
+
+@dataclass
+class CameraIntrinsics:
+    width: int
+    height: int
+    fx: float
+    fy: float
+    cx: float
+    cy: float
+
+    @property
+    def intrinsic_matrix(self):
+        return np.array([[self.fx, 0, self.cx], [0, self.fy, self.cy], [0, 0, 1]])
+
+
+@dataclass
+class CameraExtrinsics:
+    def __init__(self, rotation: np.ndarray, translation: np.ndarray, rot_type: str):
+        """
+        rotation: 3x3 rotation matrix or 1x3 euler angles or 1x4 quaternion
+        translation: 1x3 or 3x1 translation vector
+        rot_type: "mat", "euler_xyz", "quat_xyzw"
+        """
+        assert rot_type in ["mat", "euler_xyz", "quat_xyzw"]
+        if rot_type == "mat":
+            self._rot = R.from_matrix(rotation)
+        elif rot_type == "euler_xyz":
+            self._rot = R.from_euler('xyz', rotation, degrees=True)
+        elif rot_type == "quat_xyzw":
+            self._rot = R.from_quat(rotation)
+        self._translation = translation
+
+    @property
+    def extrinsic_matrix(self):
+        return np.vstack([np.hstack([self._rot.as_matrix(), self._translation.reshape(3, 1)]), [0, 0, 0, 1]])
+    
+    @property
+    def rotation_euler_xyz(self):
+        return self._rot.as_euler('xyz', degrees=True)
+    
+    @property
+    def rotation_quat_xyzw(self):
+        return self._rot.as_quat()
+    
+    @property
+    def rotation_matrix(self):
+        return self._rot.as_matrix()
+    
+    @property
+    def translation(self):
+        return self._translation
+    
+
+@dataclass
+class CameraData:
+    def __init__(self, depth_image: np.ndarray, image_id: int, intrinsics: CameraIntrinsics, extrinsics: CameraExtrinsics):
+        self._depth_image = depth_image
+        self._image_id = image_id
+        self._intrinsics = intrinsics
+        self._extrinsics = extrinsics
+
+    @property
+    def depth_image(self):
+        return self._depth_image
+    
+    @property
+    def image_id(self):
+        return self._image_id
+    
+    @property
+    def intrinsics(self):
+        return self._intrinsics.intrinsic_matrix
+    
+    @property
+    def extrinsics(self):
+        return self._extrinsics.extrinsic_matrix
+    
+    @property
+    def projection_matrix(self):
+        return self.intrinsics @ self.extrinsics[:3, :4]
+    
+    @property
+    def pts_camera(self):
+        height, width = self.depth_image.shape
+        v, u = np.indices((height, width))
+        points = np.vstack([u.flatten(), v.flatten(), np.ones_like(u.flatten())]) # 3xN
+        points = np.linalg.inv(self.intrinsics) @ points                          # 3xN
+        points = points.T                                                         # Nx3
+        points = points * self.depth_image.flatten()[:, np.newaxis]               # Nx3
+        points = points[points[:, 2] > 0]                                         # Nx3
+        return points
+    
+    @property
+    def pts_world(self):
+        homogeneous_pts = np.hstack([self.pts_camera, np.ones((self.pts_camera.shape[0], 1))]) # Nx4
+        pts_world = self.extrinsics @ homogeneous_pts.T # 4xN
+        return pts_world[:3, :].T
+    
+class ViewUtil:
+    def get_pts(view_data):
+        image_id = view_data["image_id"]
+        depth_intrinsics = view_data["depth_intrinsics"]
+        depth_extrinsics = view_data["depth_extrinsics"]
+        depth_image = np.array(view_data["depth_image"], dtype=np.uint16)
+        if image_id is None:
+            return None
+        else:
+            camera_intrinsics = CameraIntrinsics(
+                depth_intrinsics['width'],
+                depth_intrinsics['height'],
+                depth_intrinsics['fx'], 
+                depth_intrinsics['fy'], 
+                depth_intrinsics['cx'], 
+                depth_intrinsics['cy']
+            )
+            camera_extrinsics = CameraExtrinsics(
+                depth_extrinsics[:3, :3], 
+                depth_extrinsics[:3, 3],
+                rot_type="mat"
+            )
+            camera_data = CameraData(depth_image, image_id, camera_intrinsics, camera_extrinsics)
+            pts = camera_data.pts_world
+        return pts/1000
+    
+    def get_camera_pose(view_data, translation_scale=0.001):
+        depth_extrinsics = view_data["depth_extrinsics"]
+        depth_extrinsics[:3, 3] *= translation_scale
+        return depth_extrinsics
--- a/utils/vis.py
+++ b/utils/vis.py
@@ -0,0 +1,172 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import sys
+import os
+import trimesh
+
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from utils.data_load import DataLoadUtil
+from utils.pts_util import PtsUtil
+
+class visualizeUtil:
+    
+    @staticmethod
+    def save_all_cam_pos_and_cam_axis(root, scene, output_dir):
+        length = DataLoadUtil.get_scene_seq_length(root, scene)
+        all_cam_pos = []
+        all_cam_axis = []
+        for i in range(length):
+            path = DataLoadUtil.get_path(root, scene, i)
+            cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+            cam_pose = cam_info["cam_to_world"]
+            cam_pos = cam_pose[:3, 3]
+            cam_axis = cam_pose[:3, 2] 
+            
+            num_samples = 10
+            sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
+            sample_points = np.array(sample_points)
+        
+            all_cam_pos.append(cam_pos)
+            all_cam_axis.append(sample_points)
+        
+        all_cam_pos = np.array(all_cam_pos)
+        all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
+        np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
+        np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
+        
+    @staticmethod
+    def save_all_combined_pts(root, scene, output_dir):
+        length = DataLoadUtil.get_scene_seq_length(root, scene)
+        all_combined_pts = []   
+        for i in range(length):
+            path = DataLoadUtil.get_path(root, scene, i)
+            pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
+            if pts.shape[0] == 0:
+                continue
+            all_combined_pts.append(pts)
+        all_combined_pts = np.vstack(all_combined_pts)
+        downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
+        np.savetxt(os.path.join(output_dir, "all_combined_pts.txt"), downsampled_all_pts)
+        
+    @staticmethod
+    def save_target_mesh_at_world_space(
+        root, model_dir, scene_name, display_table_as_world_space_origin=True
+    ):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        target_name = scene_info["target_name"]
+        transformation = scene_info[target_name]
+        if display_table_as_world_space_origin:
+            location = transformation["location"] - DataLoadUtil.get_display_table_top(
+                root, scene_name
+            )
+        else:
+            location = transformation["location"]
+        rotation_euler = transformation["rotation_euler"]
+        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
+        pose_mat[:3, 3] = location
+
+        mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
+        mesh_dir = os.path.join(root, scene_name, "mesh")
+        if not os.path.exists(mesh_dir):
+            os.makedirs(mesh_dir)
+        model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
+        mesh.export(model_path)
+
+    @staticmethod
+    def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
+        target_mask_label = (0, 255, 0, 255)
+        path = DataLoadUtil.get_path(root, scene, frame_idx)
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
+        depth = DataLoadUtil.load_depth(
+                path, cam_info["near_plane"], 
+                cam_info["far_plane"], 
+                binocular=binocular,
+            )
+        if isinstance(depth, tuple):
+            depth = depth[0]
+            
+        mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
+        normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
+        ''' target points '''
+        if mask is None:
+            target_mask_img = np.ones_like(depth, dtype=bool)
+        else:
+            target_mask_img = (mask == target_mask_label).all(axis=-1)
+        cam_intrinsic = cam_info["cam_intrinsic"]
+        z = depth[target_mask_img]
+        i, j = np.nonzero(target_mask_img)
+        x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+        
+        random_downsample_N = 1000
+
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        normal_camera = normal[target_mask_img].reshape(-1, 3)
+        sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
+                    points_camera, random_downsample_N, require_idx=True
+                )
+        if len(sampled_target_points) == 0:
+            print("No target points")
+            
+
+        sampled_normal_camera  = normal_camera[idx]
+        sampled_visualized_normal = []
+        sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
+        sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
+        num_samples = 10
+        for i in range(len(sampled_target_points)):
+            sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
+            
+        sampled_visualized_normal = np.array(sampled_visualized_normal).reshape(-1, 3)
+        np.savetxt(os.path.join(output_dir, "target_pts.txt"), sampled_target_points)
+        np.savetxt(os.path.join(output_dir, "target_normal.txt"), sampled_visualized_normal)
+    
+    @staticmethod
+    def visualize_pts_and_camera(pts, camera):
+        import plotly.graph_objects as go
+        fig = go.Figure()
+        if pts is not None:
+            fig.add_trace(go.Scatter3d(
+                x=pts[:, 0], y=pts[:, 1], z=pts[:, 2],
+                mode='markers', marker=dict(size=1, color='gray', opacity=0.5), name='Input Points'
+            ))
+        colors = ['aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance',
+             'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg']
+        
+        color = colors[0]
+        origin_candidate = camera[:3, 3]
+        z_axis_candidate = camera[:3, 2]
+        fig.add_trace(go.Cone(
+            x=[origin_candidate[0]], y=[origin_candidate[1]], z=[origin_candidate[2]],
+            u=[z_axis_candidate[0]], v=[z_axis_candidate[1]], w=[z_axis_candidate[2]],
+            colorscale=color,
+            sizemode="absolute", sizeref=0.1, anchor="tail", showscale=False
+        ))
+        
+        fig.update_layout(
+            title="Clustered Poses and Input Points",
+            scene=dict(
+                xaxis_title='X',
+                yaxis_title='Y',
+                zaxis_title='Z'
+            ),
+            margin=dict(l=0, r=0, b=0, t=40),
+            scene_camera=dict(eye=dict(x=1.25, y=1.25, z=1.25))
+        )
+        
+        fig.show()
+
+
+# ------ Debug ------
+
+if __name__ == "__main__":
+    root = r"/home/yan20/nbv_rec/project/franka_control/output"
+    model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
+    scene = "box_1"
+    output_dir = f"/home/yan20/nbv_rec/project/franka_control/output/{scene}/visualize"
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+    visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
+    visualizeUtil.save_all_combined_pts(root, scene, output_dir)
+    #visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
+    #visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)