43 Commits

SHA1 Message Date (author: hofee)
2cd811c1b7 upd 2024-11-07 19:33:18 +08:00
5bcd0fc6e3 upd 2024-11-04 23:49:12 +08:00
2b7243d1be upd infernce 2024-11-04 17:17:54 +08:00
04d3a359e1 upd 2024-11-02 21:54:46 +00:00
287983277a global: debug inference 2024-11-01 22:51:16 +00:00
982a3b9b60 global: inference debug 2024-11-01 21:58:44 +00:00
ecd4cfa806 global: debug inference 2024-11-01 15:47:11 +00:00
985a08d89c global: upd inference 2024-11-01 08:43:13 +00:00
b221036e8b global: upd 2024-10-31 16:02:26 +00:00
097712c0ea global_only: ratio2 2024-10-30 15:58:32 +00:00
a954ed0998 global_only: ratio2 2024-10-30 15:49:59 +00:00
f5f8e4266f global_only: ratio 2024-10-30 15:49:11 +00:00
8a05b7883d global_only: train 2024-10-30 15:46:15 +00:00
e23697eb87 global_only: debug 2024-10-29 16:21:30 +00:00
2487039445 global_only: config 2024-10-29 12:18:51 +00:00
f533104e4a global_only: pipeline 2024-10-29 12:04:54 +00:00
a21538c90a global_only: dataset 2024-10-29 11:41:44 +00:00
872405e239 remove fps 2024-10-29 11:23:28 +00:00
b13e45bafc solve merge 2024-10-29 08:14:43 +00:00
63a246c0c8 debug new training 2024-10-28 19:15:48 +00:00
9e39c6c6c9 solve merge 2024-10-28 18:27:16 +00:00
3c9e2c8d12 solve merge 2024-10-28 18:25:53 +00:00
a883a31968 solve merge 2024-10-28 17:03:03 +00:00
49bcf203a8 update 2024-10-28 16:48:34 +00:00
1c443e533d add inference_server 2024-10-27 04:17:08 -05:00
3b9c966fd9 Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-26 03:24:18 -05:00
a41571e79c update 2024-10-26 03:24:01 -05:00
bd27226f0f solve merge 2024-10-25 14:40:26 +00:00
5c56dae24f upd 2024-10-24 20:19:23 +08:00
ebb1ab3c61 udp 2024-10-24 20:18:47 +08:00
a1226eb294 update normal in computing strategy 2024-10-23 11:13:18 -05:00
9d0119549e Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-23 02:59:18 -05:00
64891ef189 update normal strategy 2024-10-23 02:58:58 -05:00
75c70a9e59 fix no normal case 2024-10-23 14:54:53 +08:00
7e68259f6d update clean preprocess 2024-10-23 01:03:40 -05:00
64b22fd0f4 solve merge 2024-10-23 13:59:12 +08:00
b18c1591b7 load 16bit float 2024-10-23 13:57:45 +08:00
c55a398b6d update nrm 2024-10-23 00:47:28 -05:00
e25f7b3334 add save preprocessed normals 2024-10-23 00:42:18 -05:00
cd56d9ea58 update readme 2024-10-22 16:42:10 +08:00
d58c7980ed update 2024-10-22 16:41:02 +08:00
0f61e1d64d Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-21 07:33:40 +00:00
9ca0851bf7 debug pipeline 2024-10-21 07:33:32 +00:00
29 changed files with 1368 additions and 477 deletions

View File

@@ -75,7 +75,7 @@ There are two ways to render the dataset:
 If you want to visually monitor the rendering progress and machine resource usage:
-1. In the `view_generate_config.yaml` file, under the `runner-generate` section, run:
+1. In the terminal, run:
 ```
 ptb ui
 ```

View File

@@ -5,5 +5,5 @@ from runners.strategy_generator import StrategyGenerator
 class DataGenerateApp:
     @staticmethod
     def start():
-        StrategyGenerator("configs/server/server_strategy_generate_config.yaml").run()
+        StrategyGenerator("configs/local/strategy_generate_config.yaml").run()

View File

@@ -1,5 +1,6 @@
 from PytorchBoot.application import PytorchBootApplication
 from runners.inferencer import Inferencer
+from runners.inference_server import InferencerServer

 @PytorchBootApplication("inference")
 class InferenceApp:
@@ -14,3 +15,17 @@ class InferenceApp:
         Evaluator("path_to_your_eval_config").run()
         '''
         Inferencer("./configs/local/inference_config.yaml").run()
+
+@PytorchBootApplication("server")
+class InferenceServerApp:
+    @staticmethod
+    def start():
+        '''
+        call default or your custom runners here, code will be executed
+        automatically when type "pytorch-boot run" or "ptb run" in terminal
+        example:
+            Trainer("path_to_your_train_config").run()
+            Evaluator("path_to_your_eval_config").run()
+        '''
+        InferencerServer("./configs/server/server_inference_server_config.yaml").run()

View File

@@ -5,5 +5,5 @@ from runners.data_spliter import DataSpliter
 class DataSplitApp:
     @staticmethod
     def start():
-        DataSpliter("configs/server/split_dataset_config.yaml").run()
+        DataSpliter("configs/server/server_split_dataset_config.yaml").run()

View File

@@ -1,76 +1,72 @@
 runner:
   general:
-    seed: 1
+    seed: 0
     device: cuda
     cuda_visible_devices: "0,1,2,3,4,5,6,7"

   experiment:
-    name: w_gf_wo_lf_full
+    name: train_ab_global_only
     root_dir: "experiments"
-    epoch: 1 # -1 stands for last epoch
+    epoch: -1 # -1 stands for last epoch

   test:
     dataset_list:
-      - OmniObject3d_train
+      - OmniObject3d_test

     blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
-    output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset"
-    pipeline: nbv_reconstruction_global_pts_pipeline
+    output_dir: "/media/hofee/data/data/new_inference_test_output"
+    pipeline: nbv_reconstruction_pipeline
+    voxel_size: 0.003

 dataset:
-  OmniObject3d_train:
-    root_dir: "/media/hofee/repository/nbv_reconstruction_data_512"
+  # OmniObject3d_train:
+  #   root_dir: "C:\\Document\\Datasets\\inference_test1"
+  #   model_dir: "C:\\Document\\Datasets\\scaled_object_meshes"
+  #   source: seq_reconstruction_dataset_preprocessed
+  #   split_file: "C:\\Document\\Datasets\\data_list\\sample.txt"
+  #   type: test
+  #   filter_degree: 75
+  #   ratio: 1
+  #   batch_size: 1
+  #   num_workers: 12
+  #   pts_num: 8192
+  #   load_from_preprocess: True
+  OmniObject3d_test:
+    root_dir: "/media/hofee/data/data/new_testset_output"
     model_dir: "/media/hofee/data/data/scaled_object_meshes"
-    source: seq_nbv_reconstruction_dataset
-    split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt"
+    source: seq_reconstruction_dataset_preprocessed
+    # split_file: "C:\\Document\\Datasets\\data_list\\OmniObject3d_test.txt"
     type: test
     filter_degree: 75
-    ratio: 1
+    eval_list:
+      - pose_diff
+      - coverage_rate_increase
+    ratio: 0.1
     batch_size: 1
     num_workers: 12
-    pts_num: 4096
-    load_from_preprocess: False
+    pts_num: 8192
+    load_from_preprocess: True

 pipeline:
-  nbv_reconstruction_local_pts_pipeline:
+  nbv_reconstruction_pipeline:
     modules:
       pts_encoder: pointnet_encoder
       seq_encoder: transformer_seq_encoder
       pose_encoder: pose_encoder
       view_finder: gf_view_finder
     eps: 1e-5
-    global_scanned_feat: False
-
-  nbv_reconstruction_global_pts_pipeline:
-    modules:
-      pts_encoder: pointnet_encoder
-      pose_seq_encoder: transformer_pose_seq_encoder
-      pose_encoder: pose_encoder
-      view_finder: gf_view_finder
-    eps: 1e-5
     global_scanned_feat: True

 module:
   pointnet_encoder:
     in_dim: 3
     out_dim: 1024
     global_feat: True
     feature_transform: False

   transformer_seq_encoder:
-    pts_embed_dim: 1024
-    pose_embed_dim: 256
-    num_heads: 4
-    ffn_dim: 256
-    num_layers: 3
-    output_dim: 2048
-
-  transformer_pose_seq_encoder:
-    pose_embed_dim: 256
+    embed_dim: 256
     num_heads: 4
     ffn_dim: 256
     num_layers: 3
@@ -86,7 +82,8 @@ module:
     sample_mode: ode
     sampling_steps: 500
     sde_mode: ve

   pose_encoder:
     pose_dim: 9
     out_dim: 256
+  pts_num_encoder:
+    out_dim: 64

View File

@@ -12,17 +12,16 @@ runner:
   generate:
     voxel_threshold: 0.003
-    soft_overlap_threshold: 0.3
-    hard_overlap_threshold: 0.6
+    overlap_area_threshold: 30
+    compute_with_normal: False
+    scan_points_threshold: 10
     overwrite: False
-    seq_num: 15
+    seq_num: 10
     dataset_list:
       - OmniObject3d

 datasets:
   OmniObject3d:
-    root_dir: /media/hofee/repository/full_data_output
-    from: 0
-    to: -1 # -1 means end
+    root_dir: /data/hofee/nbv_rec_part2_preprocessed
+    from: 155
+    to: 165 # -1 means end

View File

@@ -84,7 +84,7 @@ module:
   gf_view_finder:
     t_feat_dim: 128
     pose_feat_dim: 256
-    main_feat_dim: 2048
+    main_feat_dim: 3072
     regression_head: Rx_Ry_and_T
     pose_mode: rot_matrix
     per_point_feature: False
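
The new value follows from the concatenation in the pipeline's forward pass (see the pipeline diff further down: `main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1)`). Assuming the widths in the configs above (sequence-encoder `output_dim: 2048`, PointNet `out_dim: 1024`), the combined feature is 2048 + 1024 = 3072. A minimal dimensional check under that assumption:

```python
import torch

# Hypothetical batch of 4; widths assumed from the configs above:
# seq encoder output_dim = 2048, pointnet out_dim = 1024.
seq_feat = torch.zeros(4, 2048)             # Tensor(B x Ds)
global_scanned_feat = torch.zeros(4, 1024)  # Tensor(B x Dg)
main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1)
assert main_feat.shape[-1] == 3072          # must equal main_feat_dim
```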

View File

@@ -10,14 +10,16 @@ runner:
   port: 5000
   from: 0
   to: -1 # -1 means all
-  object_dir: H:\\AI\\Datasets\\object_meshes_part2
-  table_model_path: "H:\\AI\\Datasets\\table.obj"
-  output_dir: C:\\Document\\Datasets\\nbv_rec_part2
+  object_dir: /media/hofee/data/data/scaled_object_meshes
+  table_model_path: "/media/hofee/data/data/others/table.obj"
+  output_dir: /media/hofee/data/data/new_testset
+  object_list_path: /media/hofee/data/data/OmniObject3d_test.txt
+  use_list: True
   binocular_vision: true
   plane_size: 10
   max_views: 512
   min_views: 128
-  random_view_ratio: 0.02
+  random_view_ratio: 0.01
   min_cam_table_included_degree: 20
   max_diag: 0.7
   min_diag: 0.01

View File

@@ -0,0 +1,53 @@
runner:
  general:
    seed: 0
    device: cuda
    cuda_visible_devices: "0,1,2,3,4,5,6,7"

  experiment:
    name: train_ab_global_only
    root_dir: "experiments"
    epoch: -1 # -1 stands for last epoch

  pipeline: nbv_reconstruction_pipeline
  voxel_size: 0.003

pipeline:
  nbv_reconstruction_pipeline:
    modules:
      pts_encoder: pointnet_encoder
      seq_encoder: transformer_seq_encoder
      pose_encoder: pose_encoder
      view_finder: gf_view_finder
    eps: 1e-5
    global_scanned_feat: True

module:
  pointnet_encoder:
    in_dim: 3
    out_dim: 1024
    global_feat: True
    feature_transform: False

  transformer_seq_encoder:
    embed_dim: 256
    num_heads: 4
    ffn_dim: 256
    num_layers: 3
    output_dim: 1024

  gf_view_finder:
    t_feat_dim: 128
    pose_feat_dim: 256
    main_feat_dim: 2048
    regression_head: Rx_Ry_and_T
    pose_mode: rot_matrix
    per_point_feature: False
    sample_mode: ode
    sampling_steps: 500
    sde_mode: ve

  pose_encoder:
    pose_dim: 9
    out_dim: 256

  pts_num_encoder:
    out_dim: 64
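
Configs like this are not read directly; a runner resolves the `pipeline:` name through PytorchBoot's component factory. A minimal sketch of that resolution, mirroring the calls in `runners/inference_server.py` further down (the YAML itself is parsed by the `Runner` base class from `config_path`; `MyRunner` and its stereotype name are hypothetical):

```python
# Sketch mirroring runners/inference_server.py below; MyRunner is hypothetical.
import torch
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory import ComponentFactory
from PytorchBoot.runners.runner import Runner

@stereotype.runner("my_runner")  # hypothetical runner name
class MyRunner(Runner):
    def __init__(self, config_path):
        super().__init__(config_path)  # Runner loads the YAML at config_path
        name = self.config[namespace.Stereotype.PIPELINE]  # "nbv_reconstruction_pipeline"
        self.pipeline: torch.nn.Module = ComponentFactory.create(
            namespace.Stereotype.PIPELINE, name
        )
        self.pipeline = self.pipeline.to(self.device)
```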

View File

@@ -0,0 +1,22 @@
runner:
  general:
    seed: 0
    device: cpu
    cuda_visible_devices: "0,1,2,3,4,5,6,7"

  experiment:
    name: debug
    root_dir: "experiments"

  split: #
    root_dir: "/data/hofee/data/packed_preprocessed_data"
    type: "unseen_instance" # "unseen_category"
    datasets:
      OmniObject3d_train:
        path: "/data/hofee/data/OmniObject3d_train.txt"
        ratio: 0.9
      OmniObject3d_test:
        path: "/data/hofee/data/OmniObject3d_test.txt"
        ratio: 0.1

View File

@@ -3,13 +3,13 @@ runner:
   general:
     seed: 0
     device: cuda
-    cuda_visible_devices: "1"
+    cuda_visible_devices: "0"
     parallel: False

   experiment:
-    name: full_w_global_feat_wo_local_pts_feat
+    name: train_ab_global_only
     root_dir: "experiments"
-    use_checkpoint: False
+    use_checkpoint: True
     epoch: -1 # -1 stands for last epoch
     max_epochs: 5000
     save_checkpoint_interval: 1
@@ -28,57 +28,57 @@ runner:
       - OmniObject3d_test
       - OmniObject3d_val

-  pipeline: nbv_reconstruction_global_pts_pipeline
+  pipeline: nbv_reconstruction_pipeline

 dataset:
   OmniObject3d_train:
-    root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy"
+    root_dir: "/data/hofee/data/new_full_data"
     model_dir: "../data/scaled_object_meshes"
     source: nbv_reconstruction_dataset
-    split_file: "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt"
+    split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
     type: train
     cache: True
     ratio: 1
-    batch_size: 160
-    num_workers: 16
-    pts_num: 4096
+    batch_size: 80
+    num_workers: 128
+    pts_num: 8192
     load_from_preprocess: True

   OmniObject3d_test:
-    root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy"
+    root_dir: "/data/hofee/data/new_full_data"
     model_dir: "../data/scaled_object_meshes"
     source: nbv_reconstruction_dataset
-    split_file: "/home/data/hofee/project/nbv_rec/data/OmniObject3d_test.txt"
+    split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt"
     type: test
     cache: True
     filter_degree: 75
     eval_list:
       - pose_diff
-    ratio: 0.05
-    batch_size: 160
+    ratio: 1
+    batch_size: 80
     num_workers: 12
-    pts_num: 4096
+    pts_num: 8192
     load_from_preprocess: True

   OmniObject3d_val:
-    root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy"
+    root_dir: "/data/hofee/data/new_full_data"
     model_dir: "../data/scaled_object_meshes"
     source: nbv_reconstruction_dataset
-    split_file: "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt"
+    split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
     type: test
     cache: True
     filter_degree: 75
     eval_list:
       - pose_diff
-    ratio: 0.005
-    batch_size: 160
+    ratio: 0.1
+    batch_size: 80
     num_workers: 12
-    pts_num: 4096
+    pts_num: 8192
     load_from_preprocess: True

 pipeline:
-  nbv_reconstruction_local_pts_pipeline:
+  nbv_reconstruction_pipeline:
     modules:
       pts_encoder: pointnet_encoder
       seq_encoder: transformer_seq_encoder
@@ -87,16 +87,6 @@ pipeline:
     eps: 1e-5
     global_scanned_feat: True

-  nbv_reconstruction_global_pts_pipeline:
-    modules:
-      pts_encoder: pointnet_encoder
-      pose_seq_encoder: transformer_seq_encoder
-      pose_encoder: pose_encoder
-      view_finder: gf_view_finder
-    eps: 1e-5
-    global_scanned_feat: True
-
 module:
@@ -107,11 +97,11 @@ module:
     feature_transform: False

   transformer_seq_encoder:
-    embed_dim: 1344
+    embed_dim: 256
     num_heads: 4
     ffn_dim: 256
     num_layers: 3
-    output_dim: 2048
+    output_dim: 1024

   gf_view_finder:
     t_feat_dim: 128
@@ -128,6 +118,9 @@ module:
     pose_dim: 9
     out_dim: 256

+  pts_num_encoder:
+    out_dim: 64
+
 loss_function:
   gf_loss:

View File

@@ -8,7 +8,7 @@ import torch
 import os
 import sys

-sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
+sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")

 from utils.data_load import DataLoadUtil
 from utils.pose import PoseUtil
@@ -31,7 +31,7 @@ class NBVReconstructionDataset(BaseDataset):
         self.load_from_preprocess = config.get("load_from_preprocess", False)

         if self.type == namespace.Mode.TEST:
-            self.model_dir = config["model_dir"]
+            #self.model_dir = config["model_dir"]
             self.filter_degree = config["filter_degree"]
         if self.type == namespace.Mode.TRAIN:
             scale_ratio = 1
@@ -66,7 +66,9 @@ class NBVReconstructionDataset(BaseDataset):
                 if max_coverage_rate > scene_max_coverage_rate:
                     scene_max_coverage_rate = max_coverage_rate
                 max_coverage_rate_list.append(max_coverage_rate)
-        mean_coverage_rate = np.mean(max_coverage_rate_list)
+        if max_coverage_rate_list:
+            mean_coverage_rate = np.mean(max_coverage_rate_list)
         for seq_idx in range(seq_num):
             label_path = DataLoadUtil.get_label_path(
@@ -112,6 +114,10 @@ class NBVReconstructionDataset(BaseDataset):
             except Exception as e:
                 Log.error(f"Save cache failed: {e}")

+    def voxel_downsample_with_mask(self, pts, voxel_size):
+        pass
+
     def __getitem__(self, index):
         data_item_info = self.datalist[index]
         scanned_views = data_item_info["scanned_views"]
@@ -122,7 +128,7 @@ class NBVReconstructionDataset(BaseDataset):
             scanned_views_pts,
             scanned_coverages_rate,
             scanned_n_to_world_pose,
-        ) = ([], [], [], [])
+        ) = ([], [], [])
         for view in scanned_views:
             frame_idx = view[0]
             coverage_rate = view[1]
@@ -160,27 +166,12 @@ class NBVReconstructionDataset(BaseDataset):
         )

         combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
-        fps_downsampled_combined_scanned_pts, fps_idx = PtsUtil.fps_downsample_point_cloud(
-            combined_scanned_views_pts, self.pts_num, require_idx=True
-        )
-        combined_scanned_views_pts_mask = np.zeros(len(scanned_views_pts), dtype=np.uint8)
-        start_idx = 0
-        for i in range(len(scanned_views_pts)):
-            end_idx = start_idx + len(scanned_views_pts[i])
-            combined_scanned_views_pts_mask[start_idx:end_idx] = i
-            start_idx = end_idx
-        fps_downsampled_combined_scanned_pts_mask = combined_scanned_views_pts_mask[fps_idx]
+        voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
+        random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num)

         data_item = {
             "scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32),  # Ndarray(S x Nv x 3)
-            "scanned_pts_mask": np.asarray(fps_downsampled_combined_scanned_pts_mask, dtype=np.uint8),  # Ndarray(N), range(0, S)
-            "combined_scanned_pts": np.asarray(fps_downsampled_combined_scanned_pts, dtype=np.float32),  # Ndarray(N x 3)
+            "combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32),  # Ndarray(N x 3)
             "scanned_coverage_rate": scanned_coverages_rate,  # List(S): Float, range(0, 1)
             "scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32),  # Ndarray(S x 9)
             "best_coverage_rate": nbv_coverage_rate,  # Float, range(0, 1)
@@ -215,14 +206,9 @@ class NBVReconstructionDataset(BaseDataset):
         collate_data["combined_scanned_pts"] = torch.stack(
             [torch.tensor(item["combined_scanned_pts"]) for item in batch]
         )
-        collate_data["scanned_pts_mask"] = torch.stack(
-            [torch.tensor(item["scanned_pts_mask"]) for item in batch]
-        )
         for key in batch[0].keys():
             if key not in [
                 "scanned_pts",
-                "scanned_pts_mask",
                 "scanned_n_to_world_pose_9d",
                 "best_to_world_pose_9d",
                 "combined_scanned_pts",
@@ -241,10 +227,9 @@ if __name__ == "__main__":
     torch.manual_seed(seed)
     np.random.seed(seed)
     config = {
-        "root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
-        "model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
+        "root_dir": "/data/hofee/data/packed_preprocessed_data",
         "source": "nbv_reconstruction_dataset",
-        "split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_test.txt",
+        "split_file": "/data/hofee/data/OmniObject3d_train.txt",
         "load_from_preprocess": True,
         "ratio": 0.5,
         "batch_size": 2,

core/old_seq_dataset.py (new file, 154 lines)

View File

@@ -0,0 +1,154 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys

sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")

from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil


@stereotype.dataset("old_seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
    def __init__(self, config):
        super(SeqNBVReconstructionDataset, self).__init__(config)
        self.type = config["type"]
        if self.type != namespace.Mode.TEST:
            Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
        self.config = config
        self.root_dir = config["root_dir"]
        self.split_file_path = config["split_file"]
        self.scene_name_list = self.load_scene_name_list()
        self.datalist = self.get_datalist()
        self.pts_num = config["pts_num"]
        self.model_dir = config["model_dir"]
        self.filter_degree = config["filter_degree"]
        self.load_from_preprocess = config.get("load_from_preprocess", False)

    def load_scene_name_list(self):
        scene_name_list = []
        with open(self.split_file_path, "r") as f:
            for line in f:
                scene_name = line.strip()
                scene_name_list.append(scene_name)
        return scene_name_list

    def get_datalist(self):
        datalist = []
        for scene_name in self.scene_name_list:
            seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
            scene_max_coverage_rate = 0
            scene_max_cr_idx = 0

            for seq_idx in range(seq_num):
                label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
                label_data = DataLoadUtil.load_label(label_path)
                max_coverage_rate = label_data["max_coverage_rate"]
                if max_coverage_rate > scene_max_coverage_rate:
                    scene_max_coverage_rate = max_coverage_rate
                    scene_max_cr_idx = seq_idx

            label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
            label_data = DataLoadUtil.load_label(label_path)
            first_frame = label_data["best_sequence"][0]
            best_seq_len = len(label_data["best_sequence"])
            datalist.append({
                "scene_name": scene_name,
                "first_frame": first_frame,
                "max_coverage_rate": scene_max_coverage_rate,
                "best_seq_len": best_seq_len,
                "label_idx": scene_max_cr_idx,
            })
        return datalist

    def __getitem__(self, index):
        data_item_info = self.datalist[index]
        first_frame_idx = data_item_info["first_frame"][0]
        first_frame_coverage = data_item_info["first_frame"][1]
        max_coverage_rate = data_item_info["max_coverage_rate"]
        scene_name = data_item_info["scene_name"]
        first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
        first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
        first_left_cam_pose = first_cam_info["cam_to_world"]
        first_center_cam_pose = first_cam_info["cam_to_world_O"]
        first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
        first_pts_num = first_target_point_cloud.shape[0]
        first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
        first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3, :3]))
        first_to_world_trans = first_left_cam_pose[:3, 3]
        first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)
        diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
        voxel_threshold = diag * 0.02
        first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
        scene_path = os.path.join(self.root_dir, scene_name)
        model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)

        data_item = {
            "first_pts_num": np.asarray(first_pts_num, dtype=np.int32),
            "first_pts": np.asarray([first_downsampled_target_point_cloud], dtype=np.float32),
            "combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud, dtype=np.float32),
            "first_to_world_9d": np.asarray([first_to_world_9d], dtype=np.float32),
            "scene_name": scene_name,
            "max_coverage_rate": max_coverage_rate,
            "voxel_threshold": voxel_threshold,
            "filter_degree": self.filter_degree,
            "O_to_L_pose": first_O_to_first_L_pose,
            "first_frame_coverage": first_frame_coverage,
            "scene_path": scene_path,
            "model_points_normals": model_points_normals,
            "best_seq_len": data_item_info["best_seq_len"],
            "first_frame_id": first_frame_idx,
        }
        return data_item

    def __len__(self):
        return len(self.datalist)

    def get_collate_fn(self):
        def collate_fn(batch):
            collate_data = {}
            collate_data["first_pts"] = [torch.tensor(item["first_pts"]) for item in batch]
            collate_data["first_to_world_9d"] = [torch.tensor(item["first_to_world_9d"]) for item in batch]
            collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item["combined_scanned_pts"]) for item in batch])
            for key in batch[0].keys():
                if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
                    collate_data[key] = [item[key] for item in batch]
            return collate_data
        return collate_fn


# -------------- Debug ---------------- #
if __name__ == "__main__":
    import torch

    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)
    config = {
        "root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
        "split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
        "model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
        "ratio": 0.005,
        "batch_size": 2,
        "filter_degree": 75,
        "num_workers": 0,
        "pts_num": 32684,
        "type": namespace.Mode.TEST,
        "load_from_preprocess": True
    }
    ds = SeqNBVReconstructionDataset(config)
    print(len(ds))
    #ds.__getitem__(10)
    dl = ds.get_loader(shuffle=True)
    for idx, data in enumerate(dl):
        data = ds.process_batch(data, "cuda:0")
        print(data)
        # ------ Debug Start ------
        import ipdb; ipdb.set_trace()
        # ------ Debug End ------

View File

@@ -1,4 +1,5 @@
 import torch
+import time
 from torch import nn
 import PytorchBoot.namespace as namespace
 import PytorchBoot.stereotype as stereotype
@@ -6,10 +7,10 @@ from PytorchBoot.factory.component_factory import ComponentFactory
 from PytorchBoot.utils import Log

-@stereotype.pipeline("nbv_reconstruction_global_pts_n_num_pipeline")
-class NBVReconstructionGlobalPointsPipeline(nn.Module):
+@stereotype.pipeline("nbv_reconstruction_pipeline")
+class NBVReconstructionPipeline(nn.Module):
     def __init__(self, config):
-        super(NBVReconstructionGlobalPointsPipeline, self).__init__()
+        super(NBVReconstructionPipeline, self).__init__()
         self.config = config
         self.module_config = config["modules"]
@@ -19,12 +20,8 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         self.pose_encoder = ComponentFactory.create(
             namespace.Stereotype.MODULE, self.module_config["pose_encoder"]
         )
-        self.pts_num_encoder = ComponentFactory.create(
-            namespace.Stereotype.MODULE, self.module_config["pts_num_encoder"]
-        )
-        self.transformer_seq_encoder = ComponentFactory.create(
-            namespace.Stereotype.MODULE, self.module_config["transformer_seq_encoder"]
+        self.seq_encoder = ComponentFactory.create(
+            namespace.Stereotype.MODULE, self.module_config["seq_encoder"]
         )
         self.view_finder = ComponentFactory.create(
             namespace.Stereotype.MODULE, self.module_config["view_finder"]
@@ -32,7 +29,6 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         self.eps = float(self.config["eps"])
-        self.enable_global_scanned_feat = self.config["global_scanned_feat"]

     def forward(self, data):
         mode = data["mode"]
@@ -92,47 +88,23 @@ class NBVReconstructionGlobalPointsPipeline(nn.Module):
         scanned_n_to_world_pose_9d_batch = data[
             "scanned_n_to_world_pose_9d"
         ]  # List(B): Tensor(S x 9)
-        scanned_pts_mask_batch = data[
-            "scanned_pts_mask"
-        ]  # Tensor(B x N)

         device = next(self.parameters()).device

         embedding_list_batch = []

         combined_scanned_pts_batch = data["combined_scanned_pts"]  # Tensor(B x N x 3)
-        global_scanned_feat, perpoint_scanned_feat_batch = self.pts_encoder.encode_points(
-            combined_scanned_pts_batch, require_per_point_feat=True
-        )  # global_scanned_feat: Tensor(B x Dg), perpoint_scanned_feat: Tensor(B x N x Dl)
+        global_scanned_feat = self.pts_encoder.encode_points(
+            combined_scanned_pts_batch, require_per_point_feat=False
+        )  # global_scanned_feat: Tensor(B x Dg)

-        for scanned_n_to_world_pose_9d, scanned_mask, perpoint_scanned_feat in zip(
-            scanned_n_to_world_pose_9d_batch,
-            scanned_pts_mask_batch,
-            perpoint_scanned_feat_batch,
-        ):
-            scanned_target_pts_num = []  # List(S): Int
-            partial_feat_seq = []
-
-            seq_len = len(scanned_n_to_world_pose_9d)
-            for seq_idx in range(seq_len):
-                partial_idx_in_combined_pts = scanned_mask == seq_idx  # Ndarray(V), N->V idx mask
-                partial_perpoint_feat = perpoint_scanned_feat[partial_idx_in_combined_pts]  # Ndarray(V x Dl)
-                partial_feat = torch.mean(partial_perpoint_feat, dim=0)[0]  # Tensor(Dl)
-                partial_feat_seq.append(partial_feat)
-                scanned_target_pts_num.append(partial_perpoint_feat.shape[0])
-            scanned_target_pts_num = torch.tensor(scanned_target_pts_num, dtype=torch.int32).to(device)  # Tensor(S)
+        for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
             scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device)  # Tensor(S x 9)
             pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d)  # Tensor(S x Dp)
-            pts_num_feat_seq = self.pts_num_encoder.encode_pts_num(scanned_target_pts_num)  # Tensor(S x Dn)
-            partial_feat_seq = torch.stack(partial_feat_seq)  # Tensor(S x Dl)
-            seq_embedding = torch.cat([pose_feat_seq, pts_num_feat_seq, partial_feat_seq], dim=-1)  # Tensor(S x (Dp+Dn+Dl))
-            embedding_list_batch.append(seq_embedding)  # List(B): Tensor(S x (Dp+Dn+Dl))
+            seq_embedding = pose_feat_seq
+            embedding_list_batch.append(seq_embedding)  # List(B): Tensor(S x (Dp))

-        seq_feat = self.transformer_seq_encoder.encode_sequence(embedding_list_batch)  # Tensor(B x Ds)
+        seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch)  # Tensor(B x Ds)
         main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1)  # Tensor(B x (Ds+Dg))

         if torch.isnan(main_feat).any():
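
After this simplification each view contributes only its pose embedding (Dp = 256, matching `embed_dim` in the updated configs), and object geometry enters solely through the global PointNet feature, consistent with the `train_ab_global_only` experiment name. The `encode_sequence` implementation is not shown in this diff; a minimal sketch consistent with the config keys (`embed_dim`, `num_heads`, `ffn_dim`, `num_layers`, `output_dim`), padding the variable-length view sequences, running a TransformerEncoder, and mean-pooling over valid views — the repository's actual module may differ:

```python
import torch
from torch import nn

class TransformerSeqEncoderSketch(nn.Module):
    """A sketch, not the project's transformer_seq_encoder module."""
    def __init__(self, embed_dim=256, num_heads=4, ffn_dim=256, num_layers=3, output_dim=1024):
        super().__init__()
        layer = nn.TransformerEncoderLayer(
            d_model=embed_dim, nhead=num_heads, dim_feedforward=ffn_dim, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
        self.out_proj = nn.Linear(embed_dim, output_dim)

    def encode_sequence(self, embedding_list_batch):
        # embedding_list_batch: List(B) of Tensor(S_i x embed_dim), variable S_i
        lengths = [e.shape[0] for e in embedding_list_batch]
        padded = nn.utils.rnn.pad_sequence(embedding_list_batch, batch_first=True)  # B x S_max x D
        pad_mask = torch.zeros(padded.shape[:2], dtype=torch.bool, device=padded.device)
        for i, l in enumerate(lengths):
            pad_mask[i, l:] = True
        feat = self.encoder(padded, src_key_padding_mask=pad_mask)  # B x S_max x D
        # masked mean over valid (non-padded) views
        feat = feat.masked_fill(pad_mask.unsqueeze(-1), 0).sum(1) / torch.tensor(
            lengths, device=feat.device, dtype=feat.dtype
        ).unsqueeze(-1)
        return self.out_proj(feat)  # B x output_dim
```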

View File

@@ -1,154 +1,204 @@
(The file is rewritten wholesale: the removed `SeqNBVReconstructionDataset` body matches `core/old_seq_dataset.py` above; the replacement `SeqReconstructionDataset` reads as follows.)

import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys

sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")

from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil


@stereotype.dataset("seq_reconstruction_dataset")
class SeqReconstructionDataset(BaseDataset):
    def __init__(self, config):
        super(SeqReconstructionDataset, self).__init__(config)
        self.config = config
        self.root_dir = config["root_dir"]
        self.split_file_path = config["split_file"]
        self.scene_name_list = self.load_scene_name_list()
        self.datalist = self.get_datalist()

        self.pts_num = config["pts_num"]
        self.type = config["type"]
        self.cache = config.get("cache")
        self.load_from_preprocess = config.get("load_from_preprocess", False)

        if self.type == namespace.Mode.TEST:
            #self.model_dir = config["model_dir"]
            self.filter_degree = config["filter_degree"]
        if self.type == namespace.Mode.TRAIN:
            scale_ratio = 1
            self.datalist = self.datalist*scale_ratio
        if self.cache:
            expr_root = ConfigManager.get("runner", "experiment", "root_dir")
            expr_name = ConfigManager.get("runner", "experiment", "name")
            self.cache_dir = os.path.join(expr_root, expr_name, "cache")
            # self.preprocess_cache()

    def load_scene_name_list(self):
        scene_name_list = []
        with open(self.split_file_path, "r") as f:
            for line in f:
                scene_name = line.strip()
                if os.path.exists(os.path.join(self.root_dir, scene_name)):
                    scene_name_list.append(scene_name)
        return scene_name_list

    def get_scene_name_list(self):
        return self.scene_name_list

    def get_datalist(self):
        datalist = []
        total = len(self.scene_name_list)
        for idx, scene_name in enumerate(self.scene_name_list):
            print(f"processing {scene_name} ({idx}/{total})")
            scene_max_cr_idx = 0
            frame_len = DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)

            for i in range(frame_len):
                path = DataLoadUtil.get_path(self.root_dir, scene_name, i)
                pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
                if pts.shape[0] == 0:
                    continue
                datalist.append({
                    "scene_name": scene_name,
                    "first_frame": i,
                    "best_seq_len": -1,
                    "max_coverage_rate": 1.0,
                    "label_idx": scene_max_cr_idx,
                })
        return datalist

    def preprocess_cache(self):
        Log.info("preprocessing cache...")
        for item_idx in range(len(self.datalist)):
            self.__getitem__(item_idx)
        Log.success("finish preprocessing cache.")

    def load_from_cache(self, scene_name, curr_frame_idx):
        cache_name = f"{scene_name}_{curr_frame_idx}.txt"
        cache_path = os.path.join(self.cache_dir, cache_name)
        if os.path.exists(cache_path):
            data = np.loadtxt(cache_path)
            return data
        else:
            return None

    def save_to_cache(self, scene_name, curr_frame_idx, data):
        cache_name = f"{scene_name}_{curr_frame_idx}.txt"
        cache_path = os.path.join(self.cache_dir, cache_name)
        try:
            np.savetxt(cache_path, data)
        except Exception as e:
            Log.error(f"Save cache failed: {e}")

    def seq_combined_pts(self, scene, frame_idx_list):
        all_combined_pts = []
        for i in frame_idx_list:
            path = DataLoadUtil.get_path(self.root_dir, scene, i)
            pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
            if pts.shape[0] == 0:
                continue
            all_combined_pts.append(pts)
        all_combined_pts = np.vstack(all_combined_pts)
        downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.003)
        return downsampled_all_pts

    def __getitem__(self, index):
        data_item_info = self.datalist[index]
        max_coverage_rate = data_item_info["max_coverage_rate"]
        best_seq_len = data_item_info["best_seq_len"]
        scene_name = data_item_info["scene_name"]
        (
            scanned_views_pts,
            scanned_coverages_rate,
            scanned_n_to_world_pose,
        ) = ([], [], [])
        view = data_item_info["first_frame"]
        frame_idx = view
        view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
        cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)

        n_to_world_pose = cam_info["cam_to_world"]
        target_point_cloud = (
            DataLoadUtil.load_from_preprocessed_pts(view_path)
        )
        downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(
            target_point_cloud, self.pts_num
        )
        scanned_views_pts.append(downsampled_target_point_cloud)

        n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
            np.asarray(n_to_world_pose[:3, :3])
        )
        first_left_cam_pose = cam_info["cam_to_world"]
        first_center_cam_pose = cam_info["cam_to_world_O"]
        first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
        n_to_world_trans = n_to_world_pose[:3, 3]
        n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
        scanned_n_to_world_pose.append(n_to_world_9d)

        frame_list = []
        for i in range(DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)):
            frame_list.append(i)
        gt_pts = self.seq_combined_pts(scene_name, frame_list)
        data_item = {
            "first_scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32),  # Ndarray(S x Nv x 3)
            "first_scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32),  # Ndarray(S x 9)
            "seq_max_coverage_rate": max_coverage_rate,  # Float, range(0, 1)
            "best_seq_len": best_seq_len,  # Int
            "scene_name": scene_name,  # String
            "gt_pts": gt_pts,  # Ndarray(N x 3)
            "scene_path": os.path.join(self.root_dir, scene_name),  # String
            "O_to_L_pose": first_O_to_first_L_pose,
        }
        return data_item

    def __len__(self):
        return len(self.datalist)


# -------------- Debug ---------------- #
if __name__ == "__main__":
    #import ipdb; ipdb.set_trace()
    import torch
    from tqdm import tqdm
    import pickle
    import os

    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)

    config = {
        "root_dir": "/media/hofee/data/data/new_testset",
        "source": "seq_reconstruction_dataset",
        "split_file": "/media/hofee/data/data/OmniObject3d_test.txt",
        "load_from_preprocess": True,
        "filter_degree": 75,
        "num_workers": 0,
        "pts_num": 8192,
        "type": namespace.Mode.TEST,
    }

    output_dir = "/media/hofee/data/data/new_testset_output"
    os.makedirs(output_dir, exist_ok=True)

    ds = SeqReconstructionDataset(config)
    for i in tqdm(range(len(ds)), desc="processing dataset"):
        output_path = os.path.join(output_dir, f"item_{i}.pkl")
        item = ds.__getitem__(i)
        for key, value in item.items():
            if isinstance(value, np.ndarray):
                item[key] = value.tolist()
        import ipdb; ipdb.set_trace()
        with open(output_path, "wb") as f:
            pickle.dump(item, f)

View File

@@ -0,0 +1,84 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import pickle
import torch
import os
import sys

sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")

from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil


@stereotype.dataset("seq_reconstruction_dataset_preprocessed")
class SeqReconstructionDatasetPreprocessed(BaseDataset):
    def __init__(self, config):
        super(SeqReconstructionDatasetPreprocessed, self).__init__(config)
        self.config = config
        self.root_dir = config["root_dir"]
        self.real_root_dir = r"/media/hofee/data/data/new_testset"
        self.item_list = os.listdir(self.root_dir)

    def __getitem__(self, index):
        data = pickle.load(open(os.path.join(self.root_dir, self.item_list[index]), "rb"))
        data_item = {
            "first_scanned_pts": np.asarray(data["first_scanned_pts"], dtype=np.float32),  # Ndarray(S x Nv x 3)
            "first_scanned_n_to_world_pose_9d": np.asarray(data["first_scanned_n_to_world_pose_9d"], dtype=np.float32),  # Ndarray(S x 9)
            "seq_max_coverage_rate": data["seq_max_coverage_rate"],  # Float, range(0, 1)
            "best_seq_len": data["best_seq_len"],  # Int
            "scene_name": data["scene_name"],  # String
            "gt_pts": np.asarray(data["gt_pts"], dtype=np.float32),  # Ndarray(N x 3)
            "scene_path": os.path.join(self.real_root_dir, data["scene_name"]),  # String
            "O_to_L_pose": np.asarray(data["O_to_L_pose"], dtype=np.float32),
        }
        return data_item

    def __len__(self):
        return len(self.item_list)


# -------------- Debug ---------------- #
if __name__ == "__main__":
    import torch

    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)
    '''
    OmniObject3d_test:
        root_dir: "H:\\AI\\Datasets\\packed_test_data"
        model_dir: "H:\\AI\\Datasets\\scaled_object_meshes"
        source: seq_reconstruction_dataset
        split_file: "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt"
        type: test
        filter_degree: 75
        eval_list:
        - pose_diff
        - coverage_rate_increase
        ratio: 0.1
        batch_size: 1
        num_workers: 12
        pts_num: 8192
        load_from_preprocess: True
    '''
    config = {
        "root_dir": "H:\\AI\\Datasets\\packed_test_data",
        "source": "seq_reconstruction_dataset",
        "split_file": "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt",
        "load_from_preprocess": True,
        "ratio": 1,
        "filter_degree": 75,
        "num_workers": 0,
        "pts_num": 8192,
        "type": "test",
    }
    ds = SeqReconstructionDatasetPreprocessed(config)  # fixed: instantiate the class defined in this file
    print(len(ds))
    print(ds.__getitem__(10))

View File

@@ -0,0 +1,43 @@
import os
import shutil

def clean_scene_data(root, scene):
    # clean up the target point cloud data
    pts_dir = os.path.join(root, scene, "pts")
    if os.path.exists(pts_dir):
        shutil.rmtree(pts_dir)
        print(f"deleted {pts_dir}")

    # clean up the normal data
    nrm_dir = os.path.join(root, scene, "nrm")
    if os.path.exists(nrm_dir):
        shutil.rmtree(nrm_dir)
        print(f"deleted {nrm_dir}")

    # clean up the scan point indices data
    scan_points_indices_dir = os.path.join(root, scene, "scan_points_indices")
    if os.path.exists(scan_points_indices_dir):
        shutil.rmtree(scan_points_indices_dir)
        print(f"deleted {scan_points_indices_dir}")

    # remove the scan points data file
    scan_points_file = os.path.join(root, scene, "scan_points.txt")
    if os.path.exists(scan_points_file):
        os.remove(scan_points_file)
        print(f"deleted {scan_points_file}")

def clean_all_scenes(root, scene_list):
    for idx, scene in enumerate(scene_list):
        print(f"cleaning scene {scene} ({idx+1}/{len(scene_list)})")
        clean_scene_data(root, scene)

if __name__ == "__main__":
    root = r"c:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
    scene_list = os.listdir(root)
    from_idx = 0
    to_idx = len(scene_list)
    print(f"cleaning scenes {scene_list[from_idx:to_idx]}")
    clean_all_scenes(root, scene_list[from_idx:to_idx])
    print("cleaning done")

View File

@@ -0,0 +1,48 @@
import os
import shutil

def pack_scene_data(root, scene, output_dir):
    scene_dir = os.path.join(output_dir, scene)
    if not os.path.exists(scene_dir):
        os.makedirs(scene_dir)

    pts_dir = os.path.join(root, scene, "pts")
    if os.path.exists(pts_dir):
        shutil.move(pts_dir, os.path.join(scene_dir, "pts"))

    scan_points_indices_dir = os.path.join(root, scene, "scan_points_indices")
    if os.path.exists(scan_points_indices_dir):
        shutil.move(scan_points_indices_dir, os.path.join(scene_dir, "scan_points_indices"))

    scan_points_file = os.path.join(root, scene, "scan_points.txt")
    if os.path.exists(scan_points_file):
        shutil.move(scan_points_file, os.path.join(scene_dir, "scan_points.txt"))

    model_pts_nrm_file = os.path.join(root, scene, "points_and_normals.txt")
    if os.path.exists(model_pts_nrm_file):
        shutil.move(model_pts_nrm_file, os.path.join(scene_dir, "points_and_normals.txt"))

    camera_dir = os.path.join(root, scene, "camera_params")
    if os.path.exists(camera_dir):
        shutil.move(camera_dir, os.path.join(scene_dir, "camera_params"))

    scene_info_file = os.path.join(root, scene, "scene_info.json")
    if os.path.exists(scene_info_file):
        shutil.move(scene_info_file, os.path.join(scene_dir, "scene_info.json"))

def pack_all_scenes(root, scene_list, output_dir):
    for idx, scene in enumerate(scene_list):
        print(f"packing scene {scene} ({idx+1}/{len(scene_list)})")
        pack_scene_data(root, scene, output_dir)

if __name__ == "__main__":
    root = r"H:\AI\Datasets\nbv_rec_part2"
    output_dir = r"H:\AI\Datasets\scene_info_part2"
    scene_list = os.listdir(root)
    from_idx = 0
    to_idx = len(scene_list)
    print(f"packing scenes {scene_list[from_idx:to_idx]}")
    pack_all_scenes(root, scene_list[from_idx:to_idx], output_dir)
    print("packing done")

View File

@@ -0,0 +1,41 @@
import os
import shutil

def pack_scene_data(root, scene, output_dir):
    scene_dir = os.path.join(output_dir, scene)
    if not os.path.exists(scene_dir):
        os.makedirs(scene_dir)

    pts_dir = os.path.join(root, scene, "pts")
    if os.path.exists(pts_dir):
        shutil.move(pts_dir, os.path.join(scene_dir, "pts"))

    camera_dir = os.path.join(root, scene, "camera_params")
    if os.path.exists(camera_dir):
        shutil.move(camera_dir, os.path.join(scene_dir, "camera_params"))

    scene_info_file = os.path.join(root, scene, "scene_info.json")
    if os.path.exists(scene_info_file):
        shutil.move(scene_info_file, os.path.join(scene_dir, "scene_info.json"))

    label_dir = os.path.join(root, scene, "label")
    if os.path.exists(label_dir):
        shutil.move(label_dir, os.path.join(scene_dir, "label"))

def pack_all_scenes(root, scene_list, output_dir):
    for idx, scene in enumerate(scene_list):
        print(f"packing {scene} ({idx+1}/{len(scene_list)})")
        pack_scene_data(root, scene, output_dir)

if __name__ == "__main__":
    root = r"/media/hofee/repository/data_part_1"
    output_dir = r"/media/hofee/repository/upload_part1"
    scene_list = os.listdir(root)
    from_idx = 0
    to_idx = len(scene_list)
    print(f"packing {scene_list[from_idx:to_idx]}")
    pack_all_scenes(root, scene_list[from_idx:to_idx], output_dir)
    print("packing done")

View File

@@ -9,8 +9,6 @@ from utils.reconstruction import ReconstructionUtil
 from utils.data_load import DataLoadUtil
 from utils.pts import PtsUtil

-# scan shoe 536
-
 def save_np_pts(path, pts: np.ndarray, file_type="txt"):
     if file_type == "txt":
         np.savetxt(path, pts)
@@ -23,6 +21,12 @@ def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_t
     if not os.path.exists(os.path.join(root, scene, "pts")):
         os.makedirs(os.path.join(root, scene, "pts"))
     save_np_pts(pts_path, target_points, file_type)

+def save_target_normals(root, scene, frame_idx, target_normals: np.ndarray, file_type="txt"):
+    pts_path = os.path.join(root, scene, "nrm", f"{frame_idx}.{file_type}")
+    if not os.path.exists(os.path.join(root, scene, "nrm")):
+        os.makedirs(os.path.join(root, scene, "nrm"))
+    save_np_pts(pts_path, target_normals, file_type)
+
 def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
     indices_path = os.path.join(root, scene, "scan_points_indices", f"{frame_idx}.{file_type}")
@@ -87,8 +91,8 @@ def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_int
 def save_scene_data(root, scene, scene_idx=0, scene_total=1, file_type="txt"):
     ''' configuration '''
-    target_mask_label = (0, 255, 0, 255)
-    display_table_mask_label = (0, 0, 255, 255)
+    target_mask_label = (0, 255, 0)
+    display_table_mask_label = (0, 0, 255)
     random_downsample_N = 32768
     voxel_size = 0.003
     filter_degree = 75
@@ -137,7 +141,7 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1, file_type="txt"):
         has_points = target_points.shape[0] > 0
         if has_points:
-            target_points = PtsUtil.filter_points(
+            target_points, target_normals = PtsUtil.filter_points(
                 target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit=filter_degree, z_range=(min_z, max_z)
             )
@@ -149,8 +153,10 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1, file_type="txt"):
         if not has_points:
             target_points = np.zeros((0, 3))
+            target_normals = np.zeros((0, 3))

         save_target_points(root, scene, frame_id, target_points, file_type=file_type)
+        save_target_normals(root, scene, frame_id, target_normals, file_type=file_type)
         save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)

     save_scan_points(root, scene, scan_points)  # The "done" flag of scene preprocess
@@ -158,17 +164,10 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1, file_type="txt"):
 if __name__ == "__main__":
     #root = "/media/hofee/repository/new_data_with_normal"
-    root = r"C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp"
-    # list_path = r"/media/hofee/repository/full_list.txt"
-    # scene_list = []
-    # with open(list_path, "r") as f:
-    #     for line in f:
-    #         scene_list.append(line.strip())
+    root = "/media/hofee/data/data/new_testset"
     scene_list = os.listdir(root)
     from_idx = 0  # 1000
-    to_idx = 1  # 1500
-    print(scene_list)
+    to_idx = len(scene_list)  # 1500

     cnt = 0
@@ -176,7 +175,15 @@ if __name__ == "__main__":
     total = to_idx - from_idx
     for scene in scene_list[from_idx:to_idx]:
         start = time.time()
-        save_scene_data(root, scene, cnt, total, file_type="npy")
+        if os.path.exists(os.path.join(root, scene, "scan_points.txt")):
+            print(f"Scene {scene} has been processed")
+            cnt += 1
+            continue
+        try:
+            save_scene_data(root, scene, cnt, total, file_type="npy")
+        except Exception as e:
+            print(f"Error occurred when processing scene {scene}")
+            print(e)
         cnt += 1
         end = time.time()
         print(f"Time cost: {end-start}")

runners/inference_server.py (new file, 119 lines)

View File

@@ -0,0 +1,119 @@
import os
import json
import torch
import numpy as np
from flask import Flask, request, jsonify
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory import ComponentFactory
from PytorchBoot.runners.runner import Runner
from PytorchBoot.utils import Log
from utils.pts import PtsUtil
@stereotype.runner("inferencer_server")
class InferencerServer(Runner):
def __init__(self, config_path):
super().__init__(config_path)
''' Web Server '''
self.app = Flask(__name__)
''' Pipeline '''
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
self.pipeline = self.pipeline.to(self.device)
self.pts_num = 8192
''' Experiment '''
self.load_experiment("inferencer_server")
def get_input_data(self, data):
input_data = {}
scanned_pts = data["scanned_pts"]
scanned_n_to_world_pose_9d = data["scanned_n_to_world_pose_9d"]
combined_scanned_views_pts = np.concatenate(scanned_pts, axis=0)
fps_downsampled_combined_scanned_pts, fps_idx = PtsUtil.fps_downsample_point_cloud(
combined_scanned_views_pts, self.pts_num, require_idx=True
)
# combined_scanned_views_pts_mask = np.zeros(len(scanned_pts), dtype=np.uint8)
# start_idx = 0
# for i in range(len(scanned_pts)):
# end_idx = start_idx + len(scanned_pts[i])
# combined_scanned_views_pts_mask[start_idx:end_idx] = i
# start_idx = end_idx
# fps_downsampled_combined_scanned_pts_mask = combined_scanned_views_pts_mask[fps_idx]
input_data["scanned_pts"] = scanned_pts
# input_data["scanned_pts_mask"] = np.asarray(fps_downsampled_combined_scanned_pts_mask, dtype=np.uint8)
input_data["scanned_n_to_world_pose_9d"] = np.asarray(scanned_n_to_world_pose_9d, dtype=np.float32)
input_data["combined_scanned_pts"] = np.asarray(fps_downsampled_combined_scanned_pts, dtype=np.float32)
return input_data
def get_result(self, output_data):
pred_pose_9d = output_data["pred_pose_9d"]
result = {
"pred_pose_9d": pred_pose_9d.tolist()
}
return result
def collate_input(self, input_data):
collated_input_data = {}
collated_input_data["scanned_pts"] = [torch.tensor(input_data["scanned_pts"], dtype=torch.float32, device=self.device)]
collated_input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(input_data["scanned_n_to_world_pose_9d"], dtype=torch.float32, device=self.device)]
collated_input_data["combined_scanned_pts"] = torch.tensor(input_data["combined_scanned_pts"], dtype=torch.float32, device=self.device).unsqueeze(0)
return collated_input_data
def run(self):
Log.info("Loading from epoch {}.".format(self.current_epoch))
@self.app.route("/inference", methods=["POST"])
def inference():
data = request.json
input_data = self.get_input_data(data)
collated_input_data = self.collate_input(input_data)
output_data = self.pipeline.forward_test(collated_input_data)
result = self.get_result(output_data)
return jsonify(result)
self.app.run(host="0.0.0.0", port=5000)
def get_checkpoint_path(self, is_last=False):
return os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME,
"Epoch_{}.pth".format(
self.current_epoch if self.current_epoch != -1 and not is_last else "last"))
def load_checkpoint(self, is_last=False):
self.load(self.get_checkpoint_path(is_last))
Log.success(f"Loaded checkpoint from {self.get_checkpoint_path(is_last)}")
if is_last:
checkpoint_root = os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME)
meta_path = os.path.join(checkpoint_root, "meta.json")
if not os.path.exists(meta_path):
raise FileNotFoundError(
"No checkpoint meta.json file in the experiment {}".format(self.experiments_config["name"]))
file_path = os.path.join(checkpoint_root, "meta.json")
with open(file_path, "r") as f:
meta = json.load(f)
self.current_epoch = meta["last_epoch"]
self.current_iter = meta["last_iter"]
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
self.current_epoch = self.experiments_config["epoch"]
self.load_checkpoint(is_last=(self.current_epoch == -1))
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
def load(self, path):
state_dict = torch.load(path)
self.pipeline.load_state_dict(state_dict)
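For reference, a minimal client sketch for the `/inference` route defined above. The payload keys and response shape follow `get_input_data`/`get_result`; the host, port, and array sizes are illustrative only:

```
import requests
import numpy as np

# Illustrative payload: two scans of 8192 points each, plus one
# 9D pose (6D rotation + 3D translation) per scan.
scans = [np.random.rand(8192, 3).tolist() for _ in range(2)]
poses_9d = np.random.rand(2, 9).tolist()

resp = requests.post(
    "http://localhost:5000/inference",  # host/port as configured in run()
    json={"scanned_pts": scans, "scanned_n_to_world_pose_9d": poses_9d},
)
pred_pose_9d = resp.json()["pred_pose_9d"]  # predicted next-best-view pose, 9D encoding
```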

View File

@@ -19,7 +19,7 @@ from PytorchBoot.dataset import BaseDataset
 from PytorchBoot.runners.runner import Runner
 from PytorchBoot.utils import Log
 from PytorchBoot.status import status_manager
+from utils.data_load import DataLoadUtil

 @stereotype.runner("inferencer")
 class Inferencer(Runner):
     def __init__(self, config_path):
@@ -27,6 +27,7 @@ class Inferencer(Runner):
         self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path")
         self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir")
+        self.voxel_size = ConfigManager.get(namespace.Stereotype.RUNNER, "voxel_size")
         ''' Pipeline '''
         self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
         self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
@@ -34,7 +35,12 @@ class Inferencer(Runner):
         ''' Experiment '''
         self.load_experiment("nbv_evaluator")
-        self.stat_result = {}
+        self.stat_result_path = os.path.join(self.output_dir, "stat.json")
+        if os.path.exists(self.stat_result_path):
+            with open(self.stat_result_path, "r") as f:
+                self.stat_result = json.load(f)
+        else:
+            self.stat_result = {}

         ''' Test '''
         self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST)
@@ -65,59 +71,71 @@ class Inferencer(Runner):
         for dataset_idx, test_set in enumerate(self.test_set_list):
             status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list))
             test_set_name = test_set.get_name()
-            test_loader = test_set.get_loader()
-            if test_loader.batch_size > 1:
-                Log.error("Batch size should be 1 for inference, found {} in {}".format(test_loader.batch_size, test_set_name), terminate=True)
-            total=int(len(test_loader))
-            loop = tqdm(enumerate(test_loader), total=total)
-            for i, data in loop:
+            total=int(len(test_set))
+            for i in tqdm(range(total), desc=f"Processing {test_set_name}", ncols=100):
+                data = test_set.__getitem__(i)
+                scene_name = data["scene_name"]
+                if scene_name != "omniobject3d-book_004":
+                    continue
+                inference_result_path = os.path.join(self.output_dir, test_set_name, f"{scene_name}.pkl")
+                if os.path.exists(inference_result_path):
+                    Log.info(f"Inference result already exists for scene: {scene_name}")
+                    continue
                 status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
-                test_set.process_batch(data, self.device)
                 output = self.predict_sequence(data)
-                self.save_inference_result(test_set_name, data["scene_name"][0], output)
+                self.save_inference_result(test_set_name, data["scene_name"], output)
         status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list))

-    def predict_sequence(self, data, cr_increase_threshold=0, max_iter=50, max_retry=5):
-        scene_name = data["scene_name"][0]
+    def predict_sequence(self, data, cr_increase_threshold=0, overlap_area_threshold=25, scan_points_threshold=10, max_iter=50, max_retry = 5):
+        scene_name = data["scene_name"]
         Log.info(f"Processing scene: {scene_name}")
         status_manager.set_status("inference", "inferencer", "scene", scene_name)

         ''' data for rendering '''
-        scene_path = data["scene_path"][0]
-        O_to_L_pose = data["O_to_L_pose"][0]
-        voxel_threshold = data["voxel_threshold"][0]
-        filter_degree = data["filter_degree"][0]
-        model_points_normals = data["model_points_normals"][0]
-        model_pts = model_points_normals[:,:3]
-        down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
-        first_frame_to_world_9d = data["first_to_world_9d"][0]
-        first_frame_to_world = torch.eye(4, device=first_frame_to_world_9d.device)
-        first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(first_frame_to_world_9d[:,:6])[0]
-        first_frame_to_world[:3,3] = first_frame_to_world_9d[0,6:]
-        first_frame_to_world = first_frame_to_world.to(self.device)
+        scene_path = data["scene_path"]
+        O_to_L_pose = data["O_to_L_pose"]
+        voxel_threshold = self.voxel_size
+        filter_degree = 75
+        down_sampled_model_pts = data["gt_pts"]
+        first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0]
+        first_frame_to_world = np.eye(4)
+        first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6])
+        first_frame_to_world[:3,3] = first_frame_to_world_9d[6:]

         ''' data for inference '''
         input_data = {}
-        input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
-        input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)]
+        input_data["combined_scanned_pts"] = torch.tensor(data["first_scanned_pts"][0], dtype=torch.float32).to(self.device).unsqueeze(0)
+        input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(data["first_scanned_n_to_world_pose_9d"], dtype=torch.float32).to(self.device)]
         input_data["mode"] = namespace.Mode.TEST
-        input_data["combined_scanned_pts"] = data["combined_scanned_pts"]
-        input_pts_N = input_data["scanned_pts"][0].shape[1]
+        input_pts_N = input_data["combined_scanned_pts"].shape[1]

-        first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
-        scanned_view_pts = [first_frame_target_pts]
-        last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
+        root = os.path.dirname(scene_path)
+        display_table_info = DataLoadUtil.get_display_table_info(root, scene_name)
+        radius = display_table_info["radius"]
+        scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=radius))
+        first_frame_target_pts, first_frame_target_normals, first_frame_scan_points_indices = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
+        scanned_view_pts = [first_frame_target_pts]
+        history_indices = [first_frame_scan_points_indices]
+        last_pred_cr, added_pts_num = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
         retry_duplication_pose = []
         retry_no_pts_pose = []
+        retry_overlap_pose = []
         retry = 0
         pred_cr_seq = [last_pred_cr]
+        success = 0
+        last_pts_num = PtsUtil.voxel_downsample_point_cloud(data["first_scanned_pts"][0], 0.002).shape[0]
+        import time
         while len(pred_cr_seq) < max_iter and retry < max_retry:
+            start_time = time.time()
             output = self.pipeline(input_data)
+            end_time = time.time()
+            print(f"Time taken for inference: {end_time - start_time} seconds")
             pred_pose_9d = output["pred_pose_9d"]
             pred_pose = torch.eye(4, device=pred_pose_9d.device)
@@ -125,7 +143,24 @@ class Inferencer(Runner):
             pred_pose[:3,3] = pred_pose_9d[0,6:]

             try:
-                new_target_pts_world, new_pts_world = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose, require_full_scene=True)
+                start_time = time.time()
+                new_target_pts, new_target_normals, new_scan_points_indices = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
+                #import ipdb; ipdb.set_trace()
+                if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
+                    curr_overlap_area_threshold = overlap_area_threshold
+                else:
+                    curr_overlap_area_threshold = overlap_area_threshold * 0.5
+
+                downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold)
+                overlap, _ = ReconstructionUtil.check_overlap(downsampled_new_target_pts, down_sampled_model_pts, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=voxel_threshold, require_new_added_pts_num = True)
+                if not overlap:
+                    retry += 1
+                    retry_overlap_pose.append(pred_pose.cpu().numpy().tolist())
+                    continue
+
+                history_indices.append(new_scan_points_indices)
+                end_time = time.time()
+                print(f"Time taken for rendering: {end_time - start_time} seconds")
             except Exception as e:
                 Log.warning(f"Error in scene {scene_path}, {e}")
                 print("current pose: ", pred_pose)
@@ -134,61 +169,42 @@ class Inferencer(Runner):
                 retry += 1
                 continue

-            pred_cr = self.compute_coverage_rate(scanned_view_pts, new_target_pts_world, down_sampled_model_pts, threshold=voxel_threshold)
-            print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"])
-            if pred_cr >= data["max_coverage_rate"]:
-                print("max coverage rate reached!")
-            if pred_cr <= last_pred_cr + cr_increase_threshold:
+            if new_target_pts.shape[0] == 0:
+                print("no pts in new target")
+                retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
                 retry += 1
-                retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
                 continue

+            start_time = time.time()
+            pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold)
+            end_time = time.time()
+            print(f"Time taken for coverage rate computation: {end_time - start_time} seconds")
+            print(pred_cr, last_pred_cr, " max: ", data["seq_max_coverage_rate"])
+            if pred_cr >= data["seq_max_coverage_rate"] - 1e-3:
+                print("max coverage rate reached!: ", pred_cr)
+                success += 1
             retry = 0
             pred_cr_seq.append(pred_cr)
-            scanned_view_pts.append(new_target_pts_world)
-            down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_pts_world, input_pts_N)
-            new_pts_world_aug = np.hstack([down_sampled_new_pts_world, np.ones((down_sampled_new_pts_world.shape[0], 1))])
-            new_pts = np.dot(np.linalg.inv(first_frame_to_world.cpu()), new_pts_world_aug.T).T[:,:3]
-            new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device)
-            input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)]
+            scanned_view_pts.append(new_target_pts)
             input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]

-            combined_scanned_views_pts = np.concatenate(input_data["scanned_pts"][0].tolist(), axis=0)
-            voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
+            combined_scanned_pts = np.vstack(scanned_view_pts)
+            voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_pts, 0.002)
             random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
             input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)

+            if success > 3:
+                break
             last_pred_cr = pred_cr
+            pts_num = voxel_downsampled_combined_scanned_pts_np.shape[0]
+            if pts_num - last_pts_num < 10 and pred_cr < data["seq_max_coverage_rate"] - 1e-3:
+                retry += 1
+                retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
+                print("delta pts num < 10:", pts_num, last_pts_num)
+            last_pts_num = pts_num

-        input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
-        input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
-        result = {
-            "pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
-            "pts_seq": input_data["scanned_pts"],
-            "target_pts_seq": scanned_view_pts,
-            "coverage_rate_seq": pred_cr_seq,
-            "max_coverage_rate": data["max_coverage_rate"][0],
-            "pred_max_coverage_rate": max(pred_cr_seq),
-            "scene_name": scene_name,
-            "retry_no_pts_pose": retry_no_pts_pose,
-            "retry_duplication_pose": retry_duplication_pose,
-            "best_seq_len": data["best_seq_len"][0],
-        }
-        self.stat_result[scene_name] = {
-            "max_coverage_rate": data["max_coverage_rate"][0],
-            "success_rate": max(pred_cr_seq)/ data["max_coverage_rate"][0],
-            "coverage_rate_seq": pred_cr_seq,
-            "pred_max_coverage_rate": max(pred_cr_seq),
-            "pred_seq_len": len(pred_cr_seq),
-        }
-        print('success rate: ', max(pred_cr_seq) / data["max_coverage_rate"][0])
-
-        return result

     def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
         if new_pts is not None:
@@ -206,7 +222,7 @@ class Inferencer(Runner):
             os.makedirs(dataset_dir)
         output_path = os.path.join(dataset_dir, f"{scene_name}.pkl")
         pickle.dump(output, open(output_path, "wb"))
-        with open(os.path.join(dataset_dir, "stat.json"), "w") as f:
+        with open(self.stat_result_path, "w") as f:
             json.dump(self.stat_result, f)
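`pred_pose_9d` packs a 6D rotation representation plus a 3D translation, decoded above via `PoseUtil.rotation_6d_to_matrix_numpy`. A standalone numpy sketch of the usual Gram-Schmidt decoding (row convention assumed here; the repo's own PoseUtil should be treated as authoritative):

```
import numpy as np

def rotation_6d_to_matrix(d6):
    """Gram-Schmidt: two 3D vectors -> orthonormal rotation matrix (rows b1,b2,b3)."""
    a1, a2 = d6[:3], d6[3:6]
    b1 = a1 / np.linalg.norm(a1)
    b2 = a2 - np.dot(b1, a2) * b1
    b2 = b2 / np.linalg.norm(b2)
    b3 = np.cross(b1, b2)
    return np.stack((b1, b2, b3), axis=0)

pose_9d = np.random.rand(9)          # illustrative 9D pose
T = np.eye(4)
T[:3, :3] = rotation_6d_to_matrix(pose_9d[:6])
T[:3, 3] = pose_9d[6:]               # translation, as in the code above
```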

View File

@@ -24,12 +24,15 @@ class StrategyGenerator(Runner):
         }
         self.overwrite = ConfigManager.get("runner", "generate", "overwrite")
         self.seq_num = ConfigManager.get("runner","generate","seq_num")
+        self.overlap_area_threshold = ConfigManager.get("runner","generate","overlap_area_threshold")
+        self.compute_with_normal = ConfigManager.get("runner","generate","compute_with_normal")
+        self.scan_points_threshold = ConfigManager.get("runner","generate","scan_points_threshold")

     def run(self):
         dataset_name_list = ConfigManager.get("runner", "generate", "dataset_list")
-        voxel_threshold, soft_overlap_threshold, hard_overlap_threshold = ConfigManager.get("runner","generate","voxel_threshold"), ConfigManager.get("runner","generate","soft_overlap_threshold"), ConfigManager.get("runner","generate","hard_overlap_threshold")
+        voxel_threshold = ConfigManager.get("runner","generate","voxel_threshold")
         for dataset_idx in range(len(dataset_name_list)):
             dataset_name = dataset_name_list[dataset_idx]
             status_manager.set_progress("generate_strategy", "strategy_generator", "dataset", dataset_idx, len(dataset_name_list))
@@ -51,7 +54,7 @@ class StrategyGenerator(Runner):
                 cnt += 1
                 continue

-            self.generate_sequence(root_dir, scene_name,voxel_threshold, soft_overlap_threshold, hard_overlap_threshold)
+            self.generate_sequence(root_dir, scene_name,voxel_threshold)
             cnt += 1
         status_manager.set_progress("generate_strategy", "strategy_generator", "scene", total, total)
         status_manager.set_progress("generate_strategy", "strategy_generator", "dataset", len(dataset_name_list), len(dataset_name_list))
@@ -64,28 +67,36 @@ class StrategyGenerator(Runner):
     def load_experiment(self, backup_name=None):
         super().load_experiment(backup_name)

-    def generate_sequence(self, root, scene_name, voxel_threshold, soft_overlap_threshold, hard_overlap_threshold):
+    def generate_sequence(self, root, scene_name, voxel_threshold):
         status_manager.set_status("generate_strategy", "strategy_generator", "scene", scene_name)
         frame_num = DataLoadUtil.get_scene_seq_length(root, scene_name)
         model_points_normals = DataLoadUtil.load_points_normals(root, scene_name)
         model_pts = model_points_normals[:,:3]
-        down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
+        down_sampled_model_pts, idx = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold, require_idx=True)
+        down_sampled_model_nrm = model_points_normals[idx, 3:]
         pts_list = []
+        nrm_list = []
         scan_points_indices_list = []
         non_zero_cnt = 0

         for frame_idx in range(frame_num):
             status_manager.set_progress("generate_strategy", "strategy_generator", "loading frame", frame_idx, frame_num)
             pts_path = os.path.join(root,scene_name, "pts", f"{frame_idx}.npy")
+            nrm_path = os.path.join(root,scene_name, "nrm", f"{frame_idx}.npy")
             idx_path = os.path.join(root,scene_name, "scan_points_indices", f"{frame_idx}.npy")
-            point_cloud = np.load(pts_path)
-            sampled_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud, voxel_threshold)
-            indices = np.load(idx_path)
-            pts_list.append(sampled_point_cloud)
+            pts = np.load(pts_path)
+            if self.compute_with_normal:
+                if pts.shape[0] == 0:
+                    nrm = np.zeros((0,3))
+                else:
+                    nrm = np.load(nrm_path)
+                nrm_list.append(nrm)
+            pts_list.append(pts)
+            indices = np.load(idx_path)
             scan_points_indices_list.append(indices)
-            if sampled_point_cloud.shape[0] > 0:
+            if pts.shape[0] > 0:
                 non_zero_cnt += 1
         status_manager.set_progress("generate_strategy", "strategy_generator", "loading frame", frame_num, frame_num)
@@ -93,7 +104,7 @@ class StrategyGenerator(Runner):
         init_view_list = []
         idx = 0
         while len(init_view_list) < seq_num and idx < len(pts_list):
-            if pts_list[idx].shape[0] > 100:
+            if pts_list[idx].shape[0] > 50:
                 init_view_list.append(idx)
             idx += 1
@@ -102,8 +113,13 @@ class StrategyGenerator(Runner):
         for init_view in init_view_list:
             status_manager.set_progress("generate_strategy", "strategy_generator", "computing sequence", seq_idx, len(init_view_list))
             start = time.time()
-            limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(down_sampled_model_pts, pts_list, scan_points_indices_list = scan_points_indices_list,init_view=init_view,
-                threshold=voxel_threshold, soft_overlap_threshold=soft_overlap_threshold, hard_overlap_threshold= hard_overlap_threshold, scan_points_threshold=10, status_info=self.status_info)
+            if not self.compute_with_normal:
+                limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence(down_sampled_model_pts, pts_list, scan_points_indices_list = scan_points_indices_list,init_view=init_view,
+                    threshold=voxel_threshold, scan_points_threshold=self.scan_points_threshold, overlap_area_threshold=self.overlap_area_threshold, status_info=self.status_info)
+            else:
+                limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_normal(down_sampled_model_pts, down_sampled_model_nrm, pts_list, nrm_list, scan_points_indices_list = scan_points_indices_list,init_view=init_view,
+                    threshold=voxel_threshold, scan_points_threshold=self.scan_points_threshold, overlap_area_threshold=self.overlap_area_threshold, status_info=self.status_info)
             end = time.time()
             print(f"Time: {end-start}")
             data_pairs = self.generate_data_pairs(limited_useful_view)
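The generator now reads `overlap_area_threshold`, `compute_with_normal`, and `scan_points_threshold` from the runner config in place of the old soft/hard overlap ratios. An illustrative YAML fragment with the keys the ConfigManager calls above expect (values are placeholders, not project defaults):

```
runner:
  generate:
    voxel_threshold: 0.003
    overlap_area_threshold: 25    # cm^2, see check_overlap below
    scan_points_threshold: 10
    compute_with_normal: false
    seq_num: 10
    overwrite: false
    dataset_list: [...]
```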

View File

@@ -9,7 +9,7 @@ class ViewGenerator(Runner):
         self.config_path = config_path

     def run(self):
-        result = subprocess.run(['blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
+        result = subprocess.run(['/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
         print()

     def create_experiment(self, backup_name=None):

View File

@@ -22,8 +22,8 @@ class DataLoadUtil:
         float_channels = ['R', 'G', 'B']
         img_data = []
         for channel in float_channels:
-            channel_data = exr_file.channel(channel, Imath.PixelType(Imath.PixelType.FLOAT))
-            img_data.append(np.frombuffer(channel_data, dtype=np.float32).reshape((height, width)))
+            channel_data = exr_file.channel(channel)
+            img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width)))
         img = np.stack(img_data, axis=-1)
         return img
@@ -51,6 +51,8 @@ class DataLoadUtil:
     @staticmethod
     def get_label_num(root, scene_name):
         label_dir = os.path.join(root, scene_name, "label")
+        if not os.path.exists(label_dir):
+            return 0
         return len(os.listdir(label_dir))

     @staticmethod
@@ -132,8 +134,8 @@ class DataLoadUtil:
         if binocular and not left_only:

             def clean_mask(mask_image):
-                green = [0, 255, 0, 255]
-                red = [255, 0, 0, 255]
+                green = [0, 255, 0]
+                red = [255, 0, 0]
                 threshold = 2
                 mask_image = np.where(
                     np.abs(mask_image - green) <= threshold, green, mask_image
@@ -208,6 +210,17 @@ class DataLoadUtil:
         else:
             pts = np.load(npy_path)
         return pts
+
+    @staticmethod
+    def load_from_preprocessed_nrm(path, file_type="npy"):
+        npy_path = os.path.join(
+            os.path.dirname(path), "nrm", os.path.basename(path) + "." + file_type
+        )
+        if file_type == "txt":
+            nrm = np.loadtxt(npy_path)
+        else:
+            nrm = np.load(npy_path)
+        return nrm

     @staticmethod
     def cam_pose_transformation(cam_pose_before):
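The EXR change above reads each channel's raw bytes and reinterprets them as half precision, which assumes the renderer now writes HALF channels. A self-contained numpy sketch of that reinterpretation (the byte buffer stands in for `exr_file.channel(...)`):

```
import numpy as np

height, width = 2, 3
# Stand-in for the raw HALF channel bytes returned by exr_file.channel(ch):
raw = np.linspace(0.0, 1.0, height * width, dtype=np.float16).tobytes()

img = np.frombuffer(raw, dtype=np.float16).reshape((height, width))
assert img.dtype == np.float16 and img.shape == (height, width)
# Reading with dtype=np.float32 here would misparse the buffer (wrong
# itemsize), which is why the dtype must match the stored pixel format.
```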

View File

@@ -5,10 +5,28 @@ import torch

 class PtsUtil:
     @staticmethod
-    def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005):
+    def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005, require_idx=False):
         voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
-        unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
-        return unique_voxels[0]*voxel_size
+        if require_idx:
+            _, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
+            idx_sort = np.argsort(inverse)
+            idx_unique = idx_sort[np.cumsum(counts)-counts]
+            downsampled_points = point_cloud[idx_unique]
+            return downsampled_points, idx_unique
+        else:
+            unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
+            return unique_voxels[0]*voxel_size
+
+    @staticmethod
+    def voxel_downsample_point_cloud_random(point_cloud, voxel_size=0.005, require_idx=False):
+        voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
+        unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
+        idx_sort = np.argsort(inverse)
+        idx_unique = idx_sort[np.cumsum(counts)-counts]
+        downsampled_points = point_cloud[idx_unique]
+        if require_idx:
+            return downsampled_points, inverse
+        return downsampled_points

     @staticmethod
     def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
@@ -84,14 +102,14 @@ class PtsUtil:
         theta = np.arccos(cos_theta) * 180 / np.pi
         idx = theta < theta_limit
         filtered_sampled_points = points[idx]
+        filtered_normals = normals[idx]

         """ filter with z range """
         points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
         idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
         z_filtered_points = filtered_sampled_points[idx]
-        return z_filtered_points[:, :3]
+        z_filtered_normals = filtered_normals[idx]
+        return z_filtered_points[:, :3], z_filtered_normals

     @staticmethod
     def point_to_hash(point, voxel_size):
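The new `require_idx` branch returns one representative original point per occupied voxel (instead of snapped voxel centers), using an argsort/cumsum grouping trick. A small self-contained check of that trick:

```
import numpy as np

pts = np.array([[0.001, 0.0, 0.0],
                [0.002, 0.0, 0.0],   # same 5 mm voxel as the first point
                [0.020, 0.0, 0.0]])  # a different voxel
voxel = np.floor(pts / 0.005).astype(np.int32)

_, inverse, counts = np.unique(voxel, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)                     # group point indices by voxel id
idx_unique = idx_sort[np.cumsum(counts) - counts]  # first index of each group

print(pts[idx_unique])  # one surviving point per voxel: rows 0 and 2
```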

View File

@@ -8,16 +8,23 @@ class ReconstructionUtil:
     def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
         kdtree = cKDTree(combined_point_cloud)
         distances, _ = kdtree.query(target_point_cloud)
-        covered_points_num = np.sum(distances < threshold)
+        covered_points_num = np.sum(distances < threshold*2)
         coverage_rate = covered_points_num / target_point_cloud.shape[0]
         return coverage_rate, covered_points_num

     @staticmethod
     def compute_coverage_rate_with_normal(target_point_cloud, combined_point_cloud, target_normal, combined_normal, threshold=0.01, normal_threshold=0.1):
         kdtree = cKDTree(combined_point_cloud)
         distances, indices = kdtree.query(target_point_cloud)
-        is_covered_by_distance = distances < threshold
+        is_covered_by_distance = distances < threshold*2
         normal_dots = np.einsum('ij,ij->i', target_normal, combined_normal[indices])
         is_covered_by_normal = normal_dots > normal_threshold
+
+        pts_nrm_target = np.hstack([target_point_cloud, target_normal])
+        np.savetxt("pts_nrm_target.txt", pts_nrm_target)
+        pts_nrm_combined = np.hstack([combined_point_cloud, combined_normal])
+        np.savetxt("pts_nrm_combined.txt", pts_nrm_combined)
+        import ipdb; ipdb.set_trace()
+
         covered_points_num = np.sum(is_covered_by_distance & is_covered_by_normal)
         coverage_rate = covered_points_num / target_point_cloud.shape[0]
@@ -25,15 +32,16 @@ class ReconstructionUtil:

     @staticmethod
-    def compute_overlap_rate(new_point_cloud, combined_point_cloud, threshold=0.01):
+    def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01, require_new_added_pts_num=False):
         kdtree = cKDTree(combined_point_cloud)
         distances, _ = kdtree.query(new_point_cloud)
-        overlapping_points = np.sum(distances < threshold)
-        if new_point_cloud.shape[0] == 0:
-            overlap_rate = 0
-        else:
-            overlap_rate = overlapping_points / new_point_cloud.shape[0]
-        return overlap_rate
+        overlapping_points_num = np.sum(distances < voxel_size*2)
+        cm = 0.01
+        voxel_size_cm = voxel_size / cm
+        overlap_area = overlapping_points_num * voxel_size_cm * voxel_size_cm
+        if require_new_added_pts_num:
+            return overlap_area > overlap_area_threshold, len(new_point_cloud)-np.sum(distances < voxel_size*1.2)
+        return overlap_area > overlap_area_threshold

     @staticmethod
@@ -49,14 +57,14 @@ class ReconstructionUtil:
         return new_added_points

     @staticmethod
-    def compute_next_best_view_sequence_with_overlap(target_point_cloud, point_cloud_list, scan_points_indices_list, threshold=0.01, soft_overlap_threshold=0.5, hard_overlap_threshold=0.7, init_view = 0, scan_points_threshold=5, status_info=None):
+    def compute_next_best_view_sequence(target_point_cloud, point_cloud_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
         selected_views = [init_view]
         combined_point_cloud = point_cloud_list[init_view]
         history_indices = [scan_points_indices_list[init_view]]

         max_rec_pts = np.vstack(point_cloud_list)
         downsampled_max_rec_pts = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold)
+        combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, threshold)
         max_rec_pts_num = downsampled_max_rec_pts.shape[0]
         max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate(target_point_cloud, downsampled_max_rec_pts, threshold)
@@ -69,6 +77,7 @@ class ReconstructionUtil:
         cnt_processed_view = 0
         remaining_views.remove(init_view)
         curr_rec_pts_num = combined_point_cloud.shape[0]
+        drop_output_ratio = 0.4

         import time
         while remaining_views:
@@ -78,27 +87,23 @@ class ReconstructionUtil:
             best_covered_num = 0

             for view_index in remaining_views:
+                if np.random.rand() < drop_output_ratio:
+                    continue
                 if point_cloud_list[view_index].shape[0] == 0:
                     continue
                 if selected_views:
                     new_scan_points_indices = scan_points_indices_list[view_index]
                     if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
-                        overlap_threshold = hard_overlap_threshold
+                        curr_overlap_area_threshold = overlap_area_threshold
                     else:
-                        overlap_threshold = soft_overlap_threshold
-                    start = time.time()
-                    overlap_rate = ReconstructionUtil.compute_overlap_rate(point_cloud_list[view_index],combined_point_cloud, threshold)
-                    end = time.time()
-                    # print(f"overlap_rate Time: {end-start}")
-                    if overlap_rate < overlap_threshold:
+                        curr_overlap_area_threshold = overlap_area_threshold * 0.5
+                    if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
                         continue

-                start = time.time()
                 new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
                 new_downsampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold)
                 new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, threshold)
-                end = time.time()
-                #print(f"compute_coverage_rate Time: {end-start}")
                 coverage_increase = new_coverage - current_coverage
                 if coverage_increase > best_coverage_increase:
                     best_coverage_increase = coverage_increase
@@ -107,6 +112,100 @@ class ReconstructionUtil:
                     best_combined_point_cloud = new_downsampled_combined_point_cloud

+            if best_view is not None:
+                if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
+                    break
+                selected_views.append(best_view)
+                best_rec_pts_num = best_combined_point_cloud.shape[0]
+                print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
+                print(f"Current coverage: {current_coverage+best_coverage_increase}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
+                current_covered_num = best_covered_num
+                curr_rec_pts_num = best_rec_pts_num
+                combined_point_cloud = best_combined_point_cloud
+                remaining_views.remove(best_view)
+                history_indices.append(scan_points_indices_list[best_view])
+                current_coverage += best_coverage_increase
+                cnt_processed_view += 1
+                if status_info is not None:
+                    sm = status_info["status_manager"]
+                    app_name = status_info["app_name"]
+                    runner_name = status_info["runner_name"]
+                    sm.set_status(app_name, runner_name, "current coverage", current_coverage)
+                    sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))
+                view_sequence.append((best_view, current_coverage))
+            else:
+                break
+
+        if status_info is not None:
+            sm = status_info["status_manager"]
+            app_name = status_info["app_name"]
+            runner_name = status_info["runner_name"]
+            sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
+        return view_sequence, remaining_views, combined_point_cloud
+
+    @staticmethod
+    def compute_next_best_view_sequence_with_normal(target_point_cloud, target_normal, point_cloud_list, normal_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
+        selected_views = [init_view]
+        combined_point_cloud = point_cloud_list[init_view]
+        combined_normal = normal_list[init_view]
+        history_indices = [scan_points_indices_list[init_view]]
+
+        max_rec_pts = np.vstack(point_cloud_list)
+        max_rec_nrm = np.vstack(normal_list)
+        downsampled_max_rec_pts, idx = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold, require_idx=True)
+        downsampled_max_rec_nrm = max_rec_nrm[idx]
+        max_rec_pts_num = downsampled_max_rec_pts.shape[0]
+        try:
+            max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate_with_normal(target_point_cloud, downsampled_max_rec_pts, target_normal, downsampled_max_rec_nrm, threshold)
+        except:
+            import ipdb; ipdb.set_trace()
+
+        new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, combined_point_cloud, downsampled_max_rec_nrm, combined_normal, threshold)
+        current_coverage = new_coverage
+        current_covered_num = new_covered_num
+        remaining_views = list(range(len(point_cloud_list)))
+        view_sequence = [(init_view, current_coverage)]
+        cnt_processed_view = 0
+        remaining_views.remove(init_view)
+        curr_rec_pts_num = combined_point_cloud.shape[0]
+
+        while remaining_views:
+            best_view = None
+            best_coverage_increase = -1
+            best_combined_point_cloud = None
+            best_combined_normal = None
+            best_covered_num = 0
+
+            for view_index in remaining_views:
+                if point_cloud_list[view_index].shape[0] == 0:
+                    continue
+                if selected_views:
+                    new_scan_points_indices = scan_points_indices_list[view_index]
+                    if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
+                        curr_overlap_area_threshold = overlap_area_threshold
+                    else:
+                        curr_overlap_area_threshold = overlap_area_threshold * 0.5
+                    if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
+                        continue
+
+                new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
+                new_combined_normal = np.vstack([combined_normal, normal_list[view_index]])
+                new_downsampled_combined_point_cloud, idx = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold, require_idx=True)
+                new_downsampled_combined_normal = new_combined_normal[idx]
+                new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, downsampled_max_rec_nrm, new_downsampled_combined_normal, threshold)
+                coverage_increase = new_coverage - current_coverage
+                if coverage_increase > best_coverage_increase:
+                    best_coverage_increase = coverage_increase
+                    best_view = view_index
+                    best_covered_num = new_covered_num
+                    best_combined_point_cloud = new_downsampled_combined_point_cloud
+                    best_combined_normal = new_downsampled_combined_normal
+
             if best_view is not None:
                 if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
                     break
@@ -118,6 +217,7 @@ class ReconstructionUtil:
                 current_covered_num = best_covered_num
                 curr_rec_pts_num = best_rec_pts_num
                 combined_point_cloud = best_combined_point_cloud
+                combined_normal = best_combined_normal
                 remaining_views.remove(best_view)
                 history_indices.append(scan_points_indices_list[best_view])
                 current_coverage += best_coverage_increase
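`check_overlap` turns an overlapping-point count into an approximate contact area: each overlapping point stands for one voxel footprint of `voxel_size_cm ** 2` square centimeters. Worked through with the defaults used here:

```
# With voxel_size = 0.005 m (0.5 cm), each overlapping point contributes
# 0.5 * 0.5 = 0.25 cm^2, so overlap_area_threshold = 25 cm^2 requires
# more than 100 overlapping points (50 once the threshold is halved).
voxel_size = 0.005
overlap_area_threshold = 25
voxel_size_cm = voxel_size / 0.01
min_points = overlap_area_threshold / (voxel_size_cm ** 2)
print(min_points)  # 100.0
```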

View File

@@ -1,16 +1,75 @@
 import os
 import json
+import time
 import subprocess
 import tempfile
 import shutil
+import numpy as np
 from utils.data_load import DataLoadUtil
 from utils.reconstruction import ReconstructionUtil
 from utils.pts import PtsUtil

 class RenderUtil:
+    target_mask_label = (0, 255, 0)
+    display_table_mask_label = (0, 0, 255)
+    random_downsample_N = 32768
+    min_z = 0.2
+    max_z = 0.5
+
     @staticmethod
-    def render_pts(cam_pose, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
+    def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
+        z = depth[mask]
+        i, j = np.nonzero(mask)
+        x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        normal_camera = normal[mask].reshape(-1, 3)
+        sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
+            points_camera, random_downsample_N, require_idx=True
+        )
+        if len(sampled_target_points) == 0:
+            return np.zeros((0, 3)), np.zeros((0, 3))
+        sampled_normal_camera = normal_camera[idx]
+
+        points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
+        points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        return points_camera_world, sampled_normal_camera
+
+    @staticmethod
+    def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
+        z = depth[mask]
+        i, j = np.nonzero(mask)
+        x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
+        y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
+        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
+        sampled_target_points = PtsUtil.random_downsample_point_cloud(
+            points_camera, random_downsample_N
+        )
+        points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
+        points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
+        return points_camera_world
+
+    @staticmethod
+    def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
+        scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
+        points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
+        points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
+        points_image_homogeneous /= points_image_homogeneous[:, 2:]
+        pixel_x = points_image_homogeneous[:, 0].astype(int)
+        pixel_y = points_image_homogeneous[:, 1].astype(int)
+        h, w = mask.shape[:2]
+        valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
+        mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
+        selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
+        selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
+        return selected_points_indices
+
+    @staticmethod
+    def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
         nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
@@ -25,28 +84,58 @@ class RenderUtil:
         params_data_path = os.path.join(temp_dir, "params.json")
         with open(params_data_path, 'w') as f:
             json.dump(params, f)
+        start_time = time.time()
         result = subprocess.run([
-            'blender', '-b', '-P', script_path, '--', temp_dir
+            '/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir
         ], capture_output=True, text=True)
-        if result.returncode != 0:
-            print("Blender script failed:")
-            print(result.stderr)
-            return None
+        end_time = time.time()
+        print(f"-- Time taken for blender: {end_time - start_time} seconds")
         path = os.path.join(temp_dir, "tmp")
-        point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
-        cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
-
-        ''' TODO: old code: filter_points api is changed, need to update the code '''
-        filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
-        full_scene_point_cloud = None
-        if require_full_scene:
-            depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
-            point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
-            point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
-
-            point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
-            point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
-            full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
-
-        return filtered_point_cloud, full_scene_point_cloud
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+        depth_L, depth_R = DataLoadUtil.load_depth(
+            path, cam_info["near_plane"],
+            cam_info["far_plane"],
+            binocular=True
+        )
+        start_time = time.time()
+        mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
+        normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
+        ''' target points '''
+        mask_img_L = mask_L
+        mask_img_R = mask_R
+
+        target_mask_img_L = (mask_L == RenderUtil.target_mask_label).all(axis=-1)
+        target_mask_img_R = (mask_R == RenderUtil.target_mask_label).all(axis=-1)
+
+        sampled_target_points_L, sampled_target_normal_L = RenderUtil.get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], RenderUtil.random_downsample_N)
+        sampled_target_points_R = RenderUtil.get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], RenderUtil.random_downsample_N )
+        has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
+        if has_points:
+            target_points, overlap_idx = PtsUtil.get_overlapping_points(
+                sampled_target_points_L, sampled_target_points_R, voxel_threshold, require_idx=True
+            )
+            sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
+
+        if has_points:
+            has_points = target_points.shape[0] > 0
+
+        if has_points:
+            target_points, target_normals = PtsUtil.filter_points(
+                target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(RenderUtil.min_z, RenderUtil.max_z)
+            )
+
+        scan_points_indices_L = RenderUtil.get_scan_points_indices(scan_points, mask_img_L, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
+        scan_points_indices_R = RenderUtil.get_scan_points_indices(scan_points, mask_img_R, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
+        scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
+
+        if not has_points:
+            target_points = np.zeros((0, 3))
+            target_normals = np.zeros((0, 3))
+        end_time = time.time()
+        print(f"-- Time taken for processing: {end_time - start_time} seconds")
+        #import ipdb; ipdb.set_trace()
+        return target_points, target_normals, scan_points_indices
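`get_world_points` back-projects masked depth pixels through the pinhole model (`x = (u - cx) * z / fx`, `y = (v - cy) * z / fy`) and lifts them to the world frame with the homogeneous extrinsic. A minimal standalone sketch with toy values:

```
import numpy as np

K = np.array([[500.0, 0.0, 320.0],
              [0.0, 500.0, 240.0],
              [0.0, 0.0, 1.0]])          # toy intrinsics
cam_to_world = np.eye(4)                 # toy extrinsic
depth = np.full((480, 640), 0.4)         # 0.4 m everywhere
mask = np.zeros((480, 640), dtype=bool)
mask[240, 320] = True                    # one pixel at the principal point

z = depth[mask]
i, j = np.nonzero(mask)
x = (j - K[0, 2]) * z / K[0, 0]
y = (i - K[1, 2]) * z / K[1, 1]
pts_cam = np.stack((x, y, z), axis=-1)
pts_h = np.concatenate((pts_cam, np.ones((len(pts_cam), 1))), axis=-1)
pts_world = (cam_to_world @ pts_h.T).T[:, :3]
print(pts_world)  # [[0. 0. 0.4]] -- the principal-point ray at 0.4 m
```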

View File

@@ -47,6 +47,42 @@ class visualizeUtil:
         all_combined_pts = np.vstack(all_combined_pts)
         downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
         np.savetxt(os.path.join(output_dir, "all_combined_pts.txt"), downsampled_all_pts)
+
+    @staticmethod
+    def save_seq_cam_pos_and_cam_axis(root, scene, frame_idx_list, output_dir):
+        all_cam_pos = []
+        all_cam_axis = []
+        for i in frame_idx_list:
+            path = DataLoadUtil.get_path(root, scene, i)
+            cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
+            cam_pose = cam_info["cam_to_world"]
+            cam_pos = cam_pose[:3, 3]
+            cam_axis = cam_pose[:3, 2]
+            num_samples = 10
+            sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
+            sample_points = np.array(sample_points)
+            all_cam_pos.append(cam_pos)
+            all_cam_axis.append(sample_points)
+        all_cam_pos = np.array(all_cam_pos)
+        all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
+        np.savetxt(os.path.join(output_dir, "seq_cam_pos.txt"), all_cam_pos)
+        np.savetxt(os.path.join(output_dir, "seq_cam_axis.txt"), all_cam_axis)
+
+    @staticmethod
+    def save_seq_combined_pts(root, scene, frame_idx_list, output_dir):
+        all_combined_pts = []
+        for i in frame_idx_list:
+            path = DataLoadUtil.get_path(root, scene, i)
+            pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
+            if pts.shape[0] == 0:
+                continue
+            all_combined_pts.append(pts)
+        all_combined_pts = np.vstack(all_combined_pts)
+        downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
+        np.savetxt(os.path.join(output_dir, "seq_combined_pts.txt"), downsampled_all_pts)

     @staticmethod
     def save_target_mesh_at_world_space(
@@ -120,18 +156,37 @@ class visualizeUtil:
         sampled_visualized_normal = np.array(sampled_visualized_normal).reshape(-1, 3)
         np.savetxt(os.path.join(output_dir, "target_pts.txt"), sampled_target_points)
         np.savetxt(os.path.join(output_dir, "target_normal.txt"), sampled_visualized_normal)
+
+    @staticmethod
+    def save_pts_nrm(root, scene, frame_idx, output_dir, binocular=False):
+        path = DataLoadUtil.get_path(root, scene, frame_idx)
+        pts_world = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
+        nrm_camera = DataLoadUtil.load_from_preprocessed_nrm(path, "npy")
+        cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
+        cam_to_world = cam_info["cam_to_world"]
+        nrm_world = nrm_camera @ cam_to_world[:3, :3].T
+        visualized_nrm = []
+        num_samples = 10
+        for i in range(len(pts_world)):
+            for t in range(num_samples):
+                visualized_nrm.append(pts_world[i] - 0.02 * t * nrm_world[i])
+        visualized_nrm = np.array(visualized_nrm)
+        np.savetxt(os.path.join(output_dir, "nrm.txt"), visualized_nrm)
+        np.savetxt(os.path.join(output_dir, "pts.txt"), pts_world)

 # ------ Debug ------
 if __name__ == "__main__":
-    root = r"/home/yan20/nbv_rec/project/franka_control/temp"
+    root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
     model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
     scene = "box"
     output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"

     #visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
-    visualizeUtil.save_all_combined_pts(root, scene, output_dir)
-    visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
-    #visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)
+    # visualizeUtil.save_all_combined_pts(root, scene, output_dir)
+    # visualizeUtil.save_seq_combined_pts(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
+    # visualizeUtil.save_seq_cam_pos_and_cam_axis(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
+    # visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
+    #visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)
+    visualizeUtil.save_pts_nrm(root, scene, "116", output_dir, binocular=True)