diff --git a/configs/local/inference_config.yaml b/configs/local/inference_config.yaml
index 1d1ea81..ca8964e 100644
--- a/configs/local/inference_config.yaml
+++ b/configs/local/inference_config.yaml
@@ -6,71 +6,67 @@ runner:
     cuda_visible_devices: "0,1,2,3,4,5,6,7"
 
   experiment:
-    name: w_gf_wo_lf_full
+    name: overfit_ab_global_only
    root_dir: "experiments"
-    epoch: 1 # -1 stands for last epoch
+    epoch: -1 # -1 stands for last epoch
 
   test:
     dataset_list:
       - OmniObject3d_train
 
-    blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
-    output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset"
-    pipeline: nbv_reconstruction_global_pts_pipeline
+    blender_script_path: "/data/hofee/project/nbv_rec/blender/data_renderer.py"
+    output_dir: "/data/hofee/data/inference_global_full_on_testset"
+    pipeline: nbv_reconstruction_pipeline
+    voxel_size: 0.003
 
 dataset:
   OmniObject3d_train:
-    root_dir: "/media/hofee/repository/nbv_reconstruction_data_512"
-    model_dir: "/media/hofee/data/data/scaled_object_meshes"
-    source: seq_nbv_reconstruction_dataset
-    split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt"
+    root_dir: "/data/hofee/data/new_full_data"
+    model_dir: "/data/hofee/data/scaled_object_meshes"
+    source: seq_reconstruction_dataset
+    split_file: "/data/hofee/data/sample.txt"
     type: test
     filter_degree: 75
     ratio: 1
     batch_size: 1
     num_workers: 12
-    pts_num: 4096
-    load_from_preprocess: False
+    pts_num: 8192
+    load_from_preprocess: True
+
+  OmniObject3d_test:
+    root_dir: "/data/hofee/data/new_full_data"
+    model_dir: "/data/hofee/data/scaled_object_meshes"
+    source: seq_reconstruction_dataset
+    split_file: "/data/hofee/data/sample.txt"
+    type: test
+    filter_degree: 75
+    eval_list:
+      - pose_diff
+      - coverage_rate_increase
+    ratio: 0.1
+    batch_size: 1
+    num_workers: 12
+    pts_num: 8192
+    load_from_preprocess: True
 
 pipeline:
-  nbv_reconstruction_local_pts_pipeline:
+  nbv_reconstruction_pipeline:
     modules:
       pts_encoder: pointnet_encoder
       seq_encoder: transformer_seq_encoder
       pose_encoder: pose_encoder
       view_finder: gf_view_finder
     eps: 1e-5
-    global_scanned_feat: False
-
-  nbv_reconstruction_global_pts_pipeline:
-    modules:
-      pts_encoder: pointnet_encoder
-      pose_seq_encoder: transformer_pose_seq_encoder
-      pose_encoder: pose_encoder
-      view_finder: gf_view_finder
-    eps: 1e-5
     global_scanned_feat: True
-
-
 module:
-
   pointnet_encoder:
     in_dim: 3
     out_dim: 1024
     global_feat: True
     feature_transform: False
-
   transformer_seq_encoder:
-    pts_embed_dim: 1024
-    pose_embed_dim: 256
-    num_heads: 4
-    ffn_dim: 256
-    num_layers: 3
-    output_dim: 2048
-
-  transformer_pose_seq_encoder:
-    pose_embed_dim: 256
+    embed_dim: 256
     num_heads: 4
     ffn_dim: 256
     num_layers: 3
@@ -86,7 +82,8 @@ module:
     sample_mode: ode
     sampling_steps: 500
     sde_mode: ve
-
   pose_encoder:
     pose_dim: 9
     out_dim: 256
+  pts_num_encoder:
+    out_dim: 64
\ No newline at end of file
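As a quick check that the reorganized config carries the keys the new code expects, a throwaway script like the one below can be run before launching inference. It is not part of the repo, and the key paths assume the nesting shown in the hunk above (experiment/test nested under runner).

# Hypothetical sanity check for the updated inference_config.yaml (not in the repo).
import yaml

with open("configs/local/inference_config.yaml") as f:
    cfg = yaml.safe_load(f)

# Keys introduced or renamed by this change.
assert cfg["runner"]["test"]["voxel_size"] == 0.003
assert cfg["runner"]["test"]["pipeline"] == "nbv_reconstruction_pipeline"
assert "OmniObject3d_test" in cfg["dataset"]
assert cfg["module"]["pts_num_encoder"]["out_dim"] == 64
print("inference_config.yaml contains the new keys")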
diff --git a/core/seq_dataset.py b/core/seq_dataset.py
index 1e816df..598db14 100644
--- a/core/seq_dataset.py
+++ b/core/seq_dataset.py
@@ -103,6 +103,18 @@ class SeqReconstructionDataset(BaseDataset):
             except Exception as e:
                 Log.error(f"Save cache failed: {e}")
 
+    def seq_combined_pts(self, scene, frame_idx_list):
+        all_combined_pts = []
+        for i in frame_idx_list:
+            path = DataLoadUtil.get_path(self.root_dir, scene, i)
+            pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
+            if pts.shape[0] == 0:
+                continue
+            all_combined_pts.append(pts)
+
+        all_combined_pts = np.vstack(all_combined_pts)
+        downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.003)
+        return downsampled_all_pts
 
     def __getitem__(self, index):
         data_item_info = self.datalist[index]
         max_coverage_rate = data_item_info["max_coverage_rate"]
@@ -129,21 +141,27 @@ class SeqReconstructionDataset(BaseDataset):
             scanned_coverages_rate.append(coverage_rate)
             n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
                 np.asarray(n_to_world_pose[:3, :3])
-            )
+            )
+            first_left_cam_pose = cam_info["cam_to_world"]
+            first_center_cam_pose = cam_info["cam_to_world_O"]
+            first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
             n_to_world_trans = n_to_world_pose[:3, 3]
             n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
             scanned_n_to_world_pose.append(n_to_world_9d)
 
-        # combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
-        # voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
-        # random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num)
-
+        frame_list = []
+        for i in range(DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)):
+            frame_list.append(i)
+        gt_pts = self.seq_combined_pts(scene_name, frame_list)
         data_item = {
             "first_scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
             "first_scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1)
             "first_scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
             "seq_max_coverage_rate": max_coverage_rate, # Float, range(0, 1)
             "scene_name": scene_name, # String
+            "gt_pts": gt_pts, # Ndarray(N x 3)
+            "scene_path": os.path.join(self.root_dir, scene_name), # String
+            "O_to_L_pose": first_O_to_first_L_pose,
         }
 
         return data_item
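seq_combined_pts above stacks every preprocessed frame of a scene and voxel-downsamples the result at 0.003 m to build the gt_pts ground-truth cloud. PtsUtil.voxel_downsample_point_cloud itself is not shown in this diff; a minimal sketch of what such a routine typically does (keep one point per occupied voxel) follows, and the repo's actual implementation may differ, e.g. by averaging the points that fall into the same voxel.

# Sketch only: not the repo's implementation.
import numpy as np

def voxel_downsample_point_cloud(pts: np.ndarray, voxel_size: float = 0.003) -> np.ndarray:
    # Quantize each point to a voxel index and keep the first point per occupied voxel.
    voxel_idx = np.floor(pts / voxel_size).astype(np.int64)
    _, first = np.unique(voxel_idx, axis=0, return_index=True)
    return pts[np.sort(first)]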
data["scene_path"][0] O_to_L_pose = data["O_to_L_pose"][0] - voxel_threshold = data["voxel_threshold"][0] - filter_degree = data["filter_degree"][0] - model_points_normals = data["model_points_normals"][0] - model_pts = model_points_normals[:,:3] - down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold) - first_frame_to_world_9d = data["first_to_world_9d"][0] - first_frame_to_world = torch.eye(4, device=first_frame_to_world_9d.device) - first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(first_frame_to_world_9d[:,:6])[0] - first_frame_to_world[:3,3] = first_frame_to_world_9d[0,6:] - first_frame_to_world = first_frame_to_world.to(self.device) + voxel_threshold = self.voxel_size + filter_degree = 75 + down_sampled_model_pts = data["gt_pts"] + import ipdb; ipdb.set_trace() + first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0] + first_frame_to_world = np.eye(4) + first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6]) + first_frame_to_world[:3,3] = first_frame_to_world_9d[6:] ''' data for inference ''' input_data = {} - input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)] - input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)] + input_data["combined_scanned_pts"] = torch.tensor(data["first_scanned_pts"][0], dtype=torch.float32).to(self.device) + input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(data["first_scanned_n_to_world_pose_9d"], dtype=torch.float32).to(self.device)] input_data["mode"] = namespace.Mode.TEST - input_data["combined_scanned_pts"] = data["combined_scanned_pts"] - input_pts_N = input_data["scanned_pts"][0].shape[1] + input_pts_N = input_data["combined_scanned_pts"].shape[1] - first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose) + first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, down_sampled_model_pts, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose) scanned_view_pts = [first_frame_target_pts] last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold) diff --git a/utils/render.py b/utils/render.py index e1193ab..f2e95b0 100644 --- a/utils/render.py +++ b/utils/render.py @@ -10,7 +10,7 @@ from utils.pts import PtsUtil class RenderUtil: @staticmethod - def render_pts(cam_pose, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False): + def render_pts(cam_pose, scene_path, script_path, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False): nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path) @@ -34,10 +34,10 @@ class RenderUtil: return None path = os.path.join(temp_dir, "tmp") point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True) + normals = DataLoadUtil.get_target_normals_world_from_path(path, binocular=True) cam_params = DataLoadUtil.load_cam_info(path, binocular=True) - ''' TODO: old code: filter_points api is changed, need to update the code ''' - filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree) 
diff --git a/utils/render.py b/utils/render.py
index e1193ab..f2e95b0 100644
--- a/utils/render.py
+++ b/utils/render.py
@@ -10,7 +10,7 @@ from utils.pts import PtsUtil
 
 class RenderUtil:
 
     @staticmethod
-    def render_pts(cam_pose, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
+    def render_pts(cam_pose, scene_path, script_path, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
 
         nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
@@ -34,10 +34,10 @@ class RenderUtil:
             return None
         path = os.path.join(temp_dir, "tmp")
         point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
+        normals = DataLoadUtil.get_target_normals_world_from_path(path, binocular=True)
         cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
-        ''' TODO: old code: filter_points api is changed, need to update the code '''
-        filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
+        filtered_point_cloud = PtsUtil.filter_points(point_cloud, normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
         full_scene_point_cloud = None
         if require_full_scene:
             depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
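filter_points is now fed per-point normals rendered alongside the point cloud instead of the mesh's model_points_normals. Its implementation is not part of this diff; assuming it performs grazing-angle culling (drop points whose normal is more than theta degrees away from the direction back to the camera), a sketch is given below. The voxel_size argument presumably drives an additional voxel downsample, which is omitted here.

# Sketch only; the repo's PtsUtil.filter_points (and its use of voxel_size) may differ.
import numpy as np

def filter_points(points: np.ndarray, normals: np.ndarray, cam_pose: np.ndarray,
                  theta: float = 75.0) -> np.ndarray:
    cam_center = cam_pose[:3, 3]
    to_cam = cam_center - points
    to_cam /= np.linalg.norm(to_cam, axis=1, keepdims=True)
    n = normals / np.linalg.norm(normals, axis=1, keepdims=True)
    cos_angle = np.sum(n * to_cam, axis=1)
    # Keep points whose normal is within `theta` degrees of the view direction.
    return points[cos_angle > np.cos(np.deg2rad(theta))]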