Compare commits

..

No commits in common. "85172552455c55f42ad8b56f6d77dab00602c52d" and "18333e68313907a0bd3da0a7cf9098a703ec0803" have entirely different histories.

7 changed files with 48 additions and 85 deletions

View File

@@ -7,9 +7,9 @@ runner:
parallel: False parallel: False
experiment: experiment:
name: debug name: local_eval
root_dir: "experiments" root_dir: "experiments"
use_checkpoint: False use_checkpoint: True
epoch: 600 # -1 stands for last epoch epoch: 600 # -1 stands for last epoch
max_epochs: 5000 max_epochs: 5000
save_checkpoint_interval: 1 save_checkpoint_interval: 1
@@ -40,7 +40,6 @@ dataset:
batch_size: 1 batch_size: 1
num_workers: 12 num_workers: 12
pts_num: 4096 pts_num: 4096
load_from_preprocess: True
OmniObject3d_test: OmniObject3d_test:
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes" root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
@@ -56,7 +55,6 @@ dataset:
batch_size: 1 batch_size: 1
num_workers: 12 num_workers: 12
pts_num: 4096 pts_num: 4096
load_from_preprocess: True
pipeline: pipeline:
nbv_reconstruction_pipeline: nbv_reconstruction_pipeline:

View File

@@ -41,7 +41,6 @@ dataset:
batch_size: 160 batch_size: 160
num_workers: 16 num_workers: 16
pts_num: 4096 pts_num: 4096
load_from_preprocess: True
OmniObject3d_test: OmniObject3d_test:
root_dir: "../data/sample_for_training/scenes" root_dir: "../data/sample_for_training/scenes"
@@ -57,7 +56,6 @@ dataset:
batch_size: 1 batch_size: 1
num_workers: 12 num_workers: 12
pts_num: 4096 pts_num: 4096
load_from_preprocess: True
pipeline: pipeline:
nbv_reconstruction_pipeline: nbv_reconstruction_pipeline:

View File

@@ -7,7 +7,7 @@ from PytorchBoot.utils.log_util import Log
import torch import torch
import os import os
import sys import sys
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction") sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil from utils.pose import PoseUtil
@@ -28,7 +28,6 @@ class NBVReconstructionDataset(BaseDataset):
self.pts_num = config["pts_num"] self.pts_num = config["pts_num"]
self.type = config["type"] self.type = config["type"]
self.cache = config.get("cache") self.cache = config.get("cache")
self.load_from_preprocess = config.get("load_from_preprocess", False)
if self.type == namespace.Mode.TEST: if self.type == namespace.Mode.TEST:
self.model_dir = config["model_dir"] self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"] self.filter_degree = config["filter_degree"]
@@ -112,28 +111,24 @@ class NBVReconstructionDataset(BaseDataset):
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True) cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
n_to_world_pose = cam_info["cam_to_world"] n_to_world_pose = cam_info["cam_to_world"]
nR_to_world_pose = cam_info["cam_to_world_R"] nR_to_world_pose = cam_info["cam_to_world_R"]
if self.load_from_preprocess: cached_data = None
downsampled_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(view_path) if self.cache:
else: cached_data = self.load_from_cache(scene_name, frame_idx)
cached_data = None
if cached_data is None:
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_world_pose)['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_world_pose)['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
if self.cache: if self.cache:
cached_data = self.load_from_cache(scene_name, frame_idx) self.save_to_cache(scene_name, frame_idx, downsampled_target_point_cloud)
else:
if cached_data is None: downsampled_target_point_cloud = cached_data
print("load depth")
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_world_pose)['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_world_pose)['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
if self.cache:
self.save_to_cache(scene_name, frame_idx, downsampled_target_point_cloud)
else:
downsampled_target_point_cloud = cached_data
scanned_views_pts.append(downsampled_target_point_cloud) scanned_views_pts.append(downsampled_target_point_cloud)
scanned_coverages_rate.append(coverage_rate) scanned_coverages_rate.append(coverage_rate)
@@ -210,11 +205,10 @@ if __name__ == "__main__":
torch.manual_seed(seed) torch.manual_seed(seed)
np.random.seed(seed) np.random.seed(seed)
config = { config = {
"root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/preprocessed_scenes/", "root_dir": "../data/sample_for_training/scenes",
"model_dir": "/media/hofee/data/data/scaled_object_meshes", "model_dir": "../data/scaled_object_meshes",
"source": "nbv_reconstruction_dataset", "source": "nbv_reconstruction_dataset",
"split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt", "split_file": "../data/sample_for_training/OmniObject3d_train.txt",
"load_from_preprocess": True,
"ratio": 0.5, "ratio": 0.5,
"batch_size": 2, "batch_size": 2,
"filter_degree": 75, "filter_degree": 75,

View File

@@ -46,12 +46,10 @@ class SeqNBVReconstructionDataset(BaseDataset):
best_seq = label_data["best_sequence"] best_seq = label_data["best_sequence"]
max_coverage_rate = label_data["max_coverage_rate"] max_coverage_rate = label_data["max_coverage_rate"]
first_frame = best_seq[0] first_frame = best_seq[0]
best_seq_len = len(best_seq)
datalist.append({ datalist.append({
"scene_name": scene_name, "scene_name": scene_name,
"first_frame": first_frame, "first_frame": first_frame,
"max_coverage_rate": max_coverage_rate, "max_coverage_rate": max_coverage_rate
"best_seq_len": best_seq_len,
}) })
return datalist[5:] return datalist[5:]
@@ -100,7 +98,6 @@ class SeqNBVReconstructionDataset(BaseDataset):
"first_frame_coverage": first_frame_coverage, "first_frame_coverage": first_frame_coverage,
"scene_path": scene_path, "scene_path": scene_path,
"model_points_normals": model_points_normals, "model_points_normals": model_points_normals,
"best_seq_len": data_item_info["best_seq_len"],
} }
return data_item return data_item

View File

@@ -75,11 +75,12 @@ class Inferencer(Runner):
status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total) status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
test_set.process_batch(data, self.device) test_set.process_batch(data, self.device)
output = self.predict_sequence(data) output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"][0], output) self.save_inference_result(output, data)
status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list)) status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list))
def predict_sequence(self, data, cr_increase_threshold=0, max_iter=50, max_retry=5): def predict_sequence(self, data, cr_increase_threshold=0, max_iter=100):
pred_cr_seq = []
scene_name = data["scene_name"][0] scene_name = data["scene_name"][0]
Log.info(f"Processing scene: {scene_name}") Log.info(f"Processing scene: {scene_name}")
status_manager.set_status("inference", "inferencer", "scene", scene_name) status_manager.set_status("inference", "inferencer", "scene", scene_name)
@@ -97,7 +98,7 @@ class Inferencer(Runner):
''' data for inference ''' ''' data for inference '''
input_data = {} input_data = {}
input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)] input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
input_data["scanned_n_to_world_pose_9d"] = [data["first_frame_to_world"][0].to(self.device)] input_data["scanned_n_to_world_pose_9d"] = [data["first_to_first_9d"][0].to(self.device)]
input_data["mode"] = namespace.Mode.TEST input_data["mode"] = namespace.Mode.TEST
input_pts_N = input_data["scanned_pts"][0].shape[1] input_pts_N = input_data["scanned_pts"][0].shape[1]
@@ -106,11 +107,9 @@ class Inferencer(Runner):
scanned_view_pts = [first_frame_target_pts] scanned_view_pts = [first_frame_target_pts]
last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold) last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
retry_duplication_pose = []
retry_no_pts_pose = []
retry = 0 while len(pred_cr_seq) < max_iter:
pred_cr_seq = [last_pred_cr]
while len(pred_cr_seq) < max_iter and retry < max_retry:
output = self.pipeline(input_data) output = self.pipeline(input_data)
next_pose_9d = output["pred_pose_9d"] next_pose_9d = output["pred_pose_9d"]
@@ -119,30 +118,21 @@ class Inferencer(Runner):
pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(next_pose_9d[:,:6])[0] pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(next_pose_9d[:,:6])[0]
pred_pose[:3,3] = next_pose_9d[0,6:] pred_pose[:3,3] = next_pose_9d[0,6:]
pred_n_to_world_pose_mat = torch.matmul(first_frame_to_world, pred_pose) pred_n_to_world_pose_mat = torch.matmul(first_frame_to_world, pred_pose)
try: try:
new_target_pts_world, new_pts_world = RenderUtil.render_pts(pred_n_to_world_pose_mat, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose, require_full_scene=True) new_target_pts_world, new_pts_world = RenderUtil.render_pts(pred_n_to_world_pose_mat, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose, require_full_scene=True)
except Exception as e: except Exception as e:
Log.warning(f"Error in scene {scene_path}, {e}") Log.warning(f"Error in scene {scene_path}, {e}")
print("current pose: ", pred_pose) print("current pose: ", pred_pose)
print("curr_pred_cr: ", last_pred_cr) print("curr_pred_cr: ", last_pred_cr)
retry_no_pts_pose.append(pred_n_to_world_pose_mat.cpu().numpy().tolist())
retry += 1
continue continue
pred_cr = self.compute_coverage_rate(scanned_view_pts, new_target_pts_world, down_sampled_model_pts, threshold=voxel_threshold) pred_cr = self.compute_coverage_rate(scanned_view_pts, new_target_pts_world, down_sampled_model_pts, threshold=voxel_threshold)
pred_cr_seq.append(pred_cr)
print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"])
if pred_cr >= data["max_coverage_rate"]: if pred_cr >= data["max_coverage_rate"]:
break break
if pred_cr <= last_pred_cr + cr_increase_threshold: if pred_cr < last_pred_cr + cr_increase_threshold:
retry += 1 break
retry_duplication_pose.append(pred_n_to_world_pose_mat.cpu().numpy().tolist())
continue
retry = 0
pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts_world) scanned_view_pts.append(new_target_pts_world)
down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_pts_world, input_pts_N) down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_pts_world, input_pts_N)
new_pts_world_aug = np.hstack([down_sampled_new_pts_world, np.ones((down_sampled_new_pts_world.shape[0], 1))]) new_pts_world_aug = np.hstack([down_sampled_new_pts_world, np.ones((down_sampled_new_pts_world.shape[0], 1))])
@@ -154,7 +144,10 @@ class Inferencer(Runner):
input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], next_pose_9d], dim=0)] input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], next_pose_9d], dim=0)]
last_pred_cr = pred_cr last_pred_cr = pred_cr
# ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------
input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist() input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist() input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
@@ -163,12 +156,8 @@ class Inferencer(Runner):
"pts_seq": input_data["scanned_pts"], "pts_seq": input_data["scanned_pts"],
"target_pts_seq": scanned_view_pts, "target_pts_seq": scanned_view_pts,
"coverage_rate_seq": pred_cr_seq, "coverage_rate_seq": pred_cr_seq,
"max_coverage_rate": data["max_coverage_rate"][0], "max_coverage_rate": data["max_coverage_rate"],
"pred_max_coverage_rate": max(pred_cr_seq), "pred_max_coverage_rate": max(pred_cr_seq)
"scene_name": scene_name,
"retry_no_pts_pose": retry_no_pts_pose,
"retry_duplication_pose": retry_duplication_pose,
"best_seq_len": data["best_seq_len"][0],
} }
return result return result
@@ -186,8 +175,7 @@ class Inferencer(Runner):
dataset_dir = os.path.join(self.output_dir, dataset_name) dataset_dir = os.path.join(self.output_dir, dataset_name)
if not os.path.exists(dataset_dir): if not os.path.exists(dataset_dir):
os.makedirs(dataset_dir) os.makedirs(dataset_dir)
output_path = os.path.join(dataset_dir, f"{scene_name}.pkl") pickle.dump(output, open(f"result_{scene_name}.pkl", "wb"))
pickle.dump(output, open(output_path, "wb"))
def get_checkpoint_path(self, is_last=False): def get_checkpoint_path(self, is_last=False):

View File

@@ -133,12 +133,6 @@ class DataLoadUtil:
rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR) rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
return rgb_image return rgb_image
@staticmethod
def load_from_preprocessed_pts(path):
npy_path = os.path.join(os.path.dirname(path), "points", os.path.basename(path) + ".npy")
pts = np.load(npy_path)
return pts
@staticmethod @staticmethod
def cam_pose_transformation(cam_pose_before): def cam_pose_transformation(cam_pose_before):
offset = np.asarray([ offset = np.asarray([
@@ -179,16 +173,6 @@ class DataLoadUtil:
cam_info["cam_to_world_R"] = cam_to_world_R cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info return cam_info
@staticmethod
def get_real_cam_O_from_cam_L(cam_L, cam_O_to_cam_L, display_table_as_world_space_origin=True):
nO_to_display_table_pose = cam_L.cpu().numpy() @ cam_O_to_cam_L
if display_table_as_world_space_origin:
display_table_to_world = np.eye(4)
display_table_to_world[:3, 3] = DataLoadUtil.DISPLAY_TABLE_POSITION
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
return nO_to_world_pose
@staticmethod @staticmethod
def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)): def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)):
h, w = depth.shape h, w = depth.shape

View File

@@ -10,8 +10,8 @@ class RenderUtil:
@staticmethod @staticmethod
def render_pts(cam_pose, scene_path,script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False): def render_pts(cam_pose, scene_path,script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = cam_pose.cpu().numpy() @ nO_to_nL_pose
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose) nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
with tempfile.TemporaryDirectory() as temp_dir: with tempfile.TemporaryDirectory() as temp_dir:
@@ -30,10 +30,14 @@ class RenderUtil:
print(result.stderr) print(result.stderr)
return None return None
path = os.path.join(temp_dir, "tmp") path = os.path.join(temp_dir, "tmp")
# ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True) point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True) cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
filtered_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree) filtered_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
full_scene_point_cloud = None full_scene_point_cloud = None
if require_full_scene: if require_full_scene:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True) depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)