4 Commits

Author SHA1 Message Date
1123e69bff fix nan 2024-10-31 12:02:48 +00:00
5e8684d149 debug 2024-10-31 11:13:37 +00:00
96fa40cc35 global_and_partial_global: upd 2024-10-30 15:34:15 +00:00
b82b92eebb global_and_partial_global: all 2024-10-30 11:49:45 +00:00
10 changed files with 365 additions and 545 deletions

View File

@@ -1,72 +1,76 @@
runner: runner:
general: general:
seed: 0 seed: 1
device: cuda device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7" cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment: experiment:
name: train_ab_global_only name: w_gf_wo_lf_full
root_dir: "experiments" root_dir: "experiments"
epoch: -1 # -1 stands for last epoch epoch: 1 # -1 stands for last epoch
test: test:
dataset_list: dataset_list:
- OmniObject3d_test - OmniObject3d_train
blender_script_path: "/data/hofee/project/nbv_rec/blender/data_renderer.py" blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
output_dir: "/data/hofee/data/inference_global_full_on_testset" output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset"
pipeline: nbv_reconstruction_pipeline pipeline: nbv_reconstruction_global_pts_pipeline
voxel_size: 0.003
dataset: dataset:
OmniObject3d_train: OmniObject3d_train:
root_dir: "/data/hofee/data/new_full_data" root_dir: "/media/hofee/repository/nbv_reconstruction_data_512"
model_dir: "/data/hofee/data/scaled_object_meshes" model_dir: "/media/hofee/data/data/scaled_object_meshes"
source: seq_reconstruction_dataset source: seq_nbv_reconstruction_dataset
split_file: "/data/hofee/data/sample.txt" split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt"
type: test type: test
filter_degree: 75 filter_degree: 75
ratio: 1 ratio: 1
batch_size: 1 batch_size: 1
num_workers: 12 num_workers: 12
pts_num: 8192 pts_num: 4096
load_from_preprocess: True load_from_preprocess: False
OmniObject3d_test:
root_dir: "/data/hofee/data/new_full_data"
model_dir: "/data/hofee/data/scaled_object_meshes"
source: seq_reconstruction_dataset
split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt"
type: test
filter_degree: 75
eval_list:
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1
num_workers: 12
pts_num: 8192
load_from_preprocess: True
pipeline: pipeline:
nbv_reconstruction_pipeline: nbv_reconstruction_local_pts_pipeline:
modules: modules:
pts_encoder: pointnet_encoder pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder pose_encoder: pose_encoder
view_finder: gf_view_finder view_finder: gf_view_finder
eps: 1e-5 eps: 1e-5
global_scanned_feat: False
nbv_reconstruction_global_pts_pipeline:
modules:
pts_encoder: pointnet_encoder
pose_seq_encoder: transformer_pose_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True global_scanned_feat: True
module: module:
pointnet_encoder: pointnet_encoder:
in_dim: 3 in_dim: 3
out_dim: 1024 out_dim: 1024
global_feat: True global_feat: True
feature_transform: False feature_transform: False
transformer_seq_encoder: transformer_seq_encoder:
embed_dim: 256 pts_embed_dim: 1024
pose_embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 2048
transformer_pose_seq_encoder:
pose_embed_dim: 256
num_heads: 4 num_heads: 4
ffn_dim: 256 ffn_dim: 256
num_layers: 3 num_layers: 3
@@ -82,8 +86,7 @@ module:
sample_mode: ode sample_mode: ode
sampling_steps: 500 sampling_steps: 500
sde_mode: ve sde_mode: ve
pose_encoder: pose_encoder:
pose_dim: 9 pose_dim: 9
out_dim: 256 out_dim: 256
pts_num_encoder:
out_dim: 64

View File

@@ -6,17 +6,17 @@ runner:
cuda_visible_devices: "0,1,2,3,4,5,6,7" cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment: experiment:
name: debug name: server_split_dataset
root_dir: "experiments" root_dir: "experiments"
split: # split: #
root_dir: "/data/hofee/data/packed_preprocessed_data" root_dir: "/data/hofee/data/new_full_data"
type: "unseen_instance" # "unseen_category" type: "unseen_instance" # "unseen_category"
datasets: datasets:
OmniObject3d_train: OmniObject3d_train:
path: "/data/hofee/data/OmniObject3d_train.txt" path: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
ratio: 0.9 ratio: 0.9
OmniObject3d_test: OmniObject3d_test:
path: "/data/hofee/data/OmniObject3d_test.txt" path: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt"
ratio: 0.1 ratio: 0.1

View File

@@ -3,17 +3,17 @@ runner:
general: general:
seed: 0 seed: 0
device: cuda device: cuda
cuda_visible_devices: "0" cuda_visible_devices: "1"
parallel: False parallel: False
experiment: experiment:
name: train_ab_global_only name: train_ab_global_and_partial_global
root_dir: "experiments" root_dir: "experiments"
use_checkpoint: True use_checkpoint: False
epoch: -1 # -1 stands for last epoch epoch: -1 # -1 stands for last epoch
max_epochs: 5000 max_epochs: 5000
save_checkpoint_interval: 1 save_checkpoint_interval: 1
test_first: True test_first: False
train: train:
optimizer: optimizer:
@@ -25,7 +25,7 @@ runner:
test: test:
frequency: 3 # test frequency frequency: 3 # test frequency
dataset_list: dataset_list:
- OmniObject3d_test #- OmniObject3d_test
- OmniObject3d_val - OmniObject3d_val
pipeline: nbv_reconstruction_pipeline pipeline: nbv_reconstruction_pipeline
@@ -97,7 +97,7 @@ module:
feature_transform: False feature_transform: False
transformer_seq_encoder: transformer_seq_encoder:
embed_dim: 256 embed_dim: 320
num_heads: 4 num_heads: 4
ffn_dim: 256 ffn_dim: 256
num_layers: 3 num_layers: 3

View File

@@ -7,6 +7,7 @@ from PytorchBoot.utils.log_util import Log
import torch import torch
import os import os
import sys import sys
import time
sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction") sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")
@@ -114,8 +115,13 @@ class NBVReconstructionDataset(BaseDataset):
except Exception as e: except Exception as e:
Log.error(f"Save cache failed: {e}") Log.error(f"Save cache failed: {e}")
def voxel_downsample_with_mask(self, pts, voxel_size): def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
pass voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, inverse
def __getitem__(self, index): def __getitem__(self, index):
@@ -129,6 +135,9 @@ class NBVReconstructionDataset(BaseDataset):
scanned_coverages_rate, scanned_coverages_rate,
scanned_n_to_world_pose, scanned_n_to_world_pose,
) = ([], [], []) ) = ([], [], [])
start_time = time.time()
start_indices = [0]
total_points = 0
for view in scanned_views: for view in scanned_views:
frame_idx = view[0] frame_idx = view[0]
coverage_rate = view[1] coverage_rate = view[1]
@@ -150,8 +159,12 @@ class NBVReconstructionDataset(BaseDataset):
n_to_world_trans = n_to_world_pose[:3, 3] n_to_world_trans = n_to_world_pose[:3, 3]
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0) n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
scanned_n_to_world_pose.append(n_to_world_9d) scanned_n_to_world_pose.append(n_to_world_9d)
total_points += len(downsampled_target_point_cloud)
start_indices.append(total_points)
end_time = time.time()
#Log.info(f"load data time: {end_time - start_time}")
nbv_idx, nbv_coverage_rate = nbv[0], nbv[1] nbv_idx, nbv_coverage_rate = nbv[0], nbv[1]
nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx) nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx)
cam_info = DataLoadUtil.load_cam_info(nbv_path) cam_info = DataLoadUtil.load_cam_info(nbv_path)
@@ -166,12 +179,25 @@ class NBVReconstructionDataset(BaseDataset):
) )
combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0) combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002) voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_views_pts, 0.003)
random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num) random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num, require_idx=True)
all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np))
all_random_downsample_idx = all_idx_unique[random_downsample_idx]
scanned_pts_mask = []
for idx, start_idx in enumerate(start_indices):
if idx == len(start_indices) - 1:
break
end_idx = start_indices[idx+1]
view_inverse = inverse[start_idx:end_idx]
view_unique_downsampled_idx = np.unique(view_inverse)
view_unique_downsampled_idx_set = set(view_unique_downsampled_idx)
mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx])
scanned_pts_mask.append(mask)
data_item = { data_item = {
"scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3) "scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
"combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3) "combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3)
"scanned_pts_mask": np.asarray(scanned_pts_mask, dtype=np.bool), # Ndarray(N)
"scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1) "scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1)
"scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9) "scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
"best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1) "best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1)
@@ -197,7 +223,9 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["scanned_n_to_world_pose_9d"] = [ collate_data["scanned_n_to_world_pose_9d"] = [
torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch
] ]
collate_data["scanned_pts_mask"] = [
torch.tensor(item["scanned_pts_mask"]) for item in batch
]
''' ------ Fixed Length ------ ''' ''' ------ Fixed Length ------ '''
collate_data["best_to_world_pose_9d"] = torch.stack( collate_data["best_to_world_pose_9d"] = torch.stack(
@@ -206,12 +234,14 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["combined_scanned_pts"] = torch.stack( collate_data["combined_scanned_pts"] = torch.stack(
[torch.tensor(item["combined_scanned_pts"]) for item in batch] [torch.tensor(item["combined_scanned_pts"]) for item in batch]
) )
for key in batch[0].keys(): for key in batch[0].keys():
if key not in [ if key not in [
"scanned_pts", "scanned_pts",
"scanned_n_to_world_pose_9d", "scanned_n_to_world_pose_9d",
"best_to_world_pose_9d", "best_to_world_pose_9d",
"combined_scanned_pts", "combined_scanned_pts",
"scanned_pts_mask",
]: ]:
collate_data[key] = [item[key] for item in batch] collate_data[key] = [item[key] for item in batch]
return collate_data return collate_data
@@ -227,9 +257,9 @@ if __name__ == "__main__":
torch.manual_seed(seed) torch.manual_seed(seed)
np.random.seed(seed) np.random.seed(seed)
config = { config = {
"root_dir": "/data/hofee/data/packed_preprocessed_data", "root_dir": "/data/hofee/nbv_rec_part2_preprocessed",
"source": "nbv_reconstruction_dataset", "source": "nbv_reconstruction_dataset",
"split_file": "/data/hofee/data/OmniObject3d_train.txt", "split_file": "/data/hofee/data/sample.txt",
"load_from_preprocess": True, "load_from_preprocess": True,
"ratio": 0.5, "ratio": 0.5,
"batch_size": 2, "batch_size": 2,

View File

@@ -1,154 +0,0 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("old_seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
def __init__(self, config):
super(SeqNBVReconstructionDataset, self).__init__(config)
self.type = config["type"]
if self.type != namespace.Mode.TEST:
Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
self.load_from_preprocess = config.get("load_from_preprocess", False)
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
scene_name_list.append(scene_name)
return scene_name_list
def get_datalist(self):
datalist = []
for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
scene_max_coverage_rate = 0
scene_max_cr_idx = 0
for seq_idx in range(seq_num):
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
label_data = DataLoadUtil.load_label(label_path)
max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate:
scene_max_coverage_rate = max_coverage_rate
scene_max_cr_idx = seq_idx
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
label_data = DataLoadUtil.load_label(label_path)
first_frame = label_data["best_sequence"][0]
best_seq_len = len(label_data["best_sequence"])
datalist.append({
"scene_name": scene_name,
"first_frame": first_frame,
"max_coverage_rate": scene_max_coverage_rate,
"best_seq_len": best_seq_len,
"label_idx": scene_max_cr_idx,
})
return datalist
def __getitem__(self, index):
data_item_info = self.datalist[index]
first_frame_idx = data_item_info["first_frame"][0]
first_frame_coverage = data_item_info["first_frame"][1]
max_coverage_rate = data_item_info["max_coverage_rate"]
scene_name = data_item_info["scene_name"]
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
first_left_cam_pose = first_cam_info["cam_to_world"]
first_center_cam_pose = first_cam_info["cam_to_world_O"]
first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
first_pts_num = first_target_point_cloud.shape[0]
first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3,:3]))
first_to_world_trans = first_left_cam_pose[:3,3]
first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)
diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
voxel_threshold = diag*0.02
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
scene_path = os.path.join(self.root_dir, scene_name)
model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
data_item = {
"first_pts_num": np.asarray(
first_pts_num, dtype=np.int32
),
"first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32),
"combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
"first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32),
"scene_name": scene_name,
"max_coverage_rate": max_coverage_rate,
"voxel_threshold": voxel_threshold,
"filter_degree": self.filter_degree,
"O_to_L_pose": first_O_to_first_L_pose,
"first_frame_coverage": first_frame_coverage,
"scene_path": scene_path,
"model_points_normals": model_points_normals,
"best_seq_len": data_item_info["best_seq_len"],
"first_frame_id": first_frame_idx,
}
return data_item
def __len__(self):
return len(self.datalist)
def get_collate_fn(self):
def collate_fn(batch):
collate_data = {}
collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
for key in batch[0].keys():
if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
collate_data[key] = [item[key] for item in batch]
return collate_data
return collate_fn
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
"split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
"model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
"ratio": 0.005,
"batch_size": 2,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 32684,
"type": namespace.Mode.TEST,
"load_from_preprocess": True
}
ds = SeqNBVReconstructionDataset(config)
print(len(ds))
#ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
# ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------+

View File

@@ -89,25 +89,49 @@ class NBVReconstructionPipeline(nn.Module):
"scanned_n_to_world_pose_9d" "scanned_n_to_world_pose_9d"
] # List(B): Tensor(S x 9) ] # List(B): Tensor(S x 9)
scanned_pts_mask_batch = data["scanned_pts_mask"] # List(B): Tensor(N)
device = next(self.parameters()).device device = next(self.parameters()).device
embedding_list_batch = [] embedding_list_batch = []
combined_scanned_pts_batch = data["combined_scanned_pts"] # Tensor(B x N x 3) combined_scanned_pts_batch = data["combined_scanned_pts"] # Tensor(B x N x 3)
global_scanned_feat = self.pts_encoder.encode_points( global_scanned_feat, per_point_feat_batch = self.pts_encoder.encode_points(
combined_scanned_pts_batch, require_per_point_feat=False combined_scanned_pts_batch, require_per_point_feat=True
) # global_scanned_feat: Tensor(B x Dg) ) # global_scanned_feat: Tensor(B x Dg)
batch_size = len(scanned_n_to_world_pose_9d_batch)
for i in range(batch_size):
seq_len = len(scanned_n_to_world_pose_9d_batch[i])
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d_batch[i].to(device) # Tensor(S x 9)
scanned_pts_mask = scanned_pts_mask_batch[i] # Tensor(S x N)
per_point_feat = per_point_feat_batch[i] # Tensor(N x Dp)
partial_point_feat_seq = []
for j in range(seq_len):
partial_per_point_feat = per_point_feat[scanned_pts_mask[j]]
if partial_per_point_feat.shape[0] == 0:
partial_point_feat = torch.zeros(per_point_feat.shape[1], device=device)
else:
partial_point_feat = torch.mean(partial_per_point_feat, dim=0) # Tensor(Dp)
partial_point_feat_seq.append(partial_point_feat)
partial_point_feat_seq = torch.stack(partial_point_feat_seq, dim=0) # Tensor(S x Dp)
for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device) # Tensor(S x 9)
pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp) pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp)
seq_embedding = pose_feat_seq
seq_embedding = torch.cat([partial_point_feat_seq, pose_feat_seq], dim=-1)
embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp)) embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp))
seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds) seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds)
main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg)) main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg))
if torch.isnan(main_feat).any(): if torch.isnan(main_feat).any():
for i in range(len(main_feat)):
if torch.isnan(main_feat[i]).any():
scanned_pts_mask = scanned_pts_mask_batch[i]
Log.info(f"scanned_pts_mask shape: {scanned_pts_mask.shape}")
Log.info(f"scanned_pts_mask sum: {scanned_pts_mask.sum()}")
import ipdb
ipdb.set_trace()
Log.error("nan in main_feat", True) Log.error("nan in main_feat", True)
return main_feat return main_feat

View File

@@ -2,45 +2,34 @@ import numpy as np
from PytorchBoot.dataset import BaseDataset from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log from PytorchBoot.utils.log_util import Log
import torch import torch
import os import os
import sys import sys
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil from utils.pose import PoseUtil
from utils.pts import PtsUtil from utils.pts import PtsUtil
@stereotype.dataset("seq_nbv_reconstruction_dataset")
@stereotype.dataset("seq_reconstruction_dataset") class SeqNBVReconstructionDataset(BaseDataset):
class SeqReconstructionDataset(BaseDataset):
def __init__(self, config): def __init__(self, config):
super(SeqReconstructionDataset, self).__init__(config) super(SeqNBVReconstructionDataset, self).__init__(config)
self.type = config["type"]
if self.type != namespace.Mode.TEST:
Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
self.config = config self.config = config
self.root_dir = config["root_dir"] self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"] self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list() self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist() self.datalist = self.get_datalist()
self.pts_num = config["pts_num"] self.pts_num = config["pts_num"]
self.type = config["type"]
self.cache = config.get("cache") self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
self.load_from_preprocess = config.get("load_from_preprocess", False) self.load_from_preprocess = config.get("load_from_preprocess", False)
if self.type == namespace.Mode.TEST:
#self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
if self.type == namespace.Mode.TRAIN:
scale_ratio = 1
self.datalist = self.datalist*scale_ratio
if self.cache:
expr_root = ConfigManager.get("runner", "experiment", "root_dir")
expr_name = ConfigManager.get("runner", "experiment", "name")
self.cache_dir = os.path.join(expr_root, expr_name, "cache")
# self.preprocess_cache()
def load_scene_name_list(self): def load_scene_name_list(self):
scene_name_list = [] scene_name_list = []
@@ -55,143 +44,111 @@ class SeqReconstructionDataset(BaseDataset):
for scene_name in self.scene_name_list: for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name) seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
scene_max_coverage_rate = 0 scene_max_coverage_rate = 0
max_coverage_rate_list = []
scene_max_cr_idx = 0 scene_max_cr_idx = 0
for seq_idx in range(seq_num): for seq_idx in range(seq_num):
label_path = DataLoadUtil.get_label_path( label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
self.root_dir, scene_name, seq_idx
)
label_data = DataLoadUtil.load_label(label_path) label_data = DataLoadUtil.load_label(label_path)
max_coverage_rate = label_data["max_coverage_rate"] max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate: if max_coverage_rate > scene_max_coverage_rate:
scene_max_coverage_rate = max_coverage_rate scene_max_coverage_rate = max_coverage_rate
scene_max_cr_idx = seq_idx scene_max_cr_idx = seq_idx
max_coverage_rate_list.append(max_coverage_rate)
best_label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx) label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
best_label_data = DataLoadUtil.load_label(best_label_path) label_data = DataLoadUtil.load_label(label_path)
first_frame = best_label_data["best_sequence"][0] first_frame = label_data["best_sequence"][0]
best_seq_len = len(best_label_data["best_sequence"]) best_seq_len = len(label_data["best_sequence"])
datalist.append({ datalist.append({
"scene_name": scene_name, "scene_name": scene_name,
"first_frame": first_frame, "first_frame": first_frame,
"best_seq_len": best_seq_len,
"max_coverage_rate": scene_max_coverage_rate, "max_coverage_rate": scene_max_coverage_rate,
"best_seq_len": best_seq_len,
"label_idx": scene_max_cr_idx, "label_idx": scene_max_cr_idx,
}) })
return datalist return datalist
def preprocess_cache(self):
Log.info("preprocessing cache...")
for item_idx in range(len(self.datalist)):
self.__getitem__(item_idx)
Log.success("finish preprocessing cache.")
def load_from_cache(self, scene_name, curr_frame_idx):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
if os.path.exists(cache_path):
data = np.loadtxt(cache_path)
return data
else:
return None
def save_to_cache(self, scene_name, curr_frame_idx, data):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
try:
np.savetxt(cache_path, data)
except Exception as e:
Log.error(f"Save cache failed: {e}")
def seq_combined_pts(self, scene, frame_idx_list):
all_combined_pts = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(self.root_dir, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.003)
return downsampled_all_pts
def __getitem__(self, index): def __getitem__(self, index):
data_item_info = self.datalist[index] data_item_info = self.datalist[index]
first_frame_idx = data_item_info["first_frame"][0]
first_frame_coverage = data_item_info["first_frame"][1]
max_coverage_rate = data_item_info["max_coverage_rate"] max_coverage_rate = data_item_info["max_coverage_rate"]
best_seq_len = data_item_info["best_seq_len"]
scene_name = data_item_info["scene_name"] scene_name = data_item_info["scene_name"]
( first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
scanned_views_pts, first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
scanned_coverages_rate, first_left_cam_pose = first_cam_info["cam_to_world"]
scanned_n_to_world_pose, first_center_cam_pose = first_cam_info["cam_to_world_O"]
) = ([], [], []) first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
view = data_item_info["first_frame"] first_pts_num = first_target_point_cloud.shape[0]
frame_idx = view[0] first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
coverage_rate = view[1] first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3,:3]))
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx) first_to_world_trans = first_left_cam_pose[:3,3]
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True) first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)
diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
n_to_world_pose = cam_info["cam_to_world"] voxel_threshold = diag*0.02
target_point_cloud = (
DataLoadUtil.load_from_preprocessed_pts(view_path)
)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(
target_point_cloud, self.pts_num
)
scanned_views_pts.append(downsampled_target_point_cloud)
scanned_coverages_rate.append(coverage_rate)
n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
np.asarray(n_to_world_pose[:3, :3])
)
first_left_cam_pose = cam_info["cam_to_world"]
first_center_cam_pose = cam_info["cam_to_world_O"]
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose) first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
n_to_world_trans = n_to_world_pose[:3, 3] scene_path = os.path.join(self.root_dir, scene_name)
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0) model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
scanned_n_to_world_pose.append(n_to_world_9d)
frame_list = []
for i in range(DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)):
frame_list.append(i)
gt_pts = self.seq_combined_pts(scene_name, frame_list)
data_item = { data_item = {
"first_scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3) "first_pts_num": np.asarray(
"first_scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1) first_pts_num, dtype=np.int32
"first_scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9) ),
"seq_max_coverage_rate": max_coverage_rate, # Float, range(0, 1) "first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32),
"best_seq_len": best_seq_len, # Int "combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
"scene_name": scene_name, # String "first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32),
"gt_pts": gt_pts, # Ndarray(N x 3) "scene_name": scene_name,
"scene_path": os.path.join(self.root_dir, scene_name), # String "max_coverage_rate": max_coverage_rate,
"voxel_threshold": voxel_threshold,
"filter_degree": self.filter_degree,
"O_to_L_pose": first_O_to_first_L_pose, "O_to_L_pose": first_O_to_first_L_pose,
"first_frame_coverage": first_frame_coverage,
"scene_path": scene_path,
"model_points_normals": model_points_normals,
"best_seq_len": data_item_info["best_seq_len"],
"first_frame_id": first_frame_idx,
} }
return data_item return data_item
def __len__(self): def __len__(self):
return len(self.datalist) return len(self.datalist)
def get_collate_fn(self):
def collate_fn(batch):
collate_data = {}
collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
for key in batch[0].keys():
if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
collate_data[key] = [item[key] for item in batch]
return collate_data
return collate_fn
# -------------- Debug ---------------- # # -------------- Debug ---------------- #
if __name__ == "__main__": if __name__ == "__main__":
import torch import torch
seed = 0 seed = 0
torch.manual_seed(seed) torch.manual_seed(seed)
np.random.seed(seed) np.random.seed(seed)
config = { config = {
"root_dir": "/data/hofee/data/new_full_data", "root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
"source": "seq_reconstruction_dataset", "split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
"split_file": "/data/hofee/data/sample.txt", "model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
"load_from_preprocess": True, "ratio": 0.005,
"ratio": 0.5,
"batch_size": 2, "batch_size": 2,
"filter_degree": 75, "filter_degree": 75,
"num_workers": 0, "num_workers": 0,
"pts_num": 4096, "pts_num": 32684,
"type": namespace.Mode.TRAIN, "type": namespace.Mode.TEST,
"load_from_preprocess": True
} }
ds = SeqReconstructionDataset(config) ds = SeqNBVReconstructionDataset(config)
print(len(ds)) print(len(ds))
print(ds.__getitem__(10)) #ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
# ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------+

View File

@@ -27,7 +27,6 @@ class Inferencer(Runner):
self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path") self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path")
self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir") self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir")
self.voxel_size = ConfigManager.get(namespace.Stereotype.RUNNER, "voxel_size")
''' Pipeline ''' ''' Pipeline '''
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE] self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name) self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
@@ -66,48 +65,56 @@ class Inferencer(Runner):
for dataset_idx, test_set in enumerate(self.test_set_list): for dataset_idx, test_set in enumerate(self.test_set_list):
status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list)) status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list))
test_set_name = test_set.get_name() test_set_name = test_set.get_name()
test_loader = test_set.get_loader()
total=int(len(test_set)) if test_loader.batch_size > 1:
for i in range(total): Log.error("Batch size should be 1 for inference, found {} in {}".format(test_loader.batch_size, test_set_name), terminate=True)
data = test_set.__getitem__(i)
total=int(len(test_loader))
loop = tqdm(enumerate(test_loader), total=total)
for i, data in loop:
status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total) status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
test_set.process_batch(data, self.device)
output = self.predict_sequence(data) output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"], output) self.save_inference_result(test_set_name, data["scene_name"][0], output)
status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list)) status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list))
def predict_sequence(self, data, cr_increase_threshold=0, max_iter=50, max_retry=5): def predict_sequence(self, data, cr_increase_threshold=0, max_iter=50, max_retry=5):
scene_name = data["scene_name"] scene_name = data["scene_name"][0]
Log.info(f"Processing scene: {scene_name}") Log.info(f"Processing scene: {scene_name}")
status_manager.set_status("inference", "inferencer", "scene", scene_name) status_manager.set_status("inference", "inferencer", "scene", scene_name)
''' data for rendering ''' ''' data for rendering '''
scene_path = data["scene_path"] scene_path = data["scene_path"][0]
O_to_L_pose = data["O_to_L_pose"] O_to_L_pose = data["O_to_L_pose"][0]
voxel_threshold = self.voxel_size voxel_threshold = data["voxel_threshold"][0]
filter_degree = 75 filter_degree = data["filter_degree"][0]
down_sampled_model_pts = data["gt_pts"] model_points_normals = data["model_points_normals"][0]
model_pts = model_points_normals[:,:3]
first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0] down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
first_frame_to_world = np.eye(4) first_frame_to_world_9d = data["first_to_world_9d"][0]
first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6]) first_frame_to_world = torch.eye(4, device=first_frame_to_world_9d.device)
first_frame_to_world[:3,3] = first_frame_to_world_9d[6:] first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(first_frame_to_world_9d[:,:6])[0]
first_frame_to_world[:3,3] = first_frame_to_world_9d[0,6:]
first_frame_to_world = first_frame_to_world.to(self.device)
''' data for inference ''' ''' data for inference '''
input_data = {} input_data = {}
input_data["combined_scanned_pts"] = torch.tensor(data["first_scanned_pts"][0], dtype=torch.float32).to(self.device).unsqueeze(0) input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(data["first_scanned_n_to_world_pose_9d"], dtype=torch.float32).to(self.device)] input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)]
input_data["mode"] = namespace.Mode.TEST input_data["mode"] = namespace.Mode.TEST
input_pts_N = input_data["combined_scanned_pts"].shape[1] input_data["combined_scanned_pts"] = data["combined_scanned_pts"]
first_frame_target_pts, first_frame_target_normals = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose) input_pts_N = input_data["scanned_pts"][0].shape[1]
first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
scanned_view_pts = [first_frame_target_pts] scanned_view_pts = [first_frame_target_pts]
last_pred_cr, added_pts_num = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold) last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
retry_duplication_pose = [] retry_duplication_pose = []
retry_no_pts_pose = [] retry_no_pts_pose = []
retry = 0 retry = 0
pred_cr_seq = [last_pred_cr] pred_cr_seq = [last_pred_cr]
success = 0
while len(pred_cr_seq) < max_iter and retry < max_retry: while len(pred_cr_seq) < max_iter and retry < max_retry:
output = self.pipeline(input_data) output = self.pipeline(input_data)
@@ -118,7 +125,7 @@ class Inferencer(Runner):
pred_pose[:3,3] = pred_pose_9d[0,6:] pred_pose[:3,3] = pred_pose_9d[0,6:]
try: try:
new_target_pts, new_target_normals = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose) new_target_pts_world, new_pts_world = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose, require_full_scene=True)
except Exception as e: except Exception as e:
Log.warning(f"Error in scene {scene_path}, {e}") Log.warning(f"Error in scene {scene_path}, {e}")
print("current pose: ", pred_pose) print("current pose: ", pred_pose)
@@ -127,20 +134,12 @@ class Inferencer(Runner):
retry += 1 retry += 1
continue continue
if new_target_pts.shape[0] == 0:
print("no pts in new target")
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
continue
pred_cr = self.compute_coverage_rate(scanned_view_pts, new_target_pts_world, down_sampled_model_pts, threshold=voxel_threshold)
pred_cr, new_added_pts_num = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold) print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"])
print(pred_cr, last_pred_cr, " max: ", data["seq_max_coverage_rate"]) if pred_cr >= data["max_coverage_rate"]:
if pred_cr >= data["seq_max_coverage_rate"] - 1e-3: print("max coverage rate reached!")
print("max coverage rate reached!: ", pred_cr)
success += 1
elif new_added_pts_num < 10:
print("min added pts num reached!: ", new_added_pts_num)
if pred_cr <= last_pred_cr + cr_increase_threshold: if pred_cr <= last_pred_cr + cr_increase_threshold:
retry += 1 retry += 1
retry_duplication_pose.append(pred_pose.cpu().numpy().tolist()) retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
@@ -148,40 +147,46 @@ class Inferencer(Runner):
retry = 0 retry = 0
pred_cr_seq.append(pred_cr) pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts) scanned_view_pts.append(new_target_pts_world)
down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_target_pts, input_pts_N) down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_pts_world, input_pts_N)
new_pts_world_aug = np.hstack([down_sampled_new_pts_world, np.ones((down_sampled_new_pts_world.shape[0], 1))])
new_pts = np.dot(np.linalg.inv(first_frame_to_world.cpu()), new_pts_world_aug.T).T[:,:3]
new_pts = down_sampled_new_pts_world new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device)
input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)]
input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)] input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]
combined_scanned_views_pts = np.concatenate(input_data["scanned_pts"][0].tolist(), axis=0)
combined_scanned_pts = np.concatenate([input_data["combined_scanned_pts"][0].cpu().numpy(), new_pts], axis=0) voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_pts, 0.002)
random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N) random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device) input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)
if success > 3:
break
last_pred_cr = pred_cr last_pred_cr = pred_cr
input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist() input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
result = { result = {
"pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"], "pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
"combined_scanned_pts": input_data["combined_scanned_pts"], "pts_seq": input_data["scanned_pts"],
"target_pts_seq": scanned_view_pts, "target_pts_seq": scanned_view_pts,
"coverage_rate_seq": pred_cr_seq, "coverage_rate_seq": pred_cr_seq,
"max_coverage_rate": data["seq_max_coverage_rate"], "max_coverage_rate": data["max_coverage_rate"][0],
"pred_max_coverage_rate": max(pred_cr_seq), "pred_max_coverage_rate": max(pred_cr_seq),
"scene_name": scene_name, "scene_name": scene_name,
"retry_no_pts_pose": retry_no_pts_pose, "retry_no_pts_pose": retry_no_pts_pose,
"retry_duplication_pose": retry_duplication_pose, "retry_duplication_pose": retry_duplication_pose,
"best_seq_len": data["best_seq_len"], "best_seq_len": data["best_seq_len"][0],
} }
self.stat_result[scene_name] = { self.stat_result[scene_name] = {
"max_coverage_rate": data["max_coverage_rate"][0],
"success_rate": max(pred_cr_seq)/ data["max_coverage_rate"][0],
"coverage_rate_seq": pred_cr_seq, "coverage_rate_seq": pred_cr_seq,
"pred_max_coverage_rate": max(pred_cr_seq), "pred_max_coverage_rate": max(pred_cr_seq),
"pred_seq_len": len(pred_cr_seq), "pred_seq_len": len(pred_cr_seq),
} }
print('success rate: ', max(pred_cr_seq)) print('success rate: ', max(pred_cr_seq) / data["max_coverage_rate"][0])
return result return result

View File

@@ -14,16 +14,38 @@ class PtsUtil:
downsampled_points = point_cloud[idx_unique] downsampled_points = point_cloud[idx_unique]
return downsampled_points, idx_unique return downsampled_points, idx_unique
else: else:
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True) import ipdb; ipdb.set_trace()
return unique_voxels[0]*voxel_size unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=False)
return unique_voxels*voxel_size
@staticmethod @staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False): def voxel_downsample_point_cloud_o3d(point_cloud, voxel_size=0.005):
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(point_cloud)
pcd = pcd.voxel_down_sample(voxel_size)
return np.asarray(pcd.points)
@staticmethod
def voxel_downsample_point_cloud_and_trace_o3d(point_cloud, voxel_size=0.005):
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(point_cloud)
max_bound = pcd.get_max_bound()
min_bound = pcd.get_min_bound()
pcd = pcd.voxel_down_sample_and_trace(voxel_size, max_bound, min_bound, True)
return np.asarray(pcd.points)
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False, replace=True):
if point_cloud.shape[0] == 0: if point_cloud.shape[0] == 0:
if require_idx: if require_idx:
return point_cloud, np.array([]) return point_cloud, np.array([])
return point_cloud return point_cloud
idx = np.random.choice(len(point_cloud), num_points, replace=True) if not replace and num_points > len(point_cloud):
if require_idx:
return point_cloud, np.arange(len(point_cloud))
return point_cloud
idx = np.random.choice(len(point_cloud), num_points, replace=replace)
if require_idx: if require_idx:
return point_cloud[idx], idx return point_cloud[idx], idx
return point_cloud[idx] return point_cloud[idx]

View File

@@ -4,56 +4,13 @@ import json
import subprocess import subprocess
import tempfile import tempfile
import shutil import shutil
import numpy as np
from utils.data_load import DataLoadUtil from utils.data_load import DataLoadUtil
from utils.reconstruction import ReconstructionUtil from utils.reconstruction import ReconstructionUtil
from utils.pts import PtsUtil from utils.pts import PtsUtil
class RenderUtil: class RenderUtil:
target_mask_label = (0, 255, 0)
display_table_mask_label = (0, 0, 255)
random_downsample_N = 32768
min_z = 0.2
max_z = 0.5
@staticmethod @staticmethod
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N): def render_pts(cam_pose, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[mask].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
return np.zeros((0, 3)), np.zeros((0, 3))
sampled_normal_camera = normal_camera[idx]
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world, sampled_normal_camera
@staticmethod
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
sampled_target_points = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N
)
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
@staticmethod
def render_pts(cam_pose, scene_path, script_path, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path) nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
@@ -76,44 +33,20 @@ class RenderUtil:
print(result.stderr) print(result.stderr)
return None return None
path = os.path.join(temp_dir, "tmp") path = os.path.join(temp_dir, "tmp")
cam_info = DataLoadUtil.load_cam_info(path, binocular=True) point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth( cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
target_mask_img_L = (mask_L == RenderUtil.target_mask_label).all(axis=-1) ''' TODO: old code: filter_points api is changed, need to update the code '''
target_mask_img_R = (mask_R == RenderUtil.target_mask_label).all(axis=-1) filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
full_scene_point_cloud = None
if require_full_scene:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
sampled_target_points_L, sampled_target_normal_L = RenderUtil.get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], RenderUtil.random_downsample_N) return filtered_point_cloud, full_scene_point_cloud
sampled_target_points_R = RenderUtil.get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], RenderUtil.random_downsample_N )
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points, overlap_idx = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_threshold, require_idx=True
)
sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
target_points, target_normals = PtsUtil.filter_points(
target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(RenderUtil.min_z, RenderUtil.max_z)
)
if not has_points:
target_points = np.zeros((0, 3))
target_normals = np.zeros((0, 3))
#import ipdb; ipdb.set_trace()
return target_points, target_normals