7 Commits

Author SHA1 Message Date
985a08d89c global: upd inference 2024-11-01 08:43:13 +00:00
b221036e8b global: upd 2024-10-31 16:02:26 +00:00
097712c0ea global_only: ratio2 2024-10-30 15:58:32 +00:00
a954ed0998 global_only: ratio2 2024-10-30 15:49:59 +00:00
f5f8e4266f global_only: ratio 2024-10-30 15:49:11 +00:00
8a05b7883d global_only: train 2024-10-30 15:46:15 +00:00
e23697eb87 global_only: debug 2024-10-29 16:21:30 +00:00
10 changed files with 428 additions and 319 deletions

View File

@@ -6,71 +6,67 @@ runner:
cuda_visible_devices: "0,1,2,3,4,5,6,7" cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment: experiment:
name: w_gf_wo_lf_full name: overfit_ab_global_only
root_dir: "experiments" root_dir: "experiments"
epoch: 1 # -1 stands for last epoch epoch: -1 # -1 stands for last epoch
test: test:
dataset_list: dataset_list:
- OmniObject3d_train - OmniObject3d_train
blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py" blender_script_path: "/data/hofee/project/nbv_rec/blender/data_renderer.py"
output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset" output_dir: "/data/hofee/data/inference_global_full_on_testset"
pipeline: nbv_reconstruction_global_pts_pipeline pipeline: nbv_reconstruction_pipeline
voxel_size: 0.003
dataset: dataset:
OmniObject3d_train: OmniObject3d_train:
root_dir: "/media/hofee/repository/nbv_reconstruction_data_512" root_dir: "/data/hofee/data/new_full_data"
model_dir: "/media/hofee/data/data/scaled_object_meshes" model_dir: "/data/hofee/data/scaled_object_meshes"
source: seq_nbv_reconstruction_dataset source: seq_reconstruction_dataset
split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt" split_file: "/data/hofee/data/sample.txt"
type: test type: test
filter_degree: 75 filter_degree: 75
ratio: 1 ratio: 1
batch_size: 1 batch_size: 1
num_workers: 12 num_workers: 12
pts_num: 4096 pts_num: 8192
load_from_preprocess: False load_from_preprocess: True
OmniObject3d_test:
root_dir: "/data/hofee/data/new_full_data"
model_dir: "/data/hofee/data/scaled_object_meshes"
source: seq_reconstruction_dataset
split_file: "/data/hofee/data/sample.txt"
type: test
filter_degree: 75
eval_list:
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1
num_workers: 12
pts_num: 8192
load_from_preprocess: True
pipeline: pipeline:
nbv_reconstruction_local_pts_pipeline: nbv_reconstruction_pipeline:
modules: modules:
pts_encoder: pointnet_encoder pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder pose_encoder: pose_encoder
view_finder: gf_view_finder view_finder: gf_view_finder
eps: 1e-5 eps: 1e-5
global_scanned_feat: False
nbv_reconstruction_global_pts_pipeline:
modules:
pts_encoder: pointnet_encoder
pose_seq_encoder: transformer_pose_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True global_scanned_feat: True
module: module:
pointnet_encoder: pointnet_encoder:
in_dim: 3 in_dim: 3
out_dim: 1024 out_dim: 1024
global_feat: True global_feat: True
feature_transform: False feature_transform: False
transformer_seq_encoder: transformer_seq_encoder:
pts_embed_dim: 1024 embed_dim: 256
pose_embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 2048
transformer_pose_seq_encoder:
pose_embed_dim: 256
num_heads: 4 num_heads: 4
ffn_dim: 256 ffn_dim: 256
num_layers: 3 num_layers: 3
@@ -86,7 +82,8 @@ module:
sample_mode: ode sample_mode: ode
sampling_steps: 500 sampling_steps: 500
sde_mode: ve sde_mode: ve
pose_encoder: pose_encoder:
pose_dim: 9 pose_dim: 9
out_dim: 256 out_dim: 256
pts_num_encoder:
out_dim: 64

View File

@@ -6,17 +6,17 @@ runner:
cuda_visible_devices: "0,1,2,3,4,5,6,7" cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment: experiment:
name: server_split_dataset name: debug
root_dir: "experiments" root_dir: "experiments"
split: # split: #
root_dir: "/data/hofee/data/new_full_data" root_dir: "/data/hofee/data/packed_preprocessed_data"
type: "unseen_instance" # "unseen_category" type: "unseen_instance" # "unseen_category"
datasets: datasets:
OmniObject3d_train: OmniObject3d_train:
path: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt" path: "/data/hofee/data/OmniObject3d_train.txt"
ratio: 0.9 ratio: 0.9
OmniObject3d_test: OmniObject3d_test:
path: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt" path: "/data/hofee/data/OmniObject3d_test.txt"
ratio: 0.1 ratio: 0.1

View File

@@ -3,17 +3,17 @@ runner:
general: general:
seed: 0 seed: 0
device: cuda device: cuda
cuda_visible_devices: "1" cuda_visible_devices: "0"
parallel: False parallel: False
experiment: experiment:
name: train_ab_global_and_partial_global name: train_ab_global_only
root_dir: "experiments" root_dir: "experiments"
use_checkpoint: False use_checkpoint: False
epoch: -1 # -1 stands for last epoch epoch: -1 # -1 stands for last epoch
max_epochs: 5000 max_epochs: 5000
save_checkpoint_interval: 1 save_checkpoint_interval: 1
test_first: False test_first: True
train: train:
optimizer: optimizer:
@@ -25,7 +25,7 @@ runner:
test: test:
frequency: 3 # test frequency frequency: 3 # test frequency
dataset_list: dataset_list:
#- OmniObject3d_test - OmniObject3d_test
- OmniObject3d_val - OmniObject3d_val
pipeline: nbv_reconstruction_pipeline pipeline: nbv_reconstruction_pipeline
@@ -97,7 +97,7 @@ module:
feature_transform: False feature_transform: False
transformer_seq_encoder: transformer_seq_encoder:
embed_dim: 320 embed_dim: 256
num_heads: 4 num_heads: 4
ffn_dim: 256 ffn_dim: 256
num_layers: 3 num_layers: 3

View File

@@ -7,7 +7,6 @@ from PytorchBoot.utils.log_util import Log
import torch import torch
import os import os
import sys import sys
import time
sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction") sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")
@@ -115,13 +114,8 @@ class NBVReconstructionDataset(BaseDataset):
except Exception as e: except Exception as e:
Log.error(f"Save cache failed: {e}") Log.error(f"Save cache failed: {e}")
def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003): def voxel_downsample_with_mask(self, pts, voxel_size):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32) pass
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, inverse
def __getitem__(self, index): def __getitem__(self, index):
@@ -135,9 +129,6 @@ class NBVReconstructionDataset(BaseDataset):
scanned_coverages_rate, scanned_coverages_rate,
scanned_n_to_world_pose, scanned_n_to_world_pose,
) = ([], [], []) ) = ([], [], [])
start_time = time.time()
start_indices = [0]
total_points = 0
for view in scanned_views: for view in scanned_views:
frame_idx = view[0] frame_idx = view[0]
coverage_rate = view[1] coverage_rate = view[1]
@@ -159,12 +150,8 @@ class NBVReconstructionDataset(BaseDataset):
n_to_world_trans = n_to_world_pose[:3, 3] n_to_world_trans = n_to_world_pose[:3, 3]
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0) n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
scanned_n_to_world_pose.append(n_to_world_9d) scanned_n_to_world_pose.append(n_to_world_9d)
total_points += len(downsampled_target_point_cloud)
start_indices.append(total_points)
end_time = time.time()
#Log.info(f"load data time: {end_time - start_time}")
nbv_idx, nbv_coverage_rate = nbv[0], nbv[1] nbv_idx, nbv_coverage_rate = nbv[0], nbv[1]
nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx) nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx)
cam_info = DataLoadUtil.load_cam_info(nbv_path) cam_info = DataLoadUtil.load_cam_info(nbv_path)
@@ -177,27 +164,14 @@ class NBVReconstructionDataset(BaseDataset):
best_to_world_9d = np.concatenate( best_to_world_9d = np.concatenate(
[best_to_world_6d, best_to_world_trans], axis=0 [best_to_world_6d, best_to_world_trans], axis=0
) )
combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_views_pts, 0.003)
random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num, require_idx=True)
all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np)) combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
all_random_downsample_idx = all_idx_unique[random_downsample_idx] voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
scanned_pts_mask = [] random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num)
for idx, start_idx in enumerate(start_indices):
if idx == len(start_indices) - 1:
break
end_idx = start_indices[idx+1]
view_inverse = inverse[start_idx:end_idx]
view_unique_downsampled_idx = np.unique(view_inverse)
view_unique_downsampled_idx_set = set(view_unique_downsampled_idx)
mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx])
scanned_pts_mask.append(mask)
data_item = { data_item = {
"scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3) "scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
"combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3) "combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3)
"scanned_pts_mask": np.asarray(scanned_pts_mask, dtype=np.bool), # Ndarray(N)
"scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1) "scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1)
"scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9) "scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
"best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1) "best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1)
@@ -223,9 +197,7 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["scanned_n_to_world_pose_9d"] = [ collate_data["scanned_n_to_world_pose_9d"] = [
torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch
] ]
collate_data["scanned_pts_mask"] = [
torch.tensor(item["scanned_pts_mask"]) for item in batch
]
''' ------ Fixed Length ------ ''' ''' ------ Fixed Length ------ '''
collate_data["best_to_world_pose_9d"] = torch.stack( collate_data["best_to_world_pose_9d"] = torch.stack(
@@ -234,14 +206,12 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["combined_scanned_pts"] = torch.stack( collate_data["combined_scanned_pts"] = torch.stack(
[torch.tensor(item["combined_scanned_pts"]) for item in batch] [torch.tensor(item["combined_scanned_pts"]) for item in batch]
) )
for key in batch[0].keys(): for key in batch[0].keys():
if key not in [ if key not in [
"scanned_pts", "scanned_pts",
"scanned_n_to_world_pose_9d", "scanned_n_to_world_pose_9d",
"best_to_world_pose_9d", "best_to_world_pose_9d",
"combined_scanned_pts", "combined_scanned_pts",
"scanned_pts_mask",
]: ]:
collate_data[key] = [item[key] for item in batch] collate_data[key] = [item[key] for item in batch]
return collate_data return collate_data
@@ -257,9 +227,9 @@ if __name__ == "__main__":
torch.manual_seed(seed) torch.manual_seed(seed)
np.random.seed(seed) np.random.seed(seed)
config = { config = {
"root_dir": "/data/hofee/nbv_rec_part2_preprocessed", "root_dir": "/data/hofee/data/packed_preprocessed_data",
"source": "nbv_reconstruction_dataset", "source": "nbv_reconstruction_dataset",
"split_file": "/data/hofee/data/sample.txt", "split_file": "/data/hofee/data/OmniObject3d_train.txt",
"load_from_preprocess": True, "load_from_preprocess": True,
"ratio": 0.5, "ratio": 0.5,
"batch_size": 2, "batch_size": 2,

154
core/old_seq_dataset.py Normal file
View File

@@ -0,0 +1,154 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("old_seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
    """Test-only dataset that yields, per scene, the first frame of its best sequence.

    For every scene listed in the split file, the label with the highest
    ``max_coverage_rate`` is selected and the first entry of that label's
    ``best_sequence`` is used as the starting view.

    Config keys read here: ``type``, ``root_dir``, ``split_file``, ``pts_num``,
    ``model_dir``, ``filter_degree`` and (optional) ``load_from_preprocess``.
    """

    def __init__(self, config):
        super(SeqNBVReconstructionDataset, self).__init__(config)
        self.type = config["type"]
        if self.type != namespace.Mode.TEST:
            # Message names the identifier this class is actually registered under.
            Log.error("Dataset <old_seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
        self.config = config
        self.root_dir = config["root_dir"]
        self.split_file_path = config["split_file"]
        self.scene_name_list = self.load_scene_name_list()
        self.datalist = self.get_datalist()
        self.pts_num = config["pts_num"]

        self.model_dir = config["model_dir"]
        self.filter_degree = config["filter_degree"]
        # Stored for parity with the other datasets; __getitem__ below always
        # loads preprocessed points regardless of this flag.
        self.load_from_preprocess = config.get("load_from_preprocess", False)

    def load_scene_name_list(self):
        """Read scene names from the split file, one per line.

        Blank lines (e.g. a trailing newline at the end of the file) are
        skipped so they do not turn into an empty scene name.
        """
        scene_name_list = []
        with open(self.split_file_path, "r") as f:
            for line in f:
                scene_name = line.strip()
                if not scene_name:
                    continue
                scene_name_list.append(scene_name)
        return scene_name_list

    def get_datalist(self):
        """Build one entry per scene, pointing at the label with the best coverage.

        Returns:
            list[dict]: each dict has ``scene_name``, ``first_frame``,
            ``max_coverage_rate``, ``best_seq_len`` and ``label_idx``.
        """
        datalist = []
        for scene_name in self.scene_name_list:
            seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
            scene_max_coverage_rate = 0
            scene_max_cr_idx = 0

            # Strict '>' keeps the earliest label when several tie on coverage.
            for seq_idx in range(seq_num):
                label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
                label_data = DataLoadUtil.load_label(label_path)
                max_coverage_rate = label_data["max_coverage_rate"]
                if max_coverage_rate > scene_max_coverage_rate:
                    scene_max_coverage_rate = max_coverage_rate
                    scene_max_cr_idx = seq_idx

            # NOTE(review): when seq_num is 0, label index 0 is still loaded here;
            # confirm how DataLoadUtil behaves for a scene without labels.
            label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
            label_data = DataLoadUtil.load_label(label_path)
            best_sequence = label_data["best_sequence"]
            datalist.append({
                "scene_name": scene_name,
                "first_frame": best_sequence[0],
                "max_coverage_rate": scene_max_coverage_rate,
                "best_seq_len": len(best_sequence),
                "label_idx": scene_max_cr_idx,
            })
        return datalist

    def __getitem__(self, index):
        """Load the first view of the scene at ``index`` and its evaluation metadata.

        Returns:
            dict: first-view points (randomly downsampled to ``pts_num``), the
            first view's 9D pose (6D rotation + translation), and scene-level
            values used downstream (coverage, voxel threshold, model points, ...).
        """
        data_item_info = self.datalist[index]
        # "first_frame" is indexed as (frame index, coverage rate).
        first_frame_idx = data_item_info["first_frame"][0]
        first_frame_coverage = data_item_info["first_frame"][1]
        max_coverage_rate = data_item_info["max_coverage_rate"]
        scene_name = data_item_info["scene_name"]

        # The view path is needed for both the camera info and the point cloud.
        first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
        first_cam_info = DataLoadUtil.load_cam_info(first_view_path, binocular=True)
        first_left_cam_pose = first_cam_info["cam_to_world"]
        first_center_cam_pose = first_cam_info["cam_to_world_O"]

        first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
        first_pts_num = first_target_point_cloud.shape[0]
        first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(
            first_target_point_cloud, self.pts_num
        )

        # 9D pose = 6D rotation representation followed by the translation vector.
        first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3, :3]))
        first_to_world_trans = first_left_cam_pose[:3, 3]
        first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)

        diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
        voxel_threshold = diag * 0.02
        # inv(left pose) @ center pose, built from the two cam_to_world matrices.
        first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
        scene_path = os.path.join(self.root_dir, scene_name)
        model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)

        data_item = {
            "first_pts_num": np.asarray(first_pts_num, dtype=np.int32),
            # Wrapped in a list so the array gains a leading axis of length 1.
            "first_pts": np.asarray([first_downsampled_target_point_cloud], dtype=np.float32),
            "combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud, dtype=np.float32),
            "first_to_world_9d": np.asarray([first_to_world_9d], dtype=np.float32),
            "scene_name": scene_name,
            "max_coverage_rate": max_coverage_rate,
            "voxel_threshold": voxel_threshold,
            "filter_degree": self.filter_degree,
            "O_to_L_pose": first_O_to_first_L_pose,
            "first_frame_coverage": first_frame_coverage,
            "scene_path": scene_path,
            "model_points_normals": model_points_normals,
            "best_seq_len": data_item_info["best_seq_len"],
            "first_frame_id": first_frame_idx,
        }
        return data_item

    def __len__(self):
        """Number of scenes in the datalist."""
        return len(self.datalist)

    def get_collate_fn(self):
        """Return a collate function for batches of ``__getitem__`` dicts.

        ``first_pts`` and ``first_to_world_9d`` become lists of tensors,
        ``combined_scanned_pts`` is stacked into one tensor, and every other
        key is passed through as a plain list of per-item values.
        """
        def collate_fn(batch):
            collate_data = {}
            collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
            collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
            collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
            for key in batch[0].keys():
                if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
                    collate_data[key] = [item[key] for item in batch]
            return collate_data
        return collate_fn
# -------------- Debug ---------------- #
if __name__ == "__main__":
    # Manual smoke run: build the dataset from hard-coded paths, then walk the
    # loader and drop into a debugger after each processed batch.
    import torch

    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)

    debug_config = dict([
        ("root_dir", "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy"),
        ("split_file", "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt"),
        ("model_dir", "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes"),
        ("ratio", 0.005),
        ("batch_size", 2),
        ("filter_degree", 75),
        ("num_workers", 0),
        ("pts_num", 32684),
        ("type", namespace.Mode.TEST),
        ("load_from_preprocess", True),
    ])

    dataset = SeqNBVReconstructionDataset(debug_config)
    print(len(dataset))
    #dataset.__getitem__(10)
    loader = dataset.get_loader(shuffle=True)
    for _, raw_batch in enumerate(loader):
        processed_batch = dataset.process_batch(raw_batch, "cuda:0")
        print(processed_batch)
        # ------ Debug Start ------
        import ipdb
        ipdb.set_trace()
        # ------ Debug End ------

View File

@@ -89,49 +89,25 @@ class NBVReconstructionPipeline(nn.Module):
"scanned_n_to_world_pose_9d" "scanned_n_to_world_pose_9d"
] # List(B): Tensor(S x 9) ] # List(B): Tensor(S x 9)
scanned_pts_mask_batch = data["scanned_pts_mask"] # List(B): Tensor(N)
device = next(self.parameters()).device device = next(self.parameters()).device
embedding_list_batch = [] embedding_list_batch = []
combined_scanned_pts_batch = data["combined_scanned_pts"] # Tensor(B x N x 3) combined_scanned_pts_batch = data["combined_scanned_pts"] # Tensor(B x N x 3)
global_scanned_feat, per_point_feat_batch = self.pts_encoder.encode_points( global_scanned_feat = self.pts_encoder.encode_points(
combined_scanned_pts_batch, require_per_point_feat=True combined_scanned_pts_batch, require_per_point_feat=False
) # global_scanned_feat: Tensor(B x Dg) ) # global_scanned_feat: Tensor(B x Dg)
batch_size = len(scanned_n_to_world_pose_9d_batch)
for i in range(batch_size):
seq_len = len(scanned_n_to_world_pose_9d_batch[i])
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d_batch[i].to(device) # Tensor(S x 9)
scanned_pts_mask = scanned_pts_mask_batch[i] # Tensor(S x N)
per_point_feat = per_point_feat_batch[i] # Tensor(N x Dp)
partial_point_feat_seq = []
for j in range(seq_len):
partial_per_point_feat = per_point_feat[scanned_pts_mask[j]]
if partial_per_point_feat.shape[0] == 0:
partial_point_feat = torch.zeros(per_point_feat.shape[1], device=device)
else:
partial_point_feat = torch.mean(partial_per_point_feat, dim=0) # Tensor(Dp)
partial_point_feat_seq.append(partial_point_feat)
partial_point_feat_seq = torch.stack(partial_point_feat_seq, dim=0) # Tensor(S x Dp)
pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp)
seq_embedding = torch.cat([partial_point_feat_seq, pose_feat_seq], dim=-1) for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device) # Tensor(S x 9)
pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp)
seq_embedding = pose_feat_seq
embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp)) embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp))
seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds) seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds)
main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg)) main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg))
if torch.isnan(main_feat).any(): if torch.isnan(main_feat).any():
for i in range(len(main_feat)):
if torch.isnan(main_feat[i]).any():
scanned_pts_mask = scanned_pts_mask_batch[i]
Log.info(f"scanned_pts_mask shape: {scanned_pts_mask.shape}")
Log.info(f"scanned_pts_mask sum: {scanned_pts_mask.sum()}")
import ipdb
ipdb.set_trace()
Log.error("nan in main_feat", True) Log.error("nan in main_feat", True)
return main_feat return main_feat

View File

@@ -1,154 +1,195 @@
import numpy as np import numpy as np
from PytorchBoot.dataset import BaseDataset from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log from PytorchBoot.config import ConfigManager
import torch from PytorchBoot.utils.log_util import Log
import os import torch
import sys import os
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction") import sys
from utils.data_load import DataLoadUtil sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.pose import PoseUtil
from utils.pts import PtsUtil from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
@stereotype.dataset("seq_nbv_reconstruction_dataset") from utils.pts import PtsUtil
class SeqNBVReconstructionDataset(BaseDataset):
def __init__(self, config):
super(SeqNBVReconstructionDataset, self).__init__(config) @stereotype.dataset("seq_reconstruction_dataset")
self.type = config["type"] class SeqReconstructionDataset(BaseDataset):
if self.type != namespace.Mode.TEST: def __init__(self, config):
Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True) super(SeqReconstructionDataset, self).__init__(config)
self.config = config self.config = config
self.root_dir = config["root_dir"] self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"] self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list() self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist() self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.pts_num = config["pts_num"]
self.model_dir = config["model_dir"] self.type = config["type"]
self.filter_degree = config["filter_degree"] self.cache = config.get("cache")
self.load_from_preprocess = config.get("load_from_preprocess", False) self.load_from_preprocess = config.get("load_from_preprocess", False)
if self.type == namespace.Mode.TEST:
def load_scene_name_list(self): #self.model_dir = config["model_dir"]
scene_name_list = [] self.filter_degree = config["filter_degree"]
with open(self.split_file_path, "r") as f: if self.type == namespace.Mode.TRAIN:
for line in f: scale_ratio = 1
scene_name = line.strip() self.datalist = self.datalist*scale_ratio
scene_name_list.append(scene_name) if self.cache:
return scene_name_list expr_root = ConfigManager.get("runner", "experiment", "root_dir")
expr_name = ConfigManager.get("runner", "experiment", "name")
def get_datalist(self): self.cache_dir = os.path.join(expr_root, expr_name, "cache")
datalist = [] # self.preprocess_cache()
for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name) def load_scene_name_list(self):
scene_max_coverage_rate = 0 scene_name_list = []
scene_max_cr_idx = 0 with open(self.split_file_path, "r") as f:
for line in f:
for seq_idx in range(seq_num): scene_name = line.strip()
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx) scene_name_list.append(scene_name)
label_data = DataLoadUtil.load_label(label_path) return scene_name_list
max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate: def get_datalist(self):
scene_max_coverage_rate = max_coverage_rate datalist = []
scene_max_cr_idx = seq_idx for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx) scene_max_coverage_rate = 0
label_data = DataLoadUtil.load_label(label_path) max_coverage_rate_list = []
first_frame = label_data["best_sequence"][0] scene_max_cr_idx = 0
best_seq_len = len(label_data["best_sequence"]) for seq_idx in range(seq_num):
datalist.append({ label_path = DataLoadUtil.get_label_path(
"scene_name": scene_name, self.root_dir, scene_name, seq_idx
"first_frame": first_frame, )
"max_coverage_rate": scene_max_coverage_rate, label_data = DataLoadUtil.load_label(label_path)
"best_seq_len": best_seq_len, max_coverage_rate = label_data["max_coverage_rate"]
"label_idx": scene_max_cr_idx, if max_coverage_rate > scene_max_coverage_rate:
}) scene_max_coverage_rate = max_coverage_rate
return datalist scene_max_cr_idx = seq_idx
max_coverage_rate_list.append(max_coverage_rate)
def __getitem__(self, index): best_label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
data_item_info = self.datalist[index] best_label_data = DataLoadUtil.load_label(best_label_path)
first_frame_idx = data_item_info["first_frame"][0] first_frame = best_label_data["best_sequence"][0]
first_frame_coverage = data_item_info["first_frame"][1] best_seq_len = len(best_label_data["best_sequence"])
max_coverage_rate = data_item_info["max_coverage_rate"] datalist.append({
scene_name = data_item_info["scene_name"] "scene_name": scene_name,
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True) "first_frame": first_frame,
first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx) "best_seq_len": best_seq_len,
first_left_cam_pose = first_cam_info["cam_to_world"] "max_coverage_rate": scene_max_coverage_rate,
first_center_cam_pose = first_cam_info["cam_to_world_O"] "label_idx": scene_max_cr_idx,
first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path) })
first_pts_num = first_target_point_cloud.shape[0] return datalist
first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3,:3])) def preprocess_cache(self):
first_to_world_trans = first_left_cam_pose[:3,3] Log.info("preprocessing cache...")
first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0) for item_idx in range(len(self.datalist)):
diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name) self.__getitem__(item_idx)
voxel_threshold = diag*0.02 Log.success("finish preprocessing cache.")
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
scene_path = os.path.join(self.root_dir, scene_name) def load_from_cache(self, scene_name, curr_frame_idx):
model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name) cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
data_item = { if os.path.exists(cache_path):
"first_pts_num": np.asarray( data = np.loadtxt(cache_path)
first_pts_num, dtype=np.int32 return data
), else:
"first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32), return None
"combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
"first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32), def save_to_cache(self, scene_name, curr_frame_idx, data):
"scene_name": scene_name, cache_name = f"{scene_name}_{curr_frame_idx}.txt"
"max_coverage_rate": max_coverage_rate, cache_path = os.path.join(self.cache_dir, cache_name)
"voxel_threshold": voxel_threshold, try:
"filter_degree": self.filter_degree, np.savetxt(cache_path, data)
"O_to_L_pose": first_O_to_first_L_pose, except Exception as e:
"first_frame_coverage": first_frame_coverage, Log.error(f"Save cache failed: {e}")
"scene_path": scene_path,
"model_points_normals": model_points_normals, def seq_combined_pts(self, scene, frame_idx_list):
"best_seq_len": data_item_info["best_seq_len"], all_combined_pts = []
"first_frame_id": first_frame_idx, for i in frame_idx_list:
} path = DataLoadUtil.get_path(self.root_dir, scene, i)
return data_item pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
def __len__(self): continue
return len(self.datalist) all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
def get_collate_fn(self): downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.003)
def collate_fn(batch): return downsampled_all_pts
collate_data = {}
collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch] def __getitem__(self, index):
collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch] data_item_info = self.datalist[index]
collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch]) max_coverage_rate = data_item_info["max_coverage_rate"]
for key in batch[0].keys(): scene_name = data_item_info["scene_name"]
if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]: (
collate_data[key] = [item[key] for item in batch] scanned_views_pts,
return collate_data scanned_coverages_rate,
return collate_fn scanned_n_to_world_pose,
) = ([], [], [])
# -------------- Debug ---------------- # view = data_item_info["first_frame"]
if __name__ == "__main__": frame_idx = view[0]
import torch coverage_rate = view[1]
seed = 0 view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
torch.manual_seed(seed) cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
np.random.seed(seed)
config = { n_to_world_pose = cam_info["cam_to_world"]
"root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy", target_point_cloud = (
"split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt", DataLoadUtil.load_from_preprocessed_pts(view_path)
"model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes", )
"ratio": 0.005, downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(
"batch_size": 2, target_point_cloud, self.pts_num
"filter_degree": 75, )
"num_workers": 0, scanned_views_pts.append(downsampled_target_point_cloud)
"pts_num": 32684, scanned_coverages_rate.append(coverage_rate)
"type": namespace.Mode.TEST, n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
"load_from_preprocess": True np.asarray(n_to_world_pose[:3, :3])
} )
ds = SeqNBVReconstructionDataset(config) first_left_cam_pose = cam_info["cam_to_world"]
print(len(ds)) first_center_cam_pose = cam_info["cam_to_world_O"]
#ds.__getitem__(10) first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
dl = ds.get_loader(shuffle=True) n_to_world_trans = n_to_world_pose[:3, 3]
for idx, data in enumerate(dl): n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
data = ds.process_batch(data, "cuda:0") scanned_n_to_world_pose.append(n_to_world_9d)
print(data)
# ------ Debug Start ------ frame_list = []
import ipdb;ipdb.set_trace() for i in range(DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)):
# ------ Debug End ------+ frame_list.append(i)
gt_pts = self.seq_combined_pts(scene_name, frame_list)
data_item = {
"first_scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
"first_scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1)
"first_scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
"seq_max_coverage_rate": max_coverage_rate, # Float, range(0, 1)
"scene_name": scene_name, # String
"gt_pts": gt_pts, # Ndarray(N x 3)
"scene_path": os.path.join(self.root_dir, scene_name), # String
"O_to_L_pose": first_O_to_first_L_pose,
}
return data_item
# Dataset size: the number of entries held in self.datalist.
def __len__(self):
return len(self.datalist)
# -------------- Debug ---------------- #
if __name__ == "__main__":
    # Ad-hoc smoke test: build the dataset from a local config, report its
    # size, and print one sample.
    import torch

    # Seed the torch and numpy RNGs so repeated runs behave the same.
    seed = 0
    torch.manual_seed(seed)
    np.random.seed(seed)

    config = {
        "root_dir": "/data/hofee/data/new_full_data",
        "source": "seq_reconstruction_dataset",
        "split_file": "/data/hofee/data/sample.txt",
        "load_from_preprocess": True,
        "ratio": 0.5,
        "batch_size": 2,
        "filter_degree": 75,
        "num_workers": 0,
        "pts_num": 4096,
        "type": namespace.Mode.TRAIN,
    }
    ds = SeqReconstructionDataset(config)
    print(len(ds))

    # The sample index used to be a hard-coded 10. Clamp it to the last valid
    # index so a dataset with fewer than 11 items still prints a sample
    # instead of failing on an out-of-range lookup.
    if len(ds) > 0:
        print(ds[min(10, len(ds) - 1)])
    else:
        print("dataset is empty, nothing to inspect")

View File

@@ -27,6 +27,7 @@ class Inferencer(Runner):
self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path") self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path")
self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir") self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir")
self.voxel_size = ConfigManager.get(namespace.Stereotype.RUNNER, "voxel_size")
''' Pipeline ''' ''' Pipeline '''
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE] self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name) self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
@@ -65,16 +66,11 @@ class Inferencer(Runner):
for dataset_idx, test_set in enumerate(self.test_set_list): for dataset_idx, test_set in enumerate(self.test_set_list):
status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list)) status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list))
test_set_name = test_set.get_name() test_set_name = test_set.get_name()
test_loader = test_set.get_loader()
if test_loader.batch_size > 1: total=int(len(test_set))
Log.error("Batch size should be 1 for inference, found {} in {}".format(test_loader.batch_size, test_set_name), terminate=True) for i in range(total):
data = test_set.__getitem__(i)
total=int(len(test_loader))
loop = tqdm(enumerate(test_loader), total=total)
for i, data in loop:
status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total) status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
test_set.process_batch(data, self.device)
output = self.predict_sequence(data) output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"][0], output) self.save_inference_result(test_set_name, data["scene_name"][0], output)
@@ -88,26 +84,23 @@ class Inferencer(Runner):
''' data for rendering ''' ''' data for rendering '''
scene_path = data["scene_path"][0] scene_path = data["scene_path"][0]
O_to_L_pose = data["O_to_L_pose"][0] O_to_L_pose = data["O_to_L_pose"][0]
voxel_threshold = data["voxel_threshold"][0] voxel_threshold = self.voxel_size
filter_degree = data["filter_degree"][0] filter_degree = 75
model_points_normals = data["model_points_normals"][0] down_sampled_model_pts = data["gt_pts"]
model_pts = model_points_normals[:,:3] import ipdb; ipdb.set_trace()
down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold) first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0]
first_frame_to_world_9d = data["first_to_world_9d"][0] first_frame_to_world = np.eye(4)
first_frame_to_world = torch.eye(4, device=first_frame_to_world_9d.device) first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6])
first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(first_frame_to_world_9d[:,:6])[0] first_frame_to_world[:3,3] = first_frame_to_world_9d[6:]
first_frame_to_world[:3,3] = first_frame_to_world_9d[0,6:]
first_frame_to_world = first_frame_to_world.to(self.device)
''' data for inference ''' ''' data for inference '''
input_data = {} input_data = {}
input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)] input_data["combined_scanned_pts"] = torch.tensor(data["first_scanned_pts"][0], dtype=torch.float32).to(self.device)
input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)] input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(data["first_scanned_n_to_world_pose_9d"], dtype=torch.float32).to(self.device)]
input_data["mode"] = namespace.Mode.TEST input_data["mode"] = namespace.Mode.TEST
input_data["combined_scanned_pts"] = data["combined_scanned_pts"] input_pts_N = input_data["combined_scanned_pts"].shape[1]
input_pts_N = input_data["scanned_pts"][0].shape[1]
first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose) first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, down_sampled_model_pts, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
scanned_view_pts = [first_frame_target_pts] scanned_view_pts = [first_frame_target_pts]
last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold) last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)

View File

@@ -14,38 +14,16 @@ class PtsUtil:
downsampled_points = point_cloud[idx_unique] downsampled_points = point_cloud[idx_unique]
return downsampled_points, idx_unique return downsampled_points, idx_unique
else: else:
import ipdb; ipdb.set_trace() unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=False) return unique_voxels[0]*voxel_size
return unique_voxels*voxel_size
@staticmethod
def voxel_downsample_point_cloud_o3d(point_cloud, voxel_size=0.005):
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(point_cloud)
pcd = pcd.voxel_down_sample(voxel_size)
return np.asarray(pcd.points)
@staticmethod @staticmethod
def voxel_downsample_point_cloud_and_trace_o3d(point_cloud, voxel_size=0.005): def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
pcd = o3d.geometry.PointCloud()
pcd.points = o3d.utility.Vector3dVector(point_cloud)
max_bound = pcd.get_max_bound()
min_bound = pcd.get_min_bound()
pcd = pcd.voxel_down_sample_and_trace(voxel_size, max_bound, min_bound, True)
return np.asarray(pcd.points)
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False, replace=True):
if point_cloud.shape[0] == 0: if point_cloud.shape[0] == 0:
if require_idx: if require_idx:
return point_cloud, np.array([]) return point_cloud, np.array([])
return point_cloud return point_cloud
if not replace and num_points > len(point_cloud): idx = np.random.choice(len(point_cloud), num_points, replace=True)
if require_idx:
return point_cloud, np.arange(len(point_cloud))
return point_cloud
idx = np.random.choice(len(point_cloud), num_points, replace=replace)
if require_idx: if require_idx:
return point_cloud[idx], idx return point_cloud[idx], idx
return point_cloud[idx] return point_cloud[idx]

View File

@@ -10,7 +10,7 @@ from utils.pts import PtsUtil
class RenderUtil: class RenderUtil:
@staticmethod @staticmethod
def render_pts(cam_pose, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False): def render_pts(cam_pose, scene_path, script_path, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path) nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
@@ -34,10 +34,10 @@ class RenderUtil:
return None return None
path = os.path.join(temp_dir, "tmp") path = os.path.join(temp_dir, "tmp")
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True) point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
normals = DataLoadUtil.get_target_normals_world_from_path(path, binocular=True)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True) cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
''' TODO: old code: filter_points api is changed, need to update the code ''' filtered_point_cloud = PtsUtil.filter_points(point_cloud, normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
full_scene_point_cloud = None full_scene_point_cloud = None
if require_full_scene: if require_full_scene:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True) depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)