24 Commits

Author SHA1 Message Date
91cabec977 deploy pointnet++ 2024-12-28 10:01:43 +00:00
445e9dc00b Merge branch 'ab_global_only' of https://git.hofee.top/hofee/nbv_reconstruction into ab_global_only 2024-12-26 08:36:59 +00:00
6ce3760471 upd 2024-12-26 08:21:57 +00:00
47624f12cf inference on YCB 2024-12-04 14:52:23 +08:00
501975457f fix overlap 2024-12-02 19:15:48 +08:00
155b655938 upd 2024-11-25 09:41:28 +00:00
2c8ef20321 upd ab_global_only 2024-11-20 15:24:45 +08:00
hofee 493639287e update calculating pts_num in inference.py 2024-11-07 19:42:44 +08:00
hofee 6a608ea74b upd inference_server 2024-11-06 20:07:33 +08:00
hofee 6f427785b3 upd inference 2024-11-05 12:17:20 -06:00
hofee 5bcd0fc6e3 upd 2024-11-04 23:49:12 +08:00
hofee 2b7243d1be upd infernce 2024-11-04 17:17:54 +08:00
04d3a359e1 upd 2024-11-02 21:54:46 +00:00
287983277a global: debug inference 2024-11-01 22:51:16 +00:00
982a3b9b60 global: inference debug 2024-11-01 21:58:44 +00:00
ecd4cfa806 global: debug inference 2024-11-01 15:47:11 +00:00
985a08d89c global: upd inference 2024-11-01 08:43:13 +00:00
b221036e8b global: upd 2024-10-31 16:02:26 +00:00
097712c0ea global_only: ratio2 2024-10-30 15:58:32 +00:00
a954ed0998 global_only: ratio2 2024-10-30 15:49:59 +00:00
f5f8e4266f global_only: ratio 2024-10-30 15:49:11 +00:00
8a05b7883d global_only: train 2024-10-30 15:46:15 +00:00
e23697eb87 global_only: debug 2024-10-29 16:21:30 +00:00
2487039445 global_only: config 2024-10-29 12:18:51 +00:00
46 changed files with 3363 additions and 348 deletions

View File

@@ -1,5 +1,6 @@
from PytorchBoot.application import PytorchBootApplication
from runners.inferencer import Inferencer
from runners.inference_server import InferencerServer
@PytorchBootApplication("inference")
class InferenceApp:
@@ -14,3 +15,17 @@ class InferenceApp:
Evaluator("path_to_your_eval_config").run()
'''
Inferencer("./configs/local/inference_config.yaml").run()
@PytorchBootApplication("server")
class InferenceServerApp:
@staticmethod
def start():
'''
call default or your custom runners here, code will be executed
automatically when type "pytorch-boot run" or "ptb run" in terminal
example:
Trainer("path_to_your_train_config").run()
Evaluator("path_to_your_eval_config").run()
'''
InferencerServer("./configs/server/server_inference_server_config.yaml").run()

View File

@@ -1,76 +1,72 @@
runner:
general:
seed: 1
seed: 0
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: w_gf_wo_lf_full
name: train_ab_global_only
root_dir: "experiments"
epoch: 1 # -1 stands for last epoch
epoch: -1 # -1 stands for last epoch
test:
dataset_list:
- OmniObject3d_train
- OmniObject3d_test
blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
output_dir: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/inference_global_full_on_testset"
pipeline: nbv_reconstruction_global_pts_pipeline
output_dir: "/media/hofee/data/results/nbv_rec_inference/global_only_ycb_241204"
pipeline: nbv_reconstruction_pipeline
voxel_size: 0.003
min_new_area: 1.0
dataset:
OmniObject3d_train:
root_dir: "/media/hofee/repository/nbv_reconstruction_data_512"
model_dir: "/media/hofee/data/data/scaled_object_meshes"
source: seq_nbv_reconstruction_dataset
split_file: "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/test/test_set_list.txt"
# OmniObject3d_train:
# root_dir: "C:\\Document\\Datasets\\inference_test1"
# model_dir: "C:\\Document\\Datasets\\scaled_object_meshes"
# source: seq_reconstruction_dataset_preprocessed
# split_file: "C:\\Document\\Datasets\\data_list\\sample.txt"
# type: test
# filter_degree: 75
# ratio: 1
# batch_size: 1
# num_workers: 12
# pts_num: 8192
# load_from_preprocess: True
OmniObject3d_test:
root_dir: "/media/hofee/data/results/ycb_preprocessed_dataset"
model_dir: "/media/hofee/data/data/ycb_obj"
source: seq_reconstruction_dataset_preprocessed
# split_file: "C:\\Document\\Datasets\\data_list\\OmniObject3d_test.txt"
type: test
filter_degree: 75
ratio: 1
eval_list:
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1
num_workers: 12
pts_num: 4096
load_from_preprocess: False
pts_num: 8192
load_from_preprocess: True
pipeline:
nbv_reconstruction_local_pts_pipeline:
nbv_reconstruction_pipeline:
modules:
pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: False
nbv_reconstruction_global_pts_pipeline:
modules:
pts_encoder: pointnet_encoder
pose_seq_encoder: transformer_pose_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True
module:
pointnet_encoder:
in_dim: 3
out_dim: 1024
global_feat: True
feature_transform: False
transformer_seq_encoder:
pts_embed_dim: 1024
pose_embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 2048
transformer_pose_seq_encoder:
pose_embed_dim: 256
embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
@@ -86,7 +82,8 @@ module:
sample_mode: ode
sampling_steps: 500
sde_mode: ve
pose_encoder:
pose_dim: 9
out_dim: 256
pts_num_encoder:
out_dim: 64
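The runner reads this YAML through PytorchBoot's ConfigManager; a minimal sketch of pulling individual values out of the loaded tree is below. The `get` calls mirror the ones used in the dataset code later in this diff; the print is illustrative only.

```python
from PytorchBoot.config import ConfigManager

# assumes the YAML above has already been loaded by the PytorchBoot runner;
# values are looked up along the nested key path
expr_root = ConfigManager.get("runner", "experiment", "root_dir")  # "experiments"
expr_name = ConfigManager.get("runner", "experiment", "name")      # "train_ab_global_only"
print(expr_root, expr_name)
```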

View File

@@ -22,6 +22,6 @@ runner:
datasets:
OmniObject3d:
root_dir: /data/hofee/nbv_rec_part2_preprocessed
from: 155
to: 165 # ..-1 means end
root_dir: /media/hofee/data/results/ycb_view_data
from: 0
to: -1 # ..-1 means end

View File

@@ -8,11 +8,11 @@ runner:
root_dir: experiments
generate:
port: 5002
from: 600
to: -1 # -1 means all
object_dir: /media/hofee/data/data/object_meshes_part1
table_model_path: "/media/hofee/data/data/others/table.obj"
output_dir: /media/hofee/repository/data_part_1
from: 1
to: 50 # -1 means all
object_dir: /media/hofee/data/data/ycb_obj
table_model_path: /media/hofee/data/data/others/table.obj
output_dir: /media/hofee/data/results/ycb_view_data
binocular_vision: true
plane_size: 10
max_views: 512

View File

@@ -0,0 +1,53 @@
runner:
general:
seed: 0
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: train_ab_global_only
root_dir: "experiments"
epoch: -1 # -1 stands for last epoch
pipeline: nbv_reconstruction_pipeline
voxel_size: 0.003
pipeline:
nbv_reconstruction_pipeline:
modules:
pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True
module:
pointnet_encoder:
in_dim: 3
out_dim: 1024
global_feat: True
feature_transform: False
transformer_seq_encoder:
embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 1024
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 2048
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False
sample_mode: ode
sampling_steps: 500
sde_mode: ve
pose_encoder:
pose_dim: 9
out_dim: 256
pts_num_encoder:
out_dim: 64

View File

@@ -7,13 +7,13 @@ runner:
parallel: False
experiment:
name: overfit_ab_global_and_local
name: train_ab_global_only
root_dir: "experiments"
use_checkpoint: False
use_checkpoint: True
epoch: -1 # -1 stands for last epoch
max_epochs: 5000
save_checkpoint_interval: 1
test_first: False
test_first: True
train:
optimizer:
@@ -25,53 +25,53 @@ runner:
test:
frequency: 3 # test frequency
dataset_list:
#- OmniObject3d_test
- OmniObject3d_test
- OmniObject3d_val
pipeline: nbv_reconstruction_pipeline
dataset:
OmniObject3d_train:
root_dir: "/data/hofee/nbv_rec_part2_preprocessed"
root_dir: "/data/hofee/data/new_full_data"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/data/hofee/data/sample.txt"
split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
type: train
cache: True
ratio: 1
batch_size: 32
num_workers: 16
batch_size: 80
num_workers: 128
pts_num: 8192
load_from_preprocess: True
OmniObject3d_test:
root_dir: "/data/hofee/nbv_rec_part2_preprocessed"
root_dir: "/data/hofee/data/new_full_data"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/data/hofee/data/sample.txt"
split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt"
type: test
cache: True
filter_degree: 75
eval_list:
- pose_diff
ratio: 1
batch_size: 32
batch_size: 80
num_workers: 12
pts_num: 8192
load_from_preprocess: True
OmniObject3d_val:
root_dir: "/data/hofee/nbv_rec_part2_preprocessed"
root_dir: "/data/hofee/data/new_full_data"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/data/hofee/data/sample.txt"
split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
type: test
cache: True
filter_degree: 75
eval_list:
- pose_diff
ratio: 1
batch_size: 32
ratio: 0.1
batch_size: 80
num_workers: 12
pts_num: 8192
load_from_preprocess: True
@@ -92,21 +92,21 @@ module:
pointnet_encoder:
in_dim: 3
out_dim: 512
out_dim: 1024
global_feat: True
feature_transform: False
transformer_seq_encoder:
embed_dim: 768
embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 2048
output_dim: 1024
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 2560
main_feat_dim: 2048
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False

View File

@@ -34,7 +34,7 @@ class NBVReconstructionDataset(BaseDataset):
#self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
if self.type == namespace.Mode.TRAIN:
scale_ratio = 50
scale_ratio = 1
self.datalist = self.datalist*scale_ratio
if self.cache:
expr_root = ConfigManager.get("runner", "experiment", "root_dir")
@@ -206,11 +206,9 @@ class NBVReconstructionDataset(BaseDataset):
collate_data["combined_scanned_pts"] = torch.stack(
[torch.tensor(item["combined_scanned_pts"]) for item in batch]
)
for key in batch[0].keys():
if key not in [
"scanned_pts",
"scanned_pts_mask",
"scanned_n_to_world_pose_9d",
"best_to_world_pose_9d",
"combined_scanned_pts",

core/old_seq_dataset.py (new file, 154 lines)
View File

@@ -0,0 +1,154 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("old_seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
def __init__(self, config):
super(SeqNBVReconstructionDataset, self).__init__(config)
self.type = config["type"]
if self.type != namespace.Mode.TEST:
Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
self.load_from_preprocess = config.get("load_from_preprocess", False)
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
scene_name_list.append(scene_name)
return scene_name_list
def get_datalist(self):
datalist = []
for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
scene_max_coverage_rate = 0
scene_max_cr_idx = 0
for seq_idx in range(seq_num):
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
label_data = DataLoadUtil.load_label(label_path)
max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate:
scene_max_coverage_rate = max_coverage_rate
scene_max_cr_idx = seq_idx
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
label_data = DataLoadUtil.load_label(label_path)
first_frame = label_data["best_sequence"][0]
best_seq_len = len(label_data["best_sequence"])
datalist.append({
"scene_name": scene_name,
"first_frame": first_frame,
"max_coverage_rate": scene_max_coverage_rate,
"best_seq_len": best_seq_len,
"label_idx": scene_max_cr_idx,
})
return datalist
def __getitem__(self, index):
data_item_info = self.datalist[index]
first_frame_idx = data_item_info["first_frame"][0]
first_frame_coverage = data_item_info["first_frame"][1]
max_coverage_rate = data_item_info["max_coverage_rate"]
scene_name = data_item_info["scene_name"]
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
first_left_cam_pose = first_cam_info["cam_to_world"]
first_center_cam_pose = first_cam_info["cam_to_world_O"]
first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
first_pts_num = first_target_point_cloud.shape[0]
first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3,:3]))
first_to_world_trans = first_left_cam_pose[:3,3]
first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)
diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
voxel_threshold = diag*0.02
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
scene_path = os.path.join(self.root_dir, scene_name)
model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
data_item = {
"first_pts_num": np.asarray(
first_pts_num, dtype=np.int32
),
"first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32),
"combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
"first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32),
"scene_name": scene_name,
"max_coverage_rate": max_coverage_rate,
"voxel_threshold": voxel_threshold,
"filter_degree": self.filter_degree,
"O_to_L_pose": first_O_to_first_L_pose,
"first_frame_coverage": first_frame_coverage,
"scene_path": scene_path,
"model_points_normals": model_points_normals,
"best_seq_len": data_item_info["best_seq_len"],
"first_frame_id": first_frame_idx,
}
return data_item
def __len__(self):
return len(self.datalist)
def get_collate_fn(self):
def collate_fn(batch):
collate_data = {}
collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
for key in batch[0].keys():
if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
collate_data[key] = [item[key] for item in batch]
return collate_data
return collate_fn
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
"split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
"model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
"ratio": 0.005,
"batch_size": 2,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 32684,
"type": namespace.Mode.TEST,
"load_from_preprocess": True
}
ds = SeqNBVReconstructionDataset(config)
print(len(ds))
#ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
# ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------

View File

@@ -29,7 +29,6 @@ class NBVReconstructionPipeline(nn.Module):
self.eps = float(self.config["eps"])
self.enable_global_scanned_feat = self.config["global_scanned_feat"]
def forward(self, data):
mode = data["mode"]
@@ -55,10 +54,7 @@ class NBVReconstructionPipeline(nn.Module):
return perturbed_x, random_t, target_score, std
def forward_train(self, data):
start_time = time.time()
main_feat = self.get_main_feat(data)
end_time = time.time()
print("get_main_feat time: ", end_time - start_time)
""" get std """
best_to_world_pose_9d_batch = data["best_to_world_pose_9d"]
perturbed_x, random_t, target_score, std = self.pertube_data(
@@ -92,9 +88,7 @@ class NBVReconstructionPipeline(nn.Module):
scanned_n_to_world_pose_9d_batch = data[
"scanned_n_to_world_pose_9d"
] # List(B): Tensor(S x 9)
scanned_pts_batch = data[
"scanned_pts"
]
device = next(self.parameters()).device
embedding_list_batch = []
@@ -104,13 +98,11 @@ class NBVReconstructionPipeline(nn.Module):
combined_scanned_pts_batch, require_per_point_feat=False
) # global_scanned_feat: Tensor(B x Dg)
for scanned_n_to_world_pose_9d, scanned_pts in zip(scanned_n_to_world_pose_9d_batch, scanned_pts_batch):
for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device) # Tensor(S x 9)
scanned_pts = scanned_pts.to(device) # Tensor(S x N x 3)
pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp)
pts_feat_seq = self.pts_encoder.encode_points(scanned_pts, require_per_point_feat=False) # Tensor(S x Dl)
seq_embedding = torch.cat([pose_feat_seq, pts_feat_seq], dim=-1) # Tensor(S x (Dp+Dl))
embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp+Dl))
seq_embedding = pose_feat_seq
embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp))
seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds)
main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg))

View File

@@ -1,154 +1,205 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
def __init__(self, config):
super(SeqNBVReconstructionDataset, self).__init__(config)
self.type = config["type"]
if self.type != namespace.Mode.TEST:
Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
self.load_from_preprocess = config.get("load_from_preprocess", False)
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
scene_name_list.append(scene_name)
return scene_name_list
def get_datalist(self):
datalist = []
for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
scene_max_coverage_rate = 0
scene_max_cr_idx = 0
for seq_idx in range(seq_num):
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
label_data = DataLoadUtil.load_label(label_path)
max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate:
scene_max_coverage_rate = max_coverage_rate
scene_max_cr_idx = seq_idx
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
label_data = DataLoadUtil.load_label(label_path)
first_frame = label_data["best_sequence"][0]
best_seq_len = len(label_data["best_sequence"])
datalist.append({
"scene_name": scene_name,
"first_frame": first_frame,
"max_coverage_rate": scene_max_coverage_rate,
"best_seq_len": best_seq_len,
"label_idx": scene_max_cr_idx,
})
return datalist
def __getitem__(self, index):
data_item_info = self.datalist[index]
first_frame_idx = data_item_info["first_frame"][0]
first_frame_coverage = data_item_info["first_frame"][1]
max_coverage_rate = data_item_info["max_coverage_rate"]
scene_name = data_item_info["scene_name"]
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
first_left_cam_pose = first_cam_info["cam_to_world"]
first_center_cam_pose = first_cam_info["cam_to_world_O"]
first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
first_pts_num = first_target_point_cloud.shape[0]
first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3,:3]))
first_to_world_trans = first_left_cam_pose[:3,3]
first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)
diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
voxel_threshold = diag*0.02
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
scene_path = os.path.join(self.root_dir, scene_name)
model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
data_item = {
"first_pts_num": np.asarray(
first_pts_num, dtype=np.int32
),
"first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32),
"combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
"first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32),
"scene_name": scene_name,
"max_coverage_rate": max_coverage_rate,
"voxel_threshold": voxel_threshold,
"filter_degree": self.filter_degree,
"O_to_L_pose": first_O_to_first_L_pose,
"first_frame_coverage": first_frame_coverage,
"scene_path": scene_path,
"model_points_normals": model_points_normals,
"best_seq_len": data_item_info["best_seq_len"],
"first_frame_id": first_frame_idx,
}
return data_item
def __len__(self):
return len(self.datalist)
def get_collate_fn(self):
def collate_fn(batch):
collate_data = {}
collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
for key in batch[0].keys():
if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
collate_data[key] = [item[key] for item in batch]
return collate_data
return collate_fn
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
"split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
"model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
"ratio": 0.005,
"batch_size": 2,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 32684,
"type": namespace.Mode.TEST,
"load_from_preprocess": True
}
ds = SeqNBVReconstructionDataset(config)
print(len(ds))
#ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
# ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("seq_reconstruction_dataset")
class SeqReconstructionDataset(BaseDataset):
def __init__(self, config):
super(SeqReconstructionDataset, self).__init__(config)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.type = config["type"]
self.cache = config.get("cache")
self.load_from_preprocess = config.get("load_from_preprocess", False)
if self.type == namespace.Mode.TEST:
#self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
if self.type == namespace.Mode.TRAIN:
scale_ratio = 1
self.datalist = self.datalist*scale_ratio
if self.cache:
expr_root = ConfigManager.get("runner", "experiment", "root_dir")
expr_name = ConfigManager.get("runner", "experiment", "name")
self.cache_dir = os.path.join(expr_root, expr_name, "cache")
# self.preprocess_cache()
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
if not os.path.exists(os.path.join(self.root_dir, scene_name)):
continue
scene_name_list.append(scene_name)
return scene_name_list
def get_scene_name_list(self):
return self.scene_name_list
def get_datalist(self):
datalist = []
total = len(self.scene_name_list)
for idx, scene_name in enumerate(self.scene_name_list):
print(f"processing {scene_name} ({idx}/{total})")
scene_max_cr_idx = 0
frame_len = DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)
for i in range(frame_len):
path = DataLoadUtil.get_path(self.root_dir, scene_name, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
if pts.shape[0] == 0:
continue
datalist.append({
"scene_name": scene_name,
"first_frame": i,
"best_seq_len": -1,
"max_coverage_rate": 1.0,
"label_idx": scene_max_cr_idx,
})
return datalist
def preprocess_cache(self):
Log.info("preprocessing cache...")
for item_idx in range(len(self.datalist)):
self.__getitem__(item_idx)
Log.success("finish preprocessing cache.")
def load_from_cache(self, scene_name, curr_frame_idx):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
if os.path.exists(cache_path):
data = np.loadtxt(cache_path)
return data
else:
return None
def save_to_cache(self, scene_name, curr_frame_idx, data):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
try:
np.savetxt(cache_path, data)
except Exception as e:
Log.error(f"Save cache failed: {e}")
def seq_combined_pts(self, scene, frame_idx_list):
all_combined_pts = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(self.root_dir, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.003)
return downsampled_all_pts
def __getitem__(self, index):
data_item_info = self.datalist[index]
max_coverage_rate = data_item_info["max_coverage_rate"]
best_seq_len = data_item_info["best_seq_len"]
scene_name = data_item_info["scene_name"]
(
scanned_views_pts,
scanned_coverages_rate,
scanned_n_to_world_pose,
) = ([], [], [])
view = data_item_info["first_frame"]
frame_idx = view
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
n_to_world_pose = cam_info["cam_to_world"]
target_point_cloud = (
DataLoadUtil.load_from_preprocessed_pts(view_path)
)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(
target_point_cloud, self.pts_num
)
scanned_views_pts.append(downsampled_target_point_cloud)
n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
np.asarray(n_to_world_pose[:3, :3])
)
first_left_cam_pose = cam_info["cam_to_world"]
first_center_cam_pose = cam_info["cam_to_world_O"]
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
n_to_world_trans = n_to_world_pose[:3, 3]
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
scanned_n_to_world_pose.append(n_to_world_9d)
frame_list = []
for i in range(DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)):
frame_list.append(i)
gt_pts = self.seq_combined_pts(scene_name, frame_list)
data_item = {
"first_scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
"first_scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
"seq_max_coverage_rate": max_coverage_rate, # Float, range(0, 1)
"best_seq_len": best_seq_len, # Int
"scene_name": scene_name, # String
"gt_pts": gt_pts, # Ndarray(N x 3)
"scene_path": os.path.join(self.root_dir, scene_name), # String
"O_to_L_pose": first_O_to_first_L_pose,
}
return data_item
def __len__(self):
return len(self.datalist)
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
from tqdm import tqdm
import pickle
import os
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/media/hofee/data/results/ycb_view_data",
"source": "seq_reconstruction_dataset",
"split_file": "/media/hofee/data/results/ycb_test.txt",
"load_from_preprocess": True,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 8192,
"type": namespace.Mode.TEST,
}
output_dir = "/media/hofee/data/results/ycb_preprocessed_dataset"
os.makedirs(output_dir, exist_ok=True)
ds = SeqReconstructionDataset(config)
for i in tqdm(range(len(ds)), desc="processing dataset"):
output_path = os.path.join(output_dir, f"item_{i}.pkl")
item = ds.__getitem__(i)
for key, value in item.items():
if isinstance(value, np.ndarray):
item[key] = value.tolist()
#import ipdb; ipdb.set_trace()
with open(output_path, "wb") as f:
pickle.dump(item, f)

View File

@@ -0,0 +1,82 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import pickle
import torch
import os
import sys
sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("seq_reconstruction_dataset_preprocessed")
class SeqReconstructionDatasetPreprocessed(BaseDataset):
def __init__(self, config):
super(SeqReconstructionDatasetPreprocessed, self).__init__(config)
self.config = config
self.root_dir = config["root_dir"]
self.real_root_dir = r"/media/hofee/data/results/ycb_view_data"
self.item_list = os.listdir(self.root_dir)
def __getitem__(self, index):
data = pickle.load(open(os.path.join(self.root_dir, self.item_list[index]), "rb"))
data_item = {
"first_scanned_pts": np.asarray(data["first_scanned_pts"], dtype=np.float32), # Ndarray(S x Nv x 3)
"first_scanned_n_to_world_pose_9d": np.asarray(data["first_scanned_n_to_world_pose_9d"], dtype=np.float32), # Ndarray(S x 9)
"seq_max_coverage_rate": data["seq_max_coverage_rate"], # Float, range(0, 1)
"best_seq_len": data["best_seq_len"], # Int
"scene_name": data["scene_name"], # String
"gt_pts": np.asarray(data["gt_pts"], dtype=np.float32), # Ndarray(N x 3)
"scene_path": os.path.join(self.real_root_dir, data["scene_name"]), # String
"O_to_L_pose": np.asarray(data["O_to_L_pose"], dtype=np.float32),
}
return data_item
def __len__(self):
return len(self.item_list)
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
'''
OmniObject3d_test:
root_dir: "H:\\AI\\Datasets\\packed_test_data"
model_dir: "H:\\AI\\Datasets\\scaled_object_meshes"
source: seq_reconstruction_dataset
split_file: "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt"
type: test
filter_degree: 75
eval_list:
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1
num_workers: 12
pts_num: 8192
load_from_preprocess: True
'''
config = {
"root_dir": "H:\\AI\\Datasets\\packed_test_data",
"source": "seq_reconstruction_dataset",
"split_file": "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt",
"load_from_preprocess": True,
"ratio": 1,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 8192,
"type": "test",
}
ds = SeqReconstructionDataset(config)
print(len(ds))
print(ds.__getitem__(10))

View File

@@ -0,0 +1,4 @@
pointnet2/build/
pointnet2/dist/
pointnet2/pointnet2.egg-info/
__pycache__/

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2019 Shaoshuai Shi
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,51 @@
# Pointnet2.PyTorch
* PyTorch implementation of [PointNet++](https://arxiv.org/abs/1706.02413) based on [erikwijmans/Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch).
* Faster than the original codes by re-implementing the CUDA operations.
## Installation
### Requirements
* Linux (tested on Ubuntu 14.04/16.04)
* Python 3.6+
* PyTorch 1.0
### Install
Install this library by running the following command:
```shell
cd pointnet2
python setup.py install
cd ../
```
## Examples
Here I provide a simple example of using this library for KITTI outdoor foreground point cloud segmentation; refer to the paper [PointRCNN](https://arxiv.org/abs/1812.04244) for details of the task description and foreground label generation.
1. Download the training data from [KITTI 3D object detection](http://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d) website and organize the downloaded files as follows:
```
Pointnet2.PyTorch
├── pointnet2
├── tools
│ ├──data
│ │ ├── KITTI
│ │ │ ├── ImageSets
│ │ │ ├── object
│ │ │ │ ├──training
│ │ │ │ ├──calib & velodyne & label_2 & image_2
│ │ train_and_eval.py
```
2. Run the following command to train and evaluate:
```shell
cd tools
python train_and_eval.py --batch_size 8 --epochs 100 --ckpt_save_interval 2
```
## Projects using this repo:
* [PointRCNN](https://github.com/sshaoshuai/PointRCNN): 3D object detector from raw point cloud.
## Acknowledgement
* [charlesq34/pointnet2](https://github.com/charlesq34/pointnet2): Paper author and official code repo.
* [erikwijmans/Pointnet2_PyTorch](https://github.com/erikwijmans/Pointnet2_PyTorch): Initial work of PyTorch implementation of PointNet++.
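Beyond the KITTI example, the set-abstraction modules can be used directly once the extension is built. A minimal sketch, assuming the import path matches this directory layout and a CUDA device is available:

```python
import torch
from pointnet2_modules import PointnetSAModule  # import path assumed; adjust to your layout

# single-scale set abstraction: sample 512 centroids, group 32 neighbours within radius 0.2
sa = PointnetSAModule(mlp=[3, 64, 128], npoint=512, radius=0.2, nsample=32).cuda()
xyz = torch.rand(2, 1024, 3).cuda()         # (B, N, 3) point coordinates
features = torch.rand(2, 3, 1024).cuda()    # (B, C, N) per-point descriptors, here C=3
new_xyz, new_features = sa(xyz, features)   # (2, 512, 3), (2, 128, 512)
```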

View File

@@ -0,0 +1,162 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from . import pointnet2_utils
from . import pytorch_utils as pt_utils
from typing import List
class _PointnetSAModuleBase(nn.Module):
def __init__(self):
super().__init__()
self.npoint = None
self.groupers = None
self.mlps = None
self.pool_method = 'max_pool'
def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor):
"""
:param xyz: (B, N, 3) tensor of the xyz coordinates of the features
:param features: (B, N, C) tensor of the descriptors of the features
:param new_xyz:
:return:
new_xyz: (B, npoint, 3) tensor of the new features' xyz
new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors
"""
new_features_list = []
xyz_flipped = xyz.transpose(1, 2).contiguous()
if new_xyz is None:
new_xyz = pointnet2_utils.gather_operation(
xyz_flipped,
pointnet2_utils.furthest_point_sample(xyz, self.npoint)
).transpose(1, 2).contiguous() if self.npoint is not None else None
for i in range(len(self.groupers)):
new_features = self.groupers[i](xyz, new_xyz, features) # (B, C, npoint, nsample)
new_features = self.mlps[i](new_features) # (B, mlp[-1], npoint, nsample)
if self.pool_method == 'max_pool':
new_features = F.max_pool2d(
new_features, kernel_size=[1, new_features.size(3)]
) # (B, mlp[-1], npoint, 1)
elif self.pool_method == 'avg_pool':
new_features = F.avg_pool2d(
new_features, kernel_size=[1, new_features.size(3)]
) # (B, mlp[-1], npoint, 1)
else:
raise NotImplementedError
new_features = new_features.squeeze(-1) # (B, mlp[-1], npoint)
new_features_list.append(new_features)
return new_xyz, torch.cat(new_features_list, dim=1)
class PointnetSAModuleMSG(_PointnetSAModuleBase):
"""Pointnet set abstraction layer with multiscale grouping"""
def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True,
use_xyz: bool = True, pool_method='max_pool', instance_norm=False):
"""
:param npoint: int
:param radii: list of float, list of radii to group with
:param nsamples: list of int, number of samples in each ball query
:param mlps: list of list of int, spec of the pointnet before the global pooling for each scale
:param bn: whether to use batchnorm
:param use_xyz:
:param pool_method: max_pool / avg_pool
:param instance_norm: whether to use instance_norm
"""
super().__init__()
assert len(radii) == len(nsamples) == len(mlps)
self.npoint = npoint
self.groupers = nn.ModuleList()
self.mlps = nn.ModuleList()
for i in range(len(radii)):
radius = radii[i]
nsample = nsamples[i]
self.groupers.append(
pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz)
if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
)
mlp_spec = mlps[i]
if use_xyz:
mlp_spec[0] += 3
self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm))
self.pool_method = pool_method
class PointnetSAModule(PointnetSAModuleMSG):
"""Pointnet set abstraction layer"""
def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None,
bn: bool = True, use_xyz: bool = True, pool_method='max_pool', instance_norm=False):
"""
:param mlp: list of int, spec of the pointnet before the global max_pool
:param npoint: int, number of features
:param radius: float, radius of ball
:param nsample: int, number of samples in the ball query
:param bn: whether to use batchnorm
:param use_xyz:
:param pool_method: max_pool / avg_pool
:param instance_norm: whether to use instance_norm
"""
super().__init__(
mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz,
pool_method=pool_method, instance_norm=instance_norm
)
class PointnetFPModule(nn.Module):
r"""Propigates the features of one set to another"""
def __init__(self, *, mlp: List[int], bn: bool = True):
"""
:param mlp: list of int
:param bn: whether to use batchnorm
"""
super().__init__()
self.mlp = pt_utils.SharedMLP(mlp, bn=bn)
def forward(
self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor
) -> torch.Tensor:
"""
:param unknown: (B, n, 3) tensor of the xyz positions of the unknown features
:param known: (B, m, 3) tensor of the xyz positions of the known features
:param unknow_feats: (B, C1, n) tensor of the features to be propagated to
:param known_feats: (B, C2, m) tensor of features to be propagated
:return:
new_features: (B, mlp[-1], n) tensor of the features of the unknown features
"""
if known is not None:
dist, idx = pointnet2_utils.three_nn(unknown, known)
dist_recip = 1.0 / (dist + 1e-8)
norm = torch.sum(dist_recip, dim=2, keepdim=True)
weight = dist_recip / norm
interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)
else:
interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1))
if unknow_feats is not None:
new_features = torch.cat([interpolated_feats, unknow_feats], dim=1) # (B, C2 + C1, n)
else:
new_features = interpolated_feats
new_features = new_features.unsqueeze(-1)
new_features = self.mlp(new_features)
return new_features.squeeze(-1)
if __name__ == "__main__":
pass

View File

@@ -0,0 +1,291 @@
import torch
from torch.autograd import Variable
from torch.autograd import Function
import torch.nn as nn
from typing import Tuple
import sys
import pointnet2_cuda as pointnet2
class FurthestPointSampling(Function):
@staticmethod
def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
"""
Uses iterative furthest point sampling to select a set of npoint features that have the largest
minimum distance
:param ctx:
:param xyz: (B, N, 3) where N > npoint
:param npoint: int, number of features in the sampled set
:return:
output: (B, npoint) tensor containing the set
"""
assert xyz.is_contiguous()
B, N, _ = xyz.size()
output = torch.cuda.IntTensor(B, npoint)
temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output)
return output
@staticmethod
def backward(xyz, a=None):
return None, None
furthest_point_sample = FurthestPointSampling.apply
class GatherOperation(Function):
@staticmethod
def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
"""
:param ctx:
:param features: (B, C, N)
:param idx: (B, npoint) index tensor of the features to gather
:return:
output: (B, C, npoint)
"""
assert features.is_contiguous()
assert idx.is_contiguous()
B, npoint = idx.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, npoint)
pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output)
ctx.for_backwards = (idx, C, N)
return output
@staticmethod
def backward(ctx, grad_out):
idx, C, N = ctx.for_backwards
B, npoint = idx.size()
grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
grad_out_data = grad_out.data.contiguous()
pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data)
return grad_features, None
gather_operation = GatherOperation.apply
class ThreeNN(Function):
@staticmethod
def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Find the three nearest neighbors of unknown in known
:param ctx:
:param unknown: (B, N, 3)
:param known: (B, M, 3)
:return:
dist: (B, N, 3) l2 distance to the three nearest neighbors
idx: (B, N, 3) index of 3 nearest neighbors
"""
assert unknown.is_contiguous()
assert known.is_contiguous()
B, N, _ = unknown.size()
m = known.size(1)
dist2 = torch.cuda.FloatTensor(B, N, 3)
idx = torch.cuda.IntTensor(B, N, 3)
pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
return torch.sqrt(dist2), idx
@staticmethod
def backward(ctx, a=None, b=None):
return None, None
three_nn = ThreeNN.apply
class ThreeInterpolate(Function):
@staticmethod
def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
"""
Performs weight linear interpolation on 3 features
:param ctx:
:param features: (B, C, M) Features descriptors to be interpolated from
:param idx: (B, n, 3) three nearest neighbors of the target features in features
:param weight: (B, n, 3) weights
:return:
output: (B, C, N) tensor of the interpolated features
"""
assert features.is_contiguous()
assert idx.is_contiguous()
assert weight.is_contiguous()
B, c, m = features.size()
n = idx.size(1)
ctx.three_interpolate_for_backward = (idx, weight, m)
output = torch.cuda.FloatTensor(B, c, n)
pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output)
return output
@staticmethod
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
:param ctx:
:param grad_out: (B, C, N) tensor with gradients of outputs
:return:
grad_features: (B, C, M) tensor with gradients of features
None:
None:
"""
idx, weight, m = ctx.three_interpolate_for_backward
B, c, n = grad_out.size()
grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_())
grad_out_data = grad_out.data.contiguous()
pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data)
return grad_features, None, None
three_interpolate = ThreeInterpolate.apply
class GroupingOperation(Function):
@staticmethod
def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
"""
:param ctx:
:param features: (B, C, N) tensor of features to group
:param idx: (B, npoint, nsample) tensor containing the indices of features to group with
:return:
output: (B, C, npoint, nsample) tensor
"""
assert features.is_contiguous()
assert idx.is_contiguous()
B, nfeatures, nsample = idx.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)
pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output)
ctx.for_backwards = (idx, N)
return output
@staticmethod
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
:param ctx:
:param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward
:return:
grad_features: (B, C, N) gradient of the features
"""
idx, N = ctx.for_backwards
B, C, npoint, nsample = grad_out.size()
grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
grad_out_data = grad_out.data.contiguous()
pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data)
return grad_features, None
grouping_operation = GroupingOperation.apply
class BallQuery(Function):
@staticmethod
def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor:
"""
:param ctx:
:param radius: float, radius of the balls
:param nsample: int, maximum number of features in the balls
:param xyz: (B, N, 3) xyz coordinates of the features
:param new_xyz: (B, npoint, 3) centers of the ball query
:return:
idx: (B, npoint, nsample) tensor with the indices of the features that form the query balls
"""
assert new_xyz.is_contiguous()
assert xyz.is_contiguous()
B, N, _ = xyz.size()
npoint = new_xyz.size(1)
idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()
pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx)
return idx
@staticmethod
def backward(ctx, a=None):
return None, None, None, None
ball_query = BallQuery.apply
class QueryAndGroup(nn.Module):
def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
"""
:param radius: float, radius of ball
:param nsample: int, maximum number of features to gather in the ball
:param use_xyz:
"""
super().__init__()
self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz
def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]:
"""
:param xyz: (B, N, 3) xyz coordinates of the features
:param new_xyz: (B, npoint, 3) centroids
:param features: (B, C, N) descriptors of the features
:return:
new_features: (B, 3 + C, npoint, nsample)
"""
idx = ball_query(self.radius, self.nsample, xyz, new_xyz)
xyz_trans = xyz.transpose(1, 2).contiguous()
grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample)
grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
if features is not None:
grouped_features = grouping_operation(features, idx)
if self.use_xyz:
new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, C + 3, npoint, nsample)
else:
new_features = grouped_features
else:
assert self.use_xyz, "Cannot have not features and not use xyz as a feature!"
new_features = grouped_xyz
return new_features
class GroupAll(nn.Module):
def __init__(self, use_xyz: bool = True):
super().__init__()
self.use_xyz = use_xyz
def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None):
"""
:param xyz: (B, N, 3) xyz coordinates of the features
:param new_xyz: ignored
:param features: (B, C, N) descriptors of the features
:return:
new_features: (B, C + 3, 1, N)
"""
grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
if features is not None:
grouped_features = features.unsqueeze(2)
if self.use_xyz:
new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, 3 + C, 1, N)
else:
new_features = grouped_features
else:
new_features = grouped_xyz
return new_features
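The raw ops can also be called outside the module wrappers, e.g. to downsample a cloud with furthest point sampling. A sketch mirroring what `_PointnetSAModuleBase.forward` does internally; CUDA tensors and the compiled pointnet2_cuda extension are required, and the import path is assumed:

```python
import torch
import pointnet2_utils  # assumed import path for the file above

xyz = torch.rand(4, 4096, 3).cuda()                          # (B, N, 3), contiguous
idx = pointnet2_utils.furthest_point_sample(xyz, 128)        # (B, 128) int32 indices
xyz_flipped = xyz.transpose(1, 2).contiguous()                # (B, 3, N) layout for gather
sampled = pointnet2_utils.gather_operation(xyz_flipped, idx)  # (B, 3, 128)
sampled = sampled.transpose(1, 2).contiguous()                # back to (B, 128, 3)
```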

View File

@@ -0,0 +1,236 @@
import torch.nn as nn
from typing import List, Tuple
class SharedMLP(nn.Sequential):
def __init__(
self,
args: List[int],
*,
bn: bool = False,
activation=nn.ReLU(inplace=True),
preact: bool = False,
first: bool = False,
name: str = "",
instance_norm: bool = False,
):
super().__init__()
for i in range(len(args) - 1):
self.add_module(
name + 'layer{}'.format(i),
Conv2d(
args[i],
args[i + 1],
bn=(not first or not preact or (i != 0)) and bn,
activation=activation
if (not first or not preact or (i != 0)) else None,
preact=preact,
instance_norm=instance_norm
)
)
class _ConvBase(nn.Sequential):
def __init__(
self,
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=None,
batch_norm=None,
bias=True,
preact=False,
name="",
instance_norm=False,
instance_norm_func=None
):
super().__init__()
bias = bias and (not bn)
conv_unit = conv(
in_size,
out_size,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias=bias
)
init(conv_unit.weight)
if bias:
nn.init.constant_(conv_unit.bias, 0)
if bn:
if not preact:
bn_unit = batch_norm(out_size)
else:
bn_unit = batch_norm(in_size)
if instance_norm:
if not preact:
in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False)
else:
in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False)
if preact:
if bn:
self.add_module(name + 'bn', bn_unit)
if activation is not None:
self.add_module(name + 'activation', activation)
if not bn and instance_norm:
self.add_module(name + 'in', in_unit)
self.add_module(name + 'conv', conv_unit)
if not preact:
if bn:
self.add_module(name + 'bn', bn_unit)
if activation is not None:
self.add_module(name + 'activation', activation)
if not bn and instance_norm:
self.add_module(name + 'in', in_unit)
class _BNBase(nn.Sequential):
def __init__(self, in_size, batch_norm=None, name=""):
super().__init__()
self.add_module(name + "bn", batch_norm(in_size))
nn.init.constant_(self[0].weight, 1.0)
nn.init.constant_(self[0].bias, 0)
class BatchNorm1d(_BNBase):
def __init__(self, in_size: int, *, name: str = ""):
super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)
class BatchNorm2d(_BNBase):
def __init__(self, in_size: int, name: str = ""):
super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)
class Conv1d(_ConvBase):
def __init__(
self,
in_size: int,
out_size: int,
*,
kernel_size: int = 1,
stride: int = 1,
padding: int = 0,
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=nn.init.kaiming_normal_,
bias: bool = True,
preact: bool = False,
name: str = "",
instance_norm=False
):
super().__init__(
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=nn.Conv1d,
batch_norm=BatchNorm1d,
bias=bias,
preact=preact,
name=name,
instance_norm=instance_norm,
instance_norm_func=nn.InstanceNorm1d
)
class Conv2d(_ConvBase):
def __init__(
self,
in_size: int,
out_size: int,
*,
kernel_size: Tuple[int, int] = (1, 1),
stride: Tuple[int, int] = (1, 1),
padding: Tuple[int, int] = (0, 0),
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=nn.init.kaiming_normal_,
bias: bool = True,
preact: bool = False,
name: str = "",
instance_norm=False
):
super().__init__(
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=nn.Conv2d,
batch_norm=BatchNorm2d,
bias=bias,
preact=preact,
name=name,
instance_norm=instance_norm,
instance_norm_func=nn.InstanceNorm2d
)
class FC(nn.Sequential):
def __init__(
self,
in_size: int,
out_size: int,
*,
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=None,
preact: bool = False,
name: str = ""
):
super().__init__()
fc = nn.Linear(in_size, out_size, bias=not bn)
if init is not None:
init(fc.weight)
if not bn:
nn.init.constant_(fc.bias, 0)
if preact:
if bn:
self.add_module(name + 'bn', BatchNorm1d(in_size))
if activation is not None:
self.add_module(name + 'activation', activation)
self.add_module(name + 'fc', fc)
if not preact:
if bn:
self.add_module(name + 'bn', BatchNorm1d(out_size))
if activation is not None:
self.add_module(name + 'activation', activation)
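SharedMLP is a stack of 1x1 Conv2d blocks, so it maps grouped features of shape (B, C_in, npoint, nsample) to (B, C_out, npoint, nsample). A small sketch, with the import path assumed:

```python
import torch
from pytorch_utils import SharedMLP  # assumed import path for the file above

mlp = SharedMLP([6, 64, 128], bn=True)  # 6 in-channels: 3 grouped xyz dims + 3 feature dims
grouped = torch.rand(2, 6, 512, 32)     # (B, C_in, npoint, nsample)
out = mlp(grouped)                      # (2, 128, 512, 32)
```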

View File

@@ -0,0 +1,23 @@
from setuptools import setup
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
setup(
name='pointnet2',
ext_modules=[
CUDAExtension('pointnet2_cuda', [
'src/pointnet2_api.cpp',
'src/ball_query.cpp',
'src/ball_query_gpu.cu',
'src/group_points.cpp',
'src/group_points_gpu.cu',
'src/interpolate.cpp',
'src/interpolate_gpu.cu',
'src/sampling.cpp',
'src/sampling_gpu.cu',
],
extra_compile_args={'cxx': ['-g'],
'nvcc': ['-O2']})
],
cmdclass={'build_ext': BuildExtension}
)
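After running `python setup.py install` inside pointnet2/ (nvcc and a matching PyTorch build are required), the compiled extension should be importable under the name passed to CUDAExtension. A quick smoke test:

```python
# verifies that the CUDA extension declared in setup.py built and installed correctly
import pointnet2_cuda
print(pointnet2_cuda.__file__)
```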

View File

@@ -0,0 +1,28 @@
#include <torch/serialize/tensor.h>
#include <vector>
// #include <THC/THC.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "ball_query_gpu.h"
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
// extern THCState *state;
#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample,
at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor) {
CHECK_INPUT(new_xyz_tensor);
CHECK_INPUT(xyz_tensor);
const float *new_xyz = new_xyz_tensor.data<float>();
const float *xyz = xyz_tensor.data<float>();
int *idx = idx_tensor.data<int>();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream);
return 1;
}

View File

@@ -0,0 +1,67 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "ball_query_gpu.h"
#include "cuda_utils.h"
__global__ void ball_query_kernel_fast(int b, int n, int m, float radius, int nsample,
const float *__restrict__ new_xyz, const float *__restrict__ xyz, int *__restrict__ idx) {
// new_xyz: (B, M, 3)
// xyz: (B, N, 3)
// output:
// idx: (B, M, nsample)
int bs_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (bs_idx >= b || pt_idx >= m) return;
new_xyz += bs_idx * m * 3 + pt_idx * 3;
xyz += bs_idx * n * 3;
idx += bs_idx * m * nsample + pt_idx * nsample;
float radius2 = radius * radius;
float new_x = new_xyz[0];
float new_y = new_xyz[1];
float new_z = new_xyz[2];
int cnt = 0;
for (int k = 0; k < n; ++k) {
float x = xyz[k * 3 + 0];
float y = xyz[k * 3 + 1];
float z = xyz[k * 3 + 2];
float d2 = (new_x - x) * (new_x - x) + (new_y - y) * (new_y - y) + (new_z - z) * (new_z - z);
if (d2 < radius2){
if (cnt == 0){
for (int l = 0; l < nsample; ++l) {
idx[l] = k;
}
}
idx[cnt] = k;
++cnt;
if (cnt >= nsample) break;
}
}
}
void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample, \
const float *new_xyz, const float *xyz, int *idx, cudaStream_t stream) {
// new_xyz: (B, M, 3)
// xyz: (B, N, 3)
// output:
// idx: (B, M, nsample)
cudaError_t err;
dim3 blocks(DIVUP(m, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
ball_query_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, radius, nsample, new_xyz, xyz, idx);
// cudaDeviceSynchronize(); // for using printf in kernel function
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}

View File

@@ -0,0 +1,15 @@
#ifndef _BALL_QUERY_GPU_H
#define _BALL_QUERY_GPU_H
#include <torch/serialize/tensor.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample,
at::Tensor new_xyz_tensor, at::Tensor xyz_tensor, at::Tensor idx_tensor);
void ball_query_kernel_launcher_fast(int b, int n, int m, float radius, int nsample,
const float *xyz, const float *new_xyz, int *idx, cudaStream_t stream);
#endif

View File

@@ -0,0 +1,15 @@
#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H
#include <cmath>
#define TOTAL_THREADS 1024
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
inline int opt_n_threads(int work_size) {
const int pow_2 = std::log(static_cast<double>(work_size)) / std::log(2.0);
return max(min(1 << pow_2, TOTAL_THREADS), 1);
}
#endif

View File

@@ -0,0 +1,37 @@
#include <torch/serialize/tensor.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <vector>
// #include <THC/THC.h>
#include "group_points_gpu.h"
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
// extern THCState *state;
int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample,
at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {
float *grad_points = grad_points_tensor.data<float>();
const int *idx = idx_tensor.data<int>();
const float *grad_out = grad_out_tensor.data<float>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream);
return 1;
}
int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample,
at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor) {
const float *points = points_tensor.data<float>();
const int *idx = idx_tensor.data<int>();
float *out = out_tensor.data<float>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream);
return 1;
}

View File

@@ -0,0 +1,86 @@
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
#include "group_points_gpu.h"
__global__ void group_points_grad_kernel_fast(int b, int c, int n, int npoints, int nsample,
const float *__restrict__ grad_out, const int *__restrict__ idx, float *__restrict__ grad_points) {
// grad_out: (B, C, npoints, nsample)
// idx: (B, npoints, nsample)
// output:
// grad_points: (B, C, N)
int bs_idx = blockIdx.z;
int c_idx = blockIdx.y;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int pt_idx = index / nsample;
if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
int sample_idx = index % nsample;
grad_out += bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
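// the same source point can be grouped by many (npoint, nsample) pairs, so its gradient
// contributions must be accumulated atomically into grad_points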
atomicAdd(grad_points + bs_idx * c * n + c_idx * n + idx[0] , grad_out[0]);
}
void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {
// grad_out: (B, C, npoints, nsample)
// idx: (B, npoints, nsample)
// output:
// grad_points: (B, C, N)
cudaError_t err;
dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
group_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, grad_out, idx, grad_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
__global__ void group_points_kernel_fast(int b, int c, int n, int npoints, int nsample,
const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
// points: (B, C, N)
// idx: (B, npoints, nsample)
// output:
// out: (B, C, npoints, nsample)
int bs_idx = blockIdx.z;
int c_idx = blockIdx.y;
int index = blockIdx.x * blockDim.x + threadIdx.x;
int pt_idx = index / nsample;
if (bs_idx >= b || c_idx >= c || pt_idx >= npoints) return;
int sample_idx = index % nsample;
idx += bs_idx * npoints * nsample + pt_idx * nsample + sample_idx;
int in_idx = bs_idx * c * n + c_idx * n + idx[0];
int out_idx = bs_idx * c * npoints * nsample + c_idx * npoints * nsample + pt_idx * nsample + sample_idx;
out[out_idx] = points[in_idx];
}
void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
const float *points, const int *idx, float *out, cudaStream_t stream) {
// points: (B, C, N)
// idx: (B, npoints, nsample)
// output:
// out: (B, C, npoints, nsample)
cudaError_t err;
dim3 blocks(DIVUP(npoints * nsample, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
group_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, nsample, points, idx, out);
// cudaDeviceSynchronize(); // for using printf in kernel function
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}

View File

@@ -0,0 +1,22 @@
#ifndef _GROUP_POINTS_GPU_H
#define _GROUP_POINTS_GPU_H
#include <torch/serialize/tensor.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <vector>
int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample,
at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
void group_points_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
const float *points, const int *idx, float *out, cudaStream_t stream);
int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample,
at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);
void group_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints, int nsample,
const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream);
#endif

View File

@@ -0,0 +1,59 @@
#include <torch/serialize/tensor.h>
#include <vector>
// #include <THC/THC.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "interpolate_gpu.h"
// extern THCState *state;
void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor,
at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor) {
const float *unknown = unknown_tensor.data<float>();
const float *known = known_tensor.data<float>();
float *dist2 = dist2_tensor.data<float>();
int *idx = idx_tensor.data<int>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream);
}
void three_interpolate_wrapper_fast(int b, int c, int m, int n,
at::Tensor points_tensor,
at::Tensor idx_tensor,
at::Tensor weight_tensor,
at::Tensor out_tensor) {
const float *points = points_tensor.data<float>();
const float *weight = weight_tensor.data<float>();
float *out = out_tensor.data<float>();
const int *idx = idx_tensor.data<int>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream);
}
void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m,
at::Tensor grad_out_tensor,
at::Tensor idx_tensor,
at::Tensor weight_tensor,
at::Tensor grad_points_tensor) {
const float *grad_out = grad_out_tensor.data<float>();
const float *weight = weight_tensor.data<float>();
float *grad_points = grad_points_tensor.data<float>();
const int *idx = idx_tensor.data<int>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream);
}

View File

@@ -0,0 +1,161 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
#include "interpolate_gpu.h"
__global__ void three_nn_kernel_fast(int b, int n, int m, const float *__restrict__ unknown,
const float *__restrict__ known, float *__restrict__ dist2, int *__restrict__ idx) {
// unknown: (B, N, 3)
// known: (B, M, 3)
// output:
// dist2: (B, N, 3)
// idx: (B, N, 3)
int bs_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (bs_idx >= b || pt_idx >= n) return;
unknown += bs_idx * n * 3 + pt_idx * 3;
known += bs_idx * m * 3;
dist2 += bs_idx * n * 3 + pt_idx * 3;
idx += bs_idx * n * 3 + pt_idx * 3;
float ux = unknown[0];
float uy = unknown[1];
float uz = unknown[2];
double best1 = 1e40, best2 = 1e40, best3 = 1e40;
int besti1 = 0, besti2 = 0, besti3 = 0;
for (int k = 0; k < m; ++k) {
float x = known[k * 3 + 0];
float y = known[k * 3 + 1];
float z = known[k * 3 + 2];
float d = (ux - x) * (ux - x) + (uy - y) * (uy - y) + (uz - z) * (uz - z);
if (d < best1) {
best3 = best2; besti3 = besti2;
best2 = best1; besti2 = besti1;
best1 = d; besti1 = k;
}
else if (d < best2) {
best3 = best2; besti3 = besti2;
best2 = d; besti2 = k;
}
else if (d < best3) {
best3 = d; besti3 = k;
}
}
dist2[0] = best1; dist2[1] = best2; dist2[2] = best3;
idx[0] = besti1; idx[1] = besti2; idx[2] = besti3;
}
void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown,
const float *known, float *dist2, int *idx, cudaStream_t stream) {
// unknown: (B, N, 3)
// known: (B, M, 3)
// output:
// dist2: (B, N, 3)
// idx: (B, N, 3)
cudaError_t err;
dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
three_nn_kernel_fast<<<blocks, threads, 0, stream>>>(b, n, m, unknown, known, dist2, idx);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
__global__ void three_interpolate_kernel_fast(int b, int c, int m, int n, const float *__restrict__ points,
const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ out) {
// points: (B, C, M)
// idx: (B, N, 3)
// weight: (B, N, 3)
// output:
// out: (B, C, N)
int bs_idx = blockIdx.z;
int c_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;
weight += bs_idx * n * 3 + pt_idx * 3;
points += bs_idx * c * m + c_idx * m;
idx += bs_idx * n * 3 + pt_idx * 3;
out += bs_idx * c * n + c_idx * n;
out[pt_idx] = weight[0] * points[idx[0]] + weight[1] * points[idx[1]] + weight[2] * points[idx[2]];
}
void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n,
const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream) {
// points: (B, C, M)
// idx: (B, N, 3)
// weight: (B, N, 3)
// output:
// out: (B, C, N)
cudaError_t err;
dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
three_interpolate_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, m, n, points, idx, weight, out);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
__global__ void three_interpolate_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out,
const int *__restrict__ idx, const float *__restrict__ weight, float *__restrict__ grad_points) {
// grad_out: (B, C, N)
// weight: (B, N, 3)
// output:
// grad_points: (B, C, M)
int bs_idx = blockIdx.z;
int c_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (bs_idx >= b || c_idx >= c || pt_idx >= n) return;
grad_out += bs_idx * c * n + c_idx * n + pt_idx;
weight += bs_idx * n * 3 + pt_idx * 3;
grad_points += bs_idx * c * m + c_idx * m;
idx += bs_idx * n * 3 + pt_idx * 3;
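// a known point can be one of the three neighbours of many unknown points, so the weighted
// output gradients are accumulated atomically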
atomicAdd(grad_points + idx[0], grad_out[0] * weight[0]);
atomicAdd(grad_points + idx[1], grad_out[0] * weight[1]);
atomicAdd(grad_points + idx[2], grad_out[0] * weight[2]);
}
void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out,
const int *idx, const float *weight, float *grad_points, cudaStream_t stream) {
// grad_out: (B, C, N)
// weight: (B, N, 3)
// output:
// grad_points: (B, C, M)
cudaError_t err;
dim3 blocks(DIVUP(n, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
three_interpolate_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, m, grad_out, idx, weight, grad_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}

View File

@@ -0,0 +1,30 @@
#ifndef _INTERPOLATE_GPU_H
#define _INTERPOLATE_GPU_H
#include <torch/serialize/tensor.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor,
at::Tensor known_tensor, at::Tensor dist2_tensor, at::Tensor idx_tensor);
void three_nn_kernel_launcher_fast(int b, int n, int m, const float *unknown,
const float *known, float *dist2, int *idx, cudaStream_t stream);
void three_interpolate_wrapper_fast(int b, int c, int m, int n, at::Tensor points_tensor,
at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor out_tensor);
void three_interpolate_kernel_launcher_fast(int b, int c, int m, int n,
const float *points, const int *idx, const float *weight, float *out, cudaStream_t stream);
void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m, at::Tensor grad_out_tensor,
at::Tensor idx_tensor, at::Tensor weight_tensor, at::Tensor grad_points_tensor);
void three_interpolate_grad_kernel_launcher_fast(int b, int c, int n, int m, const float *grad_out,
const int *idx, const float *weight, float *grad_points, cudaStream_t stream);
#endif

View File

@@ -0,0 +1,24 @@
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include "ball_query_gpu.h"
#include "group_points_gpu.h"
#include "sampling_gpu.h"
#include "interpolate_gpu.h"
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("ball_query_wrapper", &ball_query_wrapper_fast, "ball_query_wrapper_fast");
m.def("group_points_wrapper", &group_points_wrapper_fast, "group_points_wrapper_fast");
m.def("group_points_grad_wrapper", &group_points_grad_wrapper_fast, "group_points_grad_wrapper_fast");
m.def("gather_points_wrapper", &gather_points_wrapper_fast, "gather_points_wrapper_fast");
m.def("gather_points_grad_wrapper", &gather_points_grad_wrapper_fast, "gather_points_grad_wrapper_fast");
m.def("furthest_point_sampling_wrapper", &furthest_point_sampling_wrapper, "furthest_point_sampling_wrapper");
m.def("three_nn_wrapper", &three_nn_wrapper_fast, "three_nn_wrapper_fast");
m.def("three_interpolate_wrapper", &three_interpolate_wrapper_fast, "three_interpolate_wrapper_fast");
m.def("three_interpolate_grad_wrapper", &three_interpolate_grad_wrapper_fast, "three_interpolate_grad_wrapper_fast");
}
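The bindings above expose the raw CUDA wrappers to Python. The snippet below is a minimal usage sketch for the furthest-point-sampling entry point; pointnet2_cuda is a hypothetical module name (the actual name comes from the build script, which is not part of this diff), and the tensor shapes follow the sampling wrappers declared below.

import torch
import pointnet2_cuda  # hypothetical extension name, set by the (unshown) setup script

B, N, M = 2, 4096, 512
xyz = torch.rand(B, N, 3, device="cuda")                    # (B, N, 3) input points
temp = torch.full((B, N), 1e10, device="cuda")              # per-point distance cache, initialised large
idx = torch.zeros(B, M, dtype=torch.int32, device="cuda")   # (B, M) output sample indices

# furthest_point_sampling_wrapper(b, n, m, points, temp, idx) fills idx in place
pointnet2_cuda.furthest_point_sampling_wrapper(B, N, M, xyz, temp, idx)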

View File

@@ -0,0 +1,51 @@
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <vector>
// #include <THC/THC.h>
#include "sampling_gpu.h"
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAEvent.h>
// extern THCState *state;
int gather_points_wrapper_fast(int b, int c, int n, int npoints,
at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor){
const float *points = points_tensor.data<float>();
const int *idx = idx_tensor.data<int>();
float *out = out_tensor.data<float>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream);
return 1;
}
int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints,
at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor) {
const float *grad_out = grad_out_tensor.data<float>();
const int *idx = idx_tensor.data<int>();
float *grad_points = grad_points_tensor.data<float>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream);
return 1;
}
int furthest_point_sampling_wrapper(int b, int n, int m,
at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor) {
const float *points = points_tensor.data<float>();
float *temp = temp_tensor.data<float>();
int *idx = idx_tensor.data<int>();
// cudaStream_t stream = at::cuda::getCurrentCUDAStream();
cudaStream_t stream = at::cuda::getCurrentCUDAStream();
furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);
return 1;
}

View File

@@ -0,0 +1,253 @@
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
#include "sampling_gpu.h"
__global__ void gather_points_kernel_fast(int b, int c, int n, int m,
const float *__restrict__ points, const int *__restrict__ idx, float *__restrict__ out) {
// points: (B, C, N)
// idx: (B, M)
// output:
// out: (B, C, M)
int bs_idx = blockIdx.z;
int c_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;
out += bs_idx * c * m + c_idx * m + pt_idx;
idx += bs_idx * m + pt_idx;
points += bs_idx * c * n + c_idx * n;
out[0] = points[idx[0]];
}
void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints,
const float *points, const int *idx, float *out, cudaStream_t stream) {
// points: (B, C, N)
// idx: (B, npoints)
// output:
// out: (B, C, npoints)
cudaError_t err;
dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
gather_points_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, points, idx, out);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
__global__ void gather_points_grad_kernel_fast(int b, int c, int n, int m, const float *__restrict__ grad_out,
const int *__restrict__ idx, float *__restrict__ grad_points) {
// grad_out: (B, C, M)
// idx: (B, M)
// output:
// grad_points: (B, C, N)
int bs_idx = blockIdx.z;
int c_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (bs_idx >= b || c_idx >= c || pt_idx >= m) return;
grad_out += bs_idx * c * m + c_idx * m + pt_idx;
idx += bs_idx * m + pt_idx;
grad_points += bs_idx * c * n + c_idx * n;
atomicAdd(grad_points + idx[0], grad_out[0]);
}
void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints,
const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream) {
// grad_out: (B, C, npoints)
// idx: (B, npoints)
// output:
// grad_points: (B, C, N)
cudaError_t err;
dim3 blocks(DIVUP(npoints, THREADS_PER_BLOCK), c, b); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
gather_points_grad_kernel_fast<<<blocks, threads, 0, stream>>>(b, c, n, npoints, grad_out, idx, grad_points);
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
__device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2){
const float v1 = dists[idx1], v2 = dists[idx2];
const int i1 = dists_i[idx1], i2 = dists_i[idx2];
dists[idx1] = max(v1, v2);
dists_i[idx1] = v2 > v1 ? i2 : i1;
}
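// merge two candidate (distance, index) pairs, keeping the larger distance; used by the
// shared-memory tree reduction in furthest_point_sampling_kernel below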
template <unsigned int block_size>
__global__ void furthest_point_sampling_kernel(int b, int n, int m,
const float *__restrict__ dataset, float *__restrict__ temp, int *__restrict__ idxs) {
// dataset: (B, N, 3)
// tmp: (B, N)
// output:
// idx: (B, M)
if (m <= 0) return;
__shared__ float dists[block_size];
__shared__ int dists_i[block_size];
int batch_index = blockIdx.x;
dataset += batch_index * n * 3;
temp += batch_index * n;
idxs += batch_index * m;
int tid = threadIdx.x;
const int stride = block_size;
int old = 0;
if (threadIdx.x == 0)
idxs[0] = old;
__syncthreads();
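// each iteration keeps, in temp[k], the squared distance from point k to the nearest
// already-selected sample, then picks the point that maximises it as the next sample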
for (int j = 1; j < m; j++) {
int besti = 0;
float best = -1;
float x1 = dataset[old * 3 + 0];
float y1 = dataset[old * 3 + 1];
float z1 = dataset[old * 3 + 2];
for (int k = tid; k < n; k += stride) {
float x2, y2, z2;
x2 = dataset[k * 3 + 0];
y2 = dataset[k * 3 + 1];
z2 = dataset[k * 3 + 2];
// float mag = (x2 * x2) + (y2 * y2) + (z2 * z2);
// if (mag <= 1e-3)
// continue;
float d = (x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1) + (z2 - z1) * (z2 - z1);
float d2 = min(d, temp[k]);
temp[k] = d2;
besti = d2 > best ? k : besti;
best = d2 > best ? d2 : best;
}
dists[tid] = best;
dists_i[tid] = besti;
__syncthreads();
if (block_size >= 1024) {
if (tid < 512) {
__update(dists, dists_i, tid, tid + 512);
}
__syncthreads();
}
if (block_size >= 512) {
if (tid < 256) {
__update(dists, dists_i, tid, tid + 256);
}
__syncthreads();
}
if (block_size >= 256) {
if (tid < 128) {
__update(dists, dists_i, tid, tid + 128);
}
__syncthreads();
}
if (block_size >= 128) {
if (tid < 64) {
__update(dists, dists_i, tid, tid + 64);
}
__syncthreads();
}
if (block_size >= 64) {
if (tid < 32) {
__update(dists, dists_i, tid, tid + 32);
}
__syncthreads();
}
if (block_size >= 32) {
if (tid < 16) {
__update(dists, dists_i, tid, tid + 16);
}
__syncthreads();
}
if (block_size >= 16) {
if (tid < 8) {
__update(dists, dists_i, tid, tid + 8);
}
__syncthreads();
}
if (block_size >= 8) {
if (tid < 4) {
__update(dists, dists_i, tid, tid + 4);
}
__syncthreads();
}
if (block_size >= 4) {
if (tid < 2) {
__update(dists, dists_i, tid, tid + 2);
}
__syncthreads();
}
if (block_size >= 2) {
if (tid < 1) {
__update(dists, dists_i, tid, tid + 1);
}
__syncthreads();
}
old = dists_i[0];
if (tid == 0)
idxs[j] = old;
}
}
void furthest_point_sampling_kernel_launcher(int b, int n, int m,
const float *dataset, float *temp, int *idxs, cudaStream_t stream) {
// dataset: (B, N, 3)
// tmp: (B, N)
// output:
// idx: (B, M)
cudaError_t err;
unsigned int n_threads = opt_n_threads(n);
switch (n_threads) {
case 1024:
furthest_point_sampling_kernel<1024><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 512:
furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 256:
furthest_point_sampling_kernel<256><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 128:
furthest_point_sampling_kernel<128><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 64:
furthest_point_sampling_kernel<64><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 32:
furthest_point_sampling_kernel<32><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 16:
furthest_point_sampling_kernel<16><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 8:
furthest_point_sampling_kernel<8><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 4:
furthest_point_sampling_kernel<4><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 2:
furthest_point_sampling_kernel<2><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
case 1:
furthest_point_sampling_kernel<1><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs); break;
default:
furthest_point_sampling_kernel<512><<<b, n_threads, 0, stream>>>(b, n, m, dataset, temp, idxs);
}
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}

View File

@@ -0,0 +1,29 @@
#ifndef _SAMPLING_GPU_H
#define _SAMPLING_GPU_H
#include <torch/serialize/tensor.h>
#include <ATen/cuda/CUDAContext.h>
#include <vector>
int gather_points_wrapper_fast(int b, int c, int n, int npoints,
at::Tensor points_tensor, at::Tensor idx_tensor, at::Tensor out_tensor);
void gather_points_kernel_launcher_fast(int b, int c, int n, int npoints,
const float *points, const int *idx, float *out, cudaStream_t stream);
int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints,
at::Tensor grad_out_tensor, at::Tensor idx_tensor, at::Tensor grad_points_tensor);
void gather_points_grad_kernel_launcher_fast(int b, int c, int n, int npoints,
const float *grad_out, const int *idx, float *grad_points, cudaStream_t stream);
int furthest_point_sampling_wrapper(int b, int n, int m,
at::Tensor points_tensor, at::Tensor temp_tensor, at::Tensor idx_tensor);
void furthest_point_sampling_kernel_launcher(int b, int n, int m,
const float *dataset, float *temp, int *idxs, cudaStream_t stream);
#endif

View File

@@ -0,0 +1,2 @@
import os, sys
sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '../'))

View File

@@ -0,0 +1,188 @@
import os
import numpy as np
import torch.utils.data as torch_data
import kitti_utils
import cv2
from PIL import Image
USE_INTENSITY = False
class KittiDataset(torch_data.Dataset):
def __init__(self, root_dir, split='train', mode='TRAIN'):
self.split = split
self.mode = mode
self.classes = ['Car']
is_test = self.split == 'test'
self.imageset_dir = os.path.join(root_dir, 'KITTI', 'object', 'testing' if is_test else 'training')
split_dir = os.path.join(root_dir, 'KITTI', 'ImageSets', split + '.txt')
self.image_idx_list = [x.strip() for x in open(split_dir).readlines()]
self.sample_id_list = [int(sample_id) for sample_id in self.image_idx_list]
self.num_sample = self.image_idx_list.__len__()
self.npoints = 16384
self.image_dir = os.path.join(self.imageset_dir, 'image_2')
self.lidar_dir = os.path.join(self.imageset_dir, 'velodyne')
self.calib_dir = os.path.join(self.imageset_dir, 'calib')
self.label_dir = os.path.join(self.imageset_dir, 'label_2')
self.plane_dir = os.path.join(self.imageset_dir, 'planes')
def get_image(self, idx):
img_file = os.path.join(self.image_dir, '%06d.png' % idx)
assert os.path.exists(img_file)
return cv2.imread(img_file) # (H, W, 3) BGR mode
def get_image_shape(self, idx):
img_file = os.path.join(self.image_dir, '%06d.png' % idx)
assert os.path.exists(img_file)
im = Image.open(img_file)
width, height = im.size
return height, width, 3
def get_lidar(self, idx):
lidar_file = os.path.join(self.lidar_dir, '%06d.bin' % idx)
assert os.path.exists(lidar_file)
return np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
def get_calib(self, idx):
calib_file = os.path.join(self.calib_dir, '%06d.txt' % idx)
assert os.path.exists(calib_file)
return kitti_utils.Calibration(calib_file)
def get_label(self, idx):
label_file = os.path.join(self.label_dir, '%06d.txt' % idx)
assert os.path.exists(label_file)
return kitti_utils.get_objects_from_label(label_file)
@staticmethod
def get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape):
val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1])
val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0])
val_flag_merge = np.logical_and(val_flag_1, val_flag_2)
pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0)
return pts_valid_flag
def filtrate_objects(self, obj_list):
type_whitelist = self.classes
if self.mode == 'TRAIN':
type_whitelist = list(self.classes)
if 'Car' in self.classes:
type_whitelist.append('Van')
valid_obj_list = []
for obj in obj_list:
if obj.cls_type not in type_whitelist:
continue
valid_obj_list.append(obj)
return valid_obj_list
def __len__(self):
return len(self.sample_id_list)
def __getitem__(self, index):
sample_id = int(self.sample_id_list[index])
calib = self.get_calib(sample_id)
img_shape = self.get_image_shape(sample_id)
pts_lidar = self.get_lidar(sample_id)
# get valid point (projected points should be in image)
pts_rect = calib.lidar_to_rect(pts_lidar[:, 0:3])
pts_intensity = pts_lidar[:, 3]
pts_img, pts_rect_depth = calib.rect_to_img(pts_rect)
pts_valid_flag = self.get_valid_flag(pts_rect, pts_img, pts_rect_depth, img_shape)
pts_rect = pts_rect[pts_valid_flag][:, 0:3]
pts_intensity = pts_intensity[pts_valid_flag]
if self.npoints < len(pts_rect):
pts_depth = pts_rect[:, 2]
pts_near_flag = pts_depth < 40.0
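# keep every far point (depth >= 40 m) and fill the remaining self.npoints budget with a
# random subset of the near points, so distant structure is never dropped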
far_idxs_choice = np.where(pts_near_flag == 0)[0]
near_idxs = np.where(pts_near_flag == 1)[0]
near_idxs_choice = np.random.choice(near_idxs, self.npoints - len(far_idxs_choice), replace=False)
choice = np.concatenate((near_idxs_choice, far_idxs_choice), axis=0) \
if len(far_idxs_choice) > 0 else near_idxs_choice
np.random.shuffle(choice)
else:
choice = np.arange(0, len(pts_rect), dtype=np.int32)
if self.npoints > len(pts_rect):
extra_choice = np.random.choice(choice, self.npoints - len(pts_rect), replace=False)
choice = np.concatenate((choice, extra_choice), axis=0)
np.random.shuffle(choice)
ret_pts_rect = pts_rect[choice, :]
ret_pts_intensity = pts_intensity[choice] - 0.5 # translate intensity to [-0.5, 0.5]
pts_features = [ret_pts_intensity.reshape(-1, 1)]
ret_pts_features = np.concatenate(pts_features, axis=1) if pts_features.__len__() > 1 else pts_features[0]
sample_info = {'sample_id': sample_id}
if self.mode == 'TEST':
if USE_INTENSITY:
pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C)
else:
pts_input = ret_pts_rect
sample_info['pts_input'] = pts_input
sample_info['pts_rect'] = ret_pts_rect
sample_info['pts_features'] = ret_pts_features
return sample_info
gt_obj_list = self.filtrate_objects(self.get_label(sample_id))
gt_boxes3d = kitti_utils.objs_to_boxes3d(gt_obj_list)
# prepare input
if USE_INTENSITY:
pts_input = np.concatenate((ret_pts_rect, ret_pts_features), axis=1) # (N, C)
else:
pts_input = ret_pts_rect
# generate training labels
cls_labels = self.generate_training_labels(ret_pts_rect, gt_boxes3d)
sample_info['pts_input'] = pts_input
sample_info['pts_rect'] = ret_pts_rect
sample_info['cls_labels'] = cls_labels
return sample_info
@staticmethod
def generate_training_labels(pts_rect, gt_boxes3d):
cls_label = np.zeros((pts_rect.shape[0]), dtype=np.int32)
gt_corners = kitti_utils.boxes3d_to_corners3d(gt_boxes3d, rotate=True)
extend_gt_boxes3d = kitti_utils.enlarge_box3d(gt_boxes3d, extra_width=0.2)
extend_gt_corners = kitti_utils.boxes3d_to_corners3d(extend_gt_boxes3d, rotate=True)
for k in range(gt_boxes3d.shape[0]):
box_corners = gt_corners[k]
fg_pt_flag = kitti_utils.in_hull(pts_rect, box_corners)
cls_label[fg_pt_flag] = 1
# enlarge the bbox3d, ignore nearby points
extend_box_corners = extend_gt_corners[k]
fg_enlarge_flag = kitti_utils.in_hull(pts_rect, extend_box_corners)
ignore_flag = np.logical_xor(fg_pt_flag, fg_enlarge_flag)
cls_label[ignore_flag] = -1
return cls_label
def collate_batch(self, batch):
batch_size = batch.__len__()
ans_dict = {}
for key in batch[0].keys():
if isinstance(batch[0][key], np.ndarray):
ans_dict[key] = np.concatenate([batch[k][key][np.newaxis, ...] for k in range(batch_size)], axis=0)
else:
ans_dict[key] = [batch[k][key] for k in range(batch_size)]
if isinstance(batch[0][key], int):
ans_dict[key] = np.array(ans_dict[key], dtype=np.int32)
elif isinstance(batch[0][key], float):
ans_dict[key] = np.array(ans_dict[key], dtype=np.float32)
return ans_dict

View File

@@ -0,0 +1,229 @@
import numpy as np
from scipy.spatial import Delaunay
import scipy
def cls_type_to_id(cls_type):
type_to_id = {'Car': 1, 'Pedestrian': 2, 'Cyclist': 3, 'Van': 4}
if cls_type not in type_to_id.keys():
return -1
return type_to_id[cls_type]
class Object3d(object):
def __init__(self, line):
label = line.strip().split(' ')
self.src = line
self.cls_type = label[0]
self.cls_id = cls_type_to_id(self.cls_type)
self.trucation = float(label[1])
self.occlusion = float(label[2]) # 0:fully visible 1:partly occluded 2:largely occluded 3:unknown
self.alpha = float(label[3])
self.box2d = np.array((float(label[4]), float(label[5]), float(label[6]), float(label[7])), dtype=np.float32)
self.h = float(label[8])
self.w = float(label[9])
self.l = float(label[10])
self.pos = np.array((float(label[11]), float(label[12]), float(label[13])), dtype=np.float32)
self.dis_to_cam = np.linalg.norm(self.pos)
self.ry = float(label[14])
self.score = float(label[15]) if label.__len__() == 16 else -1.0
self.level_str = None
self.level = self.get_obj_level()
def get_obj_level(self):
height = float(self.box2d[3]) - float(self.box2d[1]) + 1
if height >= 40 and self.trucation <= 0.15 and self.occlusion <= 0:
self.level_str = 'Easy'
return 1 # Easy
elif height >= 25 and self.trucation <= 0.3 and self.occlusion <= 1:
self.level_str = 'Moderate'
return 2 # Moderate
elif height >= 25 and self.trucation <= 0.5 and self.occlusion <= 2:
self.level_str = 'Hard'
return 3 # Hard
else:
self.level_str = 'UnKnown'
return 4
def generate_corners3d(self):
"""
generate corners3d representation for this object
:return corners_3d: (8, 3) corners of box3d in camera coord
"""
l, h, w = self.l, self.h, self.w
x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2]
y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2]
R = np.array([[np.cos(self.ry), 0, np.sin(self.ry)],
[0, 1, 0],
[-np.sin(self.ry), 0, np.cos(self.ry)]])
corners3d = np.vstack([x_corners, y_corners, z_corners]) # (3, 8)
corners3d = np.dot(R, corners3d).T
corners3d = corners3d + self.pos
return corners3d
def to_str(self):
print_str = '%s %.3f %.3f %.3f box2d: %s hwl: [%.3f %.3f %.3f] pos: %s ry: %.3f' \
% (self.cls_type, self.trucation, self.occlusion, self.alpha, self.box2d, self.h, self.w, self.l,
self.pos, self.ry)
return print_str
def to_kitti_format(self):
kitti_str = '%s %.2f %d %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f %.2f' \
% (self.cls_type, self.trucation, int(self.occlusion), self.alpha, self.box2d[0], self.box2d[1],
self.box2d[2], self.box2d[3], self.h, self.w, self.l, self.pos[0], self.pos[1], self.pos[2],
self.ry)
return kitti_str
def get_calib_from_file(calib_file):
with open(calib_file) as f:
lines = f.readlines()
obj = lines[2].strip().split(' ')[1:]
P2 = np.array(obj, dtype=np.float32)
obj = lines[3].strip().split(' ')[1:]
P3 = np.array(obj, dtype=np.float32)
obj = lines[4].strip().split(' ')[1:]
R0 = np.array(obj, dtype=np.float32)
obj = lines[5].strip().split(' ')[1:]
Tr_velo_to_cam = np.array(obj, dtype=np.float32)
return {'P2': P2.reshape(3, 4),
'P3': P3.reshape(3, 4),
'R0': R0.reshape(3, 3),
'Tr_velo2cam': Tr_velo_to_cam.reshape(3, 4)}
class Calibration(object):
def __init__(self, calib_file):
if isinstance(calib_file, str):
calib = get_calib_from_file(calib_file)
else:
calib = calib_file
self.P2 = calib['P2'] # 3 x 4
self.R0 = calib['R0'] # 3 x 3
self.V2C = calib['Tr_velo2cam'] # 3 x 4
def cart_to_hom(self, pts):
"""
:param pts: (N, 3 or 2)
:return pts_hom: (N, 4 or 3)
"""
pts_hom = np.hstack((pts, np.ones((pts.shape[0], 1), dtype=np.float32)))
return pts_hom
def lidar_to_rect(self, pts_lidar):
"""
:param pts_lidar: (N, 3)
:return pts_rect: (N, 3)
"""
pts_lidar_hom = self.cart_to_hom(pts_lidar)
pts_rect = np.dot(pts_lidar_hom, np.dot(self.V2C.T, self.R0.T))
return pts_rect
def rect_to_img(self, pts_rect):
"""
:param pts_rect: (N, 3)
:return pts_img: (N, 2)
"""
pts_rect_hom = self.cart_to_hom(pts_rect)
pts_2d_hom = np.dot(pts_rect_hom, self.P2.T)
pts_img = (pts_2d_hom[:, 0:2].T / pts_rect_hom[:, 2]).T # (N, 2)
pts_rect_depth = pts_2d_hom[:, 2] - self.P2.T[3, 2] # depth in rect camera coord
return pts_img, pts_rect_depth
def lidar_to_img(self, pts_lidar):
"""
:param pts_lidar: (N, 3)
:return pts_img: (N, 2)
"""
pts_rect = self.lidar_to_rect(pts_lidar)
pts_img, pts_depth = self.rect_to_img(pts_rect)
return pts_img, pts_depth
def get_objects_from_label(label_file):
with open(label_file, 'r') as f:
lines = f.readlines()
objects = [Object3d(line) for line in lines]
return objects
def objs_to_boxes3d(obj_list):
boxes3d = np.zeros((obj_list.__len__(), 7), dtype=np.float32)
for k, obj in enumerate(obj_list):
boxes3d[k, 0:3], boxes3d[k, 3], boxes3d[k, 4], boxes3d[k, 5], boxes3d[k, 6] \
= obj.pos, obj.h, obj.w, obj.l, obj.ry
return boxes3d
def boxes3d_to_corners3d(boxes3d, rotate=True):
"""
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry]
:param rotate:
:return: corners3d: (N, 8, 3)
"""
boxes_num = boxes3d.shape[0]
h, w, l = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]
x_corners = np.array([l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2.], dtype=np.float32).T # (N, 8)
z_corners = np.array([w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.], dtype=np.float32).T # (N, 8)
y_corners = np.zeros((boxes_num, 8), dtype=np.float32)
y_corners[:, 4:8] = -h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8)
if rotate:
ry = boxes3d[:, 6]
zeros, ones = np.zeros(ry.size, dtype=np.float32), np.ones(ry.size, dtype=np.float32)
rot_list = np.array([[np.cos(ry), zeros, -np.sin(ry)],
[zeros, ones, zeros],
[np.sin(ry), zeros, np.cos(ry)]]) # (3, 3, N)
R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3)
temp_corners = np.concatenate((x_corners.reshape(-1, 8, 1), y_corners.reshape(-1, 8, 1),
z_corners.reshape(-1, 8, 1)), axis=2) # (N, 8, 3)
rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3)
x_corners, y_corners, z_corners = rotated_corners[:, :, 0], rotated_corners[:, :, 1], rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)
y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)
z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)
corners = np.concatenate((x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)), axis=2)
return corners.astype(np.float32)
def enlarge_box3d(boxes3d, extra_width):
"""
:param boxes3d: (N, 7) [x, y, z, h, w, l, ry]
"""
if isinstance(boxes3d, np.ndarray):
large_boxes3d = boxes3d.copy()
else:
large_boxes3d = boxes3d.clone()
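# h, w, l each grow by 2 * extra_width; y (the bottom face in KITTI rect coordinates, where +y
# points down) shifts by extra_width so the enlarged box stays centred on the original one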
large_boxes3d[:, 3:6] += extra_width * 2
large_boxes3d[:, 1] += extra_width
return large_boxes3d
def in_hull(p, hull):
"""
:param p: (N, K) test points
:param hull: (M, K) M corners of a box
:return (N) bool
"""
try:
if not isinstance(hull, Delaunay):
hull = Delaunay(hull)
flag = hull.find_simplex(p) >= 0
except scipy.spatial.QhullError:
print('Warning: not a hull %s' % str(hull))
flag = np.zeros(p.shape[0], dtype=bool)
return flag
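A minimal sketch of how the helpers above combine (the same pattern as generate_training_labels in dataset.py), assuming this module is importable as kitti_utils; the box values are arbitrary example numbers.

import numpy as np
import kitti_utils

# one KITTI-style box: [x, y, z, h, w, l, ry] in rect camera coordinates
boxes3d = np.array([[0.0, 1.5, 10.0, 1.5, 1.6, 3.9, 0.0]], dtype=np.float32)
corners = kitti_utils.boxes3d_to_corners3d(boxes3d, rotate=True)   # (1, 8, 3)

pts = np.random.rand(100, 3).astype(np.float32) * np.array([4.0, 3.0, 20.0], dtype=np.float32)
fg_mask = kitti_utils.in_hull(pts, corners[0])                     # (100,) bool, True for points inside the box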

View File

@@ -0,0 +1,102 @@
import torch
import torch.nn as nn
import sys
sys.path.append('..')
from pointnet2.pointnet2_modules import PointnetFPModule, PointnetSAModuleMSG
import pointnet2.pytorch_utils as pt_utils
def get_model(input_channels=0):
return Pointnet2MSG(input_channels=input_channels)
NPOINTS = [4096, 1024, 256, 64]
RADIUS = [[0.1, 0.5], [0.5, 1.0], [1.0, 2.0], [2.0, 4.0]]
NSAMPLE = [[16, 32], [16, 32], [16, 32], [16, 32]]
MLPS = [[[16, 16, 32], [32, 32, 64]], [[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]], [[256, 256, 512], [256, 384, 512]]]
FP_MLPS = [[128, 128], [256, 256], [512, 512], [512, 512]]
CLS_FC = [128]
DP_RATIO = 0.5
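# NPOINTS: centroids kept at each set-abstraction (SA) level; RADIUS / NSAMPLE: the two grouping
# scales per level (multi-scale grouping) and the neighbour cap for each scale; MLPS: per-scale MLP
# widths; FP_MLPS: feature-propagation widths on the upsampling path; CLS_FC: hidden widths of the
# per-point classification head; DP_RATIO: dropout ratio (0.5).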
class Pointnet2MSG(nn.Module):
def __init__(self, input_channels=6):
super().__init__()
self.SA_modules = nn.ModuleList()
channel_in = input_channels
skip_channel_list = [input_channels]
for k in range(NPOINTS.__len__()):
mlps = MLPS[k].copy()
channel_out = 0
for idx in range(mlps.__len__()):
mlps[idx] = [channel_in] + mlps[idx]
channel_out += mlps[idx][-1]
self.SA_modules.append(
PointnetSAModuleMSG(
npoint=NPOINTS[k],
radii=RADIUS[k],
nsamples=NSAMPLE[k],
mlps=mlps,
use_xyz=True,
bn=True
)
)
skip_channel_list.append(channel_out)
channel_in = channel_out
self.FP_modules = nn.ModuleList()
for k in range(FP_MLPS.__len__()):
pre_channel = FP_MLPS[k + 1][-1] if k + 1 < len(FP_MLPS) else channel_out
self.FP_modules.append(
PointnetFPModule(mlp=[pre_channel + skip_channel_list[k]] + FP_MLPS[k])
)
cls_layers = []
pre_channel = FP_MLPS[0][-1]
for k in range(0, CLS_FC.__len__()):
cls_layers.append(pt_utils.Conv1d(pre_channel, CLS_FC[k], bn=True))
pre_channel = CLS_FC[k]
cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None))
cls_layers.insert(1, nn.Dropout(0.5))
self.cls_layer = nn.Sequential(*cls_layers)
def _break_up_pc(self, pc):
xyz = pc[..., 0:3].contiguous()
features = (
pc[..., 3:].transpose(1, 2).contiguous()
if pc.size(-1) > 3 else None
)
return xyz, features
def forward(self, pointcloud: torch.cuda.FloatTensor):
xyz, features = self._break_up_pc(pointcloud)
l_xyz, l_features = [xyz], [features]
for i in range(len(self.SA_modules)):
li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
print(li_xyz.shape, li_features.shape)
l_xyz.append(li_xyz)
l_features.append(li_features)
for i in range(-1, -(len(self.FP_modules) + 1), -1):
l_features[i - 1] = self.FP_modules[i](
l_xyz[i - 1], l_xyz[i], l_features[i - 1], l_features[i]
)
pred_cls = self.cls_layer(l_features[0]).transpose(1, 2).contiguous() # (B, N, 1)
return pred_cls
if __name__ == '__main__':
net = Pointnet2MSG(0).cuda()
pts = torch.randn(2, 1024, 3).cuda()
pre = net(pts)
print(pre.shape)

View File

@@ -0,0 +1,217 @@
import _init_path
import numpy as np
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.optim.lr_scheduler as lr_sched
from torch.nn.utils import clip_grad_norm_
from torch.utils.data import DataLoader
import tensorboard_logger as tb_log
from dataset import KittiDataset
import argparse
import importlib
parser = argparse.ArgumentParser(description="Arg parser")
parser.add_argument("--batch_size", type=int, default=8)
parser.add_argument("--epochs", type=int, default=100)
parser.add_argument("--ckpt_save_interval", type=int, default=5)
parser.add_argument('--workers', type=int, default=4)
parser.add_argument("--mode", type=str, default='train')
parser.add_argument("--ckpt", type=str, default='None')
parser.add_argument("--net", type=str, default='pointnet2_msg')
parser.add_argument('--lr', type=float, default=0.002)
parser.add_argument('--lr_decay', type=float, default=0.2)
parser.add_argument('--lr_clip', type=float, default=0.000001)
parser.add_argument('--decay_step_list', type=list, default=[50, 70, 80, 90])
parser.add_argument('--weight_decay', type=float, default=0.001)
parser.add_argument("--output_dir", type=str, default='output')
parser.add_argument("--extra_tag", type=str, default='default')
args = parser.parse_args()
FG_THRESH = 0.3
def log_print(info, log_f=None):
print(info)
if log_f is not None:
print(info, file=log_f)
class DiceLoss(nn.Module):
def __init__(self, ignore_target=-1):
super().__init__()
self.ignore_target = ignore_target
def forward(self, input, target):
"""
:param input: (N), logit
:param target: (N), {0, 1}
:return:
"""
input = torch.sigmoid(input.view(-1))
target = target.float().view(-1)
mask = (target != self.ignore_target).float()
return 1.0 - (torch.min(input, target) * mask).sum() / torch.clamp((torch.max(input, target) * mask).sum(), min=1.0)
def train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f):
model.train()
log_print('===============TRAIN EPOCH %d================' % epoch, log_f=log_f)
loss_func = DiceLoss(ignore_target=-1)
for it, batch in enumerate(train_loader):
optimizer.zero_grad()
pts_input, cls_labels = batch['pts_input'], batch['cls_labels']
pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1)
pred_cls = model(pts_input)
pred_cls = pred_cls.view(-1)
loss = loss_func(pred_cls, cls_labels)
loss.backward()
clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
total_it += 1
pred_class = (torch.sigmoid(pred_cls) > FG_THRESH)
fg_mask = cls_labels > 0
correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum()
union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct
iou = correct / torch.clamp(union, min=1.0)
cur_lr = lr_scheduler.get_lr()[0]
tb_log.log_value('learning_rate', cur_lr, epoch)
if tb_log is not None:
tb_log.log_value('train_loss', loss, total_it)
tb_log.log_value('train_fg_iou', iou, total_it)
log_print('training epoch %d: it=%d/%d, total_it=%d, loss=%.5f, fg_iou=%.3f, lr=%f' %
(epoch, it, len(train_loader), total_it, loss.item(), iou.item(), cur_lr), log_f=log_f)
return total_it
def eval_one_epoch(model, eval_loader, epoch, tb_log=None, log_f=None):
model.eval()
log_print('===============EVAL EPOCH %d================' % epoch, log_f=log_f)
iou_list = []
for it, batch in enumerate(eval_loader):
pts_input, cls_labels = batch['pts_input'], batch['cls_labels']
pts_input = torch.from_numpy(pts_input).cuda(non_blocking=True).float()
cls_labels = torch.from_numpy(cls_labels).cuda(non_blocking=True).long().view(-1)
pred_cls = model(pts_input)
pred_cls = pred_cls.view(-1)
pred_class = (torch.sigmoid(pred_cls) > FG_THRESH)
fg_mask = cls_labels > 0
correct = ((pred_class.long() == cls_labels) & fg_mask).float().sum()
union = fg_mask.sum().float() + (pred_class > 0).sum().float() - correct
iou = correct / torch.clamp(union, min=1.0)
iou_list.append(iou.item())
log_print('EVAL: it=%d/%d, iou=%.3f' % (it, len(eval_loader), iou), log_f=log_f)
iou_list = np.array(iou_list)
avg_iou = iou_list.mean()
if tb_log is not None:
tb_log.log_value('eval_fg_iou', avg_iou, epoch)
log_print('\nEpoch %d: Average IoU (samples=%d): %.6f' % (epoch, iou_list.__len__(), avg_iou), log_f=log_f)
return avg_iou
def save_checkpoint(model, epoch, ckpt_name):
if isinstance(model, torch.nn.DataParallel):
model_state = model.module.state_dict()
else:
model_state = model.state_dict()
state = {'epoch': epoch, 'model_state': model_state}
ckpt_name = '{}.pth'.format(ckpt_name)
torch.save(state, ckpt_name)
def load_checkpoint(model, filename):
if os.path.isfile(filename):
log_print("==> Loading from checkpoint %s" % filename)
checkpoint = torch.load(filename)
epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['model_state'])
log_print("==> Done")
else:
raise FileNotFoundError
return epoch
def train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f):
model.cuda()
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
def lr_lbmd(cur_epoch):
cur_decay = 1
for decay_step in args.decay_step_list:
if cur_epoch >= decay_step:
cur_decay = cur_decay * args.lr_decay
return max(cur_decay, args.lr_clip / args.lr)
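# step decay: e.g. with the defaults (lr=0.002, lr_decay=0.2, decay steps [50, 70, 80, 90]),
# epoch 75 has passed two steps, so cur_decay = 0.2^2 = 0.04 and the effective lr is 8e-5;
# the max() clamps the multiplier so the lr never drops below lr_clip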
lr_scheduler = lr_sched.LambdaLR(optimizer, lr_lbmd)
total_it = 0
for epoch in range(1, args.epochs + 1):
lr_scheduler.step(epoch)
total_it = train_one_epoch(model, train_loader, optimizer, epoch, lr_scheduler, total_it, tb_log, log_f)
if epoch % args.ckpt_save_interval == 0:
with torch.no_grad():
avg_iou = eval_one_epoch(model, eval_loader, epoch, tb_log, log_f)
ckpt_name = os.path.join(ckpt_dir, 'checkpoint_epoch_%d' % epoch)
save_checkpoint(model, epoch, ckpt_name)
if __name__ == '__main__':
MODEL = importlib.import_module(args.net) # import network module
model = MODEL.get_model(input_channels=0)
eval_set = KittiDataset(root_dir='./data', mode='EVAL', split='val')
eval_loader = DataLoader(eval_set, batch_size=args.batch_size, shuffle=False, pin_memory=True,
num_workers=args.workers, collate_fn=eval_set.collate_batch)
if args.mode == 'train':
train_set = KittiDataset(root_dir='./data', mode='TRAIN', split='train')
train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, pin_memory=True,
num_workers=args.workers, collate_fn=train_set.collate_batch)
# output dir config
output_dir = os.path.join(args.output_dir, args.extra_tag)
os.makedirs(output_dir, exist_ok=True)
tb_log.configure(os.path.join(output_dir, 'tensorboard'))
ckpt_dir = os.path.join(output_dir, 'ckpt')
os.makedirs(ckpt_dir, exist_ok=True)
log_file = os.path.join(output_dir, 'log.txt')
log_f = open(log_file, 'w')
for key, val in vars(args).items():
log_print("{:16} {}".format(key, val), log_f=log_f)
# train and eval
train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f)
log_f.close()
elif args.mode == 'eval':
epoch = load_checkpoint(model, args.ckpt)
model.cuda()
with torch.no_grad():
avg_iou = eval_one_epoch(model, eval_loader, epoch)
else:
raise NotImplementedError

View File

@@ -0,0 +1,119 @@
import torch
import torch.nn as nn
import os
import sys
path = os.path.abspath(__file__)
for i in range(2):
path = os.path.dirname(path)
PROJECT_ROOT = path
sys.path.append(PROJECT_ROOT)
from modules.module_lib.pointnet2_utils.pointnet2.pointnet2_modules import PointnetSAModuleMSG
ClsMSG_CFG_Dense = {
'NPOINTS': [512, 256, 128, None],
'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
'NSAMPLE': [[32, 64], [16, 32], [8, 16], [None, None]],
'MLPS': [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 384, 512]]],
'DP_RATIO': 0.5,
}
ClsMSG_CFG_Light = {
'NPOINTS': [512, 256, 128, None],
'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
'NSAMPLE': [[16, 32], [16, 32], [16, 32], [None, None]],
'MLPS': [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 384, 512]]],
'DP_RATIO': 0.5,
}
ClsMSG_CFG_Lighter = {
'NPOINTS': [512, 256, 128, 64, None],
'RADIUS': [[0.01], [0.02], [0.04], [0.08], [None]],
'NSAMPLE': [[64], [32], [16], [8], [None]],
'MLPS': [[[32, 32, 64]],
[[64, 64, 128]],
[[128, 196, 256]],
[[256, 256, 512]],
[[512, 512, 1024]]],
'DP_RATIO': 0.5,
}
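# An NPOINTS entry of None makes the final SA stage group all remaining points into a single
# global descriptor (group-all pooling in PointnetSAModuleMSG), so encode_points returns one
# feature vector per cloud.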
def select_params(name):
if name == 'light':
return ClsMSG_CFG_Light
elif name == 'lighter':
return ClsMSG_CFG_Lighter
elif name == 'dense':
return ClsMSG_CFG_Dense
else:
raise NotImplementedError
def break_up_pc(pc):
xyz = pc[..., 0:3].contiguous()
features = (
pc[..., 3:].transpose(1, 2).contiguous()
if pc.size(-1) > 3 else None
)
return xyz, features
class PointNet2Encoder(nn.Module):
def encode_points(self, pts):
return self.forward(pts)
def __init__(self, config:dict):
super().__init__()
input_channels = config.get("in_dim", 0)
params_name = config.get("params_name", "light")
self.SA_modules = nn.ModuleList()
channel_in = input_channels
selected_params = select_params(params_name)
for k in range(selected_params['NPOINTS'].__len__()):
mlps = selected_params['MLPS'][k].copy()
channel_out = 0
for idx in range(mlps.__len__()):
mlps[idx] = [channel_in] + mlps[idx]
channel_out += mlps[idx][-1]
self.SA_modules.append(
PointnetSAModuleMSG(
npoint=selected_params['NPOINTS'][k],
radii=selected_params['RADIUS'][k],
nsamples=selected_params['NSAMPLE'][k],
mlps=mlps,
use_xyz=True,
bn=True
)
)
channel_in = channel_out
def forward(self, point_cloud: torch.cuda.FloatTensor):
xyz, features = break_up_pc(point_cloud)
l_xyz, l_features = [xyz], [features]
for i in range(len(self.SA_modules)):
li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
l_xyz.append(li_xyz)
l_features.append(li_features)
return l_features[-1].squeeze(-1)
if __name__ == '__main__':
seed = 100
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
net = PointNet2Encoder(config={"in_dim": 0, "params_name": "light"}).cuda()
pts = torch.randn(2, 1024, 3).cuda()
print(torch.mean(pts, dim=1))
pre = net.encode_points(pts)
print(pre.shape)

View File

@@ -164,10 +164,10 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal"
root = r"H:\AI\Datasets\nbv_rec_part2"
root = r"/media/hofee/data/results/ycb_view_data"
scene_list = os.listdir(root)
from_idx = 0 # 1000
to_idx = 600 # 1500
to_idx = len(scene_list) # 1500
cnt = 0

View File

@@ -13,7 +13,7 @@ from PytorchBoot.utils import Log
from utils.pts import PtsUtil
@stereotype.runner("inferencer")
@stereotype.runner("inferencer_server")
class InferencerServer(Runner):
def __init__(self, config_path):
super().__init__(config_path)
@@ -24,40 +24,44 @@ class InferencerServer(Runner):
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
self.pipeline = self.pipeline.to(self.device)
self.pts_num = 8192
self.voxel_size = 0.002
''' Experiment '''
self.load_experiment("nbv_evaluator")
self.load_experiment("inferencer_server")
def get_input_data(self, data):
input_data = {}
scanned_pts = data["scanned_pts"]
scanned_n_to_world_pose_9d = data["scanned_n_to_world_pose_9d"]
combined_scanned_views_pts = np.concatenate(scanned_pts, axis=0)
fps_downsampled_combined_scanned_pts, fps_idx = PtsUtil.fps_downsample_point_cloud(
combined_scanned_views_pts, self.pts_num, require_idx=True
voxel_downsampled_combined_scanned_pts = PtsUtil.voxel_downsample_point_cloud(
combined_scanned_views_pts, self.voxel_size
)
fps_downsampled_combined_scanned_pts, fps_idx = PtsUtil.fps_downsample_point_cloud(
voxel_downsampled_combined_scanned_pts, self.pts_num, require_idx=True
)
combined_scanned_views_pts_mask = np.zeros(len(combined_scanned_views_pts), dtype=np.uint8)
start_idx = 0
for i in range(len(scanned_pts)):
end_idx = start_idx + len(scanned_pts[i])
combined_scanned_views_pts_mask[start_idx:end_idx] = i
start_idx = end_idx
fps_downsampled_combined_scanned_pts_mask = combined_scanned_views_pts_mask[fps_idx]
input_data["scanned_pts_mask"] = np.asarray(fps_downsampled_combined_scanned_pts_mask, dtype=np.uint8)
input_data["scanned_pts"] = scanned_pts
input_data["scanned_n_to_world_pose_9d"] = np.asarray(scanned_n_to_world_pose_9d, dtype=np.float32)
input_data["combined_scanned_pts"] = np.asarray(fps_downsampled_combined_scanned_pts, dtype=np.float32)
return input_data
def get_result(self, output_data):
estimated_delta_rot_9d = output_data["pred_pose_9d"]
pred_pose_9d = output_data["pred_pose_9d"]
result = {
"estimated_delta_rot_9d": estimated_delta_rot_9d.tolist()
"pred_pose_9d": pred_pose_9d.tolist()
}
return result
def collate_input(self, input_data):
collated_input_data = {}
collated_input_data["scanned_pts"] = [torch.tensor(input_data["scanned_pts"], dtype=torch.float32, device=self.device)]
collated_input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(input_data["scanned_n_to_world_pose_9d"], dtype=torch.float32, device=self.device)]
collated_input_data["combined_scanned_pts"] = torch.tensor(input_data["combined_scanned_pts"], dtype=torch.float32, device=self.device).unsqueeze(0)
return collated_input_data
def run(self):
Log.info("Loading from epoch {}.".format(self.current_epoch))
@@ -65,7 +69,8 @@ class InferencerServer(Runner):
def inference():
data = request.json
input_data = self.get_input_data(data)
output_data = self.pipeline.forward_test(input_data)
collated_input_data = self.collate_input(input_data)
output_data = self.pipeline.forward_test(collated_input_data)
result = self.get_result(output_data)
return jsonify(result)
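For reference, a minimal client call might look like the sketch below. The host, port and route are assumptions, since the Flask route registration is outside this hunk; the payload keys and the returned "pred_pose_9d" field follow get_input_data and get_result above.

import numpy as np
import requests

# dummy single-view data; shapes follow get_input_data()
scanned_pts = [np.random.rand(1024, 3).astype(np.float32)]
pose_9d = [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]  # 6d rotation (identity) + translation

payload = {
    "scanned_pts": [p.tolist() for p in scanned_pts],
    "scanned_n_to_world_pose_9d": [pose_9d],
}
resp = requests.post("http://localhost:5000/inference", json=payload)  # host/port/route are assumptions
print(resp.json()["pred_pose_9d"])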

View File

@@ -19,14 +19,19 @@ from PytorchBoot.dataset import BaseDataset
from PytorchBoot.runners.runner import Runner
from PytorchBoot.utils import Log
from PytorchBoot.status import status_manager
from utils.data_load import DataLoadUtil
@stereotype.runner("inferencer")
class Inferencer(Runner):
def __init__(self, config_path):
super().__init__(config_path)
self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path")
self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir")
self.voxel_size = ConfigManager.get(namespace.Stereotype.RUNNER, "voxel_size")
self.min_new_area = ConfigManager.get(namespace.Stereotype.RUNNER, "min_new_area")
CM = 0.01
self.min_new_pts_num = self.min_new_area * (CM / self.voxel_size) ** 2
''' Pipeline '''
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
@@ -34,7 +39,12 @@ class Inferencer(Runner):
''' Experiment '''
self.load_experiment("nbv_evaluator")
self.stat_result = {}
self.stat_result_path = os.path.join(self.output_dir, "stat.json")
if os.path.exists(self.stat_result_path):
with open(self.stat_result_path, "r") as f:
self.stat_result = json.load(f)
else:
self.stat_result = {}
''' Test '''
self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST)
@@ -65,128 +75,163 @@ class Inferencer(Runner):
for dataset_idx, test_set in enumerate(self.test_set_list):
status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list))
test_set_name = test_set.get_name()
test_loader = test_set.get_loader()
if test_loader.batch_size > 1:
Log.error("Batch size should be 1 for inference, found {} in {}".format(test_loader.batch_size, test_set_name), terminate=True)
total=int(len(test_loader))
loop = tqdm(enumerate(test_loader), total=total)
for i, data in loop:
status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
test_set.process_batch(data, self.device)
output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"][0], output)
total=int(len(test_set))
for i in tqdm(range(total), desc=f"Processing {test_set_name}", ncols=100):
try:
data = test_set.__getitem__(i)
scene_name = data["scene_name"]
inference_result_path = os.path.join(self.output_dir, test_set_name, f"{scene_name}.pkl")
if os.path.exists(inference_result_path):
Log.info(f"Inference result already exists for scene: {scene_name}")
continue
status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"], output)
except Exception as e:
Log.error(f"Error in scene {scene_name}, {e}")
continue
status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list))
def predict_sequence(self, data, cr_increase_threshold=0, max_iter=50, max_retry=5):
scene_name = data["scene_name"][0]
def predict_sequence(self, data, cr_increase_threshold=0, overlap_area_threshold=25, scan_points_threshold=10, max_iter=50, max_retry=10, max_success=3):
scene_name = data["scene_name"]
Log.info(f"Processing scene: {scene_name}")
status_manager.set_status("inference", "inferencer", "scene", scene_name)
''' data for rendering '''
scene_path = data["scene_path"][0]
O_to_L_pose = data["O_to_L_pose"][0]
voxel_threshold = data["voxel_threshold"][0]
filter_degree = data["filter_degree"][0]
model_points_normals = data["model_points_normals"][0]
model_pts = model_points_normals[:,:3]
down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
first_frame_to_world_9d = data["first_to_world_9d"][0]
first_frame_to_world = torch.eye(4, device=first_frame_to_world_9d.device)
first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(first_frame_to_world_9d[:,:6])[0]
first_frame_to_world[:3,3] = first_frame_to_world_9d[0,6:]
first_frame_to_world = first_frame_to_world.to(self.device)
scene_path = data["scene_path"]
O_to_L_pose = data["O_to_L_pose"]
voxel_threshold = self.voxel_size
filter_degree = 75
down_sampled_model_pts = data["gt_pts"]
first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0]
first_frame_to_world = np.eye(4)
first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6])
first_frame_to_world[:3,3] = first_frame_to_world_9d[6:]
''' data for inference '''
input_data = {}
input_data["scanned_pts"] = [data["first_pts"][0].to(self.device)]
input_data["scanned_n_to_world_pose_9d"] = [data["first_to_world_9d"][0].to(self.device)]
input_data["combined_scanned_pts"] = torch.tensor(data["first_scanned_pts"][0], dtype=torch.float32).to(self.device).unsqueeze(0)
input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(data["first_scanned_n_to_world_pose_9d"], dtype=torch.float32).to(self.device)]
input_data["mode"] = namespace.Mode.TEST
input_data["combined_scanned_pts"] = data["combined_scanned_pts"]
input_pts_N = input_data["scanned_pts"][0].shape[1]
input_pts_N = input_data["combined_scanned_pts"].shape[1]
first_frame_target_pts, _ = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
scanned_view_pts = [first_frame_target_pts]
last_pred_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
root = os.path.dirname(scene_path)
display_table_info = DataLoadUtil.get_display_table_info(root, scene_name)
radius = display_table_info["radius"]
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=radius))
first_frame_target_pts, first_frame_target_normals, first_frame_scan_points_indices = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
scanned_view_pts = [first_frame_target_pts]
history_indices = [first_frame_scan_points_indices]
last_pred_cr, added_pts_num = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
retry_duplication_pose = []
retry_no_pts_pose = []
retry_overlap_pose = []
retry = 0
pred_cr_seq = [last_pred_cr]
while len(pred_cr_seq) < max_iter and retry < max_retry:
success = 0
last_pts_num = PtsUtil.voxel_downsample_point_cloud(data["first_scanned_pts"][0], voxel_threshold).shape[0]
import time
while len(pred_cr_seq) < max_iter and retry < max_retry and success < max_success:
Log.green(f"iter: {len(pred_cr_seq)}, retry: {retry}/{max_retry}, success: {success}/{max_success}")
combined_scanned_pts = np.vstack(scanned_view_pts)
voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_pts, voxel_threshold)
output = self.pipeline(input_data)
pred_pose_9d = output["pred_pose_9d"]
import ipdb; ipdb.set_trace()
pred_pose = torch.eye(4, device=pred_pose_9d.device)
pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9d[:,:6])[0]
pred_pose[:3,3] = pred_pose_9d[0,6:]
try:
new_target_pts_world, new_pts_world = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose, require_full_scene=True)
new_target_pts, new_target_normals, new_scan_points_indices = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
#import ipdb; ipdb.set_trace()
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold)
overlap, _ = ReconstructionUtil.check_overlap(downsampled_new_target_pts, voxel_downsampled_combined_scanned_pts_np, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=voxel_threshold, require_new_added_pts_num = True)
if not overlap:
Log.yellow("no overlap!")
retry += 1
retry_overlap_pose.append(pred_pose.cpu().numpy().tolist())
continue
history_indices.append(new_scan_points_indices)
except Exception as e:
Log.warning(f"Error in scene {scene_path}, {e}")
Log.error(f"Error in scene {scene_path}, {e}")
print("current pose: ", pred_pose)
print("curr_pred_cr: ", last_pred_cr)
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
continue
pred_cr = self.compute_coverage_rate(scanned_view_pts, new_target_pts_world, down_sampled_model_pts, threshold=voxel_threshold)
print(pred_cr, last_pred_cr, " max: ", data["max_coverage_rate"])
if pred_cr >= data["max_coverage_rate"]:
print("max coverage rate reached!")
if pred_cr <= last_pred_cr + cr_increase_threshold:
if new_target_pts.shape[0] == 0:
Log.red("no pts in new target")
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
continue
retry = 0
pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts_world)
down_sampled_new_pts_world = PtsUtil.random_downsample_point_cloud(new_pts_world, input_pts_N)
new_pts_world_aug = np.hstack([down_sampled_new_pts_world, np.ones((down_sampled_new_pts_world.shape[0], 1))])
new_pts = np.dot(np.linalg.inv(first_frame_to_world.cpu()), new_pts_world_aug.T).T[:,:3]
new_pts_tensor = torch.tensor(new_pts, dtype=torch.float32).unsqueeze(0).to(self.device)
input_data["scanned_pts"] = [torch.cat([input_data["scanned_pts"][0] , new_pts_tensor], dim=0)]
pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold)
Log.yellow(f"{pred_cr}, {last_pred_cr}, max: , {data['seq_max_coverage_rate']}")
if pred_cr >= data["seq_max_coverage_rate"] - 1e-3:
print("max coverage rate reached!: ", pred_cr)
pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts)
input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]
combined_scanned_views_pts = np.concatenate(input_data["scanned_pts"][0].tolist(), axis=0)
voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002)
combined_scanned_pts = np.vstack(scanned_view_pts)
voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_pts, voxel_threshold)
random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)
last_pred_cr = pred_cr
pts_num = voxel_downsampled_combined_scanned_pts_np.shape[0]
Log.info(f"delta pts num:,{pts_num - last_pts_num },{pts_num}, {last_pts_num}")
if pts_num - last_pts_num < self.min_new_pts_num and pred_cr <= data["seq_max_coverage_rate"] - 1e-2:
retry += 1
retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
Log.red(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}")
elif pts_num - last_pts_num < self.min_new_pts_num and pred_cr > data["seq_max_coverage_rate"] - 1e-2:
success += 1
Log.success(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}")
last_pts_num = pts_num
input_data["scanned_pts"] = input_data["scanned_pts"][0].cpu().numpy().tolist()
input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
result = {
"pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
"pts_seq": input_data["scanned_pts"],
"combined_scanned_pts": input_data["combined_scanned_pts"],
"target_pts_seq": scanned_view_pts,
"coverage_rate_seq": pred_cr_seq,
"max_coverage_rate": data["max_coverage_rate"][0],
"max_coverage_rate": data["seq_max_coverage_rate"],
"pred_max_coverage_rate": max(pred_cr_seq),
"scene_name": scene_name,
"retry_no_pts_pose": retry_no_pts_pose,
"retry_duplication_pose": retry_duplication_pose,
"best_seq_len": data["best_seq_len"][0],
"retry_overlap_pose": retry_overlap_pose,
"best_seq_len": data["best_seq_len"],
}
self.stat_result[scene_name] = {
"max_coverage_rate": data["max_coverage_rate"][0],
"success_rate": max(pred_cr_seq)/ data["max_coverage_rate"][0],
"coverage_rate_seq": pred_cr_seq,
"pred_max_coverage_rate": max(pred_cr_seq),
"pred_seq_len": len(pred_cr_seq),
}
print('success rate: ', max(pred_cr_seq) / data["max_coverage_rate"][0])
print('success rate: ', max(pred_cr_seq))
return result
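predict_sequence rebuilds a 4x4 camera-to-world pose from the 9D vector (a 6D rotation representation plus a 3D translation) via PoseUtil. PoseUtil itself is not part of this diff, so the sketch below only shows the standard Gram-Schmidt reconstruction commonly used for the 6D representation; the column convention is an assumption, not necessarily PoseUtil's.

import numpy as np

def pose_9d_to_matrix(pose_9d):
    # pose_9d: first 6 entries are the 6D rotation, last 3 the translation
    a1, a2, t = pose_9d[:3], pose_9d[3:6], pose_9d[6:]
    b1 = a1 / np.linalg.norm(a1)
    b2 = a2 - np.dot(b1, a2) * b1
    b2 = b2 / np.linalg.norm(b2)
    b3 = np.cross(b1, b2)
    pose = np.eye(4)
    pose[:3, :3] = np.stack([b1, b2, b3], axis=-1)   # b1, b2, b3 as columns (assumed convention)
    pose[:3, 3] = t
    return pose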
@@ -198,7 +243,14 @@ class Inferencer(Runner):
combined_point_cloud = np.vstack(new_scanned_view_pts)
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold)
return ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold)
def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, inverse
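A tiny demonstration of the trick used in voxel_downsample_with_mapping: np.unique with return_inverse and return_counts groups points by voxel, np.cumsum(counts) - counts gives the start offset of each group in the sorted order, and indexing idx_sort at those offsets picks one representative point per voxel, while inverse maps every original point back to its voxel.

import numpy as np

points = np.array([[0.001, 0.001, 0.0],
                   [0.002, 0.002, 0.0],    # falls into the same 3 mm voxel as the first point
                   [0.010, 0.000, 0.0]])   # a different voxel
voxel_size = 0.003
voxel_indices = np.floor(points / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(
    voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts) - counts]
print(points[idx_unique])   # one point per occupied voxel -> 2 points survive
print(inverse)              # [0 0 1]: voxel id of each original point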
def save_inference_result(self, dataset_name, scene_name, output):
dataset_dir = os.path.join(self.output_dir, dataset_name)
@@ -206,7 +258,7 @@ class Inferencer(Runner):
os.makedirs(dataset_dir)
output_path = os.path.join(dataset_dir, f"{scene_name}.pkl")
pickle.dump(output, open(output_path, "wb"))
with open(os.path.join(dataset_dir, "stat.json"), "w") as f:
with open(self.stat_result_path, "w") as f:
json.dump(self.stat_result, f)


@@ -24,8 +24,6 @@ class DataLoadUtil:
for channel in float_channels:
channel_data = exr_file.channel(channel)
img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width)))
# combine the channels into a single (height, width, 3) RGB image
img = np.stack(img_data, axis=-1)
return img


@@ -16,6 +16,17 @@ class PtsUtil:
else:
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels[0]*voxel_size
@staticmethod
def voxel_downsample_point_cloud_random(point_cloud, voxel_size=0.005, require_idx=False):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
if require_idx:
return downsampled_points, inverse
return downsampled_points
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):


@@ -32,13 +32,15 @@ class ReconstructionUtil:
@staticmethod
def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01):
def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01, require_new_added_pts_num=False):
kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(new_point_cloud)
overlapping_points = np.sum(distances < voxel_size*2)
overlapping_points_num = np.sum(distances < voxel_size*2)
cm = 0.01
voxel_size_cm = voxel_size / cm
overlap_area = overlapping_points * voxel_size_cm * voxel_size_cm
overlap_area = overlapping_points_num * voxel_size_cm * voxel_size_cm
if require_new_added_pts_num:
return overlap_area > overlap_area_threshold, len(new_point_cloud)-np.sum(distances < voxel_size*1.2)
return overlap_area > overlap_area_threshold
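The overlap test builds a cKDTree on the already scanned cloud, counts how many new points have a nearest neighbour within twice the voxel size, and converts that count into an approximate contact area in cm² using the voxel footprint. A standalone sketch of the same computation (scipy is a dependency assumption taken from the cKDTree usage above):

import numpy as np
from scipy.spatial import cKDTree

def overlap_area_cm2(new_pts, combined_pts, voxel_size=0.01):
    kdtree = cKDTree(combined_pts)
    distances, _ = kdtree.query(new_pts)
    overlapping_points_num = np.sum(distances < voxel_size * 2)
    voxel_size_cm = voxel_size / 0.01                    # voxel edge length in centimetres
    return overlapping_points_num * voxel_size_cm ** 2   # approximate overlap area in cm^2

# e.g. with voxel_size = 0.003 m each overlapping point contributes 0.09 cm^2,
# so the default threshold of 25 cm^2 requires roughly 278 overlapping points.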


@@ -1,16 +1,75 @@
import os
import json
import time
import subprocess
import tempfile
import shutil
import numpy as np
from utils.data_load import DataLoadUtil
from utils.reconstruction import ReconstructionUtil
from utils.pts import PtsUtil
class RenderUtil:
target_mask_label = (0, 255, 0)
display_table_mask_label = (0, 0, 255)
random_downsample_N = 32768
min_z = 0.2
max_z = 0.5
@staticmethod
def render_pts(cam_pose, scene_path, script_path, model_points_normals, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[mask].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
return np.zeros((0, 3)), np.zeros((0, 3))
sampled_normal_camera = normal_camera[idx]
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world, sampled_normal_camera
@staticmethod
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
sampled_target_points = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N
)
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
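get_world_points and get_world_points_and_normal both unproject masked depth pixels with the standard pinhole model, x = (u - cx) * z / fx and y = (v - cy) * z / fy, and then move the points into world coordinates with the camera extrinsic. A small worked example with made-up intrinsics (fx = fy = 500, cx = 320, cy = 240):

import numpy as np

cam_intrinsic = np.array([[500.0,   0.0, 320.0],
                          [  0.0, 500.0, 240.0],
                          [  0.0,   0.0,   1.0]])
u, v, z = 420.0, 140.0, 0.4                                # pixel (u, v) with depth z in metres
x = (u - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]    # 0.08 m
y = (v - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]    # -0.08 m
p_cam = np.array([x, y, z, 1.0])
cam_to_world = np.eye(4)                                   # identity extrinsic for the example
p_world = (cam_to_world @ p_cam)[:3]
print(p_world)                                             # [ 0.08 -0.08  0.4 ]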
@staticmethod
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
@staticmethod
def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
@@ -24,29 +83,53 @@ class RenderUtil:
shutil.copy(scene_info_path, os.path.join(temp_dir, "scene_info.json"))
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
json.dump(params, f)
result = subprocess.run([
'blender', '-b', '-P', script_path, '--', temp_dir
'/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
if result.returncode != 0:
print("Blender script failed:")
print(result.stderr)
return None
#print(result)
path = os.path.join(temp_dir, "tmp")
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
''' TODO: old code: the filter_points API has changed; this call needs updating '''
filtered_point_cloud = PtsUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=filter_degree)
full_scene_point_cloud = None
if require_full_scene:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_params['near_plane'], cam_params['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_params['cam_intrinsic'], cam_params['cam_to_world'])['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_params['cam_intrinsic'], cam_params['cam_to_world_R'])['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
full_scene_point_cloud = PtsUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
target_mask_img_L = (mask_L == RenderUtil.target_mask_label).all(axis=-1)
target_mask_img_R = (mask_R == RenderUtil.target_mask_label).all(axis=-1)
return filtered_point_cloud, full_scene_point_cloud
sampled_target_points_L, sampled_target_normal_L = RenderUtil.get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], RenderUtil.random_downsample_N)
sampled_target_points_R = RenderUtil.get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], RenderUtil.random_downsample_N )
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points, overlap_idx = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_threshold, require_idx=True
)
sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
target_points, target_normals = PtsUtil.filter_points(
target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(RenderUtil.min_z, RenderUtil.max_z)
)
scan_points_indices_L = RenderUtil.get_scan_points_indices(scan_points, mask_img_L, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
scan_points_indices_R = RenderUtil.get_scan_points_indices(scan_points, mask_img_R, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
target_normals = np.zeros((0, 3))
#import ipdb; ipdb.set_trace()
return target_points, target_normals, scan_points_indices
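With this commit, render_pts no longer returns a (filtered point cloud, full scene point cloud) pair; it returns the target points, their normals, and the indices of display-table scan points visible to both cameras. A hedged usage sketch of the new contract, reusing names from the predict_sequence hunk above (values are illustrative):

target_pts, target_normals, scan_points_indices = RenderUtil.render_pts(
    pred_pose,               # 4x4 camera-to-world pose predicted by the pipeline
    scene_path,              # scene directory copied into a temp dir for Blender
    script_path,             # Blender rendering script
    scan_points,             # precomputed display-table scan points
    voxel_threshold=0.003,   # illustrative voxel size
    filter_degree=75,
    nO_to_nL_pose=O_to_L_pose)
if target_pts.shape[0] == 0:
    pass  # no target surface visible from this pose; the caller counts it as a retry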