diff --git a/app_sim.py b/app_sim.py new file mode 100644 index 0000000..feb5687 --- /dev/null +++ b/app_sim.py @@ -0,0 +1,11 @@ +from PytorchBoot.application import PytorchBootApplication +from runners.simulator import Simulator + +@PytorchBootApplication("sim") +class SimulateApp: + @staticmethod + def start(): + simulator = Simulator("configs/local/simulation_config.yaml") + simulator.run("create") + simulator.run("simulate") + \ No newline at end of file diff --git a/beans/predict_result.py b/beans/predict_result.py new file mode 100644 index 0000000..db99270 --- /dev/null +++ b/beans/predict_result.py @@ -0,0 +1,162 @@ +import numpy as np +from sklearn.cluster import DBSCAN + +class PredictResult: + def __init__(self, raw_predict_result, input_pts=None, cluster_params=dict(eps=0.5, min_samples=2)): + self.input_pts = input_pts + self.cluster_params = cluster_params + self.sampled_9d_pose = raw_predict_result + self.sampled_matrix_pose = self.get_sampled_matrix_pose() + self.distance_matrix = self.calculate_distance_matrix() + self.clusters = self.get_cluster_result() + self.candidate_matrix_poses = self.get_candidate_poses() + self.candidate_9d_poses = [np.concatenate((self.matrix_to_rotation_6d_numpy(matrix[:3,:3]), matrix[:3,3].reshape(-1,)), axis=-1) for matrix in self.candidate_matrix_poses] + self.cluster_num = len(self.clusters) + + @staticmethod + def rotation_6d_to_matrix_numpy(d6): + a1, a2 = d6[:3], d6[3:] + b1 = a1 / np.linalg.norm(a1) + b2 = a2 - np.dot(b1, a2) * b1 + b2 = b2 / np.linalg.norm(b2) + b3 = np.cross(b1, b2) + return np.stack((b1, b2, b3), axis=-2) + + @staticmethod + def matrix_to_rotation_6d_numpy(matrix): + return np.copy(matrix[:2, :]).reshape((6,)) + + def __str__(self): + info = "Predict Result:\n" + info += f" Predicted pose number: {len(self.sampled_9d_pose)}\n" + info += f" Cluster number: {self.cluster_num}\n" + for i, cluster in enumerate(self.clusters): + info += f" - Cluster {i} size: {len(cluster)}\n" + max_distance = np.max(self.distance_matrix[self.distance_matrix != 0]) + min_distance = np.min(self.distance_matrix[self.distance_matrix != 0]) + info += f" Max distance: {max_distance}\n" + info += f" Min distance: {min_distance}\n" + return info + + def get_sampled_matrix_pose(self): + sampled_matrix_pose = [] + for pose in self.sampled_9d_pose: + rotation = pose[:6] + translation = pose[6:] + pose = self.rotation_6d_to_matrix_numpy(rotation) + pose = np.concatenate((pose, translation.reshape(-1, 1)), axis=-1) + pose = np.concatenate((pose, np.array([[0, 0, 0, 1]])), axis=-2) + sampled_matrix_pose.append(pose) + return np.array(sampled_matrix_pose) + + def rotation_distance(self, R1, R2): + R = np.dot(R1.T, R2) + trace = np.trace(R) + angle = np.arccos(np.clip((trace - 1) / 2, -1, 1)) + return angle + + def calculate_distance_matrix(self): + n = len(self.sampled_matrix_pose) + dist_matrix = np.zeros((n, n)) + for i in range(n): + for j in range(n): + dist_matrix[i, j] = self.rotation_distance(self.sampled_matrix_pose[i][:3, :3], self.sampled_matrix_pose[j][:3, :3]) + return dist_matrix + + def cluster_rotations(self): + clustering = DBSCAN(eps=self.cluster_params['eps'], min_samples=self.cluster_params['min_samples'], metric='precomputed') + labels = clustering.fit_predict(self.distance_matrix) + return labels + + def get_cluster_result(self): + labels = self.cluster_rotations() + cluster_num = len(set(labels)) - (1 if -1 in labels else 0) + clusters = [] + for _ in range(cluster_num): + clusters.append([]) + for matrix_pose, label in 
zip(self.sampled_matrix_pose, labels): + if label != -1: + clusters[label].append(matrix_pose) + clusters.sort(key=len, reverse=True) + return clusters + + def get_center_matrix_pose_from_cluster(self, cluster): + min_total_distance = float('inf') + center_matrix_pose = None + + for matrix_pose in cluster: + total_distance = 0 + for other_matrix_pose in cluster: + rot_distance = self.rotation_distance(matrix_pose[:3, :3], other_matrix_pose[:3, :3]) + total_distance += rot_distance + + if total_distance < min_total_distance: + min_total_distance = total_distance + center_matrix_pose = matrix_pose + + return center_matrix_pose + + def get_candidate_poses(self): + candidate_poses = [] + for cluster in self.clusters: + candidate_poses.append(self.get_center_matrix_pose_from_cluster(cluster)) + return candidate_poses + + def visualize(self): + import plotly.graph_objects as go + fig = go.Figure() + if self.input_pts is not None: + fig.add_trace(go.Scatter3d( + x=self.input_pts[:, 0], y=self.input_pts[:, 1], z=self.input_pts[:, 2], + mode='markers', marker=dict(size=1, color='gray', opacity=0.5), name='Input Points' + )) + colors = ['aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance', + 'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg'] + for i, cluster in enumerate(self.clusters): + color = colors[i] + candidate_pose = self.candidate_matrix_poses[i] + origin_candidate = candidate_pose[:3, 3] + z_axis_candidate = candidate_pose[:3, 2] + for pose in cluster: + origin = pose[:3, 3] + z_axis = pose[:3, 2] + fig.add_trace(go.Cone( + x=[origin[0]], y=[origin[1]], z=[origin[2]], + u=[z_axis[0]], v=[z_axis[1]], w=[z_axis[2]], + colorscale=color, + sizemode="absolute", sizeref=0.05, anchor="tail", showscale=False + )) + fig.add_trace(go.Cone( + x=[origin_candidate[0]], y=[origin_candidate[1]], z=[origin_candidate[2]], + u=[z_axis_candidate[0]], v=[z_axis_candidate[1]], w=[z_axis_candidate[2]], + colorscale=color, + sizemode="absolute", sizeref=0.1, anchor="tail", showscale=False + )) + + fig.update_layout( + title="Clustered Poses and Input Points", + scene=dict( + xaxis_title='X', + yaxis_title='Y', + zaxis_title='Z' + ), + margin=dict(l=0, r=0, b=0, t=40), + scene_camera=dict(eye=dict(x=1.25, y=1.25, z=1.25)) + ) + + fig.show() + + + +if __name__ == "__main__": + step = 0 + raw_predict_result = np.load(f"inference_result_pack/inference_result_pack/{step}/all_pred_pose_9d.npy") + input_pts = np.loadtxt(f"inference_result_pack/inference_result_pack/{step}/input_pts.txt") + print(raw_predict_result.shape) + predict_result = PredictResult(raw_predict_result, input_pts, cluster_params=dict(eps=0.25, min_samples=3)) + print(predict_result) + print(len(predict_result.candidate_matrix_poses)) + print(predict_result.distance_matrix) + #import ipdb; ipdb.set_trace() + predict_result.visualize() + diff --git a/configs/local/inference_config.yaml b/configs/local/inference_config.yaml index e43b478..bd13482 100644 --- a/configs/local/inference_config.yaml +++ b/configs/local/inference_config.yaml @@ -6,16 +6,16 @@ runner: cuda_visible_devices: "0,1,2,3,4,5,6,7" experiment: - name: train_ab_partial + name: train_ab_global_only_p++_wp root_dir: "experiments" - epoch: -1 # -1 stands for last epoch + epoch: 922 # -1 stands for last epoch test: dataset_list: - OmniObject3d_test blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py" - output_dir: "/media/hofee/data/data/new_partial_inference_test_output" + output_dir: "/media/hofee/data/data/p++_wp" 
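Note on the clustering above: PredictResult groups the sampled poses purely by their rotation component, using the geodesic angle between rotation matrices as the DBSCAN metric (with a precomputed distance matrix) and keeping the medoid of each cluster as a candidate. A minimal, self-contained sketch of that idea; the names below are illustrative, not the repository's API:

import numpy as np
from sklearn.cluster import DBSCAN

def geodesic(R1, R2):
    # angle of the relative rotation R1^T R2, in radians
    cos = (np.trace(R1.T @ R2) - 1.0) / 2.0
    return np.arccos(np.clip(cos, -1.0, 1.0))

def cluster_rotations(rotations, eps=0.25, min_samples=3):
    n = len(rotations)
    dist = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            dist[i, j] = geodesic(rotations[i], rotations[j])
    labels = DBSCAN(eps=eps, min_samples=min_samples, metric="precomputed").fit_predict(dist)
    clusters = [np.where(labels == k)[0] for k in range(labels.max() + 1)]  # label -1 is noise
    # medoid of each cluster: the member with the smallest summed distance to the rest
    medoids = [idx[np.argmin(dist[np.ix_(idx, idx)].sum(axis=1))] for idx in clusters]
    return labels, medoids

Since the metric is an angle in radians, eps=0.25 corresponds to roughly 14 degrees between neighbouring rotations.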
pipeline: nbv_reconstruction_pipeline voxel_size: 0.003 min_new_area: 1.0 @@ -52,7 +52,7 @@ dataset: pipeline: nbv_reconstruction_pipeline: modules: - pts_encoder: pointnet_encoder + pts_encoder: pointnet++_encoder seq_encoder: transformer_seq_encoder pose_encoder: pose_encoder view_finder: gf_view_finder @@ -60,6 +60,10 @@ pipeline: global_scanned_feat: True module: + pointnet++_encoder: + in_dim: 3 + params_name: light + pointnet_encoder: in_dim: 3 out_dim: 1024 diff --git a/configs/local/simulation_config.yaml b/configs/local/simulation_config.yaml new file mode 100644 index 0000000..12fe216 --- /dev/null +++ b/configs/local/simulation_config.yaml @@ -0,0 +1,36 @@ +runner: + general: + seed: 0 + device: cuda + cuda_visible_devices: "0,1,2,3,4,5,6,7" + + experiment: + name: simulation_debug + root_dir: "experiments" + +simulation: + robot: + urdf_path: "assets/franka_panda/panda.urdf" + initial_position: [0, 0, 0] # 机械臂基座位置 + initial_orientation: [0, 0, 0] # 机械臂基座朝向(欧拉角) + + turntable: + radius: 0.3 # 转盘半径(米) + height: 0.1 # 转盘高度 + center_position: [0.8, 0, 0.4] + + target: + obj_dir: /media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/assets/object_meshes + obj_name: "google_scan-box_0185" + scale: 1.0 # 缩放系数 + mass: 0.1 # 质量(kg) + rgba_color: [0.8, 0.8, 0.8, 1.0] # 目标物体颜色 + + camera: + width: 640 + height: 480 + fov: 40 + near: 0.01 + far: 5.0 + +displaytable: \ No newline at end of file diff --git a/configs/local/strategy_generate_config.yaml b/configs/local/strategy_generate_config.yaml index e5178a3..1759346 100644 --- a/configs/local/strategy_generate_config.yaml +++ b/configs/local/strategy_generate_config.yaml @@ -15,13 +15,13 @@ runner: overlap_area_threshold: 30 compute_with_normal: False scan_points_threshold: 10 - overwrite: False + overwrite: False seq_num: 10 dataset_list: - OmniObject3d datasets: OmniObject3d: - root_dir: /data/hofee/nbv_rec_part2_preprocessed - from: 155 - to: 165 # ..-1 means end + root_dir: /media/hofee/data/data/test_bottle/view + from: 0 + to: -1 # ..-1 means end diff --git a/configs/local/view_generate_config.yaml b/configs/local/view_generate_config.yaml index 4396de7..5d7c653 100644 --- a/configs/local/view_generate_config.yaml +++ b/configs/local/view_generate_config.yaml @@ -8,16 +8,16 @@ runner: root_dir: experiments generate: port: 5002 - from: 1 + from: 0 to: 50 # -1 means all - object_dir: C:\\Document\\Datasets\\scaled_object_meshes - table_model_path: C:\\Document\\Datasets\\table.obj - output_dir: C:\\Document\\Datasets\\debug_generate_view + object_dir: /media/hofee/data/data/test_bottle/bottle_mesh + table_model_path: /media/hofee/data/data/others/table.obj + output_dir: /media/hofee/data/data/test_bottle/view binocular_vision: true plane_size: 10 max_views: 512 min_views: 128 - random_view_ratio: 0.02 + random_view_ratio: 0.002 min_cam_table_included_degree: 20 max_diag: 0.7 min_diag: 0.01 @@ -34,7 +34,7 @@ runner: max_y: 0.05 min_z: 0.01 max_z: 0.01 - random_rotation_ratio: 0.3 + random_rotation_ratio: 0.0 random_objects: num: 4 cluster: 0.9 diff --git a/configs/server/server_train_config.yaml b/configs/server/server_train_config.yaml index 18a8163..79ea4a1 100644 --- a/configs/server/server_train_config.yaml +++ b/configs/server/server_train_config.yaml @@ -7,19 +7,19 @@ runner: parallel: False experiment: - name: train_ab_global_only + name: train_ab_global_only_with_wp_p++_strong root_dir: "experiments" - use_checkpoint: True + use_checkpoint: False epoch: -1 # -1 stands for last epoch max_epochs: 5000 
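For reference, the 9D pose format these configs and datasets exchange (regression_head: Rx_Ry_and_T, pose_mode: rot_matrix) is a 6D rotation encoding, the first two rows of the rotation matrix, concatenated with the 3D translation; this mirrors rotation_6d_to_matrix_numpy / matrix_to_rotation_6d_numpy in beans/predict_result.py. A small round-trip sketch of that encoding, assuming SciPy is available to generate a random rotation:

import numpy as np
from scipy.spatial.transform import Rotation

R = Rotation.random().as_matrix()          # some valid rotation matrix
d6 = R[:2, :].reshape(6)                   # encode: first two rows of R
b1 = d6[:3] / np.linalg.norm(d6[:3])       # decode: Gram-Schmidt on the two rows
b2 = d6[3:] - np.dot(b1, d6[3:]) * b1
b2 /= np.linalg.norm(b2)
R_rec = np.stack([b1, b2, np.cross(b1, b2)])
assert np.allclose(R, R_rec)               # exact round trip for a proper rotation
pose_9d = np.concatenate([d6, np.array([0.1, 0.0, 0.3])])  # 6D rotation + translation -> 9D pose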
save_checkpoint_interval: 1 - test_first: True + test_first: False train: optimizer: type: Adam lr: 0.0001 - losses: + losses: - gf_loss dataset: OmniObject3d_train test: @@ -39,7 +39,7 @@ dataset: type: train cache: True ratio: 1 - batch_size: 80 + batch_size: 64 num_workers: 128 pts_num: 8192 load_from_preprocess: True @@ -80,7 +80,7 @@ dataset: pipeline: nbv_reconstruction_pipeline: modules: - pts_encoder: pointnet_encoder + pts_encoder: pointnet++_encoder seq_encoder: transformer_seq_encoder pose_encoder: pose_encoder view_finder: gf_view_finder @@ -96,6 +96,10 @@ module: global_feat: True feature_transform: False + pointnet++_encoder: + in_dim: 3 + params_name: strong + transformer_seq_encoder: embed_dim: 256 num_heads: 4 @@ -106,7 +110,7 @@ module: gf_view_finder: t_feat_dim: 128 pose_feat_dim: 256 - main_feat_dim: 2048 + main_feat_dim: 5120 regression_head: Rx_Ry_and_T pose_mode: rot_matrix per_point_feature: False diff --git a/core/nbv_dataset.py b/core/nbv_dataset.py index ca9c0c7..6583e5b 100644 --- a/core/nbv_dataset.py +++ b/core/nbv_dataset.py @@ -7,6 +7,7 @@ from PytorchBoot.utils.log_util import Log import torch import os import sys +import time sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction") @@ -114,8 +115,13 @@ class NBVReconstructionDataset(BaseDataset): except Exception as e: Log.error(f"Save cache failed: {e}") - def voxel_downsample_with_mask(self, pts, voxel_size): - pass + def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003): + voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32) + unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True) + idx_sort = np.argsort(inverse) + idx_unique = idx_sort[np.cumsum(counts)-counts] + downsampled_points = point_cloud[idx_unique] + return downsampled_points, inverse def __getitem__(self, index): @@ -129,6 +135,9 @@ class NBVReconstructionDataset(BaseDataset): scanned_coverages_rate, scanned_n_to_world_pose, ) = ([], [], []) + start_time = time.time() + start_indices = [0] + total_points = 0 for view in scanned_views: frame_idx = view[0] coverage_rate = view[1] @@ -150,8 +159,12 @@ class NBVReconstructionDataset(BaseDataset): n_to_world_trans = n_to_world_pose[:3, 3] n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0) scanned_n_to_world_pose.append(n_to_world_9d) + total_points += len(downsampled_target_point_cloud) + start_indices.append(total_points) + end_time = time.time() + #Log.info(f"load data time: {end_time - start_time}") nbv_idx, nbv_coverage_rate = nbv[0], nbv[1] nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx) cam_info = DataLoadUtil.load_cam_info(nbv_path) @@ -164,14 +177,27 @@ class NBVReconstructionDataset(BaseDataset): best_to_world_9d = np.concatenate( [best_to_world_6d, best_to_world_trans], axis=0 ) - - combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0) - voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_views_pts, 0.002) - random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num) + combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0) + voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_views_pts, 0.003) + random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, 
self.pts_num, require_idx=True) + + all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np)) + all_random_downsample_idx = all_idx_unique[random_downsample_idx] + scanned_pts_mask = [] + for idx, start_idx in enumerate(start_indices): + if idx == len(start_indices) - 1: + break + end_idx = start_indices[idx+1] + view_inverse = inverse[start_idx:end_idx] + view_unique_downsampled_idx = np.unique(view_inverse) + view_unique_downsampled_idx_set = set(view_unique_downsampled_idx) + mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx]) + scanned_pts_mask.append(mask) data_item = { "scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3) "combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3) + "scanned_pts_mask": np.asarray(scanned_pts_mask, dtype=np.bool), # Ndarray(N) "scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1) "scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9) "best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1) @@ -197,7 +223,9 @@ class NBVReconstructionDataset(BaseDataset): collate_data["scanned_n_to_world_pose_9d"] = [ torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch ] - + collate_data["scanned_pts_mask"] = [ + torch.tensor(item["scanned_pts_mask"]) for item in batch + ] ''' ------ Fixed Length ------ ''' collate_data["best_to_world_pose_9d"] = torch.stack( @@ -206,12 +234,14 @@ class NBVReconstructionDataset(BaseDataset): collate_data["combined_scanned_pts"] = torch.stack( [torch.tensor(item["combined_scanned_pts"]) for item in batch] ) + for key in batch[0].keys(): if key not in [ "scanned_pts", "scanned_n_to_world_pose_9d", "best_to_world_pose_9d", "combined_scanned_pts", + "scanned_pts_mask", ]: collate_data[key] = [item[key] for item in batch] return collate_data @@ -227,9 +257,9 @@ if __name__ == "__main__": torch.manual_seed(seed) np.random.seed(seed) config = { - "root_dir": "/data/hofee/data/packed_preprocessed_data", + "root_dir": "/data/hofee/nbv_rec_part2_preprocessed", "source": "nbv_reconstruction_dataset", - "split_file": "/data/hofee/data/OmniObject3d_train.txt", + "split_file": "/data/hofee/data/sample.txt", "load_from_preprocess": True, "ratio": 0.5, "batch_size": 2, diff --git a/core/pipeline.py b/core/pipeline.py index 8bdb265..8996dc4 100644 --- a/core/pipeline.py +++ b/core/pipeline.py @@ -75,6 +75,8 @@ class NBVReconstructionPipeline(nn.Module): def forward_test(self, data): main_feat = self.get_main_feat(data) + repeat_num = data.get("repeat_num", 1) + main_feat = main_feat.repeat(repeat_num, 1) estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view( main_feat ) @@ -90,6 +92,8 @@ class NBVReconstructionPipeline(nn.Module): ] # List(B): Tensor(S x 9) scanned_pts_mask_batch = data["scanned_pts_mask"] # List(B): Tensor(S x N) + scanned_pts_mask_batch = data["scanned_pts_mask"] # List(B): Tensor(N) + device = next(self.parameters()).device embedding_list_batch = [] diff --git a/core/seq_dataset.py b/core/seq_dataset.py index e27563b..c7332b2 100644 --- a/core/seq_dataset.py +++ b/core/seq_dataset.py @@ -50,11 +50,12 @@ class SeqReconstructionDataset(BaseDataset): if not os.path.exists(os.path.join(self.root_dir, scene_name)): continue scene_name_list.append(scene_name) - return scene_name_list + return scene_name_list def get_scene_name_list(self): return self.scene_name_list + def 
get_datalist(self): datalist = [] total = len(self.scene_name_list) @@ -63,11 +64,15 @@ class SeqReconstructionDataset(BaseDataset): scene_max_cr_idx = 0 frame_len = DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name) - for i in range(frame_len): + for i in range(10,frame_len): path = DataLoadUtil.get_path(self.root_dir, scene_name, i) pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy") + print(pts.shape) if pts.shape[0] == 0: continue + else: + break + print(i) datalist.append({ "scene_name": scene_name, "first_frame": i, @@ -179,9 +184,9 @@ if __name__ == "__main__": np.random.seed(seed) config = { - "root_dir": "/media/hofee/data/data/new_testset", + "root_dir": "/media/hofee/data/data/test_bottle/view", "source": "seq_reconstruction_dataset", - "split_file": "/media/hofee/data/data/OmniObject3d_test.txt", + "split_file": "/media/hofee/data/data/test_bottle/test_bottle.txt", "load_from_preprocess": True, "filter_degree": 75, "num_workers": 0, @@ -189,7 +194,7 @@ if __name__ == "__main__": "type": namespace.Mode.TEST, } - output_dir = "/media/hofee/data/data/new_testset_output" + output_dir = "/media/hofee/data/data/test_bottle/preprocessed_dataset" os.makedirs(output_dir, exist_ok=True) ds = SeqReconstructionDataset(config) diff --git a/core/seq_dataset_preprocessed.py b/core/seq_dataset_preprocessed.py index c92b5db..25512b5 100644 --- a/core/seq_dataset_preprocessed.py +++ b/core/seq_dataset_preprocessed.py @@ -66,7 +66,7 @@ if __name__ == "__main__": load_from_preprocess: True ''' config = { - "root_dir": "H:\\AI\\Datasets\\packed_test_data", + "root_dir": "/media/hofee/data/data/test_bottle/preprocessed_dataset", "source": "seq_reconstruction_dataset", "split_file": "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt", "load_from_preprocess": True, diff --git a/modules/module_lib/pointnet2_modules.py b/modules/module_lib/pointnet2_modules.py new file mode 100644 index 0000000..4b94326 --- /dev/null +++ b/modules/module_lib/pointnet2_modules.py @@ -0,0 +1,162 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from . import pointnet2_utils +from . 
import pytorch_utils as pt_utils +from typing import List + + +class _PointnetSAModuleBase(nn.Module): + + def __init__(self): + super().__init__() + self.npoint = None + self.groupers = None + self.mlps = None + self.pool_method = 'max_pool' + + def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor): + """ + :param xyz: (B, N, 3) tensor of the xyz coordinates of the features + :param features: (B, N, C) tensor of the descriptors of the the features + :param new_xyz: + :return: + new_xyz: (B, npoint, 3) tensor of the new features' xyz + new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors + """ + new_features_list = [] + + xyz_flipped = xyz.transpose(1, 2).contiguous() + if new_xyz is None: + new_xyz = pointnet2_utils.gather_operation( + xyz_flipped, + pointnet2_utils.furthest_point_sample(xyz, self.npoint) + ).transpose(1, 2).contiguous() if self.npoint is not None else None + + for i in range(len(self.groupers)): + new_features = self.groupers[i](xyz, new_xyz, features) # (B, C, npoint, nsample) + + new_features = self.mlps[i](new_features) # (B, mlp[-1], npoint, nsample) + + if self.pool_method == 'max_pool': + new_features = F.max_pool2d( + new_features, kernel_size=[1, new_features.size(3)] + ) # (B, mlp[-1], npoint, 1) + elif self.pool_method == 'avg_pool': + new_features = F.avg_pool2d( + new_features, kernel_size=[1, new_features.size(3)] + ) # (B, mlp[-1], npoint, 1) + else: + raise NotImplementedError + + new_features = new_features.squeeze(-1) # (B, mlp[-1], npoint) + new_features_list.append(new_features) + + return new_xyz, torch.cat(new_features_list, dim=1) + + +class PointnetSAModuleMSG(_PointnetSAModuleBase): + """Pointnet set abstraction layer with multiscale grouping""" + + def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True, + use_xyz: bool = True, pool_method='max_pool', instance_norm=False): + """ + :param npoint: int + :param radii: list of float, list of radii to group with + :param nsamples: list of int, number of samples in each ball query + :param mlps: list of list of int, spec of the pointnet before the global pooling for each scale + :param bn: whether to use batchnorm + :param use_xyz: + :param pool_method: max_pool / avg_pool + :param instance_norm: whether to use instance_norm + """ + super().__init__() + + assert len(radii) == len(nsamples) == len(mlps) + + self.npoint = npoint + self.groupers = nn.ModuleList() + self.mlps = nn.ModuleList() + for i in range(len(radii)): + radius = radii[i] + nsample = nsamples[i] + self.groupers.append( + pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz) + if npoint is not None else pointnet2_utils.GroupAll(use_xyz) + ) + mlp_spec = mlps[i] + if use_xyz: + mlp_spec[0] += 3 + + self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm)) + self.pool_method = pool_method + + +class PointnetSAModule(PointnetSAModuleMSG): + """Pointnet set abstraction layer""" + + def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None, + bn: bool = True, use_xyz: bool = True, pool_method='max_pool', instance_norm=False): + """ + :param mlp: list of int, spec of the pointnet before the global max_pool + :param npoint: int, number of features + :param radius: float, radius of ball + :param nsample: int, number of samples in the ball query + :param bn: whether to use batchnorm + :param use_xyz: + :param 
pool_method: max_pool / avg_pool + :param instance_norm: whether to use instance_norm + """ + super().__init__( + mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz, + pool_method=pool_method, instance_norm=instance_norm + ) + + +class PointnetFPModule(nn.Module): + r"""Propigates the features of one set to another""" + + def __init__(self, *, mlp: List[int], bn: bool = True): + """ + :param mlp: list of int + :param bn: whether to use batchnorm + """ + super().__init__() + self.mlp = pt_utils.SharedMLP(mlp, bn=bn) + + def forward( + self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor + ) -> torch.Tensor: + """ + :param unknown: (B, n, 3) tensor of the xyz positions of the unknown features + :param known: (B, m, 3) tensor of the xyz positions of the known features + :param unknow_feats: (B, C1, n) tensor of the features to be propigated to + :param known_feats: (B, C2, m) tensor of features to be propigated + :return: + new_features: (B, mlp[-1], n) tensor of the features of the unknown features + """ + if known is not None: + dist, idx = pointnet2_utils.three_nn(unknown, known) + dist_recip = 1.0 / (dist + 1e-8) + norm = torch.sum(dist_recip, dim=2, keepdim=True) + weight = dist_recip / norm + + interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight) + else: + interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1)) + + if unknow_feats is not None: + new_features = torch.cat([interpolated_feats, unknow_feats], dim=1) # (B, C2 + C1, n) + else: + new_features = interpolated_feats + + new_features = new_features.unsqueeze(-1) + + new_features = self.mlp(new_features) + + return new_features.squeeze(-1) + + +if __name__ == "__main__": + pass diff --git a/modules/module_lib/pointnet2_utils.py b/modules/module_lib/pointnet2_utils.py new file mode 100644 index 0000000..97a5466 --- /dev/null +++ b/modules/module_lib/pointnet2_utils.py @@ -0,0 +1,291 @@ +import torch +from torch.autograd import Variable +from torch.autograd import Function +import torch.nn as nn +from typing import Tuple +import sys + +import pointnet2_cuda as pointnet2 + + +class FurthestPointSampling(Function): + @staticmethod + def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor: + """ + Uses iterative furthest point sampling to select a set of npoint features that have the largest + minimum distance + :param ctx: + :param xyz: (B, N, 3) where N > npoint + :param npoint: int, number of features in the sampled set + :return: + output: (B, npoint) tensor containing the set + """ + assert xyz.is_contiguous() + + B, N, _ = xyz.size() + output = torch.cuda.IntTensor(B, npoint) + temp = torch.cuda.FloatTensor(B, N).fill_(1e10) + + pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output) + return output + + @staticmethod + def backward(xyz, a=None): + return None, None + + +furthest_point_sample = FurthestPointSampling.apply + + +class GatherOperation(Function): + + @staticmethod + def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: + """ + :param ctx: + :param features: (B, C, N) + :param idx: (B, npoint) index tensor of the features to gather + :return: + output: (B, C, npoint) + """ + assert features.is_contiguous() + assert idx.is_contiguous() + + B, npoint = idx.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, npoint) + + pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output) + + ctx.for_backwards 
= (idx, C, N) + return output + + @staticmethod + def backward(ctx, grad_out): + idx, C, N = ctx.for_backwards + B, npoint = idx.size() + + grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) + grad_out_data = grad_out.data.contiguous() + pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data) + return grad_features, None + + +gather_operation = GatherOperation.apply + + +class ThreeNN(Function): + + @staticmethod + def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + Find the three nearest neighbors of unknown in known + :param ctx: + :param unknown: (B, N, 3) + :param known: (B, M, 3) + :return: + dist: (B, N, 3) l2 distance to the three nearest neighbors + idx: (B, N, 3) index of 3 nearest neighbors + """ + assert unknown.is_contiguous() + assert known.is_contiguous() + + B, N, _ = unknown.size() + m = known.size(1) + dist2 = torch.cuda.FloatTensor(B, N, 3) + idx = torch.cuda.IntTensor(B, N, 3) + + pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx) + return torch.sqrt(dist2), idx + + @staticmethod + def backward(ctx, a=None, b=None): + return None, None + + +three_nn = ThreeNN.apply + + +class ThreeInterpolate(Function): + + @staticmethod + def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor: + """ + Performs weight linear interpolation on 3 features + :param ctx: + :param features: (B, C, M) Features descriptors to be interpolated from + :param idx: (B, n, 3) three nearest neighbors of the target features in features + :param weight: (B, n, 3) weights + :return: + output: (B, C, N) tensor of the interpolated features + """ + assert features.is_contiguous() + assert idx.is_contiguous() + assert weight.is_contiguous() + + B, c, m = features.size() + n = idx.size(1) + ctx.three_interpolate_for_backward = (idx, weight, m) + output = torch.cuda.FloatTensor(B, c, n) + + pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output) + return output + + @staticmethod + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + :param ctx: + :param grad_out: (B, C, N) tensor with gradients of outputs + :return: + grad_features: (B, C, M) tensor with gradients of features + None: + None: + """ + idx, weight, m = ctx.three_interpolate_for_backward + B, c, n = grad_out.size() + + grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_()) + grad_out_data = grad_out.data.contiguous() + + pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data) + return grad_features, None, None + + +three_interpolate = ThreeInterpolate.apply + + +class GroupingOperation(Function): + + @staticmethod + def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor: + """ + :param ctx: + :param features: (B, C, N) tensor of features to group + :param idx: (B, npoint, nsample) tensor containing the indicies of features to group with + :return: + output: (B, C, npoint, nsample) tensor + """ + assert features.is_contiguous() + assert idx.is_contiguous() + + B, nfeatures, nsample = idx.size() + _, C, N = features.size() + output = torch.cuda.FloatTensor(B, C, nfeatures, nsample) + + pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output) + + ctx.for_backwards = (idx, N) + return output + + @staticmethod + def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + """ + :param ctx: + :param 
grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward + :return: + grad_features: (B, C, N) gradient of the features + """ + idx, N = ctx.for_backwards + + B, C, npoint, nsample = grad_out.size() + grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_()) + + grad_out_data = grad_out.data.contiguous() + pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data) + return grad_features, None + + +grouping_operation = GroupingOperation.apply + + +class BallQuery(Function): + + @staticmethod + def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor: + """ + :param ctx: + :param radius: float, radius of the balls + :param nsample: int, maximum number of features in the balls + :param xyz: (B, N, 3) xyz coordinates of the features + :param new_xyz: (B, npoint, 3) centers of the ball query + :return: + idx: (B, npoint, nsample) tensor with the indicies of the features that form the query balls + """ + assert new_xyz.is_contiguous() + assert xyz.is_contiguous() + + B, N, _ = xyz.size() + npoint = new_xyz.size(1) + idx = torch.cuda.IntTensor(B, npoint, nsample).zero_() + + pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx) + return idx + + @staticmethod + def backward(ctx, a=None): + return None, None, None, None + + +ball_query = BallQuery.apply + + +class QueryAndGroup(nn.Module): + def __init__(self, radius: float, nsample: int, use_xyz: bool = True): + """ + :param radius: float, radius of ball + :param nsample: int, maximum number of features to gather in the ball + :param use_xyz: + """ + super().__init__() + self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz + + def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]: + """ + :param xyz: (B, N, 3) xyz coordinates of the features + :param new_xyz: (B, npoint, 3) centroids + :param features: (B, C, N) descriptors of the features + :return: + new_features: (B, 3 + C, npoint, nsample) + """ + idx = ball_query(self.radius, self.nsample, xyz, new_xyz) + xyz_trans = xyz.transpose(1, 2).contiguous() + grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample) + grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1) + + if features is not None: + grouped_features = grouping_operation(features, idx) + if self.use_xyz: + new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, C + 3, npoint, nsample) + else: + new_features = grouped_features + else: + assert self.use_xyz, "Cannot have not features and not use xyz as a feature!" 
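For readability, a pure-PyTorch sketch of what the query-and-group step produces; the real module relies on the compiled pointnet2_cuda kernels, and this reference version only approximates the ball query (it takes the nsample nearest neighbours and clamps points outside the radius to the closest one):

import torch

def query_and_group_reference(xyz, new_xyz, features=None, radius=0.1, nsample=16):
    # xyz: (B, N, 3), new_xyz: (B, npoint, 3), features: (B, C, N) or None
    dist = torch.cdist(new_xyz, xyz)                          # (B, npoint, N)
    idx = dist.argsort(dim=-1)[..., :nsample]                 # nsample nearest neighbours
    idx = torch.where(dist.gather(-1, idx) <= radius, idx, idx[..., :1])
    B = xyz.shape[0]
    batch = torch.arange(B).view(B, 1, 1)
    grouped_xyz = xyz[batch, idx] - new_xyz.unsqueeze(2)      # (B, npoint, nsample, 3), centred on the query point
    grouped_xyz = grouped_xyz.permute(0, 3, 1, 2)             # (B, 3, npoint, nsample)
    if features is None:
        return grouped_xyz
    grouped_feat = features.permute(0, 2, 1)[batch, idx].permute(0, 3, 1, 2)  # (B, C, npoint, nsample)
    return torch.cat([grouped_xyz, grouped_feat], dim=1)      # (B, 3 + C, npoint, nsample)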
+ new_features = grouped_xyz + + return new_features + + +class GroupAll(nn.Module): + def __init__(self, use_xyz: bool = True): + super().__init__() + self.use_xyz = use_xyz + + def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None): + """ + :param xyz: (B, N, 3) xyz coordinates of the features + :param new_xyz: ignored + :param features: (B, C, N) descriptors of the features + :return: + new_features: (B, C + 3, 1, N) + """ + grouped_xyz = xyz.transpose(1, 2).unsqueeze(2) + if features is not None: + grouped_features = features.unsqueeze(2) + if self.use_xyz: + new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, 3 + C, 1, N) + else: + new_features = grouped_features + else: + new_features = grouped_xyz + + return new_features diff --git a/modules/module_lib/pytorch_utils.py b/modules/module_lib/pytorch_utils.py new file mode 100644 index 0000000..09cb7bc --- /dev/null +++ b/modules/module_lib/pytorch_utils.py @@ -0,0 +1,236 @@ +import torch.nn as nn +from typing import List, Tuple + + +class SharedMLP(nn.Sequential): + + def __init__( + self, + args: List[int], + *, + bn: bool = False, + activation=nn.ReLU(inplace=True), + preact: bool = False, + first: bool = False, + name: str = "", + instance_norm: bool = False, + ): + super().__init__() + + for i in range(len(args) - 1): + self.add_module( + name + 'layer{}'.format(i), + Conv2d( + args[i], + args[i + 1], + bn=(not first or not preact or (i != 0)) and bn, + activation=activation + if (not first or not preact or (i != 0)) else None, + preact=preact, + instance_norm=instance_norm + ) + ) + + +class _ConvBase(nn.Sequential): + + def __init__( + self, + in_size, + out_size, + kernel_size, + stride, + padding, + activation, + bn, + init, + conv=None, + batch_norm=None, + bias=True, + preact=False, + name="", + instance_norm=False, + instance_norm_func=None + ): + super().__init__() + + bias = bias and (not bn) + conv_unit = conv( + in_size, + out_size, + kernel_size=kernel_size, + stride=stride, + padding=padding, + bias=bias + ) + init(conv_unit.weight) + if bias: + nn.init.constant_(conv_unit.bias, 0) + + if bn: + if not preact: + bn_unit = batch_norm(out_size) + else: + bn_unit = batch_norm(in_size) + if instance_norm: + if not preact: + in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False) + else: + in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False) + + if preact: + if bn: + self.add_module(name + 'bn', bn_unit) + + if activation is not None: + self.add_module(name + 'activation', activation) + + if not bn and instance_norm: + self.add_module(name + 'in', in_unit) + + self.add_module(name + 'conv', conv_unit) + + if not preact: + if bn: + self.add_module(name + 'bn', bn_unit) + + if activation is not None: + self.add_module(name + 'activation', activation) + + if not bn and instance_norm: + self.add_module(name + 'in', in_unit) + + +class _BNBase(nn.Sequential): + + def __init__(self, in_size, batch_norm=None, name=""): + super().__init__() + self.add_module(name + "bn", batch_norm(in_size)) + + nn.init.constant_(self[0].weight, 1.0) + nn.init.constant_(self[0].bias, 0) + + +class BatchNorm1d(_BNBase): + + def __init__(self, in_size: int, *, name: str = ""): + super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name) + + +class BatchNorm2d(_BNBase): + + def __init__(self, in_size: int, name: str = ""): + super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name) + + +class Conv1d(_ConvBase): + + def __init__( + self, 
+ in_size: int, + out_size: int, + *, + kernel_size: int = 1, + stride: int = 1, + padding: int = 0, + activation=nn.ReLU(inplace=True), + bn: bool = False, + init=nn.init.kaiming_normal_, + bias: bool = True, + preact: bool = False, + name: str = "", + instance_norm=False + ): + super().__init__( + in_size, + out_size, + kernel_size, + stride, + padding, + activation, + bn, + init, + conv=nn.Conv1d, + batch_norm=BatchNorm1d, + bias=bias, + preact=preact, + name=name, + instance_norm=instance_norm, + instance_norm_func=nn.InstanceNorm1d + ) + + +class Conv2d(_ConvBase): + + def __init__( + self, + in_size: int, + out_size: int, + *, + kernel_size: Tuple[int, int] = (1, 1), + stride: Tuple[int, int] = (1, 1), + padding: Tuple[int, int] = (0, 0), + activation=nn.ReLU(inplace=True), + bn: bool = False, + init=nn.init.kaiming_normal_, + bias: bool = True, + preact: bool = False, + name: str = "", + instance_norm=False + ): + super().__init__( + in_size, + out_size, + kernel_size, + stride, + padding, + activation, + bn, + init, + conv=nn.Conv2d, + batch_norm=BatchNorm2d, + bias=bias, + preact=preact, + name=name, + instance_norm=instance_norm, + instance_norm_func=nn.InstanceNorm2d + ) + + +class FC(nn.Sequential): + + def __init__( + self, + in_size: int, + out_size: int, + *, + activation=nn.ReLU(inplace=True), + bn: bool = False, + init=None, + preact: bool = False, + name: str = "" + ): + super().__init__() + + fc = nn.Linear(in_size, out_size, bias=not bn) + if init is not None: + init(fc.weight) + if not bn: + nn.init.constant(fc.bias, 0) + + if preact: + if bn: + self.add_module(name + 'bn', BatchNorm1d(in_size)) + + if activation is not None: + self.add_module(name + 'activation', activation) + + self.add_module(name + 'fc', fc) + + if not preact: + if bn: + self.add_module(name + 'bn', BatchNorm1d(out_size)) + + if activation is not None: + self.add_module(name + 'activation', activation) + diff --git a/modules/pointnet++_encoder.py b/modules/pointnet++_encoder.py new file mode 100644 index 0000000..861dce6 --- /dev/null +++ b/modules/pointnet++_encoder.py @@ -0,0 +1,149 @@ +import torch +import torch.nn as nn +import os +import sys +path = os.path.abspath(__file__) +for i in range(2): + path = os.path.dirname(path) +PROJECT_ROOT = path +sys.path.append(PROJECT_ROOT) +import PytorchBoot.stereotype as stereotype +from modules.module_lib.pointnet2_modules import PointnetSAModuleMSG + + +ClsMSG_CFG_Dense = { + 'NPOINTS': [512, 256, 128, None], + 'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]], + 'NSAMPLE': [[32, 64], [16, 32], [8, 16], [None, None]], + 'MLPS': [[[16, 16, 32], [32, 32, 64]], + [[64, 64, 128], [64, 96, 128]], + [[128, 196, 256], [128, 196, 256]], + [[256, 256, 512], [256, 384, 512]]], + 'DP_RATIO': 0.5, +} + +ClsMSG_CFG_Light = { + 'NPOINTS': [512, 256, 128, None], + 'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]], + 'NSAMPLE': [[16, 32], [16, 32], [16, 32], [None, None]], + 'MLPS': [[[16, 16, 32], [32, 32, 64]], + [[64, 64, 128], [64, 96, 128]], + [[128, 196, 256], [128, 196, 256]], + [[256, 256, 512], [256, 384, 512]]], + 'DP_RATIO': 0.5, +} + +ClsMSG_CFG_Light_2048 = { + 'NPOINTS': [512, 256, 128, None], + 'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]], + 'NSAMPLE': [[16, 32], [16, 32], [16, 32], [None, None]], + 'MLPS': [[[16, 16, 32], [32, 32, 64]], + [[64, 64, 128], [64, 96, 128]], + [[128, 196, 256], [128, 196, 256]], + [[256, 256, 1024], [256, 512, 1024]]], + 'DP_RATIO': 0.5, +} + +ClsMSG_CFG_Strong = { + 
'NPOINTS': [512, 256, 128, 64, None], + 'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16],[0.16, 0.32], [None, None]], + 'NSAMPLE': [[16, 32], [16, 32], [16, 32], [16, 32], [None, None]], + 'MLPS': [[[16, 16, 32], [32, 32, 64]], + [[64, 64, 128], [64, 96, 128]], + [[128, 196, 256], [128, 196, 256]], + [[256, 256, 512], [256, 512, 512]], + [[512, 512, 2048], [512, 1024, 2048]] + ], + 'DP_RATIO': 0.5, +} + +ClsMSG_CFG_Lighter = { + 'NPOINTS': [512, 256, 128, 64, None], + 'RADIUS': [[0.01], [0.02], [0.04], [0.08], [None]], + 'NSAMPLE': [[64], [32], [16], [8], [None]], + 'MLPS': [[[32, 32, 64]], + [[64, 64, 128]], + [[128, 196, 256]], + [[256, 256, 512]], + [[512, 512, 1024]]], + 'DP_RATIO': 0.5, +} + + +def select_params(name): + if name == 'light': + return ClsMSG_CFG_Light + elif name == 'lighter': + return ClsMSG_CFG_Lighter + elif name == 'dense': + return ClsMSG_CFG_Dense + elif name == 'light_2048': + return ClsMSG_CFG_Light_2048 + elif name == 'strong': + return ClsMSG_CFG_Strong + else: + raise NotImplementedError + + +def break_up_pc(pc): + xyz = pc[..., 0:3].contiguous() + features = ( + pc[..., 3:].transpose(1, 2).contiguous() + if pc.size(-1) > 3 else None + ) + + return xyz, features + + +@stereotype.module("pointnet++_encoder") +class PointNet2Encoder(nn.Module): + def encode_points(self, pts, require_per_point_feat=False): + return self.forward(pts) + + def __init__(self, config:dict): + super().__init__() + + channel_in = config.get("in_dim", 3) - 3 + params_name = config.get("params_name", "light") + + self.SA_modules = nn.ModuleList() + selected_params = select_params(params_name) + for k in range(selected_params['NPOINTS'].__len__()): + mlps = selected_params['MLPS'][k].copy() + channel_out = 0 + for idx in range(mlps.__len__()): + mlps[idx] = [channel_in] + mlps[idx] + channel_out += mlps[idx][-1] + + self.SA_modules.append( + PointnetSAModuleMSG( + npoint=selected_params['NPOINTS'][k], + radii=selected_params['RADIUS'][k], + nsamples=selected_params['NSAMPLE'][k], + mlps=mlps, + use_xyz=True, + bn=True + ) + ) + channel_in = channel_out + + def forward(self, point_cloud: torch.cuda.FloatTensor): + xyz, features = break_up_pc(point_cloud) + + l_xyz, l_features = [xyz], [features] + for i in range(len(self.SA_modules)): + li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i]) + l_xyz.append(li_xyz) + l_features.append(li_features) + return l_features[-1].squeeze(-1) + + +if __name__ == '__main__': + seed = 100 + torch.manual_seed(seed) + torch.cuda.manual_seed(seed) + net = PointNet2Encoder(config={"in_dim": 3, "params_name": "strong"}).cuda() + pts = torch.randn(2, 2444, 3).cuda() + print(torch.mean(pts, dim=1)) + pre = net.encode_points(pts) + print(pre.shape) diff --git a/preprocess/preprocessor.py b/preprocess/preprocessor.py index b1ff29a..888962f 100644 --- a/preprocess/preprocessor.py +++ b/preprocess/preprocessor.py @@ -164,10 +164,10 @@ def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"): if __name__ == "__main__": #root = "/media/hofee/repository/new_data_with_normal" - root = r"H:\AI\Datasets\nbv_rec_part2" + root = r"/media/hofee/data/data/test_bottle/view" scene_list = os.listdir(root) from_idx = 0 # 1000 - to_idx = 600 # 1500 + to_idx = len(scene_list) # 1500 cnt = 0 diff --git a/runners/inference_server.py b/runners/inference_server.py index ac62fc9..35ec910 100644 --- a/runners/inference_server.py +++ b/runners/inference_server.py @@ -12,6 +12,7 @@ from PytorchBoot.runners.runner import Runner from PytorchBoot.utils 
import Log from utils.pts import PtsUtil +from beans.predict_result import PredictResult @stereotype.runner("inferencer_server") class InferencerServer(Runner): @@ -50,6 +51,7 @@ class InferencerServer(Runner): def get_result(self, output_data): pred_pose_9d = output_data["pred_pose_9d"] + pred_pose_9d = np.asarray(PredictResult(pred_pose_9d.cpu().numpy(), None, cluster_params=dict(eps=0.25, min_samples=3)).candidate_9d_poses, dtype=np.float32) result = { "pred_pose_9d": pred_pose_9d.tolist() } diff --git a/runners/inferencer.py b/runners/inferencer.py index df79cef..658938a 100644 --- a/runners/inferencer.py +++ b/runners/inferencer.py @@ -4,6 +4,7 @@ from utils.render import RenderUtil from utils.pose import PoseUtil from utils.pts import PtsUtil from utils.reconstruction import ReconstructionUtil +from beans.predict_result import PredictResult import torch from tqdm import tqdm @@ -82,6 +83,7 @@ class Inferencer(Runner): data = test_set.__getitem__(i) scene_name = data["scene_name"] inference_result_path = os.path.join(self.output_dir, test_set_name, f"{scene_name}.pkl") + if os.path.exists(inference_result_path): Log.info(f"Inference result already exists for scene: {scene_name}") continue @@ -138,97 +140,98 @@ class Inferencer(Runner): pred_cr_seq = [last_pred_cr] success = 0 last_pts_num = PtsUtil.voxel_downsample_point_cloud(data["first_scanned_pts"][0], voxel_threshold).shape[0] - import time + #import time while len(pred_cr_seq) < max_iter and retry < max_retry and success < max_success: Log.green(f"iter: {len(pred_cr_seq)}, retry: {retry}/{max_retry}, success: {success}/{max_success}") + combined_scanned_pts = np.vstack(scanned_view_pts) + voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_pts, voxel_threshold) output = self.pipeline(input_data) pred_pose_9d = output["pred_pose_9d"] pred_pose = torch.eye(4, device=pred_pose_9d.device) - - pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9d[:,:6])[0] - pred_pose[:3,3] = pred_pose_9d[0,6:] - - try: - new_target_pts, new_target_normals, new_scan_points_indices = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose) + # # save pred_pose_9d ------ + # root = "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/temp_output_result" + # scene_dir = os.path.join(root, scene_name) + # if not os.path.exists(scene_dir): + # os.makedirs(scene_dir) + # pred_9d_path = os.path.join(scene_dir,f"pred_pose_9d_{len(pred_cr_seq)}.npy") + # pts_path = os.path.join(scene_dir,f"combined_scanned_pts_{len(pred_cr_seq)}.txt") + # np_combined_scanned_pts = input_data["combined_scanned_pts"][0].cpu().numpy() + # np.save(pred_9d_path, pred_pose_9d.cpu().numpy()) + # np.savetxt(pts_path, np_combined_scanned_pts) + # # ----- ----- ----- + predict_result = PredictResult(pred_pose_9d.cpu().numpy(), input_pts=input_data["combined_scanned_pts"][0].cpu().numpy(), cluster_params=dict(eps=0.25, min_samples=3)) + # ----------------------- + # import ipdb; ipdb.set_trace() + # predict_result.visualize() + # ----------------------- + pred_pose_9d_candidates = predict_result.candidate_9d_poses + for pred_pose_9d in pred_pose_9d_candidates: #import ipdb; ipdb.set_trace() - if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold): - curr_overlap_area_threshold = overlap_area_threshold - else: - 
curr_overlap_area_threshold = overlap_area_threshold * 0.5 + pred_pose_9d = torch.tensor(pred_pose_9d, dtype=torch.float32).to(self.device).unsqueeze(0) + pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9d[:,:6])[0] + pred_pose[:3,3] = pred_pose_9d[0,6:] + try: + new_target_pts, new_target_normals, new_scan_points_indices = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose) + #import ipdb; ipdb.set_trace() + if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold): + curr_overlap_area_threshold = overlap_area_threshold + else: + curr_overlap_area_threshold = overlap_area_threshold * 0.5 - downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold) - overlap, _ = ReconstructionUtil.check_overlap(downsampled_new_target_pts, down_sampled_model_pts, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=voxel_threshold, require_new_added_pts_num = True) - if not overlap: - Log.yellow("no overlap!") + downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold) + overlap, _ = ReconstructionUtil.check_overlap(downsampled_new_target_pts, voxel_downsampled_combined_scanned_pts_np, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=voxel_threshold, require_new_added_pts_num = True) + if not overlap: + Log.yellow("no overlap!") + retry += 1 + retry_overlap_pose.append(pred_pose.cpu().numpy().tolist()) + continue + + history_indices.append(new_scan_points_indices) + except Exception as e: + Log.error(f"Error in scene {scene_path}, {e}") + print("current pose: ", pred_pose) + print("curr_pred_cr: ", last_pred_cr) + retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist()) retry += 1 - retry_overlap_pose.append(pred_pose.cpu().numpy().tolist()) continue - history_indices.append(new_scan_points_indices) - except Exception as e: - Log.error(f"Error in scene {scene_path}, {e}") - print("current pose: ", pred_pose) - print("curr_pred_cr: ", last_pred_cr) - retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist()) - retry += 1 - continue - - if new_target_pts.shape[0] == 0: - Log.red("no pts in new target") - retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist()) - retry += 1 - continue - - pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold) - Log.yellow(f"{pred_cr}, {last_pred_cr}, max: , {data['seq_max_coverage_rate']}") - if pred_cr >= data["seq_max_coverage_rate"] - 1e-3: - print("max coverage rate reached!: ", pred_cr) + if new_target_pts.shape[0] == 0: + Log.red("no pts in new target") + retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist()) + retry += 1 + continue - - - pred_cr_seq.append(pred_cr) - scanned_view_pts.append(new_target_pts) + pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold) + Log.yellow(f"{pred_cr}, {last_pred_cr}, max: , {data['seq_max_coverage_rate']}") + if pred_cr >= data["seq_max_coverage_rate"] - 1e-3: + print("max coverage rate reached!: ", pred_cr) + - input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)] - start_indices = [0] - total_points = 0 - for pts in scanned_view_pts: - total_points += pts.shape[0] - start_indices.append(total_points) - 
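The voxel_downsample_with_mapping helper added in core/nbv_dataset.py (and, below, in runners/inferencer.py) keeps one representative point per voxel and also returns, for every original point, the index of the voxel it fell into; that inverse map is what allows per-view masks to be rebuilt over the downsampled cloud. A short illustrative sketch (variable names are made up):

import numpy as np

def voxel_downsample_with_mapping(points, voxel_size=0.003):
    voxel_idx = np.floor(points / voxel_size).astype(np.int32)
    _, inverse, counts = np.unique(voxel_idx, axis=0, return_inverse=True, return_counts=True)
    first_of_each_voxel = np.argsort(inverse)[np.cumsum(counts) - counts]  # one representative per voxel
    return points[first_of_each_voxel], inverse

pts_a = np.random.rand(100, 3)        # e.g. points from view A
pts_b = np.random.rand(80, 3)         # e.g. points from view B
combined = np.vstack([pts_a, pts_b])
down, inverse = voxel_downsample_with_mapping(combined, 0.05)
# mask over the downsampled cloud: which voxels received at least one point from view A
mask_a = np.isin(np.arange(len(down)), np.unique(inverse[:len(pts_a)]))

The dataset and inferencer additionally carry this mask through the subsequent random downsampling step via the returned indices (require_idx=True).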
combined_scanned_pts = np.vstack(scanned_view_pts) - voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_pts, voxel_threshold) - random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N, require_idx=True) - all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np)) - all_random_downsample_idx = all_idx_unique[random_downsample_idx] - scanned_pts_mask = [] - for idx, start_idx in enumerate(start_indices): - if idx == len(start_indices) - 1: - break - end_idx = start_indices[idx+1] - view_inverse = inverse[start_idx:end_idx] - view_unique_downsampled_idx = np.unique(view_inverse) - view_unique_downsampled_idx_set = set(view_unique_downsampled_idx) - mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx]) - scanned_pts_mask.append(mask) - input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device) - #import ipdb; ipdb.set_trace() - input_data["scanned_pts_mask"] = [torch.tensor(scanned_pts_mask, dtype=torch.bool)] + pred_cr_seq.append(pred_cr) + scanned_view_pts.append(new_target_pts) + + input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)] + + combined_scanned_pts = np.vstack(scanned_view_pts) + voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_pts, voxel_threshold) + random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N) + input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device) - - last_pred_cr = pred_cr - pts_num = voxel_downsampled_combined_scanned_pts_np.shape[0] - Log.info(f"delta pts num:,{pts_num - last_pts_num },{pts_num}, {last_pts_num}") + + last_pred_cr = pred_cr + pts_num = voxel_downsampled_combined_scanned_pts_np.shape[0] + Log.info(f"delta pts num:,{pts_num - last_pts_num },{pts_num}, {last_pts_num}") - if pts_num - last_pts_num < self.min_new_pts_num and pred_cr <= data["seq_max_coverage_rate"] - 1e-2: - retry += 1 - retry_duplication_pose.append(pred_pose.cpu().numpy().tolist()) - Log.red(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}") - elif pts_num - last_pts_num < self.min_new_pts_num and pred_cr > data["seq_max_coverage_rate"] - 1e-2: - success += 1 - Log.success(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}") + if pts_num - last_pts_num < self.min_new_pts_num and pred_cr <= data["seq_max_coverage_rate"] - 1e-2: + retry += 1 + retry_duplication_pose.append(pred_pose.cpu().numpy().tolist()) + Log.red(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}") + elif pts_num - last_pts_num < self.min_new_pts_num and pred_cr > data["seq_max_coverage_rate"] - 1e-2: + success += 1 + Log.success(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}") - last_pts_num = pts_num + last_pts_num = pts_num input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist() @@ -270,7 +273,14 @@ class Inferencer(Runner): combined_point_cloud = np.vstack(new_scanned_view_pts) down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold) return 
ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold) - + + def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003): + voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32) + unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True) + idx_sort = np.argsort(inverse) + idx_unique = idx_sort[np.cumsum(counts)-counts] + downsampled_points = point_cloud[idx_unique] + return downsampled_points, inverse def save_inference_result(self, dataset_name, scene_name, output): dataset_dir = os.path.join(self.output_dir, dataset_name) diff --git a/runners/simulator.py b/runners/simulator.py new file mode 100644 index 0000000..fbba793 --- /dev/null +++ b/runners/simulator.py @@ -0,0 +1,456 @@ +import pybullet as p +import pybullet_data +import numpy as np +import os +import time +from PytorchBoot.runners.runner import Runner +import PytorchBoot.stereotype as stereotype +from PytorchBoot.config import ConfigManager +from utils.control import ControlUtil + + +@stereotype.runner("simulator") +class Simulator(Runner): + CREATE: str = "create" + SIMULATE: str = "simulate" + INIT_GRIPPER_POSE:np.ndarray = np.asarray( + [[0.41869126 ,0.87596275 , 0.23951774 , 0.36005292], + [ 0.70787907 ,-0.4800251 , 0.51813998 ,-0.40499909], + [ 0.56884584, -0.04739109 ,-0.82107382 ,0.76881103], + [ 0. , 0. , 0. , 1. ]]) + TURNTABLE_WORLD_TO_PYBULLET_WORLD:np.ndarray = np.asarray( + [[1, 0, 0, 0.8], + [0, 1, 0, 0], + [0, 0, 1, 0.5], + [0, 0, 0, 1]]) + + debug_pose = np.asarray([ + [ + 0.992167055606842, + -0.10552699863910675, + 0.06684812903404236, + -0.07388903945684433 + ], + [ + 0.10134342312812805, + 0.3670985698699951, + -0.9246448874473572, + -0.41582486033439636 + ], + [ + 0.07303514331579208, + 0.9241767525672913, + 0.37491756677627563, + 1.0754833221435547 + ], + [ + 0.0, + 0.0, + 0.0, + 1.0 + ]]) + + def __init__(self, config_path): + super().__init__(config_path) + self.config_path = config_path + self.robot_id = None + self.turntable_id = None + self.target_id = None + camera_config = ConfigManager.get("simulation", "camera") + self.camera_params = { + 'width': camera_config["width"], + 'height': camera_config["height"], + 'fov': camera_config["fov"], + 'near': camera_config["near"], + 'far': camera_config["far"] + } + self.sim_config = ConfigManager.get("simulation") + + def run(self, cmd): + print(f"Simulator run {cmd}") + if cmd == self.CREATE: + self.prepare_env() + self.create_env() + elif cmd == self.SIMULATE: + self.simulate() + + def simulate(self): + self.reset() + self.init() + debug_pose = Simulator.debug_pose + offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]]) + debug_pose = debug_pose @ offset + for _ in range(10000): + debug_pose_2 = np.eye(4) + debug_pose_2[0,0] = -1 + debug_pose_2[2,3] = 0.5 + self.move_to(debug_pose_2) + # Wait for the system to stabilize + for _ in range(20): # Simulate 20 steps to ensure stability + p.stepSimulation() + time.sleep(0.001) # Add small delay to ensure physics simulation + + depth_img, segm_img = self.take_picture() + p.stepSimulation() + + def prepare_env(self): + p.connect(p.GUI) + p.setAdditionalSearchPath(pybullet_data.getDataPath()) + p.setGravity(0, 0, 0) + p.loadURDF("plane.urdf") + + def create_env(self): + print(self.config) + robot_config = self.sim_config["robot"] + turntable_config = self.sim_config["turntable"] + target_config = self.sim_config["target"] + + self.robot_id = p.loadURDF( + 
robot_config["urdf_path"], + robot_config["initial_position"], + p.getQuaternionFromEuler(robot_config["initial_orientation"]), + useFixedBase=True + ) + + p.changeDynamics( + self.robot_id, + linkIndex=-1, + mass=0, + linearDamping=0, + angularDamping=0, + lateralFriction=0 + ) + + visual_shape_id = p.createVisualShape( + shapeType=p.GEOM_CYLINDER, + radius=turntable_config["radius"], + length=turntable_config["height"], + rgbaColor=[0.7, 0.7, 0.7, 1] + ) + collision_shape_id = p.createCollisionShape( + shapeType=p.GEOM_CYLINDER, + radius=turntable_config["radius"], + height=turntable_config["height"] + ) + self.turntable_id = p.createMultiBody( + baseMass=0, # 设置质量为0使其成为静态物体 + baseCollisionShapeIndex=collision_shape_id, + baseVisualShapeIndex=visual_shape_id, + basePosition=turntable_config["center_position"] + ) + + # 禁用转盘的动力学 + p.changeDynamics( + self.turntable_id, + -1, # -1 表示基座 + mass=0, + linearDamping=0, + angularDamping=0, + lateralFriction=0 + ) + + + obj_path = os.path.join(target_config["obj_dir"], target_config["obj_name"], "mesh.obj") + + assert os.path.exists(obj_path), f"Error: File not found at {obj_path}" + + # 加载OBJ文件作为目标物体 + target_visual = p.createVisualShape( + shapeType=p.GEOM_MESH, + fileName=obj_path, + rgbaColor=target_config["rgba_color"], + specularColor=[0.4, 0.4, 0.4], + meshScale=[target_config["scale"]] * 3 + ) + + # 使用简化的碰撞形状 + target_collision = p.createCollisionShape( + shapeType=p.GEOM_MESH, + fileName=obj_path, + meshScale=[target_config["scale"]] * 3, + flags=p.GEOM_FORCE_CONCAVE_TRIMESH # 尝试使用凹面网格 + ) + + + # 创建目标物体 + self.target_id = p.createMultiBody( + baseMass=0, # 设置质量为0使其成为静态物体 + baseCollisionShapeIndex=target_collision, + baseVisualShapeIndex=target_visual, + basePosition=[ + turntable_config["center_position"][0], + turntable_config["center_position"][1], + turntable_config["height"] + turntable_config["center_position"][2] + ], + baseOrientation=p.getQuaternionFromEuler([np.pi/2, 0, 0]) + ) + + # 禁用目标物体的动力学 + p.changeDynamics( + self.target_id, + -1, # -1 表示基座 + mass=0, + linearDamping=0, + angularDamping=0, + lateralFriction=0 + ) + + # 创建固定约束,将目标物体固定在转盘上 + cid = p.createConstraint( + parentBodyUniqueId=self.turntable_id, + parentLinkIndex=-1, # -1 表示基座 + childBodyUniqueId=self.target_id, + childLinkIndex=-1, # -1 表示基座 + jointType=p.JOINT_FIXED, + jointAxis=[0, 0, 0], + parentFramePosition=[0, 0, 0], # 相对于转盘中心的偏移 + childFramePosition=[0, 0, 0] # 相对于物体中心的偏移 + ) + + # 设置约束参数 + p.changeConstraint(cid, maxForce=100) # 设置最大力,确保约束稳定 + + def move_robot_to_pose(self, target_matrix): + # 从4x4齐次矩阵中提取位置(前3个元素) + position = target_matrix[:3, 3] + + # 从3x3旋转矩阵中提取方向四元数 + R = target_matrix[:3, :3] + + # 计算四元数的w分量 + w = np.sqrt(max(0, 1 + R[0,0] + R[1,1] + R[2,2])) / 2 + + # 避免除零错误,同时处理不同情况 + if abs(w) < 1e-8: + # 当w接近0时的特殊情况 + x = np.sqrt(max(0, 1 + R[0,0] - R[1,1] - R[2,2])) / 2 + y = np.sqrt(max(0, 1 - R[0,0] + R[1,1] - R[2,2])) / 2 + z = np.sqrt(max(0, 1 - R[0,0] - R[1,1] + R[2,2])) / 2 + + # 确定符号 + if R[2,1] - R[1,2] < 0: x = -x + if R[0,2] - R[2,0] < 0: y = -y + if R[1,0] - R[0,1] < 0: z = -z + else: + # 正常情况 + x = (R[2,1] - R[1,2]) / (4 * w) + y = (R[0,2] - R[2,0]) / (4 * w) + z = (R[1,0] - R[0,1]) / (4 * w) + + orientation = (x, y, z, w) + + # 设置IK求解参数 + num_joints = p.getNumJoints(self.robot_id) + lower_limits = [] + upper_limits = [] + joint_ranges = [] + rest_poses = [] + + # 获取关节限制和默认姿态 + for i in range(num_joints): + joint_info = p.getJointInfo(self.robot_id, i) + lower_limits.append(joint_info[8]) + upper_limits.append(joint_info[9]) + 
+    def move_robot_to_pose(self, target_matrix):
+        # Extract the position (last column) from the 4x4 homogeneous matrix
+        position = target_matrix[:3, 3]
+
+        # Extract the orientation quaternion from the 3x3 rotation matrix
+        R = target_matrix[:3, :3]
+
+        # Compute the w component of the quaternion
+        w = np.sqrt(max(0, 1 + R[0,0] + R[1,1] + R[2,2])) / 2
+
+        # Avoid division by zero and handle the different cases
+        if abs(w) < 1e-8:
+            # Special case when w is close to 0
+            x = np.sqrt(max(0, 1 + R[0,0] - R[1,1] - R[2,2])) / 2
+            y = np.sqrt(max(0, 1 - R[0,0] + R[1,1] - R[2,2])) / 2
+            z = np.sqrt(max(0, 1 - R[0,0] - R[1,1] + R[2,2])) / 2
+
+            # Determine the signs
+            if R[2,1] - R[1,2] < 0: x = -x
+            if R[0,2] - R[2,0] < 0: y = -y
+            if R[1,0] - R[0,1] < 0: z = -z
+        else:
+            # Regular case
+            x = (R[2,1] - R[1,2]) / (4 * w)
+            y = (R[0,2] - R[2,0]) / (4 * w)
+            z = (R[1,0] - R[0,1]) / (4 * w)
+
+        orientation = (x, y, z, w)
+
+        # Set up the IK solver parameters
+        num_joints = p.getNumJoints(self.robot_id)
+        lower_limits = []
+        upper_limits = []
+        joint_ranges = []
+        rest_poses = []
+
+        # Collect joint limits and a default rest pose
+        for i in range(num_joints):
+            joint_info = p.getJointInfo(self.robot_id, i)
+            lower_limits.append(joint_info[8])
+            upper_limits.append(joint_info[9])
+            joint_ranges.append(joint_info[9] - joint_info[8])
+            rest_poses.append(0)  # a better default rest pose could be set here
+
+        # Solve IK for the target pose; collisions are checked during the stepped motion below
+        joint_poses = p.calculateInverseKinematics(
+            self.robot_id,
+            7,  # end effector link index
+            position,
+            orientation,
+            lowerLimits=lower_limits,
+            upperLimits=upper_limits,
+            jointRanges=joint_ranges,
+            restPoses=rest_poses,
+            maxNumIterations=100,
+            residualThreshold=1e-4
+        )
+
+        # Move to the target joint positions in steps while checking for collisions
+        current_poses = [p.getJointState(self.robot_id, i)[0] for i in range(7)]
+        steps = 50  # move in 50 interpolation steps
+
+        for step in range(steps):
+            # Linearly interpolate the intermediate joint positions
+            intermediate_poses = []
+            for current, target in zip(current_poses, joint_poses):
+                t = (step + 1) / steps
+                intermediate = current + (target - current) * t
+                intermediate_poses.append(intermediate)
+
+            # Command the joint positions
+            for i in range(7):
+                p.setJointMotorControl2(
+                    self.robot_id,
+                    i,
+                    p.POSITION_CONTROL,
+                    intermediate_poses[i]
+                )
+
+            # Step the simulation
+            p.stepSimulation()
+
+            # Check for collisions with the turntable
+            if p.getContactPoints(self.robot_id, self.turntable_id):
+                print("Potential collision detected, stopping motion")
+                return False
+
+        return True
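The hand-rolled matrix-to-quaternion conversion in move_robot_to_pose could also be delegated to scipy, which this patch already depends on in utils/control.py. A minimal equivalent sketch, not part of the patch:

    import numpy as np
    from scipy.spatial.transform import Rotation

    def matrix_to_pos_and_xyzw(target_matrix: np.ndarray):
        # scipy returns quaternions in (x, y, z, w) order, the convention
        # p.calculateInverseKinematics expects for its orientation argument.
        position = target_matrix[:3, 3]
        orientation = Rotation.from_matrix(target_matrix[:3, :3]).as_quat()
        return position, orientation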
+    def rotate_turntable(self, angle_degrees):
+        # Rotate the turntable
+        current_pos, current_orn = p.getBasePositionAndOrientation(self.turntable_id)
+        current_orn = p.getEulerFromQuaternion(current_orn)
+
+        new_orn = list(current_orn)
+        new_orn[2] += np.radians(angle_degrees)
+        new_orn_quat = p.getQuaternionFromEuler(new_orn)
+
+        p.resetBasePositionAndOrientation(
+            self.turntable_id,
+            current_pos,
+            new_orn_quat
+        )
+
+        # Rotate the target object together with the turntable
+        target_pos, target_orn = p.getBasePositionAndOrientation(self.target_id)
+        target_orn = p.getEulerFromQuaternion(target_orn)
+
+        # Update the target object's orientation
+        target_orn = list(target_orn)
+        target_orn[2] += np.radians(angle_degrees)
+        target_orn_quat = p.getQuaternionFromEuler(target_orn)
+
+        # Compute the object's new position (rotation about the turntable center)
+        turntable_center = current_pos
+        relative_pos = np.array(target_pos) - np.array(turntable_center)
+
+        # Build the rotation matrix about the z axis
+        theta = np.radians(angle_degrees)
+        rotation_matrix = np.array([
+            [np.cos(theta), -np.sin(theta), 0],
+            [np.sin(theta), np.cos(theta), 0],
+            [0, 0, 1]
+        ])
+
+        # Compute the new relative position
+        new_relative_pos = rotation_matrix.dot(relative_pos)
+        new_pos = np.array(turntable_center) + new_relative_pos
+
+        # Update the target object's position and orientation
+        p.resetBasePositionAndOrientation(
+            self.target_id,
+            new_pos,
+            target_orn_quat
+        )
+
+    def get_camera_pose(self):
+        end_effector_link = 7  # link index of the Franka end effector
+        state = p.getLinkState(self.robot_id, end_effector_link)
+        ee_pos = state[0]  # position in the world frame
+        camera_orn = state[1]  # orientation in the world frame (quaternion)
+
+        # Convert the orientation quaternion to a rotation matrix
+        rot_matrix = p.getMatrixFromQuaternion(camera_orn)
+        rot_matrix = np.array(rot_matrix).reshape(3, 3)
+
+        # Camera forward vector (the end effector's local z axis)
+        camera_forward = rot_matrix.dot(np.array([0, 0, 1]))
+
+        # Offset the camera position 0.12 m forward along the viewing direction
+        offset = 0.12
+        camera_pos = np.array(ee_pos) + camera_forward * offset
+        camera_target = camera_pos + camera_forward
+
+        # Camera up vector (the end effector's local x axis)
+        camera_up = rot_matrix.dot(np.array([1, 0, 0]))
+
+        return camera_pos, camera_target, camera_up
+
+    def take_picture(self):
+        camera_pos, camera_target, camera_up = self.get_camera_pose()
+
+        view_matrix = p.computeViewMatrix(
+            cameraEyePosition=camera_pos,
+            cameraTargetPosition=camera_target,
+            cameraUpVector=camera_up
+        )
+
+        projection_matrix = p.computeProjectionMatrixFOV(
+            fov=self.camera_params['fov'],
+            aspect=self.camera_params['width'] / self.camera_params['height'],
+            nearVal=self.camera_params['near'],
+            farVal=self.camera_params['far']
+        )
+
+        _, _, rgb_img, depth_img, segm_img = p.getCameraImage(
+            width=self.camera_params['width'],
+            height=self.camera_params['height'],
+            viewMatrix=view_matrix,
+            projectionMatrix=projection_matrix,
+            renderer=p.ER_BULLET_HARDWARE_OPENGL
+        )
+
+        # Convert the non-linear depth buffer to metric depth
+        depth_img = self.camera_params['far'] * self.camera_params['near'] / (
+            self.camera_params['far'] - (self.camera_params['far'] - self.camera_params['near']) * depth_img)
+
+        depth_img = np.array(depth_img)
+        segm_img = np.array(segm_img)
+
+        return depth_img, segm_img
+
+    def reset(self):
+        target_pos = [0.5, 0, 1]
+        target_orn = p.getQuaternionFromEuler([np.pi, 0, 0])
+        target_matrix = np.eye(4)
+        target_matrix[:3, 3] = target_pos
+        target_matrix[:3, :3] = np.asarray(p.getMatrixFromQuaternion(target_orn)).reshape(3, 3)
+        self.move_robot_to_pose(target_matrix)
+
+    def init(self):
+        self.move_to(Simulator.INIT_GRIPPER_POSE)
+
+    def move_to(self, pose: np.ndarray):
+        #delta_degree, min_new_cam_to_world = ControlUtil.solve_display_table_rot_and_cam_to_world(pose)
+        #print(delta_degree)
+        min_new_cam_to_pybullet_world = Simulator.TURNTABLE_WORLD_TO_PYBULLET_WORLD @ pose
+        self.move_to_cam_pose(min_new_cam_to_pybullet_world)
+        #self.rotate_turntable(delta_degree)
+
+    def __del__(self):
+        p.disconnect()
+
+    def create_experiment(self, backup_name=None):
+        return super().create_experiment(backup_name)
+
+    def load_experiment(self, backup_name=None):
+        super().load_experiment(backup_name)
+
+    def move_to_cam_pose(self, camera_pose: np.ndarray):
+        # Extract the position and rotation matrix from the camera pose
+        camera_pos = camera_pose[:3, 3]
+        R_camera = camera_pose[:3, :3]
+
+        # Camera viewing direction (z axis)
+        forward = R_camera[:, 2]
+
+        # The camera has a fixed offset from the end effector, so compute the gripper position:
+        # the camera sits 0.12 m in front of the end effector
+        gripper_pos = camera_pos - forward * 0.12
+
+        # The gripper rotation must account for the fixed camera-to-gripper transform;
+        # here the camera forward is assumed to coincide with the gripper z axis and the
+        # camera x axis with the gripper x axis
+        R_gripper = R_camera
+
+        # Build the 4x4 homogeneous transform
+        gripper_pose = np.eye(4)
+        gripper_pose[:3, :3] = R_gripper
+        gripper_pose[:3, 3] = gripper_pos
+        print(gripper_pose)
+        # Move the robot to the computed pose
+        return self.move_robot_to_pose(gripper_pose)
\ No newline at end of file
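Not part of the patch: a rough usage sketch of how move_to above appears intended to work once its commented-out ControlUtil calls are re-enabled; the pose value below is purely illustrative.

    import numpy as np
    from utils.control import ControlUtil

    cam_to_world = np.eye(4)
    cam_to_world[:3, 3] = [-0.4, -0.3, 0.8]  # hypothetical camera-to-world pose
    delta_degree, new_cam_to_world = ControlUtil.solve_display_table_rot_and_cam_to_world(cam_to_world)
    # rotate_turntable(delta_degree) would then spin the turntable, and the arm would be moved to
    # Simulator.TURNTABLE_WORLD_TO_PYBULLET_WORLD @ new_cam_to_world via move_to_cam_pose.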
diff --git a/runners/view_generator.py b/runners/view_generator.py
index 2e28c44..634ccbf 100644
--- a/runners/view_generator.py
+++ b/runners/view_generator.py
@@ -9,7 +9,7 @@ class ViewGenerator(Runner):
         self.config_path = config_path
 
     def run(self):
-        result = subprocess.run(['blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
+        result = subprocess.run(['/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
         print()
 
     def create_experiment(self, backup_name=None):
diff --git a/utils/control.py b/utils/control.py
new file mode 100644
index 0000000..3d6f56b
--- /dev/null
+++ b/utils/control.py
@@ -0,0 +1,59 @@
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+import time
+
+class ControlUtil:
+
+    curr_rotation = 0
+
+    @staticmethod
+    def check_limit(new_cam_to_world):
+        if new_cam_to_world[0,3] < 0 or new_cam_to_world[1,3] > 0:
+        # if new_cam_to_world[0,3] > 0:
+            return False
+        x = abs(new_cam_to_world[0,3])
+        y = abs(new_cam_to_world[1,3])
+        tan_y_x = y/x
+        min_angle = 0 / 180 * np.pi
+        max_angle = 90 / 180 * np.pi
+        if tan_y_x < np.tan(min_angle) or tan_y_x > np.tan(max_angle):
+            return False
+
+        return True
+
+    @staticmethod
+    def solve_display_table_rot_and_cam_to_world(cam_to_world: np.ndarray) -> tuple:
+        if ControlUtil.check_limit(cam_to_world):
+            return 0, cam_to_world
+        else:
+            min_display_table_rot = 180
+            min_new_cam_to_world = None
+            for display_table_rot in np.linspace(0.1, 360, 1800):
+                new_world_to_world = ControlUtil.get_z_axis_rot_mat(display_table_rot)
+                new_cam_to_new_world = cam_to_world
+                new_cam_to_world = new_world_to_world @ new_cam_to_new_world
+
+                if ControlUtil.check_limit(new_cam_to_world):
+                    if display_table_rot < min_display_table_rot:
+                        min_display_table_rot, min_new_cam_to_world = display_table_rot, new_cam_to_world
+                    if abs(display_table_rot - 360) < min_display_table_rot:
+                        min_display_table_rot, min_new_cam_to_world = display_table_rot - 360, new_cam_to_world
+
+            if min_new_cam_to_world is None:
+                raise ValueError("No valid display table rotation found")
+
+            delta_degree = min_display_table_rot - ControlUtil.curr_rotation
+            ControlUtil.curr_rotation = min_display_table_rot
+            return delta_degree, min_new_cam_to_world
+
+    @staticmethod
+    def get_z_axis_rot_mat(degree):
+        radian = np.radians(degree)
+        return np.array([
+            [np.cos(radian), -np.sin(radian), 0, 0],
+            [np.sin(radian), np.cos(radian), 0, 0],
+            [0, 0, 1, 0],
+            [0, 0, 0, 1]
+        ])
+
diff --git a/utils/render.py b/utils/render.py
index aa2f4e2..33804b4 100644
--- a/utils/render.py
+++ b/utils/render.py
@@ -70,7 +70,7 @@ class RenderUtil:
 
     @staticmethod
     def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
 
         nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
 
@@ -83,11 +83,12 @@ class RenderUtil:
         shutil.copy(scene_info_path, os.path.join(temp_dir, "scene_info.json"))
         params_data_path = os.path.join(temp_dir, "params.json")
         with open(params_data_path, 'w') as f:
-            json.dump(params, f)
+            json.dump(params, f)
         result = subprocess.run([
             '/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir
         ], capture_output=True, text=True)
-        # print(result)
+        #print(result)
+        #import ipdb; ipdb.set_trace()
         path = os.path.join(temp_dir, "tmp")
         cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
         depth_L, depth_R = DataLoadUtil.load_depth(
diff --git a/utils/vis.py b/utils/vis.py
index 0e2c57e..d739b7d 100644
--- a/utils/vis.py
+++ b/utils/vis.py
@@ -7,6 +7,7 @@ import trimesh
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from utils.data_load import DataLoadUtil
 from utils.pts import PtsUtil
+from utils.pose import PoseUtil
 
 class visualizeUtil:
 
@@ -33,7 +34,22 @@ class visualizeUtil:
         all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
         np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
         np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
-        
+
+    @staticmethod
+    def get_cam_pose_and_cam_axis(cam_pose, is_6d_pose):
+        if is_6d_pose:
+            matrix_cam_pose = np.eye(4)
+            matrix_cam_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(cam_pose[:6])
+            matrix_cam_pose[:3, 3] = cam_pose[6:]
+        else:
+            matrix_cam_pose = cam_pose
+        cam_pos = matrix_cam_pose[:3, 3]
+        cam_axis = matrix_cam_pose[:3, 2]
+        num_samples = 10
+        sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
+        sample_points = np.array(sample_points)
+        return cam_pos, sample_points
+
     @staticmethod
     def save_all_combined_pts(root, scene, output_dir):
         length = DataLoadUtil.get_scene_seq_length(root, scene)