138 Commits

Author SHA1 Message Date
7c7f071f95 update 2025-04-12 04:05:51 +08:00
1a0e3c8042 sim control 2025-04-09 15:17:24 +08:00
2fcc650eb7 solve conflicts 2025-03-13 14:49:35 +08:00
b20fa8bb75 update strong pointnet++ 2025-03-13 14:48:15 +08:00
d7fb64ed13 update strong p++ 2025-01-23 08:58:10 +00:00
5a03659112 update inference server 2025-01-07 19:32:02 +08:00
fca984e76b Merge branch 'ab_global_only' of http://git.hofee.top/hofee/nbv_reconstruction into ab_global_only 2025-01-05 23:57:43 +08:00
dec67e8255 upd inference 2025-01-05 23:57:33 +08:00
1535a48a3f upd cluster inference 2025-01-05 15:50:04 +00:00
9c2625b11e upd 2024-12-31 02:52:46 +08:00
2dfb6c57ce upd 2024-12-31 02:51:42 +08:00
88d44f020e train pointnet++ 2024-12-30 14:00:53 +00:00
34548c64a3 deploy pointnet++ finished 2024-12-28 19:50:22 +00:00
47ea0ac434 deploy pointnet++ again 2024-12-28 19:38:27 +00:00
91cabec977 deploy pointnet++ 2024-12-28 10:01:43 +00:00
445e9dc00b Merge branch 'ab_global_only' of https://git.hofee.top/hofee/nbv_reconstruction into ab_global_only 2024-12-26 08:36:59 +00:00
6ce3760471 upd 2024-12-26 08:21:57 +00:00
47624f12cf inference on YCB 2024-12-04 14:52:23 +08:00
501975457f fix overlap 2024-12-02 19:15:48 +08:00
155b655938 upd 2024-11-25 09:41:28 +00:00
2c8ef20321 upd ab_global_only 2024-11-20 15:24:45 +08:00
hofee
493639287e update calculating pts_num in inference.py 2024-11-07 19:42:44 +08:00
hofee
6a608ea74b upd inference_server 2024-11-06 20:07:33 +08:00
hofee
6f427785b3 upd inference 2024-11-05 12:17:20 -06:00
hofee
5bcd0fc6e3 upd 2024-11-04 23:49:12 +08:00
hofee
2b7243d1be upd infernce 2024-11-04 17:17:54 +08:00
04d3a359e1 upd 2024-11-02 21:54:46 +00:00
287983277a global: debug inference 2024-11-01 22:51:16 +00:00
982a3b9b60 global: inference debug 2024-11-01 21:58:44 +00:00
ecd4cfa806 global: debug inference 2024-11-01 15:47:11 +00:00
985a08d89c global: upd inference 2024-11-01 08:43:13 +00:00
b221036e8b global: upd 2024-10-31 16:02:26 +00:00
097712c0ea global_only: ratio2 2024-10-30 15:58:32 +00:00
a954ed0998 global_only: ratio2 2024-10-30 15:49:59 +00:00
f5f8e4266f global_only: ratio 2024-10-30 15:49:11 +00:00
8a05b7883d global_only: train 2024-10-30 15:46:15 +00:00
e23697eb87 global_only: debug 2024-10-29 16:21:30 +00:00
2487039445 global_only: config 2024-10-29 12:18:51 +00:00
f533104e4a global_only: pipeline 2024-10-29 12:04:54 +00:00
a21538c90a global_only: dataset 2024-10-29 11:41:44 +00:00
872405e239 remove fps 2024-10-29 11:23:28 +00:00
b13e45bafc solve merge 2024-10-29 08:14:43 +00:00
63a246c0c8 debug new training 2024-10-28 19:15:48 +00:00
9e39c6c6c9 solve merge 2024-10-28 18:27:16 +00:00
3c9e2c8d12 solve merge 2024-10-28 18:25:53 +00:00
a883a31968 solve merge 2024-10-28 17:03:03 +00:00
49bcf203a8 update 2024-10-28 16:48:34 +00:00
hofee
1c443e533d add inference_server 2024-10-27 04:17:08 -05:00
hofee
3b9c966fd9 Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-26 03:24:18 -05:00
hofee
a41571e79c update 2024-10-26 03:24:01 -05:00
bd27226f0f solve merge 2024-10-25 14:40:26 +00:00
5c56dae24f upd 2024-10-24 20:19:23 +08:00
ebb1ab3c61 udp 2024-10-24 20:18:47 +08:00
hofee
a1226eb294 update normal in computing strategy 2024-10-23 11:13:18 -05:00
hofee
9d0119549e Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-23 02:59:18 -05:00
hofee
64891ef189 update normal strategy 2024-10-23 02:58:58 -05:00
75c70a9e59 fix no normal case 2024-10-23 14:54:53 +08:00
hofee
7e68259f6d update clean preprocess 2024-10-23 01:03:40 -05:00
64b22fd0f4 solve merge 2024-10-23 13:59:12 +08:00
b18c1591b7 load 16bit float 2024-10-23 13:57:45 +08:00
hofee
c55a398b6d update nrm 2024-10-23 00:47:28 -05:00
hofee
e25f7b3334 add save preprocessed normals 2024-10-23 00:42:18 -05:00
hofee
cd56d9ea58 update readme 2024-10-22 16:42:10 +08:00
hofee
d58c7980ed update 2024-10-22 16:41:02 +08:00
hofee
41eddda8d4 solve merge 2024-10-22 16:01:56 +08:00
hofee
ccec9b8e8a add readme.md 2024-10-22 16:01:11 +08:00
0f61e1d64d Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-21 07:33:40 +00:00
9ca0851bf7 debug pipeline 2024-10-21 07:33:32 +00:00
be67be95e9 solve merge 2024-10-19 19:08:39 +08:00
c9d05f0c86 merge 2024-10-19 19:07:40 +08:00
hofee
ed569254dc Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-19 19:06:26 +08:00
hofee
be7ec1a433 update 2024-10-19 19:06:09 +08:00
d0fbb0f198 remove o3d voxel_downsample 2024-10-17 14:28:19 +00:00
5dae3c53db remove mesh from strategy generator 2024-10-17 11:23:08 +00:00
15d1903080 Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-17 11:15:04 +00:00
hofee
b3344626cf solve merge 2024-10-17 06:14:46 -05:00
hofee
0267aed6e5 add normal and visualize util 2024-10-17 06:13:18 -05:00
22e7a1aed4 Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-17 11:11:14 +00:00
8892b6ed05 sync 2024-10-17 11:07:29 +00:00
31b3fa8399 fix bugs 2024-10-16 00:24:41 +08:00
dee7211e0b updaaaaaaaaaaate 2024-10-11 16:34:16 +08:00
hofee
8d92676c34 Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-10 10:16:03 -05:00
hofee
1e4fd13a24 update yaml 2024-10-10 10:15:55 -05:00
d564701807 optimize preproess 2024-10-10 14:49:24 +08:00
5c24d108e0 solve merge conflicts 2024-10-06 17:49:05 +08:00
8f96fae3ce sync 2024-10-06 17:48:06 +08:00
hofee
bfc8ba0f4b update transformer_seq_encoder's config 2024-10-06 13:53:32 +08:00
hofee
fa69f9f879 update fps algo and fps mask 2024-10-06 13:48:54 +08:00
hofee
276f45dcc3 add scanned_pts_mask 2024-10-06 12:01:10 +08:00
hofee
a84417ef62 add fps 2024-10-06 11:49:03 +08:00
hofee
e315fd99ee update new_num limit 2024-10-05 15:36:38 -05:00
hofee
1a3ae15130 update nbv_dataset: scene_points to target_points 2024-10-05 15:17:54 -05:00
hofee
60c9357491 solve merge conflicts 2024-10-05 15:12:55 -05:00
2af52c64e2 update preprocessor.py 2024-10-06 04:11:49 +08:00
hofee
11d460bd9b Merge branch 'master' of https://git.hofee.top/hofee/nbv_reconstruction 2024-10-05 15:11:01 -05:00
hofee
bb9b3f81c3 update reconstruction 2024-10-05 15:10:31 -05:00
dc79f4b313 solve merge conflict 2024-10-06 01:27:31 +08:00
hofee
d098c9f951 optimize preprocessor 2024-10-05 12:24:53 -05:00
4e170445dd Merge branch 'master' of http://git.hofee.top/hofee/nbv_reconstruction 2024-10-05 13:16:49 +08:00
9d6d36f5c2 update preprocessor 2024-10-05 13:16:14 +08:00
ee7537c315 Merge branch 'master' of http://www.hofee.top:3000/hofee/nbv_reconstruction 2024-10-04 16:35:26 +00:00
41c8c060ca server merge 2024-10-04 16:25:24 +00:00
fd7614c847 update preprocessor 2024-10-03 23:36:18 +08:00
hofee
d7561738c6 add preprocess 2024-10-03 01:59:13 +08:00
f460e6e6b2 add TODO 2024-10-02 23:43:25 +08:00
c8b8a44252 update scan_points strategy 2024-10-02 16:24:13 +08:00
551282a0ec Merge branch 'master' of http://git.hofee.top/hofee/nbv_reconstruction 2024-09-30 10:04:59 +08:00
983cb22d4c add from to in generating strategy 2024-09-30 10:04:53 +08:00
hofee
2633a48b4e compute, load, and save covered_scan_pts 2024-09-30 01:24:48 +08:00
hofee
cef7ab4429 add scan points check 2024-09-30 00:55:34 +08:00
hofee
2f6d156abd add embedding_seq_encoder and remove specific seq_encoder 2024-09-29 20:43:01 +08:00
hofee
f42e45d608 add per_points_encoder 2024-09-29 20:12:44 +08:00
hofee
2753f114a3 add pose_n_num_encoder 2024-09-29 18:37:03 +08:00
hofee
99e57c3f4c add target_pts_num into dataset 2024-09-29 18:11:55 +08:00
hofee
cb983fdc74 add random_view_ratio and min_cam_table_included_degree 2024-09-28 22:02:43 +08:00
a358dd98a9 Merge branch 'master' of http://www.hofee.top:3000/hofee/nbv_reconstruction 2024-09-27 08:06:55 +00:00
92250aeb62 debug pose_diff 2024-09-27 08:06:49 +00:00
3bc56af3d5 update inferencer: success rate 2024-09-27 16:01:07 +08:00
030bf55192 add global_pts_pipeline and pose_seq_encooder 2024-09-25 09:31:22 +00:00
ee74b825a6 Merge branch 'master' of http://www.hofee.top:3000/hofee/nbv_reconstruction 2024-09-24 09:10:38 +00:00
43f22ad91b add global_feat 2024-09-24 09:10:25 +00:00
865ae6d329 add display_table_height in renderUtil 2024-09-24 14:23:13 +08:00
b209ce050c change world space origin 2024-09-23 17:45:01 +08:00
3c4077ec4f add multi seq training 2024-09-23 14:30:51 +08:00
6cdff9c83f change config and remove online evaluation 2024-09-20 11:49:32 +00:00
8517255245 update inferencer; add load_from_preprocessed_pts 2024-09-20 19:00:08 +08:00
a621749cc9 Merge branch 'master' of http://git.hofee.top/hofee/nbv_reconstruction 2024-09-20 15:00:38 +08:00
fc700d0a5c add get_real_cam_O_from_cam_L() 2024-09-20 15:00:26 +08:00
18333e6831 change dataset.py to nbv_dataset.py 2024-09-20 06:43:19 +00:00
4e4fcb2ce5 add points_normals under display_table_world_space 2024-09-19 12:12:48 +00:00
55684e86ba adjust display table position 2024-09-19 11:44:10 +00:00
bb75372f7e define display table as world space origin 2024-09-19 11:29:43 +00:00
8d5d6d5df4 change to-1 to to_world 2024-09-19 00:20:26 +08:00
935069d68c add inference 2024-09-19 00:14:26 +08:00
9ec3a00fd4 add seq_dataset 2024-09-18 15:55:34 +08:00
0280dc7292 after first overfit test 2024-09-18 06:49:59 +00:00
d80d0ea79d change config and ensure pipeline works fine 2024-09-13 14:12:35 +00:00
bdd70323a3 remove unnecessary 2024-09-13 09:40:08 +00:00
53 changed files with 5156 additions and 639 deletions

1
.gitignore vendored
View File

@@ -12,3 +12,4 @@ test/
/data_generation/data/*
/data_generation/output/*
test/
temp*

192
Readme.md Normal file
View File

@@ -0,0 +1,192 @@
# Next Best View for Reconstruction
## 1. Setup Environment
### 1.1 Install Main Project
```bash
mkdir nbv_rec
cd nbv_rec
git clone https://git.hofee.top/hofee/nbv_reconstruction.git
```
### 1.2 Install PytorchBoot
the environment is based on PytorchBoot, clone and install it from [PytorchBoot](https://git.hofee.top/hofee/PyTorchBoot.git)
```bash
git clone https://git.hofee.top/hofee/PyTorchBoot.git
cd PyTorchBoot
pip install .
cd ..
```
### 1.3 Install Blender (Optional)
If you want to render your own dataset as described in [section 2. Render Datasets](#2-render-datasets), you'll need to install Blender version 4.0 from [Blender Release](https://download.blender.org/release/Blender4.0/). Here is an example of installing Blender on Ubuntu:
```bash
wget https://download.blender.org/release/Blender4.0/blender-4.0.2-linux-x64.tar.xz
tar -xvf blender-4.0.2-linux-x64.tar.xz
```
If blender is not in your PATH, you can add it by:
```bash
export PATH=$PATH:/path/to/blender/blender-4.0.2-linux-x64
```
To run the blender script, you need to install the `pyyaml` and `scipy` package into your blender python environment. Run the following command to print the python path of your blender:
```bash
./blender -b --python-expr "import sys; print(sys.executable)"
```
Then copy the python path `/path/to/blender_python` shown in the output and run the following command to install the packages:
```bash
/path/to/blender_python -m pip install pyyaml scipy
```
### 1.4 Install Blender Render Script (Optional)
Clone the script from [nbv_rec_blender_render](https://git.hofee.top/hofee/nbv_rec_blender_render.git) and rename it to `blender`:
```bash
git clone https://git.hofee.top/hofee/nbv_rec_blender_render.git
mv nbv_rec_blender_render blender
```
### 1.5 Check Dependencies
Switch to the project root directory and run `pytorch-boot scan` or `ptb scan` to check if all dependencies are installed:
```bash
cd nbv_reconstruction
pytorch-boot scan
# or
ptb scan
```
If you see project structure information in the output, it means all dependencies are correctly installed. Otherwise, you may need to run `pip install xxx` to install the missing packages.
## 2. Render Datasets (Optional)
### 2.1 Download Object Mesh Models
Download the mesh models divided into three parts from:
- [object_meshes_part1.zip](None)
- [object_meshes_part2.zip](https://pan.baidu.com/s/1pBPhrFtBwEGp1g4vwsLIxA?pwd=1234)
- [object_meshes_part3.zip](https://pan.baidu.com/s/1peE8HqFFL0qNFhM5OC69gA?pwd=1234)
or download the whole dataset from [object_meshes.zip](https://pan.baidu.com/s/1ilWWgzg_l7_pPBv64eSgzA?pwd=1234)
Download the table model from [table.obj](https://pan.baidu.com/s/1sjjiID25Es_kmcdUIjU_Dw?pwd=1234)
### 2.2 Set Render Configurations
Open file `configs/local/view_generate_config.yaml` and modify the parameters to fit your needs. You are required to at least set the following parameters in `runner-generate`:
- `object_dir`: the directory of the downloaded object mesh models
- `output_dir`: the directory to save the rendered dataset
- `table_model_path`: the path of the downloaded table model
### 2.3 Render Dataset
There are two ways to render the dataset:
#### 2.3.1 Render with Visual Monitoring
If you want to visually monitor the rendering progress and machine resource usage:
1. In the terminal, run:
```
ptb ui
```
2. Open your browser and visit http://localhost:5000
3. Navigate to `Project Dashboard - Project Structure - Applications - generate_view`
4. Click the `Run` button to execute the rendering script
#### 2.3.2 Render in Terminal
If you don't need visual monitoring and prefer to run the rendering process directly in the terminal, simply run:
```
ptb run generate_view
```
This command will start the rendering process without launching the UI.
## 3. Preprocess
⚠️ The preprocessing code is currently not managed by `PytorchBoot`. To run the preprocessing:
1. Open the `./preprocess/preprocessor.py` file.
2. Locate the `if __name__ == "__main__":` block at the bottom of the file.
3. Specify the dataset folder by setting `root = "path/to/your/dataset"`.
4. Run the preprocessing script directly:
```
python ./preprocess/preprocessor.py
```
This will preprocess the data in the specified dataset folder.
## 4. Generate Strategy Label
### 4.1 Set Configuration
Open the file `configs/local/strategy_generate_config.yaml` and modify the parameters to fit your needs. You are required to at least set the following parameter:
- `datasets.OmniObject3d.root_dir`: the directory of your dataset
### 4.2 Generate Strategy Label
There are two ways to generate the strategy label:
#### 4.2.1 Generate with Visual Monitoring
If you want to visually monitor the generation progress and machine resource usage:
1. In the terminal, run:
```
ptb ui
```
2. Open your browser and visit http://localhost:5000
3. Navigate to Project Dashboard - Project Structure - Applications - generate_strategy
4. Click the `Run` button to execute the generation script
#### 4.2.2 Generate in Terminal
If you don't need visual monitoring and prefer to run the generation process directly in the terminal, simply run:
```
ptb run generate_strategy
```
This command will start the strategy label generation process without launching the UI.
## 5. Train
### 5.1 Set Configuration
Open the file `configs/local/train_config.yaml` and modify the parameters to fit your needs. You are required to at least set the following parameters in the `experiment` section:
```yaml
experiment:
name: your_experiment_name
root_dir: path/to/your/experiment_dir
use_checkpoint: False # if True, the checkpoint will be loaded
epoch: 600 # specific epoch to load, -1 stands for last epoch
max_epochs: 5000 # maximum epochs to train
save_checkpoint_interval: 1 # save checkpoint interval
test_first: True # if True, test process will be performed before training at each epoch
```
Adjust these parameters according to your training requirements.
### 5.2 Start Training
There are two ways to start the training process:
#### 5.2.1 Train with Visual Monitoring
If you want to visually monitor the training progress and machine resource usage:
1. In the terminal, run:
```
ptb ui
```
2. Open your browser and visit http://localhost:5000
3. Navigate to Project Dashboard - Project Structure - Applications - train
4. Click the `Run` button to start the training process
#### 5.2.2 Train in Terminal
If you don't need visual monitoring and prefer to run the training process directly in the terminal, simply run:
```
ptb run train
```
This command will start the training process without launching the UI.
## 6. Evaluation
...

View File

@@ -5,5 +5,5 @@ from runners.strategy_generator import StrategyGenerator
class DataGenerateApp:
@staticmethod
def start():
StrategyGenerator("configs/strategy_generate_config.yaml").run()
StrategyGenerator("configs/local/strategy_generate_config.yaml").run()

View File

@@ -13,4 +13,4 @@ class ViewGenerateApp:
Trainer("path_to_your_train_config").run()
Evaluator("path_to_your_eval_config").run()
'''
ViewGenerator("./configs/view_generate_config.yaml").run()
ViewGenerator("configs/local/view_generate_config.yaml").run()

31
app_inference.py Normal file
View File

@@ -0,0 +1,31 @@
from PytorchBoot.application import PytorchBootApplication
from runners.inferencer import Inferencer
from runners.inference_server import InferencerServer
@PytorchBootApplication("inference")
class InferenceApp:
@staticmethod
def start():
'''
call default or your custom runners here, code will be executed
automatically when type "pytorch-boot run" or "ptb run" in terminal
example:
Trainer("path_to_your_train_config").run()
Evaluator("path_to_your_eval_config").run()
'''
Inferencer("./configs/local/inference_config.yaml").run()
@PytorchBootApplication("server")
class InferenceServerApp:
@staticmethod
def start():
'''
call default or your custom runners here, code will be executed
automatically when type "pytorch-boot run" or "ptb run" in terminal
example:
Trainer("path_to_your_train_config").run()
Evaluator("path_to_your_eval_config").run()
'''
InferencerServer("./configs/server/server_inference_server_config.yaml").run()

11
app_sim.py Normal file
View File

@@ -0,0 +1,11 @@
from PytorchBoot.application import PytorchBootApplication
from runners.simulator import Simulator
@PytorchBootApplication("sim")
class SimulateApp:
@staticmethod
def start():
simulator = Simulator("configs/local/simulation_config.yaml")
simulator.run("create")
simulator.run("simulate")

View File

@@ -5,5 +5,5 @@ from runners.data_spliter import DataSpliter
class DataSplitApp:
@staticmethod
def start():
DataSpliter("configs/split_dataset_config.yaml").run()
DataSpliter("configs/server/server_split_dataset_config.yaml").run()

View File

@@ -5,4 +5,4 @@ from PytorchBoot.runners.trainer import DefaultTrainer
class TrainApp:
@staticmethod
def start():
DefaultTrainer("configs/train_config.yaml").run()
DefaultTrainer("configs/server/server_train_config.yaml").run()

162
beans/predict_result.py Normal file
View File

@@ -0,0 +1,162 @@
import numpy as np
from sklearn.cluster import DBSCAN
class PredictResult:
def __init__(self, raw_predict_result, input_pts=None, cluster_params=dict(eps=0.5, min_samples=2)):
self.input_pts = input_pts
self.cluster_params = cluster_params
self.sampled_9d_pose = raw_predict_result
self.sampled_matrix_pose = self.get_sampled_matrix_pose()
self.distance_matrix = self.calculate_distance_matrix()
self.clusters = self.get_cluster_result()
self.candidate_matrix_poses = self.get_candidate_poses()
self.candidate_9d_poses = [np.concatenate((self.matrix_to_rotation_6d_numpy(matrix[:3,:3]), matrix[:3,3].reshape(-1,)), axis=-1) for matrix in self.candidate_matrix_poses]
self.cluster_num = len(self.clusters)
@staticmethod
def rotation_6d_to_matrix_numpy(d6):
a1, a2 = d6[:3], d6[3:]
b1 = a1 / np.linalg.norm(a1)
b2 = a2 - np.dot(b1, a2) * b1
b2 = b2 / np.linalg.norm(b2)
b3 = np.cross(b1, b2)
return np.stack((b1, b2, b3), axis=-2)
@staticmethod
def matrix_to_rotation_6d_numpy(matrix):
return np.copy(matrix[:2, :]).reshape((6,))
def __str__(self):
info = "Predict Result:\n"
info += f" Predicted pose number: {len(self.sampled_9d_pose)}\n"
info += f" Cluster number: {self.cluster_num}\n"
for i, cluster in enumerate(self.clusters):
info += f" - Cluster {i} size: {len(cluster)}\n"
max_distance = np.max(self.distance_matrix[self.distance_matrix != 0])
min_distance = np.min(self.distance_matrix[self.distance_matrix != 0])
info += f" Max distance: {max_distance}\n"
info += f" Min distance: {min_distance}\n"
return info
def get_sampled_matrix_pose(self):
sampled_matrix_pose = []
for pose in self.sampled_9d_pose:
rotation = pose[:6]
translation = pose[6:]
pose = self.rotation_6d_to_matrix_numpy(rotation)
pose = np.concatenate((pose, translation.reshape(-1, 1)), axis=-1)
pose = np.concatenate((pose, np.array([[0, 0, 0, 1]])), axis=-2)
sampled_matrix_pose.append(pose)
return np.array(sampled_matrix_pose)
def rotation_distance(self, R1, R2):
R = np.dot(R1.T, R2)
trace = np.trace(R)
angle = np.arccos(np.clip((trace - 1) / 2, -1, 1))
return angle
def calculate_distance_matrix(self):
n = len(self.sampled_matrix_pose)
dist_matrix = np.zeros((n, n))
for i in range(n):
for j in range(n):
dist_matrix[i, j] = self.rotation_distance(self.sampled_matrix_pose[i][:3, :3], self.sampled_matrix_pose[j][:3, :3])
return dist_matrix
def cluster_rotations(self):
clustering = DBSCAN(eps=self.cluster_params['eps'], min_samples=self.cluster_params['min_samples'], metric='precomputed')
labels = clustering.fit_predict(self.distance_matrix)
return labels
def get_cluster_result(self):
labels = self.cluster_rotations()
cluster_num = len(set(labels)) - (1 if -1 in labels else 0)
clusters = []
for _ in range(cluster_num):
clusters.append([])
for matrix_pose, label in zip(self.sampled_matrix_pose, labels):
if label != -1:
clusters[label].append(matrix_pose)
clusters.sort(key=len, reverse=True)
return clusters
def get_center_matrix_pose_from_cluster(self, cluster):
min_total_distance = float('inf')
center_matrix_pose = None
for matrix_pose in cluster:
total_distance = 0
for other_matrix_pose in cluster:
rot_distance = self.rotation_distance(matrix_pose[:3, :3], other_matrix_pose[:3, :3])
total_distance += rot_distance
if total_distance < min_total_distance:
min_total_distance = total_distance
center_matrix_pose = matrix_pose
return center_matrix_pose
def get_candidate_poses(self):
candidate_poses = []
for cluster in self.clusters:
candidate_poses.append(self.get_center_matrix_pose_from_cluster(cluster))
return candidate_poses
def visualize(self):
import plotly.graph_objects as go
fig = go.Figure()
if self.input_pts is not None:
fig.add_trace(go.Scatter3d(
x=self.input_pts[:, 0], y=self.input_pts[:, 1], z=self.input_pts[:, 2],
mode='markers', marker=dict(size=1, color='gray', opacity=0.5), name='Input Points'
))
colors = ['aggrnyl', 'agsunset', 'algae', 'amp', 'armyrose', 'balance',
'blackbody', 'bluered', 'blues', 'blugrn', 'bluyl', 'brbg']
for i, cluster in enumerate(self.clusters):
color = colors[i]
candidate_pose = self.candidate_matrix_poses[i]
origin_candidate = candidate_pose[:3, 3]
z_axis_candidate = candidate_pose[:3, 2]
for pose in cluster:
origin = pose[:3, 3]
z_axis = pose[:3, 2]
fig.add_trace(go.Cone(
x=[origin[0]], y=[origin[1]], z=[origin[2]],
u=[z_axis[0]], v=[z_axis[1]], w=[z_axis[2]],
colorscale=color,
sizemode="absolute", sizeref=0.05, anchor="tail", showscale=False
))
fig.add_trace(go.Cone(
x=[origin_candidate[0]], y=[origin_candidate[1]], z=[origin_candidate[2]],
u=[z_axis_candidate[0]], v=[z_axis_candidate[1]], w=[z_axis_candidate[2]],
colorscale=color,
sizemode="absolute", sizeref=0.1, anchor="tail", showscale=False
))
fig.update_layout(
title="Clustered Poses and Input Points",
scene=dict(
xaxis_title='X',
yaxis_title='Y',
zaxis_title='Z'
),
margin=dict(l=0, r=0, b=0, t=40),
scene_camera=dict(eye=dict(x=1.25, y=1.25, z=1.25))
)
fig.show()
if __name__ == "__main__":
step = 0
raw_predict_result = np.load(f"inference_result_pack/inference_result_pack/{step}/all_pred_pose_9d.npy")
input_pts = np.loadtxt(f"inference_result_pack/inference_result_pack/{step}/input_pts.txt")
print(raw_predict_result.shape)
predict_result = PredictResult(raw_predict_result, input_pts, cluster_params=dict(eps=0.25, min_samples=3))
print(predict_result)
print(len(predict_result.candidate_matrix_poses))
print(predict_result.distance_matrix)
#import ipdb; ipdb.set_trace()
predict_result.visualize()

View File

@@ -0,0 +1,93 @@
runner:
general:
seed: 0
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: train_ab_global_only_p++_wp
root_dir: "experiments"
epoch: 922 # -1 stands for last epoch
test:
dataset_list:
- OmniObject3d_test
blender_script_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"
output_dir: "/media/hofee/data/data/p++_wp"
pipeline: nbv_reconstruction_pipeline
voxel_size: 0.003
min_new_area: 1.0
dataset:
# OmniObject3d_train:
# root_dir: "C:\\Document\\Datasets\\inference_test1"
# model_dir: "C:\\Document\\Datasets\\scaled_object_meshes"
# source: seq_reconstruction_dataset_preprocessed
# split_file: "C:\\Document\\Datasets\\data_list\\sample.txt"
# type: test
# filter_degree: 75
# ratio: 1
# batch_size: 1
# num_workers: 12
# pts_num: 8192
# load_from_preprocess: True
OmniObject3d_test:
root_dir: "/media/hofee/data/data/new_testset_output"
model_dir: "/media/hofee/data/data/scaled_object_meshes"
source: seq_reconstruction_dataset_preprocessed
# split_file: "C:\\Document\\Datasets\\data_list\\OmniObject3d_test.txt"
type: test
filter_degree: 75
eval_list:
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1
num_workers: 12
pts_num: 8192
load_from_preprocess: True
pipeline:
nbv_reconstruction_pipeline:
modules:
pts_encoder: pointnet++_encoder
seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True
module:
pointnet++_encoder:
in_dim: 3
params_name: light
pointnet_encoder:
in_dim: 3
out_dim: 1024
global_feat: True
feature_transform: False
transformer_seq_encoder:
embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 1024
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 2048
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False
sample_mode: ode
sampling_steps: 500
sde_mode: ve
pose_encoder:
pose_dim: 9
out_dim: 256
pts_num_encoder:
out_dim: 64

View File

@@ -0,0 +1,36 @@
runner:
general:
seed: 0
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: simulation_debug
root_dir: "experiments"
simulation:
robot:
urdf_path: "assets/franka_panda/panda.urdf"
initial_position: [0, 0, 0] # 机械臂基座位置
initial_orientation: [0, 0, 0] # 机械臂基座朝向(欧拉角)
turntable:
radius: 0.3 # 转盘半径(米)
height: 0.1 # 转盘高度
center_position: [0.8, 0, 0.4]
target:
obj_dir: /media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/assets/object_meshes
obj_name: "google_scan-box_0185"
scale: 1.0 # 缩放系数
mass: 0.1 # 质量(kg)
rgba_color: [0.8, 0.8, 0.8, 1.0] # 目标物体颜色
camera:
width: 640
height: 480
fov: 40
near: 0.01
far: 5.0
displaytable:

View File

@@ -0,0 +1,27 @@
runner:
general:
seed: 0
device: cpu
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: debug
root_dir: "experiments"
generate:
voxel_threshold: 0.003
overlap_area_threshold: 30
compute_with_normal: False
scan_points_threshold: 10
overwrite: False
seq_num: 10
dataset_list:
- OmniObject3d
datasets:
OmniObject3d:
root_dir: /media/hofee/data/data/test_bottle/view
from: 0
to: -1 # ..-1 means end

View File

@@ -1,19 +1,19 @@
runner:
general:
seed: 0
seed: 1
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
parallel: False
experiment:
name: test_overfit
name: debug
root_dir: "experiments"
use_checkpoint: False
epoch: -1 # -1 stands for last epoch
epoch: 600 # -1 stands for last epoch
max_epochs: 5000
save_checkpoint_interval: 1
test_first: False
test_first: True
train:
optimizer:
@@ -32,23 +32,31 @@ runner:
dataset:
OmniObject3d_train:
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
model_dir: "/media/hofee/data/data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
ratio: 1.0
type: train
ratio: 1
batch_size: 1
num_workers: 12
pts_num: 4096
load_from_preprocess: True
OmniObject3d_test:
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
model_dir: "/media/hofee/data/data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt"
type: test
filter_degree: 75
eval_list:
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1
num_workers: 1
num_workers: 12
pts_num: 4096
load_from_preprocess: True
pipeline:
nbv_reconstruction_pipeline:
@@ -68,15 +76,15 @@ module:
transformer_seq_encoder:
pts_embed_dim: 1024
pose_embed_dim: 256
num_heads: 2 # 4
ffn_dim: 128 # 256
num_layers: 2 # 3
output_dim: 1024 # 2048
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 2048
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 1024 # 2048
main_feat_dim: 3072
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False
@@ -93,3 +101,5 @@ loss_function:
evaluation_method:
pose_diff:
coverage_rate_increase:
renderer_path: "/media/hofee/data/project/python/nbv_reconstruction/blender/data_renderer.py"

View File

@@ -7,35 +7,34 @@ runner:
name: debug
root_dir: experiments
generate:
object_dir: /media/hofee/data/data/scaled_object_meshes
port: 5002
from: 0
to: 50 # -1 means all
object_dir: /media/hofee/data/data/test_bottle/bottle_mesh
table_model_path: /media/hofee/data/data/others/table.obj
output_dir: /media/hofee/repository/nbv_reconstruction_data_512
output_dir: /media/hofee/data/data/test_bottle/view
binocular_vision: true
plane_size: 10
max_views: 512
min_views: 64
min_views: 128
random_view_ratio: 0.002
min_cam_table_included_degree: 20
max_diag: 0.7
min_diag: 0.1
min_diag: 0.01
random_config:
display_table:
min_height: 0.05
max_height: 0.15
min_radius: 0.3
max_radius: 0.5
min_R: 0.05
max_R: 0.3
min_G: 0.05
max_G: 0.3
min_B: 0.05
max_B: 0.3
min_radius: 0.2
max_radius: 0.3
display_object:
min_x: 0
max_x: 0.03
max_x: 0.05
min_y: 0
max_y: 0.03
max_y: 0.05
min_z: 0.01
max_z: 0.01
random_rotation_ratio: 0.3
random_rotation_ratio: 0.0
random_objects:
num: 4
cluster: 0.9
@@ -44,8 +43,8 @@ runner:
near_plane: 0.01
far_plane: 5
fov_vertical: 25
resolution: [1280,800]
eye_distance: 0.15
resolution: [640,400]
eye_distance: 0.10
eye_angle: 25
Light:
location: [0,0,3.5]

View File

@@ -0,0 +1,92 @@
runner:
general:
seed: 1
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: w_gf_wo_lf_full_debug
root_dir: "experiments"
epoch: 1 # -1 stands for last epoch
test:
dataset_list:
- OmniObject3d_train
blender_script_path: ""
output_dir: ""
pipeline: nbv_reconstruction_global_pts_pipeline
dataset:
OmniObject3d_train:
root_dir: "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy"
model_dir: "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes"
source: seq_nbv_reconstruction_dataset
split_file: "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt"
type: test
filter_degree: 75
ratio: 1
batch_size: 1
num_workers: 12
pts_num: 4096
load_from_preprocess: True
pipeline:
nbv_reconstruction_local_pts_pipeline:
modules:
pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: False
nbv_reconstruction_global_pts_pipeline:
modules:
pts_encoder: pointnet_encoder
pose_seq_encoder: transformer_pose_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True
module:
pointnet_encoder:
in_dim: 3
out_dim: 1024
global_feat: True
feature_transform: False
transformer_seq_encoder:
pts_embed_dim: 1024
pose_embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 2048
transformer_pose_seq_encoder:
pose_embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 1024
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 2048
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False
sample_mode: ode
sampling_steps: 500
sde_mode: ve
pose_encoder:
pose_dim: 9
out_dim: 256

View File

@@ -0,0 +1,53 @@
runner:
general:
seed: 0
device: cuda
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: train_ab_global_only
root_dir: "experiments"
epoch: -1 # -1 stands for last epoch
pipeline: nbv_reconstruction_pipeline
voxel_size: 0.003
pipeline:
nbv_reconstruction_pipeline:
modules:
pts_encoder: pointnet_encoder
seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True
module:
pointnet_encoder:
in_dim: 3
out_dim: 1024
global_feat: True
feature_transform: False
transformer_seq_encoder:
embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 1024
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 2048
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False
sample_mode: ode
sampling_steps: 500
sde_mode: ve
pose_encoder:
pose_dim: 9
out_dim: 256
pts_num_encoder:
out_dim: 64

View File

@@ -0,0 +1,22 @@
runner:
general:
seed: 0
device: cpu
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: debug
root_dir: "experiments"
split: #
root_dir: "/data/hofee/data/packed_preprocessed_data"
type: "unseen_instance" # "unseen_category"
datasets:
OmniObject3d_train:
path: "/data/hofee/data/OmniObject3d_train.txt"
ratio: 0.9
OmniObject3d_test:
path: "/data/hofee/data/OmniObject3d_test.txt"
ratio: 0.1

View File

@@ -0,0 +1,134 @@
runner:
general:
seed: 0
device: cuda
cuda_visible_devices: "0"
parallel: False
experiment:
name: train_ab_global_only_with_wp_p++_strong
root_dir: "experiments"
use_checkpoint: False
epoch: -1 # -1 stands for last epoch
max_epochs: 5000
save_checkpoint_interval: 1
test_first: False
train:
optimizer:
type: Adam
lr: 0.0001
losses:
- gf_loss
dataset: OmniObject3d_train
test:
frequency: 3 # test frequency
dataset_list:
- OmniObject3d_test
- OmniObject3d_val
pipeline: nbv_reconstruction_pipeline
dataset:
OmniObject3d_train:
root_dir: "/data/hofee/data/new_full_data"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
type: train
cache: True
ratio: 1
batch_size: 64
num_workers: 128
pts_num: 8192
load_from_preprocess: True
OmniObject3d_test:
root_dir: "/data/hofee/data/new_full_data"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_test.txt"
type: test
cache: True
filter_degree: 75
eval_list:
- pose_diff
ratio: 1
batch_size: 80
num_workers: 12
pts_num: 8192
load_from_preprocess: True
OmniObject3d_val:
root_dir: "/data/hofee/data/new_full_data"
model_dir: "../data/scaled_object_meshes"
source: nbv_reconstruction_dataset
split_file: "/data/hofee/data/new_full_data_list/OmniObject3d_train.txt"
type: test
cache: True
filter_degree: 75
eval_list:
- pose_diff
ratio: 0.1
batch_size: 80
num_workers: 12
pts_num: 8192
load_from_preprocess: True
pipeline:
nbv_reconstruction_pipeline:
modules:
pts_encoder: pointnet++_encoder
seq_encoder: transformer_seq_encoder
pose_encoder: pose_encoder
view_finder: gf_view_finder
eps: 1e-5
global_scanned_feat: True
module:
pointnet_encoder:
in_dim: 3
out_dim: 1024
global_feat: True
feature_transform: False
pointnet++_encoder:
in_dim: 3
params_name: strong
transformer_seq_encoder:
embed_dim: 256
num_heads: 4
ffn_dim: 256
num_layers: 3
output_dim: 1024
gf_view_finder:
t_feat_dim: 128
pose_feat_dim: 256
main_feat_dim: 5120
regression_head: Rx_Ry_and_T
pose_mode: rot_matrix
per_point_feature: False
sample_mode: ode
sampling_steps: 500
sde_mode: ve
pose_encoder:
pose_dim: 9
out_dim: 256
pts_num_encoder:
out_dim: 64
loss_function:
gf_loss:
evaluation_method:
pose_diff:
coverage_rate_increase:
renderer_path: "../blender/data_renderer.py"

View File

@@ -1,32 +0,0 @@
runner:
general:
seed: 0
device: cpu
cuda_visible_devices: "0,1,2,3,4,5,6,7"
experiment:
name: debug
root_dir: "experiments"
generate:
voxel_threshold: 0.01
overlap_threshold: 0.5
filter_degree: 75
to_specified_dir: True # if True, output_dir is used, otherwise, root_dir is used
save_points: False
save_best_combined_points: True
save_mesh: True
overwrite: False
dataset_list:
- OmniObject3d
datasets:
OmniObject3d:
#"/media/hofee/data/data/temp_output"
root_dir: "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes"
model_dir: "/media/hofee/data/data/scaled_object_meshes"
#output_dir: "/media/hofee/data/data/label_output"

View File

@@ -1,171 +0,0 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.stereotype as stereotype
import torch
import sys
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("nbv_reconstruction_dataset")
class NBVReconstructionDataset(BaseDataset):
def __init__(self, config):
super(NBVReconstructionDataset, self).__init__(config)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
scene_name_list.append(scene_name)
return scene_name_list
def get_datalist(self):
datalist = []
for scene_name in self.scene_name_list:
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name)
label_data = DataLoadUtil.load_label(label_path)
for data_pair in label_data["data_pairs"]:
scanned_views = data_pair[0]
next_best_view = data_pair[1]
max_coverage_rate = label_data["max_coverage_rate"]
datalist.append(
{
"scanned_views": scanned_views,
"next_best_view": next_best_view,
"max_coverage_rate": max_coverage_rate,
"scene_name": scene_name,
}
)
return datalist
def __getitem__(self, index):
data_item_info = self.datalist[index]
scanned_views = data_item_info["scanned_views"]
nbv = data_item_info["next_best_view"]
max_coverage_rate = data_item_info["max_coverage_rate"]
scene_name = data_item_info["scene_name"]
scanned_views_pts, scanned_coverages_rate, scanned_n_to_1_pose = [], [], []
first_frame_idx = scanned_views[0][0]
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
first_frame_to_world = first_cam_info["cam_to_world"]
for view in scanned_views:
frame_idx = view[0]
coverage_rate = view[1]
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
n_to_world_pose = cam_info["cam_to_world"]
nR_to_world_pose = cam_info["cam_to_world_R"]
n_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), n_to_world_pose)
nR_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), nR_to_world_pose)
depth_L, depth_R = DataLoadUtil.load_depth(view_path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
point_cloud_L = DataLoadUtil.get_point_cloud(depth_L, cam_info['cam_intrinsic'], n_to_1_pose)['points_world']
point_cloud_R = DataLoadUtil.get_point_cloud(depth_R, cam_info['cam_intrinsic'], nR_to_1_pose)['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, 65536)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, 65536)
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(overlap_points, self.pts_num)
scanned_views_pts.append(downsampled_target_point_cloud)
scanned_coverages_rate.append(coverage_rate)
n_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(n_to_1_pose[:3,:3]))
n_to_1_trans = n_to_1_pose[:3,3]
n_to_1_9d = np.concatenate([n_to_1_6d, n_to_1_trans], axis=0)
scanned_n_to_1_pose.append(n_to_1_9d)
nbv_idx, nbv_coverage_rate = nbv[0], nbv[1]
nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx)
cam_info = DataLoadUtil.load_cam_info(nbv_path)
best_frame_to_world = cam_info["cam_to_world"]
best_to_1_pose = np.dot(np.linalg.inv(first_frame_to_world), best_frame_to_world)
best_to_1_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(best_to_1_pose[:3,:3]))
best_to_1_trans = best_to_1_pose[:3,3]
best_to_1_9d = np.concatenate([best_to_1_6d, best_to_1_trans], axis=0)
data_item = {
"scanned_pts": np.asarray(scanned_views_pts,dtype=np.float32),
"scanned_coverage_rate": scanned_coverages_rate,
"scanned_n_to_1_pose_9d": np.asarray(scanned_n_to_1_pose,dtype=np.float32),
"best_coverage_rate": nbv_coverage_rate,
"best_to_1_pose_9d": np.asarray(best_to_1_9d,dtype=np.float32),
"max_coverage_rate": max_coverage_rate,
"scene_name": scene_name
}
return data_item
def __len__(self):
return len(self.datalist)
def get_collate_fn(self):
def collate_fn(batch):
collate_data = {}
collate_data["scanned_pts"] = [torch.tensor(item['scanned_pts']) for item in batch]
collate_data["scanned_n_to_1_pose_9d"] = [torch.tensor(item['scanned_n_to_1_pose_9d']) for item in batch]
collate_data["best_to_1_pose_9d"] = torch.stack([torch.tensor(item['best_to_1_pose_9d']) for item in batch])
for key in batch[0].keys():
if key not in ["scanned_pts", "scanned_n_to_1_pose_9d", "best_to_1_pose_9d"]:
collate_data[key] = [item[key] for item in batch]
return collate_data
return collate_fn
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes",
"split_file": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/OmniObject3d_train.txt",
"ratio": 0.5,
"batch_size": 2,
"num_workers": 0,
"pts_num": 32684
}
ds = NBVReconstructionDataset(config)
print(len(ds))
#ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
break
#
# for idx, data in enumerate(dl):
# cnt=0
# print(data["scene_name"])
# print(data["scanned_coverage_rate"])
# print(data["best_coverage_rate"])
# for pts in data["scanned_pts"][0]:
# #np.savetxt(f"pts_{cnt}.txt", pts)
# cnt+=1
# #np.savetxt("best_pts.txt", best_pts)
# for key, value in data.items():
# if isinstance(value, torch.Tensor):
# print(key, ":" ,value.shape)
# else:
# print(key, ":" ,len(value))
# if key == "scanned_n_to_1_pose_9d":
# for val in value:
# print(val.shape)
# if key == "scanned_pts":
# print("scanned_pts")
# for val in value:
# print(val.shape)
# cnt = 0
# for v in val:
# import ipdb;ipdb.set_trace()
# np.savetxt(f"pts_{cnt}.txt", v)
# cnt+=1
# print()

View File

@@ -1,10 +1,14 @@
import torch
import numpy as np
from utils.reconstruction import ReconstructionUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
from utils.render import RenderUtil
import PytorchBoot.stereotype as stereotype
import PytorchBoot.namespace as namespace
from PytorchBoot.utils.log_util import Log
def get_view_data(cam_pose, scene_name):
pass
@stereotype.evaluation_method("pose_diff")
class PoseDiff:
@@ -16,7 +20,7 @@ class PoseDiff:
rot_angle_list = []
trans_dist_list = []
for output, data in zip(output_list, data_list):
gt_pose_9d = data['best_to_1_pose_9d']
gt_pose_9d = data['best_to_world_pose_9d']
pred_pose_9d = output['pred_pose_9d']
gt_rot_6d = gt_pose_9d[:, :6]
gt_trans = gt_pose_9d[:, 6:]
@@ -25,8 +29,9 @@ class PoseDiff:
gt_rot_mat = PoseUtil.rotation_6d_to_matrix_tensor_batch(gt_rot_6d)
pred_rot_mat = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_rot_6d)
rotation_angles = PoseUtil.rotation_angle_distance(gt_rot_mat, pred_rot_mat)
rot_angle_list.extend(list(rotation_angles))
trans_dist = torch.norm(gt_trans-pred_trans)
trans_dist = torch.norm(gt_trans-pred_trans, dim=1).mean().item()
trans_dist_list.append(trans_dist)
@@ -36,11 +41,11 @@ class PoseDiff:
@stereotype.evaluation_method("coverage_rate_increase",comment="unfinished")
@stereotype.evaluation_method("coverage_rate_increase")
class ConverageRateIncrease:
def __init__(self, config):
self.config = config
self.renderer_path = config["renderer_path"]
def evaluate(self, output_list, data_list):
results = {namespace.TensorBoard.SCALAR: {}}
@@ -48,31 +53,57 @@ class ConverageRateIncrease:
pred_coverate_increase_list = []
cr_diff_list = []
for output, data in zip(output_list, data_list):
scanned_cr = data['scanned_coverages_rate']
scanned_cr = data['scanned_coverage_rate']
gt_cr = data["best_coverage_rate"]
scene_name_list = data['scene_name']
scanned_view_pts_list = data['scanned_pts']
scene_path_list = data['scene_path']
model_points_normals_list = data['model_points_normals']
scanned_view_pts_list = data['scanned_target_pts_list']
pred_pose_9ds = output['pred_pose_9d']
pred_rot_mats = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9ds[:, :6])
pred_pose_mats = torch.cat([pred_rot_mats, pred_pose_9ds[:, 6:]], dim=-1)
nO_to_nL_pose_batch = data["nO_to_nL_pose"]
voxel_threshold_list = data["voxel_threshold"]
filter_degree_list = data["filter_degree"]
first_frame_to_world = data["first_frame_to_world"]
pred_n_to_world_pose_mats = torch.eye(4, device=pred_pose_9ds.device).unsqueeze(0).repeat(pred_pose_9ds.shape[0], 1, 1)
pred_n_to_world_pose_mats[:,:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9ds[:, :6])
pred_n_to_world_pose_mats[:,:3,3] = pred_pose_9ds[:, 6:]
pred_n_to_world_pose_mats = torch.matmul(first_frame_to_world, pred_n_to_world_pose_mats)
for idx in range(len(scanned_cr)):
gt_coverate_increase_list.append(gt_cr-scanned_cr[idx])
scene_name = scene_name_list[idx]
pred_pose = pred_pose_mats[idx]
model_points_normals = model_points_normals_list[idx]
scanned_view_pts = scanned_view_pts_list[idx]
view_data = get_view_data(pred_pose, scene_name)
pred_cr = self.compute_coverage_rate(pred_pose, scanned_view_pts, view_data)
pred_coverate_increase_list.append(pred_cr-scanned_cr[idx])
cr_diff_list.append(gt_cr-pred_cr)
voxel_threshold = voxel_threshold_list[idx]
model_pts = model_points_normals[:,:3]
down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
old_scanned_cr = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
gt_coverate_increase_list.append(gt_cr[idx]-old_scanned_cr)
scene_path = scene_path_list[idx]
pred_pose = pred_n_to_world_pose_mats[idx]
filter_degree = filter_degree_list[idx]
nO_to_nL_pose = nO_to_nL_pose_batch[idx]
try:
new_pts, _ = RenderUtil.render_pts(pred_pose, scene_path, self.renderer_path, model_points_normals, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=nO_to_nL_pose)
pred_cr = self.compute_coverage_rate(scanned_view_pts, new_pts, down_sampled_model_pts, threshold=voxel_threshold)
except Exception as e:
Log.warning(f"Error in scene {scene_path}, {e}")
pred_cr = old_scanned_cr
pred_coverate_increase_list.append(pred_cr-old_scanned_cr)
cr_diff_list.append(gt_cr[idx]-pred_cr)
results[namespace.TensorBoard.SCALAR]["gt_cr_increase"] = float(sum(gt_coverate_increase_list) / len(gt_coverate_increase_list))
results[namespace.TensorBoard.SCALAR]["pred_cr_increase"] = float(sum(pred_coverate_increase_list) / len(pred_coverate_increase_list))
results[namespace.TensorBoard.SCALAR]["cr_diff"] = float(sum(cr_diff_list) / len(cr_diff_list))
return results
def compute_coverage_rate(self, pred_pose, scanned_view_pts, view_data):
pass
def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
if new_pts is not None:
new_scanned_view_pts = scanned_view_pts + [new_pts]
else:
new_scanned_view_pts = scanned_view_pts
combined_point_cloud = np.vstack(new_scanned_view_pts)
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold)
return ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold)

View File

@@ -0,0 +1,94 @@
import torch
from torch import nn
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory.component_factory import ComponentFactory
from PytorchBoot.utils import Log
@stereotype.pipeline("nbv_reconstruction_global_pts_pipeline")
class NBVReconstructionGlobalPointsPipeline(nn.Module):
def __init__(self, config):
super(NBVReconstructionGlobalPointsPipeline, self).__init__()
self.config = config
self.module_config = config["modules"]
self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pts_encoder"])
self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_encoder"])
self.pose_seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_seq_encoder"])
self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["view_finder"])
self.eps = float(self.config["eps"])
self.enable_global_scanned_feat = self.config["global_scanned_feat"]
def forward(self, data):
mode = data["mode"]
if mode == namespace.Mode.TRAIN:
return self.forward_train(data)
elif mode == namespace.Mode.TEST:
return self.forward_test(data)
else:
Log.error("Unknown mode: {}".format(mode), True)
def pertube_data(self, gt_delta_9d):
bs = gt_delta_9d.shape[0]
random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
random_t = random_t.unsqueeze(-1)
mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
std = std.view(-1, 1)
z = torch.randn_like(gt_delta_9d)
perturbed_x = mu + z * std
target_score = - z * std / (std ** 2)
return perturbed_x, random_t, target_score, std
def forward_train(self, data):
main_feat = self.get_main_feat(data)
''' get std '''
best_to_world_pose_9d_batch = data["best_to_world_pose_9d"]
perturbed_x, random_t, target_score, std = self.pertube_data(best_to_world_pose_9d_batch)
input_data = {
"sampled_pose": perturbed_x,
"t": random_t,
"main_feat": main_feat,
}
estimated_score = self.view_finder(input_data)
output = {
"estimated_score": estimated_score,
"target_score": target_score,
"std": std
}
return output
def forward_test(self,data):
main_feat = self.get_main_feat(data)
estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(main_feat)
result = {
"pred_pose_9d": estimated_delta_rot_9d,
"in_process_sample": in_process_sample
}
return result
def get_main_feat(self, data):
scanned_n_to_world_pose_9d_batch = data['scanned_n_to_world_pose_9d']
device = next(self.parameters()).device
pose_feat_seq_list = []
for scanned_n_to_world_pose_9d in scanned_n_to_world_pose_9d_batch:
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device)
pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d))
main_feat = self.pose_seq_encoder.encode_sequence(pose_feat_seq_list)
combined_scanned_pts_batch = data['combined_scanned_pts']
global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch)
main_feat = torch.cat([main_feat, global_scanned_feat], dim=-1)
if torch.isnan(main_feat).any():
Log.error("nan in main_feat", True)
return main_feat

101
core/local_pts_pipeline.py Normal file
View File

@@ -0,0 +1,101 @@
import torch
from torch import nn
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory.component_factory import ComponentFactory
from PytorchBoot.utils import Log
@stereotype.pipeline("nbv_reconstruction_local_pts_pipeline")
class NBVReconstructionLocalPointsPipeline(nn.Module):
def __init__(self, config):
super(NBVReconstructionLocalPointsPipeline, self).__init__()
self.config = config
self.module_config = config["modules"]
self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pts_encoder"])
self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["pose_encoder"])
self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["seq_encoder"])
self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, self.module_config["view_finder"])
self.eps = float(self.config["eps"])
self.enable_global_scanned_feat = self.config["global_scanned_feat"]
def forward(self, data):
mode = data["mode"]
if mode == namespace.Mode.TRAIN:
return self.forward_train(data)
elif mode == namespace.Mode.TEST:
return self.forward_test(data)
else:
Log.error("Unknown mode: {}".format(mode), True)
def pertube_data(self, gt_delta_9d):
bs = gt_delta_9d.shape[0]
random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
random_t = random_t.unsqueeze(-1)
mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
std = std.view(-1, 1)
z = torch.randn_like(gt_delta_9d)
perturbed_x = mu + z * std
target_score = - z * std / (std ** 2)
return perturbed_x, random_t, target_score, std
def forward_train(self, data):
main_feat = self.get_main_feat(data)
''' get std '''
best_to_world_pose_9d_batch = data["best_to_world_pose_9d"]
perturbed_x, random_t, target_score, std = self.pertube_data(best_to_world_pose_9d_batch)
input_data = {
"sampled_pose": perturbed_x,
"t": random_t,
"main_feat": main_feat,
}
estimated_score = self.view_finder(input_data)
output = {
"estimated_score": estimated_score,
"target_score": target_score,
"std": std
}
return output
def forward_test(self,data):
main_feat = self.get_main_feat(data)
estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(main_feat)
result = {
"pred_pose_9d": estimated_delta_rot_9d,
"in_process_sample": in_process_sample
}
return result
def get_main_feat(self, data):
scanned_pts_batch = data['scanned_pts']
scanned_n_to_world_pose_9d_batch = data['scanned_n_to_world_pose_9d']
device = next(self.parameters()).device
pts_feat_seq_list = []
pose_feat_seq_list = []
for scanned_pts,scanned_n_to_world_pose_9d in zip(scanned_pts_batch,scanned_n_to_world_pose_9d_batch):
scanned_pts = scanned_pts.to(device)
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d.to(device)
pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d))
main_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)
if self.enable_global_scanned_feat:
combined_scanned_pts_batch = data['combined_scanned_pts']
global_scanned_feat = self.pts_encoder.encode_points(combined_scanned_pts_batch)
main_feat = torch.cat([main_feat, global_scanned_feat], dim=-1)
if torch.isnan(main_feat).any():
Log.error("nan in main_feat", True)
return main_feat

282
core/nbv_dataset.py Normal file
View File

@@ -0,0 +1,282 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
import time
sys.path.append(r"/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("nbv_reconstruction_dataset")
class NBVReconstructionDataset(BaseDataset):
def __init__(self, config):
super(NBVReconstructionDataset, self).__init__(config)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.type = config["type"]
self.cache = config.get("cache")
self.load_from_preprocess = config.get("load_from_preprocess", False)
if self.type == namespace.Mode.TEST:
#self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
if self.type == namespace.Mode.TRAIN:
scale_ratio = 1
self.datalist = self.datalist*scale_ratio
if self.cache:
expr_root = ConfigManager.get("runner", "experiment", "root_dir")
expr_name = ConfigManager.get("runner", "experiment", "name")
self.cache_dir = os.path.join(expr_root, expr_name, "cache")
# self.preprocess_cache()
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
scene_name_list.append(scene_name)
return scene_name_list
def get_datalist(self):
datalist = []
for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
scene_max_coverage_rate = 0
max_coverage_rate_list = []
for seq_idx in range(seq_num):
label_path = DataLoadUtil.get_label_path(
self.root_dir, scene_name, seq_idx
)
label_data = DataLoadUtil.load_label(label_path)
max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate:
scene_max_coverage_rate = max_coverage_rate
max_coverage_rate_list.append(max_coverage_rate)
if max_coverage_rate_list:
mean_coverage_rate = np.mean(max_coverage_rate_list)
for seq_idx in range(seq_num):
label_path = DataLoadUtil.get_label_path(
self.root_dir, scene_name, seq_idx
)
label_data = DataLoadUtil.load_label(label_path)
if max_coverage_rate_list[seq_idx] > mean_coverage_rate - 0.1:
for data_pair in label_data["data_pairs"]:
scanned_views = data_pair[0]
next_best_view = data_pair[1]
datalist.append(
{
"scanned_views": scanned_views,
"next_best_view": next_best_view,
"seq_max_coverage_rate": max_coverage_rate,
"scene_name": scene_name,
"label_idx": seq_idx,
"scene_max_coverage_rate": scene_max_coverage_rate,
}
)
return datalist
def preprocess_cache(self):
Log.info("preprocessing cache...")
for item_idx in range(len(self.datalist)):
self.__getitem__(item_idx)
Log.success("finish preprocessing cache.")
def load_from_cache(self, scene_name, curr_frame_idx):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
if os.path.exists(cache_path):
data = np.loadtxt(cache_path)
return data
else:
return None
def save_to_cache(self, scene_name, curr_frame_idx, data):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
try:
np.savetxt(cache_path, data)
except Exception as e:
Log.error(f"Save cache failed: {e}")
def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, inverse
def __getitem__(self, index):
data_item_info = self.datalist[index]
scanned_views = data_item_info["scanned_views"]
nbv = data_item_info["next_best_view"]
max_coverage_rate = data_item_info["seq_max_coverage_rate"]
scene_name = data_item_info["scene_name"]
(
scanned_views_pts,
scanned_coverages_rate,
scanned_n_to_world_pose,
) = ([], [], [])
start_time = time.time()
start_indices = [0]
total_points = 0
for view in scanned_views:
frame_idx = view[0]
coverage_rate = view[1]
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
n_to_world_pose = cam_info["cam_to_world"]
target_point_cloud = (
DataLoadUtil.load_from_preprocessed_pts(view_path)
)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(
target_point_cloud, self.pts_num
)
scanned_views_pts.append(downsampled_target_point_cloud)
scanned_coverages_rate.append(coverage_rate)
n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
np.asarray(n_to_world_pose[:3, :3])
)
n_to_world_trans = n_to_world_pose[:3, 3]
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
scanned_n_to_world_pose.append(n_to_world_9d)
total_points += len(downsampled_target_point_cloud)
start_indices.append(total_points)
end_time = time.time()
#Log.info(f"load data time: {end_time - start_time}")
nbv_idx, nbv_coverage_rate = nbv[0], nbv[1]
nbv_path = DataLoadUtil.get_path(self.root_dir, scene_name, nbv_idx)
cam_info = DataLoadUtil.load_cam_info(nbv_path)
best_frame_to_world = cam_info["cam_to_world"]
best_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
np.asarray(best_frame_to_world[:3, :3])
)
best_to_world_trans = best_frame_to_world[:3, 3]
best_to_world_9d = np.concatenate(
[best_to_world_6d, best_to_world_trans], axis=0
)
combined_scanned_views_pts = np.concatenate(scanned_views_pts, axis=0)
voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_views_pts, 0.003)
random_downsampled_combined_scanned_pts_np, random_downsample_idx = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, self.pts_num, require_idx=True)
all_idx_unique = np.arange(len(voxel_downsampled_combined_scanned_pts_np))
all_random_downsample_idx = all_idx_unique[random_downsample_idx]
scanned_pts_mask = []
for idx, start_idx in enumerate(start_indices):
if idx == len(start_indices) - 1:
break
end_idx = start_indices[idx+1]
view_inverse = inverse[start_idx:end_idx]
view_unique_downsampled_idx = np.unique(view_inverse)
view_unique_downsampled_idx_set = set(view_unique_downsampled_idx)
mask = np.array([idx in view_unique_downsampled_idx_set for idx in all_random_downsample_idx])
scanned_pts_mask.append(mask)
data_item = {
"scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
"combined_scanned_pts": np.asarray(random_downsampled_combined_scanned_pts_np, dtype=np.float32), # Ndarray(N x 3)
"scanned_pts_mask": np.asarray(scanned_pts_mask, dtype=np.bool), # Ndarray(N)
"scanned_coverage_rate": scanned_coverages_rate, # List(S): Float, range(0, 1)
"scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
"best_coverage_rate": nbv_coverage_rate, # Float, range(0, 1)
"best_to_world_pose_9d": np.asarray(best_to_world_9d, dtype=np.float32), # Ndarray(9)
"seq_max_coverage_rate": max_coverage_rate, # Float, range(0, 1)
"scene_name": scene_name, # String
}
return data_item
def __len__(self):
return len(self.datalist)
def get_collate_fn(self):
def collate_fn(batch):
collate_data = {}
''' ------ Varialbe Length ------ '''
collate_data["scanned_pts"] = [
torch.tensor(item["scanned_pts"]) for item in batch
]
collate_data["scanned_n_to_world_pose_9d"] = [
torch.tensor(item["scanned_n_to_world_pose_9d"]) for item in batch
]
collate_data["scanned_pts_mask"] = [
torch.tensor(item["scanned_pts_mask"]) for item in batch
]
''' ------ Fixed Length ------ '''
collate_data["best_to_world_pose_9d"] = torch.stack(
[torch.tensor(item["best_to_world_pose_9d"]) for item in batch]
)
collate_data["combined_scanned_pts"] = torch.stack(
[torch.tensor(item["combined_scanned_pts"]) for item in batch]
)
for key in batch[0].keys():
if key not in [
"scanned_pts",
"scanned_n_to_world_pose_9d",
"best_to_world_pose_9d",
"combined_scanned_pts",
"scanned_pts_mask",
]:
collate_data[key] = [item[key] for item in batch]
return collate_data
return collate_fn
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/data/hofee/nbv_rec_part2_preprocessed",
"source": "nbv_reconstruction_dataset",
"split_file": "/data/hofee/data/sample.txt",
"load_from_preprocess": True,
"ratio": 0.5,
"batch_size": 2,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 4096,
"type": namespace.Mode.TRAIN,
}
ds = NBVReconstructionDataset(config)
print(len(ds))
# ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
# ------ Debug Start ------
import ipdb
ipdb.set_trace()
# ------ Debug End ------

154
core/old_seq_dataset.py Normal file
View File

@@ -0,0 +1,154 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/home/data/hofee/project/nbv_rec/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("old_seq_nbv_reconstruction_dataset")
class SeqNBVReconstructionDataset(BaseDataset):
def __init__(self, config):
super(SeqNBVReconstructionDataset, self).__init__(config)
self.type = config["type"]
if self.type != namespace.Mode.TEST:
Log.error("Dataset <seq_nbv_reconstruction_dataset> Only support test mode", terminate=True)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
self.load_from_preprocess = config.get("load_from_preprocess", False)
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
scene_name_list.append(scene_name)
return scene_name_list
def get_datalist(self):
datalist = []
for scene_name in self.scene_name_list:
seq_num = DataLoadUtil.get_label_num(self.root_dir, scene_name)
scene_max_coverage_rate = 0
scene_max_cr_idx = 0
for seq_idx in range(seq_num):
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, seq_idx)
label_data = DataLoadUtil.load_label(label_path)
max_coverage_rate = label_data["max_coverage_rate"]
if max_coverage_rate > scene_max_coverage_rate:
scene_max_coverage_rate = max_coverage_rate
scene_max_cr_idx = seq_idx
label_path = DataLoadUtil.get_label_path(self.root_dir, scene_name, scene_max_cr_idx)
label_data = DataLoadUtil.load_label(label_path)
first_frame = label_data["best_sequence"][0]
best_seq_len = len(label_data["best_sequence"])
datalist.append({
"scene_name": scene_name,
"first_frame": first_frame,
"max_coverage_rate": scene_max_coverage_rate,
"best_seq_len": best_seq_len,
"label_idx": scene_max_cr_idx,
})
return datalist
def __getitem__(self, index):
data_item_info = self.datalist[index]
first_frame_idx = data_item_info["first_frame"][0]
first_frame_coverage = data_item_info["first_frame"][1]
max_coverage_rate = data_item_info["max_coverage_rate"]
scene_name = data_item_info["scene_name"]
first_cam_info = DataLoadUtil.load_cam_info(DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx), binocular=True)
first_view_path = DataLoadUtil.get_path(self.root_dir, scene_name, first_frame_idx)
first_left_cam_pose = first_cam_info["cam_to_world"]
first_center_cam_pose = first_cam_info["cam_to_world_O"]
first_target_point_cloud = DataLoadUtil.load_from_preprocessed_pts(first_view_path)
first_pts_num = first_target_point_cloud.shape[0]
first_downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(first_target_point_cloud, self.pts_num)
first_to_world_rot_6d = PoseUtil.matrix_to_rotation_6d_numpy(np.asarray(first_left_cam_pose[:3,:3]))
first_to_world_trans = first_left_cam_pose[:3,3]
first_to_world_9d = np.concatenate([first_to_world_rot_6d, first_to_world_trans], axis=0)
diag = DataLoadUtil.get_bbox_diag(self.model_dir, scene_name)
voxel_threshold = diag*0.02
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
scene_path = os.path.join(self.root_dir, scene_name)
model_points_normals = DataLoadUtil.load_points_normals(self.root_dir, scene_name)
data_item = {
"first_pts_num": np.asarray(
first_pts_num, dtype=np.int32
),
"first_pts": np.asarray([first_downsampled_target_point_cloud],dtype=np.float32),
"combined_scanned_pts": np.asarray(first_downsampled_target_point_cloud,dtype=np.float32),
"first_to_world_9d": np.asarray([first_to_world_9d],dtype=np.float32),
"scene_name": scene_name,
"max_coverage_rate": max_coverage_rate,
"voxel_threshold": voxel_threshold,
"filter_degree": self.filter_degree,
"O_to_L_pose": first_O_to_first_L_pose,
"first_frame_coverage": first_frame_coverage,
"scene_path": scene_path,
"model_points_normals": model_points_normals,
"best_seq_len": data_item_info["best_seq_len"],
"first_frame_id": first_frame_idx,
}
return data_item
def __len__(self):
return len(self.datalist)
def get_collate_fn(self):
def collate_fn(batch):
collate_data = {}
collate_data["first_pts"] = [torch.tensor(item['first_pts']) for item in batch]
collate_data["first_to_world_9d"] = [torch.tensor(item['first_to_world_9d']) for item in batch]
collate_data["combined_scanned_pts"] = torch.stack([torch.tensor(item['combined_scanned_pts']) for item in batch])
for key in batch[0].keys():
if key not in ["first_pts", "first_to_world_9d", "combined_scanned_pts"]:
collate_data[key] = [item[key] for item in batch]
return collate_data
return collate_fn
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/home/data/hofee/project/nbv_rec/data/nbv_rec_data_512_preproc_npy",
"split_file": "/home/data/hofee/project/nbv_rec/data/OmniObject3d_train.txt",
"model_dir": "/home/data/hofee/project/nbv_rec/data/scaled_object_meshes",
"ratio": 0.005,
"batch_size": 2,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 32684,
"type": namespace.Mode.TEST,
"load_from_preprocess": True
}
ds = SeqNBVReconstructionDataset(config)
print(len(ds))
#ds.__getitem__(10)
dl = ds.get_loader(shuffle=True)
for idx, data in enumerate(dl):
data = ds.process_batch(data, "cuda:0")
print(data)
# ------ Debug Start ------
import ipdb;ipdb.set_trace()
# ------ Debug End ------+

View File

@@ -1,20 +1,34 @@
import torch
import time
from torch import nn
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory.component_factory import ComponentFactory
from PytorchBoot.utils import Log
@stereotype.pipeline("nbv_reconstruction_pipeline", comment="should be tested")
@stereotype.pipeline("nbv_reconstruction_pipeline")
class NBVReconstructionPipeline(nn.Module):
def __init__(self, config):
super(NBVReconstructionPipeline, self).__init__()
self.config = config
self.pts_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pts_encoder"])
self.pose_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["pose_encoder"])
self.seq_encoder = ComponentFactory.create(namespace.Stereotype.MODULE, config["seq_encoder"])
self.view_finder = ComponentFactory.create(namespace.Stereotype.MODULE, config["view_finder"])
self.eps = 1e-5
self.module_config = config["modules"]
self.pts_encoder = ComponentFactory.create(
namespace.Stereotype.MODULE, self.module_config["pts_encoder"]
)
self.pose_encoder = ComponentFactory.create(
namespace.Stereotype.MODULE, self.module_config["pose_encoder"]
)
self.seq_encoder = ComponentFactory.create(
namespace.Stereotype.MODULE, self.module_config["seq_encoder"]
)
self.view_finder = ComponentFactory.create(
namespace.Stereotype.MODULE, self.module_config["view_finder"]
)
self.eps = float(self.config["eps"])
def forward(self, data):
mode = data["mode"]
@@ -28,7 +42,9 @@ class NBVReconstructionPipeline(nn.Module):
def pertube_data(self, gt_delta_9d):
bs = gt_delta_9d.shape[0]
random_t = torch.rand(bs, device=gt_delta_9d.device) * (1. - self.eps) + self.eps
random_t = (
torch.rand(bs, device=gt_delta_9d.device) * (1.0 - self.eps) + self.eps
)
random_t = random_t.unsqueeze(-1)
mu, std = self.view_finder.marginal_prob(gt_delta_9d, random_t)
std = std.view(-1, 1)
@@ -38,45 +54,86 @@ class NBVReconstructionPipeline(nn.Module):
return perturbed_x, random_t, target_score, std
def forward_train(self, data):
seq_feat = self.get_seq_feat(data)
''' get std '''
best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
perturbed_x, random_t, target_score, std = self.pertube_data(best_to_1_pose_9d_batch)
main_feat = self.get_main_feat(data)
""" get std """
best_to_world_pose_9d_batch = data["best_to_world_pose_9d"]
perturbed_x, random_t, target_score, std = self.pertube_data(
best_to_world_pose_9d_batch
)
input_data = {
"sampled_pose": perturbed_x,
"t": random_t,
"seq_feat": seq_feat,
"main_feat": main_feat,
}
estimated_score = self.view_finder(input_data)
output = {
"estimated_score": estimated_score,
"target_score": target_score,
"std": std
"std": std,
}
return output
def forward_test(self, data):
seq_feat = self.get_seq_feat(data)
estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(seq_feat)
main_feat = self.get_main_feat(data)
repeat_num = data.get("repeat_num", 1)
main_feat = main_feat.repeat(repeat_num, 1)
estimated_delta_rot_9d, in_process_sample = self.view_finder.next_best_view(
main_feat
)
result = {
"pred_pose_9d": estimated_delta_rot_9d,
"in_process_sample": in_process_sample
"in_process_sample": in_process_sample,
}
return result
def get_seq_feat(self, data):
scanned_pts_batch = data['scanned_pts']
scanned_n_to_1_pose_9d_batch = data['scanned_n_to_1_pose_9d']
best_to_1_pose_9d_batch = data["best_to_1_pose_9d"]
pts_feat_seq_list = []
pose_feat_seq_list = []
def get_main_feat(self, data):
scanned_n_to_world_pose_9d_batch = data[
"scanned_n_to_world_pose_9d"
] # List(B): Tensor(S x 9)
for scanned_pts,scanned_n_to_1_pose_9d in zip(scanned_pts_batch,scanned_n_to_1_pose_9d_batch):
print(scanned_n_to_1_pose_9d.shape)
scanned_pts = scanned_pts.to(best_to_1_pose_9d_batch.device)
scanned_n_to_1_pose_9d = scanned_n_to_1_pose_9d.to(best_to_1_pose_9d_batch.device)
pts_feat_seq_list.append(self.pts_encoder.encode_points(scanned_pts))
pose_feat_seq_list.append(self.pose_encoder.encode_pose(scanned_n_to_1_pose_9d))
seq_feat = self.seq_encoder.encode_sequence(pts_feat_seq_list, pose_feat_seq_list)
return seq_feat
scanned_pts_mask_batch = data["scanned_pts_mask"] # List(B): Tensor(N)
device = next(self.parameters()).device
embedding_list_batch = []
combined_scanned_pts_batch = data["combined_scanned_pts"] # Tensor(B x N x 3)
global_scanned_feat, per_point_feat_batch = self.pts_encoder.encode_points(
combined_scanned_pts_batch, require_per_point_feat=True
) # global_scanned_feat: Tensor(B x Dg)
batch_size = len(scanned_n_to_world_pose_9d_batch)
for i in range(batch_size):
seq_len = len(scanned_n_to_world_pose_9d_batch[i])
scanned_n_to_world_pose_9d = scanned_n_to_world_pose_9d_batch[i].to(device) # Tensor(S x 9)
scanned_pts_mask = scanned_pts_mask_batch[i] # Tensor(S x N)
per_point_feat = per_point_feat_batch[i] # Tensor(N x Dp)
partial_point_feat_seq = []
for j in range(seq_len):
partial_per_point_feat = per_point_feat[scanned_pts_mask[j]]
if partial_per_point_feat.shape[0] == 0:
partial_point_feat = torch.zeros(per_point_feat.shape[1], device=device)
else:
partial_point_feat = torch.mean(partial_per_point_feat, dim=0) # Tensor(Dp)
partial_point_feat_seq.append(partial_point_feat)
partial_point_feat_seq = torch.stack(partial_point_feat_seq, dim=0) # Tensor(S x Dp)
pose_feat_seq = self.pose_encoder.encode_pose(scanned_n_to_world_pose_9d) # Tensor(S x Dp)
seq_embedding = torch.cat([partial_point_feat_seq, pose_feat_seq], dim=-1)
embedding_list_batch.append(seq_embedding) # List(B): Tensor(S x (Dp))
seq_feat = self.seq_encoder.encode_sequence(embedding_list_batch) # Tensor(B x Ds)
main_feat = torch.cat([seq_feat, global_scanned_feat], dim=-1) # Tensor(B x (Ds+Dg))
if torch.isnan(main_feat).any():
for i in range(len(main_feat)):
if torch.isnan(main_feat[i]).any():
scanned_pts_mask = scanned_pts_mask_batch[i]
Log.info(f"scanned_pts_mask shape: {scanned_pts_mask.shape}")
Log.info(f"scanned_pts_mask sum: {scanned_pts_mask.sum()}")
import ipdb
ipdb.set_trace()
Log.error("nan in main_feat", True)
return main_feat

209
core/seq_dataset.py Normal file
View File

@@ -0,0 +1,209 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import torch
import os
import sys
sys.path.append(r"/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("seq_reconstruction_dataset")
class SeqReconstructionDataset(BaseDataset):
def __init__(self, config):
super(SeqReconstructionDataset, self).__init__(config)
self.config = config
self.root_dir = config["root_dir"]
self.split_file_path = config["split_file"]
self.scene_name_list = self.load_scene_name_list()
self.datalist = self.get_datalist()
self.pts_num = config["pts_num"]
self.type = config["type"]
self.cache = config.get("cache")
self.load_from_preprocess = config.get("load_from_preprocess", False)
if self.type == namespace.Mode.TEST:
#self.model_dir = config["model_dir"]
self.filter_degree = config["filter_degree"]
if self.type == namespace.Mode.TRAIN:
scale_ratio = 1
self.datalist = self.datalist*scale_ratio
if self.cache:
expr_root = ConfigManager.get("runner", "experiment", "root_dir")
expr_name = ConfigManager.get("runner", "experiment", "name")
self.cache_dir = os.path.join(expr_root, expr_name, "cache")
# self.preprocess_cache()
def load_scene_name_list(self):
scene_name_list = []
with open(self.split_file_path, "r") as f:
for line in f:
scene_name = line.strip()
if not os.path.exists(os.path.join(self.root_dir, scene_name)):
continue
scene_name_list.append(scene_name)
return scene_name_list
def get_scene_name_list(self):
return self.scene_name_list
def get_datalist(self):
datalist = []
total = len(self.scene_name_list)
for idx, scene_name in enumerate(self.scene_name_list):
print(f"processing {scene_name} ({idx}/{total})")
scene_max_cr_idx = 0
frame_len = DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)
for i in range(10,frame_len):
path = DataLoadUtil.get_path(self.root_dir, scene_name, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
print(pts.shape)
if pts.shape[0] == 0:
continue
else:
break
print(i)
datalist.append({
"scene_name": scene_name,
"first_frame": i,
"best_seq_len": -1,
"max_coverage_rate": 1.0,
"label_idx": scene_max_cr_idx,
})
return datalist
def preprocess_cache(self):
Log.info("preprocessing cache...")
for item_idx in range(len(self.datalist)):
self.__getitem__(item_idx)
Log.success("finish preprocessing cache.")
def load_from_cache(self, scene_name, curr_frame_idx):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
if os.path.exists(cache_path):
data = np.loadtxt(cache_path)
return data
else:
return None
def save_to_cache(self, scene_name, curr_frame_idx, data):
cache_name = f"{scene_name}_{curr_frame_idx}.txt"
cache_path = os.path.join(self.cache_dir, cache_name)
try:
np.savetxt(cache_path, data)
except Exception as e:
Log.error(f"Save cache failed: {e}")
def seq_combined_pts(self, scene, frame_idx_list):
all_combined_pts = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(self.root_dir, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.003)
return downsampled_all_pts
def __getitem__(self, index):
data_item_info = self.datalist[index]
max_coverage_rate = data_item_info["max_coverage_rate"]
best_seq_len = data_item_info["best_seq_len"]
scene_name = data_item_info["scene_name"]
(
scanned_views_pts,
scanned_coverages_rate,
scanned_n_to_world_pose,
) = ([], [], [])
view = data_item_info["first_frame"]
frame_idx = view
view_path = DataLoadUtil.get_path(self.root_dir, scene_name, frame_idx)
cam_info = DataLoadUtil.load_cam_info(view_path, binocular=True)
n_to_world_pose = cam_info["cam_to_world"]
target_point_cloud = (
DataLoadUtil.load_from_preprocessed_pts(view_path)
)
downsampled_target_point_cloud = PtsUtil.random_downsample_point_cloud(
target_point_cloud, self.pts_num
)
scanned_views_pts.append(downsampled_target_point_cloud)
n_to_world_6d = PoseUtil.matrix_to_rotation_6d_numpy(
np.asarray(n_to_world_pose[:3, :3])
)
first_left_cam_pose = cam_info["cam_to_world"]
first_center_cam_pose = cam_info["cam_to_world_O"]
first_O_to_first_L_pose = np.dot(np.linalg.inv(first_left_cam_pose), first_center_cam_pose)
n_to_world_trans = n_to_world_pose[:3, 3]
n_to_world_9d = np.concatenate([n_to_world_6d, n_to_world_trans], axis=0)
scanned_n_to_world_pose.append(n_to_world_9d)
frame_list = []
for i in range(DataLoadUtil.get_scene_seq_length(self.root_dir, scene_name)):
frame_list.append(i)
gt_pts = self.seq_combined_pts(scene_name, frame_list)
data_item = {
"first_scanned_pts": np.asarray(scanned_views_pts, dtype=np.float32), # Ndarray(S x Nv x 3)
"first_scanned_n_to_world_pose_9d": np.asarray(scanned_n_to_world_pose, dtype=np.float32), # Ndarray(S x 9)
"seq_max_coverage_rate": max_coverage_rate, # Float, range(0, 1)
"best_seq_len": best_seq_len, # Int
"scene_name": scene_name, # String
"gt_pts": gt_pts, # Ndarray(N x 3)
"scene_path": os.path.join(self.root_dir, scene_name), # String
"O_to_L_pose": first_O_to_first_L_pose,
}
return data_item
def __len__(self):
return len(self.datalist)
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
from tqdm import tqdm
import pickle
import os
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
config = {
"root_dir": "/media/hofee/data/data/test_bottle/view",
"source": "seq_reconstruction_dataset",
"split_file": "/media/hofee/data/data/test_bottle/test_bottle.txt",
"load_from_preprocess": True,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 8192,
"type": namespace.Mode.TEST,
}
output_dir = "/media/hofee/data/data/test_bottle/preprocessed_dataset"
os.makedirs(output_dir, exist_ok=True)
ds = SeqReconstructionDataset(config)
for i in tqdm(range(len(ds)), desc="processing dataset"):
output_path = os.path.join(output_dir, f"item_{i}.pkl")
item = ds.__getitem__(i)
for key, value in item.items():
if isinstance(value, np.ndarray):
item[key] = value.tolist()
#import ipdb; ipdb.set_trace()
with open(output_path, "wb") as f:
pickle.dump(item, f)

View File

@@ -0,0 +1,82 @@
import numpy as np
from PytorchBoot.dataset import BaseDataset
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from PytorchBoot.utils.log_util import Log
import pickle
import torch
import os
import sys
sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
from utils.data_load import DataLoadUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
@stereotype.dataset("seq_reconstruction_dataset_preprocessed")
class SeqReconstructionDatasetPreprocessed(BaseDataset):
def __init__(self, config):
super(SeqReconstructionDatasetPreprocessed, self).__init__(config)
self.config = config
self.root_dir = config["root_dir"]
self.real_root_dir = r"/media/hofee/data/data/new_testset"
self.item_list = os.listdir(self.root_dir)
def __getitem__(self, index):
data = pickle.load(open(os.path.join(self.root_dir, self.item_list[index]), "rb"))
data_item = {
"first_scanned_pts": np.asarray(data["first_scanned_pts"], dtype=np.float32), # Ndarray(S x Nv x 3)
"first_scanned_n_to_world_pose_9d": np.asarray(data["first_scanned_n_to_world_pose_9d"], dtype=np.float32), # Ndarray(S x 9)
"seq_max_coverage_rate": data["seq_max_coverage_rate"], # Float, range(0, 1)
"best_seq_len": data["best_seq_len"], # Int
"scene_name": data["scene_name"], # String
"gt_pts": np.asarray(data["gt_pts"], dtype=np.float32), # Ndarray(N x 3)
"scene_path": os.path.join(self.real_root_dir, data["scene_name"]), # String
"O_to_L_pose": np.asarray(data["O_to_L_pose"], dtype=np.float32),
}
return data_item
def __len__(self):
return len(self.item_list)
# -------------- Debug ---------------- #
if __name__ == "__main__":
import torch
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
'''
OmniObject3d_test:
root_dir: "H:\\AI\\Datasets\\packed_test_data"
model_dir: "H:\\AI\\Datasets\\scaled_object_meshes"
source: seq_reconstruction_dataset
split_file: "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt"
type: test
filter_degree: 75
eval_list:
- pose_diff
- coverage_rate_increase
ratio: 0.1
batch_size: 1
num_workers: 12
pts_num: 8192
load_from_preprocess: True
'''
config = {
"root_dir": "/media/hofee/data/data/test_bottle/preprocessed_dataset",
"source": "seq_reconstruction_dataset",
"split_file": "H:\\AI\\Datasets\\data_list\\OmniObject3d_test.txt",
"load_from_preprocess": True,
"ratio": 1,
"filter_degree": 75,
"num_workers": 0,
"pts_num": 8192,
"type": "test",
}
ds = SeqReconstructionDataset(config)
print(len(ds))
print(ds.__getitem__(10))

View File

@@ -32,7 +32,7 @@ def cond_ode_sampler(
init_x=None,
):
pose_dim = PoseUtil.get_pose_dim(pose_mode)
batch_size = data["seq_feat"].shape[0]
batch_size = data["main_feat"].shape[0]
init_x = (
prior((batch_size, pose_dim), T=T).to(device)
if init_x is None

View File

@@ -2,9 +2,6 @@ import torch
import torch.nn as nn
import PytorchBoot.stereotype as stereotype
import sys
sys.path.append(r"C:\Document\Local Project\nbv_rec\nbv_reconstruction")
from utils.pose import PoseUtil
import modules.module_lib as mlib
import modules.func_lib as flib
@@ -83,13 +80,13 @@ class GradientFieldViewFinder(nn.Module):
"""
Args:
data, dict {
'seq_feat': [bs, c]
'main_feat': [bs, c]
'pose_sample': [bs, pose_dim]
't': [bs, 1]
}
"""
seq_feat = data['seq_feat']
main_feat = data['main_feat']
sampled_pose = data['sampled_pose']
t = data['t']
t_feat = self.t_encoder(t.squeeze(1))
@@ -98,7 +95,7 @@ class GradientFieldViewFinder(nn.Module):
if self.per_point_feature:
raise NotImplementedError
else:
total_feat = torch.cat([seq_feat, t_feat, pose_feat], dim=-1)
total_feat = torch.cat([main_feat, t_feat, pose_feat], dim=-1)
_, std = self.marginal_prob_fn(total_feat, t)
if self.regression_head == 'Rx_Ry_and_T':
@@ -137,9 +134,9 @@ class GradientFieldViewFinder(nn.Module):
return in_process_sample, res
def next_best_view(self, seq_feat):
def next_best_view(self, main_feat):
data = {
'seq_feat': seq_feat,
'main_feat': main_feat,
}
in_process_sample, res = self.sample(data)
return res.to(dtype=torch.float32), in_process_sample

View File

@@ -0,0 +1,162 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from . import pointnet2_utils
from . import pytorch_utils as pt_utils
from typing import List
class _PointnetSAModuleBase(nn.Module):
def __init__(self):
super().__init__()
self.npoint = None
self.groupers = None
self.mlps = None
self.pool_method = 'max_pool'
def forward(self, xyz: torch.Tensor, features: torch.Tensor = None, new_xyz=None) -> (torch.Tensor, torch.Tensor):
"""
:param xyz: (B, N, 3) tensor of the xyz coordinates of the features
:param features: (B, N, C) tensor of the descriptors of the the features
:param new_xyz:
:return:
new_xyz: (B, npoint, 3) tensor of the new features' xyz
new_features: (B, npoint, \sum_k(mlps[k][-1])) tensor of the new_features descriptors
"""
new_features_list = []
xyz_flipped = xyz.transpose(1, 2).contiguous()
if new_xyz is None:
new_xyz = pointnet2_utils.gather_operation(
xyz_flipped,
pointnet2_utils.furthest_point_sample(xyz, self.npoint)
).transpose(1, 2).contiguous() if self.npoint is not None else None
for i in range(len(self.groupers)):
new_features = self.groupers[i](xyz, new_xyz, features) # (B, C, npoint, nsample)
new_features = self.mlps[i](new_features) # (B, mlp[-1], npoint, nsample)
if self.pool_method == 'max_pool':
new_features = F.max_pool2d(
new_features, kernel_size=[1, new_features.size(3)]
) # (B, mlp[-1], npoint, 1)
elif self.pool_method == 'avg_pool':
new_features = F.avg_pool2d(
new_features, kernel_size=[1, new_features.size(3)]
) # (B, mlp[-1], npoint, 1)
else:
raise NotImplementedError
new_features = new_features.squeeze(-1) # (B, mlp[-1], npoint)
new_features_list.append(new_features)
return new_xyz, torch.cat(new_features_list, dim=1)
class PointnetSAModuleMSG(_PointnetSAModuleBase):
"""Pointnet set abstraction layer with multiscale grouping"""
def __init__(self, *, npoint: int, radii: List[float], nsamples: List[int], mlps: List[List[int]], bn: bool = True,
use_xyz: bool = True, pool_method='max_pool', instance_norm=False):
"""
:param npoint: int
:param radii: list of float, list of radii to group with
:param nsamples: list of int, number of samples in each ball query
:param mlps: list of list of int, spec of the pointnet before the global pooling for each scale
:param bn: whether to use batchnorm
:param use_xyz:
:param pool_method: max_pool / avg_pool
:param instance_norm: whether to use instance_norm
"""
super().__init__()
assert len(radii) == len(nsamples) == len(mlps)
self.npoint = npoint
self.groupers = nn.ModuleList()
self.mlps = nn.ModuleList()
for i in range(len(radii)):
radius = radii[i]
nsample = nsamples[i]
self.groupers.append(
pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz)
if npoint is not None else pointnet2_utils.GroupAll(use_xyz)
)
mlp_spec = mlps[i]
if use_xyz:
mlp_spec[0] += 3
self.mlps.append(pt_utils.SharedMLP(mlp_spec, bn=bn, instance_norm=instance_norm))
self.pool_method = pool_method
class PointnetSAModule(PointnetSAModuleMSG):
"""Pointnet set abstraction layer"""
def __init__(self, *, mlp: List[int], npoint: int = None, radius: float = None, nsample: int = None,
bn: bool = True, use_xyz: bool = True, pool_method='max_pool', instance_norm=False):
"""
:param mlp: list of int, spec of the pointnet before the global max_pool
:param npoint: int, number of features
:param radius: float, radius of ball
:param nsample: int, number of samples in the ball query
:param bn: whether to use batchnorm
:param use_xyz:
:param pool_method: max_pool / avg_pool
:param instance_norm: whether to use instance_norm
"""
super().__init__(
mlps=[mlp], npoint=npoint, radii=[radius], nsamples=[nsample], bn=bn, use_xyz=use_xyz,
pool_method=pool_method, instance_norm=instance_norm
)
class PointnetFPModule(nn.Module):
r"""Propigates the features of one set to another"""
def __init__(self, *, mlp: List[int], bn: bool = True):
"""
:param mlp: list of int
:param bn: whether to use batchnorm
"""
super().__init__()
self.mlp = pt_utils.SharedMLP(mlp, bn=bn)
def forward(
self, unknown: torch.Tensor, known: torch.Tensor, unknow_feats: torch.Tensor, known_feats: torch.Tensor
) -> torch.Tensor:
"""
:param unknown: (B, n, 3) tensor of the xyz positions of the unknown features
:param known: (B, m, 3) tensor of the xyz positions of the known features
:param unknow_feats: (B, C1, n) tensor of the features to be propigated to
:param known_feats: (B, C2, m) tensor of features to be propigated
:return:
new_features: (B, mlp[-1], n) tensor of the features of the unknown features
"""
if known is not None:
dist, idx = pointnet2_utils.three_nn(unknown, known)
dist_recip = 1.0 / (dist + 1e-8)
norm = torch.sum(dist_recip, dim=2, keepdim=True)
weight = dist_recip / norm
interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)
else:
interpolated_feats = known_feats.expand(*known_feats.size()[0:2], unknown.size(1))
if unknow_feats is not None:
new_features = torch.cat([interpolated_feats, unknow_feats], dim=1) # (B, C2 + C1, n)
else:
new_features = interpolated_feats
new_features = new_features.unsqueeze(-1)
new_features = self.mlp(new_features)
return new_features.squeeze(-1)
if __name__ == "__main__":
pass

View File

@@ -0,0 +1,291 @@
import torch
from torch.autograd import Variable
from torch.autograd import Function
import torch.nn as nn
from typing import Tuple
import sys
import pointnet2_cuda as pointnet2
class FurthestPointSampling(Function):
@staticmethod
def forward(ctx, xyz: torch.Tensor, npoint: int) -> torch.Tensor:
"""
Uses iterative furthest point sampling to select a set of npoint features that have the largest
minimum distance
:param ctx:
:param xyz: (B, N, 3) where N > npoint
:param npoint: int, number of features in the sampled set
:return:
output: (B, npoint) tensor containing the set
"""
assert xyz.is_contiguous()
B, N, _ = xyz.size()
output = torch.cuda.IntTensor(B, npoint)
temp = torch.cuda.FloatTensor(B, N).fill_(1e10)
pointnet2.furthest_point_sampling_wrapper(B, N, npoint, xyz, temp, output)
return output
@staticmethod
def backward(xyz, a=None):
return None, None
furthest_point_sample = FurthestPointSampling.apply
class GatherOperation(Function):
@staticmethod
def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
"""
:param ctx:
:param features: (B, C, N)
:param idx: (B, npoint) index tensor of the features to gather
:return:
output: (B, C, npoint)
"""
assert features.is_contiguous()
assert idx.is_contiguous()
B, npoint = idx.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, npoint)
pointnet2.gather_points_wrapper(B, C, N, npoint, features, idx, output)
ctx.for_backwards = (idx, C, N)
return output
@staticmethod
def backward(ctx, grad_out):
idx, C, N = ctx.for_backwards
B, npoint = idx.size()
grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
grad_out_data = grad_out.data.contiguous()
pointnet2.gather_points_grad_wrapper(B, C, N, npoint, grad_out_data, idx, grad_features.data)
return grad_features, None
gather_operation = GatherOperation.apply
class ThreeNN(Function):
@staticmethod
def forward(ctx, unknown: torch.Tensor, known: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
Find the three nearest neighbors of unknown in known
:param ctx:
:param unknown: (B, N, 3)
:param known: (B, M, 3)
:return:
dist: (B, N, 3) l2 distance to the three nearest neighbors
idx: (B, N, 3) index of 3 nearest neighbors
"""
assert unknown.is_contiguous()
assert known.is_contiguous()
B, N, _ = unknown.size()
m = known.size(1)
dist2 = torch.cuda.FloatTensor(B, N, 3)
idx = torch.cuda.IntTensor(B, N, 3)
pointnet2.three_nn_wrapper(B, N, m, unknown, known, dist2, idx)
return torch.sqrt(dist2), idx
@staticmethod
def backward(ctx, a=None, b=None):
return None, None
three_nn = ThreeNN.apply
class ThreeInterpolate(Function):
@staticmethod
def forward(ctx, features: torch.Tensor, idx: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
"""
Performs weight linear interpolation on 3 features
:param ctx:
:param features: (B, C, M) Features descriptors to be interpolated from
:param idx: (B, n, 3) three nearest neighbors of the target features in features
:param weight: (B, n, 3) weights
:return:
output: (B, C, N) tensor of the interpolated features
"""
assert features.is_contiguous()
assert idx.is_contiguous()
assert weight.is_contiguous()
B, c, m = features.size()
n = idx.size(1)
ctx.three_interpolate_for_backward = (idx, weight, m)
output = torch.cuda.FloatTensor(B, c, n)
pointnet2.three_interpolate_wrapper(B, c, m, n, features, idx, weight, output)
return output
@staticmethod
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
"""
:param ctx:
:param grad_out: (B, C, N) tensor with gradients of outputs
:return:
grad_features: (B, C, M) tensor with gradients of features
None:
None:
"""
idx, weight, m = ctx.three_interpolate_for_backward
B, c, n = grad_out.size()
grad_features = Variable(torch.cuda.FloatTensor(B, c, m).zero_())
grad_out_data = grad_out.data.contiguous()
pointnet2.three_interpolate_grad_wrapper(B, c, n, m, grad_out_data, idx, weight, grad_features.data)
return grad_features, None, None
three_interpolate = ThreeInterpolate.apply
class GroupingOperation(Function):
@staticmethod
def forward(ctx, features: torch.Tensor, idx: torch.Tensor) -> torch.Tensor:
"""
:param ctx:
:param features: (B, C, N) tensor of features to group
:param idx: (B, npoint, nsample) tensor containing the indicies of features to group with
:return:
output: (B, C, npoint, nsample) tensor
"""
assert features.is_contiguous()
assert idx.is_contiguous()
B, nfeatures, nsample = idx.size()
_, C, N = features.size()
output = torch.cuda.FloatTensor(B, C, nfeatures, nsample)
pointnet2.group_points_wrapper(B, C, N, nfeatures, nsample, features, idx, output)
ctx.for_backwards = (idx, N)
return output
@staticmethod
def backward(ctx, grad_out: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
"""
:param ctx:
:param grad_out: (B, C, npoint, nsample) tensor of the gradients of the output from forward
:return:
grad_features: (B, C, N) gradient of the features
"""
idx, N = ctx.for_backwards
B, C, npoint, nsample = grad_out.size()
grad_features = Variable(torch.cuda.FloatTensor(B, C, N).zero_())
grad_out_data = grad_out.data.contiguous()
pointnet2.group_points_grad_wrapper(B, C, N, npoint, nsample, grad_out_data, idx, grad_features.data)
return grad_features, None
grouping_operation = GroupingOperation.apply
class BallQuery(Function):
@staticmethod
def forward(ctx, radius: float, nsample: int, xyz: torch.Tensor, new_xyz: torch.Tensor) -> torch.Tensor:
"""
:param ctx:
:param radius: float, radius of the balls
:param nsample: int, maximum number of features in the balls
:param xyz: (B, N, 3) xyz coordinates of the features
:param new_xyz: (B, npoint, 3) centers of the ball query
:return:
idx: (B, npoint, nsample) tensor with the indicies of the features that form the query balls
"""
assert new_xyz.is_contiguous()
assert xyz.is_contiguous()
B, N, _ = xyz.size()
npoint = new_xyz.size(1)
idx = torch.cuda.IntTensor(B, npoint, nsample).zero_()
pointnet2.ball_query_wrapper(B, N, npoint, radius, nsample, new_xyz, xyz, idx)
return idx
@staticmethod
def backward(ctx, a=None):
return None, None, None, None
ball_query = BallQuery.apply
class QueryAndGroup(nn.Module):
def __init__(self, radius: float, nsample: int, use_xyz: bool = True):
"""
:param radius: float, radius of ball
:param nsample: int, maximum number of features to gather in the ball
:param use_xyz:
"""
super().__init__()
self.radius, self.nsample, self.use_xyz = radius, nsample, use_xyz
def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None) -> Tuple[torch.Tensor]:
"""
:param xyz: (B, N, 3) xyz coordinates of the features
:param new_xyz: (B, npoint, 3) centroids
:param features: (B, C, N) descriptors of the features
:return:
new_features: (B, 3 + C, npoint, nsample)
"""
idx = ball_query(self.radius, self.nsample, xyz, new_xyz)
xyz_trans = xyz.transpose(1, 2).contiguous()
grouped_xyz = grouping_operation(xyz_trans, idx) # (B, 3, npoint, nsample)
grouped_xyz -= new_xyz.transpose(1, 2).unsqueeze(-1)
if features is not None:
grouped_features = grouping_operation(features, idx)
if self.use_xyz:
new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, C + 3, npoint, nsample)
else:
new_features = grouped_features
else:
assert self.use_xyz, "Cannot have not features and not use xyz as a feature!"
new_features = grouped_xyz
return new_features
class GroupAll(nn.Module):
def __init__(self, use_xyz: bool = True):
super().__init__()
self.use_xyz = use_xyz
def forward(self, xyz: torch.Tensor, new_xyz: torch.Tensor, features: torch.Tensor = None):
"""
:param xyz: (B, N, 3) xyz coordinates of the features
:param new_xyz: ignored
:param features: (B, C, N) descriptors of the features
:return:
new_features: (B, C + 3, 1, N)
"""
grouped_xyz = xyz.transpose(1, 2).unsqueeze(2)
if features is not None:
grouped_features = features.unsqueeze(2)
if self.use_xyz:
new_features = torch.cat([grouped_xyz, grouped_features], dim=1) # (B, 3 + C, 1, N)
else:
new_features = grouped_features
else:
new_features = grouped_xyz
return new_features

View File

@@ -0,0 +1,236 @@
import torch.nn as nn
from typing import List, Tuple
class SharedMLP(nn.Sequential):
def __init__(
self,
args: List[int],
*,
bn: bool = False,
activation=nn.ReLU(inplace=True),
preact: bool = False,
first: bool = False,
name: str = "",
instance_norm: bool = False,
):
super().__init__()
for i in range(len(args) - 1):
self.add_module(
name + 'layer{}'.format(i),
Conv2d(
args[i],
args[i + 1],
bn=(not first or not preact or (i != 0)) and bn,
activation=activation
if (not first or not preact or (i != 0)) else None,
preact=preact,
instance_norm=instance_norm
)
)
class _ConvBase(nn.Sequential):
def __init__(
self,
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=None,
batch_norm=None,
bias=True,
preact=False,
name="",
instance_norm=False,
instance_norm_func=None
):
super().__init__()
bias = bias and (not bn)
conv_unit = conv(
in_size,
out_size,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias=bias
)
init(conv_unit.weight)
if bias:
nn.init.constant_(conv_unit.bias, 0)
if bn:
if not preact:
bn_unit = batch_norm(out_size)
else:
bn_unit = batch_norm(in_size)
if instance_norm:
if not preact:
in_unit = instance_norm_func(out_size, affine=False, track_running_stats=False)
else:
in_unit = instance_norm_func(in_size, affine=False, track_running_stats=False)
if preact:
if bn:
self.add_module(name + 'bn', bn_unit)
if activation is not None:
self.add_module(name + 'activation', activation)
if not bn and instance_norm:
self.add_module(name + 'in', in_unit)
self.add_module(name + 'conv', conv_unit)
if not preact:
if bn:
self.add_module(name + 'bn', bn_unit)
if activation is not None:
self.add_module(name + 'activation', activation)
if not bn and instance_norm:
self.add_module(name + 'in', in_unit)
class _BNBase(nn.Sequential):
def __init__(self, in_size, batch_norm=None, name=""):
super().__init__()
self.add_module(name + "bn", batch_norm(in_size))
nn.init.constant_(self[0].weight, 1.0)
nn.init.constant_(self[0].bias, 0)
class BatchNorm1d(_BNBase):
def __init__(self, in_size: int, *, name: str = ""):
super().__init__(in_size, batch_norm=nn.BatchNorm1d, name=name)
class BatchNorm2d(_BNBase):
def __init__(self, in_size: int, name: str = ""):
super().__init__(in_size, batch_norm=nn.BatchNorm2d, name=name)
class Conv1d(_ConvBase):
def __init__(
self,
in_size: int,
out_size: int,
*,
kernel_size: int = 1,
stride: int = 1,
padding: int = 0,
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=nn.init.kaiming_normal_,
bias: bool = True,
preact: bool = False,
name: str = "",
instance_norm=False
):
super().__init__(
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=nn.Conv1d,
batch_norm=BatchNorm1d,
bias=bias,
preact=preact,
name=name,
instance_norm=instance_norm,
instance_norm_func=nn.InstanceNorm1d
)
class Conv2d(_ConvBase):
def __init__(
self,
in_size: int,
out_size: int,
*,
kernel_size: Tuple[int, int] = (1, 1),
stride: Tuple[int, int] = (1, 1),
padding: Tuple[int, int] = (0, 0),
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=nn.init.kaiming_normal_,
bias: bool = True,
preact: bool = False,
name: str = "",
instance_norm=False
):
super().__init__(
in_size,
out_size,
kernel_size,
stride,
padding,
activation,
bn,
init,
conv=nn.Conv2d,
batch_norm=BatchNorm2d,
bias=bias,
preact=preact,
name=name,
instance_norm=instance_norm,
instance_norm_func=nn.InstanceNorm2d
)
class FC(nn.Sequential):
def __init__(
self,
in_size: int,
out_size: int,
*,
activation=nn.ReLU(inplace=True),
bn: bool = False,
init=None,
preact: bool = False,
name: str = ""
):
super().__init__()
fc = nn.Linear(in_size, out_size, bias=not bn)
if init is not None:
init(fc.weight)
if not bn:
nn.init.constant(fc.bias, 0)
if preact:
if bn:
self.add_module(name + 'bn', BatchNorm1d(in_size))
if activation is not None:
self.add_module(name + 'activation', activation)
self.add_module(name + 'fc', fc)
if not preact:
if bn:
self.add_module(name + 'bn', BatchNorm1d(out_size))
if activation is not None:
self.add_module(name + 'activation', activation)

View File

@@ -0,0 +1,149 @@
import torch
import torch.nn as nn
import os
import sys
path = os.path.abspath(__file__)
for i in range(2):
path = os.path.dirname(path)
PROJECT_ROOT = path
sys.path.append(PROJECT_ROOT)
import PytorchBoot.stereotype as stereotype
from modules.module_lib.pointnet2_modules import PointnetSAModuleMSG
ClsMSG_CFG_Dense = {
'NPOINTS': [512, 256, 128, None],
'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
'NSAMPLE': [[32, 64], [16, 32], [8, 16], [None, None]],
'MLPS': [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 384, 512]]],
'DP_RATIO': 0.5,
}
ClsMSG_CFG_Light = {
'NPOINTS': [512, 256, 128, None],
'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
'NSAMPLE': [[16, 32], [16, 32], [16, 32], [None, None]],
'MLPS': [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 384, 512]]],
'DP_RATIO': 0.5,
}
ClsMSG_CFG_Light_2048 = {
'NPOINTS': [512, 256, 128, None],
'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16], [None, None]],
'NSAMPLE': [[16, 32], [16, 32], [16, 32], [None, None]],
'MLPS': [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 1024], [256, 512, 1024]]],
'DP_RATIO': 0.5,
}
ClsMSG_CFG_Strong = {
'NPOINTS': [512, 256, 128, 64, None],
'RADIUS': [[0.02, 0.04], [0.04, 0.08], [0.08, 0.16],[0.16, 0.32], [None, None]],
'NSAMPLE': [[16, 32], [16, 32], [16, 32], [16, 32], [None, None]],
'MLPS': [[[16, 16, 32], [32, 32, 64]],
[[64, 64, 128], [64, 96, 128]],
[[128, 196, 256], [128, 196, 256]],
[[256, 256, 512], [256, 512, 512]],
[[512, 512, 2048], [512, 1024, 2048]]
],
'DP_RATIO': 0.5,
}
ClsMSG_CFG_Lighter = {
'NPOINTS': [512, 256, 128, 64, None],
'RADIUS': [[0.01], [0.02], [0.04], [0.08], [None]],
'NSAMPLE': [[64], [32], [16], [8], [None]],
'MLPS': [[[32, 32, 64]],
[[64, 64, 128]],
[[128, 196, 256]],
[[256, 256, 512]],
[[512, 512, 1024]]],
'DP_RATIO': 0.5,
}
def select_params(name):
if name == 'light':
return ClsMSG_CFG_Light
elif name == 'lighter':
return ClsMSG_CFG_Lighter
elif name == 'dense':
return ClsMSG_CFG_Dense
elif name == 'light_2048':
return ClsMSG_CFG_Light_2048
elif name == 'strong':
return ClsMSG_CFG_Strong
else:
raise NotImplementedError
def break_up_pc(pc):
xyz = pc[..., 0:3].contiguous()
features = (
pc[..., 3:].transpose(1, 2).contiguous()
if pc.size(-1) > 3 else None
)
return xyz, features
@stereotype.module("pointnet++_encoder")
class PointNet2Encoder(nn.Module):
def encode_points(self, pts, require_per_point_feat=False):
return self.forward(pts)
def __init__(self, config:dict):
super().__init__()
channel_in = config.get("in_dim", 3) - 3
params_name = config.get("params_name", "light")
self.SA_modules = nn.ModuleList()
selected_params = select_params(params_name)
for k in range(selected_params['NPOINTS'].__len__()):
mlps = selected_params['MLPS'][k].copy()
channel_out = 0
for idx in range(mlps.__len__()):
mlps[idx] = [channel_in] + mlps[idx]
channel_out += mlps[idx][-1]
self.SA_modules.append(
PointnetSAModuleMSG(
npoint=selected_params['NPOINTS'][k],
radii=selected_params['RADIUS'][k],
nsamples=selected_params['NSAMPLE'][k],
mlps=mlps,
use_xyz=True,
bn=True
)
)
channel_in = channel_out
def forward(self, point_cloud: torch.cuda.FloatTensor):
xyz, features = break_up_pc(point_cloud)
l_xyz, l_features = [xyz], [features]
for i in range(len(self.SA_modules)):
li_xyz, li_features = self.SA_modules[i](l_xyz[i], l_features[i])
l_xyz.append(li_xyz)
l_features.append(li_features)
return l_features[-1].squeeze(-1)
if __name__ == '__main__':
seed = 100
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
net = PointNet2Encoder(config={"in_dim": 3, "params_name": "strong"}).cuda()
pts = torch.randn(2, 2444, 3).cuda()
print(torch.mean(pts, dim=1))
pre = net.encode_points(pts)
print(pre.shape)

View File

@@ -22,12 +22,10 @@ class PointNetEncoder(nn.Module):
self.conv2 = torch.nn.Conv1d(64, 128, 1)
self.conv3 = torch.nn.Conv1d(128, 512, 1)
self.conv4 = torch.nn.Conv1d(512, self.out_dim , 1)
self.global_feat = config["global_feat"]
if self.feature_transform:
self.f_stn = STNkd(k=64)
def forward(self, x):
n_pts = x.shape[2]
trans = self.stn(x)
x = x.transpose(2, 1)
x = torch.bmm(x, trans)
@@ -46,20 +44,15 @@ class PointNetEncoder(nn.Module):
x = self.conv4(x)
x = torch.max(x, 2, keepdim=True)[0]
x = x.view(-1, self.out_dim)
if self.global_feat:
return x
else:
x = x.view(-1, self.out_dim, 1).repeat(1, 1, n_pts)
return torch.cat([x, point_feat], 1)
return x, point_feat
def encode_points(self, pts):
def encode_points(self, pts, require_per_point_feat=False):
pts = pts.transpose(2, 1)
if not self.global_feat:
pts_feature = self(pts).transpose(2, 1)
global_pts_feature, per_point_feature = self(pts)
if require_per_point_feat:
return global_pts_feature, per_point_feature.transpose(2, 1)
else:
pts_feature = self(pts)
return pts_feature
return global_pts_feature
class STNkd(nn.Module):
def __init__(self, k=64):
@@ -102,21 +95,13 @@ if __name__ == "__main__":
config = {
"in_dim": 3,
"out_dim": 1024,
"global_feat": True,
"feature_transform": False
}
pointnet_global = PointNetEncoder(config)
out = pointnet_global.encode_points(sim_data)
pointnet = PointNetEncoder(config)
out = pointnet.encode_points(sim_data)
print("global feat", out.size())
config = {
"in_dim": 3,
"out_dim": 1024,
"global_feat": False,
"feature_transform": False
}
pointnet = PointNetEncoder(config)
out = pointnet.encode_points(sim_data)
out, per_point_out = pointnet.encode_points(sim_data, require_per_point_feat=True)
print("point feat", out.size())
print("per point feat", per_point_out.size())

View File

@@ -0,0 +1,20 @@
from torch import nn
import PytorchBoot.stereotype as stereotype
@stereotype.module("pts_num_encoder")
class PointsNumEncoder(nn.Module):
def __init__(self, config):
super(PointsNumEncoder, self).__init__()
self.config = config
out_dim = config["out_dim"]
self.act = nn.ReLU(True)
self.pts_num_encoder = nn.Sequential(
nn.Linear(1, out_dim),
self.act,
nn.Linear(out_dim, out_dim),
self.act,
)
def encode_pts_num(self, num_seq):
return self.pts_num_encoder(num_seq)

View File

@@ -9,7 +9,7 @@ class TransformerSequenceEncoder(nn.Module):
def __init__(self, config):
super(TransformerSequenceEncoder, self).__init__()
self.config = config
embed_dim = config["pts_embed_dim"] + config["pose_embed_dim"]
embed_dim = config["embed_dim"]
encoder_layer = nn.TransformerEncoderLayer(
d_model=embed_dim,
nhead=config["num_heads"],
@@ -21,31 +21,20 @@ class TransformerSequenceEncoder(nn.Module):
)
self.fc = nn.Linear(embed_dim, config["output_dim"])
def encode_sequence(self, pts_embedding_list_batch, pose_embedding_list_batch):
# Combine features and pad sequences
combined_features_batch = []
def encode_sequence(self, embedding_list_batch):
lengths = []
for pts_embedding_list, pose_embedding_list in zip(pts_embedding_list_batch, pose_embedding_list_batch):
combined_features = [
torch.cat((pts_embed, pose_embed), dim=-1)
for pts_embed, pose_embed in zip(pts_embedding_list, pose_embedding_list)
]
combined_features_batch.append(torch.stack(combined_features))
lengths.append(len(combined_features))
for embedding_list in embedding_list_batch:
lengths.append(len(embedding_list))
combined_tensor = pad_sequence(combined_features_batch, batch_first=True) # Shape: [batch_size, max_seq_len, embed_dim]
embedding_tensor = pad_sequence(embedding_list_batch, batch_first=True) # Shape: [batch_size, max_seq_len, embed_dim]
# Prepare mask for padding
max_len = max(lengths)
padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool).to(combined_tensor.device)
# Transformer encoding
transformer_output = self.transformer_encoder(combined_tensor, src_key_padding_mask=padding_mask)
padding_mask = torch.tensor([([0] * length + [1] * (max_len - length)) for length in lengths], dtype=torch.bool).to(embedding_tensor.device)
# Mean pooling
transformer_output = self.transformer_encoder(embedding_tensor, src_key_padding_mask=padding_mask)
final_feature = transformer_output.mean(dim=1)
# Fully connected layer
final_output = self.fc(final_feature)
return final_output
@@ -53,26 +42,22 @@ class TransformerSequenceEncoder(nn.Module):
if __name__ == "__main__":
config = {
"pts_embed_dim": 1024,
"pose_embed_dim": 256,
"embed_dim": 256,
"num_heads": 4,
"ffn_dim": 256,
"num_layers": 3,
"output_dim": 2048,
"output_dim": 1024,
}
encoder = TransformerSequenceEncoder(config)
seq_len = [5, 8, 9, 4]
batch_size = 4
pts_embedding_list_batch = [
torch.randn(seq_len[idx], config["pts_embed_dim"]) for idx in range(batch_size)
]
pose_embedding_list_batch = [
torch.randn(seq_len[idx], config["pose_embed_dim"]) for idx in range(batch_size)
embedding_list_batch = [
torch.randn(seq_len[idx], config["embed_dim"]) for idx in range(batch_size)
]
output_feature = encoder.encode_sequence(
pts_embedding_list_batch, pose_embedding_list_batch
embedding_list_batch
)
print("Encoded Feature:", output_feature)
print("Feature Shape:", output_feature.shape)

View File

@@ -0,0 +1,43 @@
import os
import shutil
def clean_scene_data(root, scene):
# 清理目标点云数据
pts_dir = os.path.join(root, scene, "pts")
if os.path.exists(pts_dir):
shutil.rmtree(pts_dir)
print(f"已删除 {pts_dir}")
# 清理法线数据
nrm_dir = os.path.join(root, scene, "nrm")
if os.path.exists(nrm_dir):
shutil.rmtree(nrm_dir)
print(f"已删除 {nrm_dir}")
# 清理扫描点索引数据
scan_points_indices_dir = os.path.join(root, scene, "scan_points_indices")
if os.path.exists(scan_points_indices_dir):
shutil.rmtree(scan_points_indices_dir)
print(f"已删除 {scan_points_indices_dir}")
# 删除扫描点数据文件
scan_points_file = os.path.join(root, scene, "scan_points.txt")
if os.path.exists(scan_points_file):
os.remove(scan_points_file)
print(f"已删除 {scan_points_file}")
def clean_all_scenes(root, scene_list):
for idx, scene in enumerate(scene_list):
print(f"正在清理场景 {scene} ({idx+1}/{len(scene_list)})")
clean_scene_data(root, scene)
if __name__ == "__main__":
root = r"c:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
scene_list = os.listdir(root)
from_idx = 0
to_idx = len(scene_list)
print(f"正在清理场景 {scene_list[from_idx:to_idx]}")
clean_all_scenes(root, scene_list[from_idx:to_idx])
print("清理完成")

View File

@@ -0,0 +1,48 @@
import os
import shutil
def pack_scene_data(root, scene, output_dir):
scene_dir = os.path.join(output_dir, scene)
if not os.path.exists(scene_dir):
os.makedirs(scene_dir)
pts_dir = os.path.join(root, scene, "pts")
if os.path.exists(pts_dir):
shutil.move(pts_dir, os.path.join(scene_dir, "pts"))
scan_points_indices_dir = os.path.join(root, scene, "scan_points_indices")
if os.path.exists(scan_points_indices_dir):
shutil.move(scan_points_indices_dir, os.path.join(scene_dir, "scan_points_indices"))
scan_points_file = os.path.join(root, scene, "scan_points.txt")
if os.path.exists(scan_points_file):
shutil.move(scan_points_file, os.path.join(scene_dir, "scan_points.txt"))
model_pts_nrm_file = os.path.join(root, scene, "points_and_normals.txt")
if os.path.exists(model_pts_nrm_file):
shutil.move(model_pts_nrm_file, os.path.join(scene_dir, "points_and_normals.txt"))
camera_dir = os.path.join(root, scene, "camera_params")
if os.path.exists(camera_dir):
shutil.move(camera_dir, os.path.join(scene_dir, "camera_params"))
scene_info_file = os.path.join(root, scene, "scene_info.json")
if os.path.exists(scene_info_file):
shutil.move(scene_info_file, os.path.join(scene_dir, "scene_info.json"))
def pack_all_scenes(root, scene_list, output_dir):
for idx, scene in enumerate(scene_list):
print(f"正在打包场景 {scene} ({idx+1}/{len(scene_list)})")
pack_scene_data(root, scene, output_dir)
if __name__ == "__main__":
root = r"H:\AI\Datasets\nbv_rec_part2"
output_dir = r"H:\AI\Datasets\scene_info_part2"
scene_list = os.listdir(root)
from_idx = 0
to_idx = len(scene_list)
print(f"正在打包场景 {scene_list[from_idx:to_idx]}")
pack_all_scenes(root, scene_list[from_idx:to_idx], output_dir)
print("打包完成")

View File

@@ -0,0 +1,41 @@
import os
import shutil
def pack_scene_data(root, scene, output_dir):
scene_dir = os.path.join(output_dir, scene)
if not os.path.exists(scene_dir):
os.makedirs(scene_dir)
pts_dir = os.path.join(root, scene, "pts")
if os.path.exists(pts_dir):
shutil.move(pts_dir, os.path.join(scene_dir, "pts"))
camera_dir = os.path.join(root, scene, "camera_params")
if os.path.exists(camera_dir):
shutil.move(camera_dir, os.path.join(scene_dir, "camera_params"))
scene_info_file = os.path.join(root, scene, "scene_info.json")
if os.path.exists(scene_info_file):
shutil.move(scene_info_file, os.path.join(scene_dir, "scene_info.json"))
label_dir = os.path.join(root, scene, "label")
if os.path.exists(label_dir):
shutil.move(label_dir, os.path.join(scene_dir, "label"))
def pack_all_scenes(root, scene_list, output_dir):
for idx, scene in enumerate(scene_list):
print(f"packing {scene} ({idx+1}/{len(scene_list)})")
pack_scene_data(root, scene, output_dir)
if __name__ == "__main__":
root = r"H:\AI\Datasets\nbv_rec_part2"
output_dir = r"H:\AI\Datasets\upload_part2"
scene_list = os.listdir(root)
from_idx = 0
to_idx = len(scene_list)
print(f"packing {scene_list[from_idx:to_idx]}")
pack_all_scenes(root, scene_list[from_idx:to_idx], output_dir)
print("packing done")

185
preprocess/preprocessor.py Normal file
View File

@@ -0,0 +1,185 @@
import os
import numpy as np
import time
import sys
np.random.seed(0)
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.reconstruction import ReconstructionUtil
from utils.data_load import DataLoadUtil
from utils.pts import PtsUtil
def save_np_pts(path, pts: np.ndarray, file_type="txt"):
if file_type == "txt":
np.savetxt(path, pts)
else:
np.save(path, pts)
def save_target_points(root, scene, frame_idx, target_points: np.ndarray, file_type="txt"):
pts_path = os.path.join(root,scene, "pts", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "pts")):
os.makedirs(os.path.join(root,scene, "pts"))
save_np_pts(pts_path, target_points, file_type)
def save_target_normals(root, scene, frame_idx, target_normals: np.ndarray, file_type="txt"):
pts_path = os.path.join(root,scene, "nrm", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "nrm")):
os.makedirs(os.path.join(root,scene, "nrm"))
save_np_pts(pts_path, target_normals, file_type)
def save_scan_points_indices(root, scene, frame_idx, scan_points_indices: np.ndarray, file_type="txt"):
indices_path = os.path.join(root,scene, "scan_points_indices", f"{frame_idx}.{file_type}")
if not os.path.exists(os.path.join(root,scene, "scan_points_indices")):
os.makedirs(os.path.join(root,scene, "scan_points_indices"))
save_np_pts(indices_path, scan_points_indices, file_type)
def save_scan_points(root, scene, scan_points: np.ndarray):
scan_points_path = os.path.join(root,scene, "scan_points.txt")
save_np_pts(scan_points_path, scan_points)
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
sampled_target_points = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N
)
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[mask].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
return np.zeros((0, 3)), np.zeros((0, 3))
sampled_normal_camera = normal_camera[idx]
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world, sampled_normal_camera
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
def save_scene_data(root, scene, scene_idx=0, scene_total=1,file_type="txt"):
''' configuration '''
target_mask_label = (0, 255, 0)
display_table_mask_label=(0, 0, 255)
random_downsample_N = 32768
voxel_size=0.003
filter_degree = 75
min_z = 0.2
max_z = 0.5
''' scan points '''
display_table_info = DataLoadUtil.get_display_table_info(root, scene)
radius = display_table_info["radius"]
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=radius))
''' read frame data(depth|mask|normal) '''
frame_num = DataLoadUtil.get_scene_seq_length(root, scene)
for frame_id in range(frame_num):
print(f"[scene({scene_idx}/{scene_total})|frame({frame_id}/{frame_num})]Processing {scene} frame {frame_id}")
path = DataLoadUtil.get_path(root, scene, frame_id)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
target_mask_img_R = (mask_R == target_mask_label).all(axis=-1)
sampled_target_points_L, sampled_target_normal_L = get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], random_downsample_N)
sampled_target_points_R = get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], random_downsample_N)
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points, overlap_idx = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_size, require_idx=True
)
sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
target_points, target_normals = PtsUtil.filter_points(
target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(min_z, max_z)
)
''' scan points indices '''
scan_points_indices_L = get_scan_points_indices(scan_points, mask_img_L, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
scan_points_indices_R = get_scan_points_indices(scan_points, mask_img_R, display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
target_normals = np.zeros((0, 3))
save_target_points(root, scene, frame_id, target_points, file_type=file_type)
save_target_normals(root, scene, frame_id, target_normals, file_type=file_type)
save_scan_points_indices(root, scene, frame_id, scan_points_indices, file_type=file_type)
save_scan_points(root, scene, scan_points) # The "done" flag of scene preprocess
if __name__ == "__main__":
#root = "/media/hofee/repository/new_data_with_normal"
root = r"/media/hofee/data/data/test_bottle/view"
scene_list = os.listdir(root)
from_idx = 0 # 1000
to_idx = len(scene_list) # 1500
cnt = 0
import time
total = to_idx - from_idx
for scene in scene_list[from_idx:to_idx]:
start = time.time()
if os.path.exists(os.path.join(root, scene, "scan_points.txt")):
print(f"Scene {scene} has been processed")
cnt+=1
continue
save_scene_data(root, scene, cnt, total, file_type="npy")
cnt+=1
end = time.time()
print(f"Time cost: {end-start}")

116
runners/inference_server.py Normal file
View File

@@ -0,0 +1,116 @@
import os
import json
import torch
import numpy as np
from flask import Flask, request, jsonify
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory import ComponentFactory
from PytorchBoot.runners.runner import Runner
from PytorchBoot.utils import Log
from utils.pts import PtsUtil
from beans.predict_result import PredictResult
@stereotype.runner("inferencer_server")
class InferencerServer(Runner):
def __init__(self, config_path):
super().__init__(config_path)
''' Web Server '''
self.app = Flask(__name__)
''' Pipeline '''
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
self.pipeline = self.pipeline.to(self.device)
self.pts_num = 8192
self.voxel_size = 0.002
''' Experiment '''
self.load_experiment("inferencer_server")
def get_input_data(self, data):
input_data = {}
scanned_pts = data["scanned_pts"]
scanned_n_to_world_pose_9d = data["scanned_n_to_world_pose_9d"]
combined_scanned_views_pts = np.concatenate(scanned_pts, axis=0)
voxel_downsampled_combined_scanned_pts = PtsUtil.voxel_downsample_point_cloud(
combined_scanned_views_pts, self.voxel_size
)
fps_downsampled_combined_scanned_pts, fps_idx = PtsUtil.fps_downsample_point_cloud(
voxel_downsampled_combined_scanned_pts, self.pts_num, require_idx=True
)
input_data["scanned_pts"] = scanned_pts
input_data["scanned_n_to_world_pose_9d"] = np.asarray(scanned_n_to_world_pose_9d, dtype=np.float32)
input_data["combined_scanned_pts"] = np.asarray(fps_downsampled_combined_scanned_pts, dtype=np.float32)
return input_data
def get_result(self, output_data):
pred_pose_9d = output_data["pred_pose_9d"]
pred_pose_9d = np.asarray(PredictResult(pred_pose_9d.cpu().numpy(), None, cluster_params=dict(eps=0.25, min_samples=3)).candidate_9d_poses, dtype=np.float32)
result = {
"pred_pose_9d": pred_pose_9d.tolist()
}
return result
def collate_input(self, input_data):
collated_input_data = {}
collated_input_data["scanned_pts"] = [torch.tensor(input_data["scanned_pts"], dtype=torch.float32, device=self.device)]
collated_input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(input_data["scanned_n_to_world_pose_9d"], dtype=torch.float32, device=self.device)]
collated_input_data["combined_scanned_pts"] = torch.tensor(input_data["combined_scanned_pts"], dtype=torch.float32, device=self.device).unsqueeze(0)
return collated_input_data
def run(self):
Log.info("Loading from epoch {}.".format(self.current_epoch))
@self.app.route("/inference", methods=["POST"])
def inference():
data = request.json
input_data = self.get_input_data(data)
collated_input_data = self.collate_input(input_data)
output_data = self.pipeline.forward_test(collated_input_data)
result = self.get_result(output_data)
return jsonify(result)
self.app.run(host="0.0.0.0", port=5000)
def get_checkpoint_path(self, is_last=False):
return os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME,
"Epoch_{}.pth".format(
self.current_epoch if self.current_epoch != -1 and not is_last else "last"))
def load_checkpoint(self, is_last=False):
self.load(self.get_checkpoint_path(is_last))
Log.success(f"Loaded checkpoint from {self.get_checkpoint_path(is_last)}")
if is_last:
checkpoint_root = os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME)
meta_path = os.path.join(checkpoint_root, "meta.json")
if not os.path.exists(meta_path):
raise FileNotFoundError(
"No checkpoint meta.json file in the experiment {}".format(self.experiments_config["name"]))
file_path = os.path.join(checkpoint_root, "meta.json")
with open(file_path, "r") as f:
meta = json.load(f)
self.current_epoch = meta["last_epoch"]
self.current_iter = meta["last_iter"]
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
self.current_epoch = self.experiments_config["epoch"]
self.load_checkpoint(is_last=(self.current_epoch == -1))
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
def load(self, path):
state_dict = torch.load(path)
self.pipeline.load_state_dict(state_dict)

335
runners/inferencer.py Normal file
View File

@@ -0,0 +1,335 @@
import os
import json
from utils.render import RenderUtil
from utils.pose import PoseUtil
from utils.pts import PtsUtil
from utils.reconstruction import ReconstructionUtil
from beans.predict_result import PredictResult
import torch
from tqdm import tqdm
import numpy as np
import pickle
from PytorchBoot.config import ConfigManager
import PytorchBoot.namespace as namespace
import PytorchBoot.stereotype as stereotype
from PytorchBoot.factory import ComponentFactory
from PytorchBoot.dataset import BaseDataset
from PytorchBoot.runners.runner import Runner
from PytorchBoot.utils import Log
from PytorchBoot.status import status_manager
from utils.data_load import DataLoadUtil
@stereotype.runner("inferencer")
class Inferencer(Runner):
def __init__(self, config_path):
super().__init__(config_path)
self.script_path = ConfigManager.get(namespace.Stereotype.RUNNER, "blender_script_path")
self.output_dir = ConfigManager.get(namespace.Stereotype.RUNNER, "output_dir")
self.voxel_size = ConfigManager.get(namespace.Stereotype.RUNNER, "voxel_size")
self.min_new_area = ConfigManager.get(namespace.Stereotype.RUNNER, "min_new_area")
CM = 0.01
self.min_new_pts_num = self.min_new_area * (CM / self.voxel_size) **2
''' Pipeline '''
self.pipeline_name = self.config[namespace.Stereotype.PIPELINE]
self.pipeline:torch.nn.Module = ComponentFactory.create(namespace.Stereotype.PIPELINE, self.pipeline_name)
self.pipeline = self.pipeline.to(self.device)
''' Experiment '''
self.load_experiment("nbv_evaluator")
self.stat_result_path = os.path.join(self.output_dir, "stat.json")
if os.path.exists(self.stat_result_path):
with open(self.stat_result_path, "r") as f:
self.stat_result = json.load(f)
else:
self.stat_result = {}
''' Test '''
self.test_config = ConfigManager.get(namespace.Stereotype.RUNNER, namespace.Mode.TEST)
self.test_dataset_name_list = self.test_config["dataset_list"]
self.test_set_list = []
self.test_writer_list = []
seen_name = set()
for test_dataset_name in self.test_dataset_name_list:
if test_dataset_name not in seen_name:
seen_name.add(test_dataset_name)
else:
raise ValueError("Duplicate test dataset name: {}".format(test_dataset_name))
test_set: BaseDataset = ComponentFactory.create(namespace.Stereotype.DATASET, test_dataset_name)
self.test_set_list.append(test_set)
self.print_info()
def run(self):
Log.info("Loading from epoch {}.".format(self.current_epoch))
self.inference()
Log.success("Inference finished.")
def inference(self):
self.pipeline.eval()
with torch.no_grad():
test_set: BaseDataset
for dataset_idx, test_set in enumerate(self.test_set_list):
status_manager.set_progress("inference", "inferencer", f"dataset", dataset_idx, len(self.test_set_list))
test_set_name = test_set.get_name()
total=int(len(test_set))
for i in tqdm(range(total), desc=f"Processing {test_set_name}", ncols=100):
try:
data = test_set.__getitem__(i)
scene_name = data["scene_name"]
inference_result_path = os.path.join(self.output_dir, test_set_name, f"{scene_name}.pkl")
if os.path.exists(inference_result_path):
Log.info(f"Inference result already exists for scene: {scene_name}")
continue
status_manager.set_progress("inference", "inferencer", f"Batch[{test_set_name}]", i+1, total)
output = self.predict_sequence(data)
self.save_inference_result(test_set_name, data["scene_name"], output)
except Exception as e:
Log.error(f"Error in scene {scene_name}, {e}")
continue
status_manager.set_progress("inference", "inferencer", f"dataset", len(self.test_set_list), len(self.test_set_list))
def predict_sequence(self, data, cr_increase_threshold=0, overlap_area_threshold=25, scan_points_threshold=10, max_iter=50, max_retry = 10, max_success=3):
scene_name = data["scene_name"]
Log.info(f"Processing scene: {scene_name}")
status_manager.set_status("inference", "inferencer", "scene", scene_name)
''' data for rendering '''
scene_path = data["scene_path"]
O_to_L_pose = data["O_to_L_pose"]
voxel_threshold = self.voxel_size
filter_degree = 75
down_sampled_model_pts = data["gt_pts"]
first_frame_to_world_9d = data["first_scanned_n_to_world_pose_9d"][0]
first_frame_to_world = np.eye(4)
first_frame_to_world[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(first_frame_to_world_9d[:6])
first_frame_to_world[:3,3] = first_frame_to_world_9d[6:]
''' data for inference '''
input_data = {}
input_data["combined_scanned_pts"] = torch.tensor(data["first_scanned_pts"][0], dtype=torch.float32).to(self.device).unsqueeze(0)
input_data["scanned_n_to_world_pose_9d"] = [torch.tensor(data["first_scanned_n_to_world_pose_9d"], dtype=torch.float32).to(self.device)]
input_data["mode"] = namespace.Mode.TEST
input_pts_N = input_data["combined_scanned_pts"].shape[1]
root = os.path.dirname(scene_path)
display_table_info = DataLoadUtil.get_display_table_info(root, scene_name)
radius = display_table_info["radius"]
scan_points = np.asarray(ReconstructionUtil.generate_scan_points(display_table_top=0,display_table_radius=radius))
first_frame_target_pts, first_frame_target_normals, first_frame_scan_points_indices = RenderUtil.render_pts(first_frame_to_world, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
scanned_view_pts = [first_frame_target_pts]
history_indices = [first_frame_scan_points_indices]
last_pred_cr, added_pts_num = self.compute_coverage_rate(scanned_view_pts, None, down_sampled_model_pts, threshold=voxel_threshold)
retry_duplication_pose = []
retry_no_pts_pose = []
retry_overlap_pose = []
retry = 0
pred_cr_seq = [last_pred_cr]
success = 0
last_pts_num = PtsUtil.voxel_downsample_point_cloud(data["first_scanned_pts"][0], voxel_threshold).shape[0]
#import time
while len(pred_cr_seq) < max_iter and retry < max_retry and success < max_success:
Log.green(f"iter: {len(pred_cr_seq)}, retry: {retry}/{max_retry}, success: {success}/{max_success}")
combined_scanned_pts = np.vstack(scanned_view_pts)
voxel_downsampled_combined_scanned_pts_np, inverse = self.voxel_downsample_with_mapping(combined_scanned_pts, voxel_threshold)
output = self.pipeline(input_data)
pred_pose_9d = output["pred_pose_9d"]
pred_pose = torch.eye(4, device=pred_pose_9d.device)
# # save pred_pose_9d ------
# root = "/media/hofee/data/project/python/nbv_reconstruction/nbv_reconstruction/temp_output_result"
# scene_dir = os.path.join(root, scene_name)
# if not os.path.exists(scene_dir):
# os.makedirs(scene_dir)
# pred_9d_path = os.path.join(scene_dir,f"pred_pose_9d_{len(pred_cr_seq)}.npy")
# pts_path = os.path.join(scene_dir,f"combined_scanned_pts_{len(pred_cr_seq)}.txt")
# np_combined_scanned_pts = input_data["combined_scanned_pts"][0].cpu().numpy()
# np.save(pred_9d_path, pred_pose_9d.cpu().numpy())
# np.savetxt(pts_path, np_combined_scanned_pts)
# # ----- ----- -----
predict_result = PredictResult(pred_pose_9d.cpu().numpy(), input_pts=input_data["combined_scanned_pts"][0].cpu().numpy(), cluster_params=dict(eps=0.25, min_samples=3))
# -----------------------
# import ipdb; ipdb.set_trace()
# predict_result.visualize()
# -----------------------
pred_pose_9d_candidates = predict_result.candidate_9d_poses
for pred_pose_9d in pred_pose_9d_candidates:
#import ipdb; ipdb.set_trace()
pred_pose_9d = torch.tensor(pred_pose_9d, dtype=torch.float32).to(self.device).unsqueeze(0)
pred_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_tensor_batch(pred_pose_9d[:,:6])[0]
pred_pose[:3,3] = pred_pose_9d[0,6:]
try:
new_target_pts, new_target_normals, new_scan_points_indices = RenderUtil.render_pts(pred_pose, scene_path, self.script_path, scan_points, voxel_threshold=voxel_threshold, filter_degree=filter_degree, nO_to_nL_pose=O_to_L_pose)
#import ipdb; ipdb.set_trace()
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
downsampled_new_target_pts = PtsUtil.voxel_downsample_point_cloud(new_target_pts, voxel_threshold)
overlap, _ = ReconstructionUtil.check_overlap(downsampled_new_target_pts, voxel_downsampled_combined_scanned_pts_np, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=voxel_threshold, require_new_added_pts_num = True)
if not overlap:
Log.yellow("no overlap!")
retry += 1
retry_overlap_pose.append(pred_pose.cpu().numpy().tolist())
continue
history_indices.append(new_scan_points_indices)
except Exception as e:
Log.error(f"Error in scene {scene_path}, {e}")
print("current pose: ", pred_pose)
print("curr_pred_cr: ", last_pred_cr)
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
continue
if new_target_pts.shape[0] == 0:
Log.red("no pts in new target")
retry_no_pts_pose.append(pred_pose.cpu().numpy().tolist())
retry += 1
continue
pred_cr, _ = self.compute_coverage_rate(scanned_view_pts, new_target_pts, down_sampled_model_pts, threshold=voxel_threshold)
Log.yellow(f"{pred_cr}, {last_pred_cr}, max: , {data['seq_max_coverage_rate']}")
if pred_cr >= data["seq_max_coverage_rate"] - 1e-3:
print("max coverage rate reached!: ", pred_cr)
pred_cr_seq.append(pred_cr)
scanned_view_pts.append(new_target_pts)
input_data["scanned_n_to_world_pose_9d"] = [torch.cat([input_data["scanned_n_to_world_pose_9d"][0], pred_pose_9d], dim=0)]
combined_scanned_pts = np.vstack(scanned_view_pts)
voxel_downsampled_combined_scanned_pts_np = PtsUtil.voxel_downsample_point_cloud(combined_scanned_pts, voxel_threshold)
random_downsampled_combined_scanned_pts_np = PtsUtil.random_downsample_point_cloud(voxel_downsampled_combined_scanned_pts_np, input_pts_N)
input_data["combined_scanned_pts"] = torch.tensor(random_downsampled_combined_scanned_pts_np, dtype=torch.float32).unsqueeze(0).to(self.device)
last_pred_cr = pred_cr
pts_num = voxel_downsampled_combined_scanned_pts_np.shape[0]
Log.info(f"delta pts num:,{pts_num - last_pts_num },{pts_num}, {last_pts_num}")
if pts_num - last_pts_num < self.min_new_pts_num and pred_cr <= data["seq_max_coverage_rate"] - 1e-2:
retry += 1
retry_duplication_pose.append(pred_pose.cpu().numpy().tolist())
Log.red(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}")
elif pts_num - last_pts_num < self.min_new_pts_num and pred_cr > data["seq_max_coverage_rate"] - 1e-2:
success += 1
Log.success(f"delta pts num < {self.min_new_pts_num}:, {pts_num}, {last_pts_num}")
last_pts_num = pts_num
input_data["scanned_n_to_world_pose_9d"] = input_data["scanned_n_to_world_pose_9d"][0].cpu().numpy().tolist()
result = {
"pred_pose_9d_seq": input_data["scanned_n_to_world_pose_9d"],
"combined_scanned_pts": input_data["combined_scanned_pts"],
"target_pts_seq": scanned_view_pts,
"coverage_rate_seq": pred_cr_seq,
"max_coverage_rate": data["seq_max_coverage_rate"],
"pred_max_coverage_rate": max(pred_cr_seq),
"scene_name": scene_name,
"retry_no_pts_pose": retry_no_pts_pose,
"retry_duplication_pose": retry_duplication_pose,
"retry_overlap_pose": retry_overlap_pose,
"best_seq_len": data["best_seq_len"],
}
self.stat_result[scene_name] = {
"coverage_rate_seq": pred_cr_seq,
"pred_max_coverage_rate": max(pred_cr_seq),
"pred_seq_len": len(pred_cr_seq),
}
print('success rate: ', max(pred_cr_seq))
return result
def compute_coverage_rate(self, scanned_view_pts, new_pts, model_pts, threshold=0.005):
if new_pts is not None:
new_scanned_view_pts = scanned_view_pts + [new_pts]
else:
new_scanned_view_pts = scanned_view_pts
combined_point_cloud = np.vstack(new_scanned_view_pts)
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold)
return ReconstructionUtil.compute_coverage_rate(model_pts, down_sampled_combined_point_cloud, threshold)
def voxel_downsample_with_mapping(self, point_cloud, voxel_size=0.003):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, inverse
def save_inference_result(self, dataset_name, scene_name, output):
dataset_dir = os.path.join(self.output_dir, dataset_name)
if not os.path.exists(dataset_dir):
os.makedirs(dataset_dir)
output_path = os.path.join(dataset_dir, f"{scene_name}.pkl")
pickle.dump(output, open(output_path, "wb"))
with open(self.stat_result_path, "w") as f:
json.dump(self.stat_result, f)
def get_checkpoint_path(self, is_last=False):
return os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME,
"Epoch_{}.pth".format(
self.current_epoch if self.current_epoch != -1 and not is_last else "last"))
def load_checkpoint(self, is_last=False):
self.load(self.get_checkpoint_path(is_last))
Log.success(f"Loaded checkpoint from {self.get_checkpoint_path(is_last)}")
if is_last:
checkpoint_root = os.path.join(self.experiment_path, namespace.Direcotry.CHECKPOINT_DIR_NAME)
meta_path = os.path.join(checkpoint_root, "meta.json")
if not os.path.exists(meta_path):
raise FileNotFoundError(
"No checkpoint meta.json file in the experiment {}".format(self.experiments_config["name"]))
file_path = os.path.join(checkpoint_root, "meta.json")
with open(file_path, "r") as f:
meta = json.load(f)
self.current_epoch = meta["last_epoch"]
self.current_iter = meta["last_iter"]
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
self.current_epoch = self.experiments_config["epoch"]
self.load_checkpoint(is_last=(self.current_epoch == -1))
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
def load(self, path):
state_dict = torch.load(path)
self.pipeline.load_state_dict(state_dict)
def print_info(self):
def print_dataset(dataset: BaseDataset):
config = dataset.get_config()
name = dataset.get_name()
Log.blue(f"Dataset: {name}")
for k,v in config.items():
Log.blue(f"\t{k}: {v}")
super().print_info()
table_size = 70
Log.blue(f"{'+' + '-' * (table_size // 2)} Pipeline {'-' * (table_size // 2)}" + '+')
Log.blue(self.pipeline)
Log.blue(f"{'+' + '-' * (table_size // 2)} Datasets {'-' * (table_size // 2)}" + '+')
for i, test_set in enumerate(self.test_set_list):
Log.blue(f"test dataset {i}: ")
print_dataset(test_set)
Log.blue(f"{'+' + '-' * (table_size // 2)}----------{'-' * (table_size // 2)}" + '+')

456
runners/simulator.py Normal file
View File

@@ -0,0 +1,456 @@
import pybullet as p
import pybullet_data
import numpy as np
import os
import time
from PytorchBoot.runners.runner import Runner
import PytorchBoot.stereotype as stereotype
from PytorchBoot.config import ConfigManager
from utils.control import ControlUtil
@stereotype.runner("simulator")
class Simulator(Runner):
CREATE: str = "create"
SIMULATE: str = "simulate"
INIT_GRIPPER_POSE:np.ndarray = np.asarray(
[[0.41869126 ,0.87596275 , 0.23951774 , 0.36005292],
[ 0.70787907 ,-0.4800251 , 0.51813998 ,-0.40499909],
[ 0.56884584, -0.04739109 ,-0.82107382 ,0.76881103],
[ 0. , 0. , 0. , 1. ]])
TURNTABLE_WORLD_TO_PYBULLET_WORLD:np.ndarray = np.asarray(
[[1, 0, 0, 0.8],
[0, 1, 0, 0],
[0, 0, 1, 0.5],
[0, 0, 0, 1]])
debug_pose = np.asarray([
[
0.992167055606842,
-0.10552699863910675,
0.06684812903404236,
-0.07388903945684433
],
[
0.10134342312812805,
0.3670985698699951,
-0.9246448874473572,
-0.41582486033439636
],
[
0.07303514331579208,
0.9241767525672913,
0.37491756677627563,
1.0754833221435547
],
[
0.0,
0.0,
0.0,
1.0
]])
def __init__(self, config_path):
super().__init__(config_path)
self.config_path = config_path
self.robot_id = None
self.turntable_id = None
self.target_id = None
camera_config = ConfigManager.get("simulation", "camera")
self.camera_params = {
'width': camera_config["width"],
'height': camera_config["height"],
'fov': camera_config["fov"],
'near': camera_config["near"],
'far': camera_config["far"]
}
self.sim_config = ConfigManager.get("simulation")
def run(self, cmd):
print(f"Simulator run {cmd}")
if cmd == self.CREATE:
self.prepare_env()
self.create_env()
elif cmd == self.SIMULATE:
self.simulate()
def simulate(self):
self.reset()
self.init()
debug_pose = Simulator.debug_pose
offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
debug_pose = debug_pose @ offset
for _ in range(10000):
debug_pose_2 = np.eye(4)
debug_pose_2[0,0] = -1
debug_pose_2[2,3] = 0.5
self.move_to(debug_pose_2)
# Wait for the system to stabilize
for _ in range(20): # Simulate 20 steps to ensure stability
p.stepSimulation()
time.sleep(0.001) # Add small delay to ensure physics simulation
depth_img, segm_img = self.take_picture()
p.stepSimulation()
def prepare_env(self):
p.connect(p.GUI)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
p.setGravity(0, 0, 0)
p.loadURDF("plane.urdf")
def create_env(self):
print(self.config)
robot_config = self.sim_config["robot"]
turntable_config = self.sim_config["turntable"]
target_config = self.sim_config["target"]
self.robot_id = p.loadURDF(
robot_config["urdf_path"],
robot_config["initial_position"],
p.getQuaternionFromEuler(robot_config["initial_orientation"]),
useFixedBase=True
)
p.changeDynamics(
self.robot_id,
linkIndex=-1,
mass=0,
linearDamping=0,
angularDamping=0,
lateralFriction=0
)
visual_shape_id = p.createVisualShape(
shapeType=p.GEOM_CYLINDER,
radius=turntable_config["radius"],
length=turntable_config["height"],
rgbaColor=[0.7, 0.7, 0.7, 1]
)
collision_shape_id = p.createCollisionShape(
shapeType=p.GEOM_CYLINDER,
radius=turntable_config["radius"],
height=turntable_config["height"]
)
self.turntable_id = p.createMultiBody(
baseMass=0, # 设置质量为0使其成为静态物体
baseCollisionShapeIndex=collision_shape_id,
baseVisualShapeIndex=visual_shape_id,
basePosition=turntable_config["center_position"]
)
# 禁用转盘的动力学
p.changeDynamics(
self.turntable_id,
-1, # -1 表示基座
mass=0,
linearDamping=0,
angularDamping=0,
lateralFriction=0
)
obj_path = os.path.join(target_config["obj_dir"], target_config["obj_name"], "mesh.obj")
assert os.path.exists(obj_path), f"Error: File not found at {obj_path}"
# 加载OBJ文件作为目标物体
target_visual = p.createVisualShape(
shapeType=p.GEOM_MESH,
fileName=obj_path,
rgbaColor=target_config["rgba_color"],
specularColor=[0.4, 0.4, 0.4],
meshScale=[target_config["scale"]] * 3
)
# 使用简化的碰撞形状
target_collision = p.createCollisionShape(
shapeType=p.GEOM_MESH,
fileName=obj_path,
meshScale=[target_config["scale"]] * 3,
flags=p.GEOM_FORCE_CONCAVE_TRIMESH # 尝试使用凹面网格
)
# 创建目标物体
self.target_id = p.createMultiBody(
baseMass=0, # 设置质量为0使其成为静态物体
baseCollisionShapeIndex=target_collision,
baseVisualShapeIndex=target_visual,
basePosition=[
turntable_config["center_position"][0],
turntable_config["center_position"][1],
turntable_config["height"] + turntable_config["center_position"][2]
],
baseOrientation=p.getQuaternionFromEuler([np.pi/2, 0, 0])
)
# 禁用目标物体的动力学
p.changeDynamics(
self.target_id,
-1, # -1 表示基座
mass=0,
linearDamping=0,
angularDamping=0,
lateralFriction=0
)
# 创建固定约束,将目标物体固定在转盘上
cid = p.createConstraint(
parentBodyUniqueId=self.turntable_id,
parentLinkIndex=-1, # -1 表示基座
childBodyUniqueId=self.target_id,
childLinkIndex=-1, # -1 表示基座
jointType=p.JOINT_FIXED,
jointAxis=[0, 0, 0],
parentFramePosition=[0, 0, 0], # 相对于转盘中心的偏移
childFramePosition=[0, 0, 0] # 相对于物体中心的偏移
)
# 设置约束参数
p.changeConstraint(cid, maxForce=100) # 设置最大力,确保约束稳定
def move_robot_to_pose(self, target_matrix):
# 从4x4齐次矩阵中提取位置前3个元素
position = target_matrix[:3, 3]
# 从3x3旋转矩阵中提取方向四元数
R = target_matrix[:3, :3]
# 计算四元数的w分量
w = np.sqrt(max(0, 1 + R[0,0] + R[1,1] + R[2,2])) / 2
# 避免除零错误,同时处理不同情况
if abs(w) < 1e-8:
# 当w接近0时的特殊情况
x = np.sqrt(max(0, 1 + R[0,0] - R[1,1] - R[2,2])) / 2
y = np.sqrt(max(0, 1 - R[0,0] + R[1,1] - R[2,2])) / 2
z = np.sqrt(max(0, 1 - R[0,0] - R[1,1] + R[2,2])) / 2
# 确定符号
if R[2,1] - R[1,2] < 0: x = -x
if R[0,2] - R[2,0] < 0: y = -y
if R[1,0] - R[0,1] < 0: z = -z
else:
# 正常情况
x = (R[2,1] - R[1,2]) / (4 * w)
y = (R[0,2] - R[2,0]) / (4 * w)
z = (R[1,0] - R[0,1]) / (4 * w)
orientation = (x, y, z, w)
# 设置IK求解参数
num_joints = p.getNumJoints(self.robot_id)
lower_limits = []
upper_limits = []
joint_ranges = []
rest_poses = []
# 获取关节限制和默认姿态
for i in range(num_joints):
joint_info = p.getJointInfo(self.robot_id, i)
lower_limits.append(joint_info[8])
upper_limits.append(joint_info[9])
joint_ranges.append(joint_info[9] - joint_info[8])
rest_poses.append(0) # 可以设置一个较好的默认姿态
# 使用增强版IK求解器考虑碰撞避障
joint_poses = p.calculateInverseKinematics(
self.robot_id,
7, # end effector link index
position,
orientation,
lowerLimits=lower_limits,
upperLimits=upper_limits,
jointRanges=joint_ranges,
restPoses=rest_poses,
maxNumIterations=100,
residualThreshold=1e-4
)
# 分步移动到目标位置,同时检查碰撞
current_poses = [p.getJointState(self.robot_id, i)[0] for i in range(7)]
steps = 50 # 分50步移动
for step in range(steps):
# 线性插值计算中间位置
intermediate_poses = []
for current, target in zip(current_poses, joint_poses):
t = (step + 1) / steps
intermediate = current + (target - current) * t
intermediate_poses.append(intermediate)
# 设置关节位置
for i in range(7):
p.setJointMotorControl2(
self.robot_id,
i,
p.POSITION_CONTROL,
intermediate_poses[i]
)
# 执行一步模拟
p.stepSimulation()
# 检查碰撞
if p.getContactPoints(self.robot_id, self.turntable_id):
print("检测到潜在碰撞,停止移动")
return False
return True
def rotate_turntable(self, angle_degrees):
# 旋转转盘
current_pos, current_orn = p.getBasePositionAndOrientation(self.turntable_id)
current_orn = p.getEulerFromQuaternion(current_orn)
new_orn = list(current_orn)
new_orn[2] += np.radians(angle_degrees)
new_orn_quat = p.getQuaternionFromEuler(new_orn)
p.resetBasePositionAndOrientation(
self.turntable_id,
current_pos,
new_orn_quat
)
# 同时旋转目标物体
target_pos, target_orn = p.getBasePositionAndOrientation(self.target_id)
target_orn = p.getEulerFromQuaternion(target_orn)
# 更新目标物体的方向
target_orn = list(target_orn)
target_orn[2] += np.radians(angle_degrees)
target_orn_quat = p.getQuaternionFromEuler(target_orn)
# 计算物体新的位置(绕转盘中心旋转)
turntable_center = current_pos
relative_pos = np.array(target_pos) - np.array(turntable_center)
# 创建旋转矩阵
theta = np.radians(angle_degrees)
rotation_matrix = np.array([
[np.cos(theta), -np.sin(theta), 0],
[np.sin(theta), np.cos(theta), 0],
[0, 0, 1]
])
# 计算新的相对位置
new_relative_pos = rotation_matrix.dot(relative_pos)
new_pos = np.array(turntable_center) + new_relative_pos
# 更新目标物体的位置和方向
p.resetBasePositionAndOrientation(
self.target_id,
new_pos,
target_orn_quat
)
def get_camera_pose(self):
end_effector_link = 7 # Franka末端执行器的链接索引
state = p.getLinkState(self.robot_id, end_effector_link)
ee_pos = state[0] # 世界坐标系中的位置
camera_orn = state[1] # 世界坐标系中的朝向(四元数)
# 计算相机的视角矩阵
rot_matrix = p.getMatrixFromQuaternion(camera_orn)
rot_matrix = np.array(rot_matrix).reshape(3, 3)
# 相机的前向向量与末端执行器的x轴对齐
camera_forward = rot_matrix.dot(np.array([0, 0, 1])) # x轴方向
# 将相机位置向前偏移0.1米
offset = 0.12
camera_pos = np.array(ee_pos) + camera_forward * offset
camera_target = camera_pos + camera_forward
# 相机的上向量与末端执行器的z轴对齐
camera_up = rot_matrix.dot(np.array([1, 0, 0])) # z轴方向
return camera_pos, camera_target, camera_up
def take_picture(self):
camera_pos, camera_target, camera_up = self.get_camera_pose()
view_matrix = p.computeViewMatrix(
cameraEyePosition=camera_pos,
cameraTargetPosition=camera_target,
cameraUpVector=camera_up
)
projection_matrix = p.computeProjectionMatrixFOV(
fov=self.camera_params['fov'],
aspect=self.camera_params['width'] / self.camera_params['height'],
nearVal=self.camera_params['near'],
farVal=self.camera_params['far']
)
_,_,rgb_img,depth_img,segm_img = p.getCameraImage(
width=self.camera_params['width'],
height=self.camera_params['height'],
viewMatrix=view_matrix,
projectionMatrix=projection_matrix,
renderer=p.ER_BULLET_HARDWARE_OPENGL
)
depth_img = self.camera_params['far'] * self.camera_params['near'] / (
self.camera_params['far'] - (self.camera_params['far'] - self.camera_params['near']) * depth_img)
depth_img = np.array(depth_img)
segm_img = np.array(segm_img)
return depth_img, segm_img
def reset(self):
target_pos = [0.5, 0, 1]
target_orn = p.getQuaternionFromEuler([np.pi, 0, 0])
target_matrix = np.eye(4)
target_matrix[:3, 3] = target_pos
target_matrix[:3, :3] = np.asarray(p.getMatrixFromQuaternion(target_orn)).reshape(3,3)
self.move_robot_to_pose(target_matrix)
def init(self):
self.move_to(Simulator.INIT_GRIPPER_POSE)
def move_to(self, pose: np.ndarray):
#delta_degree, min_new_cam_to_world = ControlUtil.solve_display_table_rot_and_cam_to_world(pose)
#print(delta_degree)
min_new_cam_to_pybullet_world = Simulator.TURNTABLE_WORLD_TO_PYBULLET_WORLD@pose
self.move_to_cam_pose(min_new_cam_to_pybullet_world)
#self.rotate_turntable(delta_degree)
def __del__(self):
p.disconnect()
def create_experiment(self, backup_name=None):
return super().create_experiment(backup_name)
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def move_to_cam_pose(self, camera_pose: np.ndarray):
# 从相机位姿矩阵中提取位置和旋转矩阵
camera_pos = camera_pose[:3, 3]
R_camera = camera_pose[:3, :3]
# 相机的朝向向量z轴
forward = R_camera[:, 2]
# 由于相机与末端执行器之间有固定偏移,需要计算末端执行器位置
# 相机在末端执行器前方0.12米
gripper_pos = camera_pos - forward * 0.12
# 末端执行器的旋转矩阵需要考虑与相机坐标系的固定变换
# 假设相机的forward对应gripper的z轴相机的x轴对应gripper的x轴
R_gripper = R_camera
# 构建4x4齐次变换矩阵
gripper_pose = np.eye(4)
gripper_pose[:3, :3] = R_gripper
gripper_pose[:3, 3] = gripper_pos
print(gripper_pose)
# 移动机器人到计算出的位姿
return self.move_robot_to_pose(gripper_pose)

View File

@@ -16,47 +16,48 @@ from utils.pts import PtsUtil
class StrategyGenerator(Runner):
def __init__(self, config):
super().__init__(config)
self.load_experiment("generate")
self.load_experiment("generate_strategy")
self.status_info = {
"status_manager": status_manager,
"app_name": "generate",
"app_name": "generate_strategy",
"runner_name": "strategy_generator"
}
self.to_specified_dir = ConfigManager.get("runner", "generate", "to_specified_dir")
self.save_best_combined_pts = ConfigManager.get("runner", "generate", "save_best_combined_points")
self.save_mesh = ConfigManager.get("runner", "generate", "save_mesh")
self.filter_degree = ConfigManager.get("runner", "generate", "filter_degree")
self.overwrite = ConfigManager.get("runner", "generate", "overwrite")
self.seq_num = ConfigManager.get("runner","generate","seq_num")
self.overlap_area_threshold = ConfigManager.get("runner","generate","overlap_area_threshold")
self.compute_with_normal = ConfigManager.get("runner","generate","compute_with_normal")
self.scan_points_threshold = ConfigManager.get("runner","generate","scan_points_threshold")
def run(self):
dataset_name_list = ConfigManager.get("runner", "generate", "dataset_list")
voxel_threshold, overlap_threshold = ConfigManager.get("runner","generate","voxel_threshold"), ConfigManager.get("runner","generate","overlap_threshold")
self.save_pts = ConfigManager.get("runner","generate","save_points")
voxel_threshold = ConfigManager.get("runner","generate","voxel_threshold")
for dataset_idx in range(len(dataset_name_list)):
dataset_name = dataset_name_list[dataset_idx]
status_manager.set_progress("generate", "strategy_generator", "dataset", dataset_idx, len(dataset_name_list))
status_manager.set_progress("generate_strategy", "strategy_generator", "dataset", dataset_idx, len(dataset_name_list))
root_dir = ConfigManager.get("datasets", dataset_name, "root_dir")
model_dir = ConfigManager.get("datasets", dataset_name, "model_dir")
from_idx = ConfigManager.get("datasets",dataset_name,"from")
to_idx = ConfigManager.get("datasets",dataset_name,"to")
scene_name_list = os.listdir(root_dir)
if to_idx == -1:
to_idx = len(scene_name_list)
cnt = 0
total = len(scene_name_list)
for scene_name in scene_name_list:
total = len(scene_name_list[from_idx:to_idx])
Log.info(f"Processing Dataset: {dataset_name}, From: {from_idx}, To: {to_idx}")
for scene_name in scene_name_list[from_idx:to_idx]:
Log.info(f"({dataset_name})Processing [{cnt}/{total}]: {scene_name}")
status_manager.set_progress("generate", "strategy_generator", "scene", cnt, total)
diag = DataLoadUtil.get_bbox_diag(model_dir, scene_name)
voxel_threshold = diag*0.02
status_manager.set_status("generate", "strategy_generator", "voxel_threshold", voxel_threshold)
output_label_path = DataLoadUtil.get_label_path(root_dir, scene_name)
status_manager.set_progress("generate_strategy", "strategy_generator", "scene", cnt, total)
output_label_path = DataLoadUtil.get_label_path(root_dir, scene_name,0)
if os.path.exists(output_label_path) and not self.overwrite:
Log.info(f"Scene <{scene_name}> Already Exists, Skip")
cnt += 1
continue
self.generate_sequence(root_dir, model_dir, scene_name,voxel_threshold, overlap_threshold)
self.generate_sequence(root_dir, scene_name,voxel_threshold)
cnt += 1
status_manager.set_progress("generate", "strategy_generator", "scene", total, total)
status_manager.set_progress("generate", "strategy_generator", "dataset", len(dataset_name_list), len(dataset_name_list))
status_manager.set_progress("generate_strategy", "strategy_generator", "scene", total, total)
status_manager.set_progress("generate_strategy", "strategy_generator", "dataset", len(dataset_name_list), len(dataset_name_list))
def create_experiment(self, backup_name=None):
super().create_experiment(backup_name)
@@ -66,32 +67,61 @@ class StrategyGenerator(Runner):
def load_experiment(self, backup_name=None):
super().load_experiment(backup_name)
def generate_sequence(self, root, model_dir, scene_name, voxel_threshold, overlap_threshold):
status_manager.set_status("generate", "strategy_generator", "scene", scene_name)
def generate_sequence(self, root, scene_name, voxel_threshold):
status_manager.set_status("generate_strategy", "strategy_generator", "scene", scene_name)
frame_num = DataLoadUtil.get_scene_seq_length(root, scene_name)
model_points_normals = DataLoadUtil.load_points_normals(root, scene_name)
model_pts = model_points_normals[:,:3]
down_sampled_model_pts = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold)
down_sampled_model_pts, idx = PtsUtil.voxel_downsample_point_cloud(model_pts, voxel_threshold, require_idx=True)
down_sampled_model_nrm = model_points_normals[idx, 3:]
pts_list = []
nrm_list = []
scan_points_indices_list = []
non_zero_cnt = 0
for frame_idx in range(frame_num):
path = DataLoadUtil.get_path(root, scene_name, frame_idx)
cam_params = DataLoadUtil.load_cam_info(path, binocular=True)
status_manager.set_progress("generate", "strategy_generator", "loading frame", frame_idx, frame_num)
point_cloud = DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True)
#display_table = None #DataLoadUtil.get_target_point_cloud_world_from_path(path, binocular=True, target_mask_label=()) #TODO
sampled_point_cloud = ReconstructionUtil.filter_points(point_cloud, model_points_normals, cam_pose=cam_params["cam_to_world"], voxel_size=voxel_threshold, theta=self.filter_degree)
status_manager.set_progress("generate_strategy", "strategy_generator", "loading frame", frame_idx, frame_num)
pts_path = os.path.join(root,scene_name, "pts", f"{frame_idx}.npy")
nrm_path = os.path.join(root,scene_name, "nrm", f"{frame_idx}.npy")
idx_path = os.path.join(root,scene_name, "scan_points_indices", f"{frame_idx}.npy")
if self.save_pts:
pts_dir = os.path.join(root,scene_name, "pts")
if not os.path.exists(pts_dir):
os.makedirs(pts_dir)
np.savetxt(os.path.join(pts_dir, f"{frame_idx}.txt"), sampled_point_cloud)
pts_list.append(sampled_point_cloud)
status_manager.set_progress("generate", "strategy_generator", "loading frame", frame_num, frame_num)
pts = np.load(pts_path)
if self.compute_with_normal:
if pts.shape[0] == 0:
nrm = np.zeros((0,3))
else:
nrm = np.load(nrm_path)
nrm_list.append(nrm)
pts_list.append(pts)
indices = np.load(idx_path)
scan_points_indices_list.append(indices)
if pts.shape[0] > 0:
non_zero_cnt += 1
status_manager.set_progress("generate_strategy", "strategy_generator", "loading frame", frame_num, frame_num)
limited_useful_view, _, best_combined_pts = ReconstructionUtil.compute_next_best_view_sequence_with_overlap(down_sampled_model_pts, pts_list, threshold=voxel_threshold, overlap_threshold=overlap_threshold, status_info=self.status_info)
seq_num = min(self.seq_num, non_zero_cnt)
init_view_list = []
idx = 0
while len(init_view_list) < seq_num and idx < len(pts_list):
if pts_list[idx].shape[0] > 50:
init_view_list.append(idx)
idx += 1
seq_idx = 0
import time
for init_view in init_view_list:
status_manager.set_progress("generate_strategy", "strategy_generator", "computing sequence", seq_idx, len(init_view_list))
start = time.time()
if not self.compute_with_normal:
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence(down_sampled_model_pts, pts_list, scan_points_indices_list = scan_points_indices_list,init_view=init_view,
threshold=voxel_threshold, scan_points_threshold=self.scan_points_threshold, overlap_area_threshold=self.overlap_area_threshold, status_info=self.status_info)
else:
limited_useful_view, _, _ = ReconstructionUtil.compute_next_best_view_sequence_with_normal(down_sampled_model_pts, down_sampled_model_nrm, pts_list, nrm_list, scan_points_indices_list = scan_points_indices_list,init_view=init_view,
threshold=voxel_threshold, scan_points_threshold=self.scan_points_threshold, overlap_area_threshold=self.overlap_area_threshold, status_info=self.status_info)
end = time.time()
print(f"Time: {end-start}")
data_pairs = self.generate_data_pairs(limited_useful_view)
seq_save_data = {
"data_pairs": data_pairs,
@@ -99,22 +129,17 @@ class StrategyGenerator(Runner):
"max_coverage_rate": limited_useful_view[-1][1]
}
status_manager.set_status("generate", "strategy_generator", "max_coverage_rate", limited_useful_view[-1][1])
status_manager.set_status("generate_strategy", "strategy_generator", "max_coverage_rate", limited_useful_view[-1][1])
Log.success(f"Scene <{scene_name}> Finished, Max Coverage Rate: {limited_useful_view[-1][1]}, Best Sequence length: {len(limited_useful_view)}")
output_label_path = DataLoadUtil.get_label_path(root, scene_name)
output_best_reconstructed_pts_path = os.path.join(root,scene_name, f"best_reconstructed_pts.txt")
output_label_path = DataLoadUtil.get_label_path(root, scene_name, seq_idx)
with open(output_label_path, 'w') as f:
json.dump(seq_save_data, f)
seq_idx += 1
status_manager.set_progress("generate_strategy", "strategy_generator", "computing sequence", len(init_view_list), len(init_view_list))
if self.save_best_combined_pts:
np.savetxt(output_best_reconstructed_pts_path, best_combined_pts)
if self.save_mesh:
DataLoadUtil.save_target_mesh_at_world_space(root, model_dir, scene_name)
DataLoadUtil.save_downsampled_world_model_points(root, scene_name, down_sampled_model_pts)
def generate_data_pairs(self, useful_view):
data_pairs = []

View File

@@ -9,7 +9,8 @@ class ViewGenerator(Runner):
self.config_path = config_path
def run(self):
subprocess.run(['blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
result = subprocess.run(['/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', '../blender/run_blender.py', '--', self.config_path])
print()
def create_experiment(self, backup_name=None):
return super().create_experiment(backup_name)

59
utils/control.py Normal file
View File

@@ -0,0 +1,59 @@
import numpy as np
from scipy.spatial.transform import Rotation as R
import time
class ControlUtil:
curr_rotation = 0
@staticmethod
def check_limit(new_cam_to_world):
if new_cam_to_world[0,3] < 0 or new_cam_to_world[1,3] > 0:
# if new_cam_to_world[0,3] > 0:
return False
x = abs(new_cam_to_world[0,3])
y = abs(new_cam_to_world[1,3])
tan_y_x = y/x
min_angle = 0 / 180 * np.pi
max_angle = 90 / 180 * np.pi
if tan_y_x < np.tan(min_angle) or tan_y_x > np.tan(max_angle):
return False
return True
@staticmethod
def solve_display_table_rot_and_cam_to_world(cam_to_world: np.ndarray) -> tuple:
if ControlUtil.check_limit(cam_to_world):
return 0, cam_to_world
else:
min_display_table_rot = 180
min_new_cam_to_world = None
for display_table_rot in np.linspace(0.1,360, 1800):
new_world_to_world = ControlUtil.get_z_axis_rot_mat(display_table_rot)
new_cam_to_new_world = cam_to_world
new_cam_to_world = new_world_to_world @ new_cam_to_new_world
if ControlUtil.check_limit(new_cam_to_world):
if display_table_rot < min_display_table_rot:
min_display_table_rot, min_new_cam_to_world = display_table_rot, new_cam_to_world
if abs(display_table_rot - 360) < min_display_table_rot:
min_display_table_rot, min_new_cam_to_world = display_table_rot - 360, new_cam_to_world
if min_new_cam_to_world is None:
raise ValueError("No valid display table rotation found")
delta_degree = min_display_table_rot - ControlUtil.curr_rotation
ControlUtil.curr_rotation = min_display_table_rot
return delta_degree, min_new_cam_to_world
@staticmethod
def get_z_axis_rot_mat(degree):
radian = np.radians(degree)
return np.array([
[np.cos(radian), -np.sin(radian), 0, 0],
[np.sin(radian), np.cos(radian), 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]
])

View File

@@ -3,9 +3,45 @@ import numpy as np
import json
import cv2
import trimesh
import torch
import OpenEXR
import Imath
from utils.pts import PtsUtil
class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def load_exr_image(file_path):
exr_file = OpenEXR.InputFile(file_path)
header = exr_file.header()
dw = header['dataWindow']
width = dw.max.x - dw.min.x + 1
height = dw.max.y - dw.min.y + 1
float_channels = ['R', 'G', 'B']
img_data = []
for channel in float_channels:
channel_data = exr_file.channel(channel)
img_data.append(np.frombuffer(channel_data, dtype=np.float16).reshape((height, width)))
img = np.stack(img_data, axis=-1)
return img
@staticmethod
def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
display_table_info = scene_info["display_table"]
return display_table_info
@staticmethod
def get_display_table_top(root, scene_name):
display_table_height = DataLoadUtil.get_display_table_info(root, scene_name)[
"height"
]
display_table_top = DataLoadUtil.TABLE_POSITION + np.asarray(
[0, 0, display_table_height]
)
return display_table_top
@staticmethod
def get_path(root, scene_name, frame_idx):
@@ -13,13 +49,18 @@ class DataLoadUtil:
return path
@staticmethod
def get_label_path(root, scene_name):
path = os.path.join(root,scene_name, f"label.json")
return path
def get_label_num(root, scene_name):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
return 0
return len(os.listdir(label_dir))
@staticmethod
def get_sampled_model_points_path(root, scene_name):
path = os.path.join(root,scene_name, f"sampled_model_points.txt")
def get_label_path(root, scene_name, seq_idx):
label_dir = os.path.join(root, scene_name, "label")
if not os.path.exists(label_dir):
os.makedirs(label_dir)
path = os.path.join(label_dir, f"{seq_idx}.json")
return path
@staticmethod
@@ -27,17 +68,6 @@ class DataLoadUtil:
camera_params_path = os.path.join(root, scene_name, "camera_params")
return len(os.listdir(camera_params_path))
@staticmethod
def load_downsampled_world_model_points(root, scene_name):
model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name)
model_points = np.loadtxt(model_path)
return model_points
@staticmethod
def save_downsampled_world_model_points(root, scene_name, model_points):
model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name)
np.savetxt(model_path, model_points)
@staticmethod
def load_mesh_at(model_dir, object_name, world_object_pose):
model_path = os.path.join(model_dir, object_name, "mesh.obj")
@@ -53,30 +83,6 @@ class DataLoadUtil:
diagonal_length = np.linalg.norm(bbox)
return diagonal_length
@staticmethod
def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_target_mesh_at_world_space(root, model_dir, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def load_scene_info(root, scene_name):
scene_info_path = os.path.join(root, scene_name, "scene_info.json")
@@ -85,15 +91,11 @@ class DataLoadUtil:
return scene_info
@staticmethod
def load_target_object_pose(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
return pose_mat
def load_target_pts_num_dict(root, scene_name):
target_pts_num_path = os.path.join(root, scene_name, "target_pts_num.json")
with open(target_pts_num_path, "r") as f:
target_pts_num_dict = json.load(f)
return target_pts_num_dict
@staticmethod
def load_depth(path, min_depth=0.01, max_depth=5.0, binocular=False):
@@ -107,82 +109,187 @@ class DataLoadUtil:
return depth_meters
if binocular:
depth_path_L = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_L.png")
depth_path_R = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_R.png")
depth_meters_L = load_depth_from_real_path(depth_path_L, min_depth, max_depth)
depth_meters_R = load_depth_from_real_path(depth_path_R, min_depth, max_depth)
depth_path_L = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_L.png"
)
depth_path_R = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + "_R.png"
)
depth_meters_L = load_depth_from_real_path(
depth_path_L, min_depth, max_depth
)
depth_meters_R = load_depth_from_real_path(
depth_path_R, min_depth, max_depth
)
return depth_meters_L, depth_meters_R
else:
depth_path = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + ".png")
depth_path = os.path.join(
os.path.dirname(path), "depth", os.path.basename(path) + ".png"
)
depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
return depth_meters
@staticmethod
def load_seg(path, binocular=False):
if binocular:
def load_seg(path, binocular=False, left_only=False):
if binocular and not left_only:
def clean_mask(mask_image):
green = [0, 255, 0, 255]
red = [255, 0, 0, 255]
green = [0, 255, 0]
red = [255, 0, 0]
threshold = 2
mask_image = np.where(np.abs(mask_image - green) <= threshold, green, mask_image)
mask_image = np.where(np.abs(mask_image - red) <= threshold, red, mask_image)
mask_image = np.where(
np.abs(mask_image - green) <= threshold, green, mask_image
)
mask_image = np.where(
np.abs(mask_image - red) <= threshold, red, mask_image
)
return mask_image
mask_path_L = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_L.png")
mask_path_L = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
mask_path_R = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_R.png")
mask_path_R = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_R.png"
)
mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
return mask_image_L, mask_image_R
else:
mask_path = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + ".png")
mask_image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
if binocular and left_only:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + "_L.png"
)
else:
mask_path = os.path.join(
os.path.dirname(path), "mask", os.path.basename(path) + ".png"
)
mask_image = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)
return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False, file_type="exr"):
if binocular and not left_only:
normal_path_L = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
)
normal_image_L = DataLoadUtil.load_exr_image(normal_path_L)
normal_path_R = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_R.{file_type}"
)
normal_image_R = DataLoadUtil.load_exr_image(normal_path_R)
normalized_normal_image_L = normal_image_L * 2.0 - 1.0
normalized_normal_image_R = normal_image_R * 2.0 - 1.0
return normalized_normal_image_L, normalized_normal_image_R
else:
if binocular and left_only:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f"_L.{file_type}"
)
else:
normal_path = os.path.join(
os.path.dirname(path), "normal", os.path.basename(path) + f".{file_type}"
)
normal_image = DataLoadUtil.load_exr_image(normal_path)
normalized_normal_image = normal_image * 2.0 - 1.0
return normalized_normal_image
@staticmethod
def load_label(path):
with open(path, 'r') as f:
with open(path, "r") as f:
label_data = json.load(f)
return label_data
@staticmethod
def load_rgb(path):
rgb_path = os.path.join(os.path.dirname(path), "rgb", os.path.basename(path) + ".png")
rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
return rgb_image
def load_from_preprocessed_pts(path, file_type="npy"):
npy_path = os.path.join(
os.path.dirname(path), "pts", os.path.basename(path) + "." + file_type
)
if file_type == "txt":
pts = np.loadtxt(npy_path)
else:
pts = np.load(npy_path)
return pts
@staticmethod
def load_from_preprocessed_nrm(path, file_type="npy"):
npy_path = os.path.join(
os.path.dirname(path), "nrm", os.path.basename(path) + "." + file_type
)
if file_type == "txt":
nrm = np.loadtxt(npy_path)
else:
nrm = np.load(npy_path)
return nrm
@staticmethod
def cam_pose_transformation(cam_pose_before):
offset = np.asarray([
[1, 0, 0, 0],
[0, -1, 0, 0],
[0, 0, -1, 0],
[0, 0, 0, 1]])
offset = np.asarray([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
cam_pose_after = cam_pose_before @ offset
return cam_pose_after
@staticmethod
def load_cam_info(path, binocular=False):
camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json")
with open(camera_params_path, 'r') as f:
def load_cam_info(path, binocular=False, display_table_as_world_space_origin=True):
scene_dir = os.path.dirname(path)
root_dir = os.path.dirname(scene_dir)
scene_name = os.path.basename(scene_dir)
camera_params_path = os.path.join(
os.path.dirname(path), "camera_params", os.path.basename(path) + ".json"
)
with open(camera_params_path, "r") as f:
label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
if display_table_as_world_space_origin:
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = {
"cam_to_world": cam_to_world,
"cam_intrinsic": cam_intrinsic,
"far_plane": label_data["far_plane"],
"near_plane": label_data["near_plane"]
"near_plane": label_data["near_plane"],
}
if binocular:
cam_to_world_R = np.asarray(label_data["extrinsic_R"])
cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
cam_to_world_O = np.asarray(label_data["extrinsic_cam_object"])
cam_to_world_O = DataLoadUtil.cam_pose_transformation(cam_to_world_O)
if display_table_as_world_space_origin:
cam_to_world_O = np.dot(world_to_display_table, cam_to_world_O)
cam_to_world_R = np.dot(world_to_display_table, cam_to_world_R)
cam_info["cam_to_world_O"] = cam_to_world_O
cam_info["cam_to_world_R"] = cam_to_world_R
return cam_info
@staticmethod
def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)):
def get_real_cam_O_from_cam_L(
cam_L, cam_O_to_cam_L, scene_path, display_table_as_world_space_origin=True
):
root_dir = os.path.dirname(scene_path)
scene_name = os.path.basename(scene_path)
if isinstance(cam_L, torch.Tensor):
cam_L = cam_L.cpu().numpy()
nO_to_display_table_pose = cam_L @ cam_O_to_cam_L
if display_table_as_world_space_origin:
display_table_to_world = np.eye(4)
display_table_to_world[:3, 3] = DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
nO_to_world_pose = np.dot(display_table_to_world, nO_to_display_table_pose)
nO_to_world_pose = DataLoadUtil.cam_pose_transformation(nO_to_world_pose)
return nO_to_world_pose
@staticmethod
def get_target_point_cloud(
depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0, 255, 0, 255), require_full_points=False
):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
@@ -194,72 +301,91 @@ class DataLoadUtil:
target_mask = (mask == target_mask_label).all(axis=-1)
target_points_camera = points_camera[target_mask]
target_points_camera_aug = np.concatenate([target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1)
target_points_camera_aug = np.concatenate(
[target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1
)
target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
return {
data = {
"points_world": target_points_world,
"points_camera": target_points_camera
"points_camera": target_points_camera,
}
return data
@staticmethod
def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
h, w = depth.shape
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
i, j = np.meshgrid(np.arange(w), np.arange(h), indexing="xy")
z = depth
x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1)
points_camera_aug = np.concatenate(
[points_camera, np.ones((points_camera.shape[0], 1))], axis=-1
)
points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return {
"points_world": points_world,
"points_camera": points_camera
}
return {"points_world": points_world, "points_camera": points_camera}
@staticmethod
def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)):
def get_target_point_cloud_world_from_path(
path,
binocular=False,
random_downsample_N=65536,
voxel_size=0.005,
target_mask_label=(0, 255, 0, 255),
display_table_mask_label=(0, 0, 255, 255),
get_display_table_pts=False,
require_normal=False,
):
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
if binocular:
depth_L, depth_R = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"], binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
point_cloud_L = DataLoadUtil.get_target_point_cloud(depth_L, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask_L, target_mask_label)['points_world']
point_cloud_R = DataLoadUtil.get_target_point_cloud(depth_R, cam_info['cam_intrinsic'], cam_info['cam_to_world_R'], mask_R, target_mask_label)['points_world']
point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, random_downsample_N)
point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, random_downsample_N)
overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size)
point_cloud_L = DataLoadUtil.get_target_point_cloud(
depth_L,
cam_info["cam_intrinsic"],
cam_info["cam_to_world"],
mask_L,
target_mask_label,
)["points_world"]
point_cloud_R = DataLoadUtil.get_target_point_cloud(
depth_R,
cam_info["cam_intrinsic"],
cam_info["cam_to_world_R"],
mask_R,
target_mask_label,
)["points_world"]
point_cloud_L = PtsUtil.random_downsample_point_cloud(
point_cloud_L, random_downsample_N
)
point_cloud_R = PtsUtil.random_downsample_point_cloud(
point_cloud_R, random_downsample_N
)
overlap_points = PtsUtil.get_overlapping_points(
point_cloud_L, point_cloud_R, voxel_size
)
return overlap_points
else:
depth = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane'])
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"], cam_info["far_plane"]
)
mask = DataLoadUtil.load_seg(path)
point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask)['points_world']
point_cloud = DataLoadUtil.get_target_point_cloud(
depth, cam_info["cam_intrinsic"], cam_info["cam_to_world"], mask
)["points_world"]
return point_cloud
@staticmethod
def voxelize_points(points, voxel_size):
voxel_indices = np.floor(points / voxel_size).astype(np.int32)
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels
@staticmethod
def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005):
voxels_L, indices_L = DataLoadUtil.voxelize_points(point_cloud_L, voxel_size)
voxels_R, _ = DataLoadUtil.voxelize_points(point_cloud_R, voxel_size)
voxel_indices_L = voxels_L.view([('', voxels_L.dtype)]*3)
voxel_indices_R = voxels_R.view([('', voxels_R.dtype)]*3)
overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R)
mask_L = np.isin(indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0])
overlapping_points = point_cloud_L[mask_L]
return overlapping_points
@staticmethod
def load_points_normals(root, scene_name):
def load_points_normals(root, scene_name, display_table_as_world_space_origin=True):
points_path = os.path.join(root, scene_name, "points_and_normals.txt")
points_normals = np.loadtxt(points_path)
if display_table_as_world_space_origin:
points_normals[:, :3] = points_normals[
:, :3
] - DataLoadUtil.get_display_table_top(root, scene_name)
return points_normals

View File

@@ -1,14 +1,75 @@
import numpy as np
import open3d as o3d
import torch
class PtsUtil:
@staticmethod
def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005):
o3d_pc = o3d.geometry.PointCloud()
o3d_pc.points = o3d.utility.Vector3dVector(point_cloud)
downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
return np.asarray(downsampled_pc.points)
def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005, require_idx=False):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
if require_idx:
_, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
return downsampled_points, idx_unique
else:
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels[0]*voxel_size
@staticmethod
def voxel_downsample_point_cloud_random(point_cloud, voxel_size=0.005, require_idx=False):
voxel_indices = np.floor(point_cloud / voxel_size).astype(np.int32)
unique_voxels, inverse, counts = np.unique(voxel_indices, axis=0, return_inverse=True, return_counts=True)
idx_sort = np.argsort(inverse)
idx_unique = idx_sort[np.cumsum(counts)-counts]
downsampled_points = point_cloud[idx_unique]
if require_idx:
return downsampled_points, inverse
return downsampled_points
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
if point_cloud.shape[0] == 0:
if require_idx:
return point_cloud, np.array([])
return point_cloud
idx = np.random.choice(len(point_cloud), num_points, replace=True)
if require_idx:
return point_cloud[idx], idx
return point_cloud[idx]
@staticmethod
def fps_downsample_point_cloud(point_cloud, num_points, require_idx=False):
N = point_cloud.shape[0]
mask = np.zeros(N, dtype=bool)
sampled_indices = np.zeros(num_points, dtype=int)
sampled_indices[0] = np.random.randint(0, N)
distances = np.linalg.norm(point_cloud - point_cloud[sampled_indices[0]], axis=1)
for i in range(1, num_points):
farthest_index = np.argmax(distances)
sampled_indices[i] = farthest_index
mask[farthest_index] = True
new_distances = np.linalg.norm(point_cloud - point_cloud[farthest_index], axis=1)
distances = np.minimum(distances, new_distances)
sampled_points = point_cloud[sampled_indices]
if require_idx:
return sampled_points, sampled_indices
return sampled_points
@staticmethod
def random_downsample_point_cloud_tensor(point_cloud, num_points):
idx = torch.randint(0, len(point_cloud), (num_points,))
return point_cloud[idx]
@staticmethod
def voxelize_points(points, voxel_size):
voxel_indices = np.floor(points / voxel_size).astype(np.int32)
unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
return unique_voxels
@staticmethod
def transform_point_cloud(points, pose_mat):
@@ -17,6 +78,40 @@ class PtsUtil:
return points_h[:, :3]
@staticmethod
def random_downsample_point_cloud(point_cloud, num_points):
idx = np.random.choice(len(point_cloud), num_points, replace=True)
return point_cloud[idx]
def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005, require_idx=False):
voxels_L, indices_L = PtsUtil.voxelize_points(point_cloud_L, voxel_size)
voxels_R, _ = PtsUtil.voxelize_points(point_cloud_R, voxel_size)
voxel_indices_L = voxels_L.view([("", voxels_L.dtype)] * 3)
voxel_indices_R = voxels_R.view([("", voxels_R.dtype)] * 3)
overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R)
mask_L = np.isin(
indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0]
)
overlapping_points = point_cloud_L[mask_L]
if require_idx:
return overlapping_points, mask_L
return overlapping_points
@staticmethod
def filter_points(points, normals, cam_pose, theta_limit=45, z_range=(0.2, 0.45)):
""" filter with normal """
normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
cos_theta = np.dot(normals_normalized, np.array([0, 0, 1]))
theta = np.arccos(cos_theta) * 180 / np.pi
idx = theta < theta_limit
filtered_sampled_points = points[idx]
filtered_normals = normals[idx]
""" filter with z range """
points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
z_filtered_points = filtered_sampled_points[idx]
z_filtered_normals = filtered_normals[idx]
return z_filtered_points[:, :3], z_filtered_normals
@staticmethod
def point_to_hash(point, voxel_size):
return tuple(np.floor(point / voxel_size).astype(int))

View File

@@ -8,79 +8,123 @@ class ReconstructionUtil:
def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(target_point_cloud)
covered_points = np.sum(distances < threshold)
coverage_rate = covered_points / target_point_cloud.shape[0]
return coverage_rate
covered_points_num = np.sum(distances < threshold*2)
coverage_rate = covered_points_num / target_point_cloud.shape[0]
return coverage_rate, covered_points_num
@staticmethod
def compute_overlap_rate(new_point_cloud, combined_point_cloud, threshold=0.01):
def compute_coverage_rate_with_normal(target_point_cloud, combined_point_cloud, target_normal, combined_normal, threshold=0.01, normal_threshold=0.1):
kdtree = cKDTree(combined_point_cloud)
distances, indices = kdtree.query(target_point_cloud)
is_covered_by_distance = distances < threshold*2
normal_dots = np.einsum('ij,ij->i', target_normal, combined_normal[indices])
is_covered_by_normal = normal_dots > normal_threshold
pts_nrm_target = np.hstack([target_point_cloud, target_normal])
np.savetxt("pts_nrm_target.txt", pts_nrm_target)
pts_nrm_combined = np.hstack([combined_point_cloud, combined_normal])
np.savetxt("pts_nrm_combined.txt", pts_nrm_combined)
import ipdb; ipdb.set_trace()
covered_points_num = np.sum(is_covered_by_distance & is_covered_by_normal)
coverage_rate = covered_points_num / target_point_cloud.shape[0]
return coverage_rate, covered_points_num
@staticmethod
def check_overlap(new_point_cloud, combined_point_cloud, overlap_area_threshold=25, voxel_size=0.01, require_new_added_pts_num=False):
kdtree = cKDTree(combined_point_cloud)
distances, _ = kdtree.query(new_point_cloud)
overlapping_points = np.sum(distances < threshold)
overlap_rate = overlapping_points / new_point_cloud.shape[0]
return overlap_rate
@staticmethod
def combine_point_with_view_sequence(point_list, view_sequence):
selected_views = []
for view_index, _ in view_sequence:
selected_views.append(point_list[view_index])
return np.vstack(selected_views)
@staticmethod
def compute_next_view_coverage_list(views, combined_point_cloud, target_point_cloud, threshold=0.01):
best_view = None
best_coverage_increase = -1
current_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold)
for view_index, view in enumerate(views):
candidate_views = combined_point_cloud + [view]
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(candidate_views, threshold)
new_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, down_sampled_combined_point_cloud, threshold)
coverage_increase = new_coverage - current_coverage
if coverage_increase > best_coverage_increase:
best_coverage_increase = coverage_increase
best_view = view_index
return best_view, best_coverage_increase
overlapping_points_num = np.sum(distances < voxel_size*2)
cm = 0.01
voxel_size_cm = voxel_size / cm
overlap_area = overlapping_points_num * voxel_size_cm * voxel_size_cm
if require_new_added_pts_num:
return overlap_area > overlap_area_threshold, len(new_point_cloud)-np.sum(distances < voxel_size*1.2)
return overlap_area > overlap_area_threshold
@staticmethod
def compute_next_best_view_sequence_with_overlap(target_point_cloud, point_cloud_list, display_table_point_cloud_list = None,threshold=0.01, overlap_threshold=0.3, status_info=None):
selected_views = []
current_coverage = 0.0
def get_new_added_points(old_combined_pts, new_pts, threshold=0.005):
if old_combined_pts.size == 0:
return new_pts
if new_pts.size == 0:
return np.array([])
tree = cKDTree(old_combined_pts)
distances, _ = tree.query(new_pts, k=1)
new_added_points = new_pts[distances > threshold]
return new_added_points
@staticmethod
def compute_next_best_view_sequence(target_point_cloud, point_cloud_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
selected_views = [init_view]
combined_point_cloud = point_cloud_list[init_view]
history_indices = [scan_points_indices_list[init_view]]
max_rec_pts = np.vstack(point_cloud_list)
downsampled_max_rec_pts = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold)
combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, threshold)
max_rec_pts_num = downsampled_max_rec_pts.shape[0]
max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate(target_point_cloud, downsampled_max_rec_pts, threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, combined_point_cloud, threshold)
current_coverage = new_coverage
current_covered_num = new_covered_num
remaining_views = list(range(len(point_cloud_list)))
view_sequence = []
view_sequence = [(init_view, current_coverage)]
cnt_processed_view = 0
remaining_views.remove(init_view)
curr_rec_pts_num = combined_point_cloud.shape[0]
drop_output_ratio = 0.4
import time
while remaining_views:
best_view = None
best_coverage_increase = -1
best_combined_point_cloud = None
best_covered_num = 0
for view_index in remaining_views:
if np.random.rand() < drop_output_ratio:
continue
if point_cloud_list[view_index].shape[0] == 0:
continue
if selected_views:
combined_old_point_cloud = np.vstack(selected_views)
down_sampled_old_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_old_point_cloud,threshold)
down_sampled_new_view_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud_list[view_index],threshold)
overlap_rate = ReconstructionUtil.compute_overlap_rate(down_sampled_new_view_point_cloud,down_sampled_old_point_cloud, threshold)
if overlap_rate < overlap_threshold:
new_scan_points_indices = scan_points_indices_list[view_index]
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
continue
candidate_views = selected_views + [point_cloud_list[view_index]]
combined_point_cloud = np.vstack(candidate_views)
down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold)
new_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, down_sampled_combined_point_cloud, threshold)
new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
new_downsampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold)
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, threshold)
coverage_increase = new_coverage - current_coverage
#print(f"view_index: {view_index}, coverage_increase: {coverage_increase}")
if coverage_increase > best_coverage_increase:
best_coverage_increase = coverage_increase
best_view = view_index
best_covered_num = new_covered_num
best_combined_point_cloud = new_downsampled_combined_point_cloud
if best_view is not None:
if best_coverage_increase <=1e-3:
if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
break
selected_views.append(point_cloud_list[best_view])
selected_views.append(best_view)
best_rec_pts_num = best_combined_point_cloud.shape[0]
print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
print(f"Current coverage: {current_coverage+best_coverage_increase}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
current_covered_num = best_covered_num
curr_rec_pts_num = best_rec_pts_num
combined_point_cloud = best_combined_point_cloud
remaining_views.remove(best_view)
history_indices.append(scan_points_indices_list[best_view])
current_coverage += best_coverage_increase
cnt_processed_view += 1
if status_info is not None:
@@ -99,21 +143,125 @@ class ReconstructionUtil:
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
return view_sequence, remaining_views, down_sampled_combined_point_cloud
return view_sequence, remaining_views, combined_point_cloud
@staticmethod
def filter_points(points, points_normals, cam_pose, voxel_size=0.005, theta=45):
sampled_points = PtsUtil.voxel_downsample_point_cloud(points, voxel_size)
kdtree = cKDTree(points_normals[:,:3])
_, indices = kdtree.query(sampled_points)
nearest_points = points_normals[indices]
def compute_next_best_view_sequence_with_normal(target_point_cloud, target_normal, point_cloud_list, normal_list, scan_points_indices_list, threshold=0.01, overlap_area_threshold=25, init_view = 0, scan_points_threshold=5, status_info=None):
selected_views = [init_view]
combined_point_cloud = point_cloud_list[init_view]
combined_normal = normal_list[init_view]
history_indices = [scan_points_indices_list[init_view]]
normals = nearest_points[:, 3:]
camera_axis = -cam_pose[:3, 2]
normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
cos_theta = np.dot(normals_normalized, camera_axis)
theta_rad = np.deg2rad(theta)
filtered_sampled_points= sampled_points[cos_theta > np.cos(theta_rad)]
max_rec_pts = np.vstack(point_cloud_list)
max_rec_nrm = np.vstack(normal_list)
downsampled_max_rec_pts, idx = PtsUtil.voxel_downsample_point_cloud(max_rec_pts, threshold, require_idx=True)
downsampled_max_rec_nrm = max_rec_nrm[idx]
max_rec_pts_num = downsampled_max_rec_pts.shape[0]
try:
max_real_rec_pts_coverage, _ = ReconstructionUtil.compute_coverage_rate_with_normal(target_point_cloud, downsampled_max_rec_pts, target_normal, downsampled_max_rec_nrm, threshold)
except:
import ipdb; ipdb.set_trace()
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, combined_point_cloud, downsampled_max_rec_nrm, combined_normal, threshold)
current_coverage = new_coverage
current_covered_num = new_covered_num
remaining_views = list(range(len(point_cloud_list)))
view_sequence = [(init_view, current_coverage)]
cnt_processed_view = 0
remaining_views.remove(init_view)
curr_rec_pts_num = combined_point_cloud.shape[0]
while remaining_views:
best_view = None
best_coverage_increase = -1
best_combined_point_cloud = None
best_combined_normal = None
best_covered_num = 0
for view_index in remaining_views:
if point_cloud_list[view_index].shape[0] == 0:
continue
if selected_views:
new_scan_points_indices = scan_points_indices_list[view_index]
if not ReconstructionUtil.check_scan_points_overlap(history_indices, new_scan_points_indices, scan_points_threshold):
curr_overlap_area_threshold = overlap_area_threshold
else:
curr_overlap_area_threshold = overlap_area_threshold * 0.5
if not ReconstructionUtil.check_overlap(point_cloud_list[view_index], combined_point_cloud, overlap_area_threshold = curr_overlap_area_threshold, voxel_size=threshold):
continue
new_combined_point_cloud = np.vstack([combined_point_cloud, point_cloud_list[view_index]])
new_combined_normal = np.vstack([combined_normal, normal_list[view_index]])
new_downsampled_combined_point_cloud, idx = PtsUtil.voxel_downsample_point_cloud(new_combined_point_cloud,threshold, require_idx=True)
new_downsampled_combined_normal = new_combined_normal[idx]
new_coverage, new_covered_num = ReconstructionUtil.compute_coverage_rate_with_normal(downsampled_max_rec_pts, new_downsampled_combined_point_cloud, downsampled_max_rec_nrm, new_downsampled_combined_normal, threshold)
coverage_increase = new_coverage - current_coverage
if coverage_increase > best_coverage_increase:
best_coverage_increase = coverage_increase
best_view = view_index
best_covered_num = new_covered_num
best_combined_point_cloud = new_downsampled_combined_point_cloud
best_combined_normal = new_downsampled_combined_normal
if best_view is not None:
if best_coverage_increase <=1e-3 or best_covered_num - current_covered_num <= 5:
break
selected_views.append(best_view)
best_rec_pts_num = best_combined_point_cloud.shape[0]
print(f"Current rec pts num: {curr_rec_pts_num}, Best rec pts num: {best_rec_pts_num}, Best cover pts: {best_covered_num}, Max rec pts num: {max_rec_pts_num}")
print(f"Current coverage: {current_coverage}, Best coverage increase: {best_coverage_increase}, Max Real coverage: {max_real_rec_pts_coverage}")
current_covered_num = best_covered_num
curr_rec_pts_num = best_rec_pts_num
combined_point_cloud = best_combined_point_cloud
combined_normal = best_combined_normal
remaining_views.remove(best_view)
history_indices.append(scan_points_indices_list[best_view])
current_coverage += best_coverage_increase
cnt_processed_view += 1
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_status(app_name, runner_name, "current coverage", current_coverage)
sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))
view_sequence.append((best_view, current_coverage))
else:
break
if status_info is not None:
sm = status_info["status_manager"]
app_name = status_info["app_name"]
runner_name = status_info["runner_name"]
sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
return view_sequence, remaining_views, combined_point_cloud
@staticmethod
def generate_scan_points(display_table_top, display_table_radius, min_distance=0.03, max_points_num = 500, max_attempts = 1000):
points = []
attempts = 0
while len(points) < max_points_num and attempts < max_attempts:
angle = np.random.uniform(0, 2 * np.pi)
r = np.random.uniform(0, display_table_radius)
x = r * np.cos(angle)
y = r * np.sin(angle)
z = display_table_top
new_point = (x, y, z)
if all(np.linalg.norm(np.array(new_point) - np.array(existing_point)) >= min_distance for existing_point in points):
points.append(new_point)
attempts += 1
return points
@staticmethod
def check_scan_points_overlap(history_indices, indices2, threshold=5):
for indices1 in history_indices:
if len(set(indices1).intersection(set(indices2))) >= threshold:
return True
return False
return filtered_sampled_points[:, :3]

136
utils/render.py Normal file
View File

@@ -0,0 +1,136 @@
import os
import json
import time
import subprocess
import tempfile
import shutil
import numpy as np
from utils.data_load import DataLoadUtil
from utils.reconstruction import ReconstructionUtil
from utils.pts import PtsUtil
class RenderUtil:
target_mask_label = (0, 255, 0)
display_table_mask_label = (0, 0, 255)
random_downsample_N = 32768
min_z = 0.2
max_z = 0.5
@staticmethod
def get_world_points_and_normal(depth, mask, normal, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[mask].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
return np.zeros((0, 3)), np.zeros((0, 3))
sampled_normal_camera = normal_camera[idx]
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world, sampled_normal_camera
@staticmethod
def get_world_points(depth, mask, cam_intrinsic, cam_extrinsic, random_downsample_N):
z = depth[mask]
i, j = np.nonzero(mask)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
sampled_target_points = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N
)
points_camera_aug = np.concatenate((sampled_target_points, np.ones((sampled_target_points.shape[0], 1))), axis=-1)
points_camera_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
return points_camera_world
@staticmethod
def get_scan_points_indices(scan_points, mask, display_table_mask_label, cam_intrinsic, cam_extrinsic):
scan_points_homogeneous = np.hstack((scan_points, np.ones((scan_points.shape[0], 1))))
points_camera = np.dot(np.linalg.inv(cam_extrinsic), scan_points_homogeneous.T).T[:, :3]
points_image_homogeneous = np.dot(cam_intrinsic, points_camera.T).T
points_image_homogeneous /= points_image_homogeneous[:, 2:]
pixel_x = points_image_homogeneous[:, 0].astype(int)
pixel_y = points_image_homogeneous[:, 1].astype(int)
h, w = mask.shape[:2]
valid_indices = (pixel_x >= 0) & (pixel_x < w) & (pixel_y >= 0) & (pixel_y < h)
mask_colors = mask[pixel_y[valid_indices], pixel_x[valid_indices]]
selected_points_indices = np.where((mask_colors == display_table_mask_label).all(axis=-1))[0]
selected_points_indices = np.where(valid_indices)[0][selected_points_indices]
return selected_points_indices
@staticmethod
def render_pts(cam_pose, scene_path, script_path, scan_points, voxel_threshold=0.005, filter_degree=75, nO_to_nL_pose=None, require_full_scene=False):
import ipdb; ipdb.set_trace()
nO_to_world_pose = DataLoadUtil.get_real_cam_O_from_cam_L(cam_pose, nO_to_nL_pose, scene_path=scene_path)
with tempfile.TemporaryDirectory() as temp_dir:
params = {
"cam_pose": nO_to_world_pose.tolist(),
"scene_path": scene_path
}
scene_info_path = os.path.join(scene_path, "scene_info.json")
shutil.copy(scene_info_path, os.path.join(temp_dir, "scene_info.json"))
params_data_path = os.path.join(temp_dir, "params.json")
with open(params_data_path, 'w') as f:
json.dump(params, f)
result = subprocess.run([
'/home/hofee/blender-4.0.2-linux-x64/blender', '-b', '-P', script_path, '--', temp_dir
], capture_output=True, text=True)
#print(result)
#import ipdb; ipdb.set_trace()
path = os.path.join(temp_dir, "tmp")
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L, depth_R = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True
)
mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
''' target points '''
mask_img_L = mask_L
mask_img_R = mask_R
target_mask_img_L = (mask_L == RenderUtil.target_mask_label).all(axis=-1)
target_mask_img_R = (mask_R == RenderUtil.target_mask_label).all(axis=-1)
sampled_target_points_L, sampled_target_normal_L = RenderUtil.get_world_points_and_normal(depth_L,target_mask_img_L,normal_L, cam_info["cam_intrinsic"], cam_info["cam_to_world"], RenderUtil.random_downsample_N)
sampled_target_points_R = RenderUtil.get_world_points(depth_R, target_mask_img_R, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"], RenderUtil.random_downsample_N )
has_points = sampled_target_points_L.shape[0] > 0 and sampled_target_points_R.shape[0] > 0
if has_points:
target_points, overlap_idx = PtsUtil.get_overlapping_points(
sampled_target_points_L, sampled_target_points_R, voxel_threshold, require_idx=True
)
sampled_target_normal_L = sampled_target_normal_L[overlap_idx]
if has_points:
has_points = target_points.shape[0] > 0
if has_points:
target_points, target_normals = PtsUtil.filter_points(
target_points, sampled_target_normal_L, cam_info["cam_to_world"], theta_limit = filter_degree, z_range=(RenderUtil.min_z, RenderUtil.max_z)
)
scan_points_indices_L = RenderUtil.get_scan_points_indices(scan_points, mask_img_L, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world"])
scan_points_indices_R = RenderUtil.get_scan_points_indices(scan_points, mask_img_R, RenderUtil.display_table_mask_label, cam_info["cam_intrinsic"], cam_info["cam_to_world_R"])
scan_points_indices = np.intersect1d(scan_points_indices_L, scan_points_indices_R)
if not has_points:
target_points = np.zeros((0, 3))
target_normals = np.zeros((0, 3))
#import ipdb; ipdb.set_trace()
return target_points, target_normals, scan_points_indices

208
utils/vis.py Normal file
View File

@@ -0,0 +1,208 @@
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
import trimesh
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.data_load import DataLoadUtil
from utils.pts import PtsUtil
from utils.pose import PoseUtil
class visualizeUtil:
@staticmethod
def save_all_cam_pos_and_cam_axis(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_cam_pos = []
all_cam_axis = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_pose = cam_info["cam_to_world"]
cam_pos = cam_pose[:3, 3]
cam_axis = cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
all_cam_pos.append(cam_pos)
all_cam_axis.append(sample_points)
all_cam_pos = np.array(all_cam_pos)
all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "all_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "all_cam_axis.txt"), all_cam_axis)
@staticmethod
def get_cam_pose_and_cam_axis(cam_pose, is_6d_pose):
if is_6d_pose:
matrix_cam_pose = np.eye(4)
matrix_cam_pose[:3,:3] = PoseUtil.rotation_6d_to_matrix_numpy(cam_pose[:6])
matrix_cam_pose[:3, 3] = cam_pose[6:]
else:
matrix_cam_pose = cam_pose
cam_pos = matrix_cam_pose[:3, 3]
cam_axis = matrix_cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
return cam_pos, sample_points
@staticmethod
def save_all_combined_pts(root, scene, output_dir):
length = DataLoadUtil.get_scene_seq_length(root, scene)
all_combined_pts = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
np.savetxt(os.path.join(output_dir, "all_combined_pts.txt"), downsampled_all_pts)
@staticmethod
def save_seq_cam_pos_and_cam_axis(root, scene, frame_idx_list, output_dir):
all_cam_pos = []
all_cam_axis = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(root, scene, i)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
cam_pose = cam_info["cam_to_world"]
cam_pos = cam_pose[:3, 3]
cam_axis = cam_pose[:3, 2]
num_samples = 10
sample_points = [cam_pos + 0.02*t * cam_axis for t in range(num_samples)]
sample_points = np.array(sample_points)
all_cam_pos.append(cam_pos)
all_cam_axis.append(sample_points)
all_cam_pos = np.array(all_cam_pos)
all_cam_axis = np.array(all_cam_axis).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "seq_cam_pos.txt"), all_cam_pos)
np.savetxt(os.path.join(output_dir, "seq_cam_axis.txt"), all_cam_axis)
@staticmethod
def save_seq_combined_pts(root, scene, frame_idx_list, output_dir):
all_combined_pts = []
for i in frame_idx_list:
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
all_combined_pts = np.vstack(all_combined_pts)
downsampled_all_pts = PtsUtil.voxel_downsample_point_cloud(all_combined_pts, 0.001)
np.savetxt(os.path.join(output_dir, "seq_combined_pts.txt"), downsampled_all_pts)
@staticmethod
def save_target_mesh_at_world_space(
root, model_dir, scene_name, display_table_as_world_space_origin=True
):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
target_name = scene_info["target_name"]
transformation = scene_info[target_name]
if display_table_as_world_space_origin:
location = transformation["location"] - DataLoadUtil.get_display_table_top(
root, scene_name
)
else:
location = transformation["location"]
rotation_euler = transformation["rotation_euler"]
pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
pose_mat[:3, 3] = location
mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
mesh_dir = os.path.join(root, scene_name, "mesh")
if not os.path.exists(mesh_dir):
os.makedirs(mesh_dir)
model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
mesh.export(model_path)
@staticmethod
def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
target_mask_label = (0, 255, 0, 255)
path = DataLoadUtil.get_path(root, scene, frame_idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=binocular,
)
if isinstance(depth, tuple):
depth = depth[0]
mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
''' target points '''
if mask is None:
target_mask_img = np.ones_like(depth, dtype=bool)
else:
target_mask_img = (mask == target_mask_label).all(axis=-1)
cam_intrinsic = cam_info["cam_intrinsic"]
z = depth[target_mask_img]
i, j = np.nonzero(target_mask_img)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
random_downsample_N = 1000
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal[target_mask_img].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
print("No target points")
sampled_normal_camera = normal_camera[idx]
sampled_visualized_normal = []
sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
num_samples = 10
for i in range(len(sampled_target_points)):
sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
sampled_visualized_normal = np.array(sampled_visualized_normal).reshape(-1, 3)
np.savetxt(os.path.join(output_dir, "target_pts.txt"), sampled_target_points)
np.savetxt(os.path.join(output_dir, "target_normal.txt"), sampled_visualized_normal)
@staticmethod
def save_pts_nrm(root, scene, frame_idx, output_dir, binocular=False):
path = DataLoadUtil.get_path(root, scene, frame_idx)
pts_world = DataLoadUtil.load_from_preprocessed_pts(path, "npy")
nrm_camera = DataLoadUtil.load_from_preprocessed_nrm(path, "npy")
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
cam_to_world = cam_info["cam_to_world"]
nrm_world = nrm_camera @ cam_to_world[:3, :3].T
visualized_nrm = []
num_samples = 10
for i in range(len(pts_world)):
for t in range(num_samples):
visualized_nrm.append(pts_world[i] - 0.02 * t * nrm_world[i])
visualized_nrm = np.array(visualized_nrm)
np.savetxt(os.path.join(output_dir, "nrm.txt"), visualized_nrm)
np.savetxt(os.path.join(output_dir, "pts.txt"), pts_world)
# ------ Debug ------
if __name__ == "__main__":
root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
scene = "box"
output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
#visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
# visualizeUtil.save_all_combined_pts(root, scene, output_dir)
# visualizeUtil.save_seq_combined_pts(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
# visualizeUtil.save_seq_cam_pos_and_cam_axis(root, scene, [0, 121, 286, 175, 111,366,45,230,232,225,255,17,199,78,60], output_dir)
# visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
#visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)
visualizeUtil.save_pts_nrm(root, scene, "116", output_dir, binocular=True)