update
This commit is contained in:
parent
8539ce0b9c
commit
5c96e3979f
7
app_rec.py
Normal file
7
app_rec.py
Normal file
@ -0,0 +1,7 @@
|
||||
from PytorchBoot.application import PytorchBootApplication
|
||||
from rec_runner import ReconstructionRunner
|
||||
@PytorchBootApplication("rec")
class AppReconstruction:
    """Application entry point registered with PytorchBoot under the name "rec"."""

    @staticmethod
    def start():
        """Create a ReconstructionRunner for ``config.yaml`` and run it."""
        runner = ReconstructionRunner("config.yaml")
        runner.run()
|
50
config.yaml
Normal file
50
config.yaml
Normal file
@ -0,0 +1,50 @@
|
||||
|
||||
runners:
|
||||
general:
|
||||
seed: 0
|
||||
device: cuda
|
||||
cuda_visible_devices: "0,1,2,3,4,5,6,7"
|
||||
parallel: False
|
||||
|
||||
experiment:
|
||||
name: experiment_name
|
||||
root_dir: "experiments"
|
||||
use_checkpoint: False
|
||||
epoch: -1 # -1 stands for last epoch
|
||||
max_epochs: 5000
|
||||
save_checkpoint_interval: 1
|
||||
test_first: True
|
||||
|
||||
train:
|
||||
optimizer:
|
||||
type: adam
|
||||
lr: 0.0001
|
||||
losses: # loss type : weight
|
||||
loss_type_0: 1.0
|
||||
dataset:
|
||||
name: train_set_name
|
||||
source: train_set_source_name
|
||||
ratio: 1.0
|
||||
batch_size: 1
|
||||
num_workers: 1
|
||||
|
||||
test:
|
||||
frequency: 3 # test frequency
|
||||
dataset_list:
|
||||
- name: test_set_name_0
|
||||
source: train_set_source_name
|
||||
eval_list:
|
||||
- eval_func_name_0
|
||||
- eval_func_name_1
|
||||
ratio: 1.0
|
||||
batch_size: 1
|
||||
num_workers: 1
|
||||
|
||||
|
||||
datasets:
|
||||
dataset_source_name_0:
|
||||
dataset_source_name_1:
|
||||
|
||||
modules:
|
||||
nerf:
|
||||
|
193
pipeline.py
Normal file
193
pipeline.py
Normal file
@ -0,0 +1,193 @@
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
|
||||
from PytorchBoot.factory.component_factory import ComponentFactory
|
||||
import PytorchBoot.stereotype as stereotype
|
||||
import PytorchBoot.namespace as namespace
|
||||
from PytorchBoot.utils.log_util import Log
|
||||
|
||||
from utils.volume_render_util import VolumeRendererUtil
|
||||
|
||||
|
||||
@stereotype.pipeline("reconstruction_pipeline")
|
||||
class ReconstructionPipeline:
|
||||
def __init__(self, config:dict):
    """Build the pipeline and its NeRF module from the configuration.

    Args:
        config: Configuration mapping. ``config["modules"]["nerf"]`` is
            handed to ``ComponentFactory.create`` to obtain the NeRF module;
            the optional ``"nerf_model_output_dir"`` entry sets the
            checkpoint root (default ``"./output/nerf_model"``).

    Raises:
        KeyError: If ``config`` has no ``"modules"`` entry or that entry has
            no ``"nerf"`` key.
    """
    self.config = config
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.module_config = config["modules"]
    # train_nerf moves rays and images to self.device, so the network must
    # live on the same device or the first forward pass fails on CUDA hosts.
    # The module is used like an nn.Module elsewhere in this class
    # (state_dict / parameters / train), hence .to() is available.
    self.nerf = ComponentFactory.create(
        namespace.Stereotype.MODULE, self.module_config["nerf"]
    ).to(self.device)
    self.nerf_model_output_dir = self.config.get("nerf_model_output_dir", "./output/nerf_model")
|
||||
|
||||
def create_experiment(self, backup_name=None):
    """Forward experiment creation to the parent class and return its result.

    Args:
        backup_name: Passed through unchanged to the parent implementation.

    NOTE(review): ReconstructionPipeline is declared without an explicit base
    class in this file, so unless the ``stereotype.pipeline`` decorator
    supplies one, this ``super()`` call has nothing to delegate to -- confirm.
    """
    return super().create_experiment(backup_name)
|
||||
|
||||
def load_experiment(self, backup_name=None):
    """Forward experiment loading to the parent class; returns nothing.

    Args:
        backup_name: Passed through unchanged to the parent implementation.

    NOTE(review): ReconstructionPipeline is declared without an explicit base
    class in this file, so unless the ``stereotype.pipeline`` decorator
    supplies one, this ``super()`` call has nothing to delegate to -- confirm.
    """
    super().load_experiment(backup_name)
|
||||
|
||||
def save(self, object_name: str, best_model: bool = True, name: str|None = None):
    """Write the NeRF weights for ``object_name`` to disk.

    Args:
        object_name: Sub-directory of ``self.nerf_model_output_dir`` that
            receives the checkpoint; created if missing.
        best_model: When true the file is named ``best_model.pth``.
        name: Checkpoint stem used when ``best_model`` is false.

    Returns:
        The directory the checkpoint was (or would have been) written to.
    """
    output_dir = os.path.join(self.nerf_model_output_dir, object_name)
    os.makedirs(output_dir, exist_ok=True)

    # Resolve the target file name first; None means "nothing to save".
    if best_model:
        filename = "best_model.pth"
    elif name is not None:
        filename = f"{name}.pth"
    else:
        filename = None

    if filename is None:
        Log.error("save failed, best_model and name cannot be None at the same time", terminate=True)
    else:
        torch.save(self.nerf.state_dict(), os.path.join(output_dir, filename))

    Log.info(f"save {object_name} to {output_dir}")
    return output_dir
|
||||
|
||||
def load(self, object_name: str, best_model: bool = True, name: str|None = None):
    """Load NeRF weights for ``object_name`` from disk into ``self.nerf``.

    Args:
        object_name: Sub-directory of ``self.nerf_model_output_dir`` that
            holds the checkpoint.
        best_model: When true, ``best_model.pth`` is loaded.
        name: Checkpoint stem used when ``best_model`` is false.

    Returns:
        The directory the checkpoint was read from.
    """
    output_dir = os.path.join(self.nerf_model_output_dir, object_name)

    if best_model:
        checkpoint_name = "best_model.pth"
    elif name is not None:
        checkpoint_name = f"{name}.pth"
    else:
        # The original message said "save failed" (copy-paste from save()).
        Log.error("load failed, best_model and name cannot be None at the same time", terminate=True)
        return output_dir

    # map_location lets a checkpoint written on a GPU host be restored on a
    # CPU-only host (and vice versa) instead of failing during deserialisation.
    state_dict = torch.load(os.path.join(output_dir, checkpoint_name), map_location=self.device)
    self.nerf.load_state_dict(state_dict)
    Log.info(f"load {object_name} from {output_dir}")
    return output_dir
|
||||
|
||||
|
||||
def train_nerf(self,
               images: torch.Tensor,
               poses: torch.Tensor,
               epochs: int = 5000,
               batch_size: int = 4096,
               lr: float = 5e-4,
               start_from_model=None,
               object_name: str = "unknown") -> float:
    """Fit ``self.nerf`` to posed images and keep the lowest-loss weights.

    Each loop iteration samples one random batch of rays, renders it with
    ``VolumeRendererUtil.render_rays`` and takes one Adam step on the MSE
    against the target pixels. Whenever a batch loss improves on the best
    seen so far, the weights are written to ``best_model.pth`` under
    ``<nerf_model_output_dir>/<object_name>``; after the loop those weights
    are loaded back. Note that "best" is judged on a single random batch.

    Args:
        images: Training images; dimensions 1 and 2 are read as H and W.
        poses: Camera poses, one per image, passed to ``generate_rays``.
        epochs: Number of optimisation steps.
        batch_size: Rays sampled per step.
        lr: Adam learning rate.
        start_from_model: Optional model whose ``state_dict`` initialises
            ``self.nerf`` before training.
        object_name: Sub-directory name for the checkpoint.

    Returns:
        The lowest batch loss observed, or ``inf`` if no step improved on it
        (for example ``epochs == 0``).
    """
    output_dir = os.path.join(self.nerf_model_output_dir, object_name)
    os.makedirs(output_dir, exist_ok=True)
    best_model_path = os.path.join(output_dir, "best_model.pth")

    Log.info("train NeRF model with {} images".format(len(images)))
    H, W = images.shape[1], images.shape[2]
    sampling_config = self.config.get("sampling", {})
    camera_config = self.config.get("camera", {})
    focal = camera_config.get("focal", 1000.0)
    near = camera_config.get("near", 2.0)
    far = camera_config.get("far", 6.0)
    coarse_samples = sampling_config.get("coarse_samples", 64)
    fine_samples = sampling_config.get("fine_samples", 128)
    perturb = sampling_config.get("perturb", True)

    if start_from_model is not None:
        self.nerf.load_state_dict(start_from_model.state_dict())

    # Rays and images are moved to self.device below; keep the network there
    # as well so the forward pass does not mix devices.
    self.nerf.to(self.device)

    optimizer = torch.optim.Adam(self.nerf.parameters(), lr=lr)
    mse_loss = torch.nn.MSELoss()

    self.nerf.train()

    rays_o, rays_d = ReconstructionPipeline.generate_rays(poses, H, W, focal)
    rays_o = rays_o.to(self.device)
    rays_d = rays_d.to(self.device)
    images = images.to(self.device)

    best_loss = float('inf')
    checkpoint_written = False
    for epoch in range(epochs):
        batch_rays_o, batch_rays_d, target_pixels = ReconstructionPipeline.sample_pixel_batch(
            images, rays_o, rays_d, batch_size)

        batch_rays_d = torch.nn.functional.normalize(batch_rays_d, dim=-1)

        near_tensor = torch.ones_like(batch_rays_o[..., 0]) * near
        far_tensor = torch.ones_like(batch_rays_o[..., 0]) * far

        optimizer.zero_grad()

        rgb_map, _, _, _ = VolumeRendererUtil.render_rays(
            self.nerf,
            batch_rays_o,
            batch_rays_d,
            near_tensor,
            far_tensor,
            coarse_samples,
            fine_samples,
            perturb
        )

        loss = mse_loss(rgb_map, target_pixels)
        loss.backward()
        optimizer.step()

        current_loss = loss.item()
        if (epoch + 1) % 100 == 0:
            psnr = -10.0 * torch.log10(loss)
            Log.info(f"Epoch {epoch+1}/{epochs}, Loss: {current_loss:.6f}, PSNR: {psnr.item():.2f}")

        if current_loss < best_loss:
            best_loss = current_loss
            torch.save(self.nerf.state_dict(), best_model_path)
            checkpoint_written = True

    # Only restore a checkpoint produced by *this* call. Previously the file
    # was loaded unconditionally, which raised when it did not exist and
    # silently restored a stale model from an earlier run when it did.
    if checkpoint_written:
        self.nerf.load_state_dict(torch.load(best_model_path, map_location=self.device))

    Log.info(f"finish training, best loss: {best_loss:.6f}")
    return best_loss
|
||||
|
||||
@staticmethod
|
||||
def generate_rays(
|
||||
poses: torch.Tensor,
|
||||
H: int,
|
||||
W: int,
|
||||
focal: float) -> tuple:
|
||||
|
||||
i, j = torch.meshgrid(
|
||||
torch.linspace(0, W-1, W),
|
||||
torch.linspace(0, H-1, H),
|
||||
indexing='ij'
|
||||
)
|
||||
i = i.t() # [H, W]
|
||||
j = j.t() # [H, W]
|
||||
|
||||
dirs = torch.stack([
|
||||
(i - W * 0.5) / focal,
|
||||
-(j - H * 0.5) / focal,
|
||||
-torch.ones_like(i)
|
||||
], dim=-1) # [H, W, 3]
|
||||
|
||||
rays_o_list = []
|
||||
rays_d_list = []
|
||||
|
||||
for pose in poses:
|
||||
rays_d = torch.sum(dirs[..., None, :] * pose[:3, :3], dim=-1) # [H, W, 3]
|
||||
|
||||
rays_o = pose[:3, -1].expand(rays_d.shape) # [H, W, 3]
|
||||
|
||||
rays_o = rays_o.reshape(-1, 3) # [H*W, 3]
|
||||
rays_d = rays_d.reshape(-1, 3) # [H*W, 3]
|
||||
|
||||
rays_o_list.append(rays_o)
|
||||
rays_d_list.append(rays_d)
|
||||
|
||||
rays_o_all = torch.stack(rays_o_list, dim=0) # [N, H*W, 3]
|
||||
rays_d_all = torch.stack(rays_d_list, dim=0) # [N, H*W, 3]
|
||||
|
||||
return rays_o_all, rays_d_all
|
||||
|
||||
@staticmethod
|
||||
def sample_pixel_batch(
|
||||
images: torch.Tensor,
|
||||
rays_o: torch.Tensor,
|
||||
rays_d: torch.Tensor,
|
||||
batch_size: int) -> tuple:
|
||||
|
||||
N = images.shape[0]
|
||||
H = images.shape[1]
|
||||
W = images.shape[2]
|
||||
total_rays = N * H * W
|
||||
|
||||
pixels = images.reshape(N, -1, 3) # [N, H*W, 3]
|
||||
|
||||
indices = torch.randint(0, total_rays, size=(batch_size,))
|
||||
img_indices = indices // (H * W)
|
||||
pixel_indices = indices % (H * W)
|
||||
|
||||
sampled_rays_o = torch.stack([rays_o[i, j] for i, j in zip(img_indices, pixel_indices)])
|
||||
sampled_rays_d = torch.stack([rays_d[i, j] for i, j in zip(img_indices, pixel_indices)])
|
||||
sampled_pixels = torch.stack([pixels[i, j] for i, j in zip(img_indices, pixel_indices)])
|
||||
|
||||
return sampled_rays_o, sampled_rays_d, sampled_pixels
|
101
rec_runner.py
Normal file
101
rec_runner.py
Normal file
@ -0,0 +1,101 @@
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
from PytorchBoot.runners.runner import Runner
|
||||
import PytorchBoot.stereotype as stereotype
|
||||
import PytorchBoot.namespace as namespace
|
||||
from PytorchBoot.utils.log_util import Log
|
||||
from PytorchBoot.factory.component_factory import ComponentFactory
|
||||
|
||||
@stereotype.runner("reconstruction_runner")
class ReconstructionRunner(Runner):
    """Runner that drives active NeRF reconstruction through a pipeline component.

    NOTE(review): ``run_active_reconstruction`` references several names that
    are not defined in this class as shown (``self.extract_mesh``,
    ``self.policy``, ``self.nerf_model``, ``self.train_nerf``,
    ``self._simulate_image_capture``, ``self.evaluate_reconstruction``,
    ``self.output_dir``, ``self.device`` and the local ``initial_model_path``).
    Presumably some of them are expected from the ``Runner`` base class or are
    still to be ported from the reference implementation -- confirm.
    """

    def __init__(self, config_path):
        """Initialise the base runner and create the pipeline component.

        Args:
            config_path: Path forwarded to ``Runner.__init__``; ``self.config``
                is read afterwards, so the base class is expected to set it.
        """
        super().__init__(config_path)
        self.config_path = config_path
        # NOTE(review): the sample config.yaml added alongside this file uses
        # the key "modules" and has no "pipeline" section -- verify these keys.
        self.module_config = self.config.get("module", {})
        self.pipeline_config = self.config.get("pipeline", {})
        self.pipeline = ComponentFactory.create(
            namespace.Stereotype.PIPELINE, self.pipeline_config
        )

    def run(self):
        """Entry point of the runner; currently does nothing."""
        pass

    def run_active_reconstruction(self,
                                  initial_poses: np.ndarray,
                                  initial_images: torch.Tensor = None,
                                  max_iterations: int = 3):
        """Train on the initial views, then repeatedly add views and retrain.

        Args:
            initial_poses: Camera poses of the initial views.
            initial_images: Images of the initial views. Although the default
                is ``None``, the value is passed to ``train_nerf`` and
                ``.clone()`` is called on it, so ``None`` is not handled here.
            max_iterations: Number of select/capture/retrain rounds.

        Returns:
            The initial poses concatenated with every selected pose.
        """
        Log.info("start active reconstruction...")

        self.pipeline.train_nerf(
            initial_images,
            torch.from_numpy(initial_poses).float().to(self.device),
            epochs=self.config.get("reconstruction", {}).get("epochs_per_iteration", 2000)
        )

        # NOTE(review): called without arguments; if the pipeline is the
        # ReconstructionPipeline from pipeline.py its save() requires
        # ``object_name`` -- confirm.
        self.pipeline.save()

        all_poses = initial_poses.copy()
        current_poses = initial_poses.copy()
        all_images = initial_images.clone()

        # Extract the initial mesh
        initial_mesh_path = os.path.join(self.output_dir, "initial_mesh.obj")
        self.extract_mesh(
            initial_mesh_path,
            resolution=self.config.get("reconstruction", {}).get("mesh_resolution", 256)
        )

        # Run the active-reconstruction iterations
        for iteration in range(max_iterations):
            print(f"\n开始迭代 {iteration+1}/{max_iterations}")

            # Select the next batch of views
            next_views = self.policy.select_next_views(self.nerf_model, current_poses)
            print(f"选择了 {len(next_views)} 个新视角")

            # Capture images for the new views
            new_images = self._simulate_image_capture(next_views)

            # Append the newly selected views to the current poses and images
            current_poses = np.concatenate([current_poses, next_views], axis=0)
            all_poses = np.concatenate([all_poses, next_views], axis=0)
            all_images = torch.cat([all_images, new_images], dim=0)

            # Following the authors' description, re-initialise from the initial model instead of continuing training
            # "After selecting additional images, we initialize the network with the model from the initialization step and refine the model further with the updated training set."
            # So first load the initial model, then retrain on the extended dataset
            # NOTE(review): ``initial_model_path`` is never assigned in this method.
            self.nerf_model.load_state_dict(torch.load(initial_model_path))

            # Retrain the model on the extended dataset
            self.train_nerf(
                all_images,
                torch.from_numpy(current_poses).float().to(self.device),
                epochs=self.config.get("reconstruction", {}).get("epochs_per_iteration", 2000)
            )

            # Extract a mesh after every iteration to observe how reconstruction quality improves
            iter_mesh_path = os.path.join(self.output_dir, f"mesh_iter_{iteration+1}.obj")
            self.extract_mesh(
                iter_mesh_path,
                resolution=self.config.get("reconstruction", {}).get("mesh_resolution", 256)
            )

        # Extract the final 3D mesh
        output_mesh_path = os.path.join(self.output_dir, "final_mesh.obj")
        self.extract_mesh(
            output_mesh_path,
            resolution=self.config.get("reconstruction", {}).get("mesh_resolution", 256)
        )

        # Evaluate reconstruction quality
        self.evaluate_reconstruction()

        print("主动重建过程完成")
        return all_poses

    def create_experiment(self, backup_name=None):
        """Delegate experiment creation to ``Runner`` and return its result."""
        return super().create_experiment(backup_name)

    def load_experiment(self, backup_name=None):
        """Delegate experiment loading to ``Runner``."""
        super().load_experiment(backup_name)
|
520
ref_code/active_reconstruction.py
Normal file
520
ref_code/active_reconstruction.py
Normal file
@ -0,0 +1,520 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
import os
|
||||
import yaml
|
||||
import time
|
||||
from nerf_model import NeRF
|
||||
from pipeline import ActiveReconstructionPolicy
|
||||
from uncertainty_guide import UncertaintyGuideNeRF
|
||||
import argparse
|
||||
from typing import Dict, Any, List
|
||||
from utils.volume_render_util import VolumeRendererUtil
|
||||
import mcubes # 导入Python Marching Cubes库
|
||||
import trimesh # 处理网格
|
||||
from tqdm import tqdm # 进度条
|
||||
|
||||
class ActiveReconstruction:
|
||||
"""基于NeRF不确定性引导的主动3D重建系统"""
|
||||
|
||||
def __init__(self, config_path: str):
    """Load the configuration and set up device, output directory, model and policy.

    Args:
        config_path: Path to the YAML configuration file.
    """
    # Read explicitly as UTF-8: the bundled nbv_config.yaml contains non-ASCII
    # comments, which the platform's default codec may fail to decode.
    with open(config_path, 'r', encoding='utf-8') as f:
        self.config = yaml.safe_load(f)

    # Use the configured device only when CUDA is available, otherwise CPU.
    self.device = torch.device(self.config.get("device", "cuda") if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {self.device}")

    # Create the output directory.
    self.output_dir = self.config.get("output_dir", "output")
    os.makedirs(self.output_dir, exist_ok=True)

    # Build the NeRF model.
    self._init_nerf_model()

    # Build the view-selection policy.
    self.policy = ActiveReconstructionPolicy(self.config)
|
||||
|
||||
def _init_nerf_model(self):
    """Create ``self.nerf_model`` from the ``"nerf"`` section of the config.

    Every recognised key falls back to the default listed below when it is
    absent from the configuration; the model is moved to ``self.device``.
    """
    defaults = {
        "pos_enc_dim": 10,
        "dir_enc_dim": 4,
        "netdepth_coarse": 8,
        "netwidth_coarse": 256,
        "netdepth_fine": 8,
        "netwidth_fine": 256,
        "skips": [4],
        "use_viewdirs": True,
    }
    nerf_config = self.config.get("nerf", {})
    model_config = {key: nerf_config.get(key, fallback) for key, fallback in defaults.items()}
    self.nerf_model = NeRF(model_config).to(self.device)
|
||||
|
||||
def _generate_rays(self,
|
||||
poses: torch.Tensor,
|
||||
H: int,
|
||||
W: int,
|
||||
focal: float) -> tuple:
|
||||
"""
|
||||
为每个相机位姿生成光线
|
||||
|
||||
参数:
|
||||
poses: 相机位姿 [N, 4, 4]
|
||||
H: 图像高度
|
||||
W: 图像宽度
|
||||
focal: 焦距
|
||||
|
||||
返回:
|
||||
rays_o: 光线起点 [N, H*W, 3]
|
||||
rays_d: 光线方向 [N, H*W, 3]
|
||||
"""
|
||||
# 创建像素坐标网格
|
||||
i, j = torch.meshgrid(
|
||||
torch.linspace(0, W-1, W),
|
||||
torch.linspace(0, H-1, H),
|
||||
indexing='ij'
|
||||
)
|
||||
i = i.t() # [H, W]
|
||||
j = j.t() # [H, W]
|
||||
|
||||
# 转换为相机坐标系中的方向
|
||||
dirs = torch.stack([
|
||||
(i - W * 0.5) / focal,
|
||||
-(j - H * 0.5) / focal,
|
||||
-torch.ones_like(i)
|
||||
], dim=-1) # [H, W, 3]
|
||||
|
||||
# 为每个位姿生成光线
|
||||
rays_o_list = []
|
||||
rays_d_list = []
|
||||
|
||||
for pose in poses:
|
||||
# 转换光线方向到世界坐标系
|
||||
rays_d = torch.sum(dirs[..., None, :] * pose[:3, :3], dim=-1) # [H, W, 3]
|
||||
|
||||
# 设置光线原点
|
||||
rays_o = pose[:3, -1].expand(rays_d.shape) # [H, W, 3]
|
||||
|
||||
# 展平为批处理格式
|
||||
rays_o = rays_o.reshape(-1, 3) # [H*W, 3]
|
||||
rays_d = rays_d.reshape(-1, 3) # [H*W, 3]
|
||||
|
||||
rays_o_list.append(rays_o)
|
||||
rays_d_list.append(rays_d)
|
||||
|
||||
# 组合所有位姿的光线
|
||||
rays_o_all = torch.stack(rays_o_list, dim=0) # [N, H*W, 3]
|
||||
rays_d_all = torch.stack(rays_d_list, dim=0) # [N, H*W, 3]
|
||||
|
||||
return rays_o_all, rays_d_all
|
||||
|
||||
def _sample_pixel_batch(self,
|
||||
images: torch.Tensor,
|
||||
rays_o: torch.Tensor,
|
||||
rays_d: torch.Tensor,
|
||||
batch_size: int) -> tuple:
|
||||
"""
|
||||
随机采样像素批次
|
||||
|
||||
参数:
|
||||
images: 图像数据 [N, H, W, 3]
|
||||
rays_o: 光线起点 [N, H*W, 3]
|
||||
rays_d: 光线方向 [N, H*W, 3]
|
||||
batch_size: 批次大小
|
||||
|
||||
返回:
|
||||
sampled_rays_o: 采样的光线起点 [batch_size, 3]
|
||||
sampled_rays_d: 采样的光线方向 [batch_size, 3]
|
||||
sampled_pixels: 采样的像素值 [batch_size, 3]
|
||||
"""
|
||||
# 获取图像形状
|
||||
N = images.shape[0]
|
||||
H = images.shape[1]
|
||||
W = images.shape[2]
|
||||
total_rays = N * H * W
|
||||
|
||||
# 将图像展平
|
||||
pixels = images.reshape(N, -1, 3) # [N, H*W, 3]
|
||||
|
||||
# 随机选择批次
|
||||
indices = torch.randint(0, total_rays, size=(batch_size,))
|
||||
img_indices = indices // (H * W)
|
||||
pixel_indices = indices % (H * W)
|
||||
|
||||
# 采样光线和像素
|
||||
sampled_rays_o = torch.stack([rays_o[i, j] for i, j in zip(img_indices, pixel_indices)])
|
||||
sampled_rays_d = torch.stack([rays_d[i, j] for i, j in zip(img_indices, pixel_indices)])
|
||||
sampled_pixels = torch.stack([pixels[i, j] for i, j in zip(img_indices, pixel_indices)])
|
||||
|
||||
return sampled_rays_o, sampled_rays_d, sampled_pixels
|
||||
|
||||
def train_nerf(self,
               images: torch.Tensor,
               poses: torch.Tensor,
               epochs: int = 5000,
               batch_size: int = 4096,
               lr: float = 5e-4,
               start_from_model=None) -> float:
    """Train the NeRF model and keep the lowest-loss weights.

    Each loop iteration samples one random batch of rays, renders it and
    takes one Adam step on the MSE against the target pixels. Whenever a
    batch loss improves on the best seen so far, the weights are written to
    ``best_model.pth`` in ``self.output_dir``; after the loop those weights
    are loaded back. Note that "best" is judged on a single random batch.

    Args:
        images: Image data [N, H, W, 3].
        poses: Camera poses [N, 4, 4].
        epochs: Number of optimisation steps.
        batch_size: Rays sampled per step.
        lr: Learning rate.
        start_from_model: Optional model whose weights initialise training.

    Returns:
        The lowest batch loss observed, or ``inf`` if no step improved on it
        (for example ``epochs == 0``).
    """
    print(f"开始训练NeRF模型,使用{len(images)}张图像...")

    # Image size and sampling parameters.
    H, W = images.shape[1], images.shape[2]
    sampling_config = self.config.get("sampling", {})
    camera_config = self.config.get("camera", {})
    focal = camera_config.get("focal", 1000.0)
    near = camera_config.get("near", 2.0)
    far = camera_config.get("far", 6.0)
    coarse_samples = sampling_config.get("coarse_samples", 64)
    fine_samples = sampling_config.get("fine_samples", 128)
    perturb = sampling_config.get("perturb", True)

    # Start from the supplied model when one is given.
    if start_from_model is not None:
        print("从现有模型初始化权重")
        self.nerf_model.load_state_dict(start_from_model.state_dict())

    optimizer = torch.optim.Adam(self.nerf_model.parameters(), lr=lr)
    mse_loss = torch.nn.MSELoss()

    self.nerf_model.train()

    # Rays for all images are computed once up front.
    rays_o, rays_d = self._generate_rays(poses, H, W, focal)
    rays_o = rays_o.to(self.device)
    rays_d = rays_d.to(self.device)
    images = images.to(self.device)

    best_model_path = os.path.join(self.output_dir, "best_model.pth")
    best_loss = float('inf')
    checkpoint_written = False
    for epoch in range(epochs):
        # Sample a random batch of rays.
        batch_rays_o, batch_rays_d, target_pixels = self._sample_pixel_batch(
            images, rays_o, rays_d, batch_size)

        # Normalise ray directions.
        batch_rays_d = torch.nn.functional.normalize(batch_rays_d, dim=-1)

        # Per-ray near and far bounds.
        near_tensor = torch.ones_like(batch_rays_o[..., 0]) * near
        far_tensor = torch.ones_like(batch_rays_o[..., 0]) * far

        optimizer.zero_grad()

        # Volume rendering forward pass.
        rgb_map, _, _, _ = VolumeRendererUtil.render_rays(
            self.nerf_model,
            batch_rays_o,
            batch_rays_d,
            near_tensor,
            far_tensor,
            coarse_samples,
            fine_samples,
            perturb
        )

        # Loss and backward pass.
        loss = mse_loss(rgb_map, target_pixels)
        loss.backward()
        optimizer.step()

        current_loss = loss.item()
        if (epoch + 1) % 100 == 0:
            psnr = -10.0 * torch.log10(loss)
            print(f"Epoch {epoch+1}/{epochs}, Loss: {current_loss:.6f}, PSNR: {psnr.item():.2f}")

        # Keep the best model so far.
        if current_loss < best_loss:
            best_loss = current_loss
            torch.save(self.nerf_model.state_dict(), best_model_path)
            checkpoint_written = True

    # Only restore a checkpoint produced by *this* call. Previously the file
    # was loaded unconditionally, which raised when it did not exist and
    # silently restored a stale model from an earlier run when it did.
    if checkpoint_written:
        self.nerf_model.load_state_dict(torch.load(best_model_path, map_location=self.device))

    print(f"NeRF模型训练完成,最终损失: {best_loss:.6f}")
    return best_loss
|
||||
|
||||
def extract_mesh(self, output_path: str, resolution: int = 256, threshold: float = 50.0, bound: float = 2.0):
    """Extract a triangle mesh from the NeRF density field with Marching Cubes.

    The fine network is queried on a regular ``resolution``^3 grid spanning
    ``[-bound, bound]`` on every axis; the iso-surface at ``threshold`` is
    exported to ``output_path``.

    Args:
        output_path: Destination file for the exported mesh.
        resolution: Number of grid samples per axis.
        threshold: Density iso-value that defines the surface.
        bound: Half-extent of the sampled cube.

    Returns:
        The ``trimesh.Trimesh`` that was exported.
    """
    print(f"从NeRF提取3D网格,分辨率: {resolution}...")

    self.nerf_model.eval()

    # Regular sampling grid; all three axes share the same coordinates.
    axis = torch.linspace(-bound, bound, resolution)
    xx, yy, zz = torch.meshgrid(axis, axis, axis, indexing='ij')

    # Query points, flattened in (x, y, z) row-major order.
    points = torch.stack([xx, yy, zz], dim=-1).reshape(-1, 3).to(self.device)
    num_points = points.shape[0]

    print("正在计算体积密度场...")
    flat_density = torch.zeros(num_points)

    # Evaluate in chunks to bound device memory use.
    batch_size = 4096
    with torch.no_grad():
        for start in tqdm(range(0, num_points, batch_size)):
            batch_points = points[start:start + batch_size]

            # A fixed +z viewing direction is supplied; per the original
            # author's note the density output does not depend on it.
            fixed_dirs = torch.zeros_like(batch_points)
            fixed_dirs[..., 2] = 1.0

            # Query the fine network.
            sigma, _ = self.nerf_model(batch_points, fixed_dirs, coarse=False)

            # Points are already in grid order, so densities can be written
            # straight into the flat buffer. The previous version recomputed
            # voxel indices from float coordinates (truncation could land in
            # the wrong voxel) and assigned one voxel per Python iteration.
            flat_density[start:start + batch_points.shape[0]] = sigma.reshape(-1).cpu()

    density_field = flat_density.reshape(resolution, resolution, resolution)

    print("使用Marching Cubes提取网格...")
    density_field_np = density_field.numpy()
    vertices, triangles = mcubes.marching_cubes(density_field_np, threshold)

    # Map voxel coordinates back to the [-bound, bound] cube.
    vertices = vertices / (resolution - 1) * (2 * bound) - bound

    mesh = trimesh.Trimesh(vertices=vertices, faces=triangles)
    mesh.export(output_path)

    print(f"网格提取完成,保存至: {output_path}")
    print(f"网格统计: {len(vertices)}个顶点, {len(triangles)}个三角面")

    return mesh
|
||||
|
||||
def evaluate_reconstruction(self,
                            gt_mesh_path: str = None) -> Dict[str, float]:
    """Report reconstruction-quality metrics.

    Args:
        gt_mesh_path: Path to a ground-truth mesh, if one is available.

    Returns:
        An empty dict when no ground-truth path is given. Otherwise a dict
        with ``f_score``, ``precision`` and ``recall``; these are fixed
        placeholder values, no real evaluation is performed here.
    """
    if gt_mesh_path is not None:
        print("评估重建质量...")

        # Placeholder numbers standing in for a real F-score / Chamfer evaluation.
        metrics = dict(f_score=0.85, precision=0.87, recall=0.83)

        summary = (f"评估结果: F-score={metrics['f_score']:.4f}, "
                   f"精确率={metrics['precision']:.4f}, 召回率={metrics['recall']:.4f}")
        print(summary)
        return metrics

    print("没有提供真实网格,跳过评估")
    return {}
|
||||
|
||||
def run_active_reconstruction(self,
                              initial_poses: np.ndarray,
                              initial_images: torch.Tensor = None,
                              max_iterations: int = 3) -> List[np.ndarray]:
    """Run the active reconstruction loop.

    The model is first trained on the initial views and saved. Each
    iteration then asks the policy for new views, captures their images,
    resets the network to the saved initial weights and retrains on the
    enlarged set. A mesh is exported after the initial training, after every
    iteration and once more at the end.

    Args:
        initial_poses: Initial camera poses.
        initial_images: Initial images; simulated from the poses when omitted.
        max_iterations: Number of select/capture/retrain rounds.

    Returns:
        A single array holding the initial poses followed by every selected
        pose, concatenated along axis 0.
    """
    print("开始主动重建过程...")

    # These settings do not change during the run, so read them once.
    reconstruction_config = self.config.get("reconstruction", {})
    epochs_per_iteration = reconstruction_config.get("epochs_per_iteration", 2000)
    mesh_resolution = reconstruction_config.get("mesh_resolution", 256)

    if initial_images is None:
        initial_images = self._simulate_image_capture(initial_poses)

    # Initial training on the initial views.
    self.train_nerf(
        initial_images,
        torch.from_numpy(initial_poses).float().to(self.device),
        epochs=epochs_per_iteration
    )

    # Persist the initial model; every iteration restarts from it.
    initial_model_path = os.path.join(self.output_dir, "initial_model.pth")
    torch.save(self.nerf_model.state_dict(), initial_model_path)

    all_poses = initial_poses.copy()
    all_images = initial_images.clone()

    self.extract_mesh(
        os.path.join(self.output_dir, "initial_mesh.obj"),
        resolution=mesh_resolution
    )

    for iteration in range(max_iterations):
        print(f"\n开始迭代 {iteration+1}/{max_iterations}")

        # Select the next batch of views.
        next_views = self.policy.select_next_views(self.nerf_model, all_poses)
        print(f"选择了 {len(next_views)} 个新视角")

        # Capture images for the new views.
        new_images = self._simulate_image_capture(next_views)

        all_poses = np.concatenate([all_poses, next_views], axis=0)
        # Caller-supplied initial images may live on a different device than
        # the captured ones; align before concatenating.
        all_images = torch.cat([all_images, new_images.to(all_images.device)], dim=0)

        # Following the authors' description, re-initialise from the initial model instead of continuing training:
        # "After selecting additional images, we initialize the network with the model from the initialization step and refine the model further with the updated training set."
        self.nerf_model.load_state_dict(torch.load(initial_model_path, map_location=self.device))

        # Retrain on the extended dataset.
        self.train_nerf(
            all_images,
            torch.from_numpy(all_poses).float().to(self.device),
            epochs=epochs_per_iteration
        )

        # Export a mesh per iteration to track reconstruction quality.
        self.extract_mesh(
            os.path.join(self.output_dir, f"mesh_iter_{iteration+1}.obj"),
            resolution=mesh_resolution
        )

    # Export the final mesh.
    self.extract_mesh(
        os.path.join(self.output_dir, "final_mesh.obj"),
        resolution=mesh_resolution
    )

    self.evaluate_reconstruction()

    print("主动重建过程完成")
    return all_poses
|
||||
|
||||
def _simulate_image_capture(self, poses: np.ndarray) -> torch.Tensor:
|
||||
"""
|
||||
模拟图像采集过程(实际系统中应该从相机或数据集获取)
|
||||
|
||||
参数:
|
||||
poses: 相机位姿
|
||||
|
||||
返回:
|
||||
images: 模拟的图像
|
||||
"""
|
||||
# 模拟图像大小
|
||||
camera_config = self.config.get("camera", {})
|
||||
H, W = camera_config.get("height", 800), camera_config.get("width", 800)
|
||||
|
||||
# 创建随机图像(实际应来自相机或渲染)
|
||||
images = torch.rand(len(poses), H, W, 3, device=self.device)
|
||||
|
||||
return images
|
||||
|
||||
def main():
    """Command-line entry point: build the system, pick initial views, run reconstruction.

    Raises:
        ValueError: If the configured initial view count is not positive.
    """
    parser = argparse.ArgumentParser(description="基于NeRF不确定性的主动3D重建")
    parser.add_argument("--config", type=str, default="nbv_config.yaml", help="配置文件路径")
    parser.add_argument("--synthetic", action="store_true", help="使用合成数据集")
    args = parser.parse_args()

    # Create the active reconstruction system.
    reconstruction = ActiveReconstruction(args.config)

    # ActiveReconstruction has already parsed the file; reuse that mapping
    # instead of re-opening it (the previous bare open() was never closed).
    config = reconstruction.config
    initial_view_count = config.get("reconstruction", {}).get("initial_view_count", 15)

    # Synthetic data uses at most 6 initial views.
    if args.synthetic:
        initial_view_count = min(initial_view_count, 6)
        print(f"使用合成数据集,初始视图数量: {initial_view_count}")
    else:
        print(f"使用真实数据集,初始视图数量: {initial_view_count}")

    if initial_view_count <= 0:
        raise ValueError(f"initial_view_count must be positive, got {initial_view_count}")

    # Poses are assumed to be organised ring by ring; take the middle ring.
    view_selection = config.get("view_selection", {})
    middle_circle_index = view_selection.get("n_circles", 5) // 2
    poses_per_circle = view_selection.get("n_poses_per_circle", 30)

    # Pick evenly spaced poses. The stride is kept at 1 or more so that asking
    # for more views than one ring holds never selects the same pose repeatedly.
    start_index = middle_circle_index * poses_per_circle
    step = max(1, poses_per_circle // initial_view_count)
    initial_pose_indices = [start_index + i * step for i in range(initial_view_count)]
    initial_poses = reconstruction.policy.poses[initial_pose_indices]

    # Run the active reconstruction.
    selected_poses = reconstruction.run_active_reconstruction(
        initial_poses,
        max_iterations=config.get("reconstruction", {}).get("max_iterations", 3)
    )

    print(f"主动重建完成,共选择了{len(selected_poses)}个相机位姿")


if __name__ == "__main__":
    main()
|
52
ref_code/nbv_config.yaml
Normal file
52
ref_code/nbv_config.yaml
Normal file
@ -0,0 +1,52 @@
|
||||
# 主动重建系统配置
|
||||
|
||||
# 基本设置
|
||||
device: cuda # 使用的设备: cuda 或 cpu
|
||||
output_dir: ./outputs/nbv_reconstruction # 输出目录
|
||||
seed: 42 # 随机数种子
|
||||
|
||||
# 数据设置
|
||||
data:
|
||||
dataset_type: synthetic # 数据集类型: synthetic 或 real
|
||||
synthetic_dir: ./data/synthetic/ # 合成数据目录
|
||||
real_dir: ./data/real/ # 真实数据目录
|
||||
|
||||
# NeRF模型设置
|
||||
nerf:
|
||||
pos_enc_dim: 10 # 位置编码维度
|
||||
dir_enc_dim: 4 # 方向编码维度
|
||||
hidden_dim: 256 # 隐藏层维度(兼容旧配置)
|
||||
# 网络结构设置
|
||||
netdepth_coarse: 8 # coarse网络深度
|
||||
netwidth_coarse: 256 # coarse网络宽度
|
||||
netdepth_fine: 8 # fine网络深度
|
||||
netwidth_fine: 256 # fine网络宽度
|
||||
skips: [4] # 跳跃连接层
|
||||
use_viewdirs: true # 是否使用视角方向信息
|
||||
|
||||
# 相机设置
|
||||
camera:
|
||||
width: 800 # 图像宽度
|
||||
height: 800 # 图像高度
|
||||
focal: 1000.0 # 焦距
|
||||
near: 2.0 # 近平面距离
|
||||
far: 6.0 # 远平面距离
|
||||
|
||||
# 采样设置
|
||||
sampling:
|
||||
coarse_samples: 64 # 粗采样点数
|
||||
fine_samples: 128 # 精细采样点数
|
||||
perturb: True # 是否添加噪声
|
||||
|
||||
# 重建设置
|
||||
reconstruction:
|
||||
max_iterations: 3 # 最大迭代次数
|
||||
initial_view_count: 15 # 初始视图数量
|
||||
epochs_per_iteration: 2000 # 每次迭代的训练轮数
|
||||
mesh_resolution: 256 # 网格提取分辨率
|
||||
|
||||
# 视图选择策略设置
|
||||
view_selection:
|
||||
n_circles: 5 # 半球上的环数
|
||||
n_poses_per_circle: 30 # 每个环上的位姿数
|
||||
distance_threshold: 0.1 # 视图距离阈值
|
182
ref_code/nerf_model.py
Normal file
182
ref_code/nerf_model.py
Normal file
@ -0,0 +1,182 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
from PytorchBoot.stereotype import stereotype
|
||||
|
||||
@stereotype.module("nerf")
|
||||
class NeRF(nn.Module):
|
||||
def __init__(self, config):
    """Build the coarse and fine networks described by ``config``.

    Args:
        config: Mapping with required keys ``pos_enc_dim`` and
            ``dir_enc_dim`` and optional keys ``netdepth_coarse``,
            ``netwidth_coarse``, ``netdepth_fine``, ``netwidth_fine``
            (defaults 8 / 256), ``skips`` (default ``[4]``) and
            ``use_viewdirs`` (default ``True``).
    """
    super().__init__()
    self.config = config

    # Encoded sizes: 3 * (2 * L + 1) for L frequency bands
    # (presumably 3-component inputs -- confirm against callers).
    pos_enc_out = 3 * (2 * config["pos_enc_dim"] + 1)
    dir_enc_out = 3 * (2 * config["dir_enc_dim"] + 1)

    skips = config.get("skips", [4])
    self.use_viewdirs = config.get("use_viewdirs", True)

    # (suffix, depth, width) per network. Layers are created coarse first,
    # then fine, in the same order as before so seeded initialisation is
    # unchanged.
    networks = (
        ("coarse", config.get("netdepth_coarse", 8), config.get("netwidth_coarse", 256)),
        ("fine", config.get("netdepth_fine", 8), config.get("netwidth_fine", 256)),
    )

    for tag, depth, width in networks:
        # Position encoding -> hidden features.
        setattr(self, f"pts_linears_{tag}", self._build_pts_mlp(
            input_dim=pos_enc_out,
            width=width,
            depth=depth,
            skips=skips
        ))

        if self.use_viewdirs:
            # Density head and feature head, then features + direction encoding -> RGB.
            setattr(self, f"alpha_linear_{tag}", nn.Linear(width, 1))
            setattr(self, f"feature_linear_{tag}", nn.Linear(width, width))
            setattr(self, f"views_linears_{tag}", nn.ModuleList([
                nn.Linear(width + dir_enc_out, width // 2)
            ]))
            setattr(self, f"rgb_linear_{tag}", nn.Linear(width // 2, 3))
        else:
            # Simplified variant without view directions: one 4-channel head.
            setattr(self, f"output_linear_{tag}", nn.Linear(width, 4))
|
||||
|
||||
def _build_pts_mlp(self, input_dim, width, depth, skips):
|
||||
"""构建处理位置编码的MLP网络,支持跳跃连接"""
|
||||
layers = nn.ModuleList()
|
||||
|
||||
# 第一层
|
||||
layers.append(nn.Linear(input_dim, width))
|
||||
|
||||
# 中间层
|
||||
for i in range(1, depth):
|
||||
if i in skips:
|
||||
layers.append(nn.Linear(input_dim + width, width))
|
||||
else:
|
||||
layers.append(nn.Linear(width, width))
|
||||
|
||||
return layers
|
||||
|
||||
def positional_encoding(self, x, L):
    """Append sin/cos features of ``x`` at ``L`` doubling frequencies.

    The output is ``[x, sin(x), cos(x), sin(2x), cos(2x), ...]`` joined along
    the last dimension, so its last dimension is ``x.shape[-1] * (1 + 2 * L)``.
    """
    bands = [x]
    for level in range(L):
        scaled = 2 ** level * x
        bands.extend((torch.sin(scaled), torch.cos(scaled)))
    return torch.cat(bands, dim=-1)
|
||||
|
||||
def forward_mlp(self, pts_embed, viewdirs_embed, is_coarse=True):
    """Run the coarse or fine MLP on already-encoded inputs.

    Args:
        pts_embed: Encoded positions; the last dimension is the feature axis.
        viewdirs_embed: Encoded view directions with the same leading
            dimensions as ``pts_embed``. Only read when ``self.use_viewdirs``
            is true.
        is_coarse: Use the ``*_coarse`` layers when true, else the ``*_fine``
            layers.

    Returns:
        ``(rgb, sigma)``: ``rgb`` is squashed to [0, 1] with a sigmoid,
        ``sigma`` is the raw (un-activated) linear output.
    """
    pts_linears = self.pts_linears_coarse if is_coarse else self.pts_linears_fine
    # Looked up once; the value cannot change while the loop runs.
    skips = self.config.get("skips", [4])

    # Position trunk. After every layer listed in ``skips`` the encoded input
    # is concatenated back in, so the next layer receives width + input_dim.
    h = pts_embed
    for i, layer in enumerate(pts_linears):
        h = F.relu(layer(h))
        if i in skips:
            h = torch.cat([pts_embed, h], -1)

    if not self.use_viewdirs:
        # Single head emitting RGB (first three channels) and density.
        output_linear = self.output_linear_coarse if is_coarse else self.output_linear_fine
        outputs = output_linear(h)
        rgb = torch.sigmoid(outputs[..., :3])
        sigma = outputs[..., 3:]
        return rgb, sigma

    if is_coarse:
        alpha_linear = self.alpha_linear_coarse
        feature_linear = self.feature_linear_coarse
        views_linears = self.views_linears_coarse
        rgb_linear = self.rgb_linear_coarse
    else:
        alpha_linear = self.alpha_linear_fine
        feature_linear = self.feature_linear_fine
        views_linears = self.views_linears_fine
        rgb_linear = self.rgb_linear_fine

    # Density depends on position only.
    sigma = alpha_linear(h)

    # Colour depends on the position feature plus the view direction.
    h = torch.cat([feature_linear(h), viewdirs_embed], -1)
    for layer in views_linears:
        h = F.relu(layer(h))
    rgb = torch.sigmoid(rgb_linear(h))

    return rgb, sigma
|
||||
|
||||
def forward(self, pos, dir, coarse=True):
    """Encode positions and view directions and query the selected network.

    Args:
        pos: 3D positions, shape ``[..., 3]``.
        dir: View directions, shape ``[..., 3]``. The leading dimensions may
            be any shape that broadcasts against those of ``pos`` (for
            example ``[N, 1, 3]`` for ``[N, S, 3]`` positions). Ignored when
            ``self.use_viewdirs`` is false.
        coarse: Query the coarse network when true, the fine one otherwise.

    Returns:
        ``(sigma, color)``: density ``[..., 1]`` and RGB colour ``[..., 3]``.
    """
    pos_enc = self.positional_encoding(pos, self.config["pos_enc_dim"])

    # Directions are only encoded when the network is view dependent.
    dir_enc = None
    if self.use_viewdirs:
        dir_normalized = F.normalize(dir, dim=-1)
        dir_enc = self.positional_encoding(dir_normalized, self.config["dir_enc_dim"])
        # forward_mlp concatenates the direction encoding with per-point
        # features, which needs identical leading dimensions; one direction
        # per ray is therefore expanded over that ray's samples.
        target_shape = tuple(pos_enc.shape[:-1]) + (dir_enc.shape[-1],)
        if tuple(dir_enc.shape) != target_shape:
            dir_enc = dir_enc.expand(target_shape)

    # forward_mlp returns (rgb, sigma); callers of forward expect the
    # opposite order.
    color, sigma = self.forward_mlp(pos_enc, dir_enc, coarse)

    return sigma, color
|
126
ref_code/pipeline.py
Normal file
126
ref_code/pipeline.py
Normal file
@ -0,0 +1,126 @@
|
||||
import numpy as np
|
||||
import torch
|
||||
from scipy.spatial.transform import Rotation as R
|
||||
from uncertainty_guide import UncertaintyGuideNeRF
|
||||
|
||||
class ActiveReconstructionPolicy:
    """Entropy-guided next-best-view selection over a fixed set of candidate poses.

    Candidate cameras sit on rings of a hemisphere of radius 2 centred on the
    origin. They are grouped into sections (6 azimuth sectors x upper/lower
    hemisphere) and each selection round picks at most one pose per section:
    the one with the highest entropy reported by ``UncertaintyGuideNeRF``.
    """

    def __init__(self, config):
        """Create the candidate poses and the uncertainty evaluator.

        Args:
            config: Configuration providing ``n_circles`` and
                ``n_poses_per_circle`` either as attributes or as mapping
                keys. It is also passed unchanged to ``UncertaintyGuideNeRF``.
        """
        self.config = config
        self._setup_view_sphere()
        self.uncertainty_guide = UncertaintyGuideNeRF(config)

    def _config_value(self, name):
        """Read ``name`` from the config as an attribute, else as a mapping key."""
        if hasattr(self.config, name):
            return getattr(self.config, name)
        return self.config[name]

    def _setup_view_sphere(self):
        """Create the candidate camera-to-world poses on the hemisphere.

        Fills ``self.poses`` with an ``[n_circles * n_poses_per_circle, 4, 4]``
        array and ``self.section_masks`` with the section membership masks.
        The rotation columns are (right, up, backward): the camera looks down
        its local -Z axis towards the origin, which is the direction in which
        ``UncertaintyGuideNeRF.generate_rays`` casts rays.
        """
        n_circles = self._config_value("n_circles")
        n_poses_per_circle = self._config_value("n_poses_per_circle")

        # Polar angle of every ring, from near the pole (0.1 rad) to the equator.
        polar_angles = np.linspace(0.1, np.pi / 2, n_circles)
        azimuths = np.linspace(0, 2 * np.pi, n_poses_per_circle, endpoint=False)
        world_up = np.array([0.0, 0.0, 1.0])

        poses = []
        for polar in polar_angles:
            for azimuth in azimuths:
                # Spherical to Cartesian coordinates on the unit sphere.
                direction = np.array([
                    np.cos(azimuth) * np.sin(polar),
                    np.sin(azimuth) * np.sin(polar),
                    np.cos(polar),
                ])
                position = direction * 2.0  # sphere radius

                # Camera +Z points away from the origin so that -Z looks at it.
                backward = position / np.linalg.norm(position)
                # polar >= 0.1 keeps the view axis away from world_up, so the
                # cross product cannot vanish; it still has to be normalised
                # for the rotation to be orthonormal.
                right = np.cross(world_up, backward)
                right = right / np.linalg.norm(right)
                up = np.cross(backward, right)

                pose = np.eye(4)
                pose[:3, :3] = np.stack([right, up, backward], axis=-1)
                pose[:3, 3] = position
                poses.append(pose)

        self.poses = np.stack(poses)

        # Group the candidates into 12 sections (6 per hemisphere).
        self.section_masks = self._create_section_masks()

    def _create_section_masks(self):
        """Return 12 boolean masks over ``self.poses``, one per section.

        Sections are ordered (sector 0 upper, sector 0 lower, sector 1 upper,
        ...). Poses produced by ``_setup_view_sphere`` all have z >= 0, so the
        "lower" masks are normally empty; empty sections are skipped during
        selection.
        """
        masks = []
        angles = np.arctan2(self.poses[:, 1, 3], self.poses[:, 0, 3])  # azimuth

        # Upper / lower hemisphere by the sign of the camera z coordinate.
        upper = self.poses[:, 2, 3] > 0
        lower = ~upper

        # Six azimuth sectors need seven boundaries.
        angle_bins = np.linspace(-np.pi, np.pi, 7)
        last_sector = len(angle_bins) - 2
        for i in range(6):
            low, high = angle_bins[i], angle_bins[i + 1]
            if i == last_sector:
                # arctan2 can return exactly +pi; close the last sector so
                # such a pose is not left without a section.
                angle_mask = (angles >= low) & (angles <= high)
            else:
                angle_mask = (angles >= low) & (angles < high)
            masks.append(angle_mask & upper)
            masks.append(angle_mask & lower)

        return masks

    def select_next_views(self, nerf_model, current_poses):
        """Pick the highest-entropy unvisited candidate of every section.

        Args:
            nerf_model: Current NeRF model, forwarded to the uncertainty guide.
            current_poses: Already captured camera poses ``[M, 4, 4]``; ``M``
                may be 0.

        Returns:
            Selected candidate poses ``[K, 4, 4]`` with at most one pose per
            section (``K`` is 0 when no candidate is left).
        """
        current_poses = np.asarray(current_poses)
        all_positions = self.poses[:, :3, 3]

        # Drop candidates closer than 0.1 to a view that was already taken.
        if len(current_poses) == 0:
            valid_mask = np.ones(len(self.poses), dtype=bool)
        else:
            current_positions = current_poses[:, :3, 3]
            distance_matrix = np.linalg.norm(
                current_positions[:, None] - all_positions[None], axis=-1)
            valid_mask = distance_matrix.min(axis=0) > 0.1

        valid_indices = np.flatnonzero(valid_mask)
        if valid_indices.size == 0:
            return self.poses[:0]

        # Entropy of every remaining candidate, aligned with valid_indices.
        entropy_values = np.asarray(
            self.uncertainty_guide.evaluate_candidate_views(nerf_model, self.poses[valid_mask]))

        selected_indices = []
        for mask in self.section_masks:
            # Restrict the section mask to the candidates that were evaluated.
            section_mask = mask[valid_mask]
            if not np.any(section_mask):
                continue

            section_entropy = np.where(section_mask, entropy_values, -np.inf)
            best = int(np.argmax(section_entropy))
            # Map back from the filtered index to the index in self.poses.
            selected_indices.append(valid_indices[best])

        return self.poses[np.asarray(selected_indices, dtype=int)]

    def coarse_to_fine_reconstruction(self, nerf_model, initial_poses, max_iterations=3):
        """Grow the set of views by repeated entropy-guided selection.

        The model is not retrained between rounds here; that step is expected
        to happen outside this method.

        Args:
            nerf_model: NeRF model used to score candidate views.
            initial_poses: Initial camera poses ``[M, 4, 4]``.
            max_iterations: Maximum number of selection rounds.

        Returns:
            All poses, the initial ones followed by every selected view.
        """
        all_selected_poses = np.array(initial_poses, copy=True)

        for _ in range(max_iterations):
            next_views = self.select_next_views(nerf_model, all_selected_poses)
            if len(next_views) == 0:
                # Nothing left to add; later rounds would select nothing either.
                break
            all_selected_poses = np.concatenate([all_selected_poses, next_views], axis=0)

        return all_selected_poses
|
170
ref_code/uncertainty_guide.py
Normal file
170
ref_code/uncertainty_guide.py
Normal file
@ -0,0 +1,170 @@
|
||||
import torch
|
||||
import numpy as np
|
||||
from utils.volume_render_util import VolumeRendererUtil
|
||||
import torch.nn.functional as F
|
||||
from typing import Tuple, List, Dict, Any, Optional
|
||||
|
||||
class UncertaintyGuideNeRF:
    """Score candidate views by the entropy of a NeRF's ray weight distribution.

    A view's uncertainty is the mean, over all pixels' rays, of the entropy
    returned by ``VolumeRendererUtil.render_rays``.
    """

    def __init__(self, config: Dict[str, Any]):
        """Read camera and sampling parameters from ``config``.

        Args:
            config: Mapping with optional keys ``device``, ``width``,
                ``height``, ``focal``, ``near``, ``far``, ``coarse_samples``
                and ``fine_samples``; missing keys fall back to the defaults
                below.
        """
        self.config = config
        # Falls back to the CPU whenever CUDA is unavailable, regardless of
        # the configured device.
        self.device = torch.device(config.get("device", "cuda") if torch.cuda.is_available() else "cpu")

        # Camera parameters.
        self.width = config.get("width", 800)
        self.height = config.get("height", 800)
        self.focal = config.get("focal", 1000.0)

        # Sampling parameters.
        self.near = config.get("near", 2.0)
        self.far = config.get("far", 6.0)
        self.coarse_samples = config.get("coarse_samples", 64)
        self.fine_samples = config.get("fine_samples", 128)

    def generate_rays(self, pose: np.ndarray) -> Tuple[torch.Tensor, torch.Tensor]:
        """Generate one ray per pixel for a camera pose.

        Rays leave the camera along its local -Z axis, with +X to the right
        and +Y up in the image.

        Args:
            pose: Camera-to-world matrix ``[4, 4]``.

        Returns:
            ``(rays_o, rays_d)``, each ``[H*W, 3]`` in row-major pixel order.
            Directions are not normalised.
        """
        # Pixel grid laid out [H, W]: ``i`` is the column index, ``j`` the row.
        rows = torch.arange(self.height, dtype=torch.float32, device=self.device)
        cols = torch.arange(self.width, dtype=torch.float32, device=self.device)
        j, i = torch.meshgrid(rows, cols, indexing='ij')

        # Directions in the camera frame.
        dirs = torch.stack([
            (i - self.width * 0.5) / self.focal,
            -(j - self.height * 0.5) / self.focal,
            -torch.ones_like(i)
        ], dim=-1)

        # Rotate into the world frame (row-wise R @ d).
        pose = torch.as_tensor(pose, dtype=torch.float32, device=self.device)
        rays_d = torch.sum(dirs[..., None, :] * pose[:3, :3], dim=-1)
        rays_o = pose[:3, -1].expand(rays_d.shape)

        # Flatten to a batch of rays.
        return rays_o.reshape(-1, 3), rays_d.reshape(-1, 3)

    def evaluate_view_uncertainty(self,
                                  nerf_model: torch.nn.Module,
                                  pose: np.ndarray) -> float:
        """Return the mean ray entropy of the view rendered from ``pose``.

        The model is switched to eval mode for the evaluation and its previous
        training flag is restored afterwards.

        Args:
            nerf_model: NeRF model passed to ``VolumeRendererUtil.render_rays``.
            pose: Camera-to-world matrix ``[4, 4]``.

        Returns:
            Mean entropy over all rays of the view.
        """
        was_training = getattr(nerf_model, "training", False)
        nerf_model.eval()
        try:
            with torch.no_grad():
                rays_o, rays_d = self.generate_rays(pose)
                rays_d = F.normalize(rays_d, dim=-1)

                # Rays are rendered in chunks to bound memory use.
                batch_size = 4096
                entropy_values = []
                for start in range(0, rays_o.shape[0], batch_size):
                    batch_rays_o = rays_o[start:start + batch_size]
                    batch_rays_d = rays_d[start:start + batch_size]

                    # Per-ray near / far bounds.
                    near = torch.full_like(batch_rays_o[..., 0], self.near)
                    far = torch.full_like(batch_rays_o[..., 0], self.far)

                    # Deterministic sampling: random jitter would add noise to
                    # the scores used to rank views.
                    _, _, _, entropy = VolumeRendererUtil.render_rays(
                        nerf_model,
                        batch_rays_o,
                        batch_rays_d,
                        near,
                        far,
                        self.coarse_samples,
                        self.fine_samples,
                        perturb=False
                    )
                    entropy_values.append(entropy)

                mean_entropy = torch.cat(entropy_values, dim=0).mean().item()
        finally:
            nerf_model.train(was_training)

        return mean_entropy

    def evaluate_candidate_views(self,
                                 nerf_model: torch.nn.Module,
                                 candidate_poses: np.ndarray) -> np.ndarray:
        """Return the mean entropy of every candidate view.

        Args:
            nerf_model: NeRF model.
            candidate_poses: Camera-to-world matrices ``[N, 4, 4]``.

        Returns:
            Float array ``[N]`` with one entropy value per candidate.
        """
        return np.array(
            [self.evaluate_view_uncertainty(nerf_model, pose) for pose in candidate_poses],
            dtype=float,
        )

    def downsample_image(self, rays_o, rays_d, factor=4):
        """Keep every ``factor``-th ray along both image axes.

        The rays are interpreted as an image of ``self.height`` x
        ``self.width`` pixels when that matches the number of rays; otherwise
        a square image is assumed.

        Args:
            rays_o: Ray origins ``[H*W, 3]``.
            rays_d: Ray directions ``[H*W, 3]``.
            factor: Sub-sampling stride.

        Returns:
            ``(downsampled_rays_o, downsampled_rays_d)``, each ``[K, 3]``.
        """
        n_rays = rays_o.shape[0]
        if self.height * self.width == n_rays:
            H, W = self.height, self.width
        else:
            H = W = int(np.sqrt(n_rays))

        # Back to image layout so the stride applies per axis.
        rays_o = rays_o.reshape(H, W, 3)
        rays_d = rays_d.reshape(H, W, 3)

        downsampled_rays_o = rays_o[::factor, ::factor].reshape(-1, 3)
        downsampled_rays_d = rays_d[::factor, ::factor].reshape(-1, 3)

        return downsampled_rays_o, downsampled_rays_d
|
166
utils/pose_util.py
Normal file
166
utils/pose_util.py
Normal file
@ -0,0 +1,166 @@
|
||||
import numpy as np
|
||||
|
||||
class PoseUtil:
    """Random pose and scale sampling helpers built on NumPy's global RNG."""

    # Values for the ``fix`` argument of ``get_n_uniform_pose``: the named
    # component is sampled once and shared by all poses. SCALE is not
    # interpreted there and behaves like ``fix=None``.
    ROTATION = 1
    TRANSLATION = 2
    SCALE = 3

    @staticmethod
    def _uniform_xyz(low, high):
        """Draw one (x, y, z) sample between ``low`` and ``high``.

        Each bound is either a scalar shared by the three axes or a sequence
        (list, tuple or array) of three per-axis values.
        """
        lows = np.broadcast_to(np.asarray(low, dtype=float), (3,))
        highs = np.broadcast_to(np.asarray(high, dtype=float), (3,))
        # Three separate draws in x, y, z order keep the sequence of values
        # consumed from the RNG stable for seeded runs.
        return np.array([np.random.uniform(lo, hi) for lo, hi in zip(lows, highs)])

    @staticmethod
    def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False):
        """Sample a translation uniformly inside an axis-aligned box.

        Args:
            trans_m_min: Lower bound, scalar or three per-axis values.
            trans_m_max: Upper bound, scalar or three per-axis values.
            trans_unit: When equal to ``"cm"`` the sample is divided by 100;
                any other value leaves it unscaled.
            debug: Print the sampled translation.

        Returns:
            Array ``[3]``.
        """
        translation = PoseUtil._uniform_xyz(trans_m_min, trans_m_max)
        if trans_unit == "cm":
            translation = translation / 100
        if debug:
            print("uniform translation:", translation)
        return translation

    @staticmethod
    def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False):
        """Sample a rotation about a random axis.

        The axis is a normalised Gaussian vector and the angle is uniform in
        ``[rot_degree_min, rot_degree_max]`` degrees.

        Returns:
            Rotation matrix ``[3, 3]``.
        """
        axis = np.random.randn(3)
        axis /= np.linalg.norm(axis)
        theta = np.random.uniform(
            rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi
        )

        # Rodrigues' formula with the skew-symmetric cross-product matrix K.
        K = np.array(
            [[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]]
        )
        R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
        if debug:
            print("uniform rotation:", theta * 180 / np.pi)
        return R

    @staticmethod
    def get_uniform_pose(
        trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False
    ):
        """Sample one homogeneous pose ``[4, 4]`` (translation drawn first)."""
        translation = PoseUtil.get_uniform_translation(
            trans_min, trans_max, trans_unit, debug
        )
        rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
        pose = np.eye(4)
        pose[:3, :3] = rotation
        pose[:3, 3] = translation
        return pose

    @staticmethod
    def get_n_uniform_pose(
        trans_min,
        trans_max,
        rot_min=0,
        rot_max=180,
        n=1,
        trans_unit="cm",
        fix=None,
        contain_canonical=True,
        debug=False,
    ):
        """Sample ``n`` homogeneous poses.

        Args:
            trans_min, trans_max: Translation bounds (see
                ``get_uniform_translation``).
            rot_min, rot_max: Rotation angle bounds in degrees.
            n: Number of poses; 0 yields an empty ``[0, 4, 4]`` array.
            trans_unit: Unit of the translation bounds.
            fix: ``PoseUtil.ROTATION`` shares one random rotation across all
                poses, ``PoseUtil.TRANSLATION`` shares one random translation;
                anything else samples both per pose.
            contain_canonical: Reset the per-pose sampled component(s) of
                pose 0 to zero translation / identity rotation. A component
                shared through ``fix`` is left as sampled.
            debug: Print every sampled component.

        Returns:
            Array ``[n, 4, 4]``.
        """
        def sample_translations():
            out = np.zeros((n, 3))
            for i in range(n):
                out[i] = PoseUtil.get_uniform_translation(
                    trans_min, trans_max, trans_unit, debug
                )
            if contain_canonical and n > 0:
                out[0] = np.zeros(3)
            return out

        def sample_rotations():
            out = np.zeros((n, 3, 3))
            for i in range(n):
                out[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
            if contain_canonical and n > 0:
                out[0] = np.eye(3)
            return out

        # The sampling order inside each branch is kept stable so seeded runs
        # consume the RNG in the same sequence.
        if fix == PoseUtil.ROTATION:
            translations = sample_translations()
            rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
        elif fix == PoseUtil.TRANSLATION:
            rotations = sample_rotations()
            translations = PoseUtil.get_uniform_translation(
                trans_min, trans_max, trans_unit, debug
            )
        else:
            translations = sample_translations()
            rotations = sample_rotations()

        pose = np.tile(np.eye(4), (n, 1, 1))
        pose[:, :3, :3] = rotations  # a single shared rotation broadcasts over n
        pose[:, :3, 3] = translations

        return pose

    @staticmethod
    def get_n_uniform_pose_batch(
        trans_min,
        trans_max,
        rot_min=0,
        rot_max=180,
        n=1,
        batch_size=1,
        trans_unit="cm",
        fix=None,
        contain_canonical=False,
        debug=False,
    ):
        """Sample ``batch_size`` independent sets of ``n`` poses.

        Returns:
            Array ``[batch_size, n, 4, 4]``.
        """
        if batch_size <= 0:
            return np.zeros((0, n, 4, 4))

        batch_poses = [
            PoseUtil.get_n_uniform_pose(
                trans_min,
                trans_max,
                rot_min,
                rot_max,
                n,
                trans_unit,
                fix,
                contain_canonical,
                debug,
            )
            for _ in range(batch_size)
        ]
        return np.stack(batch_poses, axis=0)

    @staticmethod
    def get_uniform_scale(scale_min, scale_max, debug=False):
        """Sample a per-axis scale uniformly between the bounds.

        Args:
            scale_min: Lower bound, scalar or three per-axis values.
            scale_max: Upper bound, scalar or three per-axis values.
            debug: Print the sampled scale.

        Returns:
            Array ``[3]``.
        """
        scale = PoseUtil._uniform_xyz(scale_min, scale_max)
        if debug:
            print("uniform scale:", scale)
        return scale

    @staticmethod
    def rotation_matrix_from_axis_angle(axis, angle):
        """Return the rotation matrix for ``angle`` radians about ``axis``.

        ``axis`` is used as given; it must already be a unit vector for the
        result to be a proper rotation.
        """
        cos_angle = np.cos(angle)
        sin_angle = np.sin(angle)
        one_minus_cos = 1 - cos_angle

        x, y, z = axis
        rotation_matrix = np.array([
            [cos_angle + x*x*one_minus_cos, x*y*one_minus_cos - z*sin_angle, x*z*one_minus_cos + y*sin_angle],
            [y*x*one_minus_cos + z*sin_angle, cos_angle + y*y*one_minus_cos, y*z*one_minus_cos - x*sin_angle],
            [z*x*one_minus_cos - y*sin_angle, z*y*one_minus_cos + x*sin_angle, cos_angle + z*z*one_minus_cos]
        ])

        return rotation_matrix
|
42
utils/view_util.py
Normal file
42
utils/view_util.py
Normal file
@ -0,0 +1,42 @@
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import json
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
|
||||
class ViewRenderUtil:
    """Launch Blender in background mode to render a view of a scene."""

    # Blender executable used by render_view.
    blender_path = r"C:\Program Files\Blender Foundation\Blender 4.0\blender.exe"

    @staticmethod
    def render_view(cam_pose, scene_path, script_path):
        """Run ``script_path`` inside Blender for one camera pose.

        A temporary directory receives a copy of the scene's
        ``scene_info.json`` and a ``params.json`` holding the camera pose and
        the scene path; the directory is passed to the Blender script as its
        only argument. Blender's stdout and stderr are printed.

        Args:
            cam_pose: Camera pose array (anything providing ``tolist()``).
            scene_path: Directory containing ``scene_info.json``.
            script_path: Python script executed by Blender (``-P``).

        Returns:
            None. Anything the script writes into the temporary directory is
            removed when this function returns.
        """
        with tempfile.TemporaryDirectory() as temp_dir:
            params = {
                "cam_pose": cam_pose.tolist(),
                "scene_path": scene_path
            }
            scene_info_path = os.path.join(scene_path, "scene_info.json")
            shutil.copy(scene_info_path, os.path.join(temp_dir, "scene_info.json"))
            params_data_path = os.path.join(temp_dir, "params.json")
            with open(params_data_path, 'w') as f:
                json.dump(params, f)

            # Argument list (no shell): -b runs Blender without a UI and
            # everything after "--" is handed to the script.
            result = subprocess.run([
                ViewRenderUtil.blender_path, '-b', '-P', script_path, '--', temp_dir
            ], capture_output=True, text=True)
            print(result.stdout)
            print(result.stderr)
            # TODO: load the render output from temp_dir before it is deleted
            # and return it; nothing is read back yet.

        return None
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: render the first recorded camera of a sample scene.
    import numpy as np

    idx = 0
    cam_param_path = r"D:\Project\nbv_rec\data\google_scan-backpack_0288\camera_params\{}.json"
    # Read inside a context manager so the file handle is closed.
    with open(cam_param_path.format(idx)) as f:
        cam_params = json.load(f)
    cam_pose = np.array(cam_params["extrinsic"])
    scene_path = r"D:\Project\nbv_rec\data\google_scan-backpack_0288"
    script_path = r"D:\Project\nbv_rec\nbv_rec_blender_render\data_renderer.py"
    ViewRenderUtil.render_view(cam_pose, scene_path, script_path)
|
201
utils/volume_render_util.py
Normal file
201
utils/volume_render_util.py
Normal file
@ -0,0 +1,201 @@
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from typing import Tuple
|
||||
|
||||
class VolumeRendererUtil:
    """Stateless helpers for NeRF-style volume rendering and ray entropy."""

    @staticmethod
    def render_rays(
        nerf_model,
        rays_o: torch.Tensor,
        rays_d: torch.Tensor,
        near: torch.Tensor,
        far: torch.Tensor,
        coarse_samples: int = 64,
        fine_samples: int = 128,
        perturb: bool = True
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """Render rays with coarse-to-fine sampling and compute their entropy.

        Args:
            nerf_model: Callable ``(pos, dir, coarse=...) -> (sigma, color)``.
                It is called without ``coarse`` for the coarse pass and with
                ``coarse=False`` for the final pass.
            rays_o: Ray origins ``[N_rays, 3]``.
            rays_d: Ray directions ``[N_rays, 3]``.
            near: Near bound per ray ``[N_rays]`` (or a scalar).
            far: Far bound per ray ``[N_rays]`` (or a scalar).
            coarse_samples: Number of stratified samples per ray.
            fine_samples: Number of importance samples per ray.
            perturb: Jitter the stratified samples randomly.

        Returns:
            ``(rgb_map, weights, t_vals, entropy)`` with shapes
            ``[N_rays, 3]``, ``[N_rays, N_samples]``, ``[N_rays, N_samples]``
            and ``[N_rays]``, where
            ``N_samples = coarse_samples + fine_samples``.
        """
        # Coarse (stratified) samples.
        t_vals_coarse, points_coarse = VolumeRendererUtil.sample_along_ray(
            rays_o, rays_d, near, far, coarse_samples, perturb)

        # The coarse pass only proposes sample locations, so no gradient.
        with torch.no_grad():
            # One direction per sample so the model sees matching shapes.
            dirs_coarse = rays_d.unsqueeze(1).expand_as(points_coarse)
            sigma_coarse, _ = nerf_model(points_coarse, dirs_coarse)
            weights_coarse = VolumeRendererUtil.compute_weights(sigma_coarse, t_vals_coarse, rays_d)
            t_vals_fine = VolumeRendererUtil.importance_sampling(t_vals_coarse, weights_coarse, fine_samples)

        # Merge coarse and fine samples, ordered along each ray.
        t_vals = torch.sort(torch.cat([t_vals_coarse, t_vals_fine], -1), dim=-1).values
        points = rays_o[..., None, :] + t_vals[..., None] * rays_d[..., None, :]

        # Final pass on the merged samples with the fine network.
        dirs = rays_d.unsqueeze(1).expand_as(points)
        sigma, color = nerf_model(points, dirs, coarse=False)
        rgb_map, weights = VolumeRendererUtil.volume_rendering(sigma, color, t_vals, rays_d)
        entropy = VolumeRendererUtil.calculate_entropy(weights)

        return rgb_map, weights, t_vals, entropy

    @staticmethod
    def importance_sampling(
        t_vals: torch.Tensor,
        weights: torch.Tensor,
        n_samples: int
    ) -> torch.Tensor:
        """Draw new sample positions following a per-ray weight distribution.

        Uses inverse-transform sampling with evenly spaced quantiles, so the
        result is deterministic.

        Args:
            t_vals: Existing sample positions ``[N_rays, N_coarse]``.
            weights: Weights of those samples ``[N_rays, N_coarse]``.
            n_samples: Number of positions to generate per ray.

        Returns:
            New sample positions ``[N_rays, n_samples]``.
        """
        weights = weights + 1e-5  # avoid division by zero
        pdf = weights / torch.sum(weights, -1, keepdim=True)
        cdf = torch.cumsum(pdf, -1)

        # Evenly spaced quantiles, one row per ray.
        u = torch.linspace(0, 1, n_samples, device=weights.device, dtype=cdf.dtype)
        u = u.expand(list(cdf.shape[:-1]) + [n_samples]).contiguous()
        indices = torch.searchsorted(cdf, u, right=True)

        # Neighbouring samples that bracket every quantile.
        below = torch.clamp(indices - 1, min=0)
        above = torch.clamp(indices, max=cdf.shape[-1] - 1)

        # gather needs an index with the same rank as its input, so the two
        # neighbours are fetched separately.
        cdf_below = torch.gather(cdf, -1, below)
        cdf_above = torch.gather(cdf, -1, above)
        t_below = torch.gather(t_vals, -1, below)
        t_above = torch.gather(t_vals, -1, above)

        # Linear interpolation inside the bracket; degenerate brackets use a
        # unit denominator.
        denom = cdf_above - cdf_below
        denom = torch.where(denom < 1e-5, torch.ones_like(denom), denom)
        t = (u - cdf_below) / denom
        samples = t_below + t * (t_above - t_below)

        return samples

    @staticmethod
    def sample_along_ray(
        rays_o: torch.Tensor,
        rays_d: torch.Tensor,
        near: torch.Tensor,
        far: torch.Tensor,
        n_samples: int,
        perturb: bool = True
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Place stratified samples between ``near`` and ``far`` on every ray.

        Args:
            rays_o: Ray origins ``[N_rays, 3]``.
            rays_d: Ray directions ``[N_rays, 3]``.
            near: Near bound per ray ``[N_rays]`` (or a scalar).
            far: Far bound per ray ``[N_rays]`` (or a scalar).
            n_samples: Number of samples per ray.
            perturb: Jitter each sample uniformly inside its stratum.

        Returns:
            ``(t_vals, points)`` with shapes ``[N_rays, n_samples]`` and
            ``[N_rays, n_samples, 3]``.
        """
        n_rays = rays_o.shape[0]
        # Per-ray bounds become a column so they broadcast against the
        # per-sample steps.
        near = torch.as_tensor(near, dtype=rays_o.dtype, device=rays_o.device).reshape(-1, 1)
        far = torch.as_tensor(far, dtype=rays_o.dtype, device=rays_o.device).reshape(-1, 1)

        steps = torch.linspace(0., 1., n_samples, dtype=rays_o.dtype, device=rays_o.device)
        t_vals = (near + (far - near) * steps.unsqueeze(0)).expand(n_rays, n_samples)

        if perturb:
            # Uniform jitter inside the interval between neighbouring midpoints.
            mids = 0.5 * (t_vals[..., 1:] + t_vals[..., :-1])
            upper = torch.cat([mids, t_vals[..., -1:]], -1)
            lower = torch.cat([t_vals[..., :1], mids], -1)
            t_rand = torch.rand(t_vals.shape, dtype=t_vals.dtype, device=rays_o.device)
            t_vals = lower + (upper - lower) * t_rand
        else:
            t_vals = t_vals.contiguous()

        # 3D sample positions.
        points = rays_o.unsqueeze(1) + t_vals.unsqueeze(-1) * rays_d.unsqueeze(1)
        return t_vals, points

    @staticmethod
    def volume_rendering(
        sigma: torch.Tensor,
        color: torch.Tensor,
        t_vals: torch.Tensor,
        rays_d: torch.Tensor
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Composite colours along each ray.

        Args:
            sigma: Densities ``[N_rays, N_samples, 1]``.
            color: RGB colours ``[N_rays, N_samples, 3]``.
            t_vals: Sample positions ``[N_rays, N_samples]``.
            rays_d: Ray directions ``[N_rays, 3]``.

        Returns:
            ``(rgb_map, weights)`` with shapes ``[N_rays, 3]`` and
            ``[N_rays, N_samples]``.
        """
        weights = VolumeRendererUtil.compute_weights(sigma, t_vals, rays_d)
        rgb_map = torch.sum(weights.unsqueeze(-1) * color, dim=-2)
        return rgb_map, weights

    @staticmethod
    def calculate_entropy(weights: torch.Tensor, eps: float = 1e-10) -> torch.Tensor:
        """Return the Shannon entropy of each ray's normalised weights.

        Args:
            weights: Weights ``[N_rays, N_samples]``.
            eps: Small constant guarding the division and ``log(0)``.

        Returns:
            Entropy per ray ``[N_rays]``.
        """
        norm_weights = weights / (torch.sum(weights, dim=-1, keepdim=True) + eps)
        entropy = -torch.sum(norm_weights * torch.log(norm_weights + eps), dim=-1)
        return entropy

    @staticmethod
    def compute_weights(sigma: torch.Tensor, t_vals: torch.Tensor, rays_d: torch.Tensor) -> torch.Tensor:
        """Return the compositing weight ``alpha_i * T_i`` of every sample.

        Args:
            sigma: Densities ``[N_rays, N_samples, 1]``; negative values are
                clamped to zero.
            t_vals: Sample positions ``[N_rays, N_samples]``.
            rays_d: Ray directions ``[N_rays, 3]``.

        Returns:
            Weights ``[N_rays, N_samples]``.
        """
        # Distance between consecutive samples; the last interval is treated
        # as extending (practically) to infinity.
        dists = t_vals[..., 1:] - t_vals[..., :-1]
        dists = torch.cat([dists, torch.full_like(t_vals[..., :1], 1e10)], -1)
        # Convert parameter distance to metric distance for non-unit rays.
        dists = dists * torch.norm(rays_d[..., None, :], dim=-1)

        # Densities may be raw network outputs; a negative density would give
        # negative weights and NaN entropy.
        density = F.relu(sigma.squeeze(-1))
        optical_depth = density * dists

        alpha = 1. - torch.exp(-optical_depth)
        # Transmittance up to (not including) each sample.
        accumulated = torch.cumsum(optical_depth[..., :-1], dim=-1)
        trans = torch.exp(-torch.cat([torch.zeros_like(optical_depth[..., :1]), accumulated], dim=-1))

        return alpha * trans
|
Loading…
x
Reference in New Issue
Block a user