# nbv_rec_uncertainty_guide/ref_code/pipeline.py

import numpy as np
import torch
from scipy.spatial.transform import Rotation as R
from uncertainty_guide import UncertaintyGuideNeRF

class ActiveReconstructionPolicy:
    """Next-best-view policy over a fixed hemisphere of candidate cameras.

    Candidate poses are generated once on concentric rings of a hemisphere
    centred at the origin. They are grouped into azimuth sections, and
    ``select_next_views`` picks, per section, the not-yet-visited candidate
    that the uncertainty guide scores highest.
    """

    # Distance of every candidate camera from the origin.
    VIEW_RADIUS = 2.0
    # Smallest polar angle (radians from +z); keeps cameras off the pole,
    # where the look-at frame built from the +z world-up would degenerate.
    MIN_POLAR_ANGLE = 0.1

    def __init__(self, config):
        """Store ``config``, build the candidate poses and the uncertainty guide.

        ``config`` must expose ``n_circles`` and ``n_poses_per_circle``; it is
        also handed unchanged to ``UncertaintyGuideNeRF``.
        """
        self.config = config
        self._setup_view_sphere()
        self.uncertainty_guide = UncertaintyGuideNeRF(config)

    @staticmethod
    def _look_at_origin(position):
        """Return a 4x4 pose at ``position`` whose third axis points at the origin.

        The rotation columns are ``(right, up, forward)`` and form an
        orthonormal, right-handed frame.
        """
        position = np.asarray(position, dtype=float)
        forward = -position / np.linalg.norm(position)

        right = np.cross((0.0, 0.0, 1.0), forward)
        right_norm = np.linalg.norm(right)
        if right_norm < 1e-8:
            # Camera sits on the world-up axis: the cross product vanishes,
            # so fall back to another reference axis.
            right = np.cross((0.0, 1.0, 0.0), forward)
            right_norm = np.linalg.norm(right)
        # Without this normalisation |right| equals sin(polar angle) and the
        # 3x3 block is not a rotation matrix.
        right = right / right_norm
        up = np.cross(forward, right)

        pose = np.eye(4)
        pose[:3, :3] = np.stack([right, up, forward], axis=-1)
        pose[:3, 3] = position
        return pose

    def _setup_view_sphere(self):
        """Create the candidate camera poses on a hemisphere and section them.

        ``config.n_circles`` rings are placed at polar angles evenly spaced in
        ``[MIN_POLAR_ANGLE, pi/2]``; each ring holds
        ``config.n_poses_per_circle`` cameras evenly spaced in azimuth. Sets
        ``self.poses`` (shape ``(N, 4, 4)``) and ``self.section_masks``.
        """
        polar_angles = np.linspace(
            self.MIN_POLAR_ANGLE, np.pi / 2, self.config.n_circles)
        azimuths = np.linspace(
            0, 2 * np.pi, self.config.n_poses_per_circle, endpoint=False)

        poses = []
        for polar in polar_angles:
            for theta in azimuths:
                # Spherical -> Cartesian on the unit sphere, then scale.
                direction = np.array([
                    np.cos(theta) * np.sin(polar),
                    np.sin(theta) * np.sin(polar),
                    np.cos(polar),
                ])
                poses.append(self._look_at_origin(direction * self.VIEW_RADIUS))

        self.poses = np.stack(poses) if poses else np.empty((0, 4, 4))

        # Cluster the candidates into sections used for per-section selection.
        self.section_masks = self._create_section_masks()

    def _create_section_masks(self, n_sectors=6):
        """Return boolean masks over ``self.poses``, two per azimuth sector.

        The azimuth range ``[-pi, pi]`` is split into ``n_sectors`` equal
        sectors. For each sector two masks are emitted, in this order: poses
        with ``z > 0`` and poses with ``z <= 0``. The result therefore has
        ``2 * n_sectors`` entries.

        NOTE(review): poses produced by ``_setup_view_sphere`` all have
        ``z > 0``, so every ``z <= 0`` mask is empty for them. If the intent
        was to split the hemisphere by elevation, this needs a different
        threshold -- confirm with the author.
        """
        positions = self.poses[:, :3, 3]
        azimuth = np.arctan2(positions[:, 1], positions[:, 0])

        upper = positions[:, 2] > 0
        lower = ~upper

        # n_sectors sectors need n_sectors + 1 edges.
        edges = np.linspace(-np.pi, np.pi, n_sectors + 1)
        # arctan2 can return exactly +pi (the last edge). Clipping folds that
        # seam into the last sector instead of leaving the pose in none.
        sector = np.clip(np.digitize(azimuth, edges) - 1, 0, n_sectors - 1)

        masks = []
        for index in range(n_sectors):
            in_sector = sector == index
            masks.append(in_sector & upper)
            masks.append(in_sector & lower)
        return masks

    def select_next_views(self, nerf_model, current_poses, min_distance=0.1):
        """Pick the highest-entropy unvisited candidate from each section.

        Args:
            nerf_model: Current NeRF model, forwarded to the uncertainty guide.
            current_poses: Already captured camera poses, shape ``(M, 4, 4)``
                (``M`` may be 0).
            min_distance: Candidates whose position is within this distance of
                any captured pose are excluded.

        Returns:
            Array of shape ``(K, 4, 4)`` with at most one pose per section;
            ``K`` is 0 when no candidate is left.

        Raises:
            ValueError: If the uncertainty guide does not return exactly one
                score per candidate pose.
        """
        candidate_positions = self.poses[:, :3, 3]
        current_poses = np.asarray(current_poses, dtype=float).reshape(-1, 4, 4)

        if len(current_poses):
            distances = np.linalg.norm(
                current_poses[:, None, :3, 3] - candidate_positions[None],
                axis=-1)
            valid_mask = distances.min(axis=0) > min_distance
        else:
            # Nothing captured yet: every candidate is eligible. (A min over
            # an empty axis would raise.)
            valid_mask = np.ones(len(self.poses), dtype=bool)

        valid_indices = np.flatnonzero(valid_mask)
        if valid_indices.size == 0:
            return self.poses[:0]

        # One uncertainty score per remaining candidate.
        entropy_values = np.asarray(
            self.uncertainty_guide.evaluate_candidate_views(
                nerf_model, self.poses[valid_indices]),
            dtype=float,
        ).reshape(-1)
        if entropy_values.size != valid_indices.size:
            raise ValueError(
                "evaluate_candidate_views returned "
                f"{entropy_values.size} scores for "
                f"{valid_indices.size} candidate poses")

        selected_indices = []
        for mask in self.section_masks:
            # Positions (within the valid subset) that belong to this section.
            in_section = np.flatnonzero(mask[valid_indices])
            if in_section.size == 0:
                continue
            # Restricting the argmax to the section keeps the pick inside it
            # even when every score there is -inf.
            best = in_section[np.argmax(entropy_values[in_section])]
            selected_indices.append(valid_indices[best])

        return self.poses[np.asarray(selected_indices, dtype=int)]

    def coarse_to_fine_reconstruction(self, nerf_model, initial_poses, max_iterations=3):
        """Iteratively grow the set of camera poses with next-best views.

        Args:
            nerf_model: NeRF model used to score candidates. It is not
                retrained here; retraining is expected to happen outside.
            initial_poses: Starting camera poses, shape ``(M, 4, 4)``.
            max_iterations: Maximum number of selection rounds.

        Returns:
            All poses (a copy of the initial ones followed by every selected
            batch), shape ``(M + K, 4, 4)``.
        """
        all_selected_poses = np.array(initial_poses).reshape(-1, 4, 4)

        for _ in range(max_iterations):
            next_views = self.select_next_views(nerf_model, all_selected_poses)
            if len(next_views) == 0:
                # No unvisited candidate is left; further rounds cannot add any.
                break
            all_selected_poses = np.concatenate(
                [all_selected_poses, next_views], axis=0)

        return all_selected_poses