# nbv_rec_uncertainty_guide/ref_code/uncertainty_guide.py

import torch
import numpy as np
from utils.volume_render_util import VolumeRendererUtil
import torch.nn.functional as F
from typing import Tuple, List, Dict, Any, Optional

class UncertaintyGuideNeRF:
    """
    Active view-selection strategy based on NeRF uncertainty.

    A candidate camera pose is scored by the mean of the per-ray entropy
    values returned by ``VolumeRendererUtil.render_rays`` for that view; the
    scores are meant to guide the choice of the next view.
    """

    def __init__(self, config: Dict[str, Any]):
        """
        Initialise the uncertainty-guided strategy.

        Args:
            config: Configuration dictionary. Recognised keys (all optional):
                ``device`` (default ``"cuda"``; ``"cpu"`` is used whenever
                CUDA is unavailable), ``width``/``height`` (default 800),
                ``focal`` (default 1000.0), ``near``/``far`` (default
                2.0/6.0), ``coarse_samples`` (default 64), ``fine_samples``
                (default 128) and ``ray_batch_size`` (default 4096).
        """
        self.config = config
        self.device = torch.device(
            config.get("device", "cuda") if torch.cuda.is_available() else "cpu"
        )

        # Camera intrinsics.
        self.width = config.get("width", 800)
        self.height = config.get("height", 800)
        self.focal = config.get("focal", 1000.0)

        # Sampling bounds and sample counts forwarded to the renderer.
        self.near = config.get("near", 2.0)
        self.far = config.get("far", 6.0)
        self.coarse_samples = config.get("coarse_samples", 64)
        self.fine_samples = config.get("fine_samples", 128)

        # Number of rays rendered per renderer call; lower it if the GPU
        # runs out of memory.
        self.ray_batch_size = config.get("ray_batch_size", 4096)

    def generate_rays(self, pose: np.ndarray) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Generate one ray per pixel for a camera pose.

        Args:
            pose: Camera pose matrix [4, 4]. The upper-left 3x3 block rotates
                camera-space directions into world space and the last column
                is used as the ray origin. A NumPy array or a torch tensor is
                accepted.

        Returns:
            rays_o: Ray origins [H*W, 3], in row-major pixel order.
            rays_d: Ray directions [H*W, 3] (not normalised), same order.
        """
        # Pixel grids of shape [H, W]: px[r, c] == c and py[r, c] == r.
        xs = torch.linspace(0, self.width - 1, self.width, device=self.device)
        ys = torch.linspace(0, self.height - 1, self.height, device=self.device)
        px, py = torch.meshgrid(xs, ys, indexing='xy')

        # Camera-space directions: x to the right, y up (hence the negated
        # row index), looking down the negative z axis.
        dirs = torch.stack([
            (px - self.width * 0.5) / self.focal,
            -(py - self.height * 0.5) / self.focal,
            -torch.ones_like(px),
        ], dim=-1)

        # as_tensor handles both NumPy arrays and tensors.
        pose = torch.as_tensor(pose, dtype=torch.float32, device=self.device)

        # Rotate every direction into world space (equivalent to R @ d).
        rays_d = torch.sum(dirs[..., None, :] * pose[:3, :3], dim=-1)
        # All rays share the camera centre as origin.
        rays_o = pose[:3, -1].expand(rays_d.shape)

        # Flatten to the batched [H*W, 3] layout.
        return rays_o.reshape(-1, 3), rays_d.reshape(-1, 3)

    def evaluate_view_uncertainty(self,
                                 nerf_model: torch.nn.Module,
                                 pose: np.ndarray) -> float:
        """
        Evaluate the uncertainty (entropy) of a single view.

        The model is switched to eval mode for the duration of the call and
        its previous train/eval mode is restored afterwards, so calling this
        in the middle of training does not leave the model in eval mode.

        Args:
            nerf_model: NeRF model passed through to
                ``VolumeRendererUtil.render_rays``.
            pose: Camera pose matrix [4, 4].

        Returns:
            Mean of the entropy values returned by the renderer over all rays
            of the view.
        """
        was_training = nerf_model.training
        nerf_model.eval()
        try:
            with torch.no_grad():
                rays_o, rays_d = self.generate_rays(pose)

                # Unit-length directions; normalising once up front gives the
                # same per-ray result as normalising every batch.
                rays_d = F.normalize(rays_d, dim=-1)

                entropy_chunks = []
                # Render in chunks to bound peak memory.
                for start in range(0, rays_o.shape[0], self.ray_batch_size):
                    stop = start + self.ray_batch_size
                    batch_rays_o = rays_o[start:stop]
                    batch_rays_d = rays_d[start:stop]

                    # Constant per-ray near/far bounds.
                    near = torch.full_like(batch_rays_o[..., 0], self.near)
                    far = torch.full_like(batch_rays_o[..., 0], self.far)

                    # Only the fourth output (entropy) is used here.
                    _, _, _, entropy = VolumeRendererUtil.render_rays(
                        nerf_model,
                        batch_rays_o,
                        batch_rays_d,
                        near,
                        far,
                        self.coarse_samples,
                        self.fine_samples
                    )
                    entropy_chunks.append(entropy)

                # Concatenate all chunks and average over every ray.
                return torch.cat(entropy_chunks, dim=0).mean().item()
        finally:
            # Put the model back into whichever mode the caller had it in.
            nerf_model.train(was_training)

    def evaluate_candidate_views(self,
                                nerf_model: torch.nn.Module,
                                candidate_poses: np.ndarray) -> np.ndarray:
        """
        Evaluate the uncertainty (entropy) of every candidate view.

        Args:
            nerf_model: NeRF model.
            candidate_poses: Candidate camera poses [N, 4, 4].

        Returns:
            Mean entropy of each candidate view, as a float64 array [N].
        """
        return np.array(
            [self.evaluate_view_uncertainty(nerf_model, pose)
             for pose in candidate_poses],
            dtype=np.float64,
        )

    def downsample_image(self,
                         rays_o: torch.Tensor,
                         rays_d: torch.Tensor,
                         factor: int = 4) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Subsample rays on the image grid to speed up processing.

        Every ``factor``-th row and column of the image is kept.

        Args:
            rays_o: Ray origins [H*W, 3] in row-major pixel order.
            rays_d: Ray directions [H*W, 3] in row-major pixel order.
            factor: Subsampling stride along both image axes.

        Returns:
            downsampled_rays_o: Subsampled ray origins [h*w, 3].
            downsampled_rays_d: Subsampled ray directions [h*w, 3].
        """
        num_rays = rays_o.shape[0]
        if num_rays == self.height * self.width:
            # Full-resolution rays from generate_rays: use the configured
            # image size so non-square images are laid out correctly.
            H, W = self.height, self.width
        else:
            # Ray set of another size: assume a square image, as before.
            H = W = int(np.sqrt(num_rays))

        # Back to image layout so rows and columns can be strided.
        rays_o = rays_o.reshape(H, W, 3)
        rays_d = rays_d.reshape(H, W, 3)

        downsampled_rays_o = rays_o[::factor, ::factor].reshape(-1, 3)
        downsampled_rays_d = rays_d[::factor, ::factor].reshape(-1, 3)

        return downsampled_rays_o, downsampled_rays_d