This commit is contained in:
hofee 2024-10-19 19:06:09 +08:00
parent 5dae3c53db
commit be7ec1a433
4 changed files with 71 additions and 42 deletions

View File

@ -9,7 +9,7 @@ runner:
generate:
port: 5004
from: 0
to: 2 # -1 means all
to: 1 # -1 means all
object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes
table_model_path: "H:\\AI\\Datasets\\table.obj"
output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp

View File

@ -167,7 +167,7 @@ if __name__ == "__main__":
# scene_list.append(line.strip())
scene_list = os.listdir(root)
from_idx = 0 # 1000
to_idx = 700 # 1500
to_idx = 1 # 1500
print(scene_list)

View File

@ -4,12 +4,38 @@ import json
import cv2
import trimesh
import torch
import OpenEXR
import Imath
from utils.pts import PtsUtil
class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def load_exr_image(file_path):
    """Read the R, G and B channels of an OpenEXR file into one array.

    Args:
        file_path: Path of the EXR file to open.

    Returns:
        A float32 ``np.ndarray`` of shape ``(height, width, 3)`` whose last
        axis holds the R, G and B channels in that order.
    """
    exr_file = OpenEXR.InputFile(file_path)
    try:
        # The image size comes from the header's data window, whose bounds
        # are inclusive (hence the +1).
        data_window = exr_file.header()['dataWindow']
        width = data_window.max.x - data_window.min.x + 1
        height = data_window.max.y - data_window.min.y + 1

        # Request every channel as 32-bit float and view the raw bytes as a
        # (height, width) plane.
        float_type = Imath.PixelType(Imath.PixelType.FLOAT)
        planes = [
            np.frombuffer(
                exr_file.channel(channel, float_type), dtype=np.float32
            ).reshape((height, width))
            for channel in ('R', 'G', 'B')
        ]
    finally:
        # Release the file handle even when a channel is missing or the
        # reshape fails; the original never closed it.
        exr_file.close()

    # Combine the per-channel planes into a single (height, width, 3) image.
    return np.stack(planes, axis=-1)
@staticmethod
def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name)
@ -148,34 +174,31 @@ class DataLoadUtil:
return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False, file_type="exr"):
    """Load the normal map(s) stored next to a frame and remap them.

    Files are looked up in the ``normal`` directory beside ``path`` and are
    named ``<basename>[_L|_R].<file_type>``.

    Args:
        path: Frame path; its directory and basename locate the normal map.
        binocular: When true, the ``_L`` / ``_R`` suffixed files are used.
        left_only: With ``binocular``, load only the ``_L`` file.
        file_type: Extension of the normal map. ``"exr"`` is read through
            ``DataLoadUtil.load_exr_image``; any other value is read as an
            8-bit colour image with OpenCV and scaled by 1/255 first.

    Returns:
        One remapped array, or a ``(left, right)`` tuple when ``binocular``
        is true and ``left_only`` is false. Values are remapped with
        ``v * 2 - 1`` (presumably from a [0, 1] encoding to [-1, 1] --
        confirm against the renderer that writes these files).

    Raises:
        FileNotFoundError: If a non-EXR normal map cannot be read by OpenCV.
    """
    normal_dir = os.path.join(os.path.dirname(path), "normal")
    base_name = os.path.basename(path)

    def read_remapped(suffix):
        normal_path = os.path.join(normal_dir, base_name + f"{suffix}.{file_type}")
        if file_type.lower() == "exr":
            normal_image = DataLoadUtil.load_exr_image(normal_path)
        else:
            # The EXR reader cannot decode 8-bit formats such as PNG, so use
            # the OpenCV path and bring the values into [0, 1] first.
            normal_image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
            if normal_image is None:
                raise FileNotFoundError(f"Cannot read normal image: {normal_path}")
            normal_image = cv2.cvtColor(normal_image, cv2.COLOR_BGR2RGB) / 255.0
        return normal_image * 2.0 - 1.0

    if binocular and not left_only:
        return read_remapped("_L"), read_remapped("_R")
    if binocular and left_only:
        return read_remapped("_L")
    return read_remapped("")
@staticmethod
@ -213,11 +236,12 @@ class DataLoadUtil:
label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
if display_table_as_world_space_origin:
world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name
)
cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = {

View File

@ -40,7 +40,7 @@ class visualizeUtil:
all_combined_pts = []
for i in range(length):
path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"txt")
pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0:
continue
all_combined_pts.append(pts)
@ -73,41 +73,46 @@ class visualizeUtil:
mesh.export(model_path)
@staticmethod
def save_points_and_normals(root, scene, frame_idx, output_dir):
def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
target_mask_label = (0, 255, 0, 255)
path = DataLoadUtil.get_path(root, scene, frame_idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True)
depth_L,_ = DataLoadUtil.load_depth(
cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"],
cam_info["far_plane"],
binocular=True,
binocular=binocular,
)
mask_L = DataLoadUtil.load_seg(path, binocular=True, left_only=True)
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True)
if isinstance(depth, tuple):
depth = depth[0]
mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
''' target points '''
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1)
if mask is None:
target_mask_img = np.ones_like(depth, dtype=bool)
else:
target_mask_img = (mask == target_mask_label).all(axis=-1)
cam_intrinsic = cam_info["cam_intrinsic"]
z = depth_L[target_mask_img_L]
i, j = np.nonzero(target_mask_img_L)
z = depth[target_mask_img]
i, j = np.nonzero(target_mask_img)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
random_downsample_N = 1000
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal_L[target_mask_img_L].reshape(-1, 3)
normal_camera = normal[target_mask_img].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True
)
if len(sampled_target_points) == 0:
print("No target points")
offset = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
sampled_normal_camera = normal_camera[idx]
sampled_normal_camera = np.dot(sampled_normal_camera, offset)
sampled_visualized_normal = []
sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
num_samples = 10
for i in range(len(sampled_target_points)):
sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
@ -123,10 +128,10 @@ class visualizeUtil:
if __name__ == "__main__":
root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp"
model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
scene = "omniobject3d-box_030"
scene = "test_obj"
output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test"
# visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
# visualizeUtil.save_all_combined_pts(root, scene, output_dir)
# visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
visualizeUtil.save_points_and_normals(root, scene, 0, output_dir)
visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
visualizeUtil.save_all_combined_pts(root, scene, output_dir)
visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
#visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)