This commit is contained in:
hofee 2024-10-21 07:33:40 +00:00
commit 0f61e1d64d
4 changed files with 73 additions and 44 deletions

View File

@ -9,7 +9,7 @@ runner:
generate: generate:
port: 5004 port: 5004
from: 0 from: 0
to: 2 # -1 means all to: 1 # -1 means all
object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes object_dir: H:\\AI\\Datasets\\scaled_object_box_meshes
table_model_path: "H:\\AI\\Datasets\\table.obj" table_model_path: "H:\\AI\\Datasets\\table.obj"
output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp output_dir: C:\\Document\\Local Project\\nbv_rec\\nbv_reconstruction\\temp

View File

@ -167,7 +167,7 @@ if __name__ == "__main__":
# scene_list.append(line.strip()) # scene_list.append(line.strip())
scene_list = os.listdir(root) scene_list = os.listdir(root)
from_idx = 0 # 1000 from_idx = 0 # 1000
to_idx = 700 # 1500 to_idx = 1 # 1500
print(scene_list) print(scene_list)

View File

@ -4,12 +4,38 @@ import json
import cv2 import cv2
import trimesh import trimesh
import torch import torch
import OpenEXR
import Imath
from utils.pts import PtsUtil from utils.pts import PtsUtil
class DataLoadUtil: class DataLoadUtil:
TABLE_POSITION = np.asarray([0, 0, 0.8215]) TABLE_POSITION = np.asarray([0, 0, 0.8215])
@staticmethod
def load_exr_image(file_path):
    """Read the R, G and B channels of an OpenEXR file into a float32 array.

    Args:
        file_path: Path of the ``.exr`` file to read.

    Returns:
        A ``(height, width, 3)`` ``np.float32`` array with the channels
        stacked in R, G, B order along the last axis.
    """
    exr_file = OpenEXR.InputFile(file_path)
    try:
        # The data window in the header gives the inclusive pixel bounds.
        data_window = exr_file.header()["dataWindow"]
        width = data_window.max.x - data_window.min.x + 1
        height = data_window.max.y - data_window.min.y + 1

        # Ask OpenEXR to hand every channel back as 32-bit floats so the raw
        # bytes can be reinterpreted directly with np.float32.
        pixel_type = Imath.PixelType(Imath.PixelType.FLOAT)
        channel_images = [
            np.frombuffer(
                exr_file.channel(channel_name, pixel_type), dtype=np.float32
            ).reshape((height, width))
            for channel_name in ("R", "G", "B")
        ]
    finally:
        # Release the file handle even when a channel is missing or the
        # reshape fails; the original never closed it.
        exr_file.close()

    # np.stack copies, so the result does not alias the read-only buffers.
    return np.stack(channel_images, axis=-1)
@staticmethod @staticmethod
def get_display_table_info(root, scene_name): def get_display_table_info(root, scene_name):
scene_info = DataLoadUtil.load_scene_info(root, scene_name) scene_info = DataLoadUtil.load_scene_info(root, scene_name)
@ -150,34 +176,31 @@ class DataLoadUtil:
return mask_image return mask_image
@staticmethod
def load_normal(path, binocular=False, left_only=False, file_type="exr"):
    """Load the normal map(s) belonging to a frame, rescaled with ``x * 2 - 1``.

    The image is looked up in the ``normal`` sub-directory next to ``path``
    and is named ``<basename(path)><suffix>.<file_type>``, where the suffix
    is ``_L`` / ``_R`` for binocular data and empty otherwise.

    Args:
        path: Frame path; its directory and base name locate the image.
        binocular: Whether the frame has separate left/right images.
        left_only: With ``binocular=True``, load only the left image.
        file_type: Extension of the stored image. ``"exr"`` is read with
            ``DataLoadUtil.load_exr_image``; any other value is read as an
            8-bit colour image with OpenCV.

    Returns:
        ``(left, right)`` when ``binocular`` is true and ``left_only`` is
        false, otherwise a single array.

    Raises:
        FileNotFoundError: If a non-EXR image cannot be read by OpenCV.
    """
    normal_dir = os.path.join(os.path.dirname(path), "normal")
    frame_name = os.path.basename(path)

    def _load(suffix):
        # Build the path, decode according to file_type, then rescale.
        normal_path = os.path.join(
            normal_dir, frame_name + f"{suffix}.{file_type}"
        )
        if file_type.lower() == "exr":
            # NOTE(review): assumes the EXR stores normals encoded in [0, 1]
            # so that * 2 - 1 yields [-1, 1] - confirm against the renderer.
            return DataLoadUtil.load_exr_image(normal_path) * 2.0 - 1.0

        # Non-EXR files go through OpenCV; previously they were wrongly fed
        # to the EXR reader regardless of file_type.
        image = cv2.imread(normal_path, cv2.IMREAD_COLOR)
        if image is None:
            raise FileNotFoundError(f"Cannot read normal image: {normal_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image / 255.0 * 2.0 - 1.0

    if binocular and not left_only:
        normalized_normal_image_L = _load("_L")
        normalized_normal_image_R = _load("_R")
        return normalized_normal_image_L, normalized_normal_image_R
    if binocular:
        return _load("_L")
    return _load("")
@staticmethod @staticmethod
@ -215,11 +238,12 @@ class DataLoadUtil:
label_data = json.load(f) label_data = json.load(f)
cam_to_world = np.asarray(label_data["extrinsic"]) cam_to_world = np.asarray(label_data["extrinsic"])
cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world) cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
if display_table_as_world_space_origin:
world_to_display_table = np.eye(4) world_to_display_table = np.eye(4)
world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top( world_to_display_table[:3, 3] = -DataLoadUtil.get_display_table_top(
root_dir, scene_name root_dir, scene_name
) )
if display_table_as_world_space_origin:
cam_to_world = np.dot(world_to_display_table, cam_to_world) cam_to_world = np.dot(world_to_display_table, cam_to_world)
cam_intrinsic = np.asarray(label_data["intrinsic"]) cam_intrinsic = np.asarray(label_data["intrinsic"])
cam_info = { cam_info = {

View File

@ -40,7 +40,7 @@ class visualizeUtil:
all_combined_pts = [] all_combined_pts = []
for i in range(length): for i in range(length):
path = DataLoadUtil.get_path(root, scene, i) path = DataLoadUtil.get_path(root, scene, i)
pts = DataLoadUtil.load_from_preprocessed_pts(path,"txt") pts = DataLoadUtil.load_from_preprocessed_pts(path,"npy")
if pts.shape[0] == 0: if pts.shape[0] == 0:
continue continue
all_combined_pts.append(pts) all_combined_pts.append(pts)
@ -73,41 +73,46 @@ class visualizeUtil:
mesh.export(model_path) mesh.export(model_path)
@staticmethod @staticmethod
def save_points_and_normals(root, scene, frame_idx, output_dir): def save_points_and_normals(root, scene, frame_idx, output_dir, binocular=False):
target_mask_label = (0, 255, 0, 255) target_mask_label = (0, 255, 0, 255)
path = DataLoadUtil.get_path(root, scene, frame_idx) path = DataLoadUtil.get_path(root, scene, frame_idx)
cam_info = DataLoadUtil.load_cam_info(path, binocular=True) cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular, display_table_as_world_space_origin=False)
depth_L,_ = DataLoadUtil.load_depth( depth = DataLoadUtil.load_depth(
path, cam_info["near_plane"], path, cam_info["near_plane"],
cam_info["far_plane"], cam_info["far_plane"],
binocular=True, binocular=binocular,
) )
mask_L = DataLoadUtil.load_seg(path, binocular=True, left_only=True) if isinstance(depth, tuple):
normal_L = DataLoadUtil.load_normal(path, binocular=True, left_only=True) depth = depth[0]
mask = DataLoadUtil.load_seg(path, binocular=binocular, left_only=True)
normal = DataLoadUtil.load_normal(path, binocular=binocular, left_only=True)
''' target points ''' ''' target points '''
target_mask_img_L = (mask_L == target_mask_label).all(axis=-1) if mask is None:
target_mask_img = np.ones_like(depth, dtype=bool)
else:
target_mask_img = (mask == target_mask_label).all(axis=-1)
cam_intrinsic = cam_info["cam_intrinsic"] cam_intrinsic = cam_info["cam_intrinsic"]
z = depth_L[target_mask_img_L] z = depth[target_mask_img]
i, j = np.nonzero(target_mask_img_L) i, j = np.nonzero(target_mask_img)
x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] x = (j - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] y = (i - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
random_downsample_N = 1000 random_downsample_N = 1000
points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
normal_camera = normal_L[target_mask_img_L].reshape(-1, 3) normal_camera = normal[target_mask_img].reshape(-1, 3)
sampled_target_points, idx = PtsUtil.random_downsample_point_cloud( sampled_target_points, idx = PtsUtil.random_downsample_point_cloud(
points_camera, random_downsample_N, require_idx=True points_camera, random_downsample_N, require_idx=True
) )
if len(sampled_target_points) == 0: if len(sampled_target_points) == 0:
print("No target points") print("No target points")
offset = np.asarray([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
sampled_normal_camera = normal_camera[idx] sampled_normal_camera = normal_camera[idx]
sampled_normal_camera = np.dot(sampled_normal_camera, offset)
sampled_visualized_normal = [] sampled_visualized_normal = []
sampled_normal_camera[:, 2] = -sampled_normal_camera[:, 2]
sampled_normal_camera[:, 1] = -sampled_normal_camera[:, 1]
num_samples = 10 num_samples = 10
for i in range(len(sampled_target_points)): for i in range(len(sampled_target_points)):
sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)]) sampled_visualized_normal.append([sampled_target_points[i] + 0.02*t * sampled_normal_camera[i] for t in range(num_samples)])
@ -121,12 +126,12 @@ class visualizeUtil:
# ------ Debug ------ # ------ Debug ------
if __name__ == "__main__": if __name__ == "__main__":
root = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\temp" root = r"/home/yan20/nbv_rec/project/franka_control/temp"
model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes" model_dir = r"H:\\AI\\Datasets\\scaled_object_box_meshes"
scene = "omniobject3d-box_030" scene = "cad_model_world"
output_dir = r"C:\Document\Local Project\nbv_rec\nbv_reconstruction\test" output_dir = r"/home/yan20/nbv_rec/project/franka_control/temp/output"
# visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir) visualizeUtil.save_all_cam_pos_and_cam_axis(root, scene, output_dir)
# visualizeUtil.save_all_combined_pts(root, scene, output_dir) visualizeUtil.save_all_combined_pts(root, scene, output_dir)
# visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene) visualizeUtil.save_target_mesh_at_world_space(root, model_dir, scene)
visualizeUtil.save_points_and_normals(root, scene, 0, output_dir) #visualizeUtil.save_points_and_normals(root, scene,"10", output_dir, binocular=True)