From 1f8c017a01b44c83ea8c72a42944e76dc6e9ed18 Mon Sep 17 00:00:00 2001 From: hofee Date: Fri, 18 Oct 2024 20:46:31 +0800 Subject: [PATCH] optimize code structure --- data_generator.py | 249 +----- data_load.py | 265 ------ data_renderer.py | 2 +- pts.py | 22 - reconstruction.py | 119 --- run_blender.py | 6 +- blender_util.py => utils/blender_util.py | 782 +++++++++--------- utils/material_util.py | 96 +++ pose.py => utils/pose.py | 302 +++---- .../view_sample_util.py | 336 ++++---- 10 files changed, 816 insertions(+), 1363 deletions(-) delete mode 100644 data_load.py delete mode 100644 pts.py delete mode 100644 reconstruction.py rename blender_util.py => utils/blender_util.py (97%) create mode 100644 utils/material_util.py rename pose.py => utils/pose.py (97%) rename view_sample_util.py => utils/view_sample_util.py (97%) diff --git a/data_generator.py b/data_generator.py index 5287bf9..434ff0c 100644 --- a/data_generator.py +++ b/data_generator.py @@ -6,8 +6,9 @@ import bpy import numpy as np import mathutils import requests -from blender.blender_util import BlenderUtils -from blender.view_sample_util import ViewSampleUtil +from utils.blender_util import BlenderUtils +from utils.view_sample_util import ViewSampleUtil +from utils.material_util import MaterialUtil class DataGenerator: def __init__(self, config): @@ -103,29 +104,7 @@ class DataGenerator: bpy.context.object.rigid_body.type = 'PASSIVE' bpy.ops.object.shade_auto_smooth() - # 创建不受光照影响的材质 - mat = bpy.data.materials.new(name="RedMaterial") - mat.use_nodes = True - - # 清除默认节点 - nodes = mat.node_tree.nodes - for node in nodes: - nodes.remove(node) - - # 添加 Emission 节点 - emission_node = nodes.new(type='ShaderNodeEmission') - emission_node.inputs['Color'].default_value = (1.0, 0.0, 0.0, 1.0) # 红色 - - # 添加 Material Output 节点 - output_node = nodes.new(type='ShaderNodeOutputMaterial') - - # 连接节点 - links = mat.node_tree.links - links.new(emission_node.outputs['Emission'], output_node.inputs['Surface']) - - # 将材质赋给对象 - platform.data.materials.clear() - platform.data.materials.append(mat) + MaterialUtil.change_object_material(platform, MaterialUtil.create_mask_material(color=(1.0, 0, 0))) self.display_table_config = { "height": height, @@ -166,31 +145,7 @@ class DataGenerator: bpy.ops.rigidbody.object_add() bpy.context.object.rigid_body.type = 'ACTIVE' - - # 创建不受光照影响的材质 - mat = bpy.data.materials.new(name="GreenMaterial") - mat.use_nodes = True - - # 清除默认节点 - nodes = mat.node_tree.nodes - for node in nodes: - nodes.remove(node) - - # 添加 Emission 节点 - emission_node = nodes.new(type='ShaderNodeEmission') - emission_node.inputs['Color'].default_value = (0.0, 1.0, 0.0, 1.0) # 绿色 - - # 添加 Material Output 节点 - output_node = nodes.new(type='ShaderNodeOutputMaterial') - - # 连接节点 - links = mat.node_tree.links - links.new(emission_node.outputs['Emission'], output_node.inputs['Surface']) - - # 将材质赋给对象 - obj.data.materials.clear() - obj.data.materials.append(mat) - + MaterialUtil.change_object_material(obj, MaterialUtil.create_mask_material(color=(0, 1.0, 0))) self.target_obj = obj @@ -249,9 +204,7 @@ class DataGenerator: self.set_progress("render frame", len(view_data["cam_poses"]), len(view_data["cam_poses"])) BlenderUtils.save_scene_info(scene_dir, self.display_table_config, object_name) - - - self.change_target_obj_material_to_normal() + MaterialUtil.change_object_material(self.target_obj, MaterialUtil.create_normal_material()) for i, cam_pose in enumerate(view_data["cam_poses"]): BlenderUtils.set_camera_at(cam_pose) BlenderUtils.render_normal_and_depth(scene_dir, f"{i}", binocular_vision=self.binocular_vision, target_object = self.target_obj) @@ -269,196 +222,6 @@ class DataGenerator: return True - def change_target_obj_material_to_normal(self): - - material_name = "normal" - mat = bpy.data.materials.get(material_name) or bpy.data.materials.new( - material_name - ) - mat.use_nodes = True - node_tree = mat.node_tree - nodes = node_tree.nodes - nodes.clear() - - links = node_tree.links - links.clear() - - # Nodes: - new_node = nodes.new(type="ShaderNodeMath") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (151.59744262695312, 854.5482177734375) - new_node.name = "Math" - new_node.operation = "MULTIPLY" - new_node.select = False - new_node.use_clamp = False - new_node.width = 140.0 - new_node.inputs[0].default_value = 0.5 - new_node.inputs[1].default_value = 1.0 - new_node.inputs[2].default_value = 0.0 - new_node.outputs[0].default_value = 0.0 - - new_node = nodes.new(type="ShaderNodeLightPath") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (602.9912719726562, 1046.660888671875) - new_node.name = "Light Path" - new_node.select = False - new_node.width = 140.0 - new_node.outputs[0].default_value = 0.0 - new_node.outputs[1].default_value = 0.0 - new_node.outputs[2].default_value = 0.0 - new_node.outputs[3].default_value = 0.0 - new_node.outputs[4].default_value = 0.0 - new_node.outputs[5].default_value = 0.0 - new_node.outputs[6].default_value = 0.0 - new_node.outputs[7].default_value = 0.0 - new_node.outputs[8].default_value = 0.0 - new_node.outputs[9].default_value = 0.0 - new_node.outputs[10].default_value = 0.0 - new_node.outputs[11].default_value = 0.0 - new_node.outputs[12].default_value = 0.0 - - new_node = nodes.new(type="ShaderNodeOutputMaterial") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.is_active_output = True - new_node.location = (1168.93017578125, 701.84033203125) - new_node.name = "Material Output" - new_node.select = False - new_node.target = "ALL" - new_node.width = 140.0 - new_node.inputs[2].default_value = [0.0, 0.0, 0.0] - - new_node = nodes.new(type="ShaderNodeBsdfTransparent") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (731.72900390625, 721.4832763671875) - new_node.name = "Transparent BSDF" - new_node.select = False - new_node.width = 140.0 - new_node.inputs[0].default_value = [1.0, 1.0, 1.0, 1.0] - - new_node = nodes.new(type="ShaderNodeCombineXYZ") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (594.4229736328125, 602.9271240234375) - new_node.name = "Combine XYZ" - new_node.select = False - new_node.width = 140.0 - new_node.inputs[0].default_value = 0.0 - new_node.inputs[1].default_value = 0.0 - new_node.inputs[2].default_value = 0.0 - new_node.outputs[0].default_value = [0.0, 0.0, 0.0] - - new_node = nodes.new(type="ShaderNodeMixShader") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (992.7239990234375, 707.2142333984375) - new_node.name = "Mix Shader" - new_node.select = False - new_node.width = 140.0 - new_node.inputs[0].default_value = 0.5 - - new_node = nodes.new(type="ShaderNodeEmission") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (774.0802612304688, 608.2547607421875) - new_node.name = "Emission" - new_node.select = False - new_node.width = 140.0 - new_node.inputs[0].default_value = [1.0, 1.0, 1.0, 1.0] - new_node.inputs[1].default_value = 1.0 - - new_node = nodes.new(type="ShaderNodeSeparateXYZ") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (-130.12167358398438, 558.1497802734375) - new_node.name = "Separate XYZ" - new_node.select = False - new_node.width = 140.0 - new_node.inputs[0].default_value = [0.0, 0.0, 0.0] - new_node.outputs[0].default_value = 0.0 - new_node.outputs[1].default_value = 0.0 - new_node.outputs[2].default_value = 0.0 - - new_node = nodes.new(type="ShaderNodeMath") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (162.43240356445312, 618.8094482421875) - new_node.name = "Math.002" - new_node.operation = "MULTIPLY" - new_node.select = False - new_node.use_clamp = False - new_node.width = 140.0 - new_node.inputs[0].default_value = 0.5 - new_node.inputs[1].default_value = 1.0 - new_node.inputs[2].default_value = 0.0 - new_node.outputs[0].default_value = 0.0 - - new_node = nodes.new(type="ShaderNodeMath") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (126.8158187866211, 364.5539855957031) - new_node.name = "Math.001" - new_node.operation = "MULTIPLY" - new_node.select = False - new_node.use_clamp = False - new_node.width = 140.0 - new_node.inputs[0].default_value = 0.5 - new_node.inputs[1].default_value = -1.0 - new_node.inputs[2].default_value = 0.0 - new_node.outputs[0].default_value = 0.0 - - new_node = nodes.new(type="ShaderNodeVectorTransform") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.convert_from = "WORLD" - new_node.convert_to = "CAMERA" - new_node.location = (-397.0209045410156, 594.7037353515625) - new_node.name = "Vector Transform" - new_node.select = False - new_node.vector_type = "VECTOR" - new_node.width = 140.0 - new_node.inputs[0].default_value = [0.5, 0.5, 0.5] - new_node.outputs[0].default_value = [0.0, 0.0, 0.0] - - new_node = nodes.new(type="ShaderNodeNewGeometry") - # new_node.active_preview = False - new_node.color = (0.6079999804496765, 0.6079999804496765, 0.6079999804496765) - new_node.location = (-651.8067016601562, 593.0455932617188) - new_node.name = "Geometry" - new_node.width = 140.0 - new_node.outputs[0].default_value = [0.0, 0.0, 0.0] - new_node.outputs[1].default_value = [0.0, 0.0, 0.0] - new_node.outputs[2].default_value = [0.0, 0.0, 0.0] - new_node.outputs[3].default_value = [0.0, 0.0, 0.0] - new_node.outputs[4].default_value = [0.0, 0.0, 0.0] - new_node.outputs[5].default_value = [0.0, 0.0, 0.0] - new_node.outputs[6].default_value = 0.0 - new_node.outputs[7].default_value = 0.0 - new_node.outputs[8].default_value = 0.0 - - # Links : - - links.new(nodes["Light Path"].outputs[0], nodes["Mix Shader"].inputs[0]) - links.new(nodes["Separate XYZ"].outputs[0], nodes["Math"].inputs[0]) - links.new(nodes["Separate XYZ"].outputs[1], nodes["Math.002"].inputs[0]) - links.new(nodes["Separate XYZ"].outputs[2], nodes["Math.001"].inputs[0]) - links.new(nodes["Vector Transform"].outputs[0], nodes["Separate XYZ"].inputs[0]) - links.new(nodes["Combine XYZ"].outputs[0], nodes["Emission"].inputs[0]) - links.new(nodes["Math"].outputs[0], nodes["Combine XYZ"].inputs[0]) - links.new(nodes["Math.002"].outputs[0], nodes["Combine XYZ"].inputs[1]) - links.new(nodes["Math.001"].outputs[0], nodes["Combine XYZ"].inputs[2]) - links.new(nodes["Transparent BSDF"].outputs[0], nodes["Mix Shader"].inputs[1]) - links.new(nodes["Emission"].outputs[0], nodes["Mix Shader"].inputs[2]) - links.new(nodes["Mix Shader"].outputs[0], nodes["Material Output"].inputs[0]) - links.new(nodes["Geometry"].outputs[1], nodes["Vector Transform"].inputs[0]) - - self.target_obj.data.materials.clear() - self.target_obj.data.materials.append(mat) - - def simulate_scene(self, frame_limit=120, depth = 0, diag = 0): bpy.context.view_layer.update() diff --git a/data_load.py b/data_load.py deleted file mode 100644 index ffc0138..0000000 --- a/data_load.py +++ /dev/null @@ -1,265 +0,0 @@ -import os -import numpy as np -import json -import cv2 -import trimesh -from pts import PtsUtil - -class DataLoadUtil: - - @staticmethod - def get_path(root, scene_name, frame_idx): - path = os.path.join(root, scene_name, f"{frame_idx}") - return path - - @staticmethod - def get_label_path(root, scene_name): - path = os.path.join(root,scene_name, f"label.json") - return path - - @staticmethod - def get_sampled_model_points_path(root, scene_name): - path = os.path.join(root,scene_name, f"sampled_model_points.txt") - return path - - @staticmethod - def get_scene_seq_length(root, scene_name): - camera_params_path = os.path.join(root, scene_name, "camera_params") - return len(os.listdir(camera_params_path)) - - @staticmethod - def load_downsampled_world_model_points(root, scene_name): - model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name) - model_points = np.loadtxt(model_path) - return model_points - - @staticmethod - def save_downsampled_world_model_points(root, scene_name, model_points): - model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name) - np.savetxt(model_path, model_points) - - @staticmethod - def load_mesh_at(model_dir, object_name, world_object_pose): - model_path = os.path.join(model_dir, object_name, "mesh.obj") - mesh = trimesh.load(model_path) - mesh.apply_transform(world_object_pose) - return mesh - - @staticmethod - def get_bbox_diag(model_dir, object_name): - model_path = os.path.join(model_dir, object_name, "mesh.obj") - mesh = trimesh.load(model_path) - bbox = mesh.bounding_box.extents - diagonal_length = np.linalg.norm(bbox) - return diagonal_length - - - @staticmethod - def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose): - mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose) - model_path = os.path.join(output_dir, scene_name, "world_mesh.obj") - mesh.export(model_path) - - @staticmethod - def save_target_mesh_at_world_space(root, model_dir, scene_name): - scene_info = DataLoadUtil.load_scene_info(root, scene_name) - target_name = scene_info["target_name"] - transformation = scene_info[target_name] - location = transformation["location"] - rotation_euler = transformation["rotation_euler"] - pose_mat = trimesh.transformations.euler_matrix(*rotation_euler) - pose_mat[:3, 3] = location - - mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat) - mesh_dir = os.path.join(root, scene_name, "mesh") - if not os.path.exists(mesh_dir): - os.makedirs(mesh_dir) - model_path = os.path.join(mesh_dir, "world_target_mesh.obj") - mesh.export(model_path) - - @staticmethod - def load_scene_info(root, scene_name): - scene_info_path = os.path.join(root, scene_name, "scene_info.json") - with open(scene_info_path, "r") as f: - scene_info = json.load(f) - return scene_info - - @staticmethod - def load_target_object_pose(root, scene_name): - scene_info = DataLoadUtil.load_scene_info(root, scene_name) - target_name = scene_info["target_name"] - transformation = scene_info[target_name] - location = transformation["location"] - rotation_euler = transformation["rotation_euler"] - pose_mat = trimesh.transformations.euler_matrix(*rotation_euler) - pose_mat[:3, 3] = location - return pose_mat - - @staticmethod - def load_depth(path, min_depth=0.01,max_depth=5.0,binocular=False): - - def load_depth_from_real_path(real_path, min_depth, max_depth): - depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED) - depth = depth.astype(np.float32) / 65535.0 - min_depth = min_depth - max_depth = max_depth - depth_meters = min_depth + (max_depth - min_depth) * depth - return depth_meters - - if binocular: - depth_path_L = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_L.png") - depth_path_R = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_R.png") - depth_meters_L = load_depth_from_real_path(depth_path_L, min_depth, max_depth) - depth_meters_R = load_depth_from_real_path(depth_path_R, min_depth, max_depth) - return depth_meters_L, depth_meters_R - else: - depth_path = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + ".png") - depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth) - return depth_meters - - @staticmethod - def load_seg(path, binocular=False): - if binocular: - def clean_mask(mask_image): - green = [0, 255, 0, 255] - red = [255, 0, 0, 255] - threshold = 2 - mask_image = np.where(np.abs(mask_image - green) <= threshold, green, mask_image) - mask_image = np.where(np.abs(mask_image - red) <= threshold, red, mask_image) - return mask_image - mask_path_L = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_L.png") - mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED)) - mask_path_R = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_R.png") - mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED)) - return mask_image_L, mask_image_R - else: - mask_path = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + ".png") - mask_image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) - return mask_image - - @staticmethod - def load_label(path): - with open(path, 'r') as f: - label_data = json.load(f) - return label_data - - @staticmethod - def load_rgb(path): - rgb_path = os.path.join(os.path.dirname(path), "rgb", os.path.basename(path) + ".png") - rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR) - return rgb_image - - @staticmethod - def cam_pose_transformation(cam_pose_before): - offset = np.asarray([ - [1, 0, 0, 0], - [0, -1, 0, 0], - [0, 0, -1, 0], - [0, 0, 0, 1]]) - cam_pose_after = cam_pose_before @ offset - return cam_pose_after - - @staticmethod - def load_cam_info(path, binocular=False): - camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json") - with open(camera_params_path, 'r') as f: - label_data = json.load(f) - cam_to_world = np.asarray(label_data["extrinsic"]) - cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world) - cam_intrinsic = np.asarray(label_data["intrinsic"]) - cam_info = { - "cam_to_world": cam_to_world, - "cam_intrinsic": cam_intrinsic, - "far_plane": label_data["far_plane"], - "near_plane": label_data["near_plane"] - } - if binocular: - cam_to_world_R = np.asarray(label_data["extrinsic_R"]) - cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R) - cam_info["cam_to_world_R"] = cam_to_world_R - return cam_info - - @staticmethod - def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)): - h, w = depth.shape - i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') - - z = depth - x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] - y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] - - points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) - mask = mask.reshape(-1,4) - - target_mask = (mask == target_mask_label).all(axis=-1) - - target_points_camera = points_camera[target_mask] - target_points_camera_aug = np.concatenate([target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1) - - target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3] - return { - "points_world": target_points_world, - "points_camera": target_points_camera - } - - @staticmethod - def get_point_cloud(depth, cam_intrinsic, cam_extrinsic): - h, w = depth.shape - i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') - - z = depth - x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] - y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] - - points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) - points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1) - - points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3] - return { - "points_world": points_world, - "points_camera": points_camera - } - - @staticmethod - def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)): - cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular) - if binocular: - depth_L, depth_R = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) - mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True) - point_cloud_L = DataLoadUtil.get_target_point_cloud(depth_L, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask_L, target_mask_label)['points_world'] - point_cloud_R = DataLoadUtil.get_target_point_cloud(depth_R, cam_info['cam_intrinsic'], cam_info['cam_to_world_R'], mask_R, target_mask_label)['points_world'] - point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, random_downsample_N) - point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, random_downsample_N) - overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size) - return overlap_points - else: - depth = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane']) - mask = DataLoadUtil.load_seg(path) - point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask)['points_world'] - return point_cloud - - - @staticmethod - def voxelize_points(points, voxel_size): - - voxel_indices = np.floor(points / voxel_size).astype(np.int32) - unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True) - return unique_voxels - - @staticmethod - def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005): - voxels_L, indices_L = DataLoadUtil.voxelize_points(point_cloud_L, voxel_size) - voxels_R, _ = DataLoadUtil.voxelize_points(point_cloud_R, voxel_size) - - voxel_indices_L = voxels_L.view([('', voxels_L.dtype)]*3) - voxel_indices_R = voxels_R.view([('', voxels_R.dtype)]*3) - overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R) - mask_L = np.isin(indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0]) - overlapping_points = point_cloud_L[mask_L] - return overlapping_points - - @staticmethod - def load_points_normals(root, scene_name): - points_path = os.path.join(root, scene_name, "points_and_normals.txt") - points_normals = np.loadtxt(points_path) - return points_normals \ No newline at end of file diff --git a/data_renderer.py b/data_renderer.py index 86ff3e3..c3a2f21 100644 --- a/data_renderer.py +++ b/data_renderer.py @@ -5,7 +5,7 @@ import json import mathutils import numpy as np sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from blender_util import BlenderUtils +from utils.blender_util import BlenderUtils class DataRenderer: diff --git a/pts.py b/pts.py deleted file mode 100644 index 19d6e2a..0000000 --- a/pts.py +++ /dev/null @@ -1,22 +0,0 @@ -import numpy as np -import open3d as o3d - -class PtsUtil: - - @staticmethod - def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005): - o3d_pc = o3d.geometry.PointCloud() - o3d_pc.points = o3d.utility.Vector3dVector(point_cloud) - downsampled_pc = o3d_pc.voxel_down_sample(voxel_size) - return np.asarray(downsampled_pc.points) - - @staticmethod - def transform_point_cloud(points, pose_mat): - points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1) - points_h = np.dot(pose_mat, points_h.T).T - return points_h[:, :3] - - @staticmethod - def random_downsample_point_cloud(point_cloud, num_points): - idx = np.random.choice(len(point_cloud), num_points, replace=True) - return point_cloud[idx] \ No newline at end of file diff --git a/reconstruction.py b/reconstruction.py deleted file mode 100644 index e7bb6ea..0000000 --- a/reconstruction.py +++ /dev/null @@ -1,119 +0,0 @@ -import numpy as np -from scipy.spatial import cKDTree -from pts import PtsUtil - -class ReconstructionUtil: - - @staticmethod - def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01): - kdtree = cKDTree(combined_point_cloud) - distances, _ = kdtree.query(target_point_cloud) - covered_points = np.sum(distances < threshold) - coverage_rate = covered_points / target_point_cloud.shape[0] - return coverage_rate - - @staticmethod - def compute_overlap_rate(new_point_cloud, combined_point_cloud, threshold=0.01): - kdtree = cKDTree(combined_point_cloud) - distances, _ = kdtree.query(new_point_cloud) - overlapping_points = np.sum(distances < threshold) - overlap_rate = overlapping_points / new_point_cloud.shape[0] - return overlap_rate - - @staticmethod - def combine_point_with_view_sequence(point_list, view_sequence): - selected_views = [] - for view_index, _ in view_sequence: - selected_views.append(point_list[view_index]) - return np.vstack(selected_views) - - @staticmethod - def compute_next_view_coverage_list(views, combined_point_cloud, target_point_cloud, threshold=0.01): - best_view = None - best_coverage_increase = -1 - current_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold) - - for view_index, view in enumerate(views): - candidate_views = combined_point_cloud + [view] - down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(candidate_views, threshold) - new_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, down_sampled_combined_point_cloud, threshold) - coverage_increase = new_coverage - current_coverage - if coverage_increase > best_coverage_increase: - best_coverage_increase = coverage_increase - best_view = view_index - return best_view, best_coverage_increase - - - @staticmethod - def compute_next_best_view_sequence_with_overlap(target_point_cloud, point_cloud_list, display_table_point_cloud_list = None,threshold=0.01, overlap_threshold=0.3, status_info=None): - selected_views = [] - current_coverage = 0.0 - remaining_views = list(range(len(point_cloud_list))) - view_sequence = [] - cnt_processed_view = 0 - while remaining_views: - best_view = None - best_coverage_increase = -1 - - for view_index in remaining_views: - - if selected_views: - combined_old_point_cloud = np.vstack(selected_views) - down_sampled_old_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_old_point_cloud,threshold) - down_sampled_new_view_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud_list[view_index],threshold) - overlap_rate = ReconstructionUtil.compute_overlap_rate(down_sampled_new_view_point_cloud,down_sampled_old_point_cloud, threshold) - if overlap_rate < overlap_threshold: - continue - - candidate_views = selected_views + [point_cloud_list[view_index]] - combined_point_cloud = np.vstack(candidate_views) - down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud,threshold) - new_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, down_sampled_combined_point_cloud, threshold) - coverage_increase = new_coverage - current_coverage - #print(f"view_index: {view_index}, coverage_increase: {coverage_increase}") - if coverage_increase > best_coverage_increase: - best_coverage_increase = coverage_increase - best_view = view_index - - - if best_view is not None: - if best_coverage_increase <=1e-3: - break - selected_views.append(point_cloud_list[best_view]) - remaining_views.remove(best_view) - current_coverage += best_coverage_increase - cnt_processed_view += 1 - if status_info is not None: - sm = status_info["status_manager"] - app_name = status_info["app_name"] - runner_name = status_info["runner_name"] - sm.set_status(app_name, runner_name, "current coverage", current_coverage) - sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list)) - - view_sequence.append((best_view, current_coverage)) - - else: - break - if status_info is not None: - sm = status_info["status_manager"] - app_name = status_info["app_name"] - runner_name = status_info["runner_name"] - sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list)) - return view_sequence, remaining_views, down_sampled_combined_point_cloud - - @staticmethod - def filter_points(points, points_normals, cam_pose, voxel_size=0.005, theta=45): - sampled_points = PtsUtil.voxel_downsample_point_cloud(points, voxel_size) - kdtree = cKDTree(points_normals[:,:3]) - _, indices = kdtree.query(sampled_points) - nearest_points = points_normals[indices] - - normals = nearest_points[:, 3:] - camera_axis = -cam_pose[:3, 2] - normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True) - cos_theta = np.dot(normals_normalized, camera_axis) - theta_rad = np.deg2rad(theta) - filtered_sampled_points= sampled_points[cos_theta > np.cos(theta_rad)] - - return filtered_sampled_points[:, :3] - \ No newline at end of file diff --git a/run_blender.py b/run_blender.py index 3b9ad5b..6bdcb14 100644 --- a/run_blender.py +++ b/run_blender.py @@ -1,10 +1,10 @@ import os import sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -sys.path.append("/home/hofee/.local/lib/python3.11/site-packages") import yaml -from blender.data_generator import DataGenerator + +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from data_generator import DataGenerator if __name__ == "__main__": config_path = sys.argv[sys.argv.index('--') + 1] diff --git a/blender_util.py b/utils/blender_util.py similarity index 97% rename from blender_util.py rename to utils/blender_util.py index 25ddf9b..47d016a 100644 --- a/blender_util.py +++ b/utils/blender_util.py @@ -1,391 +1,391 @@ -import os -import json -import bpy -import time -import gc -import numpy as np -import mathutils - - -class BlenderUtils: - - TABLE_NAME: str = "table" - CAMERA_NAME: str = "Camera" - CAMERA_RIGHT_NAME: str = "CameraRight" - CAMERA_OBJECT_NAME: str = "CameraObject" - DISPLAY_TABLE_NAME: str = "display_table" - MESH_FILE_NAME: str = "mesh.obj" - - @staticmethod - def get_obj_path(obj_dir, name): - return os.path.join(obj_dir, name, BlenderUtils.MESH_FILE_NAME) - - @staticmethod - def load_obj(name, mesh_path, scale=1): - print(mesh_path) - bpy.ops.wm.obj_import(filepath=mesh_path) - loaded_object = bpy.context.selected_objects[-1] - loaded_object.name = name - loaded_object.data.name = name - loaded_object.scale = (scale, scale, scale) - bpy.ops.rigidbody.object_add() - return loaded_object - - @staticmethod - def get_obj(name): - return bpy.data.objects.get(name) - - @staticmethod - def get_obj_pose(name): - obj = BlenderUtils.get_obj(name) - return np.asarray(obj.matrix_world) - - @staticmethod - def add_plane(name, location, orientation, size=10): - bpy.ops.mesh.primitive_plane_add(size=size, location=location) - plane = bpy.context.selected_objects[-1] - plane.name = name - plane.rotation_euler = orientation - bpy.ops.rigidbody.object_add() - bpy.context.object.rigid_body.type = "PASSIVE" - - @staticmethod - def add_table(table_model_path): - table = BlenderUtils.load_obj( - BlenderUtils.TABLE_NAME, table_model_path, scale=0.01 - ) - bpy.ops.rigidbody.object_add() - bpy.context.object.rigid_body.type = "PASSIVE" - - mat = bpy.data.materials.new(name="TableYellowMaterial") - mat.diffuse_color = (1.0, 1.0, 0.0, 1.0) - if len(table.data.materials) > 0: - table.data.materials[0] = mat - else: - table.data.materials.append(mat) - - @staticmethod - def setup_scene(init_light_and_camera_config, table_model_path, binocular_vision): - bpy.context.scene.render.engine = "BLENDER_EEVEE_NEXT" - bpy.context.scene.display.shading.show_xray = False - bpy.context.scene.display.shading.use_dof = False - bpy.context.scene.display.render_aa = "OFF" - bpy.context.scene.view_settings.view_transform = "Standard" - - bpy.context.scene.eevee.use_ssr = False # 关闭屏幕空间反射 - bpy.context.scene.eevee.use_bloom = False # 关闭辉光 - bpy.context.scene.eevee.use_gtao = False # 关闭环境光遮蔽 - bpy.context.scene.eevee.use_soft_shadows = False # 关闭软阴影 - bpy.context.scene.eevee.use_shadows = False # 关闭所有阴影 - bpy.context.scene.world.use_nodes = False # 如果你不需要环境光,关闭环境节点 - - # bpy.context.scene.eevee.use_sss = False # 关闭次表面散射 - - # 2. 设置最低的采样数 - bpy.context.scene.eevee.taa_render_samples = 1 - bpy.context.scene.eevee.taa_samples = 1 - BlenderUtils.init_light_and_camera( - init_light_and_camera_config, binocular_vision - ) - - BlenderUtils.add_plane("plane_floor", location=(0, 0, 0), orientation=(0, 0, 0)) - BlenderUtils.add_plane("plane_ceil", location=(0, 0, 10), orientation=(0, 0, 0)) - BlenderUtils.add_plane( - "plane_wall_1", location=(5, 0, 5), orientation=(0, np.pi / 2, 0) - ) - BlenderUtils.add_plane( - "plane_wall_2", location=(-5, 0, 5), orientation=(0, np.pi / 2, 0) - ) - BlenderUtils.add_plane( - "plane_wall_3", location=(0, 5, 5), orientation=(np.pi / 2, 0, 0) - ) - BlenderUtils.add_plane( - "plane_wall_4", location=(0, -5, 5), orientation=(np.pi / 2, 0, 0) - ) - - BlenderUtils.add_table(table_model_path) - - @staticmethod - def set_light_params(light, config): - light.location = config["location"] - light.rotation_euler = config["orientation"] - if light.type == "SUN": - light.data.energy = config["power"] - elif light.type == "POINT": - light.data.energy = config["power"] - - @staticmethod - def set_camera_params(camera, config, binocular_vision): - - camera_object = bpy.data.objects.new(BlenderUtils.CAMERA_OBJECT_NAME, None) - bpy.context.collection.objects.link(camera_object) - cameras = [bpy.data.objects.get("Camera")] - camera.location = [0, 0, 0] - camera.rotation_euler = [0, 0, 0] - camera.parent = camera_object - if binocular_vision: - left_camera = cameras[0] - right_camera = left_camera.copy() - right_camera.name = BlenderUtils.CAMERA_RIGHT_NAME - right_camera.data = left_camera.data.copy() - right_camera.data.name = BlenderUtils.CAMERA_RIGHT_NAME - bpy.context.collection.objects.link(right_camera) - right_camera.parent = camera_object - right_camera.location = [config["eye_distance"] / 2, 0, 0] - left_camera.location = [-config["eye_distance"] / 2, 0, 0] - binocular_angle = config["eye_angle"] - half_angle = np.radians(binocular_angle / 2) - - left_camera.rotation_euler[1] = -half_angle - right_camera.rotation_euler[1] = half_angle - cameras.append(right_camera) - - for camera in cameras: - camera.data.clip_start = config["near_plane"] - camera.data.clip_end = config["far_plane"] - - bpy.context.scene.render.resolution_x = config["resolution"][0] - bpy.context.scene.render.resolution_y = config["resolution"][1] - sensor_height = 24.0 - focal_length = sensor_height / ( - 2 * np.tan(np.radians(config["fov_vertical"]) / 2) - ) - camera.data.lens = focal_length - camera.data.sensor_width = ( - sensor_height * config["resolution"][0] / config["resolution"][1] - ) - camera.data.sensor_height = sensor_height - - @staticmethod - def init_light_and_camera(init_light_and_camera_config, binocular_vision): - - camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) - BlenderUtils.set_camera_params( - camera, - init_light_and_camera_config[BlenderUtils.CAMERA_NAME], - binocular_vision, - ) - - @staticmethod - def get_obj_diag(name): - obj = BlenderUtils.get_obj(name) - return np.linalg.norm(obj.dimensions) - - @staticmethod - def matrix_to_blender_pose(matrix): - location = matrix[:3, 3] - rotation_matrix = matrix[:3, :3] - rotation_matrix_blender = mathutils.Matrix(rotation_matrix.tolist()) - rotation_euler = rotation_matrix_blender.to_euler() - return location, rotation_euler - - @staticmethod - def set_camera_at(pose): - camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME) - location, rotation_euler = BlenderUtils.matrix_to_blender_pose(pose) - - camera.location = location - camera.rotation_euler = rotation_euler - - @staticmethod - def get_object_bottom_z(obj): - vertices = [v.co for v in obj.data.vertices] - vertices_world = [obj.matrix_world @ v for v in vertices] - min_z = min([v.z for v in vertices_world]) - return min_z - - @staticmethod - def render_normal_and_depth( - output_dir, file_name, binocular_vision=False, target_object=None - ): - target_cameras = [BlenderUtils.CAMERA_NAME] - if binocular_vision: - target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME) - - for cam_name in target_cameras: - bpy.context.scene.camera = BlenderUtils.get_obj(cam_name) - cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R" - scene = bpy.context.scene - scene.render.filepath = "" - - mask_dir = os.path.join(output_dir, "normal") - if not os.path.exists(mask_dir): - os.makedirs(mask_dir) - - scene.render.filepath = os.path.join( - output_dir, mask_dir, f"{file_name}_{cam_suffix}.png" - ) - scene.render.image_settings.color_depth = "8" - scene.render.resolution_percentage = 100 - scene.render.use_overwrite = False - scene.render.use_file_extension = False - scene.render.use_placeholder = False - scene.use_nodes = True - tree = scene.node_tree - - for node in tree.nodes: - tree.nodes.remove(node) - - rl = tree.nodes.new("CompositorNodeRLayers") - - map_range = tree.nodes.new("CompositorNodeMapRange") - map_range.inputs["From Min"].default_value = 0.01 - map_range.inputs["From Max"].default_value = 5 - map_range.inputs["To Min"].default_value = 0 - map_range.inputs["To Max"].default_value = 1 - tree.links.new(rl.outputs["Depth"], map_range.inputs[0]) - - output_depth = tree.nodes.new("CompositorNodeOutputFile") - - depth_dir = os.path.join(output_dir, "depth") - if not os.path.exists(depth_dir): - os.makedirs(depth_dir) - output_depth.base_path = depth_dir - output_depth.file_slots[0].path = f"{file_name}_{cam_suffix}.####" - output_depth.format.file_format = "PNG" - output_depth.format.color_mode = "BW" - output_depth.format.color_depth = "16" - tree.links.new(map_range.outputs[0], output_depth.inputs[0]) - bpy.ops.render.render(write_still=True) - - msg = "success" - return msg - - @staticmethod - def render_mask( - output_dir, file_name, binocular_vision=False, target_object=None - ): - target_cameras = [BlenderUtils.CAMERA_NAME] - if binocular_vision: - target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME) - # use pass z - bpy.context.scene.view_layers["ViewLayer"].use_pass_z = True - for cam_name in target_cameras: - bpy.context.scene.camera = BlenderUtils.get_obj(cam_name) - cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R" - scene = bpy.context.scene - scene.render.filepath = "" - - mask_dir = os.path.join(output_dir, "mask") - if not os.path.exists(mask_dir): - os.makedirs(mask_dir) - - scene.render.filepath = os.path.join( - output_dir, mask_dir, f"{file_name}_{cam_suffix}.png" - ) - scene.render.image_settings.color_depth = "8" - scene.render.resolution_percentage = 100 - scene.render.use_overwrite = False - scene.render.use_file_extension = False - scene.render.use_placeholder = False - - - bpy.ops.render.render(write_still=True) - - msg = "success" - return msg - - @staticmethod - def save_cam_params(scene_dir, idx, binocular_vision=False): - camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) - extrinsic = np.array(camera.matrix_world) - cam_data = camera.data - focal_length = cam_data.lens - sensor_width = cam_data.sensor_width - sensor_height = cam_data.sensor_height - resolution_x = bpy.context.scene.render.resolution_x - resolution_y = bpy.context.scene.render.resolution_y - intrinsic = np.zeros((3, 3)) - intrinsic[0, 0] = focal_length * resolution_x / sensor_width # fx - intrinsic[1, 1] = focal_length * resolution_y / sensor_height # fy - intrinsic[0, 2] = resolution_x / 2.0 # cx - intrinsic[1, 2] = resolution_y / 2.0 # cy - intrinsic[2, 2] = 1.0 - cam_object = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME) - extrinsic_cam_object = np.array(cam_object.matrix_world) - data = { - "extrinsic": extrinsic.tolist(), - "extrinsic_cam_object": extrinsic_cam_object.tolist(), - "intrinsic": intrinsic.tolist(), - "far_plane": camera.data.clip_end, - "near_plane": camera.data.clip_start, - } - if binocular_vision: - right_camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_RIGHT_NAME) - extrinsic_right = np.array(right_camera.matrix_world) - print("result:", extrinsic_right) - - data["extrinsic_R"] = extrinsic_right.tolist() - - cam_params_dir = os.path.join(scene_dir, "camera_params") - if not os.path.exists(cam_params_dir): - os.makedirs(cam_params_dir) - cam_params_path = os.path.join(cam_params_dir, f"{idx}.json") - with open(cam_params_path, "w") as f: - json.dump(data, f, indent=4) - - @staticmethod - def reset_objects_and_platform(): - all_objects = bpy.data.objects - keep_objects = { - "plane_floor", - "plane_ceil", - "plane_wall_1", - "plane_wall_2", - "plane_wall_3", - "plane_wall_4", - } - keep_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) - keep_objects.add(BlenderUtils.CAMERA_NAME) - keep_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) - keep_objects.add(BlenderUtils.TABLE_NAME) - - for obj in all_objects: - if obj.name not in keep_objects: - bpy.data.objects.remove(obj, do_unlink=True) - - for block in bpy.data.meshes: - if block.users == 0: - bpy.data.meshes.remove(block) - for block in bpy.data.materials: - if block.users == 0: - bpy.data.materials.remove(block) - for block in bpy.data.images: - if block.users == 0: - bpy.data.images.remove(block) - - gc.collect() - bpy.context.scene.frame_set(0) - - @staticmethod - def save_scene_info(scene_root_dir, display_table_config, target_name): - all_objects = bpy.data.objects - no_save_objects = { - "plane_floor", - "plane_ceil", - "plane_wall_1", - "plane_wall_2", - "plane_wall_3", - "plane_wall_4", - } - no_save_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) - no_save_objects.add(BlenderUtils.CAMERA_NAME) - no_save_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) - no_save_objects.add(BlenderUtils.TABLE_NAME) - scene_info = {} - for obj in all_objects: - if ( - obj.name not in no_save_objects - and obj.name != BlenderUtils.DISPLAY_TABLE_NAME - ): - obj_info = { - "location": list(obj.location), - "rotation_euler": list(obj.rotation_euler), - "scale": list(obj.scale), - } - scene_info[obj.name] = obj_info - scene_info[BlenderUtils.DISPLAY_TABLE_NAME] = display_table_config - scene_info["target_name"] = target_name - scene_info_path = os.path.join(scene_root_dir, "scene_info.json") - with open(scene_info_path, "w") as outfile: - json.dump(scene_info, outfile) +import os +import json +import bpy +import time +import gc +import numpy as np +import mathutils + + +class BlenderUtils: + + TABLE_NAME: str = "table" + CAMERA_NAME: str = "Camera" + CAMERA_RIGHT_NAME: str = "CameraRight" + CAMERA_OBJECT_NAME: str = "CameraObject" + DISPLAY_TABLE_NAME: str = "display_table" + MESH_FILE_NAME: str = "mesh.obj" + + @staticmethod + def get_obj_path(obj_dir, name): + return os.path.join(obj_dir, name, BlenderUtils.MESH_FILE_NAME) + + @staticmethod + def load_obj(name, mesh_path, scale=1): + print(mesh_path) + bpy.ops.wm.obj_import(filepath=mesh_path) + loaded_object = bpy.context.selected_objects[-1] + loaded_object.name = name + loaded_object.data.name = name + loaded_object.scale = (scale, scale, scale) + bpy.ops.rigidbody.object_add() + return loaded_object + + @staticmethod + def get_obj(name): + return bpy.data.objects.get(name) + + @staticmethod + def get_obj_pose(name): + obj = BlenderUtils.get_obj(name) + return np.asarray(obj.matrix_world) + + @staticmethod + def add_plane(name, location, orientation, size=10): + bpy.ops.mesh.primitive_plane_add(size=size, location=location) + plane = bpy.context.selected_objects[-1] + plane.name = name + plane.rotation_euler = orientation + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = "PASSIVE" + + @staticmethod + def add_table(table_model_path): + table = BlenderUtils.load_obj( + BlenderUtils.TABLE_NAME, table_model_path, scale=0.01 + ) + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = "PASSIVE" + + mat = bpy.data.materials.new(name="TableYellowMaterial") + mat.diffuse_color = (1.0, 1.0, 0.0, 1.0) + if len(table.data.materials) > 0: + table.data.materials[0] = mat + else: + table.data.materials.append(mat) + + @staticmethod + def setup_scene(init_light_and_camera_config, table_model_path, binocular_vision): + bpy.context.scene.render.engine = "BLENDER_EEVEE_NEXT" + bpy.context.scene.display.shading.show_xray = False + bpy.context.scene.display.shading.use_dof = False + bpy.context.scene.display.render_aa = "OFF" + bpy.context.scene.view_settings.view_transform = "Standard" + + bpy.context.scene.eevee.use_ssr = False # 关闭屏幕空间反射 + bpy.context.scene.eevee.use_bloom = False # 关闭辉光 + bpy.context.scene.eevee.use_gtao = False # 关闭环境光遮蔽 + bpy.context.scene.eevee.use_soft_shadows = False # 关闭软阴影 + bpy.context.scene.eevee.use_shadows = False # 关闭所有阴影 + bpy.context.scene.world.use_nodes = False # 如果你不需要环境光,关闭环境节点 + + # bpy.context.scene.eevee.use_sss = False # 关闭次表面散射 + + # 2. 设置最低的采样数 + bpy.context.scene.eevee.taa_render_samples = 1 + bpy.context.scene.eevee.taa_samples = 1 + BlenderUtils.init_light_and_camera( + init_light_and_camera_config, binocular_vision + ) + + BlenderUtils.add_plane("plane_floor", location=(0, 0, 0), orientation=(0, 0, 0)) + BlenderUtils.add_plane("plane_ceil", location=(0, 0, 10), orientation=(0, 0, 0)) + BlenderUtils.add_plane( + "plane_wall_1", location=(5, 0, 5), orientation=(0, np.pi / 2, 0) + ) + BlenderUtils.add_plane( + "plane_wall_2", location=(-5, 0, 5), orientation=(0, np.pi / 2, 0) + ) + BlenderUtils.add_plane( + "plane_wall_3", location=(0, 5, 5), orientation=(np.pi / 2, 0, 0) + ) + BlenderUtils.add_plane( + "plane_wall_4", location=(0, -5, 5), orientation=(np.pi / 2, 0, 0) + ) + + BlenderUtils.add_table(table_model_path) + + @staticmethod + def set_light_params(light, config): + light.location = config["location"] + light.rotation_euler = config["orientation"] + if light.type == "SUN": + light.data.energy = config["power"] + elif light.type == "POINT": + light.data.energy = config["power"] + + @staticmethod + def set_camera_params(camera, config, binocular_vision): + + camera_object = bpy.data.objects.new(BlenderUtils.CAMERA_OBJECT_NAME, None) + bpy.context.collection.objects.link(camera_object) + cameras = [bpy.data.objects.get("Camera")] + camera.location = [0, 0, 0] + camera.rotation_euler = [0, 0, 0] + camera.parent = camera_object + if binocular_vision: + left_camera = cameras[0] + right_camera = left_camera.copy() + right_camera.name = BlenderUtils.CAMERA_RIGHT_NAME + right_camera.data = left_camera.data.copy() + right_camera.data.name = BlenderUtils.CAMERA_RIGHT_NAME + bpy.context.collection.objects.link(right_camera) + right_camera.parent = camera_object + right_camera.location = [config["eye_distance"] / 2, 0, 0] + left_camera.location = [-config["eye_distance"] / 2, 0, 0] + binocular_angle = config["eye_angle"] + half_angle = np.radians(binocular_angle / 2) + + left_camera.rotation_euler[1] = -half_angle + right_camera.rotation_euler[1] = half_angle + cameras.append(right_camera) + + for camera in cameras: + camera.data.clip_start = config["near_plane"] + camera.data.clip_end = config["far_plane"] + + bpy.context.scene.render.resolution_x = config["resolution"][0] + bpy.context.scene.render.resolution_y = config["resolution"][1] + sensor_height = 24.0 + focal_length = sensor_height / ( + 2 * np.tan(np.radians(config["fov_vertical"]) / 2) + ) + camera.data.lens = focal_length + camera.data.sensor_width = ( + sensor_height * config["resolution"][0] / config["resolution"][1] + ) + camera.data.sensor_height = sensor_height + + @staticmethod + def init_light_and_camera(init_light_and_camera_config, binocular_vision): + + camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) + BlenderUtils.set_camera_params( + camera, + init_light_and_camera_config[BlenderUtils.CAMERA_NAME], + binocular_vision, + ) + + @staticmethod + def get_obj_diag(name): + obj = BlenderUtils.get_obj(name) + return np.linalg.norm(obj.dimensions) + + @staticmethod + def matrix_to_blender_pose(matrix): + location = matrix[:3, 3] + rotation_matrix = matrix[:3, :3] + rotation_matrix_blender = mathutils.Matrix(rotation_matrix.tolist()) + rotation_euler = rotation_matrix_blender.to_euler() + return location, rotation_euler + + @staticmethod + def set_camera_at(pose): + camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME) + location, rotation_euler = BlenderUtils.matrix_to_blender_pose(pose) + + camera.location = location + camera.rotation_euler = rotation_euler + + @staticmethod + def get_object_bottom_z(obj): + vertices = [v.co for v in obj.data.vertices] + vertices_world = [obj.matrix_world @ v for v in vertices] + min_z = min([v.z for v in vertices_world]) + return min_z + + @staticmethod + def render_normal_and_depth( + output_dir, file_name, binocular_vision=False, target_object=None + ): + target_cameras = [BlenderUtils.CAMERA_NAME] + if binocular_vision: + target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME) + + for cam_name in target_cameras: + bpy.context.scene.camera = BlenderUtils.get_obj(cam_name) + cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R" + scene = bpy.context.scene + scene.render.filepath = "" + + mask_dir = os.path.join(output_dir, "normal") + if not os.path.exists(mask_dir): + os.makedirs(mask_dir) + + scene.render.filepath = os.path.join( + output_dir, mask_dir, f"{file_name}_{cam_suffix}.png" + ) + scene.render.image_settings.color_depth = "8" + scene.render.resolution_percentage = 100 + scene.render.use_overwrite = False + scene.render.use_file_extension = False + scene.render.use_placeholder = False + scene.use_nodes = True + tree = scene.node_tree + + for node in tree.nodes: + tree.nodes.remove(node) + + rl = tree.nodes.new("CompositorNodeRLayers") + + map_range = tree.nodes.new("CompositorNodeMapRange") + map_range.inputs["From Min"].default_value = 0.01 + map_range.inputs["From Max"].default_value = 5 + map_range.inputs["To Min"].default_value = 0 + map_range.inputs["To Max"].default_value = 1 + tree.links.new(rl.outputs["Depth"], map_range.inputs[0]) + + output_depth = tree.nodes.new("CompositorNodeOutputFile") + + depth_dir = os.path.join(output_dir, "depth") + if not os.path.exists(depth_dir): + os.makedirs(depth_dir) + output_depth.base_path = depth_dir + output_depth.file_slots[0].path = f"{file_name}_{cam_suffix}.####" + output_depth.format.file_format = "PNG" + output_depth.format.color_mode = "BW" + output_depth.format.color_depth = "16" + tree.links.new(map_range.outputs[0], output_depth.inputs[0]) + bpy.ops.render.render(write_still=True) + + msg = "success" + return msg + + @staticmethod + def render_mask( + output_dir, file_name, binocular_vision=False, target_object=None + ): + target_cameras = [BlenderUtils.CAMERA_NAME] + if binocular_vision: + target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME) + # use pass z + bpy.context.scene.view_layers["ViewLayer"].use_pass_z = True + for cam_name in target_cameras: + bpy.context.scene.camera = BlenderUtils.get_obj(cam_name) + cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R" + scene = bpy.context.scene + scene.render.filepath = "" + + mask_dir = os.path.join(output_dir, "mask") + if not os.path.exists(mask_dir): + os.makedirs(mask_dir) + + scene.render.filepath = os.path.join( + output_dir, mask_dir, f"{file_name}_{cam_suffix}.png" + ) + scene.render.image_settings.color_depth = "8" + scene.render.resolution_percentage = 100 + scene.render.use_overwrite = False + scene.render.use_file_extension = False + scene.render.use_placeholder = False + + + bpy.ops.render.render(write_still=True) + + msg = "success" + return msg + + @staticmethod + def save_cam_params(scene_dir, idx, binocular_vision=False): + camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) + extrinsic = np.array(camera.matrix_world) + cam_data = camera.data + focal_length = cam_data.lens + sensor_width = cam_data.sensor_width + sensor_height = cam_data.sensor_height + resolution_x = bpy.context.scene.render.resolution_x + resolution_y = bpy.context.scene.render.resolution_y + intrinsic = np.zeros((3, 3)) + intrinsic[0, 0] = focal_length * resolution_x / sensor_width # fx + intrinsic[1, 1] = focal_length * resolution_y / sensor_height # fy + intrinsic[0, 2] = resolution_x / 2.0 # cx + intrinsic[1, 2] = resolution_y / 2.0 # cy + intrinsic[2, 2] = 1.0 + cam_object = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME) + extrinsic_cam_object = np.array(cam_object.matrix_world) + data = { + "extrinsic": extrinsic.tolist(), + "extrinsic_cam_object": extrinsic_cam_object.tolist(), + "intrinsic": intrinsic.tolist(), + "far_plane": camera.data.clip_end, + "near_plane": camera.data.clip_start, + } + if binocular_vision: + right_camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_RIGHT_NAME) + extrinsic_right = np.array(right_camera.matrix_world) + print("result:", extrinsic_right) + + data["extrinsic_R"] = extrinsic_right.tolist() + + cam_params_dir = os.path.join(scene_dir, "camera_params") + if not os.path.exists(cam_params_dir): + os.makedirs(cam_params_dir) + cam_params_path = os.path.join(cam_params_dir, f"{idx}.json") + with open(cam_params_path, "w") as f: + json.dump(data, f, indent=4) + + @staticmethod + def reset_objects_and_platform(): + all_objects = bpy.data.objects + keep_objects = { + "plane_floor", + "plane_ceil", + "plane_wall_1", + "plane_wall_2", + "plane_wall_3", + "plane_wall_4", + } + keep_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) + keep_objects.add(BlenderUtils.CAMERA_NAME) + keep_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) + keep_objects.add(BlenderUtils.TABLE_NAME) + + for obj in all_objects: + if obj.name not in keep_objects: + bpy.data.objects.remove(obj, do_unlink=True) + + for block in bpy.data.meshes: + if block.users == 0: + bpy.data.meshes.remove(block) + for block in bpy.data.materials: + if block.users == 0: + bpy.data.materials.remove(block) + for block in bpy.data.images: + if block.users == 0: + bpy.data.images.remove(block) + + gc.collect() + bpy.context.scene.frame_set(0) + + @staticmethod + def save_scene_info(scene_root_dir, display_table_config, target_name): + all_objects = bpy.data.objects + no_save_objects = { + "plane_floor", + "plane_ceil", + "plane_wall_1", + "plane_wall_2", + "plane_wall_3", + "plane_wall_4", + } + no_save_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) + no_save_objects.add(BlenderUtils.CAMERA_NAME) + no_save_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) + no_save_objects.add(BlenderUtils.TABLE_NAME) + scene_info = {} + for obj in all_objects: + if ( + obj.name not in no_save_objects + and obj.name != BlenderUtils.DISPLAY_TABLE_NAME + ): + obj_info = { + "location": list(obj.location), + "rotation_euler": list(obj.rotation_euler), + "scale": list(obj.scale), + } + scene_info[obj.name] = obj_info + scene_info[BlenderUtils.DISPLAY_TABLE_NAME] = display_table_config + scene_info["target_name"] = target_name + scene_info_path = os.path.join(scene_root_dir, "scene_info.json") + with open(scene_info_path, "w") as outfile: + json.dump(scene_info, outfile) \ No newline at end of file diff --git a/utils/material_util.py b/utils/material_util.py new file mode 100644 index 0000000..a77eca2 --- /dev/null +++ b/utils/material_util.py @@ -0,0 +1,96 @@ +import bpy + +class MaterialUtil: + + ''' --------- Basic --------- ''' + @staticmethod + def change_object_material(obj, mat): + if obj.data.materials: + obj.data.materials[0] = mat + else: + obj.data.materials.append(mat) + + ''' ------- Materials ------- ''' + @staticmethod + def create_normal_material(): + normal_mat = bpy.data.materials.new(name="NormalMaterial") + normal_mat.use_nodes = True + + nodes = normal_mat.node_tree.nodes + links = normal_mat.node_tree.links + + nodes.clear() + + geometry_node = nodes.new(type="ShaderNodeNewGeometry") + vector_transform_node = nodes.new(type="ShaderNodeVectorTransform") + separate_xyz_node = nodes.new(type="ShaderNodeSeparateXYZ") + multiply_node_x = nodes.new(type="ShaderNodeMath") + multiply_node_y = nodes.new(type="ShaderNodeMath") + multiply_node_z = nodes.new(type="ShaderNodeMath") + combine_xyz_node = nodes.new(type="ShaderNodeCombineXYZ") + light_path_node = nodes.new(type="ShaderNodeLightPath") + emission_node_1 = nodes.new(type="ShaderNodeEmission") + emission_node_2 = nodes.new(type="ShaderNodeEmission") + mix_shader_node_1 = nodes.new(type="ShaderNodeMixShader") + mix_shader_node_2 = nodes.new(type="ShaderNodeMixShader") + material_output_node = nodes.new(type="ShaderNodeOutputMaterial") + + vector_transform_node.vector_type = 'VECTOR' + vector_transform_node.convert_from = 'WORLD' + vector_transform_node.convert_to = 'CAMERA' + + multiply_node_x.operation = 'MULTIPLY' + multiply_node_x.inputs[1].default_value = 1.0 + + multiply_node_y.operation = 'MULTIPLY' + multiply_node_y.inputs[1].default_value = 1.0 + + multiply_node_z.operation = 'MULTIPLY' + multiply_node_z.inputs[1].default_value = -1.0 + + emission_node_1.inputs['Strength'].default_value = 1.0 + emission_node_2.inputs['Strength'].default_value = 1.0 + + mix_shader_node_2.inputs['Fac'].default_value = 0.5 + + links.new(geometry_node.outputs['Normal'], vector_transform_node.inputs['Vector']) + links.new(vector_transform_node.outputs['Vector'], separate_xyz_node.inputs['Vector']) + links.new(separate_xyz_node.outputs['X'], multiply_node_x.inputs[0]) + links.new(separate_xyz_node.outputs['Y'], multiply_node_y.inputs[0]) + links.new(separate_xyz_node.outputs['Z'], multiply_node_z.inputs[0]) + links.new(multiply_node_x.outputs['Value'], combine_xyz_node.inputs['X']) + links.new(multiply_node_y.outputs['Value'], combine_xyz_node.inputs['Y']) + links.new(multiply_node_z.outputs['Value'], combine_xyz_node.inputs['Z']) + links.new(combine_xyz_node.outputs['Vector'], emission_node_1.inputs['Color']) + links.new(light_path_node.outputs['Is Camera Ray'], mix_shader_node_1.inputs['Fac']) + links.new(emission_node_1.outputs['Emission'], mix_shader_node_1.inputs[2]) + links.new(mix_shader_node_1.outputs['Shader'], mix_shader_node_2.inputs[1]) + links.new(emission_node_2.outputs['Emission'], mix_shader_node_2.inputs[2]) + links.new(mix_shader_node_2.outputs['Shader'], material_output_node.inputs['Surface']) + return normal_mat + + @staticmethod + def create_mask_material(color=(1.0, 1.0, 1.0)): + mask_mat = bpy.data.materials.new(name="MaskMaterial") + mask_mat.use_nodes = True + + nodes = mask_mat.node_tree.nodes + links = mask_mat.node_tree.links + + nodes.clear() + emission_node = nodes.new(type="ShaderNodeEmission") + emission_node.inputs['Color'].default_value = (*color, 1.0) + emission_node.inputs['Strength'].default_value = 1.0 + material_output_node = nodes.new(type="ShaderNodeOutputMaterial") + links.new(emission_node.outputs['Emission'], material_output_node.inputs['Surface']) + + return mask_mat + + + +# -------- debug -------- +if __name__ == "__main__": + cube = bpy.data.objects.get("Cube") + normal_mat = MaterialUtil.create_normal_material() + MaterialUtil.change_object_material(cube, normal_mat) + \ No newline at end of file diff --git a/pose.py b/utils/pose.py similarity index 97% rename from pose.py rename to utils/pose.py index ddaed4a..a9373b2 100644 --- a/pose.py +++ b/utils/pose.py @@ -1,151 +1,151 @@ -import numpy as np - -class PoseUtil: - ROTATION = 1 - TRANSLATION = 2 - SCALE = 3 - - @staticmethod - def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False): - if isinstance(trans_m_min, list): - x_min, y_min, z_min = trans_m_min - x_max, y_max, z_max = trans_m_max - else: - x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min - x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max - - x = np.random.uniform(x_min, x_max) - y = np.random.uniform(y_min, y_max) - z = np.random.uniform(z_min, z_max) - translation = np.array([x, y, z]) - if trans_unit == "cm": - translation = translation / 100 - if debug: - print("uniform translation:", translation) - return translation - - @staticmethod - def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False): - axis = np.random.randn(3) - axis /= np.linalg.norm(axis) - theta = np.random.uniform( - rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi - ) - - K = np.array( - [[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]] - ) - R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K) - if debug: - print("uniform rotation:", theta * 180 / np.pi) - return R - - @staticmethod - def get_uniform_pose( - trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False - ): - translation = PoseUtil.get_uniform_translation( - trans_min, trans_max, trans_unit, debug - ) - rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) - pose = np.eye(4) - pose[:3, :3] = rotation - pose[:3, 3] = translation - return pose - - @staticmethod - def get_n_uniform_pose( - trans_min, - trans_max, - rot_min=0, - rot_max=180, - n=1, - trans_unit="cm", - fix=None, - contain_canonical=True, - debug=False, - ): - if fix == PoseUtil.ROTATION: - translations = np.zeros((n, 3)) - for i in range(n): - translations[i] = PoseUtil.get_uniform_translation( - trans_min, trans_max, trans_unit, debug - ) - if contain_canonical: - translations[0] = np.zeros(3) - rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) - elif fix == PoseUtil.TRANSLATION: - rotations = np.zeros((n, 3, 3)) - for i in range(n): - rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) - if contain_canonical: - rotations[0] = np.eye(3) - translations = PoseUtil.get_uniform_translation( - trans_min, trans_max, trans_unit, debug - ) - else: - translations = np.zeros((n, 3)) - rotations = np.zeros((n, 3, 3)) - for i in range(n): - translations[i] = PoseUtil.get_uniform_translation( - trans_min, trans_max, trans_unit, debug - ) - for i in range(n): - rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) - if contain_canonical: - translations[0] = np.zeros(3) - rotations[0] = np.eye(3) - - pose = np.eye(4, 4, k=0)[np.newaxis, :].repeat(n, axis=0) - pose[:, :3, :3] = rotations - pose[:, :3, 3] = translations - - return pose - - @staticmethod - def get_n_uniform_pose_batch( - trans_min, - trans_max, - rot_min=0, - rot_max=180, - n=1, - batch_size=1, - trans_unit="cm", - fix=None, - contain_canonical=False, - debug=False, - ): - - batch_poses = [] - for i in range(batch_size): - pose = PoseUtil.get_n_uniform_pose( - trans_min, - trans_max, - rot_min, - rot_max, - n, - trans_unit, - fix, - contain_canonical, - debug, - ) - batch_poses.append(pose) - pose_batch = np.stack(batch_poses, axis=0) - return pose_batch - - @staticmethod - def get_uniform_scale(scale_min, scale_max, debug=False): - if isinstance(scale_min, list): - x_min, y_min, z_min = scale_min - x_max, y_max, z_max = scale_max - else: - x_min, y_min, z_min = scale_min, scale_min, scale_min - x_max, y_max, z_max = scale_max, scale_max, scale_max - - x = np.random.uniform(x_min, x_max) - y = np.random.uniform(y_min, y_max) - z = np.random.uniform(z_min, z_max) - scale = np.array([x, y, z]) - if debug: - print("uniform scale:", scale) - return scale +import numpy as np + +class PoseUtil: + ROTATION = 1 + TRANSLATION = 2 + SCALE = 3 + + @staticmethod + def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False): + if isinstance(trans_m_min, list): + x_min, y_min, z_min = trans_m_min + x_max, y_max, z_max = trans_m_max + else: + x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min + x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max + + x = np.random.uniform(x_min, x_max) + y = np.random.uniform(y_min, y_max) + z = np.random.uniform(z_min, z_max) + translation = np.array([x, y, z]) + if trans_unit == "cm": + translation = translation / 100 + if debug: + print("uniform translation:", translation) + return translation + + @staticmethod + def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False): + axis = np.random.randn(3) + axis /= np.linalg.norm(axis) + theta = np.random.uniform( + rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi + ) + + K = np.array( + [[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]] + ) + R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K) + if debug: + print("uniform rotation:", theta * 180 / np.pi) + return R + + @staticmethod + def get_uniform_pose( + trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False + ): + translation = PoseUtil.get_uniform_translation( + trans_min, trans_max, trans_unit, debug + ) + rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) + pose = np.eye(4) + pose[:3, :3] = rotation + pose[:3, 3] = translation + return pose + + @staticmethod + def get_n_uniform_pose( + trans_min, + trans_max, + rot_min=0, + rot_max=180, + n=1, + trans_unit="cm", + fix=None, + contain_canonical=True, + debug=False, + ): + if fix == PoseUtil.ROTATION: + translations = np.zeros((n, 3)) + for i in range(n): + translations[i] = PoseUtil.get_uniform_translation( + trans_min, trans_max, trans_unit, debug + ) + if contain_canonical: + translations[0] = np.zeros(3) + rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) + elif fix == PoseUtil.TRANSLATION: + rotations = np.zeros((n, 3, 3)) + for i in range(n): + rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) + if contain_canonical: + rotations[0] = np.eye(3) + translations = PoseUtil.get_uniform_translation( + trans_min, trans_max, trans_unit, debug + ) + else: + translations = np.zeros((n, 3)) + rotations = np.zeros((n, 3, 3)) + for i in range(n): + translations[i] = PoseUtil.get_uniform_translation( + trans_min, trans_max, trans_unit, debug + ) + for i in range(n): + rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) + if contain_canonical: + translations[0] = np.zeros(3) + rotations[0] = np.eye(3) + + pose = np.eye(4, 4, k=0)[np.newaxis, :].repeat(n, axis=0) + pose[:, :3, :3] = rotations + pose[:, :3, 3] = translations + + return pose + + @staticmethod + def get_n_uniform_pose_batch( + trans_min, + trans_max, + rot_min=0, + rot_max=180, + n=1, + batch_size=1, + trans_unit="cm", + fix=None, + contain_canonical=False, + debug=False, + ): + + batch_poses = [] + for i in range(batch_size): + pose = PoseUtil.get_n_uniform_pose( + trans_min, + trans_max, + rot_min, + rot_max, + n, + trans_unit, + fix, + contain_canonical, + debug, + ) + batch_poses.append(pose) + pose_batch = np.stack(batch_poses, axis=0) + return pose_batch + + @staticmethod + def get_uniform_scale(scale_min, scale_max, debug=False): + if isinstance(scale_min, list): + x_min, y_min, z_min = scale_min + x_max, y_max, z_max = scale_max + else: + x_min, y_min, z_min = scale_min, scale_min, scale_min + x_max, y_max, z_max = scale_max, scale_max, scale_max + + x = np.random.uniform(x_min, x_max) + y = np.random.uniform(y_min, y_max) + z = np.random.uniform(z_min, z_max) + scale = np.array([x, y, z]) + if debug: + print("uniform scale:", scale) + return scale diff --git a/view_sample_util.py b/utils/view_sample_util.py similarity index 97% rename from view_sample_util.py rename to utils/view_sample_util.py index 4e57f5c..c6836d7 100644 --- a/view_sample_util.py +++ b/utils/view_sample_util.py @@ -1,168 +1,168 @@ - -import numpy as np -import bmesh -from collections import defaultdict -from scipy.spatial.transform import Rotation as R -from blender.pose import PoseUtil -import random - -class ViewSampleUtil: - @staticmethod - def farthest_point_sampling(points, num_samples): - num_points = points.shape[0] - if num_samples >= num_points: - return points, np.arange(num_points) - sampled_indices = np.zeros(num_samples, dtype=int) - sampled_indices[0] = np.random.randint(num_points) - min_distances = np.full(num_points, np.inf) - for i in range(1, num_samples): - current_point = points[sampled_indices[i - 1]] - dist_to_current_point = np.linalg.norm(points - current_point, axis=1) - min_distances = np.minimum(min_distances, dist_to_current_point) - sampled_indices[i] = np.argmax(min_distances) - downsampled_points = points[sampled_indices] - return downsampled_points, sampled_indices - - @staticmethod - def voxel_downsample(points, voxel_size): - voxel_grid = defaultdict(list) - for i, point in enumerate(points): - voxel_index = tuple((point // voxel_size).astype(int)) - voxel_grid[voxel_index].append(i) - - downsampled_points = [] - downsampled_indices = [] - for indices in voxel_grid.values(): - selected_index = indices[0] - downsampled_points.append(points[selected_index]) - downsampled_indices.append(selected_index) - - return np.array(downsampled_points), downsampled_indices - - @staticmethod - def sample_view_data(obj, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int = 1, pertube_repeat:int = 1) -> dict: - view_data = { - "look_at_points": [], - "cam_positions": [], - } - mesh = obj.data - bm = bmesh.new() - bm.from_mesh(mesh) - bm.verts.ensure_lookup_table() - bm.faces.ensure_lookup_table() - bm.normal_update() - - look_at_points = [] - cam_positions = [] - normals = [] - for v in bm.verts: - look_at_point = np.array(v.co) - - view_data["look_at_points"].append(look_at_point) - normal = np.zeros(3) - for loop in v.link_loops: - normal += np.array(loop.calc_normal()) - normal /= len(v.link_loops) - normal = normal / np.linalg.norm(normal) - if np.isnan(normal).any(): - continue - if np.dot(normal, look_at_point) < 0: - normal = -normal - normals.append(normal) - - for _ in range(pertube_repeat): - perturb_angle = np.radians(np.random.uniform(0, 30)) - perturb_axis = np.random.normal(size=3) - perturb_axis /= np.linalg.norm(perturb_axis) - rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix() - perturbed_normal = np.dot(rotation_matrix, normal) - middle_distance = (distance_range[0] + distance_range[1]) / 2 - perturbed_distance = random.uniform(middle_distance-0.05, middle_distance+0.05) - cam_position = look_at_point + perturbed_distance * perturbed_normal - look_at_points.append(look_at_point) - cam_positions.append(cam_position) - - - bm.free() - look_at_points = np.array(look_at_points) - cam_positions = np.array(cam_positions) - voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size) - voxel_downsampled_cam_positions = cam_positions[selected_indices] - voxel_downsampled_normals = np.array(normals)[selected_indices] - - fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views*2) - fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices] - - view_data["look_at_points"] = fps_downsampled_look_at_points.tolist() - view_data["cam_positions"] = fps_downsampled_cam_positions.tolist() - view_data["normals"] = voxel_downsampled_normals - view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points - return view_data - - @staticmethod - def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple: - world_points = [] - world_normals = [] - for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]): - voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0) - normal_world = obj_world_pose[:3, :3] @ normal - world_points.append(voxel_down_sampled_points_world[:3]) - world_normals.append(normal_world) - return np.array(world_points), np.array(world_normals) - - @staticmethod - def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray: - cam_poses = [] - min_height_z = 1000 - for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]): - look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0) - cam_position_world = obj_world_pose @ np.append(cam_position, 1.0) - if look_at_point_world[2] < min_height_z: - min_height_z = look_at_point_world[2] - look_at_point_world = look_at_point_world[:3] - cam_position_world = cam_position_world[:3] - - forward_vector = cam_position_world - look_at_point_world - forward_vector /= np.linalg.norm(forward_vector) - - up_vector = np.array([0, 0, 1]) - - right_vector = np.cross(up_vector, forward_vector) - right_vector /= np.linalg.norm(right_vector) - - corrected_up_vector = np.cross(forward_vector, right_vector) - rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T - - cam_pose = np.eye(4) - cam_pose[:3, :3] = rotation_matrix - cam_pose[:3, 3] = cam_position_world - cam_poses.append(cam_pose) - - filtered_cam_poses = [] - for cam_pose in cam_poses: - if cam_pose[2, 3] > min_height_z: - direction_vector = cam_pose[:3, 2] - horizontal_normal = np.array([0, 0, 1]) - cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal)) - angle = np.arccos(np.clip(cos_angle, -1.0, 1.0)) - angle_degree = np.degrees(angle) - if angle_degree < 90 - min_cam_table_included_degree: - filtered_cam_poses.append(cam_pose) - if random.random() < random_view_ratio: - pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm") - filtered_cam_poses.append(pertube_pose @ cam_pose) - - if len(filtered_cam_poses) > max_views: - indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False) - filtered_cam_poses = [filtered_cam_poses[i] for i in indices] - - return np.array(filtered_cam_poses) - - @staticmethod - def sample_view_data_world_space(obj, distance_range:tuple = (0.3,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict: - obj_world_pose = np.asarray(obj.matrix_world) - view_data = ViewSampleUtil.sample_view_data(obj, distance_range, voxel_size, max_views) - view_data["cam_poses"] = ViewSampleUtil.get_cam_pose(view_data, obj_world_pose, max_views, min_cam_table_included_degree, random_view_ratio) - view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, obj_world_pose) - return view_data - + +import numpy as np +import bmesh +from collections import defaultdict +from scipy.spatial.transform import Rotation as R +from utils.pose import PoseUtil +import random + +class ViewSampleUtil: + @staticmethod + def farthest_point_sampling(points, num_samples): + num_points = points.shape[0] + if num_samples >= num_points: + return points, np.arange(num_points) + sampled_indices = np.zeros(num_samples, dtype=int) + sampled_indices[0] = np.random.randint(num_points) + min_distances = np.full(num_points, np.inf) + for i in range(1, num_samples): + current_point = points[sampled_indices[i - 1]] + dist_to_current_point = np.linalg.norm(points - current_point, axis=1) + min_distances = np.minimum(min_distances, dist_to_current_point) + sampled_indices[i] = np.argmax(min_distances) + downsampled_points = points[sampled_indices] + return downsampled_points, sampled_indices + + @staticmethod + def voxel_downsample(points, voxel_size): + voxel_grid = defaultdict(list) + for i, point in enumerate(points): + voxel_index = tuple((point // voxel_size).astype(int)) + voxel_grid[voxel_index].append(i) + + downsampled_points = [] + downsampled_indices = [] + for indices in voxel_grid.values(): + selected_index = indices[0] + downsampled_points.append(points[selected_index]) + downsampled_indices.append(selected_index) + + return np.array(downsampled_points), downsampled_indices + + @staticmethod + def sample_view_data(obj, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int = 1, pertube_repeat:int = 1) -> dict: + view_data = { + "look_at_points": [], + "cam_positions": [], + } + mesh = obj.data + bm = bmesh.new() + bm.from_mesh(mesh) + bm.verts.ensure_lookup_table() + bm.faces.ensure_lookup_table() + bm.normal_update() + + look_at_points = [] + cam_positions = [] + normals = [] + for v in bm.verts: + look_at_point = np.array(v.co) + + view_data["look_at_points"].append(look_at_point) + normal = np.zeros(3) + for loop in v.link_loops: + normal += np.array(loop.calc_normal()) + normal /= len(v.link_loops) + normal = normal / np.linalg.norm(normal) + if np.isnan(normal).any(): + continue + if np.dot(normal, look_at_point) < 0: + normal = -normal + normals.append(normal) + + for _ in range(pertube_repeat): + perturb_angle = np.radians(np.random.uniform(0, 30)) + perturb_axis = np.random.normal(size=3) + perturb_axis /= np.linalg.norm(perturb_axis) + rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix() + perturbed_normal = np.dot(rotation_matrix, normal) + middle_distance = (distance_range[0] + distance_range[1]) / 2 + perturbed_distance = random.uniform(middle_distance-0.05, middle_distance+0.05) + cam_position = look_at_point + perturbed_distance * perturbed_normal + look_at_points.append(look_at_point) + cam_positions.append(cam_position) + + + bm.free() + look_at_points = np.array(look_at_points) + cam_positions = np.array(cam_positions) + voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size) + voxel_downsampled_cam_positions = cam_positions[selected_indices] + voxel_downsampled_normals = np.array(normals)[selected_indices] + + fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views*2) + fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices] + + view_data["look_at_points"] = fps_downsampled_look_at_points.tolist() + view_data["cam_positions"] = fps_downsampled_cam_positions.tolist() + view_data["normals"] = voxel_downsampled_normals + view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points + return view_data + + @staticmethod + def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple: + world_points = [] + world_normals = [] + for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]): + voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0) + normal_world = obj_world_pose[:3, :3] @ normal + world_points.append(voxel_down_sampled_points_world[:3]) + world_normals.append(normal_world) + return np.array(world_points), np.array(world_normals) + + @staticmethod + def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray: + cam_poses = [] + min_height_z = 1000 + for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]): + look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0) + cam_position_world = obj_world_pose @ np.append(cam_position, 1.0) + if look_at_point_world[2] < min_height_z: + min_height_z = look_at_point_world[2] + look_at_point_world = look_at_point_world[:3] + cam_position_world = cam_position_world[:3] + + forward_vector = cam_position_world - look_at_point_world + forward_vector /= np.linalg.norm(forward_vector) + + up_vector = np.array([0, 0, 1]) + + right_vector = np.cross(up_vector, forward_vector) + right_vector /= np.linalg.norm(right_vector) + + corrected_up_vector = np.cross(forward_vector, right_vector) + rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T + + cam_pose = np.eye(4) + cam_pose[:3, :3] = rotation_matrix + cam_pose[:3, 3] = cam_position_world + cam_poses.append(cam_pose) + + filtered_cam_poses = [] + for cam_pose in cam_poses: + if cam_pose[2, 3] > min_height_z: + direction_vector = cam_pose[:3, 2] + horizontal_normal = np.array([0, 0, 1]) + cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal)) + angle = np.arccos(np.clip(cos_angle, -1.0, 1.0)) + angle_degree = np.degrees(angle) + if angle_degree < 90 - min_cam_table_included_degree: + filtered_cam_poses.append(cam_pose) + if random.random() < random_view_ratio: + pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm") + filtered_cam_poses.append(pertube_pose @ cam_pose) + + if len(filtered_cam_poses) > max_views: + indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False) + filtered_cam_poses = [filtered_cam_poses[i] for i in indices] + + return np.array(filtered_cam_poses) + + @staticmethod + def sample_view_data_world_space(obj, distance_range:tuple = (0.3,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict: + obj_world_pose = np.asarray(obj.matrix_world) + view_data = ViewSampleUtil.sample_view_data(obj, distance_range, voxel_size, max_views) + view_data["cam_poses"] = ViewSampleUtil.get_cam_pose(view_data, obj_world_pose, max_views, min_cam_table_included_degree, random_view_ratio) + view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, obj_world_pose) + return view_data +