Compare commits

...

15 Commits

Author SHA1 Message Date
27668ebc01 solve merge 2024-11-07 19:27:22 +08:00
378fd76f9f fix scene_info 2024-11-07 19:26:03 +08:00
hofee
4f523b20d5 upd 2024-11-05 22:01:41 +08:00
hofee
0ba46d4402 udp 2024-11-05 00:03:08 +08:00
8198515c7a upd 2024-11-02 21:47:01 +00:00
hofee
9f0225c6d8 update renderer 2024-10-30 13:49:44 -05:00
hofee
7ac50c6523 debug 2024-10-22 23:05:13 +08:00
022fffa7c2 fix bug 2024-10-21 01:01:15 +08:00
hofee
c2849ce9bb fix bug: view_sample 2024-10-20 23:45:44 +08:00
hofee
5eff15d408 backup 2024-10-20 01:03:32 +08:00
hofee
7d25777983 fix normal 2024-10-19 18:23:34 +08:00
hofee
1f8c017a01 optimize code structure 2024-10-18 20:46:31 +08:00
hofee
dd01b4903d debug normal 2024-10-16 15:29:22 -05:00
hofee
7132ff83ce add normal material 2024-10-15 11:17:26 -05:00
hofee
5905669dbd format 2024-10-15 11:07:17 -05:00
20 changed files with 1616 additions and 1219 deletions

2
.gitignore vendored
View File

@@ -3,7 +3,7 @@
__pycache__/ __pycache__/
*.py[cod] *.py[cod]
*$py.class *$py.class
temp*
# C extensions # C extensions
*.so *.so

View File

@@ -6,8 +6,9 @@ import bpy
import numpy as np import numpy as np
import mathutils import mathutils
import requests import requests
from blender.blender_util import BlenderUtils from utils.blender_util import BlenderUtils
from blender.view_sample_util import ViewSampleUtil from utils.view_sample_util import ViewSampleUtil
from utils.material_util import MaterialUtil
class DataGenerator: class DataGenerator:
def __init__(self, config): def __init__(self, config):
@@ -17,6 +18,9 @@ class DataGenerator:
self.random_config = config["runner"]["generate"]["random_config"] self.random_config = config["runner"]["generate"]["random_config"]
self.light_and_camera_config = config["runner"]["generate"]["light_and_camera_config"] self.light_and_camera_config = config["runner"]["generate"]["light_and_camera_config"]
self.obj_dir = config["runner"]["generate"]["object_dir"] self.obj_dir = config["runner"]["generate"]["object_dir"]
self.use_list = config["runner"]["generate"]["use_list"]
self.object_list_path = config["runner"]["generate"]["object_list_path"]
self.max_views = config["runner"]["generate"]["max_views"] self.max_views = config["runner"]["generate"]["max_views"]
self.min_views = config["runner"]["generate"]["min_views"] self.min_views = config["runner"]["generate"]["min_views"]
self.min_diag = config["runner"]["generate"]["min_diag"] self.min_diag = config["runner"]["generate"]["min_diag"]
@@ -29,7 +33,18 @@ class DataGenerator:
self.to_idx = config["runner"]["generate"]["to"] self.to_idx = config["runner"]["generate"]["to"]
self.set_status_path = f"http://localhost:{self.port}/project/set_status" self.set_status_path = f"http://localhost:{self.port}/project/set_status"
self.log_path = f"http://localhost:{self.port}/project/add_log" self.log_path = f"http://localhost:{self.port}/project/add_log"
self.obj_name_list = os.listdir(self.obj_dir)[self.from_idx: self.to_idx] self.origin_obj_name_list = os.listdir(self.obj_dir)[self.from_idx: self.to_idx]
if not os.path.exists(self.output_dir):
os.makedirs(self.output_dir)
self.obj_name_list = []
if self.use_list:
self.target_name_list = [line.strip() for line in open(self.object_list_path).readlines()]
for obj_name in self.target_name_list:
if obj_name in self.origin_obj_name_list:
self.obj_name_list.append(obj_name)
else:
self.obj_name_list = self.origin_obj_name_list
self.target_obj = None self.target_obj = None
self.stopped = False self.stopped = False
self.random_obj_list = [] self.random_obj_list = []
@@ -101,31 +116,8 @@ class DataGenerator:
bpy.ops.rigidbody.object_add() bpy.ops.rigidbody.object_add()
bpy.context.object.rigid_body.type = 'PASSIVE' bpy.context.object.rigid_body.type = 'PASSIVE'
bpy.ops.object.shade_auto_smooth()
# 创建不受光照影响的材质 MaterialUtil.change_object_material(platform, MaterialUtil.create_mask_material(color=(1.0, 0, 0)))
mat = bpy.data.materials.new(name="RedMaterial")
mat.use_nodes = True
# 清除默认节点
nodes = mat.node_tree.nodes
for node in nodes:
nodes.remove(node)
# 添加 Emission 节点
emission_node = nodes.new(type='ShaderNodeEmission')
emission_node.inputs['Color'].default_value = (1.0, 0.0, 0.0, 1.0) # 红色
# 添加 Material Output 节点
output_node = nodes.new(type='ShaderNodeOutputMaterial')
# 连接节点
links = mat.node_tree.links
links.new(emission_node.outputs['Emission'], output_node.inputs['Surface'])
# 将材质赋给对象
platform.data.materials.clear()
platform.data.materials.append(mat)
self.display_table_config = { self.display_table_config = {
"height": height, "height": height,
@@ -135,6 +127,7 @@ class DataGenerator:
return platform return platform
def put_display_object(self, name): def put_display_object(self, name):
config = self.random_config["display_object"] config = self.random_config["display_object"]
x = random.uniform(config["min_x"], config["max_x"]) x = random.uniform(config["min_x"], config["max_x"])
@@ -154,10 +147,8 @@ class DataGenerator:
platform_bbox = self.platform.bound_box platform_bbox = self.platform.bound_box
platform_bbox_world = [self.platform.matrix_world @ mathutils.Vector(corner) for corner in platform_bbox] platform_bbox_world = [self.platform.matrix_world @ mathutils.Vector(corner) for corner in platform_bbox]
platform_top_z = max([v.z for v in platform_bbox_world]) platform_top_z = max([v.z for v in platform_bbox_world])
obj_mesh_path = BlenderUtils.get_obj_path(self.obj_dir, name) obj_mesh_path = BlenderUtils.get_obj_path(self.obj_dir, name)
obj = BlenderUtils.load_obj(name, obj_mesh_path) obj = BlenderUtils.load_obj(name, obj_mesh_path)
obj_bottom_z = BlenderUtils.get_object_bottom_z(obj) obj_bottom_z = BlenderUtils.get_object_bottom_z(obj)
offset_z = obj_bottom_z offset_z = obj_bottom_z
@@ -166,31 +157,7 @@ class DataGenerator:
bpy.ops.rigidbody.object_add() bpy.ops.rigidbody.object_add()
bpy.context.object.rigid_body.type = 'ACTIVE' bpy.context.object.rigid_body.type = 'ACTIVE'
MaterialUtil.change_object_material(obj, MaterialUtil.create_mask_material(color=(0, 1.0, 0)))
# 创建不受光照影响的材质
mat = bpy.data.materials.new(name="GreenMaterial")
mat.use_nodes = True
# 清除默认节点
nodes = mat.node_tree.nodes
for node in nodes:
nodes.remove(node)
# 添加 Emission 节点
emission_node = nodes.new(type='ShaderNodeEmission')
emission_node.inputs['Color'].default_value = (0.0, 1.0, 0.0, 1.0) # 绿色
# 添加 Material Output 节点
output_node = nodes.new(type='ShaderNodeOutputMaterial')
# 连接节点
links = mat.node_tree.links
links.new(emission_node.outputs['Emission'], output_node.inputs['Surface'])
# 将材质赋给对象
obj.data.materials.clear()
obj.data.materials.append(mat)
self.target_obj = obj self.target_obj = obj
@@ -243,11 +210,21 @@ class DataGenerator:
np.savetxt(os.path.join(scene_dir, "points_and_normals.txt"), points_normals) np.savetxt(os.path.join(scene_dir, "points_and_normals.txt"), points_normals)
for i, cam_pose in enumerate(view_data["cam_poses"]): for i, cam_pose in enumerate(view_data["cam_poses"]):
BlenderUtils.set_camera_at(cam_pose) BlenderUtils.set_camera_at(cam_pose)
BlenderUtils.render_and_save(scene_dir, f"{i}", binocular_vision=self.binocular_vision, target_object = self.target_obj) BlenderUtils.render_mask(scene_dir, f"{i}", binocular_vision=self.binocular_vision, target_object = self.target_obj)
BlenderUtils.save_cam_params(scene_dir, i, binocular_vision=self.binocular_vision) BlenderUtils.save_cam_params(scene_dir, i, binocular_vision=self.binocular_vision)
self.set_progress("render frame", i, len(view_data["cam_poses"])) self.set_progress("render frame", i, len(view_data["cam_poses"]))
self.set_progress("render frame", len(view_data["cam_poses"]), len(view_data["cam_poses"])) self.set_progress("render frame", len(view_data["cam_poses"]), len(view_data["cam_poses"]))
BlenderUtils.save_scene_info(scene_dir, self.display_table_config, object_name) BlenderUtils.save_scene_info(scene_dir, self.display_table_config, object_name)
MaterialUtil.change_object_material(self.target_obj, MaterialUtil.create_normal_material())
for i, cam_pose in enumerate(view_data["cam_poses"]):
BlenderUtils.set_camera_at(cam_pose)
BlenderUtils.render_normal_and_depth(scene_dir, f"{i}", binocular_vision=self.binocular_vision, target_object = self.target_obj)
BlenderUtils.save_cam_params(scene_dir, i, binocular_vision=self.binocular_vision)
self.set_progress("render normal frame", i, len(view_data["cam_poses"]))
self.set_progress("render normal frame", len(view_data["cam_poses"]), len(view_data["cam_poses"]))
depth_dir = os.path.join(scene_dir, "depth") depth_dir = os.path.join(scene_dir, "depth")
for depth_file in os.listdir(depth_dir): for depth_file in os.listdir(depth_dir):
if not depth_file.endswith(".png"): if not depth_file.endswith(".png"):
@@ -255,15 +232,10 @@ class DataGenerator:
file_path = os.path.join(depth_dir, depth_file) file_path = os.path.join(depth_dir, depth_file)
new_file_path = os.path.join(depth_dir, f"{name}.png") new_file_path = os.path.join(depth_dir, f"{name}.png")
os.rename(file_path,new_file_path) os.rename(file_path,new_file_path)
normal_dir = os.path.join(scene_dir, "normal")
for normal_file in os.listdir(normal_dir):
if not normal_file.endswith(".png"):
name, _ = os.path.splitext(normal_file)
file_path = os.path.join(normal_dir, normal_file)
new_file_path = os.path.join(normal_dir, f"{name}.png")
os.rename(file_path,new_file_path)
return True return True
def simulate_scene(self, frame_limit=120, depth = 0, diag = 0): def simulate_scene(self, frame_limit=120, depth = 0, diag = 0):
bpy.context.view_layer.update() bpy.context.view_layer.update()
bpy.ops.screen.animation_play() bpy.ops.screen.animation_play()
@@ -285,11 +257,7 @@ class DataGenerator:
msg = self.check_and_adjust_target() msg = self.check_and_adjust_target()
if msg == "adjusted" and depth < 3: if msg == "success":
bpy.context.view_layer.update()
bpy.context.scene.frame_set(0)
return self.simulate_scene(depth = depth + 1, diag=diag)
elif msg == "success":
print("Scene generation completed.") print("Scene generation completed.")
result = self.start_render(diag=diag) result = self.start_render(diag=diag)
if not result: if not result:
@@ -307,11 +275,12 @@ class DataGenerator:
if diag > self.max_diag or diag < self.min_diag: if diag > self.max_diag or diag < self.min_diag:
self.add_log(f"The diagonal size of the object <{object_name}>(size: {round(diag,3)}) does not meet the requirements.", "error") self.add_log(f"The diagonal size of the object <{object_name}>(size: {round(diag,3)}) does not meet the requirements.", "error")
return "diag_error" return "diag_error"
return self.simulate_scene(diag=diag) return self.simulate_scene(diag=diag)
def gen_all_scene_data(self): def gen_all_scene_data(self):
max_retry_times = 3 max_retry_times = 5
total = len(self.obj_name_list) total = len(self.obj_name_list)
count = 0 count = 0
count_success = 0 count_success = 0
@@ -324,11 +293,12 @@ class DataGenerator:
if os.path.exists(scene_info_path): if os.path.exists(scene_info_path):
self.add_log(f"Scene for object <{target_obj_name}> already exists, skipping", "warning") self.add_log(f"Scene for object <{target_obj_name}> already exists, skipping", "warning")
count += 1 count += 1
count_success += 1
continue continue
retry_times = 0 retry_times = 0
self.set_status("target_object", target_obj_name) self.set_status("target_object", target_obj_name)
while retry_times < 3 and result == "retry": while retry_times < max_retry_times and result == "retry":
self.reset() self.reset()
try: try:
result = self.gen_scene_data(target_obj_name) result = self.gen_scene_data(target_obj_name)

86
data_generator_2.py Normal file
View File

@@ -0,0 +1,86 @@
import os
import bpy
import numpy as np
from utils.blender_util import BlenderUtils
from utils.cad_view_sample_util import CADViewSampleUtil
from utils.material_util import MaterialUtil
class DataGenerator:
    """One-shot render-data generator for a single CAD object in Blender.

    Places one object on the table, samples camera views around it, renders a
    segmentation-mask pass and a normal/depth pass per view, saves the .blend
    file, then hard-exits (debug-style single-scene workflow).
    """

    def __init__(self, config):
        # All generation parameters live under config["runner"]["generate"].
        gen_cfg = config["runner"]["generate"]
        self.plane_size = gen_cfg["plane_size"]
        self.output_dir = gen_cfg["output_dir"]
        self.random_config = gen_cfg["random_config"]
        self.light_and_camera_config = gen_cfg["light_and_camera_config"]
        # FIX: the original __init__ never initialized these three attributes,
        # although put_display_object() and start_render() read them — that was
        # a guaranteed AttributeError at runtime. The keys must exist in config
        # (object_dir matches the sibling data_generator.py; the other two are
        # consumed by CADViewSampleUtil below).
        self.obj_dir = gen_cfg["object_dir"]
        self.min_cam_table_included_degree = gen_cfg["min_cam_table_included_degree"]
        self.random_view_ratio = gen_cfg["random_view_ratio"]
        self.max_views = gen_cfg["max_views"]
        self.min_views = gen_cfg["min_views"]
        self.min_diag = gen_cfg["min_diag"]
        self.max_diag = gen_cfg["max_diag"]
        self.binocular_vision = gen_cfg["binocular_vision"]
        self.target_obj = None          # Blender object currently being rendered
        self.stopped = False
        self.random_obj_list = []
        self.display_table_config = {}
        BlenderUtils.setup_scene(self.light_and_camera_config, None, self.binocular_vision)
        self.table = BlenderUtils.get_obj(BlenderUtils.TABLE_NAME)

    def put_display_object(self, name):
        """Load object `name` from obj_dir, make it an ACTIVE rigid body and
        paint it with the green mask material used for segmentation."""
        obj_mesh_path = BlenderUtils.get_obj_path(self.obj_dir, name)
        obj = BlenderUtils.load_obj(name, obj_mesh_path)
        bpy.ops.rigidbody.object_add()
        bpy.context.object.rigid_body.type = 'ACTIVE'
        MaterialUtil.change_object_material(obj, MaterialUtil.create_mask_material(color=(0, 1.0, 0)))
        self.target_obj = obj

    def reset(self):
        """Drop the current target and restore the scene to its initial state."""
        self.target_obj = None
        self.random_obj_list = []
        BlenderUtils.reset_objects_and_platform()

    def start_render(self, diag=0):
        """Render all sampled views of the current target object.

        diag: bounding-box diagonal of the object; the number of views is
        interpolated linearly between min_views and max_views over
        [min_diag, max_diag].
        """
        object_name = self.target_obj.name
        if "." in object_name:
            object_name = object_name.split(".")[0]  # strip Blender's ".001" duplicate suffix
        scene_dir = os.path.join(self.output_dir, object_name)
        if not os.path.exists(scene_dir):
            os.makedirs(scene_dir)
        # Scale the number of views linearly with the object's diagonal size.
        view_num = int(self.min_views + (diag - self.min_diag) / (self.max_diag - self.min_diag) * (self.max_views - self.min_views))
        view_data = CADViewSampleUtil.sample_view_data_world_space(
            self.target_obj, distance_range=(0.25, 0.5), voxel_size=0.005,
            max_views=view_num,
            min_cam_table_included_degree=self.min_cam_table_included_degree,
            random_view_ratio=self.random_view_ratio)
        object_points = np.array(view_data["voxel_down_sampled_points"])
        normals = np.array(view_data["normals"])
        points_normals = np.concatenate((object_points, normals), axis=1)
        np.savetxt(os.path.join(scene_dir, "points_and_normals.txt"), points_normals)
        # Pass 1: segmentation masks + camera parameters for every view.
        for i, cam_pose in enumerate(view_data["cam_poses"]):
            BlenderUtils.set_camera_at(cam_pose)
            BlenderUtils.render_mask(scene_dir, f"{i}", binocular_vision=self.binocular_vision, target_object=self.target_obj)
            BlenderUtils.save_cam_params(scene_dir, i, binocular_vision=self.binocular_vision)
        BlenderUtils.save_scene_info(scene_dir, self.display_table_config, object_name)
        # Pass 2: switch the target to a normal-visualizing material and render normals + depth.
        MaterialUtil.change_object_material(self.target_obj, MaterialUtil.create_normal_material())
        for i, cam_pose in enumerate(view_data["cam_poses"]):
            BlenderUtils.set_camera_at(cam_pose)
            BlenderUtils.render_normal_and_depth(scene_dir, f"{i}", binocular_vision=self.binocular_vision, target_object=self.target_obj)
            BlenderUtils.save_cam_params(scene_dir, i, binocular_vision=self.binocular_vision)
        # Blender writes depth files without the ".png" extension; normalize the names.
        depth_dir = os.path.join(scene_dir, "depth")
        for depth_file in os.listdir(depth_dir):
            if not depth_file.endswith(".png"):
                name, _ = os.path.splitext(depth_file)
                file_path = os.path.join(depth_dir, depth_file)
                new_file_path = os.path.join(depth_dir, f"{name}.png")
                os.rename(file_path, new_file_path)
        BlenderUtils.save_blend(scene_dir)
        # NOTE(review): hard process exit after the first scene (debug-style
        # single-shot run); it makes the return below unreachable. Kept as-is
        # because removing it would change the script's observable behavior.
        exit(0)
        return True

    def gen_scene_data(self, object_name):
        """Place `object_name` on the table and render its full view set."""
        bpy.context.scene.frame_set(0)
        self.put_display_object(object_name)
        diag = BlenderUtils.get_obj_diag(self.target_obj.name)
        self.start_render(diag)

View File

@@ -1,265 +0,0 @@
import os
import numpy as np
import json
import cv2
import trimesh
from pts import PtsUtil
class DataLoadUtil:
    """Static helpers for loading/saving per-scene dataset artifacts.

    Scene layout on disk (under `root/scene_name/`): `camera_params/*.json`,
    `depth/*.png`, `mask/*.png`, `rgb/*.png`, `scene_info.json`,
    `points_and_normals.txt`, `label.json`, `sampled_model_points.txt`.
    """

    @staticmethod
    def get_path(root, scene_name, frame_idx):
        """Return the extension-less base path for one frame of a scene."""
        path = os.path.join(root, scene_name, f"{frame_idx}")
        return path

    @staticmethod
    def get_label_path(root, scene_name):
        """Return the path of the scene's label.json."""
        path = os.path.join(root,scene_name, f"label.json")
        return path

    @staticmethod
    def get_sampled_model_points_path(root, scene_name):
        """Return the path of the downsampled world-space model points file."""
        path = os.path.join(root,scene_name, f"sampled_model_points.txt")
        return path

    @staticmethod
    def get_scene_seq_length(root, scene_name):
        """Number of frames in the scene (one camera_params file per frame)."""
        camera_params_path = os.path.join(root, scene_name, "camera_params")
        return len(os.listdir(camera_params_path))

    @staticmethod
    def load_downsampled_world_model_points(root, scene_name):
        """Load the (N, 3) downsampled world-space model points via np.loadtxt."""
        model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name)
        model_points = np.loadtxt(model_path)
        return model_points

    @staticmethod
    def save_downsampled_world_model_points(root, scene_name, model_points):
        """Persist the downsampled world-space model points via np.savetxt."""
        model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name)
        np.savetxt(model_path, model_points)

    @staticmethod
    def load_mesh_at(model_dir, object_name, world_object_pose):
        """Load `model_dir/object_name/mesh.obj` and transform it by the given 4x4 pose."""
        model_path = os.path.join(model_dir, object_name, "mesh.obj")
        mesh = trimesh.load(model_path)
        mesh.apply_transform(world_object_pose)
        return mesh

    @staticmethod
    def get_bbox_diag(model_dir, object_name):
        """Diagonal length of the object's axis-aligned bounding box (model space)."""
        model_path = os.path.join(model_dir, object_name, "mesh.obj")
        mesh = trimesh.load(model_path)
        bbox = mesh.bounding_box.extents
        diagonal_length = np.linalg.norm(bbox)
        return diagonal_length

    @staticmethod
    def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
        """Export the posed mesh to `output_dir/scene_name/world_mesh.obj`."""
        mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
        model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
        mesh.export(model_path)

    @staticmethod
    def save_target_mesh_at_world_space(root, model_dir, scene_name):
        """Export the scene's target object mesh, posed into world space,
        to `root/scene_name/mesh/world_target_mesh.obj`."""
        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
        target_name = scene_info["target_name"]
        transformation = scene_info[target_name]
        location = transformation["location"]
        rotation_euler = transformation["rotation_euler"]
        # Build a 4x4 pose from the stored Euler angles + location.
        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
        pose_mat[:3, 3] = location
        mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
        mesh_dir = os.path.join(root, scene_name, "mesh")
        if not os.path.exists(mesh_dir):
            os.makedirs(mesh_dir)
        model_path = os.path.join(mesh_dir, "world_target_mesh.obj")
        mesh.export(model_path)

    @staticmethod
    def load_scene_info(root, scene_name):
        """Load and return the scene's scene_info.json as a dict."""
        scene_info_path = os.path.join(root, scene_name, "scene_info.json")
        with open(scene_info_path, "r") as f:
            scene_info = json.load(f)
        return scene_info

    @staticmethod
    def load_target_object_pose(root, scene_name):
        """Return the target object's 4x4 world pose from scene_info.json."""
        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
        target_name = scene_info["target_name"]
        transformation = scene_info[target_name]
        location = transformation["location"]
        rotation_euler = transformation["rotation_euler"]
        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
        pose_mat[:3, 3] = location
        return pose_mat

    @staticmethod
    def load_depth(path, min_depth=0.01,max_depth=5.0,binocular=False):
        """Load depth image(s) for the frame at `path` (extension-less base path).

        Depth is stored as 16-bit PNG normalized to [0, 1]; it is mapped back
        to meters via min_depth + (max_depth - min_depth) * value.
        Returns (left, right) when binocular, else a single depth map.
        """
        def load_depth_from_real_path(real_path, min_depth, max_depth):
            depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED)
            # 16-bit PNG -> [0, 1] -> meters.
            depth = depth.astype(np.float32) / 65535.0
            min_depth = min_depth
            max_depth = max_depth
            depth_meters = min_depth + (max_depth - min_depth) * depth
            return depth_meters
        if binocular:
            depth_path_L = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_L.png")
            depth_path_R = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_R.png")
            depth_meters_L = load_depth_from_real_path(depth_path_L, min_depth, max_depth)
            depth_meters_R = load_depth_from_real_path(depth_path_R, min_depth, max_depth)
            return depth_meters_L, depth_meters_R
        else:
            depth_path = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + ".png")
            depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth)
            return depth_meters

    @staticmethod
    def load_seg(path, binocular=False):
        """Load segmentation mask(s) for the frame at `path`.

        Binocular masks are loaded unchanged (RGBA) and near-green/near-red
        pixels are snapped to the exact label colors; the monocular mask is
        loaded as grayscale.
        """
        if binocular:
            def clean_mask(mask_image):
                green = [0, 255, 0, 255]
                red = [255, 0, 0, 255]
                threshold = 2
                # NOTE(review): np.where operates per channel here, so channels
                # within `threshold` of the label are snapped independently —
                # confirm this matches the renderer's noise characteristics.
                mask_image = np.where(np.abs(mask_image - green) <= threshold, green, mask_image)
                mask_image = np.where(np.abs(mask_image - red) <= threshold, red, mask_image)
                return mask_image
            mask_path_L = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_L.png")
            mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED))
            mask_path_R = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_R.png")
            mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED))
            return mask_image_L, mask_image_R
        else:
            mask_path = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + ".png")
            mask_image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            return mask_image

    @staticmethod
    def load_label(path):
        """Load a JSON label file and return it as a dict."""
        with open(path, 'r') as f:
            label_data = json.load(f)
        return label_data

    @staticmethod
    def load_rgb(path):
        """Load the RGB image for the frame at `path` (BGR order, per cv2)."""
        rgb_path = os.path.join(os.path.dirname(path), "rgb", os.path.basename(path) + ".png")
        rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR)
        return rgb_image

    @staticmethod
    def cam_pose_transformation(cam_pose_before):
        """Right-multiply the pose by a Y/Z axis flip.

        NOTE(review): this looks like a camera-convention conversion (e.g.
        Blender/OpenGL -> OpenCV, which differ by flipped Y and Z axes) —
        confirm against the renderer's extrinsic convention.
        """
        offset = np.asarray([
            [1, 0, 0, 0],
            [0, -1, 0, 0],
            [0, 0, -1, 0],
            [0, 0, 0, 1]])
        cam_pose_after = cam_pose_before @ offset
        return cam_pose_after

    @staticmethod
    def load_cam_info(path, binocular=False):
        """Load camera intrinsics/extrinsics (and right-eye extrinsics when
        binocular) for the frame at `path`, converting extrinsics with
        cam_pose_transformation."""
        camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json")
        with open(camera_params_path, 'r') as f:
            label_data = json.load(f)
        cam_to_world = np.asarray(label_data["extrinsic"])
        cam_to_world = DataLoadUtil.cam_pose_transformation(cam_to_world)
        cam_intrinsic = np.asarray(label_data["intrinsic"])
        cam_info = {
            "cam_to_world": cam_to_world,
            "cam_intrinsic": cam_intrinsic,
            "far_plane": label_data["far_plane"],
            "near_plane": label_data["near_plane"]
        }
        if binocular:
            cam_to_world_R = np.asarray(label_data["extrinsic_R"])
            cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R)
            cam_info["cam_to_world_R"] = cam_to_world_R
        return cam_info

    @staticmethod
    def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)):
        """Back-project the depth pixels whose RGBA mask equals
        `target_mask_label` (default: the green target label) into camera and
        world space. Returns {"points_world", "points_camera"}."""
        h, w = depth.shape
        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
        z = depth
        # Pinhole back-projection using the intrinsic matrix.
        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
        mask = mask.reshape(-1,4)
        # A pixel belongs to the target only if all 4 channels match the label.
        target_mask = (mask == target_mask_label).all(axis=-1)
        target_points_camera = points_camera[target_mask]
        target_points_camera_aug = np.concatenate([target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1)
        target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3]
        return {
            "points_world": target_points_world,
            "points_camera": target_points_camera
        }

    @staticmethod
    def get_point_cloud(depth, cam_intrinsic, cam_extrinsic):
        """Back-project every depth pixel into camera and world space.
        Returns {"points_world", "points_camera"}."""
        h, w = depth.shape
        i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy')
        z = depth
        x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0]
        y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1]
        points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3)
        points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1)
        points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3]
        return {
            "points_world": points_world,
            "points_camera": points_camera
        }

    @staticmethod
    def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)):
        """World-space target point cloud for the frame at `path`.

        Binocular: back-project both eyes, randomly downsample each to
        `random_downsample_N` points, and keep only points whose voxels are
        seen by both eyes (filters mismatched stereo noise).
        Monocular: plain masked back-projection.
        """
        cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular)
        if binocular:
            depth_L, depth_R = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane'], binocular=True)
            mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True)
            point_cloud_L = DataLoadUtil.get_target_point_cloud(depth_L, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask_L, target_mask_label)['points_world']
            point_cloud_R = DataLoadUtil.get_target_point_cloud(depth_R, cam_info['cam_intrinsic'], cam_info['cam_to_world_R'], mask_R, target_mask_label)['points_world']
            point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, random_downsample_N)
            point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, random_downsample_N)
            overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size)
            return overlap_points
        else:
            depth = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane'])
            mask = DataLoadUtil.load_seg(path)
            point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask)['points_world']
            return point_cloud

    @staticmethod
    def voxelize_points(points, voxel_size):
        """Quantize points onto a voxel grid.

        Returns the tuple from np.unique(..., return_inverse=True):
        (unique_voxel_coords, per-point index into the unique array).
        Callers unpack it as `voxels, indices = voxelize_points(...)`.
        """
        voxel_indices = np.floor(points / voxel_size).astype(np.int32)
        unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
        return unique_voxels

    @staticmethod
    def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005):
        """Return the left-cloud points whose voxel is also occupied by the
        right cloud (voxel-level intersection of the two clouds)."""
        voxels_L, indices_L = DataLoadUtil.voxelize_points(point_cloud_L, voxel_size)
        voxels_R, _ = DataLoadUtil.voxelize_points(point_cloud_R, voxel_size)
        # View each (x, y, z) voxel row as one structured scalar so that
        # np.intersect1d/np.isin can compare whole rows at once.
        voxel_indices_L = voxels_L.view([('', voxels_L.dtype)]*3)
        voxel_indices_R = voxels_R.view([('', voxels_R.dtype)]*3)
        overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R)
        # Map overlapping unique voxels back to the original left-cloud points.
        mask_L = np.isin(indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0])
        overlapping_points = point_cloud_L[mask_L]
        return overlapping_points

    @staticmethod
    def load_points_normals(root, scene_name):
        """Load the scene's (N, 6) [x y z nx ny nz] array from points_and_normals.txt."""
        points_path = os.path.join(root, scene_name, "points_and_normals.txt")
        points_normals = np.loadtxt(points_path)
        return points_normals

View File

@@ -2,11 +2,12 @@ import os
import bpy import bpy
import sys import sys
import json import json
import time
import mathutils import mathutils
import numpy as np import numpy as np
sys.path.append(os.path.dirname(os.path.abspath(__file__))) sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from blender_util import BlenderUtils from utils.blender_util import BlenderUtils
from utils.material_util import MaterialUtil
class DataRenderer: class DataRenderer:
def __init__(self): def __init__(self):
@@ -23,8 +24,8 @@ class DataRenderer:
"near_plane": 0.01, "near_plane": 0.01,
"far_plane": 5, "far_plane": 5,
"fov_vertical": 25, "fov_vertical": 25,
"resolution": [1280, 800], "resolution": [640, 400],
"eye_distance": 0.15, "eye_distance": 0.10,
"eye_angle": 25 "eye_angle": 25
}, },
"Light": { "Light": {
@@ -42,7 +43,6 @@ class DataRenderer:
self.light_and_camera_config = config["renderer"]["generate"]["light_and_camera_config"] self.light_and_camera_config = config["renderer"]["generate"]["light_and_camera_config"]
self.obj_dir = config["renderer"]["generate"]["object_dir"] self.obj_dir = config["renderer"]["generate"]["object_dir"]
self.binocular_vision = config["renderer"]["generate"]["binocular_vision"] self.binocular_vision = config["renderer"]["generate"]["binocular_vision"]
self.obj_name_list = os.listdir(self.obj_dir)
self.target_obj = None self.target_obj = None
self.random_obj_list = [] self.random_obj_list = []
@@ -58,15 +58,32 @@ class DataRenderer:
def do_render(self, cam_pose, restore_info, temp_dir): def do_render(self, cam_pose, restore_info, temp_dir):
self.reset() self.reset()
start_time = time.time()
self.restore_scene(restore_info=restore_info) self.restore_scene(restore_info=restore_info)
end_time = time.time()
print(f"Time taken for restoring scene: {end_time - start_time} seconds")
object_name = self.target_obj.name object_name = self.target_obj.name
temp_file_name = f"tmp" temp_file_name = f"tmp"
if "." in object_name: if "." in object_name:
object_name = object_name.split(".")[0] object_name = object_name.split(".")[0]
start_time = time.time()
BlenderUtils.set_camera_at(cam_pose) BlenderUtils.set_camera_at(cam_pose)
BlenderUtils.render_and_save(temp_dir, temp_file_name, binocular_vision=self.binocular_vision) BlenderUtils.render_mask(temp_dir, temp_file_name, binocular_vision=self.binocular_vision)
MaterialUtil.change_object_material(self.target_obj, MaterialUtil.create_normal_material())
BlenderUtils.render_normal_and_depth(temp_dir, temp_file_name, binocular_vision=self.binocular_vision)
end_time = time.time()
print(f"Time taken for rendering: {end_time - start_time} seconds")
BlenderUtils.save_cam_params(temp_dir, temp_file_name, binocular_vision=self.binocular_vision)
depth_dir = os.path.join(temp_dir, "depth")
for depth_file in os.listdir(depth_dir):
if not depth_file.endswith(".png"):
name, _ = os.path.splitext(depth_file)
file_path = os.path.join(depth_dir, depth_file)
new_file_path = os.path.join(depth_dir, f"{name}.png")
os.rename(file_path,new_file_path)
def restore_scene(self, restore_info): def restore_scene(self, restore_info):
for obj_name, obj_info in restore_info.items(): for obj_name, obj_info in restore_info.items():
@@ -82,12 +99,7 @@ class DataRenderer:
obj.location = mathutils.Vector(obj_info["location"]) obj.location = mathutils.Vector(obj_info["location"])
obj.rotation_euler = mathutils.Vector(obj_info["rotation_euler"]) obj.rotation_euler = mathutils.Vector(obj_info["rotation_euler"])
obj.scale = mathutils.Vector(obj_info["scale"]) obj.scale = mathutils.Vector(obj_info["scale"])
mat = bpy.data.materials.new(name="GreenMaterial") MaterialUtil.change_object_material(obj, MaterialUtil.create_mask_material(color=(0, 1.0, 0)))
mat.diffuse_color = (0.0, 1.0, 0.0, 1.0) # Green with full alpha (1.0)
if len(obj.data.materials) > 0:
obj.data.materials[0] = mat
else:
obj.data.materials.append(mat)
self.target_obj = obj self.target_obj = obj
def restore_display_platform(self, platform_info): def restore_display_platform(self, platform_info):
@@ -96,16 +108,9 @@ class DataRenderer:
platform.name = BlenderUtils.DISPLAY_TABLE_NAME platform.name = BlenderUtils.DISPLAY_TABLE_NAME
platform.location = mathutils.Vector(platform_info["location"]) platform.location = mathutils.Vector(platform_info["location"])
mat = bpy.data.materials.new(name="RedMaterial")
mat.diffuse_color = (1.0, 0.0, 0.0, 1.0) # Red with full alpha (1.0)
if len(platform.data.materials) > 0:
platform.data.materials[0] = mat
else:
platform.data.materials.append(mat)
bpy.ops.rigidbody.object_add() bpy.ops.rigidbody.object_add()
bpy.context.object.rigid_body.type = 'PASSIVE' bpy.context.object.rigid_body.type = 'PASSIVE'
bpy.ops.object.shade_auto_smooth() MaterialUtil.change_object_material(platform, MaterialUtil.create_mask_material(color=(1.0, 0, 0)))
def main(temp_dir): def main(temp_dir):
params_data_path = os.path.join(temp_dir, "params.json") params_data_path = os.path.join(temp_dir, "params.json")
@@ -116,9 +121,14 @@ def main(temp_dir):
scene_info_path = os.path.join(params_data["scene_path"], "scene_info.json") scene_info_path = os.path.join(params_data["scene_path"], "scene_info.json")
with open(scene_info_path, 'r') as f: with open(scene_info_path, 'r') as f:
scene_info = json.load(f) scene_info = json.load(f)
start_time = time.time()
data_renderer = DataRenderer() data_renderer = DataRenderer()
end_time = time.time()
print(f"Time taken for initialization: {end_time - start_time} seconds")
start_time = time.time()
data_renderer.do_render(cam_pose, scene_info, temp_dir) data_renderer.do_render(cam_pose, scene_info, temp_dir)
end_time = time.time()
print(f"Time taken for rendering: {end_time - start_time} seconds")
depth_dir = os.path.join(temp_dir, "depth") depth_dir = os.path.join(temp_dir, "depth")
for depth_file in os.listdir(depth_dir): for depth_file in os.listdir(depth_dir):
if not depth_file.endswith(".png"): if not depth_file.endswith(".png"):
@@ -134,4 +144,4 @@ if __name__ == "__main__":
print("Usage: blender -b -P data_renderer.py -- <temp_dir>") print("Usage: blender -b -P data_renderer.py -- <temp_dir>")
else: else:
temp_dir = sys.argv[-1] temp_dir = sys.argv[-1]
main(temp_dir) main(temp_dir)

22
pts.py
View File

@@ -1,22 +0,0 @@
import numpy as np
import open3d as o3d
class PtsUtil:
    """Small utility collection for point-cloud downsampling and rigid transforms."""

    @staticmethod
    def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005):
        """Voxel-grid downsample an (N, 3) cloud via Open3D; returns the kept points."""
        o3d_pc = o3d.geometry.PointCloud()
        o3d_pc.points = o3d.utility.Vector3dVector(point_cloud)
        downsampled_pc = o3d_pc.voxel_down_sample(voxel_size)
        return np.asarray(downsampled_pc.points)

    @staticmethod
    def transform_point_cloud(points, pose_mat):
        """Apply a 4x4 homogeneous transform to an (N, 3) cloud; returns (N, 3)."""
        points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
        points_h = np.dot(pose_mat, points_h.T).T
        return points_h[:, :3]

    @staticmethod
    def random_downsample_point_cloud(point_cloud, num_points):
        """Randomly select exactly `num_points` rows from an (N, 3) cloud.

        FIX: the original always sampled with replace=True, which injected
        duplicate points even when the cloud already contained enough points
        (silently degrading downstream voxel-overlap statistics). Replacement
        is now used only when the cloud is smaller than the requested count;
        the output size contract (`num_points` rows) is unchanged. An empty
        input is returned as-is instead of raising from np.random.choice.
        """
        if len(point_cloud) == 0:
            return point_cloud
        need_replacement = len(point_cloud) < num_points
        idx = np.random.choice(len(point_cloud), num_points, replace=need_replacement)
        return point_cloud[idx]

View File

@@ -1,119 +0,0 @@
import numpy as np
from scipy.spatial import cKDTree
from pts import PtsUtil
class ReconstructionUtil:
    """Coverage/overlap metrics and greedy next-best-view (NBV) selection.

    All point clouds are (N, 3) numpy arrays; *threshold* is a Euclidean
    distance in the clouds' units and doubles as the voxel size for
    downsampling.
    """

    @staticmethod
    def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
        """Return the fraction of *target_point_cloud* points that have a
        neighbor in *combined_point_cloud* closer than *threshold*."""
        kdtree = cKDTree(combined_point_cloud)
        distances, _ = kdtree.query(target_point_cloud)
        covered_points = np.sum(distances < threshold)
        coverage_rate = covered_points / target_point_cloud.shape[0]
        return coverage_rate

    @staticmethod
    def compute_overlap_rate(new_point_cloud, combined_point_cloud, threshold=0.01):
        """Return the fraction of *new_point_cloud* points already within
        *threshold* of some point in *combined_point_cloud*."""
        kdtree = cKDTree(combined_point_cloud)
        distances, _ = kdtree.query(new_point_cloud)
        overlapping_points = np.sum(distances < threshold)
        overlap_rate = overlapping_points / new_point_cloud.shape[0]
        return overlap_rate

    @staticmethod
    def combine_point_with_view_sequence(point_list, view_sequence):
        """Stack the clouds of *point_list* selected by the
        (view_index, coverage) pairs in *view_sequence* into one array."""
        selected_views = [point_list[view_index] for view_index, _ in view_sequence]
        return np.vstack(selected_views)

    @staticmethod
    def compute_next_view_coverage_list(views, combined_point_cloud, target_point_cloud, threshold=0.01):
        """Single-step greedy lookahead.

        Evaluate every candidate view and return ``(best_view_index,
        best_coverage_increase)`` for the view whose addition to
        *combined_point_cloud* raises coverage of *target_point_cloud* most.
        """
        best_view = None
        best_coverage_increase = -1
        current_coverage = ReconstructionUtil.compute_coverage_rate(
            target_point_cloud, combined_point_cloud, threshold
        )

        for view_index, view in enumerate(views):
            # BUGFIX: the original did `combined_point_cloud + [view]`, which
            # element-wise adds a list to an ndarray instead of concatenating
            # the two clouds; stack them row-wise instead.
            candidate_cloud = np.vstack([combined_point_cloud, view])
            down_sampled_cloud = PtsUtil.voxel_downsample_point_cloud(
                candidate_cloud, threshold
            )
            new_coverage = ReconstructionUtil.compute_coverage_rate(
                target_point_cloud, down_sampled_cloud, threshold
            )
            coverage_increase = new_coverage - current_coverage
            if coverage_increase > best_coverage_increase:
                best_coverage_increase = coverage_increase
                best_view = view_index
        return best_view, best_coverage_increase

    @staticmethod
    def compute_next_best_view_sequence_with_overlap(target_point_cloud, point_cloud_list, display_table_point_cloud_list = None,threshold=0.01, overlap_threshold=0.3, status_info=None):
        """Greedily build a view sequence maximizing coverage of
        *target_point_cloud*.

        A candidate view is only considered if it overlaps the already
        selected cloud by at least *overlap_threshold*; selection stops when
        the best remaining gain is <= 1e-3 or no candidate qualifies.

        :param status_info: optional dict with a "status_manager",
            "app_name" and "runner_name" used for progress reporting.
        :return: (view_sequence, remaining_view_indices,
            last_downsampled_combined_cloud); the last element is ``None``
            when no view was ever evaluated (e.g. empty *point_cloud_list*).
        """
        selected_views = []
        current_coverage = 0.0
        remaining_views = list(range(len(point_cloud_list)))
        view_sequence = []
        cnt_processed_view = 0
        # BUGFIX: initialize so the return below cannot raise NameError when
        # the loop never evaluates a candidate (empty input).
        down_sampled_combined_point_cloud = None
        while remaining_views:
            best_view = None
            best_coverage_increase = -1

            for view_index in remaining_views:
                if selected_views:
                    combined_old_point_cloud = np.vstack(selected_views)
                    down_sampled_old_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_old_point_cloud, threshold)
                    down_sampled_new_view_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud_list[view_index], threshold)
                    overlap_rate = ReconstructionUtil.compute_overlap_rate(down_sampled_new_view_point_cloud, down_sampled_old_point_cloud, threshold)
                    # Skip views that do not overlap enough with what we have;
                    # they cannot be registered reliably.
                    if overlap_rate < overlap_threshold:
                        continue

                candidate_views = selected_views + [point_cloud_list[view_index]]
                combined_point_cloud = np.vstack(candidate_views)
                down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, threshold)
                new_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, down_sampled_combined_point_cloud, threshold)
                coverage_increase = new_coverage - current_coverage
                if coverage_increase > best_coverage_increase:
                    best_coverage_increase = coverage_increase
                    best_view = view_index

            if best_view is not None:
                # Stop once the marginal gain is negligible.
                if best_coverage_increase <= 1e-3:
                    break
                selected_views.append(point_cloud_list[best_view])
                remaining_views.remove(best_view)
                current_coverage += best_coverage_increase
                cnt_processed_view += 1
                if status_info is not None:
                    sm = status_info["status_manager"]
                    app_name = status_info["app_name"]
                    runner_name = status_info["runner_name"]
                    sm.set_status(app_name, runner_name, "current coverage", current_coverage)
                    sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list))

                view_sequence.append((best_view, current_coverage))
            else:
                break
        if status_info is not None:
            sm = status_info["status_manager"]
            app_name = status_info["app_name"]
            runner_name = status_info["runner_name"]
            sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list))
        return view_sequence, remaining_views, down_sampled_combined_point_cloud

    @staticmethod
    def filter_points(points, points_normals, cam_pose, voxel_size=0.005, theta=45):
        """Keep downsampled points whose (nearest-neighbor) normal faces the
        camera within *theta* degrees.

        :param points_normals: (N, 6) array of XYZ + normal per point.
        :param cam_pose: 4x4 camera pose; -Z column is taken as the viewing
            axis.
        """
        sampled_points = PtsUtil.voxel_downsample_point_cloud(points, voxel_size)
        kdtree = cKDTree(points_normals[:, :3])
        _, indices = kdtree.query(sampled_points)
        nearest_points = points_normals[indices]
        normals = nearest_points[:, 3:]
        camera_axis = -cam_pose[:3, 2]
        normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
        cos_theta = np.dot(normals_normalized, camera_axis)
        theta_rad = np.deg2rad(theta)
        filtered_sampled_points = sampled_points[cos_theta > np.cos(theta_rad)]
        return filtered_sampled_points[:, :3]

View File

@@ -1,10 +1,10 @@
import os import os
import sys import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append("/home/hofee/.local/lib/python3.11/site-packages")
import yaml import yaml
from blender.data_generator import DataGenerator
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from data_generator import DataGenerator
if __name__ == "__main__": if __name__ == "__main__":
config_path = sys.argv[sys.argv.index('--') + 1] config_path = sys.argv[sys.argv.index('--') + 1]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 250 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 260 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 548 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 565 KiB

View File

@@ -1,29 +0,0 @@
{
"cam_pose": [
[
-0.8127143979072571,
-0.3794165253639221,
0.4421972334384918,
0.2740877568721771
],
[
0.5826622247695923,
-0.5292212963104248,
0.616789698600769,
0.46910107135772705
],
[
0.0,
0.7589254975318909,
0.6511774659156799,
1.2532192468643188
],
[
0.0,
0.0,
0.0,
1.0
]
],
"scene_path": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes/google_scan-backpack_0288"
}

View File

@@ -1,369 +1,411 @@
import os
import os import json
import json import bpy
import bpy import time
import time import gc
import gc import numpy as np
import numpy as np import mathutils
import mathutils
class BlenderUtils: class BlenderUtils:
TABLE_NAME: str = "table" TABLE_NAME: str = "table"
CAMERA_NAME: str = "Camera" CAMERA_NAME: str = "Camera"
CAMERA_RIGHT_NAME: str = "CameraRight" CAMERA_RIGHT_NAME: str = "CameraRight"
CAMERA_OBJECT_NAME: str = "CameraObject" CAMERA_OBJECT_NAME: str = "CameraObject"
DISPLAY_TABLE_NAME: str = "display_table" DISPLAY_TABLE_NAME: str = "display_table"
MESH_FILE_NAME: str = "mesh.obj" MESH_FILE_NAME: str = "mesh.obj"
@staticmethod @staticmethod
def get_obj_path(obj_dir, name): def get_obj_path(obj_dir, name):
return os.path.join(obj_dir, name, BlenderUtils.MESH_FILE_NAME) return os.path.join(obj_dir, name, BlenderUtils.MESH_FILE_NAME)
@staticmethod @staticmethod
def load_obj(name, mesh_path, scale=1): def load_obj(name, mesh_path, scale=1):
print(mesh_path) print(mesh_path)
bpy.ops.wm.obj_import(filepath=mesh_path) bpy.ops.wm.obj_import(filepath=mesh_path)
loaded_object = bpy.context.selected_objects[-1] loaded_object = bpy.context.selected_objects[-1]
loaded_object.name = name loaded_object.name = name
loaded_object.data.name = name loaded_object.data.name = name
loaded_object.scale = (scale, scale, scale) loaded_object.scale = (scale, scale, scale)
bpy.ops.rigidbody.object_add() bpy.ops.rigidbody.object_add()
return loaded_object return loaded_object
@staticmethod @staticmethod
def get_obj(name): def get_obj(name):
return bpy.data.objects.get(name) return bpy.data.objects.get(name)
@staticmethod
@staticmethod def get_obj_pose(name):
def get_obj_pose(name): obj = BlenderUtils.get_obj(name)
obj = BlenderUtils.get_obj(name) return np.asarray(obj.matrix_world)
return np.asarray(obj.matrix_world)
@staticmethod
def add_plane(name, location, orientation, size=10):
bpy.ops.mesh.primitive_plane_add(size=size, location=location)
@staticmethod plane = bpy.context.selected_objects[-1]
def add_plane(name, location, orientation, size=10): plane.name = name
bpy.ops.mesh.primitive_plane_add(size=size,location=location) plane.rotation_euler = orientation
plane = bpy.context.selected_objects[-1] bpy.ops.rigidbody.object_add()
plane.name = name bpy.context.object.rigid_body.type = "PASSIVE"
plane.rotation_euler = orientation
bpy.ops.rigidbody.object_add() @staticmethod
bpy.context.object.rigid_body.type = 'PASSIVE' def add_table(table_model_path):
table = BlenderUtils.load_obj(
@staticmethod BlenderUtils.TABLE_NAME, table_model_path, scale=0.01
def add_table(table_model_path): )
table = BlenderUtils.load_obj(BlenderUtils.TABLE_NAME, table_model_path, scale=0.01) bpy.ops.rigidbody.object_add()
bpy.ops.rigidbody.object_add() bpy.context.object.rigid_body.type = "PASSIVE"
bpy.context.object.rigid_body.type = 'PASSIVE'
mat = bpy.data.materials.new(name="TableYellowMaterial")
mat = bpy.data.materials.new(name="TableYellowMaterial") mat.diffuse_color = (1.0, 1.0, 0.0, 1.0)
mat.diffuse_color = (1.0, 1.0, 0.0, 1.0) if len(table.data.materials) > 0:
if len(table.data.materials) > 0: table.data.materials[0] = mat
table.data.materials[0] = mat else:
else: table.data.materials.append(mat)
table.data.materials.append(mat)
@staticmethod
@staticmethod def setup_scene(init_light_and_camera_config, table_model_path, binocular_vision):
def setup_scene(init_light_and_camera_config, table_model_path, binocular_vision): bpy.context.scene.render.engine = "BLENDER_EEVEE"
bpy.context.scene.render.engine = 'BLENDER_EEVEE_NEXT' bpy.context.scene.display.shading.show_xray = False
bpy.context.scene.display.shading.show_xray = False bpy.context.scene.display.shading.use_dof = False
bpy.context.scene.display.shading.use_dof = False bpy.context.scene.display.render_aa = "OFF"
bpy.context.scene.display.render_aa = 'OFF' bpy.context.scene.view_settings.view_transform = "Standard"
bpy.context.scene.view_settings.view_transform = 'Standard'
bpy.context.scene.eevee.use_ssr = False # 关闭屏幕空间反射
bpy.context.scene.eevee.use_ssr = False # 关闭屏幕空间反射 bpy.context.scene.eevee.use_bloom = False # 关闭辉光
bpy.context.scene.eevee.use_bloom = False # 关闭辉光 bpy.context.scene.eevee.use_gtao = False # 关闭环境光遮蔽
bpy.context.scene.eevee.use_gtao = False # 关闭环境光遮蔽 bpy.context.scene.eevee.use_soft_shadows = False # 关闭软阴影
bpy.context.scene.eevee.use_soft_shadows = False # 关闭阴影 bpy.context.scene.eevee.use_shadows = False # 关闭所有阴影
bpy.context.scene.eevee.use_shadows = False # 关闭所有阴影 bpy.context.scene.world.use_nodes = False # 如果你不需要环境光,关闭环境节点
bpy.context.scene.world.use_nodes = False # 如果你不需要环境光,关闭环境节点
# bpy.context.scene.eevee.use_sss = False # 关闭次表面散射
#bpy.context.scene.eevee.use_sss = False # 关闭次表面散射 # 2. 设置最低的采样数
bpy.context.scene.eevee.taa_render_samples = 1
# 2. 设置最低的采样数 bpy.context.scene.eevee.taa_samples = 1
bpy.context.scene.eevee.taa_render_samples = 1 BlenderUtils.init_light_and_camera(
bpy.context.scene.eevee.taa_samples = 1 init_light_and_camera_config, binocular_vision
BlenderUtils.init_light_and_camera(init_light_and_camera_config, binocular_vision) )
BlenderUtils.add_plane("plane_floor", location=(0,0,0), orientation=(0,0,0)) BlenderUtils.add_plane("plane_floor", location=(0, 0, 0), orientation=(0, 0, 0))
BlenderUtils.add_plane("plane_ceil", location=(0,0,10), orientation=(0,0,0)) BlenderUtils.add_plane("plane_ceil", location=(0, 0, 10), orientation=(0, 0, 0))
BlenderUtils.add_plane("plane_wall_1", location=(5,0,5), orientation=(0,np.pi/2,0)) BlenderUtils.add_plane(
BlenderUtils.add_plane("plane_wall_2", location=(-5,0,5), orientation=(0,np.pi/2,0)) "plane_wall_1", location=(5, 0, 5), orientation=(0, np.pi / 2, 0)
BlenderUtils.add_plane("plane_wall_3", location=(0,5,5), orientation=(np.pi/2,0,0)) )
BlenderUtils.add_plane("plane_wall_4", location=(0,-5,5), orientation=(np.pi/2,0,0)) BlenderUtils.add_plane(
"plane_wall_2", location=(-5, 0, 5), orientation=(0, np.pi / 2, 0)
BlenderUtils.add_table(table_model_path) )
BlenderUtils.add_plane(
@staticmethod "plane_wall_3", location=(0, 5, 5), orientation=(np.pi / 2, 0, 0)
def set_light_params(light, config): )
light.location = config["location"] BlenderUtils.add_plane(
light.rotation_euler = config["orientation"] "plane_wall_4", location=(0, -5, 5), orientation=(np.pi / 2, 0, 0)
if light.type == 'SUN': )
light.data.energy = config["power"]
elif light.type == 'POINT': BlenderUtils.add_table(table_model_path)
light.data.energy = config["power"]
@staticmethod
@staticmethod def set_light_params(light, config):
def set_camera_params(camera, config, binocular_vision): light.location = config["location"]
light.rotation_euler = config["orientation"]
camera_object = bpy.data.objects.new(BlenderUtils.CAMERA_OBJECT_NAME, None) if light.type == "SUN":
bpy.context.collection.objects.link(camera_object) light.data.energy = config["power"]
cameras = [bpy.data.objects.get("Camera")] elif light.type == "POINT":
camera.location = [0,0,0] light.data.energy = config["power"]
camera.rotation_euler = [0,0,0]
camera.parent = camera_object @staticmethod
if binocular_vision: def set_camera_params(camera, config, binocular_vision):
left_camera = cameras[0]
right_camera = left_camera.copy() camera_object = bpy.data.objects.new(BlenderUtils.CAMERA_OBJECT_NAME, None)
right_camera.name = BlenderUtils.CAMERA_RIGHT_NAME bpy.context.collection.objects.link(camera_object)
right_camera.data = left_camera.data.copy() cameras = [bpy.data.objects.get("Camera")]
right_camera.data.name = BlenderUtils.CAMERA_RIGHT_NAME camera.location = [0, 0, 0]
bpy.context.collection.objects.link(right_camera) camera.rotation_euler = [0, 0, 0]
right_camera.parent = camera_object camera.parent = camera_object
right_camera.location = [config["eye_distance"]/2, 0, 0] if binocular_vision:
left_camera.location = [-config["eye_distance"]/2, 0, 0] left_camera = cameras[0]
binocular_angle = config["eye_angle"] right_camera = left_camera.copy()
half_angle = np.radians(binocular_angle / 2) right_camera.name = BlenderUtils.CAMERA_RIGHT_NAME
right_camera.data = left_camera.data.copy()
left_camera.rotation_euler[1] = -half_angle right_camera.data.name = BlenderUtils.CAMERA_RIGHT_NAME
right_camera.rotation_euler[1] = half_angle bpy.context.collection.objects.link(right_camera)
cameras.append(right_camera) right_camera.parent = camera_object
right_camera.location = [config["eye_distance"] / 2, 0, 0]
for camera in cameras: left_camera.location = [-config["eye_distance"] / 2, 0, 0]
camera.data.clip_start = config["near_plane"] binocular_angle = config["eye_angle"]
camera.data.clip_end = config["far_plane"] half_angle = np.radians(binocular_angle / 2)
bpy.context.scene.render.resolution_x = config["resolution"][0] left_camera.rotation_euler[1] = -half_angle
bpy.context.scene.render.resolution_y = config["resolution"][1] right_camera.rotation_euler[1] = half_angle
sensor_height = 24.0 cameras.append(right_camera)
focal_length = sensor_height / (2 * np.tan(np.radians(config["fov_vertical"]) / 2))
camera.data.lens = focal_length for camera in cameras:
camera.data.sensor_width = sensor_height * config["resolution"][0] / config["resolution"][1] camera.data.clip_start = config["near_plane"]
camera.data.sensor_height = sensor_height camera.data.clip_end = config["far_plane"]
@staticmethod bpy.context.scene.render.resolution_x = config["resolution"][0]
def init_light_and_camera(init_light_and_camera_config, binocular_vision): bpy.context.scene.render.resolution_y = config["resolution"][1]
sensor_height = 24.0
camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) focal_length = sensor_height / (
BlenderUtils.set_camera_params(camera, init_light_and_camera_config[BlenderUtils.CAMERA_NAME], binocular_vision) 2 * np.tan(np.radians(config["fov_vertical"]) / 2)
)
@staticmethod camera.data.lens = focal_length
def get_obj_diag(name): camera.data.sensor_width = (
obj = BlenderUtils.get_obj(name) sensor_height * config["resolution"][0] / config["resolution"][1]
return np.linalg.norm(obj.dimensions) )
camera.data.sensor_height = sensor_height
@staticmethod
def matrix_to_blender_pose(matrix): @staticmethod
location = matrix[:3, 3] def init_light_and_camera(init_light_and_camera_config, binocular_vision):
rotation_matrix = matrix[:3, :3]
rotation_matrix_blender = mathutils.Matrix(rotation_matrix.tolist()) camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME)
rotation_euler = rotation_matrix_blender.to_euler() BlenderUtils.set_camera_params(
return location, rotation_euler camera,
init_light_and_camera_config[BlenderUtils.CAMERA_NAME],
@staticmethod binocular_vision,
def set_camera_at(pose): )
camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME)
location, rotation_euler = BlenderUtils.matrix_to_blender_pose(pose) @staticmethod
def get_obj_diag(name):
camera.location = location obj = BlenderUtils.get_obj(name)
camera.rotation_euler = rotation_euler return np.linalg.norm(obj.dimensions)
@staticmethod @staticmethod
def get_object_bottom_z(obj): def matrix_to_blender_pose(matrix):
vertices = [v.co for v in obj.data.vertices] location = matrix[:3, 3]
vertices_world = [obj.matrix_world @ v for v in vertices] rotation_matrix = matrix[:3, :3]
min_z = min([v.z for v in vertices_world]) rotation_matrix_blender = mathutils.Matrix(rotation_matrix.tolist())
return min_z rotation_euler = rotation_matrix_blender.to_euler()
return location, rotation_euler
@staticmethod
def render_and_save(output_dir, file_name, binocular_vision=False, target_object=None): @staticmethod
target_cameras = [BlenderUtils.CAMERA_NAME] def set_camera_at(pose):
if binocular_vision: camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME)
target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME) location, rotation_euler = BlenderUtils.matrix_to_blender_pose(pose)
for cam_name in target_cameras:
bpy.context.scene.camera = BlenderUtils.get_obj(cam_name) camera.location = location
bpy.context.scene.view_layers["ViewLayer"].use_pass_z = True camera.rotation_euler = rotation_euler
bpy.context.scene.view_layers["ViewLayer"].use_pass_normal = True
cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R" @staticmethod
scene = bpy.context.scene def get_object_bottom_z(obj):
scene.render.filepath = "" vertices = [v.co for v in obj.data.vertices]
vertices_world = [obj.matrix_world @ v for v in vertices]
min_z = min([v.z for v in vertices_world])
mask_dir = os.path.join(output_dir, "mask") return min_z
if not os.path.exists(mask_dir):
os.makedirs(mask_dir) @staticmethod
def render_normal_and_depth(
scene.render.filepath = os.path.join(output_dir, mask_dir, f"{file_name}_{cam_suffix}.png") output_dir, file_name, binocular_vision=False, target_object=None
scene.render.image_settings.color_depth = '8' ):
scene.render.resolution_percentage = 100 # use pass z
scene.render.use_overwrite = False bpy.context.scene.view_layers["ViewLayer"].use_pass_z = True
scene.render.use_file_extension = False target_cameras = [BlenderUtils.CAMERA_NAME]
scene.render.use_placeholder = False if binocular_vision:
target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME)
scene.use_nodes = True
tree = scene.node_tree for cam_name in target_cameras:
bpy.context.scene.camera = BlenderUtils.get_obj(cam_name)
for node in tree.nodes: cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R"
tree.nodes.remove(node) scene = bpy.context.scene
scene.render.filepath = ""
rl = tree.nodes.new('CompositorNodeRLayers')
mask_dir = os.path.join(output_dir, "normal")
map_range = tree.nodes.new('CompositorNodeMapRange') if not os.path.exists(mask_dir):
map_range.inputs['From Min'].default_value = 0.01 os.makedirs(mask_dir)
map_range.inputs['From Max'].default_value = 5
map_range.inputs['To Min'].default_value = 0 scene.render.filepath = os.path.join(
map_range.inputs['To Max'].default_value = 1 output_dir, mask_dir, f"{file_name}_{cam_suffix}.exr"
tree.links.new(rl.outputs['Depth'], map_range.inputs[0]) )
output_depth = tree.nodes.new('CompositorNodeOutputFile') scene.render.image_settings.file_format = "OPEN_EXR"
scene.render.image_settings.color_mode = "RGB"
depth_dir = os.path.join(output_dir, "depth") bpy.context.scene.view_settings.view_transform = "Raw"
if not os.path.exists(depth_dir): scene.render.image_settings.color_depth = "16"
os.makedirs(depth_dir) bpy.context.scene.render.filter_size = 1.5
output_depth.base_path = depth_dir scene.render.resolution_percentage = 100
output_depth.file_slots[0].path = f"{file_name}_{cam_suffix}.####" scene.render.use_overwrite = False
output_depth.format.file_format = 'PNG' scene.render.use_file_extension = False
output_depth.format.color_mode = 'BW' scene.render.use_placeholder = False
output_depth.format.color_depth = '16' scene.use_nodes = True
tree = scene.node_tree
# 创建 Separate XYZ 节点来分离法线的 X, Y, Z 分量
separate_xyz = tree.nodes.new('CompositorNodeSeparateXYZ') for node in tree.nodes:
tree.nodes.remove(node)
# 将法线向量连接到 Separate XYZ 节点
tree.links.new(rl.outputs['Normal'], separate_xyz.inputs[0]) rl = tree.nodes.new("CompositorNodeRLayers")
# 创建 Map Range 节点来分别映射 X, Y, Z 分量 map_range = tree.nodes.new("CompositorNodeMapRange")
map_range_x = tree.nodes.new('CompositorNodeMapRange') map_range.inputs["From Min"].default_value = 0.01
map_range_y = tree.nodes.new('CompositorNodeMapRange') map_range.inputs["From Max"].default_value = 5
map_range_z = tree.nodes.new('CompositorNodeMapRange') map_range.inputs["To Min"].default_value = 0
map_range.inputs["To Max"].default_value = 1
# 设置映射范围 tree.links.new(rl.outputs["Depth"], map_range.inputs[0])
for map_range in [map_range_x, map_range_y, map_range_z]:
map_range.inputs['From Min'].default_value = -1 output_depth = tree.nodes.new("CompositorNodeOutputFile")
map_range.inputs['From Max'].default_value = 1
map_range.inputs['To Min'].default_value = 0 depth_dir = os.path.join(output_dir, "depth")
map_range.inputs['To Max'].default_value = 1 if not os.path.exists(depth_dir):
os.makedirs(depth_dir)
# 分别连接到法线的 X, Y, Z 输出 output_depth.base_path = depth_dir
tree.links.new(separate_xyz.outputs['X'], map_range_x.inputs[0]) output_depth.file_slots[0].path = f"{file_name}_{cam_suffix}.####"
tree.links.new(separate_xyz.outputs['Y'], map_range_y.inputs[0]) output_depth.format.file_format = "PNG"
tree.links.new(separate_xyz.outputs['Z'], map_range_z.inputs[0]) output_depth.format.color_mode = "BW"
output_depth.format.color_depth = "16"
# 合并 X, Y, Z 分量到一个 RGB 输出 tree.links.new(map_range.outputs[0], output_depth.inputs[0])
combine_rgb = tree.nodes.new('CompositorNodeCombineXYZ') bpy.ops.render.render(write_still=True)
tree.links.new(map_range_x.outputs[0], combine_rgb.inputs['X'])
tree.links.new(map_range_y.outputs[0], combine_rgb.inputs['Y']) msg = "success"
tree.links.new(map_range_z.outputs[0], combine_rgb.inputs['Z']) return msg
# 输出到文件 @staticmethod
output_normal = tree.nodes.new('CompositorNodeOutputFile') def render_mask(
normal_dir = os.path.join(output_dir, "normal") output_dir, file_name, binocular_vision=False, target_object=None
if not os.path.exists(normal_dir): ):
os.makedirs(normal_dir) target_cameras = [BlenderUtils.CAMERA_NAME]
output_normal.base_path = normal_dir if binocular_vision:
output_normal.file_slots[0].path = f"{file_name}_{cam_suffix}.####" target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME)
output_normal.format.file_format = 'PNG'
output_normal.format.color_mode = 'RGB' for cam_name in target_cameras:
output_normal.format.color_depth = '8' bpy.context.scene.camera = BlenderUtils.get_obj(cam_name)
cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R"
tree.links.new(combine_rgb.outputs[0], output_normal.inputs[0]) scene = bpy.context.scene
bpy.ops.render.render(write_still=True) scene.render.filepath = ""
mask_dir = os.path.join(output_dir, "mask")
if not os.path.exists(mask_dir):
msg = "success" os.makedirs(mask_dir)
return msg
scene.render.filepath = os.path.join(
@staticmethod output_dir, mask_dir, f"{file_name}_{cam_suffix}.png"
def save_cam_params(scene_dir, idx, binocular_vision=False): )
camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) scene.use_nodes = True
extrinsic = np.array(camera.matrix_world) tree = scene.node_tree
cam_data = camera.data
focal_length = cam_data.lens for node in tree.nodes:
sensor_width = cam_data.sensor_width tree.nodes.remove(node)
sensor_height = cam_data.sensor_height
resolution_x = bpy.context.scene.render.resolution_x rl = tree.nodes.new("CompositorNodeRLayers")
resolution_y = bpy.context.scene.render.resolution_y scene.render.image_settings.file_format = "PNG"
intrinsic = np.zeros((3, 3)) scene.render.image_settings.color_mode = "RGB"
intrinsic[0, 0] = focal_length * resolution_x / sensor_width # fx scene.render.image_settings.color_depth = "8"
intrinsic[1, 1] = focal_length * resolution_y / sensor_height # fy scene.render.resolution_percentage = 100
intrinsic[0, 2] = resolution_x / 2.0 # cx scene.render.use_overwrite = False
intrinsic[1, 2] = resolution_y / 2.0 # cy scene.render.use_file_extension = False
intrinsic[2, 2] = 1.0 scene.render.use_placeholder = False
cam_object = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME)
extrinsic_cam_object = np.array(cam_object.matrix_world)
data = { bpy.ops.render.render(write_still=True)
"extrinsic": extrinsic.tolist(),
"extrinsic_cam_object": extrinsic_cam_object.tolist(), msg = "success"
"intrinsic": intrinsic.tolist(), return msg
"far_plane": camera.data.clip_end,
"near_plane": camera.data.clip_start, @staticmethod
} def save_cam_params(scene_dir, idx, binocular_vision=False):
if binocular_vision: camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME)
right_camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_RIGHT_NAME) extrinsic = np.array(camera.matrix_world)
extrinsic_right = np.array(right_camera.matrix_world) cam_data = camera.data
print("result:",extrinsic_right) focal_length = cam_data.lens
sensor_width = cam_data.sensor_width
data["extrinsic_R"] = extrinsic_right.tolist() sensor_height = cam_data.sensor_height
resolution_x = bpy.context.scene.render.resolution_x
cam_params_dir = os.path.join(scene_dir, "camera_params") resolution_y = bpy.context.scene.render.resolution_y
if not os.path.exists(cam_params_dir): intrinsic = np.zeros((3, 3))
os.makedirs(cam_params_dir) intrinsic[0, 0] = focal_length * resolution_x / sensor_width # fx
cam_params_path = os.path.join(cam_params_dir, f"{idx}.json") intrinsic[1, 1] = focal_length * resolution_y / sensor_height # fy
with open(cam_params_path, "w") as f: intrinsic[0, 2] = resolution_x / 2.0 # cx
json.dump(data, f, indent=4) intrinsic[1, 2] = resolution_y / 2.0 # cy
intrinsic[2, 2] = 1.0
@staticmethod cam_object = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME)
def reset_objects_and_platform(): extrinsic_cam_object = np.array(cam_object.matrix_world)
all_objects = bpy.data.objects data = {
keep_objects = {"plane_floor", "plane_ceil", "plane_wall_1", "plane_wall_2", "plane_wall_3", "plane_wall_4"} "extrinsic": extrinsic.tolist(),
keep_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) "extrinsic_cam_object": extrinsic_cam_object.tolist(),
keep_objects.add(BlenderUtils.CAMERA_NAME) "intrinsic": intrinsic.tolist(),
keep_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) "far_plane": camera.data.clip_end,
keep_objects.add(BlenderUtils.TABLE_NAME) "near_plane": camera.data.clip_start,
}
for obj in all_objects: if binocular_vision:
if obj.name not in keep_objects: right_camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_RIGHT_NAME)
bpy.data.objects.remove(obj, do_unlink=True) extrinsic_right = np.array(right_camera.matrix_world)
print("result:", extrinsic_right)
for block in bpy.data.meshes:
if block.users == 0: data["extrinsic_R"] = extrinsic_right.tolist()
bpy.data.meshes.remove(block)
for block in bpy.data.materials: cam_params_dir = os.path.join(scene_dir, "camera_params")
if block.users == 0: if not os.path.exists(cam_params_dir):
bpy.data.materials.remove(block) os.makedirs(cam_params_dir)
for block in bpy.data.images: cam_params_path = os.path.join(cam_params_dir, f"{idx}.json")
if block.users == 0: with open(cam_params_path, "w") as f:
bpy.data.images.remove(block) json.dump(data, f, indent=4)
gc.collect() @staticmethod
bpy.context.scene.frame_set(0) def reset_objects_and_platform():
all_objects = bpy.data.objects
@staticmethod keep_objects = {
def save_scene_info(scene_root_dir, display_table_config, target_name): "plane_floor",
all_objects = bpy.data.objects "plane_ceil",
no_save_objects = {"plane_floor", "plane_ceil", "plane_wall_1", "plane_wall_2", "plane_wall_3", "plane_wall_4"} "plane_wall_1",
no_save_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) "plane_wall_2",
no_save_objects.add(BlenderUtils.CAMERA_NAME) "plane_wall_3",
no_save_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) "plane_wall_4",
no_save_objects.add(BlenderUtils.TABLE_NAME) }
scene_info = {} keep_objects.add(BlenderUtils.CAMERA_OBJECT_NAME)
for obj in all_objects: keep_objects.add(BlenderUtils.CAMERA_NAME)
if obj.name not in no_save_objects and obj.name != BlenderUtils.DISPLAY_TABLE_NAME: keep_objects.add(BlenderUtils.CAMERA_RIGHT_NAME)
obj_info = { keep_objects.add(BlenderUtils.TABLE_NAME)
"location": list(obj.location),
"rotation_euler": list(obj.rotation_euler), for obj in all_objects:
"scale": list(obj.scale) if obj.name not in keep_objects:
} bpy.data.objects.remove(obj, do_unlink=True)
scene_info[obj.name] = obj_info
scene_info[BlenderUtils.DISPLAY_TABLE_NAME] = display_table_config for block in bpy.data.meshes:
scene_info["target_name"] = target_name if block.users == 0:
scene_info_path = os.path.join(scene_root_dir, "scene_info.json") bpy.data.meshes.remove(block)
with open(scene_info_path, "w") as outfile: for block in bpy.data.materials:
json.dump(scene_info, outfile) if block.users == 0:
bpy.data.materials.remove(block)
for block in bpy.data.images:
if block.users == 0:
bpy.data.images.remove(block)
gc.collect()
bpy.context.scene.frame_set(0)
@staticmethod
def save_scene_info(scene_root_dir, display_table_config, target_name):
all_objects = bpy.data.objects
no_save_objects = {
"plane_floor",
"plane_ceil",
"plane_wall_1",
"plane_wall_2",
"plane_wall_3",
"plane_wall_4",
}
no_save_objects.add(BlenderUtils.CAMERA_OBJECT_NAME)
no_save_objects.add(BlenderUtils.CAMERA_NAME)
no_save_objects.add(BlenderUtils.CAMERA_RIGHT_NAME)
no_save_objects.add(BlenderUtils.TABLE_NAME)
scene_info = {}
for obj in all_objects:
if (
obj.name not in no_save_objects
and obj.name != BlenderUtils.DISPLAY_TABLE_NAME
):
obj_info = {
"location": list(obj.matrix_world.translation),
"rotation_euler": list(obj.matrix_world.to_euler()),
"scale": list(obj.scale),
}
scene_info[obj.name] = obj_info
scene_info[BlenderUtils.DISPLAY_TABLE_NAME] = display_table_config
scene_info["target_name"] = target_name
scene_info_path = os.path.join(scene_root_dir, "scene_info.json")
with open(scene_info_path, "w") as outfile:
json.dump(scene_info, outfile)
@staticmethod
def save_blend(scene_root_dir):
blend_path = os.path.join(scene_root_dir, "scene.blend")
bpy.ops.wm.save_as_mainfile(filepath=blend_path)

361
utils/cad_blender_util.py Normal file
View File

@@ -0,0 +1,361 @@
import os
import json
import bpy
import time
import gc
import numpy as np
import mathutils
class CADBlenderUtils:
TABLE_NAME: str = "table"
CAMERA_NAME: str = "Camera"
CAMERA_RIGHT_NAME: str = "CameraRight"
CAMERA_OBJECT_NAME: str = "CameraObject"
DISPLAY_TABLE_NAME: str = "display_table"
MESH_FILE_NAME: str = "mesh.obj"
@staticmethod
def get_obj_path(obj_dir, name):
return os.path.join(obj_dir, name, CADBlenderUtils.MESH_FILE_NAME)
    @staticmethod
    def load_obj(name, mesh_path, scale=1):
        """Import the OBJ file at *mesh_path*, rename it to *name*, apply a
        uniform *scale* and register it as a rigid body.

        :return: the newly imported Blender object.

        NOTE(review): relies on the importer leaving the new object selected;
        ``selected_objects[-1]`` would break if the import selects nothing —
        confirm against the Blender version in use.
        """
        print(mesh_path)
        bpy.ops.wm.obj_import(filepath=mesh_path)
        # The freshly imported object is the last entry in the selection.
        loaded_object = bpy.context.selected_objects[-1]
        loaded_object.name = name
        loaded_object.data.name = name
        loaded_object.scale = (scale, scale, scale)
        # Adds the active object to the rigid-body simulation world.
        bpy.ops.rigidbody.object_add()
        return loaded_object
@staticmethod
def get_obj(name):
return bpy.data.objects.get(name)
@staticmethod
def get_obj_pose(name):
obj = CADBlenderUtils.get_obj(name)
return np.asarray(obj.matrix_world)
@staticmethod
def add_plane(name, location, orientation, size=10):
bpy.ops.mesh.primitive_plane_add(size=size, location=location)
plane = bpy.context.selected_objects[-1]
plane.name = name
plane.rotation_euler = orientation
bpy.ops.rigidbody.object_add()
bpy.context.object.rigid_body.type = "PASSIVE"
@staticmethod
def setup_scene(init_light_and_camera_config, table_model_path, binocular_vision):
bpy.context.scene.render.engine = "BLENDER_EEVEE"
bpy.context.scene.display.shading.show_xray = False
bpy.context.scene.display.shading.use_dof = False
bpy.context.scene.display.render_aa = "OFF"
bpy.context.scene.view_settings.view_transform = "Standard"
bpy.context.scene.eevee.use_ssr = False # 关闭屏幕空间反射
bpy.context.scene.eevee.use_bloom = False # 关闭辉光
bpy.context.scene.eevee.use_gtao = False # 关闭环境光遮蔽
bpy.context.scene.eevee.use_soft_shadows = False # 关闭软阴影
bpy.context.scene.eevee.use_shadows = False # 关闭所有阴影
bpy.context.scene.world.use_nodes = False # 如果你不需要环境光,关闭环境节点
# bpy.context.scene.eevee.use_sss = False # 关闭次表面散射
# 2. 设置最低的采样数
bpy.context.scene.eevee.taa_render_samples = 1
bpy.context.scene.eevee.taa_samples = 1
CADBlenderUtils.init_light_and_camera(
init_light_and_camera_config, binocular_vision
)
@staticmethod
def set_camera_params(camera, config, binocular_vision):
camera_object = bpy.data.objects.new(CADBlenderUtils.CAMERA_OBJECT_NAME, None)
bpy.context.collection.objects.link(camera_object)
cameras = [bpy.data.objects.get("Camera")]
camera.location = [0, 0, 0]
camera.rotation_euler = [0, 0, 0]
camera.parent = camera_object
if binocular_vision:
left_camera = cameras[0]
right_camera = left_camera.copy()
right_camera.name = CADBlenderUtils.CAMERA_RIGHT_NAME
right_camera.data = left_camera.data.copy()
right_camera.data.name = CADBlenderUtils.CAMERA_RIGHT_NAME
bpy.context.collection.objects.link(right_camera)
right_camera.parent = camera_object
right_camera.location = [config["eye_distance"] / 2, 0, 0]
left_camera.location = [-config["eye_distance"] / 2, 0, 0]
binocular_angle = config["eye_angle"]
half_angle = np.radians(binocular_angle / 2)
left_camera.rotation_euler[1] = -half_angle
right_camera.rotation_euler[1] = half_angle
cameras.append(right_camera)
for camera in cameras:
camera.data.clip_start = config["near_plane"]
camera.data.clip_end = config["far_plane"]
bpy.context.scene.render.resolution_x = config["resolution"][0]
bpy.context.scene.render.resolution_y = config["resolution"][1]
sensor_height = 24.0
focal_length = sensor_height / (
2 * np.tan(np.radians(config["fov_vertical"]) / 2)
)
camera.data.lens = focal_length
camera.data.sensor_width = (
sensor_height * config["resolution"][0] / config["resolution"][1]
)
camera.data.sensor_height = sensor_height
@staticmethod
def init_light_and_camera(init_light_and_camera_config, binocular_vision):
camera = CADBlenderUtils.get_obj(CADBlenderUtils.CAMERA_NAME)
CADBlenderUtils.set_camera_params(
camera,
init_light_and_camera_config[CADBlenderUtils.CAMERA_NAME],
binocular_vision,
)
@staticmethod
def get_obj_diag(name):
obj = CADBlenderUtils.get_obj(name)
return np.linalg.norm(obj.dimensions)
@staticmethod
def matrix_to_blender_pose(matrix):
location = matrix[:3, 3]
rotation_matrix = matrix[:3, :3]
rotation_matrix_blender = mathutils.Matrix(rotation_matrix.tolist())
rotation_euler = rotation_matrix_blender.to_euler()
return location, rotation_euler
@staticmethod
def set_camera_at(pose):
camera = CADBlenderUtils.get_obj(CADBlenderUtils.CAMERA_OBJECT_NAME)
location, rotation_euler = CADBlenderUtils.matrix_to_blender_pose(pose)
camera.location = location
camera.rotation_euler = rotation_euler
@staticmethod
def get_object_bottom_z(obj):
vertices = [v.co for v in obj.data.vertices]
vertices_world = [obj.matrix_world @ v for v in vertices]
min_z = min([v.z for v in vertices_world])
return min_z
@staticmethod
def render_normal_and_depth(
output_dir, file_name, binocular_vision=False, target_object=None
):
# use pass z
bpy.context.scene.view_layers["ViewLayer"].use_pass_z = True
target_cameras = [CADBlenderUtils.CAMERA_NAME]
if binocular_vision:
target_cameras.append(CADBlenderUtils.CAMERA_RIGHT_NAME)
for cam_name in target_cameras:
bpy.context.scene.camera = CADBlenderUtils.get_obj(cam_name)
cam_suffix = "L" if cam_name == CADBlenderUtils.CAMERA_NAME else "R"
scene = bpy.context.scene
scene.render.filepath = ""
mask_dir = os.path.join(output_dir, "normal")
if not os.path.exists(mask_dir):
os.makedirs(mask_dir)
scene.render.filepath = os.path.join(
output_dir, mask_dir, f"{file_name}_{cam_suffix}.exr"
)
scene.render.image_settings.file_format = "OPEN_EXR"
scene.render.image_settings.color_mode = "RGB"
bpy.context.scene.view_settings.view_transform = "Raw"
scene.render.image_settings.color_depth = "16"
bpy.context.scene.render.filter_size = 1.5
scene.render.resolution_percentage = 100
scene.render.use_overwrite = False
scene.render.use_file_extension = False
scene.render.use_placeholder = False
scene.use_nodes = True
tree = scene.node_tree
for node in tree.nodes:
tree.nodes.remove(node)
rl = tree.nodes.new("CompositorNodeRLayers")
map_range = tree.nodes.new("CompositorNodeMapRange")
map_range.inputs["From Min"].default_value = 0.01
map_range.inputs["From Max"].default_value = 5
map_range.inputs["To Min"].default_value = 0
map_range.inputs["To Max"].default_value = 1
tree.links.new(rl.outputs["Depth"], map_range.inputs[0])
output_depth = tree.nodes.new("CompositorNodeOutputFile")
depth_dir = os.path.join(output_dir, "depth")
if not os.path.exists(depth_dir):
os.makedirs(depth_dir)
output_depth.base_path = depth_dir
output_depth.file_slots[0].path = f"{file_name}_{cam_suffix}.####"
output_depth.format.file_format = "PNG"
output_depth.format.color_mode = "BW"
output_depth.format.color_depth = "16"
tree.links.new(map_range.outputs[0], output_depth.inputs[0])
bpy.ops.render.render(write_still=True)
msg = "success"
return msg
@staticmethod
def render_mask(
output_dir, file_name, binocular_vision=False, target_object=None
):
target_cameras = [CADBlenderUtils.CAMERA_NAME]
if binocular_vision:
target_cameras.append(CADBlenderUtils.CAMERA_RIGHT_NAME)
for cam_name in target_cameras:
bpy.context.scene.camera = CADBlenderUtils.get_obj(cam_name)
cam_suffix = "L" if cam_name == CADBlenderUtils.CAMERA_NAME else "R"
scene = bpy.context.scene
scene.render.filepath = ""
mask_dir = os.path.join(output_dir, "mask")
if not os.path.exists(mask_dir):
os.makedirs(mask_dir)
scene.render.filepath = os.path.join(
output_dir, mask_dir, f"{file_name}_{cam_suffix}.png"
)
scene.render.image_settings.color_depth = "8"
scene.render.resolution_percentage = 100
scene.render.use_overwrite = False
scene.render.use_file_extension = False
scene.render.use_placeholder = False
bpy.ops.render.render(write_still=True)
msg = "success"
return msg
@staticmethod
def save_cam_params(scene_dir, idx, binocular_vision=False):
camera = CADBlenderUtils.get_obj(CADBlenderUtils.CAMERA_NAME)
extrinsic = np.array(camera.matrix_world)
cam_data = camera.data
focal_length = cam_data.lens
sensor_width = cam_data.sensor_width
sensor_height = cam_data.sensor_height
resolution_x = bpy.context.scene.render.resolution_x
resolution_y = bpy.context.scene.render.resolution_y
intrinsic = np.zeros((3, 3))
intrinsic[0, 0] = focal_length * resolution_x / sensor_width # fx
intrinsic[1, 1] = focal_length * resolution_y / sensor_height # fy
intrinsic[0, 2] = resolution_x / 2.0 # cx
intrinsic[1, 2] = resolution_y / 2.0 # cy
intrinsic[2, 2] = 1.0
cam_object = CADBlenderUtils.get_obj(CADBlenderUtils.CAMERA_OBJECT_NAME)
extrinsic_cam_object = np.array(cam_object.matrix_world)
data = {
"extrinsic": extrinsic.tolist(),
"extrinsic_cam_object": extrinsic_cam_object.tolist(),
"intrinsic": intrinsic.tolist(),
"far_plane": camera.data.clip_end,
"near_plane": camera.data.clip_start,
}
if binocular_vision:
right_camera = CADBlenderUtils.get_obj(CADBlenderUtils.CAMERA_RIGHT_NAME)
extrinsic_right = np.array(right_camera.matrix_world)
print("result:", extrinsic_right)
data["extrinsic_R"] = extrinsic_right.tolist()
cam_params_dir = os.path.join(scene_dir, "camera_params")
if not os.path.exists(cam_params_dir):
os.makedirs(cam_params_dir)
cam_params_path = os.path.join(cam_params_dir, f"{idx}.json")
with open(cam_params_path, "w") as f:
json.dump(data, f, indent=4)
@staticmethod
def reset_objects_and_platform():
all_objects = bpy.data.objects
keep_objects = {
"plane_floor",
"plane_ceil",
"plane_wall_1",
"plane_wall_2",
"plane_wall_3",
"plane_wall_4",
}
keep_objects.add(CADBlenderUtils.CAMERA_OBJECT_NAME)
keep_objects.add(CADBlenderUtils.CAMERA_NAME)
keep_objects.add(CADBlenderUtils.CAMERA_RIGHT_NAME)
keep_objects.add(CADBlenderUtils.TABLE_NAME)
for obj in all_objects:
if obj.name not in keep_objects:
bpy.data.objects.remove(obj, do_unlink=True)
for block in bpy.data.meshes:
if block.users == 0:
bpy.data.meshes.remove(block)
for block in bpy.data.materials:
if block.users == 0:
bpy.data.materials.remove(block)
for block in bpy.data.images:
if block.users == 0:
bpy.data.images.remove(block)
gc.collect()
bpy.context.scene.frame_set(0)
@staticmethod
def save_scene_info(scene_root_dir, display_table_config, target_name):
all_objects = bpy.data.objects
no_save_objects = {
"plane_floor",
"plane_ceil",
"plane_wall_1",
"plane_wall_2",
"plane_wall_3",
"plane_wall_4",
}
no_save_objects.add(CADBlenderUtils.CAMERA_OBJECT_NAME)
no_save_objects.add(CADBlenderUtils.CAMERA_NAME)
no_save_objects.add(CADBlenderUtils.CAMERA_RIGHT_NAME)
no_save_objects.add(CADBlenderUtils.TABLE_NAME)
scene_info = {}
for obj in all_objects:
if (
obj.name not in no_save_objects
and obj.name != CADBlenderUtils.DISPLAY_TABLE_NAME
):
obj_info = {
"location": list(obj.location),
"rotation_euler": list(obj.rotation_euler),
"scale": list(obj.scale),
}
scene_info[obj.name] = obj_info
scene_info[CADBlenderUtils.DISPLAY_TABLE_NAME] = display_table_config
scene_info["target_name"] = target_name
scene_info_path = os.path.join(scene_root_dir, "scene_info.json")
with open(scene_info_path, "w") as outfile:
json.dump(scene_info, outfile)
@staticmethod
def save_blend(scene_root_dir):
blend_path = os.path.join(scene_root_dir, "scene.blend")
bpy.ops.wm.save_as_mainfile(filepath=blend_path)

View File

@@ -0,0 +1,146 @@
import numpy as np
import bmesh
from collections import defaultdict
from scipy.spatial.transform import Rotation as R
from utils.pose import PoseUtil
from utils.pts import PtsUtil
import random
class CADViewSampleUtil:
    """Samples candidate camera viewpoints around a CAD mesh object."""

    @staticmethod
    def farthest_point_sampling(points, num_samples):
        """Greedy FPS over an (N, 3) array; returns (points, indices).

        If num_samples >= N, returns the input unchanged.
        """
        num_points = points.shape[0]
        if num_samples >= num_points:
            return points, np.arange(num_points)
        sampled_indices = np.zeros(num_samples, dtype=int)
        sampled_indices[0] = np.random.randint(num_points)
        min_distances = np.full(num_points, np.inf)
        for i in range(1, num_samples):
            current_point = points[sampled_indices[i - 1]]
            dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
            min_distances = np.minimum(min_distances, dist_to_current_point)
            sampled_indices[i] = np.argmax(min_distances)
        downsampled_points = points[sampled_indices]
        return downsampled_points, sampled_indices

    @staticmethod
    def voxel_downsample(points, voxel_size):
        """Keep the first point per voxel; returns (points, kept indices)."""
        voxel_grid = defaultdict(list)
        for i, point in enumerate(points):
            voxel_index = tuple((point // voxel_size).astype(int))
            voxel_grid[voxel_index].append(i)

        downsampled_points = []
        downsampled_indices = []
        for indices in voxel_grid.values():
            selected_index = indices[0]
            downsampled_points.append(points[selected_index])
            downsampled_indices.append(selected_index)

        return np.array(downsampled_points), downsampled_indices

    @staticmethod
    def sample_view_data(obj, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int = 1, pertube_repeat:int = 1) -> dict:
        """Sample look-at points on the mesh surface and perturbed camera
        positions along (outward-facing) vertex normals, then voxel- and
        FPS-downsample them. All results are in the object's local frame.
        """
        view_data = {
            "look_at_points": [],
            "cam_positions": [],
        }
        mesh = obj.data
        bm = bmesh.new()
        bm.from_mesh(mesh)
        bm.verts.ensure_lookup_table()
        bm.faces.ensure_lookup_table()
        bm.normal_update()

        look_at_points = []
        cam_positions = []
        normals = []
        for v in bm.verts:
            look_at_point = np.array(v.co)
            # Average the per-loop normals around this vertex.
            normal = np.zeros(3)
            for loop in v.link_loops:
                normal += np.array(loop.calc_normal())
            normal /= len(v.link_loops)
            normal = normal / np.linalg.norm(normal)
            if np.isnan(normal).any():
                continue
            if np.dot(normal, look_at_point) < 0:
                normal = -normal

            for _ in range(pertube_repeat):
                perturb_angle = np.radians(np.random.uniform(0, 10))
                perturb_axis = np.random.normal(size=3)
                perturb_axis /= np.linalg.norm(perturb_axis)
                rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
                perturbed_normal = np.dot(rotation_matrix, normal)
                middle_distance = (distance_range[0] + distance_range[1]) / 2
                perturbed_distance = random.uniform(middle_distance-0.05, middle_distance+0.05)
                cam_position = look_at_point + perturbed_distance * perturbed_normal
                look_at_points.append(look_at_point)
                cam_positions.append(cam_position)
                # BUGFIX: append the normal once per sample so that `normals`
                # stays index-aligned with `look_at_points` (the original
                # appended once per vertex, misaligning the arrays whenever
                # pertube_repeat > 1 or a NaN normal was skipped).
                normals.append(normal)

        bm.free()
        look_at_points = np.array(look_at_points)
        cam_positions = np.array(cam_positions)
        voxel_downsampled_look_at_points, selected_indices = CADViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
        voxel_downsampled_cam_positions = cam_positions[selected_indices]
        voxel_downsampled_normals = np.array(normals)[selected_indices]

        fps_downsampled_look_at_points, selected_indices = CADViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views*2)
        fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]

        view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
        view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
        view_data["normals"] = voxel_downsampled_normals
        view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points
        return view_data

    @staticmethod
    def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
        """Transform sampled points and normals into the world frame."""
        world_points = []
        world_normals = []
        for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
            voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
            normal_world = obj_world_pose[:3, :3] @ normal
            world_points.append(voxel_down_sampled_points_world[:3])
            world_normals.append(normal_world)
        return np.array(world_points), np.array(world_normals)

    @staticmethod
    def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int) -> np.ndarray:
        """Build world-frame 4x4 camera poses looking at the sampled points,
        FPS-limited to at most `max_views` poses.
        """
        cam_poses = []
        for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
            look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
            cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
            look_at_point_world = look_at_point_world[:3]
            cam_position_world = cam_position_world[:3]
            forward_vector = cam_position_world - look_at_point_world
            forward_vector /= np.linalg.norm(forward_vector)
            # NOTE(review): up/right are not re-orthogonalized against
            # forward, so the rotation is only approximately orthonormal
            # when forward is far from horizontal — confirm intended.
            up_vector = np.array([0, 0, 1])
            right_vector = np.cross(up_vector, forward_vector)
            rotation_matrix = np.array([right_vector, up_vector, forward_vector]).T
            cam_pose = np.eye(4)
            cam_pose[:3, :3] = rotation_matrix
            cam_pose[:3, 3] = cam_position_world
            cam_poses.append(cam_pose)
        if len(cam_poses) > max_views:
            cam_points = np.array([cam_pose[:3, 3] for cam_pose in cam_poses])
            _, indices = PtsUtil.fps_downsample_point_cloud(cam_points, max_views, require_idx=True)
            cam_poses = [cam_poses[i] for i in indices]
        return np.array(cam_poses)

    @staticmethod
    def sample_view_data_world_space(obj, distance_range:tuple = (0.3,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
        """Sample camera poses and surface points in the world frame.

        `min_cam_table_included_degree` and `random_view_ratio` are accepted
        for API parity with ViewSampleUtil but are not used by the CAD
        sampler.
        """
        obj_world_pose = np.asarray(obj.matrix_world)
        view_data = CADViewSampleUtil.sample_view_data(obj, distance_range, voxel_size, max_views)
        # BUGFIX: get_cam_pose takes 3 arguments; the original passed 5
        # (min_cam_table_included_degree, random_view_ratio), which raised
        # TypeError at runtime.
        view_data["cam_poses"] = CADViewSampleUtil.get_cam_pose(view_data, obj_world_pose, max_views)
        view_data["voxel_down_sampled_points"], view_data["normals"] = CADViewSampleUtil.get_world_points_and_normals(view_data, obj_world_pose)
        return view_data

96
utils/material_util.py Normal file
View File

@@ -0,0 +1,96 @@
import bpy
class MaterialUtil:
    """Helpers for building and assigning emission-based render materials."""

    ''' --------- Basic --------- '''
    @staticmethod
    def change_object_material(obj, mat):
        """Make *mat* the object's first material (append if no slots exist)."""
        slots = obj.data.materials
        if slots:
            slots[0] = mat
        else:
            slots.append(mat)

    ''' ------- Materials ------- '''
    @staticmethod
    def create_normal_material():
        """Create a material that emits the camera-space surface normal.

        The world-space normal is transformed to camera space, its Z component
        negated, and emitted unlit; a Light Path node mixes it in only for
        camera rays, blended 50/50 with a second emission shader.
        """
        mat = bpy.data.materials.new(name="NormalMaterial")
        mat.use_nodes = True
        node_pool = mat.node_tree.nodes
        link = mat.node_tree.links.new
        node_pool.clear()

        geometry = node_pool.new(type="ShaderNodeNewGeometry")
        to_camera = node_pool.new(type="ShaderNodeVectorTransform")
        to_camera.vector_type = 'VECTOR'
        to_camera.convert_from = 'WORLD'
        to_camera.convert_to = 'CAMERA'
        split_xyz = node_pool.new(type="ShaderNodeSeparateXYZ")

        scale_x = node_pool.new(type="ShaderNodeMath")
        scale_x.operation = 'MULTIPLY'
        scale_x.inputs[1].default_value = 1.0
        scale_y = node_pool.new(type="ShaderNodeMath")
        scale_y.operation = 'MULTIPLY'
        scale_y.inputs[1].default_value = 1.0
        scale_z = node_pool.new(type="ShaderNodeMath")
        scale_z.operation = 'MULTIPLY'
        # Z is negated — presumably to match the camera-space normal
        # convention; confirm against the consumer of the EXR.
        scale_z.inputs[1].default_value = -1.0

        merge_xyz = node_pool.new(type="ShaderNodeCombineXYZ")
        light_path = node_pool.new(type="ShaderNodeLightPath")
        emit_normal = node_pool.new(type="ShaderNodeEmission")
        emit_normal.inputs['Strength'].default_value = 1.0
        emit_plain = node_pool.new(type="ShaderNodeEmission")
        emit_plain.inputs['Strength'].default_value = 1.0
        mix_camera_only = node_pool.new(type="ShaderNodeMixShader")
        mix_half = node_pool.new(type="ShaderNodeMixShader")
        mix_half.inputs['Fac'].default_value = 0.5
        surface_out = node_pool.new(type="ShaderNodeOutputMaterial")

        link(geometry.outputs['Normal'], to_camera.inputs['Vector'])
        link(to_camera.outputs['Vector'], split_xyz.inputs['Vector'])
        link(split_xyz.outputs['X'], scale_x.inputs[0])
        link(split_xyz.outputs['Y'], scale_y.inputs[0])
        link(split_xyz.outputs['Z'], scale_z.inputs[0])
        link(scale_x.outputs['Value'], merge_xyz.inputs['X'])
        link(scale_y.outputs['Value'], merge_xyz.inputs['Y'])
        link(scale_z.outputs['Value'], merge_xyz.inputs['Z'])
        link(merge_xyz.outputs['Vector'], emit_normal.inputs['Color'])
        link(light_path.outputs['Is Camera Ray'], mix_camera_only.inputs['Fac'])
        link(emit_normal.outputs['Emission'], mix_camera_only.inputs[2])
        link(mix_camera_only.outputs['Shader'], mix_half.inputs[1])
        link(emit_plain.outputs['Emission'], mix_half.inputs[2])
        link(mix_half.outputs['Shader'], surface_out.inputs['Surface'])
        return mat

    @staticmethod
    def create_mask_material(color=(1.0, 1.0, 1.0)):
        """Create a flat, unlit material emitting a constant *color*."""
        mat = bpy.data.materials.new(name="MaskMaterial")
        mat.use_nodes = True
        node_pool = mat.node_tree.nodes
        link = mat.node_tree.links.new
        node_pool.clear()

        emit = node_pool.new(type="ShaderNodeEmission")
        emit.inputs['Color'].default_value = (*color, 1.0)
        emit.inputs['Strength'].default_value = 1.0
        surface_out = node_pool.new(type="ShaderNodeOutputMaterial")
        link(emit.outputs['Emission'], surface_out.inputs['Surface'])
        return mat
# -------- debug --------
if __name__ == "__main__":
    # Quick manual check: paint the default cube with the normal material.
    target = bpy.data.objects.get("Cube")
    MaterialUtil.change_object_material(target, MaterialUtil.create_normal_material())

View File

@@ -1,151 +1,166 @@
import numpy as np import numpy as np
class PoseUtil: class PoseUtil:
ROTATION = 1 ROTATION = 1
TRANSLATION = 2 TRANSLATION = 2
SCALE = 3 SCALE = 3
@staticmethod @staticmethod
def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False): def get_uniform_translation(trans_m_min, trans_m_max, trans_unit, debug=False):
if isinstance(trans_m_min, list): if isinstance(trans_m_min, list):
x_min, y_min, z_min = trans_m_min x_min, y_min, z_min = trans_m_min
x_max, y_max, z_max = trans_m_max x_max, y_max, z_max = trans_m_max
else: else:
x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min x_min, y_min, z_min = trans_m_min, trans_m_min, trans_m_min
x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max x_max, y_max, z_max = trans_m_max, trans_m_max, trans_m_max
x = np.random.uniform(x_min, x_max) x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max) y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max) z = np.random.uniform(z_min, z_max)
translation = np.array([x, y, z]) translation = np.array([x, y, z])
if trans_unit == "cm": if trans_unit == "cm":
translation = translation / 100 translation = translation / 100
if debug: if debug:
print("uniform translation:", translation) print("uniform translation:", translation)
return translation return translation
@staticmethod @staticmethod
def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False): def get_uniform_rotation(rot_degree_min=0, rot_degree_max=180, debug=False):
axis = np.random.randn(3) axis = np.random.randn(3)
axis /= np.linalg.norm(axis) axis /= np.linalg.norm(axis)
theta = np.random.uniform( theta = np.random.uniform(
rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi rot_degree_min / 180 * np.pi, rot_degree_max / 180 * np.pi
) )
K = np.array( K = np.array(
[[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]] [[0, -axis[2], axis[1]], [axis[2], 0, -axis[0]], [-axis[1], axis[0], 0]]
) )
R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K) R = np.eye(3) + np.sin(theta) * K + (1 - np.cos(theta)) * (K @ K)
if debug: if debug:
print("uniform rotation:", theta * 180 / np.pi) print("uniform rotation:", theta * 180 / np.pi)
return R return R
@staticmethod @staticmethod
def get_uniform_pose( def get_uniform_pose(
trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False trans_min, trans_max, rot_min=0, rot_max=180, trans_unit="cm", debug=False
): ):
translation = PoseUtil.get_uniform_translation( translation = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug trans_min, trans_max, trans_unit, debug
) )
rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) rotation = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
pose = np.eye(4) pose = np.eye(4)
pose[:3, :3] = rotation pose[:3, :3] = rotation
pose[:3, 3] = translation pose[:3, 3] = translation
return pose return pose
@staticmethod @staticmethod
def get_n_uniform_pose( def get_n_uniform_pose(
trans_min, trans_min,
trans_max, trans_max,
rot_min=0, rot_min=0,
rot_max=180, rot_max=180,
n=1, n=1,
trans_unit="cm", trans_unit="cm",
fix=None, fix=None,
contain_canonical=True, contain_canonical=True,
debug=False, debug=False,
): ):
if fix == PoseUtil.ROTATION: if fix == PoseUtil.ROTATION:
translations = np.zeros((n, 3)) translations = np.zeros((n, 3))
for i in range(n): for i in range(n):
translations[i] = PoseUtil.get_uniform_translation( translations[i] = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug trans_min, trans_max, trans_unit, debug
) )
if contain_canonical: if contain_canonical:
translations[0] = np.zeros(3) translations[0] = np.zeros(3)
rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) rotations = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
elif fix == PoseUtil.TRANSLATION: elif fix == PoseUtil.TRANSLATION:
rotations = np.zeros((n, 3, 3)) rotations = np.zeros((n, 3, 3))
for i in range(n): for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical: if contain_canonical:
rotations[0] = np.eye(3) rotations[0] = np.eye(3)
translations = PoseUtil.get_uniform_translation( translations = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug trans_min, trans_max, trans_unit, debug
) )
else: else:
translations = np.zeros((n, 3)) translations = np.zeros((n, 3))
rotations = np.zeros((n, 3, 3)) rotations = np.zeros((n, 3, 3))
for i in range(n): for i in range(n):
translations[i] = PoseUtil.get_uniform_translation( translations[i] = PoseUtil.get_uniform_translation(
trans_min, trans_max, trans_unit, debug trans_min, trans_max, trans_unit, debug
) )
for i in range(n): for i in range(n):
rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug) rotations[i] = PoseUtil.get_uniform_rotation(rot_min, rot_max, debug)
if contain_canonical: if contain_canonical:
translations[0] = np.zeros(3) translations[0] = np.zeros(3)
rotations[0] = np.eye(3) rotations[0] = np.eye(3)
pose = np.eye(4, 4, k=0)[np.newaxis, :].repeat(n, axis=0) pose = np.eye(4, 4, k=0)[np.newaxis, :].repeat(n, axis=0)
pose[:, :3, :3] = rotations pose[:, :3, :3] = rotations
pose[:, :3, 3] = translations pose[:, :3, 3] = translations
return pose return pose
@staticmethod @staticmethod
def get_n_uniform_pose_batch( def get_n_uniform_pose_batch(
trans_min, trans_min,
trans_max, trans_max,
rot_min=0, rot_min=0,
rot_max=180, rot_max=180,
n=1, n=1,
batch_size=1, batch_size=1,
trans_unit="cm", trans_unit="cm",
fix=None, fix=None,
contain_canonical=False, contain_canonical=False,
debug=False, debug=False,
): ):
batch_poses = [] batch_poses = []
for i in range(batch_size): for i in range(batch_size):
pose = PoseUtil.get_n_uniform_pose( pose = PoseUtil.get_n_uniform_pose(
trans_min, trans_min,
trans_max, trans_max,
rot_min, rot_min,
rot_max, rot_max,
n, n,
trans_unit, trans_unit,
fix, fix,
contain_canonical, contain_canonical,
debug, debug,
) )
batch_poses.append(pose) batch_poses.append(pose)
pose_batch = np.stack(batch_poses, axis=0) pose_batch = np.stack(batch_poses, axis=0)
return pose_batch return pose_batch
@staticmethod @staticmethod
def get_uniform_scale(scale_min, scale_max, debug=False): def get_uniform_scale(scale_min, scale_max, debug=False):
if isinstance(scale_min, list): if isinstance(scale_min, list):
x_min, y_min, z_min = scale_min x_min, y_min, z_min = scale_min
x_max, y_max, z_max = scale_max x_max, y_max, z_max = scale_max
else: else:
x_min, y_min, z_min = scale_min, scale_min, scale_min x_min, y_min, z_min = scale_min, scale_min, scale_min
x_max, y_max, z_max = scale_max, scale_max, scale_max x_max, y_max, z_max = scale_max, scale_max, scale_max
x = np.random.uniform(x_min, x_max) x = np.random.uniform(x_min, x_max)
y = np.random.uniform(y_min, y_max) y = np.random.uniform(y_min, y_max)
z = np.random.uniform(z_min, z_max) z = np.random.uniform(z_min, z_max)
scale = np.array([x, y, z]) scale = np.array([x, y, z])
if debug: if debug:
print("uniform scale:", scale) print("uniform scale:", scale)
return scale return scale
@staticmethod
def rotation_matrix_from_axis_angle(axis, angle):
cos_angle = np.cos(angle)
sin_angle = np.sin(angle)
one_minus_cos = 1 - cos_angle
x, y, z = axis
rotation_matrix = np.array([
[cos_angle + x*x*one_minus_cos, x*y*one_minus_cos - z*sin_angle, x*z*one_minus_cos + y*sin_angle],
[y*x*one_minus_cos + z*sin_angle, cos_angle + y*y*one_minus_cos, y*z*one_minus_cos - x*sin_angle],
[z*x*one_minus_cos - y*sin_angle, z*y*one_minus_cos + x*sin_angle, cos_angle + z*z*one_minus_cos]
])
return rotation_matrix

83
utils/pts.py Normal file
View File

@@ -0,0 +1,83 @@
import numpy as np
class PtsUtil:
    """Point-cloud helpers: downsampling, voxelization, transforms, filtering."""

    @staticmethod
    def random_downsample_point_cloud(point_cloud, num_points, require_idx=False):
        """Randomly sample `num_points` from an (N, 3) cloud.

        Empty input is returned unchanged. Samples without replacement when
        N >= num_points (the original always used replacement, which could
        duplicate points unnecessarily); falls back to replacement when more
        points are requested than available.
        """
        if point_cloud.shape[0] == 0:
            if require_idx:
                return point_cloud, np.array([])
            return point_cloud
        replace = point_cloud.shape[0] < num_points
        idx = np.random.choice(len(point_cloud), num_points, replace=replace)
        if require_idx:
            return point_cloud[idx], idx
        return point_cloud[idx]

    @staticmethod
    def fps_downsample_point_cloud(point_cloud, num_points, require_idx=False):
        """Greedy farthest-point sampling of `num_points` points."""
        N = point_cloud.shape[0]
        sampled_indices = np.zeros(num_points, dtype=int)
        sampled_indices[0] = np.random.randint(0, N)
        distances = np.linalg.norm(point_cloud - point_cloud[sampled_indices[0]], axis=1)
        for i in range(1, num_points):
            # Pick the point farthest from the set selected so far.
            farthest_index = np.argmax(distances)
            sampled_indices[i] = farthest_index
            new_distances = np.linalg.norm(point_cloud - point_cloud[farthest_index], axis=1)
            distances = np.minimum(distances, new_distances)
        sampled_points = point_cloud[sampled_indices]
        if require_idx:
            return sampled_points, sampled_indices
        return sampled_points

    @staticmethod
    def voxelize_points(points, voxel_size):
        """Quantize points to voxel indices.

        Returns (unique_voxels, inverse) as produced by np.unique with
        return_inverse=True: the unique voxel coordinates and, per input
        point, the index of its voxel in that array.
        """
        voxel_indices = np.floor(points / voxel_size).astype(np.int32)
        unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True)
        return unique_voxels

    @staticmethod
    def transform_point_cloud(points, pose_mat):
        """Apply a 4x4 pose matrix to an (N, 3) cloud; returns (N, 3)."""
        points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1)
        points_h = np.dot(pose_mat, points_h.T).T
        return points_h[:, :3]

    @staticmethod
    def get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005, require_idx=False):
        """Return the points of L whose voxel also contains a point of R."""
        voxels_L, indices_L = PtsUtil.voxelize_points(point_cloud_L, voxel_size)
        voxels_R, _ = PtsUtil.voxelize_points(point_cloud_R, voxel_size)

        # View the (N, 3) int voxel coords as structured records so that
        # intersect1d/isin compare whole rows.
        voxel_indices_L = voxels_L.view([("", voxels_L.dtype)] * 3)
        voxel_indices_R = voxels_R.view([("", voxels_R.dtype)] * 3)
        overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R)
        mask_L = np.isin(
            indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0]
        )
        overlapping_points = point_cloud_L[mask_L]
        if require_idx:
            return overlapping_points, mask_L
        return overlapping_points

    @staticmethod
    def filter_points(points, normals, cam_pose, theta=45, z_range=(0.2, 0.45)):
        """Keep points whose normal is within `theta` degrees of +Z and whose
        camera-frame depth lies in `z_range`.
        """
        """ filter with normal """
        normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True)
        cos_theta = np.dot(normals_normalized, np.array([0, 0, 1]))
        theta_rad = np.deg2rad(theta)
        idx = cos_theta > np.cos(theta_rad)
        filtered_sampled_points = points[idx]

        """ filter with z range """
        points_cam = PtsUtil.transform_point_cloud(filtered_sampled_points, np.linalg.inv(cam_pose))
        idx = (points_cam[:, 2] > z_range[0]) & (points_cam[:, 2] < z_range[1])
        z_filtered_points = filtered_sampled_points[idx]
        return z_filtered_points[:, :3]

View File

@@ -1,168 +1,191 @@
import numpy as np import numpy as np
import bmesh import bmesh
from collections import defaultdict from collections import defaultdict
from scipy.spatial.transform import Rotation as R from scipy.spatial.transform import Rotation as R
from blender.pose import PoseUtil from utils.pose import PoseUtil
import random from utils.pts import PtsUtil
import random
class ViewSampleUtil:
@staticmethod class ViewSampleUtil:
def farthest_point_sampling(points, num_samples): @staticmethod
num_points = points.shape[0] def farthest_point_sampling(points, num_samples):
if num_samples >= num_points: num_points = points.shape[0]
return points, np.arange(num_points) if num_samples >= num_points:
sampled_indices = np.zeros(num_samples, dtype=int) return points, np.arange(num_points)
sampled_indices[0] = np.random.randint(num_points) sampled_indices = np.zeros(num_samples, dtype=int)
min_distances = np.full(num_points, np.inf) sampled_indices[0] = np.random.randint(num_points)
for i in range(1, num_samples): min_distances = np.full(num_points, np.inf)
current_point = points[sampled_indices[i - 1]] for i in range(1, num_samples):
dist_to_current_point = np.linalg.norm(points - current_point, axis=1) current_point = points[sampled_indices[i - 1]]
min_distances = np.minimum(min_distances, dist_to_current_point) dist_to_current_point = np.linalg.norm(points - current_point, axis=1)
sampled_indices[i] = np.argmax(min_distances) min_distances = np.minimum(min_distances, dist_to_current_point)
downsampled_points = points[sampled_indices] sampled_indices[i] = np.argmax(min_distances)
return downsampled_points, sampled_indices downsampled_points = points[sampled_indices]
return downsampled_points, sampled_indices
@staticmethod
def voxel_downsample(points, voxel_size): @staticmethod
voxel_grid = defaultdict(list) def voxel_downsample(points, voxel_size):
for i, point in enumerate(points): voxel_grid = defaultdict(list)
voxel_index = tuple((point // voxel_size).astype(int)) for i, point in enumerate(points):
voxel_grid[voxel_index].append(i) voxel_index = tuple((point // voxel_size).astype(int))
voxel_grid[voxel_index].append(i)
downsampled_points = []
downsampled_indices = [] downsampled_points = []
for indices in voxel_grid.values(): downsampled_indices = []
selected_index = indices[0] for indices in voxel_grid.values():
downsampled_points.append(points[selected_index]) selected_index = indices[0]
downsampled_indices.append(selected_index) downsampled_points.append(points[selected_index])
downsampled_indices.append(selected_index)
return np.array(downsampled_points), downsampled_indices
return np.array(downsampled_points), downsampled_indices
@staticmethod
def sample_view_data(obj, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int = 1, pertube_repeat:int = 1) -> dict: @staticmethod
view_data = { def sample_view_data(obj, distance_range:tuple = (0.25,0.5), voxel_size:float = 0.005, max_views: int = 1, pertube_repeat:int = 1) -> dict:
"look_at_points": [], view_data = {
"cam_positions": [], "look_at_points": [],
} "cam_positions": [],
mesh = obj.data }
bm = bmesh.new() mesh = obj.data
bm.from_mesh(mesh) bm = bmesh.new()
bm.verts.ensure_lookup_table() bm.from_mesh(mesh)
bm.faces.ensure_lookup_table() bm.verts.ensure_lookup_table()
bm.normal_update() bm.faces.ensure_lookup_table()
bm.normal_update()
look_at_points = []
cam_positions = [] look_at_points = []
normals = [] cam_positions = []
for v in bm.verts: normals = []
look_at_point = np.array(v.co) for v in bm.verts:
look_at_point = np.array(v.co)
view_data["look_at_points"].append(look_at_point)
normal = np.zeros(3) view_data["look_at_points"].append(look_at_point)
for loop in v.link_loops: normal = np.zeros(3)
normal += np.array(loop.calc_normal()) for loop in v.link_loops:
normal /= len(v.link_loops) normal += np.array(loop.calc_normal())
normal = normal / np.linalg.norm(normal) normal /= len(v.link_loops)
if np.isnan(normal).any(): normal = normal / np.linalg.norm(normal)
continue if np.isnan(normal).any():
if np.dot(normal, look_at_point) < 0: continue
normal = -normal if np.dot(normal, look_at_point) < 0:
normals.append(normal) normal = -normal
normals.append(normal)
for _ in range(pertube_repeat):
perturb_angle = np.radians(np.random.uniform(0, 30)) for _ in range(pertube_repeat):
perturb_axis = np.random.normal(size=3) perturb_angle = np.radians(np.random.uniform(0, 10))
perturb_axis /= np.linalg.norm(perturb_axis) perturb_axis = np.random.normal(size=3)
rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix() perturb_axis /= np.linalg.norm(perturb_axis)
perturbed_normal = np.dot(rotation_matrix, normal) rotation_matrix = R.from_rotvec(perturb_angle * perturb_axis).as_matrix()
middle_distance = (distance_range[0] + distance_range[1]) / 2 perturbed_normal = np.dot(rotation_matrix, normal)
perturbed_distance = random.uniform(middle_distance-0.05, middle_distance+0.05) middle_distance = (distance_range[0] + distance_range[1]) / 2
cam_position = look_at_point + perturbed_distance * perturbed_normal perturbed_distance = random.uniform(middle_distance-0.05, middle_distance+0.05)
look_at_points.append(look_at_point) cam_position = look_at_point + perturbed_distance * perturbed_normal
cam_positions.append(cam_position) look_at_points.append(look_at_point)
cam_positions.append(cam_position)
bm.free()
look_at_points = np.array(look_at_points) bm.free()
cam_positions = np.array(cam_positions) look_at_points = np.array(look_at_points)
voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size) cam_positions = np.array(cam_positions)
voxel_downsampled_cam_positions = cam_positions[selected_indices] voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size)
voxel_downsampled_normals = np.array(normals)[selected_indices] voxel_downsampled_cam_positions = cam_positions[selected_indices]
voxel_downsampled_normals = np.array(normals)[selected_indices]
fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views*2)
fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices] fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views*2)
fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices]
view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
view_data["cam_positions"] = fps_downsampled_cam_positions.tolist() view_data["look_at_points"] = fps_downsampled_look_at_points.tolist()
view_data["normals"] = voxel_downsampled_normals view_data["cam_positions"] = fps_downsampled_cam_positions.tolist()
view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points view_data["normals"] = voxel_downsampled_normals
return view_data view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points
return view_data
@staticmethod
def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple: @staticmethod
world_points = [] def get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
world_normals = [] world_points = []
for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]): world_normals = []
voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0) for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
normal_world = obj_world_pose[:3, :3] @ normal voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
world_points.append(voxel_down_sampled_points_world[:3]) normal_world = obj_world_pose[:3, :3] @ normal
world_normals.append(normal_world) world_points.append(voxel_down_sampled_points_world[:3])
return np.array(world_points), np.array(world_normals) world_normals.append(normal_world)
return np.array(world_points), np.array(world_normals)
@staticmethod
def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray: @staticmethod
cam_poses = [] def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int, min_cam_table_included_degree: int, random_view_ratio: float) -> np.ndarray:
min_height_z = 1000 cam_poses = []
for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]): min_height_z = 1000
look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0) for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
cam_position_world = obj_world_pose @ np.append(cam_position, 1.0) look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
if look_at_point_world[2] < min_height_z: cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
min_height_z = look_at_point_world[2] if look_at_point_world[2] < min_height_z:
look_at_point_world = look_at_point_world[:3] min_height_z = look_at_point_world[2]
cam_position_world = cam_position_world[:3] look_at_point_world = look_at_point_world[:3]
cam_position_world = cam_position_world[:3]
forward_vector = cam_position_world - look_at_point_world
forward_vector /= np.linalg.norm(forward_vector) forward_vector = cam_position_world - look_at_point_world
forward_vector /= np.linalg.norm(forward_vector)
up_vector = np.array([0, 0, 1])
up_vector = np.array([0, 0, 1])
right_vector = np.cross(up_vector, forward_vector)
right_vector /= np.linalg.norm(right_vector) dot_product = np.dot(forward_vector, up_vector)
angle = np.degrees(np.arccos(dot_product))
corrected_up_vector = np.cross(forward_vector, right_vector) right_vector = np.cross(up_vector, forward_vector)
rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
if angle > 90 - min_cam_table_included_degree:
cam_pose = np.eye(4) max_angle = 90 - min_cam_table_included_degree
cam_pose[:3, :3] = rotation_matrix min_angle = max(90 - min_cam_table_included_degree*2, 30)
cam_pose[:3, 3] = cam_position_world target_angle = np.random.uniform(min_angle, max_angle)
cam_poses.append(cam_pose) angle_difference = np.radians(target_angle - angle)
filtered_cam_poses = [] rotation_axis = np.cross(forward_vector, up_vector)
for cam_pose in cam_poses: rotation_axis /= np.linalg.norm(rotation_axis)
if cam_pose[2, 3] > min_height_z: rotation_matrix = PoseUtil.rotation_matrix_from_axis_angle(rotation_axis, -angle_difference)
direction_vector = cam_pose[:3, 2] new_cam_position_world = np.dot(rotation_matrix, cam_position_world - look_at_point_world) + look_at_point_world
horizontal_normal = np.array([0, 0, 1]) cam_position_world = new_cam_position_world
cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal)) forward_vector = cam_position_world - look_at_point_world
angle = np.arccos(np.clip(cos_angle, -1.0, 1.0)) forward_vector /= np.linalg.norm(forward_vector)
angle_degree = np.degrees(angle) right_vector = np.cross(up_vector, forward_vector)
if angle_degree < 90 - min_cam_table_included_degree: right_vector /= np.linalg.norm(right_vector)
filtered_cam_poses.append(cam_pose)
if random.random() < random_view_ratio: corrected_up_vector = np.cross(forward_vector, right_vector)
pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm") rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
filtered_cam_poses.append(pertube_pose @ cam_pose) else:
right_vector = np.cross(up_vector, forward_vector)
if len(filtered_cam_poses) > max_views: right_vector /= np.linalg.norm(right_vector)
indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False) corrected_up_vector = np.cross(forward_vector, right_vector)
filtered_cam_poses = [filtered_cam_poses[i] for i in indices] rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
cam_pose = np.eye(4)
return np.array(filtered_cam_poses) cam_pose[:3, :3] = rotation_matrix
cam_pose[:3, 3] = cam_position_world
@staticmethod cam_poses.append(cam_pose)
def sample_view_data_world_space(obj, distance_range:tuple = (0.3,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
obj_world_pose = np.asarray(obj.matrix_world) filtered_cam_poses = []
view_data = ViewSampleUtil.sample_view_data(obj, distance_range, voxel_size, max_views) for cam_pose in cam_poses:
view_data["cam_poses"] = ViewSampleUtil.get_cam_pose(view_data, obj_world_pose, max_views, min_cam_table_included_degree, random_view_ratio) if cam_pose[2, 3] > min_height_z:
view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, obj_world_pose) direction_vector = cam_pose[:3, 2]
return view_data horizontal_normal = np.array([0, 0, 1])
cos_angle = np.dot(direction_vector, horizontal_normal) / (np.linalg.norm(direction_vector) * np.linalg.norm(horizontal_normal))
angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
angle_degree = np.degrees(angle)
if angle_degree < 90 - min_cam_table_included_degree:
filtered_cam_poses.append(cam_pose)
if random.random() < random_view_ratio:
pertube_pose = PoseUtil.get_uniform_pose([0.1, 0.1, 0.1], [3, 3, 3], 0, 180, "cm")
filtered_cam_poses.append(pertube_pose @ cam_pose)
if len(filtered_cam_poses) > max_views:
cam_points = np.array([cam_pose[:3, 3] for cam_pose in filtered_cam_poses])
_, indices = PtsUtil.fps_downsample_point_cloud(cam_points, max_views, require_idx=True)
filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
return np.array(filtered_cam_poses)
@staticmethod
def sample_view_data_world_space(obj, distance_range:tuple = (0.3,0.5), voxel_size:float = 0.005, max_views: int=1, min_cam_table_included_degree:int=20, random_view_ratio:float = 0.2) -> dict:
obj_world_pose = np.asarray(obj.matrix_world)
view_data = ViewSampleUtil.sample_view_data(obj, distance_range, voxel_size, max_views)
view_data["cam_poses"] = ViewSampleUtil.get_cam_pose(view_data, obj_world_pose, max_views, min_cam_table_included_degree, random_view_ratio)
view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, obj_world_pose)
return view_data