diff --git a/blender_util.py b/blender_util.py new file mode 100644 index 0000000..ad4ef05 --- /dev/null +++ b/blender_util.py @@ -0,0 +1,323 @@ + +import os +import json +import bpy +import time +import gc +import numpy as np +import mathutils + +class BlenderUtils: + + TABLE_NAME: str = "table" + CAMERA_NAME: str = "Camera" + CAMERA_RIGHT_NAME: str = "CameraRight" + CAMERA_OBJECT_NAME: str = "CameraObject" + LIGHT_NAME: str = "Light" + DISPLAY_TABLE_NAME: str = "display_table" + MESH_FILE_NAME: str = "mesh.obj" + + @staticmethod + def get_obj_path(obj_dir, name): + return os.path.join(obj_dir, name, BlenderUtils.MESH_FILE_NAME) + + @staticmethod + def load_obj(name, mesh_path, scale=1): + print(mesh_path) + bpy.ops.wm.obj_import(filepath=mesh_path) + loaded_object = bpy.context.selected_objects[-1] + loaded_object.name = name + loaded_object.data.name = name + loaded_object.scale = (scale, scale, scale) + bpy.ops.rigidbody.object_add() + return loaded_object + + @staticmethod + def get_obj(name): + return bpy.data.objects.get(name) + + + @staticmethod + def get_obj_pose(name): + obj = BlenderUtils.get_obj(name) + return np.asarray(obj.matrix_world) + + + + @staticmethod + def add_plane(name, location, orientation, size=10): + bpy.ops.mesh.primitive_plane_add(size=size,location=location) + plane = bpy.context.selected_objects[-1] + plane.name = name + plane.rotation_euler = orientation + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = 'PASSIVE' + + @staticmethod + def add_table(table_model_path): + table = BlenderUtils.load_obj(BlenderUtils.TABLE_NAME, table_model_path, scale=0.01) + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = 'PASSIVE' + + mat = bpy.data.materials.new(name="TableYellowMaterial") + mat.diffuse_color = (1.0, 1.0, 0.0, 1.0) + if len(table.data.materials) > 0: + table.data.materials[0] = mat + else: + table.data.materials.append(mat) + + @staticmethod + def setup_scene(init_light_and_camera_config, table_model_path, binocular_vision): + bpy.context.scene.render.engine = 'BLENDER_WORKBENCH' + bpy.context.scene.display.shading.light = 'FLAT' + bpy.context.scene.display.shading.color_type = 'MATERIAL' + bpy.context.scene.display.shading.show_xray = False + bpy.context.scene.display.shading.use_dof = False + bpy.context.scene.display.render_aa = 'OFF' + bpy.context.scene.view_settings.view_transform = 'Standard' + + BlenderUtils.init_light_and_camera(init_light_and_camera_config, binocular_vision) + + BlenderUtils.add_plane("plane_floor", location=(0,0,0), orientation=(0,0,0)) + BlenderUtils.add_plane("plane_ceil", location=(0,0,10), orientation=(0,0,0)) + BlenderUtils.add_plane("plane_wall_1", location=(5,0,5), orientation=(0,np.pi/2,0)) + BlenderUtils.add_plane("plane_wall_2", location=(-5,0,5), orientation=(0,np.pi/2,0)) + BlenderUtils.add_plane("plane_wall_3", location=(0,5,5), orientation=(np.pi/2,0,0)) + BlenderUtils.add_plane("plane_wall_4", location=(0,-5,5), orientation=(np.pi/2,0,0)) + + BlenderUtils.add_table(table_model_path) + + @staticmethod + def set_light_params(light, config): + light.location = config["location"] + light.rotation_euler = config["orientation"] + if light.type == 'SUN': + light.data.energy = config["power"] + elif light.type == 'POINT': + light.data.energy = config["power"] + + @staticmethod + def set_camera_params(camera, config, binocular_vision): + + camera_object = bpy.data.objects.new(BlenderUtils.CAMERA_OBJECT_NAME, None) + bpy.context.collection.objects.link(camera_object) + cameras = 
[bpy.data.objects.get("Camera")] + camera.location = [0,0,0] + camera.rotation_euler = [0,0,0] + camera.parent = camera_object + if binocular_vision: + left_camera = cameras[0] + right_camera = left_camera.copy() + right_camera.name = BlenderUtils.CAMERA_RIGHT_NAME + right_camera.data = left_camera.data.copy() + right_camera.data.name = BlenderUtils.CAMERA_RIGHT_NAME + bpy.context.collection.objects.link(right_camera) + right_camera.parent = camera_object + right_camera.location = [config["eye_distance"]/2, 0, 0] + left_camera.location = [-config["eye_distance"]/2, 0, 0] + binocular_angle = config["eye_angle"] + half_angle = np.radians(binocular_angle / 2) + + left_camera.rotation_euler[1] = -half_angle + right_camera.rotation_euler[1] = half_angle + cameras.append(right_camera) + + for camera in cameras: + camera.data.clip_start = config["near_plane"] + camera.data.clip_end = config["far_plane"] + + bpy.context.scene.render.resolution_x = config["resolution"][0] + bpy.context.scene.render.resolution_y = config["resolution"][1] + sensor_height = 24.0 + focal_length = sensor_height / (2 * np.tan(np.radians(config["fov_vertical"]) / 2)) + camera.data.lens = focal_length + camera.data.sensor_width = sensor_height * config["resolution"][0] / config["resolution"][1] + camera.data.sensor_height = sensor_height + + @staticmethod + def init_light_and_camera(init_light_and_camera_config, binocular_vision): + + camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) + light = BlenderUtils.get_obj(BlenderUtils.LIGHT_NAME) + BlenderUtils.set_camera_params(camera, init_light_and_camera_config[BlenderUtils.CAMERA_NAME], binocular_vision) + BlenderUtils.set_light_params(light, init_light_and_camera_config[BlenderUtils.LIGHT_NAME]) + + @staticmethod + def get_obj_diag(name): + obj = BlenderUtils.get_obj(name) + return np.linalg.norm(obj.dimensions) + + @staticmethod + def matrix_to_blender_pose(matrix): + location = matrix[:3, 3] + rotation_matrix = matrix[:3, :3] + rotation_matrix_blender = mathutils.Matrix(rotation_matrix.tolist()) + rotation_euler = rotation_matrix_blender.to_euler() + return location, rotation_euler + + @staticmethod + def set_camera_at(pose): + camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME) + location, rotation_euler = BlenderUtils.matrix_to_blender_pose(pose) + + camera.location = location + camera.rotation_euler = rotation_euler + + @staticmethod + def get_object_bottom_z(obj): + vertices = [v.co for v in obj.data.vertices] + vertices_world = [obj.matrix_world @ v for v in vertices] + min_z = min([v.z for v in vertices_world]) + return min_z + + @staticmethod + def render_and_save(output_dir, file_name, binocular_vision=False): + target_cameras = [BlenderUtils.CAMERA_NAME] + if binocular_vision: + target_cameras.append(BlenderUtils.CAMERA_RIGHT_NAME) + + for cam_name in target_cameras: + # Set the current camera + bpy.context.scene.camera = BlenderUtils.get_obj(cam_name) + bpy.context.scene.view_layers["ViewLayer"].use_pass_z = True + cam_suffix = "L" if cam_name == BlenderUtils.CAMERA_NAME else "R" + scene = bpy.context.scene + scene.render.filepath = "" + + mask_dir = os.path.join(output_dir, "mask") + if not os.path.exists(mask_dir): + os.makedirs(mask_dir) + + # Modify the file name based on the camera + + scene.render.filepath = os.path.join(output_dir, mask_dir, f"{file_name}_{cam_suffix}.png") + scene.render.image_settings.color_depth = '8' + scene.render.resolution_percentage = 100 + scene.render.use_overwrite = False + scene.render.use_file_extension = 
False + scene.render.use_placeholder = False + + scene.use_nodes = True + tree = scene.node_tree + + for node in tree.nodes: + tree.nodes.remove(node) + + rl = tree.nodes.new('CompositorNodeRLayers') + + map_range = tree.nodes.new('CompositorNodeMapRange') + map_range.inputs['From Min'].default_value = 0.01 + map_range.inputs['From Max'].default_value = 5 + map_range.inputs['To Min'].default_value = 0 + map_range.inputs['To Max'].default_value = 1 + tree.links.new(rl.outputs['Depth'], map_range.inputs[0]) + + output_depth = tree.nodes.new('CompositorNodeOutputFile') + + depth_dir = os.path.join(output_dir, "depth") + if not os.path.exists(depth_dir): + os.makedirs(depth_dir) + output_depth.base_path = depth_dir + output_depth.file_slots[0].path = f"{file_name}_{cam_suffix}.####" + output_depth.format.file_format = 'PNG' + output_depth.format.color_mode = 'BW' + output_depth.format.color_depth = '16' + + tree.links.new(map_range.outputs[0], output_depth.inputs[0]) + + bpy.ops.render.render(write_still=True) + msg = "success" + return msg + + @staticmethod + def save_cam_params(scene_dir, idx, binocular_vision=False): + camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_NAME) + extrinsic = np.array(camera.matrix_world) + cam_data = camera.data + focal_length = cam_data.lens + sensor_width = cam_data.sensor_width + sensor_height = cam_data.sensor_height + resolution_x = bpy.context.scene.render.resolution_x + resolution_y = bpy.context.scene.render.resolution_y + intrinsic = np.zeros((3, 3)) + intrinsic[0, 0] = focal_length * resolution_x / sensor_width # fx + intrinsic[1, 1] = focal_length * resolution_y / sensor_height # fy + intrinsic[0, 2] = resolution_x / 2.0 # cx + intrinsic[1, 2] = resolution_y / 2.0 # cy + intrinsic[2, 2] = 1.0 + cam_object = BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME) + extrinsic_cam_object = np.array(cam_object.matrix_world) + data = { + "extrinsic": extrinsic.tolist(), + "extrinsic_cam_object": extrinsic_cam_object.tolist(), + "intrinsic": intrinsic.tolist(), + "far_plane": camera.data.clip_end, + "near_plane": camera.data.clip_start, + } + if binocular_vision: + right_camera = BlenderUtils.get_obj(BlenderUtils.CAMERA_RIGHT_NAME) + extrinsic_right = np.array(right_camera.matrix_world) + print("result:",extrinsic_right) + + data["extrinsic_R"] = extrinsic_right.tolist() + + cam_params_dir = os.path.join(scene_dir, "camera_params") + if not os.path.exists(cam_params_dir): + os.makedirs(cam_params_dir) + cam_params_path = os.path.join(cam_params_dir, f"{idx}.json") + with open(cam_params_path, "w") as f: + json.dump(data, f, indent=4) + + @staticmethod + def reset_objects_and_platform(): + all_objects = bpy.data.objects + keep_objects = {"plane_floor", "plane_ceil", "plane_wall_1", "plane_wall_2", "plane_wall_3", "plane_wall_4"} + keep_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) + keep_objects.add(BlenderUtils.CAMERA_NAME) + keep_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) + keep_objects.add(BlenderUtils.LIGHT_NAME) + keep_objects.add(BlenderUtils.TABLE_NAME) + + for obj in all_objects: + if obj.name not in keep_objects: + bpy.data.objects.remove(obj, do_unlink=True) + + for block in bpy.data.meshes: + if block.users == 0: + bpy.data.meshes.remove(block) + for block in bpy.data.materials: + if block.users == 0: + bpy.data.materials.remove(block) + for block in bpy.data.images: + if block.users == 0: + bpy.data.images.remove(block) + + gc.collect() + bpy.context.scene.frame_set(0) + + @staticmethod + def save_scene_info(scene_root_dir, display_table_config, 
target_name): + all_objects = bpy.data.objects + no_save_objects = {"plane_floor", "plane_ceil", "plane_wall_1", "plane_wall_2", "plane_wall_3", "plane_wall_4"} + no_save_objects.add(BlenderUtils.CAMERA_OBJECT_NAME) + no_save_objects.add(BlenderUtils.CAMERA_NAME) + no_save_objects.add(BlenderUtils.CAMERA_RIGHT_NAME) + no_save_objects.add(BlenderUtils.LIGHT_NAME) + no_save_objects.add(BlenderUtils.TABLE_NAME) + scene_info = {} + for obj in all_objects: + if obj.name not in no_save_objects and obj.name != BlenderUtils.DISPLAY_TABLE_NAME: + obj_info = { + "location": list(obj.location), + "rotation_euler": list(obj.rotation_euler), + "scale": list(obj.scale) + } + scene_info[obj.name] = obj_info + scene_info[BlenderUtils.DISPLAY_TABLE_NAME] = display_table_config + scene_info["target_name"] = target_name + scene_info_path = os.path.join(scene_root_dir, "scene_info.json") + with open(scene_info_path, "w") as outfile: + json.dump(scene_info, outfile) + + + \ No newline at end of file diff --git a/data_generator.py b/data_generator.py new file mode 100644 index 0000000..ff54d26 --- /dev/null +++ b/data_generator.py @@ -0,0 +1,345 @@ + +import os +import random +import math +import bpy +import numpy as np +import mathutils +import requests +from blender.blender_util import BlenderUtils +from blender.view_sample_util import ViewSampleUtil + +class DataGenerator: + def __init__(self, config): + self.plane_size = config["runner"]["generate"]["plane_size"] + self.table_model_path = config["runner"]["generate"]["table_model_path"] + self.output_dir = config["runner"]["generate"]["output_dir"] + self.random_config = config["runner"]["generate"]["random_config"] + self.light_and_camera_config = config["runner"]["generate"]["light_and_camera_config"] + self.obj_dir = config["runner"]["generate"]["object_dir"] + self.max_views = config["runner"]["generate"]["max_views"] + self.min_views = config["runner"]["generate"]["min_views"] + self.min_diag = config["runner"]["generate"]["min_diag"] + self.max_diag = config["runner"]["generate"]["max_diag"] + self.binocular_vision = config["runner"]["generate"]["binocular_vision"] + self.set_status_path = "http://localhost:5000/project/set_status" + self.log_path = "http://localhost:5000/project/add_log" + self.obj_name_list = os.listdir(self.obj_dir) + self.target_obj = None + self.stopped = False + self.random_obj_list = [] + self.display_table_config = {} + BlenderUtils.setup_scene(self.light_and_camera_config, self.table_model_path, self.binocular_vision) + self.table = BlenderUtils.get_obj(BlenderUtils.TABLE_NAME) + self.access = self._check_set_status_access(self.set_status_path) + print(self.access) + + def _check_set_status_access(self, url): + try: + response = requests.get(url, timeout=5) + return True + except requests.RequestException as e: + print(f"Cannot access {url}: {e}") + return False + + def set_status(self, key, value): + if not self.access: + return + request_data = {} + request_data["status"] = { + "app_name" : "generate_view", + "runner_name" : "view_generator", + "key": key, + "value": value + } + requests.post(self.set_status_path, json=request_data) + + def set_progress(self, key, curr_value, max_value): + if not self.access: + return + request_data = {} + request_data["progress"] = { + "app_name" : "generate_view", + "runner_name" : "view_generator", + "key": key, + "curr_value": curr_value, + "max_value": max_value + } + requests.post(self.set_status_path, json=request_data) + + def add_log(self, msg, log_type): + if not self.access: + 
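+            # The status server is assumed to be a small HTTP app on localhost:5000,
+            # external to this repo. The three reporting helpers in this class post
+            # JSON bodies of the following shapes, so a minimal receiver only needs
+            # to accept POSTs with these keys:
+            #   set_status   -> {"status":   {"app_name", "runner_name", "key", "value"}}
+            #   set_progress -> {"progress": {"app_name", "runner_name", "key", "curr_value", "max_value"}}
+            #   add_log      -> {"log":      {"message", "log_type"}}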
return + request_data = {"log":{}} + request_data["log"]["message"] = msg + request_data["log"]["log_type"] = log_type + requests.post(self.log_path, json=request_data) + + def generate_display_platform(self): + config = self.random_config[BlenderUtils.DISPLAY_TABLE_NAME] + + height = random.uniform(config["min_height"], config["max_height"]) + radius = random.uniform(config["min_radius"], config["max_radius"]) + R = random.uniform(config["min_R"], config["max_R"]) + G = random.uniform(config["min_G"], config["max_G"]) + B = random.uniform(config["min_B"], config["max_B"]) + while height > 0.5 * radius: + height = random.uniform(config["min_height"], config["max_height"]) + + bpy.ops.mesh.primitive_cylinder_add(radius=radius, depth=height) + platform = bpy.context.selected_objects[-1] + platform.name = BlenderUtils.DISPLAY_TABLE_NAME + + bbox = self.table.bound_box + bbox_world = [self.table.matrix_world @ mathutils.Vector(corner) for corner in bbox] + table_top_z = max([v.z for v in bbox_world]) + + platform.location = (0, 0, table_top_z + height / 2) + + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = 'PASSIVE' + bpy.ops.object.shade_auto_smooth() + + mat = bpy.data.materials.new(name="RedMaterial") + mat.diffuse_color = (1.0, 0.0, 0.0, 1.0) # Red with full alpha (1.0) + if len(platform.data.materials) > 0: + platform.data.materials[0] = mat + else: + platform.data.materials.append(mat) + + self.display_table_config = { + "height": height, + "radius": radius, + "R": R, + "G": G, + "B": B, + "location": list(platform.location) + } + return platform + + def put_display_object(self, name): + config = self.random_config["display_object"] + + x = random.uniform(config["min_x"], config["max_x"]) + y = random.uniform(config["min_y"], config["max_y"]) + z = random.uniform(config["min_z"], config["max_z"]) + + if random.random() <= config["random_rotation_ratio"]: + rotation = ( + random.uniform(0, 2*np.pi), + random.uniform(0, 2*np.pi), + random.uniform(0, 2*np.pi) + ) + else: + rotation = (0, 0, 0) + z=0.05 + + platform_bbox = self.platform.bound_box + platform_bbox_world = [self.platform.matrix_world @ mathutils.Vector(corner) for corner in platform_bbox] + platform_top_z = max([v.z for v in platform_bbox_world]) + + obj_mesh_path = BlenderUtils.get_obj_path(self.obj_dir,name) + obj = BlenderUtils.load_obj(name, obj_mesh_path) + + obj_bottom_z = BlenderUtils.get_object_bottom_z(obj) + offset_z = obj_bottom_z + + obj.rotation_euler = rotation + obj.location = (x, y, platform_top_z - offset_z + z) + + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = 'ACTIVE' + mat = bpy.data.materials.new(name="GreenMaterial") + mat.diffuse_color = (0.0, 1.0, 0.0, 1.0) # Green with full alpha (1.0) + if len(obj.data.materials) > 0: + obj.data.materials[0] = mat + else: + obj.data.materials.append(mat) + self.target_obj = obj + + + def put_random_objects_on_table(self): + num_objects = self.random_config["random_objects"]["num"] + cluster = self.random_config["random_objects"]["cluster"] + for _ in range(num_objects): + obj_name = random.choice(self.obj_name_list) + print(obj_name) + obj_mesh_path = BlenderUtils.get_obj_path(self.obj_dir, obj_name) + obj = BlenderUtils.load_obj(obj_name, obj_mesh_path) + + bbox = self.table.bound_box + bbox_world = [self.table.matrix_world @ mathutils.Vector(corner) for corner in bbox] + table_top_z = max([v.z for v in bbox_world]) + + platform_radius = self.platform.dimensions.x / 2.0 + + try_times = 0 + while True: + x = 
random.uniform(bbox_world[0].x*cluster, bbox_world[6].x*cluster) + y = random.uniform(bbox_world[0].y*cluster, bbox_world[6].y*cluster) + if math.sqrt(x**2 + y**2) > platform_radius*2 or try_times > 10: + break + try_times += 1 + if try_times > 10: + continue + + rotation = ( + random.uniform(0, 2 * np.pi), + random.uniform(0, 2 * np.pi), + random.uniform(0, 2 * np.pi) + ) + + obj_bottom_z = BlenderUtils.get_object_bottom_z(obj) + offset_z = obj_bottom_z + + obj.rotation_euler = rotation + obj.location = (x, y, table_top_z - offset_z) + + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = 'ACTIVE' + self.random_obj_list.append(obj) + + def reset(self): + self.target_obj = None + self.random_obj_list = [] + BlenderUtils.reset_objects_and_platform() + + def check_moving_objects(self, previous_locations): + threshold = 0.01 + moving_objects = False + target_checking_object = [self.target_obj] + self.random_obj_list + for obj in target_checking_object: + if obj.rigid_body: + current_location = obj.location + location_diff = (current_location - previous_locations[obj.name]).length + if location_diff > threshold: + moving_objects = True + break + return moving_objects + + def check_and_adjust_target(self): + target_position = self.target_obj.matrix_world.translation + msg = "success" + if abs(target_position[0]) > self.random_config["display_object"]["max_x"]: + target_position[0] = np.sign(target_position[0]) * self.random_config["display_object"]["max_x"]*random.uniform(-0.5,0.5) + msg = "adjusted" + if abs(target_position[1]) > self.random_config["display_object"]["max_y"]: + target_position[1] = np.sign(target_position[1]) * self.random_config["display_object"]["max_y"]*random.uniform(-0.5,0.5) + msg = "adjusted" + if target_position[2] < 0.85: + target_position[2] = target_position[2] + 0.1 + msg = "adjusted" + self.target_obj.location = target_position + return msg + + def start_render(self, diag=0): + object_name = self.target_obj.name + if "." 
in object_name: + object_name = object_name.split(".")[0] + scene_dir = os.path.join(self.output_dir, object_name) + if not os.path.exists(scene_dir): + os.makedirs(scene_dir) + view_num = int(self.min_views + (diag - self.min_diag)/(self.max_diag - self.min_diag) * (self.max_views - self.min_views)) + view_data = ViewSampleUtil.sample_view_data_world_space(self.target_obj, distance_range=(0.2,0.4), voxel_size=0.005, max_views=view_num) + object_points = np.array(view_data["voxel_down_sampled_points"]) + normals = np.array(view_data["normals"]) + points_normals = np.concatenate((object_points, normals), axis=1) + + np.savetxt(os.path.join(scene_dir, "points_and_normals.txt"), points_normals) + for i, cam_pose in enumerate(view_data["cam_poses"]): + BlenderUtils.set_camera_at(cam_pose) + BlenderUtils.render_and_save(scene_dir, f"{i}", binocular_vision=self.binocular_vision) + BlenderUtils.save_cam_params(scene_dir, i, binocular_vision=self.binocular_vision) + self.set_progress("render frame", i, len(view_data["cam_poses"])) + self.set_progress("render frame", len(view_data["cam_poses"]), len(view_data["cam_poses"])) + BlenderUtils.save_scene_info(scene_dir, self.display_table_config, object_name) + depth_dir = os.path.join(scene_dir, "depth") + for depth_file in os.listdir(depth_dir): + if not depth_file.endswith(".png"): + name, _ = os.path.splitext(depth_file) + file_path = os.path.join(depth_dir, depth_file) + new_file_path = os.path.join(depth_dir, f"{name}.png") + os.rename(file_path,new_file_path) + return True + + def simulate_scene(self, frame_limit=120, depth = 0, diag = 0): + bpy.context.view_layer.update() + bpy.ops.screen.animation_play() + previous_locations = {obj.name: obj.matrix_world.translation.copy() for obj in bpy.context.scene.objects if obj.rigid_body} + frame_count = 1 + moving_objects = True + while frame_count < frame_limit: + bpy.context.view_layer.update() + if frame_count%10 == 0: + moving_objects = self.check_moving_objects(previous_locations) + if not moving_objects: + break + frame_count += 1 + bpy.context.scene.frame_set(bpy.context.scene.frame_current + 1) + + previous_locations = {obj.name: obj.matrix_world.translation.copy() for obj in bpy.context.scene.objects if obj.rigid_body} + + bpy.ops.screen.animation_cancel(restore_frame=False) + + msg = self.check_and_adjust_target() + + if msg == "adjusted" and depth < 3: + bpy.context.view_layer.update() + bpy.context.scene.frame_set(0) + return self.simulate_scene(depth = depth + 1, diag=diag) + elif msg == "success": + print("Scene generation completed.") + result = self.start_render(diag=diag) + if not result: + msg = "fail" + return msg + return "retry" + + def gen_scene_data(self, object_name): + + bpy.context.scene.frame_set(0) + self.platform = self.generate_display_platform() + self.put_display_object(object_name) + diag = BlenderUtils.get_obj_diag(self.target_obj.name) + self.set_status("target_diagonal", diag) + if diag > self.max_diag or diag < self.min_diag: + self.add_log(f"The diagonal size of the object <{object_name}>(size: {round(diag,3)}) does not meet the requirements.", "error") + return "diag_error" + self.put_random_objects_on_table() + + return self.simulate_scene(diag=diag) + + + def gen_all_scene_data(self): + max_retry_times = 3 + total = len(self.obj_name_list) + count = 0 + count_success = 0 + self.set_progress("generate scene", 0, total) + result = "retry" + for target_obj_name in self.obj_name_list: + self.add_log(f"Generating scene for object <{target_obj_name}>", "info") + 
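+            # gen_scene_data()/simulate_scene() report back through result strings:
+            #   "success"       the scene settled and rendering finished
+            #   "retry"         the target kept drifting off the platform after the adjustment budget ran out
+            #   "diag_error"    the object diagonal is outside [min_diag, max_diag]
+            #   "unknown_error" an exception was raised during generation (caught below)
+            #   "fail"          start_render() returned a falsy value
+            # Only "retry" re-enters the while loop below.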
retry_times = 0
+            self.set_status("target_object", target_obj_name)
+            while retry_times < max_retry_times and result == "retry":
+                self.reset()
+                try:
+                    result = self.gen_scene_data(target_obj_name)
+                except Exception as e:
+                    self.add_log(f"Unknown error: {e}", "error")
+                    result = "unknown_error"
+                if result == "retry":
+                    retry_times += 1
+                    self.add_log(f"Adjustment limit reached, retrying <{target_obj_name}>. ({retry_times}/{max_retry_times})", "warning")
+            count += 1
+            if result == "success":
+                count_success += 1
+                self.add_log(f"Scene for object <{target_obj_name}> generated successfully", "success")
+            if result == "retry" and retry_times >= max_retry_times:
+                self.add_log(f"Maximum retries reached, failed to generate scene for object <{target_obj_name}>", "error")
+            self.set_status("success", count_success)
+            self.set_status("fail", count - count_success)
+            self.set_progress("generate scene", count, total)
+            result = "retry"
+
\ No newline at end of file
diff --git a/data_load.py b/data_load.py
new file mode 100644
index 0000000..ffc0138
--- /dev/null
+++ b/data_load.py
@@ -0,0 +1,265 @@
+import os
+import numpy as np
+import json
+import cv2
+import trimesh
+from pts import PtsUtil
+
+class DataLoadUtil:
+
+    @staticmethod
+    def get_path(root, scene_name, frame_idx):
+        path = os.path.join(root, scene_name, f"{frame_idx}")
+        return path
+
+    @staticmethod
+    def get_label_path(root, scene_name):
+        path = os.path.join(root, scene_name, "label.json")
+        return path
+
+    @staticmethod
+    def get_sampled_model_points_path(root, scene_name):
+        path = os.path.join(root, scene_name, "sampled_model_points.txt")
+        return path
+
+    @staticmethod
+    def get_scene_seq_length(root, scene_name):
+        camera_params_path = os.path.join(root, scene_name, "camera_params")
+        return len(os.listdir(camera_params_path))
+
+    @staticmethod
+    def load_downsampled_world_model_points(root, scene_name):
+        model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name)
+        model_points = np.loadtxt(model_path)
+        return model_points
+
+    @staticmethod
+    def save_downsampled_world_model_points(root, scene_name, model_points):
+        model_path = DataLoadUtil.get_sampled_model_points_path(root, scene_name)
+        np.savetxt(model_path, model_points)
+
+    @staticmethod
+    def load_mesh_at(model_dir, object_name, world_object_pose):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        mesh.apply_transform(world_object_pose)
+        return mesh
+
+    @staticmethod
+    def get_bbox_diag(model_dir, object_name):
+        model_path = os.path.join(model_dir, object_name, "mesh.obj")
+        mesh = trimesh.load(model_path)
+        bbox = mesh.bounding_box.extents
+        diagonal_length = np.linalg.norm(bbox)
+        return diagonal_length
+
+    @staticmethod
+    def save_mesh_at(model_dir, output_dir, object_name, scene_name, world_object_pose):
+        mesh = DataLoadUtil.load_mesh_at(model_dir, object_name, world_object_pose)
+        model_path = os.path.join(output_dir, scene_name, "world_mesh.obj")
+        mesh.export(model_path)
+
+    @staticmethod
+    def save_target_mesh_at_world_space(root, model_dir, scene_name):
+        scene_info = DataLoadUtil.load_scene_info(root, scene_name)
+        target_name = scene_info["target_name"]
+        transformation = scene_info[target_name]
+        location = transformation["location"]
+        rotation_euler = transformation["rotation_euler"]
+        pose_mat = trimesh.transformations.euler_matrix(*rotation_euler)
+        pose_mat[:3, 3] = location
+
+        mesh = DataLoadUtil.load_mesh_at(model_dir, target_name, pose_mat)
+        mesh_dir = os.path.join(root, scene_name, "mesh")
+        if not 
os.path.exists(mesh_dir): + os.makedirs(mesh_dir) + model_path = os.path.join(mesh_dir, "world_target_mesh.obj") + mesh.export(model_path) + + @staticmethod + def load_scene_info(root, scene_name): + scene_info_path = os.path.join(root, scene_name, "scene_info.json") + with open(scene_info_path, "r") as f: + scene_info = json.load(f) + return scene_info + + @staticmethod + def load_target_object_pose(root, scene_name): + scene_info = DataLoadUtil.load_scene_info(root, scene_name) + target_name = scene_info["target_name"] + transformation = scene_info[target_name] + location = transformation["location"] + rotation_euler = transformation["rotation_euler"] + pose_mat = trimesh.transformations.euler_matrix(*rotation_euler) + pose_mat[:3, 3] = location + return pose_mat + + @staticmethod + def load_depth(path, min_depth=0.01,max_depth=5.0,binocular=False): + + def load_depth_from_real_path(real_path, min_depth, max_depth): + depth = cv2.imread(real_path, cv2.IMREAD_UNCHANGED) + depth = depth.astype(np.float32) / 65535.0 + min_depth = min_depth + max_depth = max_depth + depth_meters = min_depth + (max_depth - min_depth) * depth + return depth_meters + + if binocular: + depth_path_L = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_L.png") + depth_path_R = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + "_R.png") + depth_meters_L = load_depth_from_real_path(depth_path_L, min_depth, max_depth) + depth_meters_R = load_depth_from_real_path(depth_path_R, min_depth, max_depth) + return depth_meters_L, depth_meters_R + else: + depth_path = os.path.join(os.path.dirname(path), "depth", os.path.basename(path) + ".png") + depth_meters = load_depth_from_real_path(depth_path, min_depth, max_depth) + return depth_meters + + @staticmethod + def load_seg(path, binocular=False): + if binocular: + def clean_mask(mask_image): + green = [0, 255, 0, 255] + red = [255, 0, 0, 255] + threshold = 2 + mask_image = np.where(np.abs(mask_image - green) <= threshold, green, mask_image) + mask_image = np.where(np.abs(mask_image - red) <= threshold, red, mask_image) + return mask_image + mask_path_L = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_L.png") + mask_image_L = clean_mask(cv2.imread(mask_path_L, cv2.IMREAD_UNCHANGED)) + mask_path_R = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + "_R.png") + mask_image_R = clean_mask(cv2.imread(mask_path_R, cv2.IMREAD_UNCHANGED)) + return mask_image_L, mask_image_R + else: + mask_path = os.path.join(os.path.dirname(path), "mask", os.path.basename(path) + ".png") + mask_image = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) + return mask_image + + @staticmethod + def load_label(path): + with open(path, 'r') as f: + label_data = json.load(f) + return label_data + + @staticmethod + def load_rgb(path): + rgb_path = os.path.join(os.path.dirname(path), "rgb", os.path.basename(path) + ".png") + rgb_image = cv2.imread(rgb_path, cv2.IMREAD_COLOR) + return rgb_image + + @staticmethod + def cam_pose_transformation(cam_pose_before): + offset = np.asarray([ + [1, 0, 0, 0], + [0, -1, 0, 0], + [0, 0, -1, 0], + [0, 0, 0, 1]]) + cam_pose_after = cam_pose_before @ offset + return cam_pose_after + + @staticmethod + def load_cam_info(path, binocular=False): + camera_params_path = os.path.join(os.path.dirname(path), "camera_params", os.path.basename(path) + ".json") + with open(camera_params_path, 'r') as f: + label_data = json.load(f) + cam_to_world = np.asarray(label_data["extrinsic"]) + cam_to_world = 
DataLoadUtil.cam_pose_transformation(cam_to_world) + cam_intrinsic = np.asarray(label_data["intrinsic"]) + cam_info = { + "cam_to_world": cam_to_world, + "cam_intrinsic": cam_intrinsic, + "far_plane": label_data["far_plane"], + "near_plane": label_data["near_plane"] + } + if binocular: + cam_to_world_R = np.asarray(label_data["extrinsic_R"]) + cam_to_world_R = DataLoadUtil.cam_pose_transformation(cam_to_world_R) + cam_info["cam_to_world_R"] = cam_to_world_R + return cam_info + + @staticmethod + def get_target_point_cloud(depth, cam_intrinsic, cam_extrinsic, mask, target_mask_label=(0,255,0,255)): + h, w = depth.shape + i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') + + z = depth + x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] + y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] + + points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) + mask = mask.reshape(-1,4) + + target_mask = (mask == target_mask_label).all(axis=-1) + + target_points_camera = points_camera[target_mask] + target_points_camera_aug = np.concatenate([target_points_camera, np.ones((target_points_camera.shape[0], 1))], axis=-1) + + target_points_world = np.dot(cam_extrinsic, target_points_camera_aug.T).T[:, :3] + return { + "points_world": target_points_world, + "points_camera": target_points_camera + } + + @staticmethod + def get_point_cloud(depth, cam_intrinsic, cam_extrinsic): + h, w = depth.shape + i, j = np.meshgrid(np.arange(w), np.arange(h), indexing='xy') + + z = depth + x = (i - cam_intrinsic[0, 2]) * z / cam_intrinsic[0, 0] + y = (j - cam_intrinsic[1, 2]) * z / cam_intrinsic[1, 1] + + points_camera = np.stack((x, y, z), axis=-1).reshape(-1, 3) + points_camera_aug = np.concatenate([points_camera, np.ones((points_camera.shape[0], 1))], axis=-1) + + points_world = np.dot(cam_extrinsic, points_camera_aug.T).T[:, :3] + return { + "points_world": points_world, + "points_camera": points_camera + } + + @staticmethod + def get_target_point_cloud_world_from_path(path, binocular=False, random_downsample_N=65536, voxel_size = 0.005, target_mask_label=(0,255,0,255)): + cam_info = DataLoadUtil.load_cam_info(path, binocular=binocular) + if binocular: + depth_L, depth_R = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane'], binocular=True) + mask_L, mask_R = DataLoadUtil.load_seg(path, binocular=True) + point_cloud_L = DataLoadUtil.get_target_point_cloud(depth_L, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask_L, target_mask_label)['points_world'] + point_cloud_R = DataLoadUtil.get_target_point_cloud(depth_R, cam_info['cam_intrinsic'], cam_info['cam_to_world_R'], mask_R, target_mask_label)['points_world'] + point_cloud_L = PtsUtil.random_downsample_point_cloud(point_cloud_L, random_downsample_N) + point_cloud_R = PtsUtil.random_downsample_point_cloud(point_cloud_R, random_downsample_N) + overlap_points = DataLoadUtil.get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size) + return overlap_points + else: + depth = DataLoadUtil.load_depth(path, cam_info['near_plane'], cam_info['far_plane']) + mask = DataLoadUtil.load_seg(path) + point_cloud = DataLoadUtil.get_target_point_cloud(depth, cam_info['cam_intrinsic'], cam_info['cam_to_world'], mask)['points_world'] + return point_cloud + + + @staticmethod + def voxelize_points(points, voxel_size): + + voxel_indices = np.floor(points / voxel_size).astype(np.int32) + unique_voxels = np.unique(voxel_indices, axis=0, return_inverse=True) + return unique_voxels + + @staticmethod + def 
get_overlapping_points(point_cloud_L, point_cloud_R, voxel_size=0.005): + voxels_L, indices_L = DataLoadUtil.voxelize_points(point_cloud_L, voxel_size) + voxels_R, _ = DataLoadUtil.voxelize_points(point_cloud_R, voxel_size) + + voxel_indices_L = voxels_L.view([('', voxels_L.dtype)]*3) + voxel_indices_R = voxels_R.view([('', voxels_R.dtype)]*3) + overlapping_voxels = np.intersect1d(voxel_indices_L, voxel_indices_R) + mask_L = np.isin(indices_L, np.where(np.isin(voxel_indices_L, overlapping_voxels))[0]) + overlapping_points = point_cloud_L[mask_L] + return overlapping_points + + @staticmethod + def load_points_normals(root, scene_name): + points_path = os.path.join(root, scene_name, "points_and_normals.txt") + points_normals = np.loadtxt(points_path) + return points_normals \ No newline at end of file diff --git a/data_renderer.py b/data_renderer.py new file mode 100644 index 0000000..86ff3e3 --- /dev/null +++ b/data_renderer.py @@ -0,0 +1,137 @@ +import os +import bpy +import sys +import json +import mathutils +import numpy as np +sys.path.append(os.path.dirname(os.path.abspath(__file__))) +from blender_util import BlenderUtils + + +class DataRenderer: + def __init__(self): + config = { + "renderer": { + "generate": + { + "object_dir": "/media/hofee/data/data/scaled_object_meshes", + "table_model_path": "/media/hofee/data/data/others/table.obj", + "plane_size": 10, + "binocular_vision": True, + "light_and_camera_config":{ + "Camera": { + "near_plane": 0.01, + "far_plane": 5, + "fov_vertical": 25, + "resolution": [1280, 800], + "eye_distance": 0.15, + "eye_angle": 25 + }, + "Light": { + "location": (0,0,3.5), + "orientation": (0, 0, 0), + "power": 150 + } + } + } + } + } + + self.plane_size = config["renderer"]["generate"]["plane_size"] + self.table_model_path = config["renderer"]["generate"]["table_model_path"] + self.light_and_camera_config = config["renderer"]["generate"]["light_and_camera_config"] + self.obj_dir = config["renderer"]["generate"]["object_dir"] + self.binocular_vision = config["renderer"]["generate"]["binocular_vision"] + self.obj_name_list = os.listdir(self.obj_dir) + self.target_obj = None + self.random_obj_list = [] + + BlenderUtils.setup_scene(self.light_and_camera_config, self.table_model_path, binocular_vision=self.binocular_vision) + self.table = BlenderUtils.get_obj(BlenderUtils.TABLE_NAME) + print(BlenderUtils.get_obj(BlenderUtils.CAMERA_OBJECT_NAME)) + + def reset(self): + self.target_obj = None + self.random_obj_list = [] + BlenderUtils.reset_objects_and_platform() + + def do_render(self, cam_pose, restore_info, temp_dir): + + self.reset() + self.restore_scene(restore_info=restore_info) + object_name = self.target_obj.name + temp_file_name = f"tmp" + if "." 
in object_name: + object_name = object_name.split(".")[0] + BlenderUtils.set_camera_at(cam_pose) + BlenderUtils.render_and_save(temp_dir, temp_file_name, binocular_vision=self.binocular_vision) + + + def restore_scene(self, restore_info): + + for obj_name, obj_info in restore_info.items(): + print("restoring", obj_name) + if obj_name == BlenderUtils.DISPLAY_TABLE_NAME: + self.restore_display_platform(obj_info) + else: + if obj_name == "target_name": + continue + if obj_name == restore_info["target_name"]: + obj_mesh_path = BlenderUtils.get_obj_path(self.obj_dir, obj_name) + obj = BlenderUtils.load_obj(obj_name, obj_mesh_path) + obj.location = mathutils.Vector(obj_info["location"]) + obj.rotation_euler = mathutils.Vector(obj_info["rotation_euler"]) + obj.scale = mathutils.Vector(obj_info["scale"]) + mat = bpy.data.materials.new(name="GreenMaterial") + mat.diffuse_color = (0.0, 1.0, 0.0, 1.0) # Green with full alpha (1.0) + if len(obj.data.materials) > 0: + obj.data.materials[0] = mat + else: + obj.data.materials.append(mat) + self.target_obj = obj + + def restore_display_platform(self, platform_info): + bpy.ops.mesh.primitive_cylinder_add(radius=platform_info["radius"], depth=platform_info["height"]) + platform = bpy.context.selected_objects[-1] + platform.name = BlenderUtils.DISPLAY_TABLE_NAME + platform.location = mathutils.Vector(platform_info["location"]) + + mat = bpy.data.materials.new(name="RedMaterial") + mat.diffuse_color = (1.0, 0.0, 0.0, 1.0) # Red with full alpha (1.0) + if len(platform.data.materials) > 0: + platform.data.materials[0] = mat + else: + platform.data.materials.append(mat) + + bpy.ops.rigidbody.object_add() + bpy.context.object.rigid_body.type = 'PASSIVE' + bpy.ops.object.shade_auto_smooth() + +def main(temp_dir): + params_data_path = os.path.join(temp_dir, "params.json") + with open(params_data_path, 'r') as f: + params_data = json.load(f) + cam_pose = np.array(params_data["cam_pose"]) + print(cam_pose) + scene_info_path = os.path.join(params_data["scene_path"], "scene_info.json") + with open(scene_info_path, 'r') as f: + scene_info = json.load(f) + + data_renderer = DataRenderer() + data_renderer.do_render(cam_pose, scene_info, temp_dir) + depth_dir = os.path.join(temp_dir, "depth") + for depth_file in os.listdir(depth_dir): + if not depth_file.endswith(".png"): + name, _ = os.path.splitext(depth_file) + file_path = os.path.join(depth_dir, depth_file) + new_file_path = os.path.join(depth_dir, f"{name}.png") + os.rename(file_path,new_file_path) + BlenderUtils.save_cam_params(temp_dir, "tmp", binocular_vision=data_renderer.binocular_vision) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: blender -b -P data_renderer.py -- ") + else: + temp_dir = sys.argv[-1] + main(temp_dir) \ No newline at end of file diff --git a/pts.py b/pts.py new file mode 100644 index 0000000..19d6e2a --- /dev/null +++ b/pts.py @@ -0,0 +1,22 @@ +import numpy as np +import open3d as o3d + +class PtsUtil: + + @staticmethod + def voxel_downsample_point_cloud(point_cloud, voxel_size=0.005): + o3d_pc = o3d.geometry.PointCloud() + o3d_pc.points = o3d.utility.Vector3dVector(point_cloud) + downsampled_pc = o3d_pc.voxel_down_sample(voxel_size) + return np.asarray(downsampled_pc.points) + + @staticmethod + def transform_point_cloud(points, pose_mat): + points_h = np.concatenate([points, np.ones((points.shape[0], 1))], axis=1) + points_h = np.dot(pose_mat, points_h.T).T + return points_h[:, :3] + + @staticmethod + def random_downsample_point_cloud(point_cloud, num_points): + idx = 
np.random.choice(len(point_cloud), num_points, replace=len(point_cloud) < num_points)  # only sample with replacement when the cloud is smaller than num_points
+        return point_cloud[idx]
\ No newline at end of file
diff --git a/reconstruction.py b/reconstruction.py
new file mode 100644
index 0000000..e7bb6ea
--- /dev/null
+++ b/reconstruction.py
@@ -0,0 +1,119 @@
+import numpy as np
+from scipy.spatial import cKDTree
+from pts import PtsUtil
+
+class ReconstructionUtil:
+
+    @staticmethod
+    def compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold=0.01):
+        kdtree = cKDTree(combined_point_cloud)
+        distances, _ = kdtree.query(target_point_cloud)
+        covered_points = np.sum(distances < threshold)
+        coverage_rate = covered_points / target_point_cloud.shape[0]
+        return coverage_rate
+
+    @staticmethod
+    def compute_overlap_rate(new_point_cloud, combined_point_cloud, threshold=0.01):
+        kdtree = cKDTree(combined_point_cloud)
+        distances, _ = kdtree.query(new_point_cloud)
+        overlapping_points = np.sum(distances < threshold)
+        overlap_rate = overlapping_points / new_point_cloud.shape[0]
+        return overlap_rate
+
+    @staticmethod
+    def combine_point_with_view_sequence(point_list, view_sequence):
+        selected_views = []
+        for view_index, _ in view_sequence:
+            selected_views.append(point_list[view_index])
+        return np.vstack(selected_views)
+
+    @staticmethod
+    def compute_next_view_coverage_list(views, combined_point_cloud, target_point_cloud, threshold=0.01):
+        best_view = None
+        best_coverage_increase = -1
+        current_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, combined_point_cloud, threshold)
+
+        for view_index, view in enumerate(views):
+            # stack the candidate view onto the accumulated cloud (was `combined_point_cloud + [view]`,
+            # which broadcasts an elementwise addition over the ndarray instead of concatenating)
+            candidate_cloud = np.vstack([combined_point_cloud, view])
+            down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(candidate_cloud, threshold)
+            new_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, down_sampled_combined_point_cloud, threshold)
+            coverage_increase = new_coverage - current_coverage
+            if coverage_increase > best_coverage_increase:
+                best_coverage_increase = coverage_increase
+                best_view = view_index
+        return best_view, best_coverage_increase
+
+    @staticmethod
+    def compute_next_best_view_sequence_with_overlap(target_point_cloud, point_cloud_list, display_table_point_cloud_list=None, threshold=0.01, overlap_threshold=0.3, status_info=None):
+        selected_views = []
+        current_coverage = 0.0
+        remaining_views = list(range(len(point_cloud_list)))
+        view_sequence = []
+        cnt_processed_view = 0
+        while remaining_views:
+            best_view = None
+            best_coverage_increase = -1
+
+            for view_index in remaining_views:
+
+                if selected_views:
+                    combined_old_point_cloud = np.vstack(selected_views)
+                    down_sampled_old_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_old_point_cloud, threshold)
+                    down_sampled_new_view_point_cloud = PtsUtil.voxel_downsample_point_cloud(point_cloud_list[view_index], threshold)
+                    overlap_rate = ReconstructionUtil.compute_overlap_rate(down_sampled_new_view_point_cloud, down_sampled_old_point_cloud, threshold)
+                    if overlap_rate < overlap_threshold:
+                        continue
+
+                candidate_views = selected_views + [point_cloud_list[view_index]]
+                combined_point_cloud = np.vstack(candidate_views)
+                down_sampled_combined_point_cloud = PtsUtil.voxel_downsample_point_cloud(combined_point_cloud, threshold)
+                new_coverage = ReconstructionUtil.compute_coverage_rate(target_point_cloud, down_sampled_combined_point_cloud, threshold)
+                coverage_increase = new_coverage - current_coverage
+                if coverage_increase > 
best_coverage_increase: + best_coverage_increase = coverage_increase + best_view = view_index + + + if best_view is not None: + if best_coverage_increase <=1e-3: + break + selected_views.append(point_cloud_list[best_view]) + remaining_views.remove(best_view) + current_coverage += best_coverage_increase + cnt_processed_view += 1 + if status_info is not None: + sm = status_info["status_manager"] + app_name = status_info["app_name"] + runner_name = status_info["runner_name"] + sm.set_status(app_name, runner_name, "current coverage", current_coverage) + sm.set_progress(app_name, runner_name, "processed view", cnt_processed_view, len(point_cloud_list)) + + view_sequence.append((best_view, current_coverage)) + + else: + break + if status_info is not None: + sm = status_info["status_manager"] + app_name = status_info["app_name"] + runner_name = status_info["runner_name"] + sm.set_progress(app_name, runner_name, "processed view", len(point_cloud_list), len(point_cloud_list)) + return view_sequence, remaining_views, down_sampled_combined_point_cloud + + @staticmethod + def filter_points(points, points_normals, cam_pose, voxel_size=0.005, theta=45): + sampled_points = PtsUtil.voxel_downsample_point_cloud(points, voxel_size) + kdtree = cKDTree(points_normals[:,:3]) + _, indices = kdtree.query(sampled_points) + nearest_points = points_normals[indices] + + normals = nearest_points[:, 3:] + camera_axis = -cam_pose[:3, 2] + normals_normalized = normals / np.linalg.norm(normals, axis=1, keepdims=True) + cos_theta = np.dot(normals_normalized, camera_axis) + theta_rad = np.deg2rad(theta) + filtered_sampled_points= sampled_points[cos_theta > np.cos(theta_rad)] + + return filtered_sampled_points[:, :3] + \ No newline at end of file diff --git a/run_blender.py b/run_blender.py new file mode 100644 index 0000000..3fc5c47 --- /dev/null +++ b/run_blender.py @@ -0,0 +1,16 @@ + +import os +import sys +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append("/home/hofee/.local/lib/python3.11/site-packages") +import yaml +from blender.data_generator import DataGenerator + +if __name__ == "__main__": + config_path = sys.argv[sys.argv.index('--') + 1] + with open(config_path, "r") as file: + config = yaml.safe_load(file) + + dg = DataGenerator(config) + dg.gen_all_scene_data() + \ No newline at end of file diff --git a/test/tempdir/depth/tmp_L.png b/test/tempdir/depth/tmp_L.png new file mode 100644 index 0000000..8506846 Binary files /dev/null and b/test/tempdir/depth/tmp_L.png differ diff --git a/test/tempdir/depth/tmp_R.png b/test/tempdir/depth/tmp_R.png new file mode 100644 index 0000000..1fee75a Binary files /dev/null and b/test/tempdir/depth/tmp_R.png differ diff --git a/test/tempdir/mask/tmp_L.png b/test/tempdir/mask/tmp_L.png new file mode 100644 index 0000000..aba9186 Binary files /dev/null and b/test/tempdir/mask/tmp_L.png differ diff --git a/test/tempdir/mask/tmp_R.png b/test/tempdir/mask/tmp_R.png new file mode 100644 index 0000000..c1e34d0 Binary files /dev/null and b/test/tempdir/mask/tmp_R.png differ diff --git a/test/tempdir/params.json b/test/tempdir/params.json new file mode 100644 index 0000000..5d1cf97 --- /dev/null +++ b/test/tempdir/params.json @@ -0,0 +1,29 @@ +{ + "cam_pose": [ + [ + -0.8127143979072571, + -0.3794165253639221, + 0.4421972334384918, + 0.2740877568721771 + ], + [ + 0.5826622247695923, + -0.5292212963104248, + 0.616789698600769, + 0.46910107135772705 + ], + [ + 0.0, + 0.7589254975318909, + 0.6511774659156799, + 1.2532192468643188 + ], + [ + 0.0, 
+ 0.0, + 0.0, + 1.0 + ] + ], + "scene_path": "/media/hofee/data/project/python/nbv_reconstruction/sample_for_training/scenes/google_scan-backpack_0288" +} \ No newline at end of file diff --git a/view_sample_util.py b/view_sample_util.py new file mode 100644 index 0000000..1deaa16 --- /dev/null +++ b/view_sample_util.py @@ -0,0 +1,152 @@ + +import numpy as np +import bmesh +from collections import defaultdict +from scipy.spatial.transform import Rotation as R + +class ViewSampleUtil: + @staticmethod + def farthest_point_sampling(points, num_samples): + num_points = points.shape[0] + if num_samples >= num_points: + return points, np.arange(num_points) + sampled_indices = np.zeros(num_samples, dtype=int) + sampled_indices[0] = np.random.randint(num_points) + min_distances = np.full(num_points, np.inf) + for i in range(1, num_samples): + current_point = points[sampled_indices[i - 1]] + dist_to_current_point = np.linalg.norm(points - current_point, axis=1) + min_distances = np.minimum(min_distances, dist_to_current_point) + sampled_indices[i] = np.argmax(min_distances) + downsampled_points = points[sampled_indices] + return downsampled_points, sampled_indices + + @staticmethod + def voxel_downsample(points, voxel_size): + voxel_grid = defaultdict(list) + for i, point in enumerate(points): + voxel_index = tuple((point // voxel_size).astype(int)) + voxel_grid[voxel_index].append(i) + + downsampled_points = [] + downsampled_indices = [] + for indices in voxel_grid.values(): + selected_index = indices[0] + downsampled_points.append(points[selected_index]) + downsampled_indices.append(selected_index) + + return np.array(downsampled_points), downsampled_indices + + @staticmethod + def sample_view_data(obj, distance_range:tuple = (0.2,0.4), voxel_size:float = 0.005, max_views: int = 1) -> dict: + view_data = { + "look_at_points": [], + "cam_positions": [], + } + mesh = obj.data + bm = bmesh.new() + bm.from_mesh(mesh) + bm.verts.ensure_lookup_table() + bm.faces.ensure_lookup_table() + bm.normal_update() + + look_at_points = [] + cam_positions = [] + normals = [] + for v in bm.verts: + look_at_point = np.array(v.co) + + view_data["look_at_points"].append(look_at_point) + normal = np.zeros(3) + for loop in v.link_loops: + normal += np.array(loop.calc_normal()) + normal /= len(v.link_loops) + normal = normal / np.linalg.norm(normal) + if np.isnan(normal).any(): + continue + if np.dot(normal, look_at_point) < 0: + normal = -normal + distance = np.random.uniform(*distance_range) + cam_position = look_at_point + distance * normal + + look_at_points.append(look_at_point) + cam_positions.append(cam_position) + normals.append(normal) + + bm.free() + look_at_points = np.array(look_at_points) + cam_positions = np.array(cam_positions) + voxel_downsampled_look_at_points, selected_indices = ViewSampleUtil.voxel_downsample(look_at_points, voxel_size) + voxel_downsampled_cam_positions = cam_positions[selected_indices] + voxel_downsampled_normals = np.array(normals)[selected_indices] + + fps_downsampled_look_at_points, selected_indices = ViewSampleUtil.farthest_point_sampling(voxel_downsampled_look_at_points, max_views*2) + fps_downsampled_cam_positions = voxel_downsampled_cam_positions[selected_indices] + + view_data["look_at_points"] = fps_downsampled_look_at_points.tolist() + view_data["cam_positions"] = fps_downsampled_cam_positions.tolist() + view_data["normals"] = voxel_downsampled_normals + view_data["voxel_down_sampled_points"] = voxel_downsampled_look_at_points + return view_data + + @staticmethod + def 
get_world_points_and_normals(view_data: dict, obj_world_pose: np.ndarray) -> tuple:
+        world_points = []
+        world_normals = []
+        for voxel_down_sampled_points, normal in zip(view_data["voxel_down_sampled_points"], view_data["normals"]):
+            voxel_down_sampled_points_world = obj_world_pose @ np.append(voxel_down_sampled_points, 1.0)
+            normal_world = obj_world_pose[:3, :3] @ normal
+            world_points.append(voxel_down_sampled_points_world[:3])
+            world_normals.append(normal_world)
+        return np.array(world_points), np.array(world_normals)
+
+    @staticmethod
+    def get_cam_pose(view_data: dict, obj_world_pose: np.ndarray, max_views: int) -> np.ndarray:
+        cam_poses = []
+        min_height_z = float("inf")  # lowest look-at height seen so far (was a magic 1000)
+        for look_at_point, cam_position in zip(view_data["look_at_points"], view_data["cam_positions"]):
+            look_at_point_world = obj_world_pose @ np.append(look_at_point, 1.0)
+            cam_position_world = obj_world_pose @ np.append(cam_position, 1.0)
+            if look_at_point_world[2] < min_height_z:
+                min_height_z = look_at_point_world[2]
+            look_at_point_world = look_at_point_world[:3]
+            cam_position_world = cam_position_world[:3]
+
+            forward_vector = cam_position_world - look_at_point_world
+            forward_vector /= np.linalg.norm(forward_vector)
+
+            up_vector = np.array([0, 0, 1])
+
+            right_vector = np.cross(up_vector, forward_vector)
+            right_vector /= np.linalg.norm(right_vector)
+
+            corrected_up_vector = np.cross(forward_vector, right_vector)
+            rotation_matrix = np.array([right_vector, corrected_up_vector, forward_vector]).T
+            # apply a small random rotation so the sampled views are not perfectly aligned
+            perturbation_angle = 0.01
+            random_axis = np.random.randn(3)
+            random_axis /= np.linalg.norm(random_axis)
+            perturbation_rotation = R.from_rotvec(random_axis * perturbation_angle).as_matrix()
+            rotation_matrix = rotation_matrix @ perturbation_rotation
+            cam_pose = np.eye(4)
+            cam_pose[:3, :3] = rotation_matrix
+            cam_pose[:3, 3] = cam_position_world
+            cam_poses.append(cam_pose)
+        # discard cameras that would sit below the lowest look-at point
+        filtered_cam_poses = []
+        for cam_pose in cam_poses:
+            if cam_pose[2, 3] > min_height_z:
+                filtered_cam_poses.append(cam_pose)
+
+        if len(filtered_cam_poses) > max_views:
+            indices = np.random.choice(len(filtered_cam_poses), max_views, replace=False)
+            filtered_cam_poses = [filtered_cam_poses[i] for i in indices]
+
+        return np.array(filtered_cam_poses)
+
+    @staticmethod
+    def sample_view_data_world_space(obj, distance_range: tuple = (0.3, 0.5), voxel_size: float = 0.005, max_views: int = 1) -> dict:
+        obj_world_pose = np.asarray(obj.matrix_world)
+        view_data = ViewSampleUtil.sample_view_data(obj, distance_range, voxel_size, max_views)
+        view_data["cam_poses"] = ViewSampleUtil.get_cam_pose(view_data, obj_world_pose, max_views)
+        view_data["voxel_down_sampled_points"], view_data["normals"] = ViewSampleUtil.get_world_points_and_normals(view_data, obj_world_pose)
+        return view_data
+
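
The files above agree on a camera/depth convention that is easy to get wrong when consuming the data, so here is a standalone sanity-check sketch (plain NumPy; the values are copied from the config in data_renderer.py, and nothing here is part of the patch itself). Note that render_and_save hard-codes the 0.01–5 m range in its Map Range node, which stays consistent with DataLoadUtil.load_depth only because it matches the configured near/far planes.

import numpy as np

# Values from the config in data_renderer.py.
fov_vertical = 25.0            # degrees
resolution = (1280, 800)       # (width, height) in pixels
near_plane, far_plane = 0.01, 5.0

# set_camera_params: Blender lens (mm) derived from the vertical FOV
# against a fixed 24 mm sensor height.
sensor_height = 24.0
focal_length = sensor_height / (2 * np.tan(np.radians(fov_vertical) / 2))
sensor_width = sensor_height * resolution[0] / resolution[1]

# save_cam_params: pixel-space intrinsics rebuilt from the lens/sensor model.
fx = focal_length * resolution[0] / sensor_width
fy = focal_length * resolution[1] / sensor_height
# Pixels come out square because the sensor aspect ratio above is matched
# to the resolution aspect ratio.
assert np.isclose(fx, fy)

# render_and_save maps depth linearly from [near, far] meters to [0, 1] and
# writes a 16-bit PNG; DataLoadUtil.load_depth inverts that mapping.
depth_m = np.array([0.25, 1.0, 3.0])
encoded = np.round((depth_m - near_plane) / (far_plane - near_plane) * 65535).astype(np.uint16)
decoded = near_plane + (far_plane - near_plane) * (encoded.astype(np.float32) / 65535.0)
assert np.allclose(decoded, depth_m, atol=(far_plane - near_plane) / 65535)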