import os
import sys
import math
import pickle

import numpy as np
import pybullet as p
import pybullet_data
from flask import Flask, request, jsonify

# Resolve the project root (two levels up from this file) and make it importable.
path = os.path.abspath(__file__)
for _ in range(2):
    path = os.path.dirname(path)
PROJECT_ROOT = path
sys.path.append(PROJECT_ROOT)

from runners.runner import Runner
from configs.config import ConfigManager


class ViewGenerator(Runner):
    def __init__(self, config_path, camera_params) -> None:
        super().__init__(config_path)
        self.data_dir = ConfigManager.get("settings", "dataset", "data_dir")
        self.port = ConfigManager.get("settings", "web_api", "port")
        self.camera_params = camera_params
        self.object_model_scale = [0.001, 0.001, 0.001]  # meshes are stored in millimeters
        self.segmentation_labels = {}
        self.app = Flask(__name__)
        self._init_routes()

    def create_experiment(self, backup_name=None):
        return super().create_experiment(backup_name)

    def load_experiment(self, backup_name=None):
        return super().load_experiment(backup_name)

    def _init_routes(self):
        @self.app.route("/get_images", methods=["POST"])
        def get_images_api():
            data = request.get_json()
            camera_pose = data["camera_pose"]
            scene = data["scene"]
            data_type = data["data_type"]
            source = data["source"]
            scene_path = os.path.join(self.data_dir, source, data_type, scene)
            model_dir = os.path.join(self.data_dir, source, "objects")
            self.load_scene(scene_path, model_dir)
            result = self.generate_images(camera_pose)
            # Numpy arrays are not JSON-serializable; convert them to nested lists.
            result = {
                "rgb": result["rgb"].tolist(),
                "depth": result["depth"].tolist(),
                "segmentation": result["segmentation"].tolist(),
                "segmentation_labels": result["segmentation_labels"],
                "camera_params": result["camera_params"],
            }
            return jsonify(result)

    def load_scene(self, scene_path, model_dir):
        scene_path = os.path.join(scene_path, "scene.pickle")
        with open(scene_path, "rb") as f:
            self.scene = pickle.load(f)
        self._initialize_pybullet_scene(model_dir)

    def _initialize_pybullet_scene(self, model_dir):
        if p.isConnected():
            p.resetSimulation()
        else:
            p.connect(p.DIRECT)
        p.setAdditionalSearchPath(pybullet_data.getDataPath())
        p.setGravity(0, 0, 0)
        p.loadURDF("plane100.urdf")
        for obj_name in self.scene.keys():
            orientation = self.scene[obj_name]["rotation"]
            position = self.scene[obj_name]["position"]
            class_name = obj_name[:-4]  # strip the 4-character instance suffix
            obj_path = os.path.join(model_dir, class_name, obj_name, "Scan", "Simp.obj")
            self._load_obj_to_pybullet(
                obj_file_path=obj_path,
                position=position,
                orientation=orientation,
                scale=self.object_model_scale,
            )

    def _load_obj_to_pybullet(self, obj_file_path, position, orientation, scale):
        visual_ind = p.createVisualShape(
            shapeType=p.GEOM_MESH,
            fileName=obj_file_path,
            rgbaColor=[1, 1, 1, 1],
            specularColor=[0.4, 0.4, 0],
            visualFramePosition=[0, 0, 0],
            meshScale=scale,
        )
        p.createMultiBody(
            baseMass=1,
            baseVisualShapeIndex=visual_ind,
            basePosition=position,
            baseOrientation=orientation,
            useMaximalCoordinates=True,
        )

    def _render_image(self, camera_pose):
        width = self.camera_params["width"]
        height = self.camera_params["height"]
        fov = self.camera_params["fov"]
        aspect = width / height
        near = self.camera_params["near"]
        far = self.camera_params["far"]

        # Look-at target: a point one unit along the camera's local +z axis;
        # the up vector is the camera's -y axis.
        T = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 1], [0, 0, 0, 1]])
        look_at_T = camera_pose.dot(T)
        view_matrix = p.computeViewMatrix(
            [camera_pose[0, 3], camera_pose[1, 3], camera_pose[2, 3]],
            [look_at_T[0, 3], look_at_T[1, 3], look_at_T[2, 3]],
            [-camera_pose[0, 1], -camera_pose[1, 1], -camera_pose[2, 1]],
        )
        projection_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)

        images = p.getCameraImage(
            width,
            height,
            view_matrix,
            projection_matrix,
            # In DIRECT mode pybullet falls back to the software TinyRenderer
            # unless an EGL rendering plugin is loaded.
            renderer=p.ER_BULLET_HARDWARE_OPENGL,
        )
        rgb = images[2]
        depth = images[3]
        seg = images[4]

        rgb = np.reshape(rgb, (height, width, 4))
        depth = np.reshape(depth, (height, width))
        seg = np.reshape(seg, (height, width))

        rgb_image = rgb[..., :3]  # drop the alpha channel
        # Convert the non-linear OpenGL depth buffer to metric depth, then to
        # 16-bit millimeters.
        depth_image = far * near / (far - (far - near) * depth)
        depth_image = np.asanyarray(depth_image).astype(np.float32) * 1000.0
        depth_image = depth_image.astype(np.uint16)

        # The ground plane is body 0, so the i-th loaded object appears in the
        # segmentation mask with ID i + 1.
        for i, object_name in enumerate(self.scene.keys()):
            self.segmentation_labels[str(i + 1)] = object_name

        return {
            "rgb": rgb_image,
            "depth": depth_image,
            "segmentation": seg,
            "segmentation_labels": self.segmentation_labels,
            "camera_params": self.camera_params,
        }

    def generate_images(self, camera_pose):
        results = self._render_image(np.asarray(camera_pose))
        p.stepSimulation()
        return results

    def run(self):
        self.app.run(host="0.0.0.0", port=self.port)
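# ---------------------------------------------------------------------------
# Example client (sketch): one way to call the /get_images endpoint above and
# turn the JSON payload back into numpy arrays. Illustrative only: the
# `requests` dependency and the host/port default are assumptions; the payload
# keys mirror get_images_api().
# ---------------------------------------------------------------------------
def fetch_view_example(camera_pose, scene, data_type, source, host="http://localhost:5000"):
    import requests

    payload = {
        "camera_pose": camera_pose,  # 4x4 pose as nested lists
        "scene": scene,
        "data_type": data_type,
        "source": source,
    }
    response = requests.post(f"{host}/get_images", json=payload)
    response.raise_for_status()
    result = response.json()
    return {
        "rgb": np.asarray(result["rgb"], dtype=np.uint8),
        "depth": np.asarray(result["depth"], dtype=np.uint16),  # millimeters
        "segmentation": np.asarray(result["segmentation"]),
        "segmentation_labels": result["segmentation_labels"],
        "camera_params": result["camera_params"],
    }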
# Pinhole intrinsics derived from Isaac Sim camera settings: the diagonal
# aperture and focal length (millimeters) determine the diagonal FOV, from
# which fx/fy are recovered for the chosen resolution.
ISAAC_SIM_CAM_H_APERTURE = 20.955
ISAAC_SIM_CAM_V_APERTURE = 15.2908
ISAAC_SIM_FOCAL_LENGTH = 39
ISAAC_SIM_CAM_D_APERTURE = math.sqrt(
    ISAAC_SIM_CAM_H_APERTURE**2 + ISAAC_SIM_CAM_V_APERTURE**2
)

CAM_WIDTH = 640
CAM_HEIGHT = 480
CAM_FOV = 2 * math.atan(ISAAC_SIM_CAM_D_APERTURE / (2 * ISAAC_SIM_FOCAL_LENGTH)) / math.pi * 180
CAM_NEAR = 0.001
CAM_FAR = 10
CAM_CX = CAM_WIDTH / 2
CAM_CY = CAM_HEIGHT / 2
CAM_FX = 1 / math.tan(CAM_FOV * math.pi / 180.0 / 2.0) * CAM_WIDTH / 2
CAM_FY = 1 / (CAM_HEIGHT / CAM_WIDTH * math.tan(CAM_FOV * math.pi / 180.0 / 2.0)) * CAM_HEIGHT / 2

CAMERA_PARAMS = {
    "width": CAM_WIDTH,
    "height": CAM_HEIGHT,
    "fov": CAM_FOV,
    "near": CAM_NEAR,
    "far": CAM_FAR,
    "cx": CAM_CX,
    "cy": CAM_CY,
    "fx": CAM_FX,
    "fy": CAM_FY,
}

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, default="configs/server_view_generator.yaml")
    args = parser.parse_args()

    vg = ViewGenerator(args.config, CAMERA_PARAMS)
    vg.run()
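# ---------------------------------------------------------------------------
# Reference sketch: back-projecting a returned depth image into a camera-frame
# point cloud with the pinhole intrinsics above, using
# X = (u - cx) * Z / fx and Y = (v - cy) * Z / fy. Illustrative only; it
# assumes the uint16 depth is in millimeters, as produced by _render_image().
# ---------------------------------------------------------------------------
def depth_to_points_example(depth_mm, camera_params=CAMERA_PARAMS):
    z = depth_mm.astype(np.float32) / 1000.0  # back to meters
    v, u = np.indices(z.shape)  # per-pixel row (v) and column (u) indices
    x = (u - camera_params["cx"]) * z / camera_params["fx"]
    y = (v - camera_params["cy"]) * z / camera_params["fy"]
    return np.stack([x, y, z], axis=-1)  # (H, W, 3) points in the camera frame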