240 lines
8.7 KiB
Python
240 lines
8.7 KiB
Python
|
import os
|
|||
|
import pickle
|
|||
|
import pybullet as p
|
|||
|
import time
|
|||
|
import pybullet_data
|
|||
|
import numpy as np
|
|||
|
import matplotlib.pyplot as plt
|
|||
|
from PIL import Image
|
|||
|
import open3d as o3d
|
|||
|
import cv2
|
|||
|
import math
|
|||
|
import json
|
|||
|
|
|||
|
|
|||
|
class GenerateScene:
|
|||
|
def __init__(self, dataset_path, scene_path, output_path, camera_params) -> None:
|
|||
|
self._init_variables()
|
|||
|
self._load_object_dataset(dataset_path)
|
|||
|
self._load_scene(scene_path)
|
|||
|
self._set_output_path(output_path)
|
|||
|
self._set_camera_params(camera_params)
|
|||
|
|
|||
|
|
|||
|
def _init_variables(self):
|
|||
|
self.object_paths = {}
|
|||
|
self.scene = {}
|
|||
|
self.object_model_scale = [0.001, 0.001, 0.001]
|
|||
|
self.output_path = None
|
|||
|
self.camera_params = None
|
|||
|
self.segmentation_labels = {}
|
|||
|
|
|||
|
|
|||
|
def _extract_model_name(self, path):
|
|||
|
# Specify it according to specific file directory
|
|||
|
NUM_SLASH_BEFORE_NAME = 1
|
|||
|
|
|||
|
num_slash = 0
|
|||
|
object_name_str = []
|
|||
|
|
|||
|
for i in range(len(path)):
|
|||
|
index = len(path) -1 - i
|
|||
|
char = path[index]
|
|||
|
|
|||
|
if(num_slash == NUM_SLASH_BEFORE_NAME):
|
|||
|
object_name_str.append(char)
|
|||
|
|
|||
|
if(char == "/"):
|
|||
|
num_slash += 1
|
|||
|
|
|||
|
object_name_str.reverse()
|
|||
|
object_name_str = ''.join(object_name_str[1:])
|
|||
|
return object_name_str
|
|||
|
|
|||
|
|
|||
|
def _load_object_dataset(self, dataset_path):
|
|||
|
if(dataset_path[-1] == "/"):
|
|||
|
dataset_path = dataset_path[:-1]
|
|||
|
for root, dirs, files in os.walk(dataset_path, topdown=False):
|
|||
|
for name in dirs:
|
|||
|
path = os.path.join(root, name)
|
|||
|
if(os.path.join(root, name)[-4:] == "Scan"):
|
|||
|
name = self._extract_model_name(path)
|
|||
|
self.object_paths[name] = path+"/Simp.obj"
|
|||
|
|
|||
|
|
|||
|
def _load_scene(self, scene_path):
|
|||
|
if(scene_path[-1] == "/"):
|
|||
|
scene_path = scene_path[:-1]
|
|||
|
scene_path = scene_path + "/scene.pickle"
|
|||
|
self.scene = pickle.load(open(scene_path, 'rb'))
|
|||
|
|
|||
|
|
|||
|
def _set_output_path(self, output_path):
|
|||
|
self.output_path = output_path
|
|||
|
if(self.output_path[-1] == "/"):
|
|||
|
self.output_path = self.output_path[:-1]
|
|||
|
|
|||
|
|
|||
|
def _set_camera_params(self, camera_params):
|
|||
|
self.camera_params = camera_params
|
|||
|
|
|||
|
|
|||
|
def load_camera_pose_from_frame(self, camera_params_path):
|
|||
|
with open(camera_params_path, "r") as f:
|
|||
|
camera_params = json.load(f)
|
|||
|
|
|||
|
view_transform = camera_params["cameraViewTransform"]
|
|||
|
print(view_transform)
|
|||
|
view_transform = np.resize(view_transform, (4,4))
|
|||
|
view_transform = np.linalg.inv(view_transform).T
|
|||
|
offset = np.mat([[1,0,0,0],[0,-1,0,0],[0,0,-1,0],[0,0,0,1]])
|
|||
|
view_transform = view_transform.dot(offset)
|
|||
|
print(view_transform)
|
|||
|
return view_transform
|
|||
|
|
|||
|
|
|||
|
def _load_obj_to_pybullet(self, obj_file_path, position, orientation, scale):
|
|||
|
visual_ind = p.createVisualShape(
|
|||
|
shapeType=p.GEOM_MESH,
|
|||
|
fileName=obj_file_path,
|
|||
|
rgbaColor=[1, 1, 1, 1],
|
|||
|
specularColor=[0.4, 0.4, 0],
|
|||
|
visualFramePosition=[0, 0, 0],
|
|||
|
meshScale=scale)
|
|||
|
p.createMultiBody(
|
|||
|
baseMass=1,
|
|||
|
baseVisualShapeIndex=visual_ind,
|
|||
|
basePosition=position,
|
|||
|
baseOrientation=orientation,
|
|||
|
useMaximalCoordinates=True)
|
|||
|
|
|||
|
|
|||
|
def _render_image(self, camera_pose):
|
|||
|
width = self.camera_params["width"]
|
|||
|
height = self.camera_params["height"]
|
|||
|
fov = self.camera_params["fov"]
|
|||
|
aspect = width / height
|
|||
|
near = self.camera_params["near"]
|
|||
|
far = self.camera_params["far"]
|
|||
|
|
|||
|
T = np.mat([[1,0,0,0],
|
|||
|
[0,1,0,0],
|
|||
|
[0,0,1,1],
|
|||
|
[0,0,0,1]])
|
|||
|
look_at_T = camera_pose.dot(T)
|
|||
|
view_matrix = p.computeViewMatrix([camera_pose[0,3], camera_pose[1,3], camera_pose[2,3]],
|
|||
|
[look_at_T[0,3], look_at_T[1,3], look_at_T[2,3]],
|
|||
|
[-camera_pose[0,1], -camera_pose[1,1], -camera_pose[2,1]])
|
|||
|
projection_matrix = p.computeProjectionMatrixFOV(fov, aspect, near, far)
|
|||
|
|
|||
|
# Get depth values using the OpenGL renderer
|
|||
|
images = p.getCameraImage(width, height, view_matrix, projection_matrix, renderer=p.ER_BULLET_HARDWARE_OPENGL)
|
|||
|
rgb = images[2]
|
|||
|
depth = images[3]
|
|||
|
seg = images[4]
|
|||
|
|
|||
|
rgb_image = cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)
|
|||
|
cv2.imwrite(self.output_path+'/rgb.jpg',rgb_image)
|
|||
|
|
|||
|
depth_image = far * near / (far - (far - near) * depth)
|
|||
|
depth_image = np.asanyarray(depth_image).astype(np.float32) * 1000.0
|
|||
|
depth_image_array = depth_image
|
|||
|
depth_image = (depth_image.astype(np.uint16))
|
|||
|
depth_image = Image.fromarray(depth_image)
|
|||
|
depth_image.save(self.output_path+'/depth.png')
|
|||
|
|
|||
|
cv2.imwrite(self.output_path+'/seg.jpg', seg)
|
|||
|
|
|||
|
id = 0
|
|||
|
for object_name in self.scene.keys():
|
|||
|
self.segmentation_labels[str(id+1)] = object_name
|
|||
|
id += 1
|
|||
|
with open(self.output_path+"/seg_labels.json", 'w') as seg_labels:
|
|||
|
json.dump(self.segmentation_labels, seg_labels)
|
|||
|
|
|||
|
with open(self.output_path+"/cam_intrinsics.json", 'w') as cam_intrinsics:
|
|||
|
json.dump(self.camera_params, cam_intrinsics)
|
|||
|
|
|||
|
|
|||
|
def generate_images(self, camera_pose):
|
|||
|
physicsClient = p.connect(p.GUI)
|
|||
|
p.setAdditionalSearchPath(pybullet_data.getDataPath())
|
|||
|
p.setGravity(0,0,0)
|
|||
|
p.loadURDF("plane100.urdf")
|
|||
|
|
|||
|
for obj_name in self.scene.keys():
|
|||
|
orientation = self.scene[obj_name]["rotation"]
|
|||
|
position = self.scene[obj_name]["position"]
|
|||
|
self._load_obj_to_pybullet(obj_file_path=self.object_paths[obj_name],
|
|||
|
position=position,
|
|||
|
orientation=orientation,
|
|||
|
scale=self.object_model_scale)
|
|||
|
self._render_image(camera_pose)
|
|||
|
p.stepSimulation()
|
|||
|
p.disconnect()
|
|||
|
|
|||
|
|
|||
|
def visualize_pcd(self):
|
|||
|
color_image = o3d.io.read_image(self.output_path+'/rgb.jpg')
|
|||
|
depth_image = o3d.io.read_image(self.output_path+'/depth.png')
|
|||
|
|
|||
|
rgbd_image = o3d.geometry.RGBDImage.create_from_color_and_depth(color_image, depth_image,convert_rgb_to_intensity=False)
|
|||
|
intrinsic = o3d.camera.PinholeCameraIntrinsic(
|
|||
|
o3d.camera.PinholeCameraIntrinsicParameters.Kinect2DepthCameraDefault )
|
|||
|
intrinsic.set_intrinsics(width=self.camera_params["width"],
|
|||
|
height=self.camera_params["height"],
|
|||
|
fx=self.camera_params["fx"],
|
|||
|
fy=self.camera_params["fy"],
|
|||
|
cx=self.camera_params["cx"],
|
|||
|
cy=self.camera_params["cy"])
|
|||
|
|
|||
|
point_cloud = o3d.geometry.PointCloud.create_from_rgbd_image(rgbd_image, intrinsic)
|
|||
|
o3d.visualization.draw_geometries([point_cloud])
|
|||
|
|
|||
|
def print_scene(self):
|
|||
|
print("================= Scene Objects: =================")
|
|||
|
print(self.scene.keys())
|
|||
|
print("==================================================")
|
|||
|
|
|||
|
|
|||
|
DATASET_PATH = "/home/hzx/Downloads/OmniObject3d-simplified/output"
|
|||
|
SCENE_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/scene_0/"
|
|||
|
OUTPUT_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/"
|
|||
|
FRAME_PATH = "/home/hzx/Projects/ActivePerception/data_generation/output/scene_0/camera_params_0119.json"
|
|||
|
|
|||
|
ISAAC_SIM_CAM_H_APERTURE = 20.955 # Isaac Sim里读取的
|
|||
|
ISAAC_SIM_CAM_V_APERTURE = 15.2908 # Isaac Sim里读取的
|
|||
|
ISAAC_SIM_FOCAL_LENGTH = 39 # 试出来的,其实Isaac Sim里原本是24
|
|||
|
ISAAC_SIM_CAM_D_APERTURE = math.sqrt(ISAAC_SIM_CAM_H_APERTURE**2 + ISAAC_SIM_CAM_V_APERTURE**2)
|
|||
|
|
|||
|
|
|||
|
CAM_WIDTH = 640
|
|||
|
CAM_HEIGHT = 480
|
|||
|
CAM_FOV = 2*math.atan(ISAAC_SIM_CAM_D_APERTURE/(2*ISAAC_SIM_FOCAL_LENGTH))/math.pi*180
|
|||
|
CAM_NEAR = 0.001 # 成像最近距离
|
|||
|
CAM_FAR = 10 # 成像最远距离
|
|||
|
CAM_CX = CAM_WIDTH/2
|
|||
|
CAM_CY = CAM_HEIGHT/2
|
|||
|
CAM_FX = 1/math.tan(CAM_FOV*math.pi/180.0/2.0)*CAM_WIDTH/2
|
|||
|
CAM_FY = 1/(CAM_HEIGHT/CAM_WIDTH*math.tan(CAM_FOV*math.pi/180.0/2.0))*CAM_HEIGHT/2
|
|||
|
|
|||
|
CAMERA_PARAMS = {
|
|||
|
"width": CAM_WIDTH,
|
|||
|
"height": CAM_HEIGHT,
|
|||
|
"fov": CAM_FOV,
|
|||
|
"near": CAM_NEAR,
|
|||
|
"far": CAM_FAR,
|
|||
|
"cx": CAM_CX,
|
|||
|
"cy": CAM_CY,
|
|||
|
"fx": CAM_FX,
|
|||
|
"fy": CAM_FY,
|
|||
|
}
|
|||
|
|
|||
|
|
|||
|
if __name__ == "__main__":
|
|||
|
gs = GenerateScene(DATASET_PATH, SCENE_PATH, OUTPUT_PATH, CAMERA_PARAMS)
|
|||
|
gs.print_scene()
|
|||
|
cam_pose = gs.load_camera_pose_from_frame(FRAME_PATH)
|
|||
|
gs.generate_images(cam_pose) # 在OUTPUT_PATH路径下生成rgb、depth、segmentation图
|
|||
|
# gs.visualize_pcd()
|