nbv_grasping/utils/omni_util.py

import numpy as np
import pickle
import json
import cv2
import os
import re
from scipy.spatial.transform import Rotation as R
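
# Utilities for turning per-frame renderer outputs (depth, RGB, semantic
# segmentation, camera parameters) into point clouds. The parameter keys read
# below ("cameraAperture", "cameraViewTransform", "renderProductResolution")
# follow the per-render-product JSON written by Omniverse Replicator-style
# pipelines.
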
class DepthToPCL:
    def __new__(cls, *args, **kwargs):
        raise RuntimeError(
            "Use init_from_disk or init_from_memory to create an instance"
        )

    @classmethod
    def _initialize(
        cls,
        distance_to_camera_path=None,
        rgb_path=None,
        camera_params_path=None,
        seg_path=None,
        seg_label_path=None,
        depth=None,
        rgb=None,
        seg=None,
        seg_label=None,
        camera_params=None,
    ):
        instance = super().__new__(cls)
        instance._distance_to_camera_path = distance_to_camera_path
        instance._rgb_path = rgb_path
        instance._camera_params_path = camera_params_path
        instance._seg_path = seg_path
        instance._seg_label_path = seg_label_path
        instance._depth = depth
        instance._rgb = rgb
        instance._seg = seg
        instance._seg_label = seg_label
        instance._camera_params = camera_params
        if any(
            path is not None
            for path in [
                distance_to_camera_path,
                rgb_path,
                camera_params_path,
                seg_path,
                seg_label_path,
            ]
        ):
            instance._load_from_disk()
        instance._setup()
        return instance

    @classmethod
    def init_from_disk(
        cls,
        distance_to_camera_path,
        rgb_path,
        camera_params_path,
        seg_path,
        seg_label_path,
    ):
        return cls._initialize(
            distance_to_camera_path=distance_to_camera_path,
            rgb_path=rgb_path,
            camera_params_path=camera_params_path,
            seg_path=seg_path,
            seg_label_path=seg_label_path,
        )

    @classmethod
    def init_from_memory(cls, depth, rgb, seg, seg_label, camera_params):
        return cls._initialize(
            depth=depth,
            rgb=rgb,
            seg=seg,
            seg_label=seg_label,
            camera_params=camera_params,
        )

    def _load_from_disk(self):
        self._depth = np.load(self._distance_to_camera_path)
        # Load the RGB image as well, so get_rgb() also works for
        # disk-initialized instances.
        self._rgb = cv2.imread(self._rgb_path)
        self._seg = cv2.imread(self._seg_path, cv2.IMREAD_UNCHANGED)
        with open(self._seg_label_path, "r") as f:
            self._seg_label = json.load(f)
        with open(self._camera_params_path) as f:
            self._camera_params = json.load(f)

    def _setup(self):
        self._read_camera_params()
        self._get_intrinsic_matrix()

    def _read_camera_params(self):
        self._h_aperture = self._camera_params["cameraAperture"][0]
        self._v_aperture = self._camera_params["cameraAperture"][1]
        self._h_aperture_offset = self._camera_params["cameraApertureOffset"][0]
        self._v_aperture_offset = self._camera_params["cameraApertureOffset"][1]
        self._focal_length = self._camera_params["cameraFocalLength"]
        self._h_resolution = self._camera_params["renderProductResolution"][0]
        self._v_resolution = self._camera_params["renderProductResolution"][1]
        self._cam_t = self._camera_params["cameraViewTransform"]

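    # Pinhole intrinsics from physical camera parameters: the focal length in
    # pixels is the focal length (mm) scaled by resolution / aperture (mm),
    # and the principal point is taken to be the image center (the aperture
    # offsets are read above but not applied here).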
    def _get_intrinsic_matrix(self):
        self._focal_x = self._h_resolution * self._focal_length / self._h_aperture
        self._focal_y = self._v_resolution * self._focal_length / self._v_aperture
        self._center_x = self._h_resolution / 2
        self._center_y = self._v_resolution / 2
        self.intrinsic_matrix = np.array(
            [
                [self._focal_x, 0, self._center_x],
                [0, self._focal_y, self._center_y],
                [0, 0, 1],
            ]
        )
        return self.intrinsic_matrix

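    # The stored view transform appears to follow the USD/OpenGL convention
    # (camera looks down -Z); right-multiplying by diag(1, -1, -1, 1) flips
    # the Y and Z axes into the computer-vision convention (+Z forward,
    # +Y down).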
    def _get_extrinsic_matrix(self):
        self._cam_pose = np.linalg.inv(np.resize(self._cam_t, (4, 4))).T.dot(
            np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        )
        return self._cam_pose

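    # Back-projection: for a pixel (u, v) with depth Z, the camera-frame point
    # is X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy. The factors computed
    # below are (u - cx) / fx and (v - cy) / fy, so each must be scaled by the
    # per-pixel depth to obtain metric coordinates.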
    def get_pcd(self, target_name=None):
        u_indices, v_indices = np.meshgrid(
            np.arange(self._h_resolution), np.arange(self._v_resolution)
        )
        x_factors = (u_indices - self._center_x) / self._focal_x
        y_factors = (v_indices - self._center_y) / self._focal_y
        if target_name is not None:
            if target_name == OmniUtil.FOREGROUND:
                # Foreground = everything that is neither unlabelled nor background.
                unlabelled_mask = self.get_mask_rgba(
                    self._seg_label, OmniUtil.UNLABELLED
                )
                background_mask = self.get_mask_rgba(
                    self._seg_label, OmniUtil.BACKGROUND
                )
                if unlabelled_mask is None:
                    target_mask = (self._seg != background_mask).any(axis=2)
                else:
                    target_mask = (self._seg != unlabelled_mask).any(axis=2) & (
                        self._seg != background_mask
                    ).any(axis=2)
            else:
                target_mask = (
                    self._seg == self.get_mask_rgba(self._seg_label, target_name)
                ).all(axis=2)
        else:
            target_mask = np.ones((self._v_resolution, self._h_resolution), dtype=bool)
        valid_x_factors = x_factors[target_mask]
        valid_y_factors = y_factors[target_mask]
        valid_z_factors = self._depth[target_mask]
        # Scale the normalized image-plane factors by the per-pixel depth (Z)
        # to obtain metric camera-frame coordinates (see the note above).
        points = np.stack(
            [
                valid_x_factors * valid_z_factors,
                valid_y_factors * valid_z_factors,
                valid_z_factors,
            ],
            axis=1,
        )
        return points

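    # Label files map an RGBA string key to a class entry, e.g. (hypothetical):
    #   {"(140, 25, 255, 255)": {"class": "table_001"}, ...}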
    @staticmethod
    def get_mask_rgba(mask_labels, mask_name):
        name_list = [name_dict["class"] for name_dict in list(mask_labels.values())]
        if mask_name not in name_list:
            return None
        rgba_list = list(mask_labels.keys())
        mask_rgba_str = rgba_list[name_list.index(mask_name)]
        r, g, b, a = re.findall(r"\d+", mask_rgba_str)
        # The label keys store channels as (R, G, B, A), but the segmentation
        # image is read with cv2 and is therefore BGRA; swap R and B to match.
        r, g, b, a = int(b), int(g), int(r), int(a)
        return r, g, b, a

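    # Builds one point cloud covering every target: each target's points are
    # tagged with its index in a fourth column, the stacked cloud is sampled
    # down to N points, and the label column is then used to split the sample
    # back into a per-target dictionary.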
    def get_segmented_pcd(self, target_list, N=15000):
        u_indices, v_indices = np.meshgrid(
            np.arange(self._h_resolution), np.arange(self._v_resolution)
        )
        x_factors = (u_indices - self._center_x) / self._focal_x
        y_factors = (v_indices - self._center_y) / self._focal_y
        points_dict = {}
        total_points_with_label = []
        for target_idx, target_name in enumerate(target_list):
            target_mask = (
                self._seg == self.get_mask_rgba(self._seg_label, target_name)
            ).all(axis=2)
            valid_z_factors = self._depth[target_mask]
            # Scale by depth, as in get_pcd, to obtain metric coordinates.
            valid_x_factors = x_factors[target_mask] * valid_z_factors
            valid_y_factors = y_factors[target_mask] * valid_z_factors
            label = np.ones_like(valid_x_factors) * target_idx
            target_points_with_label = np.stack(
                [valid_x_factors, valid_y_factors, valid_z_factors, label], axis=1
            )
            total_points_with_label.append(target_points_with_label)
        total_points_with_label = np.concatenate(total_points_with_label, axis=0)
        total_points_with_label = self.sample_pcl(total_points_with_label, N)
        total_points = total_points_with_label[:, :3]
        for target_idx, target_name in enumerate(target_list):
            pts_seg = total_points_with_label[:, 3] == target_idx
            points_dict[target_name] = total_points_with_label[pts_seg, :3]
        return total_points, points_dict

    def get_rgb(self):
        return self._rgb

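    # Random (sub)sampling to a fixed point count; sampling is done with
    # replacement only when the cloud has fewer points than requested.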
    @staticmethod
    def sample_pcl(pcl, n_pts=1024):
        indices = np.random.choice(pcl.shape[0], n_pts, replace=pcl.shape[0] < n_pts)
        return pcl[indices, :]

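
# A minimal usage sketch for DepthToPCL (hypothetical file names; the actual
# per-frame layout is assembled by OmniUtil.get_depth_to_pointcloud_instance
# below):
#   d2p = DepthToPCL.init_from_disk(
#       "distance_to_image_plane_0000.npy", "rgb_0000.png",
#       "camera_params_0000.json", "semantic_segmentation_0000.png",
#       "semantic_segmentation_labels_0000.json",
#   )
#   cloud = d2p.get_pcd()  # (N, 3) camera-frame points
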
class OmniUtil:
    FOREGROUND = "FOREGROUND"
    BACKGROUND = "BACKGROUND"
    UNLABELLED = "UNLABELLED"
    NON_OBJECT_LIST = ['chair_028', 'chair_029', 'chair_026', 'chair_027', 'table_025', 'table_027', 'table_026', 'table_028', 'sofa_014', 'sofa_013', 'picnic_basket_010', 'picnic_basket_011', 'cabinet_009', 'flower_pot_023', 'flower_pot_022', 'flower_pot_021', 'chair_017', 'chair_020', 'chair_012', 'chair_010', 'chair_018', 'chair_025', 'chair_024', 'chair_011', 'chair_001', 'chair_013', 'chair_004', 'chair_021', 'chair_023', 'chair_006', 'chair_014', 'chair_007', 'chair_003', 'chair_009', 'chair_022', 'chair_015', 'chair_016', 'chair_008', 'chair_005', 'chair_019', 'chair_002', 'table_004', 'table_023', 'table_014', 'table_024', 'table_019', 'table_022', 'table_007', 'table_017', 'table_013', 'table_002', 'table_016', 'table_009', 'table_008', 'table_003', 'table_015', 'table_001', 'table_018', 'table_005', 'table_020', 'table_021', 'sofa_001', 'sofa_005', 'sofa_012', 'sofa_009', 'sofa_006', 'sofa_008', 'sofa_011', 'sofa_004', 'sofa_003', 'sofa_002', 'sofa_007', 'sofa_010', 'picnic_basket_005', 'picnic_basket_004', 'picnic_basket_001', 'picnic_basket_008', 'picnic_basket_002', 'picnic_basket_009', 'picnic_basket_006', 'picnic_basket_003', 'picnic_basket_007', 'cabinet_006', 'cabinet_008', 'cabinet_002', 'cabinet_001', 'cabinet_005', 'cabinet_007', 'flower_pot_013', 'flower_pot_005', 'flower_pot_008', 'flower_pot_001', 'flower_pot_003', 'flower_pot_020', 'flower_pot_006', 'flower_pot_012', 'flower_pot_018', 'flower_pot_007', 'flower_pot_002', 'flower_pot_011', 'flower_pot_010', 'flower_pot_016', 'flower_pot_004', 'flower_pot_014', 'flower_pot_017', 'flower_pot_019']
    CAMERA_PARAMS_TEMPLATE = "camera_params_{}.json"
    DISTANCE_TEMPLATE = "distance_to_image_plane_{}.npy"
    RGB_TEMPLATE = "rgb_{}.png"
    MASK_TEMPLATE = "semantic_segmentation_{}.png"
    MASK_LABELS_TEMPLATE = "semantic_segmentation_labels_{}.json"
    SCORE_LABEL_TEMPLATE = "label_{}.json"
    RGB_FEAT_TEMPLATE = "rgb_feat_{}.npy"

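    # Per-frame files live in the scene directory and follow the *_TEMPLATE
    # names above; a frame path such as ".../scene_0/0050" is split into the
    # scene root and a 4-character frame index.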
    @staticmethod
    def get_depth_to_pointcloud_instance(path):
        root, idx = path[:-4], path[-4:]
        distance2plane_path = os.path.join(root, OmniUtil.DISTANCE_TEMPLATE.format(idx))
        rgb_path = os.path.join(root, OmniUtil.RGB_TEMPLATE.format(idx))
        cam_params_path = os.path.join(
            root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
        )
        seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
        seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
        depth_to_pcd = DepthToPCL.init_from_disk(
            distance2plane_path, rgb_path, cam_params_path, seg_path, seg_labels_path
        )
        return depth_to_pcd

    @staticmethod
    def get_points(path, object_name=None):
        depth_to_pcd = OmniUtil.get_depth_to_pointcloud_instance(path)
        pcd = depth_to_pcd.get_pcd(object_name)
        points = np.asarray(pcd, dtype=np.float32)
        return points

    @staticmethod
    def get_segmented_points(path, target_list):
        depth_to_pcd = OmniUtil.get_depth_to_pointcloud_instance(path)
        total_points, target_points_dict = depth_to_pcd.get_segmented_pcd(target_list)
        return total_points, target_points_dict

    @staticmethod
    def get_object_list(path, contains_non_obj=False):
        root, idx = path[:-4], path[-4:]
        seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
        with open(seg_labels_path, "r") as f:
            seg_labels = json.load(f)
        object_list = [v["class"] for v in seg_labels.values()]
        # Guard both removals: list.remove() raises ValueError on a missing item.
        if OmniUtil.BACKGROUND in object_list:
            object_list.remove(OmniUtil.BACKGROUND)
        if OmniUtil.UNLABELLED in object_list:
            object_list.remove(OmniUtil.UNLABELLED)
        with open(os.path.join(root, "occluder.pickle"), "rb") as f:
            occluder_list = pickle.load(f)
        with open(os.path.join(root, "fall_objects.pickle"), "rb") as f:
            fall_objects_list = pickle.load(f)
        non_obj_list = occluder_list + fall_objects_list
        if not contains_non_obj:
            for non_obj in non_obj_list:
                if non_obj in object_list:
                    object_list.remove(non_obj)
        return object_list

    @staticmethod
    def get_rotation_mat(path):
        root, idx = os.path.split(path)
        camera_params_path = os.path.join(
            root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
        )
        with open(camera_params_path, "r") as f:
            raw_camera_params = json.load(f)
        cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
            (4, 4)
        )
        cam_rot_mat = cam_transform[:3, :3].dot(
            np.array([[1, 0, 0], [0, -1, 0], [0, 0, -1]])
        )
        return cam_rot_mat

    @staticmethod
    def get_rgb(path):
        root, idx = os.path.split(path)
        rgb_path = os.path.join(root, OmniUtil.RGB_TEMPLATE.format(idx))
        rgb = cv2.imread(rgb_path)
        return cv2.cvtColor(rgb, cv2.COLOR_BGR2RGB)

    @staticmethod
    def get_depth(path):
        root, idx = os.path.split(path)
        depth_path = os.path.join(root, OmniUtil.DISTANCE_TEMPLATE.format(idx))
        depth = np.load(depth_path)
        return depth

    @staticmethod
    def get_seg_data(path):
        root, idx = os.path.split(path)
        seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
        with open(seg_labels_path, "r") as f:
            seg_labels = json.load(f)
        seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
        seg = cv2.imread(seg_path, cv2.IMREAD_UNCHANGED)
        return seg, seg_labels

    @staticmethod
    def get_single_seg(path, object_name):
        root, idx = os.path.split(path)
        seg_labels_path = os.path.join(root, OmniUtil.MASK_LABELS_TEMPLATE.format(idx))
        with open(seg_labels_path, "r") as f:
            seg_labels = json.load(f)
        seg_path = os.path.join(root, OmniUtil.MASK_TEMPLATE.format(idx))
        seg = cv2.imread(seg_path, cv2.IMREAD_UNCHANGED)
        object_mask = (
            seg == OmniUtil.get_mask_rgba(seg_labels, object_name)
        ).all(axis=2)
        return object_mask

    @staticmethod
    def get_mask_rgba(mask_labels, mask_name):
        # Same lookup as DepthToPCL.get_mask_rgba; delegate to keep a single
        # implementation.
        return DepthToPCL.get_mask_rgba(mask_labels, mask_name)

    @staticmethod
    def get_rgb_feat(path):
        root, idx = os.path.split(path)
        rgb_feat_path = os.path.join(root, OmniUtil.RGB_FEAT_TEMPLATE.format(idx))
        rgb_feat = np.load(rgb_feat_path)
        return rgb_feat

    @staticmethod
    def get_target_object_list(path):
        return OmniUtil.get_object_list(path, contains_non_obj=False)  # TODO: generalize this

    @staticmethod
    def get_transform_mat(path):
        root, idx = os.path.split(path)
        camera_params_path = os.path.join(
            root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
        )
        with open(camera_params_path, "r") as f:
            raw_camera_params = json.load(f)
        cam_transform = np.asarray(raw_camera_params["cameraViewTransform"]).reshape(
            (4, 4)
        )
        # Invert the (row-major) view matrix to get the camera-to-world pose,
        # then flip Y and Z to move from the -Z-forward render convention to
        # the +Z-forward computer-vision convention.
        real_cam_transform = np.linalg.inv(cam_transform).T
        real_cam_transform = real_cam_transform.dot(
            np.array([[1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1]])
        )
        return real_cam_transform

    @staticmethod
    def get_intrinsic_matrix(path):
        root, idx = os.path.split(path)
        camera_params_path = os.path.join(
            root, OmniUtil.CAMERA_PARAMS_TEMPLATE.format(idx)
        )
        with open(camera_params_path, "r") as f:
            raw_camera_params = json.load(f)
        h_aperture = raw_camera_params["cameraAperture"][0]
        v_aperture = raw_camera_params["cameraAperture"][1]
        focal_length = raw_camera_params["cameraFocalLength"]
        h_resolution = raw_camera_params["renderProductResolution"][0]
        v_resolution = raw_camera_params["renderProductResolution"][1]
        focal_x = h_resolution * focal_length / h_aperture
        focal_y = v_resolution * focal_length / v_aperture
        center_x = h_resolution / 2
        center_y = v_resolution / 2
        intrinsic_matrix = np.array(
            [
                [focal_x, 0, center_x],
                [0, focal_y, center_y],
                [0, 0, 1],
            ]
        )
        return intrinsic_matrix

    @staticmethod
    def get_extrinsic_matrix(path):
        # Identical computation to get_transform_mat; delegate to keep a
        # single implementation.
        return OmniUtil.get_transform_mat(path)

    @staticmethod
    def get_scene_data(path):
        root, _ = os.path.split(path)
        scene_data_path = os.path.join(root, "scene.pickle")
        with open(scene_data_path, "rb") as f:
            scene_data = pickle.load(f)
        return scene_data

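    # Object-to-camera pose: with T_c2w the camera extrinsic (camera-to-world)
    # and T_o2w the object's world pose from scene.pickle,
    # T_o2c = inv(T_c2w) @ T_o2w.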
    @staticmethod
    def get_o2c_pose(path, object_name):
        scene_data = OmniUtil.get_scene_data(path)
        cam_pose = OmniUtil.get_extrinsic_matrix(path)
        pos = scene_data[object_name]["position"]
        quat = scene_data[object_name]["rotation"]
        # Assumes the stored quaternion uses the (x, y, z, w) order that
        # scipy's Rotation.from_quat expects.
        rot = R.from_quat(quat).as_matrix()
        obj_pose = np.eye(4)
        obj_pose[:3, :3] = rot
        obj_pose[:3, 3] = pos
        obj_cam_pose = np.linalg.inv(cam_pose) @ obj_pose
        return np.asarray(obj_cam_pose)

if __name__ == "__main__":
    test_path = r"/mnt/h/AI/Datasets/nbv1/sample_one/scene_0/0050"
    obj_list = OmniUtil.get_object_list(test_path, contains_non_obj=True)
    print(obj_list)
    # get_segmented_points returns (total_points, per-object dict); only the
    # (N, 3) array can be written with savetxt.
    total_pts, _ = OmniUtil.get_segmented_points(test_path, target_list=obj_list)
    np.savetxt("pts1.txt", total_pts)
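    # Further sketch (assumes scene.pickle exists alongside the frame):
    # o2c = OmniUtil.get_o2c_pose(test_path, obj_list[0])  # 4x4 object-in-camera pose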