2024-10-09 16:13:22 +00:00

120 lines
6.6 KiB
Python
Executable File

import numpy as np
import os
from PIL import Image
import scipy.io as scio
import sys
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_DIR)
from utils.data_utils import get_workspace_mask, CameraInfo, create_point_cloud_from_depth_image
from knn.knn_modules import knn
import torch
from graspnetAPI.utils.xmlhandler import xmlReader
from graspnetAPI.utils.utils import get_obj_pose_list, transform_points
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--dataset_root', default=None, required=True)
parser.add_argument('--camera_type', default='kinect', help='Camera split [realsense/kinect]')
if __name__ == '__main__':
cfgs = parser.parse_args()
dataset_root = cfgs.dataset_root # set dataset root
camera_type = cfgs.camera_type # kinect / realsense
save_path_root = os.path.join(dataset_root, 'graspness')
num_views, num_angles, num_depths = 300, 12, 4
fric_coef_thresh = 0.8
point_grasp_num = num_views * num_angles * num_depths
for scene_id in range(100):
save_path = os.path.join(save_path_root, 'scene_' + str(scene_id).zfill(4), camera_type)
if not os.path.exists(save_path):
os.makedirs(save_path)
labels = np.load(
os.path.join(dataset_root, 'collision_label', 'scene_' + str(scene_id).zfill(4), 'collision_labels.npz'))
collision_dump = []
for j in range(len(labels)):
collision_dump.append(labels['arr_{}'.format(j)])
for ann_id in range(256):
# get scene point cloud
print('generating scene: {} ann: {}'.format(scene_id, ann_id))
depth = np.array(Image.open(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),
camera_type, 'depth', str(ann_id).zfill(4) + '.png')))
seg = np.array(Image.open(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),
camera_type, 'label', str(ann_id).zfill(4) + '.png')))
meta = scio.loadmat(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),
camera_type, 'meta', str(ann_id).zfill(4) + '.mat'))
intrinsic = meta['intrinsic_matrix']
factor_depth = meta['factor_depth']
camera = CameraInfo(1280.0, 720.0, intrinsic[0][0], intrinsic[1][1], intrinsic[0][2], intrinsic[1][2],
factor_depth)
cloud = create_point_cloud_from_depth_image(depth, camera, organized=True)
# remove outlier and get objectness label
depth_mask = (depth > 0)
camera_poses = np.load(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),
camera_type, 'camera_poses.npy'))
camera_pose = camera_poses[ann_id]
align_mat = np.load(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),
camera_type, 'cam0_wrt_table.npy'))
trans = np.dot(align_mat, camera_pose)
workspace_mask = get_workspace_mask(cloud, seg, trans=trans, organized=True, outlier=0.02)
mask = (depth_mask & workspace_mask)
cloud_masked = cloud[mask]
objectness_label = seg[mask]
# get scene object and grasp info
scene_reader = xmlReader(os.path.join(dataset_root, 'scenes', 'scene_' + str(scene_id).zfill(4),
camera_type, 'annotations', '%04d.xml' % ann_id))
pose_vectors = scene_reader.getposevectorlist()
obj_list, pose_list = get_obj_pose_list(camera_pose, pose_vectors)
grasp_labels = {}
for i in obj_list:
file = np.load(os.path.join(dataset_root, 'grasp_label', '{}_labels.npz'.format(str(i).zfill(3))))
grasp_labels[i] = (file['points'].astype(np.float32), file['offsets'].astype(np.float32),
file['scores'].astype(np.float32))
grasp_points = []
grasp_points_graspness = []
for i, (obj_idx, trans_) in enumerate(zip(obj_list, pose_list)):
sampled_points, offsets, fric_coefs = grasp_labels[obj_idx]
collision = collision_dump[i] # Npoints * num_views * num_angles * num_depths
num_points = sampled_points.shape[0]
valid_grasp_mask = ((fric_coefs <= fric_coef_thresh) & (fric_coefs > 0) & ~collision)
valid_grasp_mask = valid_grasp_mask.reshape(num_points, -1)
graspness = np.sum(valid_grasp_mask, axis=1) / point_grasp_num
target_points = transform_points(sampled_points, trans_)
target_points = transform_points(target_points, np.linalg.inv(camera_pose)) # fix bug
grasp_points.append(target_points)
grasp_points_graspness.append(graspness.reshape(num_points, 1))
grasp_points = np.vstack(grasp_points)
grasp_points_graspness = np.vstack(grasp_points_graspness)
grasp_points = torch.from_numpy(grasp_points).cuda()
grasp_points_graspness = torch.from_numpy(grasp_points_graspness).cuda()
grasp_points = grasp_points.transpose(0, 1).contiguous().unsqueeze(0)
masked_points_num = cloud_masked.shape[0]
cloud_masked_graspness = np.zeros((masked_points_num, 1))
part_num = int(masked_points_num / 10000)
for i in range(1, part_num + 2): # lack of cuda memory
if i == part_num + 1:
cloud_masked_partial = cloud_masked[10000 * part_num:]
if len(cloud_masked_partial) == 0:
break
else:
cloud_masked_partial = cloud_masked[10000 * (i - 1):(i * 10000)]
cloud_masked_partial = torch.from_numpy(cloud_masked_partial).cuda()
cloud_masked_partial = cloud_masked_partial.transpose(0, 1).contiguous().unsqueeze(0)
nn_inds = knn(grasp_points, cloud_masked_partial, k=1).squeeze() - 1
cloud_masked_graspness[10000 * (i - 1):(i * 10000)] = torch.index_select(
grasp_points_graspness, 0, nn_inds).cpu().numpy()
max_graspness = np.max(cloud_masked_graspness)
min_graspness = np.min(cloud_masked_graspness)
cloud_masked_graspness = (cloud_masked_graspness - min_graspness) / (max_graspness - min_graspness)
np.save(os.path.join(save_path, str(ann_id).zfill(4) + '.npy'), cloud_masked_graspness)