Interfaced with GS-Net

0nhc 2024-10-19 18:31:46 -05:00
parent 4b696a93d3
commit ca9b6733f0
7 changed files with 167 additions and 126 deletions

View File

@@ -1,5 +1,4 @@
# Roadmap
-* Change simulation scenes
* Interface with GS-Net
# Updated installation steps for my PC environment

View File

@@ -2,7 +2,7 @@ bt_sim:
  gui: True
  gripper_force: 10
  # scene: random
-  scene: $(find active_grasp)/cfg/sim/challenging_scene_1.yaml
+  scene: $(find active_grasp)/cfg/sim/challenging_scene_2.yaml
hw:
  roi_calib_file: $(find active_grasp)/cfg/hw/T_base_tag.txt
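Editor's note: the scene entry relies on roslaunch-style $(find <pkg>) substitution. As a hedged illustration only (assuming rospkg is available and the package is named active_grasp; the repo's own config loader is not shown in this diff), the substitution can be resolved like this:

```python
# Minimal sketch: resolve a "$(find active_grasp)/..." style path outside roslaunch.
# Assumes rospkg is installed and a ROS package named "active_grasp" exists.
import re
import rospkg

def resolve_find_substitution(value: str) -> str:
    """Replace every $(find <pkg>) token with the path reported by rospack."""
    rospack = rospkg.RosPack()
    return re.sub(
        r"\$\(find ([\w_]+)\)",
        lambda m: rospack.get_path(m.group(1)),
        value,
    )

print(resolve_find_substitution(
    "$(find active_grasp)/cfg/sim/challenging_scene_2.yaml"))
```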

View File

@@ -2,10 +2,10 @@ center: [0.5, 0.0, 0.25]
q: [0.0, -1.39, 0.0, -2.36, 0.0, 1.57, 0.79]
objects:
- object_id: ycb/006_mustard_bottle
-  xyz: [0.0, 0.0, 1.6]
+  xyz: [0.0, 0.0, 0.03]
  rpy: [0, 0, 0]
  scale: 0.8
-- object_id: active_perception/box2
-  xyz: [0.0, 0.0, 1.3]
+- object_id: active_perception/box
+  xyz: [0.0, 0.0, 0.0]
  rpy: [0, 0, 0]
-  scale: 0.85
+  scale: 0.8

View File

@@ -0,0 +1,11 @@
+center: [0.5, 0.0, 0.25]
+q: [0.0, -1.39, 0.0, -2.36, 0.0, 1.57, 0.79]
+objects:
+- object_id: ycb/006_mustard_bottle
+  xyz: [0.0, 0.0, 0.03]
+  rpy: [0, 0, 0]
+  scale: 0.8
+- object_id: active_perception/box2
+  xyz: [0.19, 0.0, 0.0]
+  rpy: [0, 0, 0]
+  scale: 1.2
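Editor's note: the scene files share a small schema: a task-frame center, an initial joint configuration q, and a list of objects with object_id, xyz, rpy, and scale. A minimal sketch (assuming the files are plain YAML and PyYAML is available; this loader is illustrative, not the repo's) that reads and sanity-checks such a file:

```python
# Load a scene description like challenging_scene_2.yaml and check the fields
# used above. Assumes PyYAML; key names follow the files shown in this commit.
import yaml

def load_scene(path):
    with open(path) as f:
        scene = yaml.safe_load(f)
    assert len(scene["center"]) == 3            # task-frame center [x, y, z]
    assert len(scene["q"]) == 7                 # initial 7-DoF arm configuration
    for obj in scene["objects"]:
        assert "object_id" in obj               # e.g. "ycb/006_mustard_bottle"
        assert len(obj["xyz"]) == 3 and len(obj["rpy"]) == 3
        obj["scale"] = obj.get("scale", 1.0)    # default of 1.0 is an assumption
    return scene

scene = load_scene("cfg/sim/challenging_scene_2.yaml")
print([o["object_id"] for o in scene["objects"]])
```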

View File

@@ -8,5 +8,5 @@ register("top-view", TopView)
register("top-trajectory", TopTrajectory)
register("fixed-trajectory", FixedTrajectory)
register("nbv", NextBestView)
-register("ap-multi-view", ActivePerceptionMultiViewPolicy)
+# register("ap-multi-view", ActivePerceptionMultiViewPolicy)
register("ap-single-view", ActivePerceptionSingleViewPolicy)

View File

@@ -8,9 +8,9 @@ from .active_perception_demo import APInferenceEngine
from robot_helpers.spatial import Transform
import torch
import torch.nn.functional as F
+import requests
import matplotlib.pyplot as plt
+from vgn.grasp import ParallelJawGrasp

class RealTime3DVisualizer:
@@ -51,144 +51,150 @@ class RealTime3DVisualizer:
        plt.pause(0.001)
+# class ActivePerceptionMultiViewPolicy(MultiViewPolicy):
+#     def __init__(self):
+#         super().__init__()
+#         self.max_views = rospy.get_param("ap_grasp/max_views")
+#         self.ap_config_path = rospy.get_param("ap_grasp/ap_config_path")
+#         self.max_inference_num = rospy.get_param("ap_grasp/max_inference_num")
+#         self.ap_inference_engine = APInferenceEngine(self.ap_config_path)
+#         self.pcdvis = RealTime3DVisualizer()
+#     def update(self, img, seg, target_id, x, q):
+#         if len(self.views) > self.max_views or self.best_grasp_prediction_is_stable():
+#             self.done = True
+#         else:
+#             with Timer("state_update"):
+#                 self.integrate(img, x, q)
+#             # When policy hasn't produced an available grasp
+#             c = 0
+#             while(c < self.max_inference_num):
+#                 # Inference with our model
+#                 target_points, scene_points = self.depth_image_to_ap_input(img, seg, target_id)
+#                 ap_input = {'target_pts': target_points,
+#                             'scene_pts': scene_points}
+#                 ap_output = self.ap_inference_engine.inference(ap_input)
+#                 c += 1
+#                 delta_rot_6d = ap_output['estimated_delta_rot_6d']
+#                 current_cam_pose = torch.from_numpy(x.as_matrix()).float().to("cuda:0")
+#                 est_delta_rot_mat = self.rotation_6d_to_matrix_tensor_batch(delta_rot_6d)[0]
+#                 look_at_center = torch.from_numpy(self.bbox.center).float().to("cuda:0")
+#                 nbv_tensor = self.get_transformed_mat(current_cam_pose,
+#                                                       est_delta_rot_mat,
+#                                                       look_at_center)
+#                 nbv_numpy = nbv_tensor.cpu().numpy()
+#                 nbv_transform = Transform.from_matrix(nbv_numpy)
+#                 x_d = nbv_transform
+#                 # Check if this pose available
+#                 if(self.solve_cam_ik(self.q0, x_d)):
+#                     self.x_d = x_d
+#                     self.updated = True
+#                     print("Found an NBV!")
+#                     break
+#     def vis_cam_pose(self, x):
+#         # Integrate
+#         self.views.append(x)
+#         self.vis.path(self.base_frame, self.intrinsic, self.views)
+#     def vis_scene_cloud(self, img, x):
+#         self.tsdf.integrate(img, self.intrinsic, x.inv() * self.T_base_task)
+#         scene_cloud = self.tsdf.get_scene_cloud()
+#         self.vis.scene_cloud(self.task_frame, np.asarray(scene_cloud.points))
+#     def rotation_6d_to_matrix_tensor_batch(self, d6: torch.Tensor) -> torch.Tensor:
+#         a1, a2 = d6[..., :3], d6[..., 3:]
+#         b1 = F.normalize(a1, dim=-1)
+#         b2 = a2 - (b1 * a2).sum(-1, keepdim=True) * b1
+#         b2 = F.normalize(b2, dim=-1)
+#         b3 = torch.cross(b1, b2, dim=-1)
+#         return torch.stack((b1, b2, b3), dim=-2)
+#     def get_transformed_mat(self, src_mat, delta_rot, target_center_w):
+#         src_rot = src_mat[:3, :3]
+#         dst_rot = src_rot @ delta_rot.T
+#         dst_mat = torch.eye(4).to(dst_rot.device)
+#         dst_mat[:3, :3] = dst_rot
+#         distance = torch.norm(target_center_w - src_mat[:3, 3])
+#         z_axis_camera = dst_rot[:3, 2].reshape(-1)
+#         new_camera_position_w = target_center_w - distance * z_axis_camera
+#         dst_mat[:3, 3] = new_camera_position_w
+#         return dst_mat
+#     def depth_image_to_ap_input(self, depth_img, seg_img, target_id):
+#         target_points = []
+#         scene_points = []
+#         K = self.intrinsic.K
+#         depth_shape = depth_img.shape
+#         seg_shape = seg_img.shape
+#         if(depth_shape == seg_shape):
+#             img_shape = depth_shape
+#         else:
+#             print("Depth image shape and segmentation image shape are not the same")
+#             return None
+#         # Convert depth image to 3D points
+#         u_indices , v_indices = np.meshgrid(np.arange(img_shape[1]), np.arange(img_shape[0]))
+#         x_factors = (u_indices - K[0, 2]) / K[0, 0]
+#         y_factors = (v_indices - K[1, 2]) / K[1, 1]
+#         z_mat = depth_img
+#         x_mat = x_factors * z_mat
+#         y_mat = y_factors * z_mat
+#         for i in range(img_shape[0]):
+#             for j in range(img_shape[1]):
+#                 seg_id = seg_img[i, j]
+#                 x = x_mat[i][j]
+#                 y = y_mat[i][j]
+#                 z = z_mat[i][j]
+#                 if(int(seg_id) == int(target_id)):
+#                     # This pixel belongs to the target object to be grasped
+#                     target_points.append([x,y,z])
+#                 else:
+#                     # This pixel belongs to the scene
+#                     scene_points.append([x,y,z])
+#         target_points = np.asarray(target_points)
+#         target_points = target_points.reshape(1, target_points.shape[0], 3)
+#         # self.pcdvis.update_points(target_points)
+#         target_points = torch.from_numpy(target_points).float().to("cuda:0")
+#         scene_points = np.asarray(scene_points)
+#         scene_points = scene_points.reshape(1, scene_points.shape[0], 3)
+#         scene_points = torch.from_numpy(scene_points).float().to("cuda:0")
+#         return target_points, scene_points
+#     def best_grasp_prediction_is_stable(self):
+#         if self.best_grasp:
+#             t = (self.T_task_base * self.best_grasp.pose).translation
+#             i, j, k = (t / self.tsdf.voxel_size).astype(int)
+#             qs = self.qual_hist[:, i, j, k]
+#             if np.count_nonzero(qs) == self.T and np.mean(qs) > 0.9:
+#                 return True
+#         return False
class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
-    def __init__(self):
+    def __init__(self, flask_base_url="http://127.0.0.1:5000"):
        super().__init__()
        self.max_views = rospy.get_param("ap_grasp/max_views")
        self.ap_config_path = rospy.get_param("ap_grasp/ap_config_path")
        self.ap_inference_engine = APInferenceEngine(self.ap_config_path)
        self.pcdvis = RealTime3DVisualizer()
        self.updated = False
+        self._base_url = flask_base_url
+
+    def request_grasping_pose(self, data):
+        response = requests.post(f"{self._base_url}/get_gsnet_grasp", json=data)
+        return response.json()

    def update(self, img, seg, target_id, x, q):
@@ -201,8 +207,8 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
        # When policy hasn't produced an available grasp
        while(self.updated == False):
            # Inference with our model
-            target_points, scene_points = self.depth_image_to_ap_input(img, seg, target_id)
-            ap_input = {'target_pts': target_points,
+            self.target_points, scene_points = self.depth_image_to_ap_input(img, seg, target_id)
+            ap_input = {'target_pts': self.target_points,
                        'scene_pts': scene_points}
            ap_output = self.ap_inference_engine.inference(ap_input)
            delta_rot_6d = ap_output['estimated_delta_rot_6d']
@@ -210,7 +216,7 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
            current_cam_pose = torch.from_numpy(x.as_matrix()).float().to("cuda:0")
            est_delta_rot_mat = self.rotation_6d_to_matrix_tensor_batch(delta_rot_6d)[0]
-            target_points_np = target_points.cpu().numpy()[0,:,:]
+            target_points_np = self.target_points.cpu().numpy()[0,:,:]
            central_point_of_target = np.mean(target_points_np, axis=0)
            look_at_center = torch.from_numpy(central_point_of_target).float().to("cuda:0")
            # Convert look_at_center's reference frame to arm frame
@@ -234,9 +240,18 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
                self.updated = True
                print("Found an NBV!")
                return
-        # Policy has produced an available grasp
+        # Policy has produced an available nbv
        if(self.updated == True):
-            self.generate_grasp(q)
+            data = np.asarray(self.target_points.cpu().numpy())[0].tolist()
+            best_grasp_T = np.asarray(self.request_grasping_pose(data))
+            current_cam_pose = torch.from_numpy(x.as_matrix()).float().to("cuda:0")
+            best_grasp_T = current_cam_pose.cpu().numpy() @ best_grasp_T
+            pose = Transform.from_matrix(best_grasp_T)
+            width = 0.1
+            grasp = ParallelJawGrasp(pose, width)
+            self.best_grasp = grasp
+            self.vis.grasp(self.base_frame, self.best_grasp, 0.9)
+            # self.generate_grasp(q)
            self.done = True

    def deactivate(self):
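Editor's note: the new branch composes the GS-Net result with the current camera pose; since the response is left-multiplied by the camera pose, the endpoint appears to return the grasp in the camera frame, and the product expresses it in the base frame. A minimal numpy sketch of that composition (the frame names are an interpretation of the code above, not identifiers from the repo):

```python
# Composing homogeneous transforms the way the new branch does:
# T_base_grasp = T_base_cam @ T_cam_grasp.
import numpy as np

def make_T(R, t):
    """Build a 4x4 homogeneous transform from a rotation matrix and a translation."""
    T = np.eye(4)
    T[:3, :3] = R
    T[:3, 3] = t
    return T

# Camera pose in the base frame (example: rotated 90 deg about z, lifted 0.5 m).
theta = np.pi / 2
Rz = np.array([[np.cos(theta), -np.sin(theta), 0.0],
               [np.sin(theta),  np.cos(theta), 0.0],
               [0.0, 0.0, 1.0]])
T_base_cam = make_T(Rz, [0.3, 0.0, 0.5])

# Grasp pose returned in the camera frame (example: 0.4 m along the optical axis).
T_cam_grasp = make_T(np.eye(3), [0.0, 0.0, 0.4])

T_base_grasp = T_base_cam @ T_cam_grasp
print(T_base_grasp[:3, 3])  # grasp position expressed in the base frame
```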
@@ -253,6 +268,19 @@ class ActivePerceptionSingleViewPolicy(SingleViewPolicy):
        scene_cloud = self.tsdf.get_scene_cloud()
        self.vis.scene_cloud(self.task_frame, np.asarray(scene_cloud.points))

+    def generate_gsnet_grasp(self, q):
+        tsdf_grid = self.tsdf.get_grid()
+        out = self.vgn.predict(tsdf_grid)
+        self.vis.quality(self.task_frame, self.tsdf.voxel_size, out.qual, 0.9)
+        grasps, qualities = self.filter_grasps(out, q)
+        if len(grasps) > 0:
+            self.best_grasp, quality = self.select_best_grasp(grasps, qualities)
+            self.vis.grasp(self.base_frame, self.best_grasp, quality)
+        else:
+            self.best_grasp = None
+            self.vis.clear_grasp()
+
    def generate_grasp(self, q):
        tsdf_grid = self.tsdf.get_grid()
        out = self.vgn.predict(tsdf_grid)
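Editor's note: request_grasping_pose() POSTs the target point cloud as a JSON list of [x, y, z] points to /get_gsnet_grasp and, judging by how the response is multiplied with the camera pose, expects a 4x4 grasp transform in the camera frame in return. The GS-Net service itself is not part of this commit, so the following is only a hedged sketch of a matching Flask endpoint with the actual grasp prediction stubbed out:

```python
# Sketch of the Flask service that request_grasping_pose() appears to expect.
# The real GS-Net inference is not in this commit; predict_grasp() is a stub.
import numpy as np
from flask import Flask, jsonify, request

app = Flask(__name__)

def predict_grasp(points: np.ndarray) -> np.ndarray:
    # Placeholder: a real implementation would run GS-Net on `points`
    # and return the best grasp as a homogeneous 4x4 matrix in the camera frame.
    T = np.eye(4)
    T[:3, 3] = points.mean(axis=0)  # e.g. aim at the target centroid
    return T

@app.route("/get_gsnet_grasp", methods=["POST"])
def get_gsnet_grasp():
    points = np.asarray(request.get_json(), dtype=np.float32)  # shape (N, 3)
    grasp_T = predict_grasp(points)
    return jsonify(grasp_T.tolist())

if __name__ == "__main__":
    app.run(host="127.0.0.1", port=5000)
```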

View File

@@ -107,7 +107,7 @@ class GraspController:
        self.view_sphere = ViewHalfSphere(bbox, self.min_z_dist)
        self.policy.activate(bbox, self.view_sphere)
        timer = rospy.Timer(rospy.Duration(1.0 / self.control_rate), self.send_vel_cmd)
-        r = rospy.Rate(self.policy_rate)
+        r = rospy.Rate(self.control_rate)
        if(self.policy.policy_type=="single_view"):
            while not self.policy.done:
@@ -127,6 +127,9 @@ class GraspController:
                    # Arrived
                    moving_to_The_target = False
                r.sleep()
+            # sleep 3s
+            for i in range(self.control_rate*3):
+                r.sleep()
        elif(self.policy.policy_type=="multi_view"):
            while not self.policy.done:
                depth_img, seg_image, pose, q = self.get_state()
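Editor's note: the added block waits roughly three seconds by ticking the Rate object control_rate * 3 times, which assumes control_rate is an integer. If a plain fixed delay is acceptable, an equivalent one-liner with rospy would be:

```python
# Equivalent fixed delay (assumes a ROS master is running and a node is initialized);
# rospy.sleep also respects simulated time when /use_sim_time is set.
import rospy

rospy.init_node("sleep_demo", anonymous=True)
rospy.sleep(3.0)
```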