使用mediapipe + Unity 手部位姿三维位姿估计
参考文章
基于Mediapipe的姿势识别并同步到Unity人体模型中
Mediapipe+Unity3d实现虚拟手_unity mediapipe-CSDN博客
需求
我的需求就是快速、准确的跟踪手部位姿并实现一个三维显示。
主要思路
搭建mdeiapipe系统,这个很简单,可以参考官方文档,配置好环境,下载一个相应的权重,然后就可以识别手部姿态了。
这里最好采用threading的方式来异步执行,因为我弄了一个小软件。
适配线程函数
处理数据的函数和可视化在同一个线程,
发送数据的线程是单独的线程
最终实现的结果是这样的
源码不太好贴
from queue import Queue
import mediapipe as mp
from matplotlib import pyplot as plt
from mediapipe.python.solutions.drawing_utils import draw_axis
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import cv2
from mediapipe.tasks.python.vision import HandLandmarkerResult
from run.media_hand.med_hand3d_base import MedHandInferBase
from run.media_hand.view_3d import MyView3D
from run.rootnet.root_infer import HandRootNet
class MedHandInfer( MedHandInferBase):
def __init__(self,_update_table_signal = None,_send_hd_client= None,_img_path = None,_vid_path = None) -> object:
super().__init__()
self.options = self.HandLandmarkerOptions(
base_options=self.BaseOptions(model_asset_path=self.model_path),
running_mode =self. VisionRunningMode.LIVE_STREAM,
result_callback = self.print_result,
num_hands=2)
self.video_infer_flag = True
self.root_infer = HandRootNet()
if _update_table_signal != None:
self.update_table_signal_h = _update_table_signal
if _img_path != None:
self.img_path = _img_path
if _vid_path != None:
self.vid_path = _vid_path
if _send_hd_client != None: # 在这里获取了client类的实例,因此可以用封装包的函数
self.send_hd_client_infer = _send_hd_client
def med_hand_infer_img(self):
'''执行图片推断'''
self.options = self.HandLandmarkerOptions(
base_options=self.BaseOptions(model_asset_path=self.model_path),
running_mode=self.VisionRunningMode.IMAGE,
num_hands=2)
mp_image = mp.Image.create_from_file(self.img_path)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mp_image)
with self.HandLandmarker.create_from_options(self.options) as landmarker:
hand_landmarker_result = landmarker.detect(mp_image)
print(self.show_time() + "图片的识别结果为{0}".format(hand_landmarker_result))
def med_hand_infer_video(self):
'''执行视频推断'''
self.options = self.HandLandmarkerOptions(
base_options=self.BaseOptions(model_asset_path=self.model_path),
running_mode=self.VisionRunningMode.VIDEO,
num_hands=2)
cap = cv2.VideoCapture(self.vid_path)
self.video_infer_flag = True
while cap.isOpened():
if self.video_infer_flag:
ret, frame = cap.read()
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
with self.HandLandmarker.create_from_options(self.options) as landmarker:
hand_landmarker_result = landmarker.detect(mp_image)
print(self.show_time() + "视频帧的识别结果为{0}".format(hand_landmarker_result))
if not ret:
break
def med_hand_infer_web(self):
print(self.show_time() + "开始执行media相机推理")
cap = cv2.VideoCapture(0)
print(self.show_time() + "开始读取视频帧")
self.video_infer_flag = True
while True:
if not self.video_infer_flag:
break
ret,frame = cap.read()
start_time = cv2.getTickCount()
# 因为摄像头是镜像的,所以将摄像头水平翻转
# 不是镜像的可以不翻转
frame = cv2.flip(frame, 1)
frame_new = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
# 获取视频宽度高度
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# 创建一个黑色图像
black_image = np.zeros((height, width, 3), dtype=np.uint8)
# 改变图像的着色模式,用于推理,但是不用于显示
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_new)
with self.HandLandmarker.create_from_options(self.options) as landmarker:
# The landmarker is initialized. Use it here.
landmarker.detect_async(mp_image,1)
# 为了绘图获取结果
cur_res = self.res_que.get()
self.h_frame_num += 1
lm_img = self.draw_landmarks_on_image(frame, cur_res) # 在每一帧图片上绘图
lm_img_axis = MyView3D.my_draw_axis(black_image) # 先绘制坐标轴
lm_img_2 = self.draw_landmarks_on_image(lm_img_axis, cur_res) # 在每一帧图片上绘图
# 合并图片
concatenated_image = np.hstack((lm_img, lm_img_2))
# 关节转换可以放到线程中处理
con_res_hlm = self.con_res_que.get()
# 滤波在绘图线程上进行
fil_res_hlm, scores = self.data_filter.get_med_fil_pose(con_res_hlm, cur_res.handedness)
print(self.show_time() + "滤波之后的数据为{}".format(fil_res_hlm))
############ 获取手部姿态的xmin ymin xmax ymax 并转化为像素坐标 ###########
np_hlm_rig = np.array(con_res_hlm[0:21])
np_hlm_left = np.array(con_res_hlm[21:])
x_min_r = np.min(np_hlm_rig,axis=0)[0]
y_min_r = np.min(np_hlm_rig, axis=0)[1]
x_max_r = np.max(np_hlm_rig, axis=0)[0]
y_max_r = np.max(np_hlm_rig, axis=0)[1]
x_min_l = np.min(np_hlm_left, axis=0)[0]
y_min_l = np.min(np_hlm_left, axis=0)[1]
x_max_l = np.max(np_hlm_left, axis=0)[0]
y_max_l = np.max(np_hlm_left, axis=0)[1]
# 转化为像素值
x_min_r = int(x_min_r * width); x_max_r = int(x_max_r * width); y_min_r = int(y_min_r * height); y_max_r = int(y_max_r * height);
x_min_l = int(x_min_l * width); x_max_l = int(x_max_l * width); y_min_l = int(y_min_l * height); y_max_l = int(y_max_l * height);
# 绘制矩形
rec_img = cv2.rectangle(frame,(x_min_r-40,y_min_r-40),(x_max_r+40,y_max_r + 40),(0,255,0),2)
rec_img = cv2.rectangle(rec_img,(x_min_l-40,y_min_l-40),(x_max_l+40,y_max_l + 40),(0,255,0),2)
# 生成 bbox
bbox_rig = [x_min_r-40, y_min_r-40, x_max_r+40, y_max_r + 40]
bbox_left = [x_min_l-40, y_min_l-40, x_max_l+40, y_max_l + 40]
root_depth_list = self.root_infer.get_hand_root_depth(frame,scores,[bbox_rig,bbox_left])
print(self.show_time() + "手部root关节的绝对深度为{}mm".format(root_depth_list))
############ 获取世界坐标系的手部坐标 ###########
scal_res_hlm = self.res_scal(fil_res_hlm, width, height)
print(self.show_time() + "计算缩放之后的数据为{}".format(scal_res_hlm))
############ 向客户端发送数据 发送的是滤波之后的数据 没有发送加上手腕的 ###########
self.fil_res_que.put(fil_res_hlm) # 准备发送数据
self.update_table_signal_h.emit(fil_res_hlm) # 更新table
############ 绘制滤波后的图 ###########
fil_img = self.draw_filt_landmarks_on_image(frame, fil_res_hlm) # 使用处理后的数据绘图 做对比
fil_img_2 = self.draw_filt_landmarks_on_image(lm_img_axis, fil_res_hlm) # 在每一帧图片上绘图
concatenated_image_2 = np.hstack((fil_img, fil_img_2))
print(self.show_time() + "已经处理了{0}帧".format(self.h_frame_num))
# 计算时间间隔并实时更新帧率
end_time = cv2.getTickCount()
total_time = (end_time - start_time) / cv2.getTickFrequency()
fps =round(1 / total_time,2)
fps_text = f"FPS: {round(fps, 2)}"
cv2.putText(concatenated_image, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
cv2.imshow('MediaPipe Hands det', rec_img)
cv2.imshow('MediaPipe Hands', concatenated_image)
cv2.imshow('MediaPipe Hands Filter', concatenated_image_2)
# self.draw_hand3d(fig,hand_mark_list_fil)
if cv2.waitKey(1) & 0xFF == 27: # oh 天呐 我是沙口 原来摁一下 esc 就行
print("停止手部姿态检测")
break
cap.release()
cv2.destroyAllWindows()
print("cv2资源已经释放")
def send_data_packet_thm(self):
'''线程函数 将数据封装成数据包'''
print(self.show_time() + "进入send_data_packet_thm函数,开始发送数据!")
while True:
# 获取处理后的数据 这里是 double [] 类型的数据
self.res_kps_handled = self.fil_res_que.get()
# 将手部数据封装成字节数组 存在了 send_hd_client_infer.joint_data_packet
self.send_hd_client_infer.create_send_packet(self.res_kps_handled) # 处理结果存在了 joint_data_packet
self.send_hd_client_infer.send_packet(self.send_hd_client_infer.joint_data_packet) # 发送数据
#
def draw_hand3d(self,fig,_hand_mark_list:list):
'''废弃的 在子线程用matlip会崩溃'''
x = []
y = []
z = []
for item in _hand_mark_list:
x.append(item[0])
y.append(item[0])
z.append(item[0])
# 创建一个新的三维图形
ax = fig.add_subplot(111, projection='3d')
# 绘制三维散点图
ax.scatter(x, y, z, c='b', marker='o')
# 绘制连线 1 手腕到拇指
for i in range(0,5): # 5个手指
for j in range(4 * i, 4 * i + 3 ): # 循环三次即可
if self.is_joint_exist(_hand_mark_list[j]):
ax.plot([x[j], x[j + 1]], [y[j], y[j + 1]], [z[j], z[j + 1]])
for k in range(4 * i + 21, 4 * i + 24 ): # 左手
if self.is_joint_exist(_hand_mark_list[k]):
ax.plot([x[k], x[k + 1]], [y[k], y[k + 1]], [z[k], z[k + 1]])
if i == 4:
ax.plot([x[3], x[20]], [y[2], y[20]], [z[2], z[20]])
ax.plot([x[7], x[20]], [y[7], y[20]], [z[7], z[20]])
ax.plot([x[11], x[20]], [y[11], y[20]], [z[11], z[20]])
ax.plot([x[15], x[20]], [y[15], y[20]], [z[15], z[20]])
ax.plot([x[19], x[20]], [y[19], y[20]], [z[19], z[20]])
ax.plot([x[24], x[20]], [y[24], y[20]], [z[24], z[20]])
ax.plot([x[28], x[20]], [y[28], y[20]], [z[28], z[20]])
ax.plot([x[32], x[20]], [y[32], y[20]], [z[32], z[20]])
ax.plot([x[36], x[20]], [y[36], y[20]], [z[36], z[20]])
ax.plot([x[40], x[20]], [y[40], y[20]], [z[40], z[20]])
# 设置图形属性
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')
# 显示图形
plt.show()
def is_joint_exist(self,_point:list):
to = sum(_point)
return False if to < 0.001 else True
# med = MedHandInfer()
# med.med_hand_infer_web()