基于Media+Unity的手部位姿三维位姿估计

在这里插入图片描述

使用mediapipe + Unity 手部位姿三维位姿估计

参考文章

基于Mediapipe的姿势识别并同步到Unity人体模型中

Mediapipe+Unity3d实现虚拟手_unity mediapipe-CSDN博客

需求

我的需求就是快速、准确的跟踪手部位姿并实现一个三维显示。

主要思路

搭建mdeiapipe系统，这个很简单，可以参考官方文档，配置好环境，下载一个相应的权重，然后就可以识别手部姿态了。

这里最好采用threading的方式来异步执行，因为我弄了一个小软件。

适配线程函数

处理数据的函数和可视化在同一个线程，

发送数据的线程是单独的线程

最终实现的结果是这样的

在这里插入图片描述

源码不太好贴

from queue import Queue

import mediapipe as mp
from matplotlib import pyplot as plt
from mediapipe.python.solutions.drawing_utils import draw_axis
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import numpy as np
import cv2
from mediapipe.tasks.python.vision import HandLandmarkerResult

from run.media_hand.med_hand3d_base import MedHandInferBase
from run.media_hand.view_3d import MyView3D
from run.rootnet.root_infer import HandRootNet


class MedHandInfer( MedHandInferBase):

    def __init__(self,_update_table_signal = None,_send_hd_client= None,_img_path = None,_vid_path = None) -> object:
        super().__init__()
        self.options = self.HandLandmarkerOptions(
            base_options=self.BaseOptions(model_asset_path=self.model_path),
            running_mode =self. VisionRunningMode.LIVE_STREAM,
            result_callback = self.print_result,
            num_hands=2)

        self.video_infer_flag = True

        self.root_infer = HandRootNet()
        if _update_table_signal != None:
            self.update_table_signal_h = _update_table_signal
        if _img_path != None:
            self.img_path = _img_path
        if _vid_path != None:
            self.vid_path = _vid_path
        if _send_hd_client != None:  # 在这里获取了client类的实例，因此可以用封装包的函数
            self.send_hd_client_infer = _send_hd_client

    def med_hand_infer_img(self):
        '''执行图片推断'''
        self.options = self.HandLandmarkerOptions(
            base_options=self.BaseOptions(model_asset_path=self.model_path),
            running_mode=self.VisionRunningMode.IMAGE,
            num_hands=2)
        mp_image = mp.Image.create_from_file(self.img_path)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=mp_image)
        with self.HandLandmarker.create_from_options(self.options) as landmarker:
            hand_landmarker_result = landmarker.detect(mp_image)
            print(self.show_time() + "图片的识别结果为{0}".format(hand_landmarker_result))

    def med_hand_infer_video(self):
        '''执行视频推断'''
        self.options = self.HandLandmarkerOptions(
            base_options=self.BaseOptions(model_asset_path=self.model_path),
            running_mode=self.VisionRunningMode.VIDEO,
            num_hands=2)
        cap = cv2.VideoCapture(self.vid_path)
        self.video_infer_flag = True
        while cap.isOpened():
            if self.video_infer_flag:
                ret, frame = cap.read()
                mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
                with self.HandLandmarker.create_from_options(self.options) as landmarker:
                    hand_landmarker_result = landmarker.detect(mp_image)
                    print(self.show_time() + "视频帧的识别结果为{0}".format(hand_landmarker_result))
                if not ret:
                    break


    def med_hand_infer_web(self):
        print(self.show_time() + "开始执行media相机推理")
        cap = cv2.VideoCapture(0)
        print(self.show_time() + "开始读取视频帧")
        self.video_infer_flag = True
        while True:
            if not self.video_infer_flag:
                break

            ret,frame = cap.read()
            start_time = cv2.getTickCount()
            # 因为摄像头是镜像的，所以将摄像头水平翻转
            # 不是镜像的可以不翻转
            frame = cv2.flip(frame, 1)
            frame_new = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # 获取视频宽度高度
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            # 创建一个黑色图像
            black_image = np.zeros((height, width, 3), dtype=np.uint8)
            # 改变图像的着色模式，用于推理，但是不用于显示
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_new)
            with self.HandLandmarker.create_from_options(self.options) as landmarker:
                # The landmarker is initialized. Use it here.
                landmarker.detect_async(mp_image,1)
                # 为了绘图获取结果
                cur_res =  self.res_que.get()
                self.h_frame_num += 1

                lm_img  = self.draw_landmarks_on_image(frame, cur_res) # 在每一帧图片上绘图
                lm_img_axis = MyView3D.my_draw_axis(black_image) # 先绘制坐标轴
                lm_img_2 = self.draw_landmarks_on_image(lm_img_axis, cur_res) # 在每一帧图片上绘图
                # 合并图片
                concatenated_image = np.hstack((lm_img, lm_img_2))
                # 关节转换可以放到线程中处理
                con_res_hlm =  self.con_res_que.get()
                # 滤波在绘图线程上进行
                fil_res_hlm, scores = self.data_filter.get_med_fil_pose(con_res_hlm, cur_res.handedness)
                print(self.show_time() + "滤波之后的数据为{}".format(fil_res_hlm))

                ############ 获取手部姿态的xmin ymin xmax ymax 并转化为像素坐标 ###########
                np_hlm_rig = np.array(con_res_hlm[0:21])
                np_hlm_left = np.array(con_res_hlm[21:])
                x_min_r = np.min(np_hlm_rig,axis=0)[0]
                y_min_r = np.min(np_hlm_rig, axis=0)[1]
                x_max_r = np.max(np_hlm_rig, axis=0)[0]
                y_max_r = np.max(np_hlm_rig, axis=0)[1]
                x_min_l = np.min(np_hlm_left, axis=0)[0]
                y_min_l = np.min(np_hlm_left, axis=0)[1]
                x_max_l = np.max(np_hlm_left, axis=0)[0]
                y_max_l = np.max(np_hlm_left, axis=0)[1]
                # 转化为像素值
                x_min_r = int(x_min_r * width); x_max_r = int(x_max_r * width); y_min_r = int(y_min_r * height); y_max_r = int(y_max_r * height);
                x_min_l = int(x_min_l * width); x_max_l = int(x_max_l * width); y_min_l = int(y_min_l * height); y_max_l = int(y_max_l * height);

                # 绘制矩形
                rec_img = cv2.rectangle(frame,(x_min_r-40,y_min_r-40),(x_max_r+40,y_max_r + 40),(0,255,0),2)
                rec_img = cv2.rectangle(rec_img,(x_min_l-40,y_min_l-40),(x_max_l+40,y_max_l + 40),(0,255,0),2)
                # 生成 bbox
                bbox_rig = [x_min_r-40, y_min_r-40, x_max_r+40, y_max_r + 40]
                bbox_left = [x_min_l-40, y_min_l-40, x_max_l+40, y_max_l + 40]

                root_depth_list =  self.root_infer.get_hand_root_depth(frame,scores,[bbox_rig,bbox_left])
                print(self.show_time() + "手部root关节的绝对深度为{}mm".format(root_depth_list))



                ############ 获取世界坐标系的手部坐标 ###########
                scal_res_hlm = self.res_scal(fil_res_hlm, width, height)
                print(self.show_time() + "计算缩放之后的数据为{}".format(scal_res_hlm))

                ############ 向客户端发送数据 发送的是滤波之后的数据 没有发送加上手腕的 ###########
                self.fil_res_que.put(fil_res_hlm) # 准备发送数据
                self.update_table_signal_h.emit(fil_res_hlm) # 更新table

                ############ 绘制滤波后的图 ###########
                fil_img = self.draw_filt_landmarks_on_image(frame, fil_res_hlm)  # 使用处理后的数据绘图 做对比
                fil_img_2 = self.draw_filt_landmarks_on_image(lm_img_axis, fil_res_hlm)  # 在每一帧图片上绘图
                concatenated_image_2 = np.hstack((fil_img, fil_img_2))

            print(self.show_time() + "已经处理了{0}帧".format(self.h_frame_num))

            # 计算时间间隔并实时更新帧率
            end_time = cv2.getTickCount()
            total_time = (end_time - start_time) / cv2.getTickFrequency()
            fps =round(1 / total_time,2)
            fps_text = f"FPS: {round(fps, 2)}"
            cv2.putText(concatenated_image, fps_text, (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            cv2.imshow('MediaPipe Hands det', rec_img)
            cv2.imshow('MediaPipe Hands', concatenated_image)
            cv2.imshow('MediaPipe Hands Filter', concatenated_image_2)
            # self.draw_hand3d(fig,hand_mark_list_fil)

            if cv2.waitKey(1) & 0xFF == 27: # oh 天呐 我是沙口 原来摁一下 esc 就行
                print("停止手部姿态检测")
                break
        cap.release()
        cv2.destroyAllWindows()
        print("cv2资源已经释放")

    def send_data_packet_thm(self):
        '''线程函数 将数据封装成数据包'''
        print(self.show_time() + "进入send_data_packet_thm函数，开始发送数据！")
        while True:
            # 获取处理后的数据 这里是 double [] 类型的数据
            self.res_kps_handled = self.fil_res_que.get()
            # 将手部数据封装成字节数组 存在了 send_hd_client_infer.joint_data_packet
            self.send_hd_client_infer.create_send_packet(self.res_kps_handled) # 处理结果存在了 joint_data_packet
            self.send_hd_client_infer.send_packet(self.send_hd_client_infer.joint_data_packet) # 发送数据
#
    def draw_hand3d(self,fig,_hand_mark_list:list):
        '''废弃的 在子线程用matlip会崩溃'''
        x = []
        y = []
        z = []
        for item in _hand_mark_list:
            x.append(item[0])
            y.append(item[0])
            z.append(item[0])

        # 创建一个新的三维图形

        ax = fig.add_subplot(111, projection='3d')

        # 绘制三维散点图
        ax.scatter(x, y, z, c='b', marker='o')

        # 绘制连线 1 手腕到拇指
        for i in range(0,5): # 5个手指
            for j in range(4 * i, 4 * i + 3 ): # 循环三次即可
                if self.is_joint_exist(_hand_mark_list[j]):
                    ax.plot([x[j], x[j + 1]], [y[j], y[j + 1]], [z[j], z[j + 1]])
            for k in range(4 * i + 21, 4 * i + 24 ): # 左手
                if self.is_joint_exist(_hand_mark_list[k]):
                    ax.plot([x[k], x[k + 1]], [y[k], y[k + 1]], [z[k], z[k + 1]])

            if i == 4:
                ax.plot([x[3], x[20]], [y[2], y[20]], [z[2], z[20]])
                ax.plot([x[7], x[20]], [y[7], y[20]], [z[7], z[20]])
                ax.plot([x[11], x[20]], [y[11], y[20]], [z[11], z[20]])
                ax.plot([x[15], x[20]], [y[15], y[20]], [z[15], z[20]])
                ax.plot([x[19], x[20]], [y[19], y[20]], [z[19], z[20]])

                ax.plot([x[24], x[20]], [y[24], y[20]], [z[24], z[20]])
                ax.plot([x[28], x[20]], [y[28], y[20]], [z[28], z[20]])
                ax.plot([x[32], x[20]], [y[32], y[20]], [z[32], z[20]])
                ax.plot([x[36], x[20]], [y[36], y[20]], [z[36], z[20]])
                ax.plot([x[40], x[20]], [y[40], y[20]], [z[40], z[20]])

        # 设置图形属性
        ax.set_xlabel('X Label')
        ax.set_ylabel('Y Label')
        ax.set_zlabel('Z Label')

        # 显示图形
        plt.show()
    def is_joint_exist(self,_point:list):
        to = sum(_point)
        return False if to < 0.001 else True

# med = MedHandInfer()
# med.med_hand_infer_web()