最近想试一下关键点检测的效果,先从yolov8-pose开始,不想跑coco那么大的数据集,就找了两个比较小的
yolov8-pose的txt数据格式如下:
类别、box、节点,数据做了归一化
可视化只显示了点,没有连线
参数:
mat_path 是 数据集mat文件所在地址,包含mat文件名
image_path 是 数据集图像的地址,不包含图像名
save_path 是 转换为txt后保存的地址
save_path 是可视化结果保存的地址
1、lsp
LSP:运动场景,单人数据集,截取后的单人区域,图片很小,2000张图片,14个节点
def save_joints_lsp(mat_path, image_path, save_path,save_path1):
"""
mat_path 是 lsp数据集mat文件所在地址,包含mat文件名
image_path 是 lsp数据集图像的地址,不包含图像名
save_path 是 转换为txt后保存的地址
save_path 是可视化结果保存的地址
lsp数据集共2000张图片
"""
joints = loadmat(mat_path)
joints = joints["joints"].transpose(2, 0, 1)
joints = joints[:, :, :]
#num = 0
for img_path in glob.glob("%s/*.jpg" % image_path):
img_name = img_path.split("/")[-1].split(".")[0]
img = Image.open(img_path)
img = np.array(img, dtype=np.uint8)
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
imgh, imgw = img.shape[:2]
num = int(img_name[2:])
cen_points = joints[num-1, ...]
points_num = cen_points.shape[-1]
point_dict = {}
ps = []
for points_ in range(points_num):
point_x = cen_points[0, points_]
point_y = cen_points[1, points_]
vi = cen_points[2, points_]
if vi==0:
vi = 2.0
elif vi==2:
print(name)
point_dict[str(points_)] = [point_x/imgw, point_y/imgh,vi]
# cv2.circle(img, (int(point_x), int(point_y)), 5, colors[points_],
# thickness=-1)
ps.append([int(point_x), int(point_y)])
# x, y, w, h = cv2.boundingRect(np.array([ps]))
# x = (x+w/2)/imgw
# y = (y+h/2)/imgh
# w = (w+6)/imgw
# h = (h+6)/imgh
x =0.5
y = 0.5
w =1
h=1
with open(os.path.join(save_path, img_name + ".txt"), "w") as f:
f.write(str(0)+" "+str(x)+" "+str(y)+" "+str(w)+" "+str(h))
cv2.rectangle(img,(int(x*imgw-w*imgw/2),int(y*imgh-h*imgh/2)),
(int(x*imgw+w*imgw/2),int(y*imgh+h*imgh/2)),
(0,0,255),5)
for i in point_dict:
p = point_dict[i]
f.write(" "+str(p[0]) + " " + str(p[1]) + " " + str(p[2]))
cv2.circle(img, (int(p[0]*imgw), int(p[1]*imgh)), 5, colors[points_],
thickness=-1)
f.write("\n")
#img_txt.write(str(point_dict))
f.close()
#num += 1
# 若不想看图片中关键点的位置是否准确,请注释掉后面两行
# cv2.imshow("img", img)
# cv2.waitKey()
cv2.imwrite(save_path1+"/"+img_name+".jpg",img)
2、FLIC
FLIC:电影场景,多人情况下label也只有单人,labels有29个节点,大多为nan,选了9个节点
def save_flic(mat_path, image_path, save_path,save_path1):
examples = loadmat(mat_path)
examples = examples["examples"][0]
joint_ids = ['lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip',
'lkne', 'lank', 'rhip', 'rkne', 'rank', 'leye', 'reye',
'lear', 'rear', 'nose', 'msho', 'mhip', 'mear', 'mtorso',
'mluarm', 'mruarm', 'mllarm', 'mrlarm', 'mluleg', 'mruleg',
'mllleg', 'mrlleg']
available = ['lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip','rhip', 'head']
for i, example in enumerate(examples):
joint = example[2].T
img_name = example[3][0]
joints = dict(zip(joint_ids, joint))
img =cv2.imread(image_path+"/"+img_name)
img_name = img_name.split(".")[0]
imgh, imgw = img.shape[:2]
point_dict = {}
ps = []
head = np.asarray(joints['reye']) + \
np.asarray(joints['leye']) + \
np.asarray(joints['nose'])
head /= 3
joints['head'] = head.tolist()
for name in available:
#joint_pos.append(joints[name])
point = joints[name]
point_dict[name] = [point[0]/imgw, point[1]/imgh,2.0]
ps.append([int(point[0]), int(point[1])])
x, y, w, h = cv2.boundingRect(np.array([ps]))
x = (x+w/2)/imgw
y = (y+h/2)/imgh
w = (w+20)/imgw
h = (h+20)/imgh
with open(os.path.join(save_path, img_name + ".txt"), "w") as f:
f.write(str(0) + " " + str(x) + " " + str(y) + " " + str(w) + " " + str(h))
cv2.rectangle(img, (int(x * imgw - w * imgw / 2), int(y * imgh - h * imgh / 2)),
(int(x * imgw + w * imgw / 2), int(y * imgh + h * imgh / 2)),
(0, 0, 255), 5)
c =0
for i in point_dict:
p = point_dict[i]
f.write(" " + str(p[0]) + " " + str(p[1]) + " " + str(p[2]))
cv2.circle(img, (int(p[0] * imgw), int(p[1] * imgh)), 5, colors[c],
thickness=-1)
f.write("\n")
c = c+1
# img_txt.write(str(point_dict))
f.close()
# num += 1
# 若不想看图片中关键点的位置是否准确,请注释掉后面两行
# cv2.imshow("img", img)
# cv2.waitKey()
cv2.imwrite(save_path1 + "/" + img_name + ".jpg", img)
完整代码:https://github.com/ziyaoma/detect-pose
参考:LSP数据集与MPII数据集标签转txt文件(字典形式储存)_mpii标注文件修改成txt-CSDN博客
https://github.com/Fangyh09/PoseDatasets