A complete TensorRT deployment of the YOLOv8-seg segmentation model
- Preparation
- Download the YOLOv8-seg model
- Convert to ONNX and TRT
- Inference
- Ready-made inference interface
Preparation
Download the code from https://github.com/songjiahao-wq/yolov8_seg_trtinference.git
Install TensorRT 8.6, then run `pip install -r requirements.txt`.
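To confirm the environment is usable before going further, a quick sanity check (assuming the TensorRT Python bindings were installed alongside the C++ libraries):

```python
import tensorrt as trt

print(trt.__version__)  # expect something like 8.6.x
```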
Download the YOLOv8-seg model
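The pretrained weights can be fetched with the `ultralytics` package, which downloads them automatically on first use (a minimal sketch; any `yolov8*-seg.pt` variant works the same way):

```python
from ultralytics import YOLO

# First use downloads yolov8m-seg.pt automatically if it is not already present
model = YOLO("yolov8m-seg.pt")
```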
Convert to ONNX and TRT
The conversion steps are as follows:
# TensorRT==8.6
## yolov8-seg CLI commands
### Export the ONNX model
`python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 640 640 --device cuda:0`
`python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 448 512 --device cuda:0`
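Before building an engine, it can help to sanity-check the exported file. A sketch using the `onnx` package (a common dependency, though not guaranteed by this repo's requirements):

```python
import onnx

model = onnx.load("yolov8m-seg.onnx")
onnx.checker.check_model(model)  # raises if the graph is malformed
print([i.name for i in model.graph.input], [o.name for o in model.graph.output])
```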
### Export the TRT model
`python build.py --weights yolov8m-seg.onnx --fp16 --device cuda:0 --seg`
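For reference, `build.py` wraps the standard ONNX-to-engine builder flow. A minimal sketch of that flow with the TensorRT 8.6 Python API (this is the general pattern, not the repo's exact code):

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open("yolov8m-seg.onnx", "rb") as f:
    if not parser.parse(f.read()):
        raise RuntimeError(parser.get_error(0))

config = builder.create_builder_config()
config.set_flag(trt.BuilderFlag.FP16)  # same effect as the --fp16 switch

engine_bytes = builder.build_serialized_network(network, config)
with open("yolov8m-seg.engine", "wb") as f:
    f.write(engine_bytes)
```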
### Export the TRT model with trtexec
`E:\Download\TensorRT-10.0.1.6\bin/trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.engine --fp16`
### Inference without a torch environment
`python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method cudart`
### Inference with a torch environment
`python infer-seg.py`
- [x] infer-seg-without-torch-port.py — callable interface that saves only mask.txt on each call
- [x] infer-seg-without-torch.py — torch-free inference with cudart and pycuda backends
- First, export the ONNX model:
`python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 640 640 --device cuda:0`
- Then convert it to a TRT model. There are two ways (a quick load check follows this list):
Script conversion: `python build.py --weights yolov8m-seg.onnx --fp16 --device cuda:0 --seg`
trtexec conversion: `trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.engine --fp16`
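Before moving on to inference, it is worth confirming the engine deserializes and has the expected I/O shapes. A minimal sketch with the TensorRT 8.6 Python API (the binding-based calls below are valid in 8.x but were removed in TensorRT 10):

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
runtime = trt.Runtime(logger)
with open("yolov8m-seg.engine", "rb") as f:
    engine = runtime.deserialize_cuda_engine(f.read())

for i in range(engine.num_bindings):
    print(engine.get_binding_name(i), engine.get_binding_shape(i))
```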
Inference
Inference can run without torch (choosing between two backends, cudart and pycuda) or with torch:
cudart inference, no torch required:
`python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method cudart`
pycuda inference, no torch required:
`python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method pycuda`
torch-based inference: `python infer-seg.py`
Ready-made inference interface
```python
import argparse
import time
from pathlib import Path

import cv2
import numpy as np
import torch

from config import ALPHA, CLASSES, COLORS, MASK_COLORS
from models.utils import blob, letterbox, path_to_list, seg_postprocess


def clip_segments(segments, shape):
    """Clips segment coordinates (xy1, xy2, ...) to an image's boundaries given its shape (height, width)."""
    if isinstance(segments, torch.Tensor):  # faster individually
        segments[:, 0].clamp_(0, shape[1])  # x
        segments[:, 1].clamp_(0, shape[0])  # y
    else:  # np.array (faster grouped)
        segments[:, 0] = segments[:, 0].clip(0, shape[1])  # x
        segments[:, 1] = segments[:, 1].clip(0, shape[0])  # y


def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
    """Rescales segment coordinates from img1_shape to img0_shape, optionally normalizing them with custom padding."""
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    segments[:, 0] -= pad[0]  # x padding
    segments[:, 1] -= pad[1]  # y padding
    segments /= gain
    clip_segments(segments, img0_shape)
    if normalize:
        segments[:, 0] /= img0_shape[1]  # width
        segments[:, 1] /= img0_shape[0]  # height
    return segments


def masks2segments(masks, strategy="largest"):
    """Converts binary (n,160,160) masks to polygon segments with options for concatenation or selecting the largest
    segment.
    """
    segments = []
    for x in masks.int().cpu().numpy().astype("uint8"):
        c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if c:
            if strategy == "concat":  # concatenate all segments
                c = np.concatenate([x.reshape(-1, 2) for x in c])
            elif strategy == "largest":  # select largest segment
                c = np.array(c[np.array([len(x) for x in c]).argmax()]).reshape(-1, 2)
        else:
            c = np.zeros((0, 2))  # no segments found
        segments.append(c.astype("float32"))
    return segments


def keep_highest_conf_per_class(bboxes, scores, labels, segments, classes=0):
    """Keep only the single best detection of the class given by `classes`,
    scored by box area (dominant term) plus confidence (tie-breaker)."""
    if bboxes.shape[0] == 0:
        return bboxes, scores, labels, segments, []  # nothing detected, return as-is
    cls_mask = labels == classes  # all detections of the requested class
    # Areas of the boxes belonging to that class
    areas = (bboxes[cls_mask, 2] - bboxes[cls_mask, 0]) * (bboxes[cls_mask, 3] - bboxes[cls_mask, 1])
    # Composite score: area dominates, confidence contributes a small part
    scores_combined = scores[cls_mask] * 0.1 + 1.0 * areas
    # Best-scoring detection, mapped back to an index into the original arrays
    max_score_index = np.argmax(scores_combined)
    original_max_conf_index = np.where(cls_mask)[0][max_score_index]
    max_conf_indices = [original_max_conf_index]
    # Return only the best-scoring detection
    return (bboxes[max_conf_indices], scores[max_conf_indices], labels[max_conf_indices],
            [segments[i] for i in max_conf_indices], max_conf_indices)


class YOLOv8_seg_main:
    def __init__(self, args: argparse.Namespace):
        if args.method == 'cudart':
            from models.cudart_api import TRTEngine
        elif args.method == 'pycuda':
            from models.pycuda_api import TRTEngine
        else:
            raise NotImplementedError
        self.Engine = TRTEngine(args.engine)
        self.H, self.W = self.Engine.inp_info[0].shape[-2:]
        self.args = args

    def main(self, bgr, imagename, outtxtdir) -> None:
        outtxtdir = Path(outtxtdir)
        outtxtdir.mkdir(parents=True, exist_ok=True)  # ensure the txt output dir exists
        save_path = Path(self.args.out_dir)
        if not save_path.exists():
            save_path.mkdir(parents=True, exist_ok=True)
        draw = bgr.copy()
        bgr, ratio, dwdh = letterbox(bgr, (self.W, self.H))
        dw, dh = int(dwdh[0]), int(dwdh[1])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        tensor, seg_img = blob(rgb, return_seg=True)
        dwdh = np.array(dwdh * 2, dtype=np.float32)
        tensor = np.ascontiguousarray(tensor)
        # inference
        data = self.Engine(tensor)
        seg_img = seg_img[dh:self.H - dh, dw:self.W - dw, [2, 1, 0]]
        bboxes, scores, labels, masks = seg_postprocess(
            data, bgr.shape[:2], self.args.conf_thres, self.args.iou_thres)
        if bboxes.size == 0:
            # no bounding box: nothing to draw or save
            print(f'{imagename}: no object!')
            return
        masks = masks[:, dh:self.H - dh, dw:self.W - dw, :]
        segments = [
            scale_segments(tensor.shape[2:], x, rgb.shape, normalize=True)
            for x in masks2segments(torch.from_numpy(masks))  # keep order aligned with bboxes/masks
        ]
        bboxes -= dwdh
        bboxes /= ratio
        # keep only the best detection of class 0
        bboxes, scores, labels, segments, max_conf_indices = keep_highest_conf_per_class(
            bboxes, scores, labels, segments, classes=0)
        if self.args.show:
            masks = masks[max_conf_indices]
            mask_colors = MASK_COLORS[0]
            mask_colors = mask_colors.reshape(-1, 1, 1, 3) * ALPHA
            mask_colors = masks @ mask_colors
            inv_alph_masks = (1 - masks * 0.5).cumprod(0)
            mcs = (mask_colors * inv_alph_masks).sum(0) * 2
            seg_img = (seg_img * inv_alph_masks[-1] + mcs) * 255
            draw = cv2.resize(seg_img.astype(np.uint8), draw.shape[:2][::-1])
        if self.args.save_txt:
            seg = segments[0].reshape(-1)  # (n,2) to (n*2)
            line = (int(labels[0]), *seg)  # YOLO-seg label format: cls x1 y1 x2 y2 ...
            with open(outtxtdir / f"{Path(imagename).stem}.txt", "w") as f:
                f.write(("%g " * len(line)).rstrip() % line + "\n")
        if self.args.show:
            save_image = save_path / Path(imagename).name
            cv2.imwrite(str(save_image), draw)


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, default="../yolov8l-seg.engine", help='Engine file')
    parser.add_argument('--imgs', type=str, default="data", help='Images file')
    parser.add_argument('--show',
                        action='store_true',
                        default=False,
                        help='Show the detection results')
    parser.add_argument('--save_txt',
                        action='store_true',
                        default=True,
                        help='Save the detection results to txt')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--conf-thres',
                        type=float,
                        default=0.25,
                        help='Confidence threshold')
    parser.add_argument('--iou-thres',
                        type=float,
                        default=0.25,
                        help='IoU threshold')
    parser.add_argument('--method',
                        type=str,
                        default='cudart',
                        help='Inference backend: cudart or pycuda')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    yolov8_seg = YOLOv8_seg_main(args)  # instance name must not shadow the class
    imgpath = './data/1.jpg'
    outtxtdir = './output'
    bgr_img = cv2.imread(imgpath)
    t1 = time.time()
    for i in range(100):
        yolov8_seg.main(bgr_img, imgpath, outtxtdir)
    print(time.time() - t1)
```
The inputs are a BGR image, the image path, and an output directory; at the end the mask is saved as a .txt file.
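Each saved txt file follows the YOLO segmentation label format written by `main` above: a class id followed by normalized polygon coordinates on one line. A minimal sketch for reading one back (the file name is just an example, matching the `./data/1.jpg` demo input):

```python
import numpy as np

# Parse one saved label line: "cls x1 y1 x2 y2 ..."
with open("./output/1.txt") as f:  # hypothetical output from the demo above
    values = f.readline().split()

cls_id = int(float(values[0]))
polygon = np.array(values[1:], dtype=np.float32).reshape(-1, 2)  # normalized (x, y) pairs
print(cls_id, polygon.shape)
```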