发现 cfg/default.yaml 中的参数 mask_ratio 默认等于 4。直接使用如下的边缘分割标签进行训练时,推理得到的 mask 稀疏,训练时分数偏低;将 mask_ratio 改为 1 后,训练时打印的 mask 的 P 指标一直为 0;将 imgsz 设为原图尺寸进行训练,分数也不高。
标注用的是labelme多边形
阅读源码后发现,问题可能是由 mask 缩放导致的,
并且还出现了上边缘 mask 被 box 过度裁剪的情况。
修改了源码中的两处,仍然保持 mask_ratio 等于 4,重新训练后的推理结果如下:虽然 mask 比较粗糙,但几乎不再产生断裂。
修改如下:
1.ultralytics/data/utils.py
从
def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
    """
    Rasterize polygons at full resolution, then shrink the result by ``downsample_ratio``.

    Args:
        imgsz (tuple): Shape of the full-resolution mask that is allocated, indexed as
            (imgsz[0], imgsz[1]); presumably (height, width) - confirm against the caller.
        polygons (list | np.ndarray): N polygons convertible to one regular array; it is cast to
            int32 and reshaped to (N, -1, 2), i.e. each polygon is a flat run of coordinate pairs.
        color (int): Value written into the mask inside the polygons.
        downsample_ratio (int): Integer factor by which the returned mask is smaller than ``imgsz``.

    Returns:
        (np.ndarray): The filled mask resized to (imgsz[0] // downsample_ratio,
            imgsz[1] // downsample_ratio).
    """
    mask = np.zeros(imgsz, dtype=np.uint8)
    # Casting to int32 truncates any fractional vertex coordinates.
    polygons = np.asarray(polygons, dtype=np.int32)
    polygons = polygons.reshape((polygons.shape[0], -1, 2))
    cv2.fillPoly(mask, polygons, color=color)
    nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio)
    # Note: fillPoly first then resize is trying to keep the same loss calculation method when mask-ratio=1
    # NOTE(review): no interpolation flag is passed, so OpenCV's default is used; on very thin
    # regions this resize may thin or break the mask - the effect this report describes.
    return cv2.resize(mask, (nw, nh))
到
def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
    """
    Rasterize polygons directly onto a mask at the downsampled resolution.

    The vertices are scaled down first and the polygons are then filled on the small mask, so no
    image resize is applied to the filled mask afterwards.

    Args:
        imgsz (tuple): Size of the full-resolution image, indexed as (imgsz[0], imgsz[1]);
            presumably (height, width) - confirm against the caller.
        polygons (list | np.ndarray): N polygons convertible to one regular array; each polygon is
            a flat run of coordinate pairs expressed in full-resolution pixels.
        color (int): Value written into the mask inside the polygons.
        downsample_ratio (int): Integer factor by which the mask is smaller than ``imgsz``.

    Returns:
        (np.ndarray): uint8 mask of shape (imgsz[0] // downsample_ratio,
            imgsz[1] // downsample_ratio).
    """
    nh, nw = imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio
    mask = np.zeros((nh, nw), dtype=np.uint8)
    # Scale by the actual ratio. The previous hard-coded 0.25 only matched the mask size when
    # downsample_ratio == 4 and drew the polygons at the wrong scale for every other value.
    polygons = np.asarray(polygons, dtype=np.float64) / downsample_ratio
    # The int32 cast truncates fractional coordinates, as the previous version did.
    polygons = polygons.astype(np.int32).reshape((polygons.shape[0], -1, 2))
    cv2.fillPoly(mask, polygons, color=color)
    return mask
2.ultralytics/models/yolo/segment/predict.py
在 SegmentationPredictor 类的 postprocess 方法中(非 retina_masks 分支),将 box 向四周外扩 1 个 mask 像素,以避免 mask 边缘被 box 过度裁剪:
def postprocess(self, preds, img, orig_imgs):
    """
    Applies non-max suppression and processes detections for each image in an input batch.

    Args:
        preds: Model outputs; ``preds[0]`` is passed to NMS and ``preds[1]`` supplies the mask
            prototypes (its last element when it has length 3, otherwise itself).
        img: Preprocessed input batch; only ``img.shape[2:]`` (the network input size) is read.
        orig_imgs: Original images, either a list or a batched tensor that is converted to a
            list of numpy images.

    Returns:
        (list): One ``Results`` object per image, holding boxes scaled to the original image
            and the matching masks (``None`` when the image has no detections).
    """
    p = ops.non_max_suppression(
        preds[0],
        self.args.conf,
        self.args.iou,
        agnostic=self.args.agnostic_nms,
        max_det=self.args.max_det,
        nc=len(self.model.names),
        classes=self.args.classes,
    )

    if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
        orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

    results = []
    proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
    for i, pred in enumerate(p):
        orig_img = orig_imgs[i]
        img_path = self.batch[0][i]
        if not len(pred):  # save empty boxes
            masks = None
        elif self.args.retina_masks:
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
        else:
            # Loosen the crop by one prototype-mask pixel on every side so that mask edges are
            # not clipped by a tight box.
            _, mh, mw = proto[i].shape  # CHW
            ih, iw = img.shape[2:]
            pad_x, pad_y = iw / mw, ih / mh  # size of one mask pixel in input-image pixels
            # Expand a copy: the reported boxes in pred[:, :4] must stay untouched, otherwise
            # the boxes returned in Results are inflated as well.
            # NOTE(review): assumes pred is a torch.Tensor (uses .clone()) - confirm.
            crop_boxes = pred[:, :4].clone()
            crop_boxes[:, 0] -= pad_x
            crop_boxes[:, 1] -= pad_y
            crop_boxes[:, 2] += pad_x
            crop_boxes[:, 3] += pad_y
            masks = ops.process_mask(proto[i], pred[:, 6:], crop_boxes, img.shape[2:], upsample=True)  # HWC
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
        results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
    return results