论文学习:AFPN: Asymptotic Feature Pyramid Network for Object Detection-全新特征融合模块AFPN,完胜PAFPN_athrunsunny的博客-CSDN博客
先上配置文件yolov5s-AFPN.yaml
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
# NOTE(review): leading indentation appears to have been lost in this listing; the nested
# lists under `backbone:` and `head:` probably need to be indented before a YAML loader
# accepts the file - confirm against the real config.
# Parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple
anchors:
- [10,13, 16,30, 33,23] # P3/8
- [30,61, 62,45, 59,119] # P4/16
- [116,90, 156,198, 373,326] # P5/32
# YOLOv5 v6.0 backbone
backbone:
# [from, number, module, args]
[[-1, 1, Conv, [64, 6, 2, 2]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, C3, [128]], # 2
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 6, C3, [256]], # 4 (P3/8 features, tapped by head layer 10)
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, C3, [512]], # 6 (P4/16 features, tapped by head layer 11)
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 3, C3, [1024]], # 8
[-1, 1, SPPF, [1024, 5]], # 9
]
# YOLOv5 v6.0 head
# Layer indices continue from the backbone, so the first head entry is layer 10.
# ASFF_2 / ASFF_3 args are [inter_dim, level]; parse_model scales inter_dim by
# width_multiple and appends the list of input channel counts as a third argument.
head:
[[4, 1, Conv, [64, 1, 1]], # 10: from backbone layer 4 (P3/8)
[6, 1, Conv, [128, 1, 1]], # 11: from backbone layer 6 (P4/16)
[[10, 11], 1, ASFF_2, [64, 0]], # 12: fuse 10+11, level 0 (second input is upsampled)
[[10, 11], 1, ASFF_2, [128, 1]], # 13: fuse 10+11, level 1 (first input is downsampled)
[-2, 1, C3, [64, False]], # 14: -2 is relative, i.e. layer 12
[-2, 1, C3, [128, False]], # 15: -2 is relative, i.e. layer 13
[9, 1, Conv, [256, 1, 1]], # 16: from backbone layer 9 (SPPF output, P5/32)
[[14, 15, 16], 1, ASFF_3, [64, 0]], # 17: fuse 14+15+16, level 0
[[14, 15, 16], 1, ASFF_3, [128, 1]], # 18: fuse 14+15+16, level 1
[[14, 15, 16], 1, ASFF_3, [256, 2]], # 19: fuse 14+15+16, level 2
[17, 1, C3, [64, False]], # 20
[18, 1, C3, [128, False]], # 21
[19, 1, C3, [256, False]], # 22
[[20, 21, 22], 1, Detect, [nc, anchors]] # 23: detection over layers 20, 21, 22
]
在 models/common.py 中增加以下模块(Upsample、Downsample、ASFF_2、ASFF_3):
class Upsample(nn.Module):
    """Pass the input through a ``Conv`` and then a bilinear ``nn.Upsample``.

    Args:
        in_channels: First argument forwarded to ``Conv``.
        out_channels: Second argument forwarded to ``Conv``.
        scale_factor: Magnification handed to ``nn.Upsample`` (default 2).
    """

    def __init__(self, in_channels, out_channels, scale_factor=2):
        super().__init__()
        # Third positional argument 1 is presumably the kernel size of the
        # project's Conv (i.e. a 1x1 projection) - confirm against its signature.
        projection = Conv(in_channels, out_channels, 1)
        resize = nn.Upsample(scale_factor=scale_factor, mode='bilinear')
        # Stage order matters for state-dict keys: index 0 = Conv, index 1 = resize.
        # The author noted mmcv's CARAFEPack(out_channels, scale_factor=scale_factor)
        # as a possible replacement for the resize stage.
        self.upsample = nn.Sequential(projection, resize)

    def forward(self, x):
        """Return ``x`` after the Conv projection and the upsampling stage."""
        return self.upsample(x)
class Downsample(nn.Module):
    """Reduce a feature map with a single ``Conv`` wrapped in ``nn.Sequential``.

    Args:
        in_channels: First argument forwarded to ``Conv``.
        out_channels: Second argument forwarded to ``Conv``.
        scale_factor: Passed to ``Conv`` as both its third and fourth positional
            argument (presumably kernel size and stride - confirm against the
            project's ``Conv`` signature); the fifth argument is fixed at 0.
    """

    def __init__(self, in_channels, out_channels, scale_factor=2):
        super().__init__()
        strided = Conv(in_channels, out_channels, scale_factor, scale_factor, 0)
        # The Sequential wrapper is kept so parameter names stay `downsample.0.*`.
        self.downsample = nn.Sequential(strided)

    def forward(self, x):
        """Return ``x`` after the downsampling convolution."""
        return self.downsample(x)
class ASFF_2(nn.Module):
    """Fuse two feature maps with learned, softmax-normalised per-location weights.

    One of the two inputs is first resampled so both match the target level,
    then each is reduced to ``compress_c`` channels, the reductions are
    concatenated and mapped to two weight channels, and the inputs are blended
    with the softmax of those weights before a final ``Conv``.

    Only the resampler required by ``level`` is created, so the module does not
    register parameters that ``forward`` never uses.

    Args:
        inter_dim: Channel count the weight branches and the output ``Conv``
            are built for.
        level: 0 -> the second input is passed through ``Upsample``;
            1 -> the first input is passed through ``Downsample``;
            any other value -> no resampling is performed.
        channel: Channel counts of the two inputs, ``(first, second)``.
    """

    def __init__(self, inter_dim=512, level=0, channel=(64, 128)):
        super().__init__()
        self.inter_dim = inter_dim
        self.level = level
        compress_c = 8  # width of each per-input weight branch

        self.weight_level_1 = Conv(self.inter_dim, compress_c, 1, 1)
        self.weight_level_2 = Conv(self.inter_dim, compress_c, 1, 1)
        self.weight_levels = nn.Conv2d(compress_c * 2, 2, kernel_size=1, stride=1, padding=0)
        self.conv = Conv(self.inter_dim, self.inter_dim, 3, 1)

        # Build just the resampler this level needs (mirrors ASFF_3).
        if self.level == 0:
            self.upsample = Upsample(channel[1], channel[0])
        elif self.level == 1:
            self.downsample = Downsample(channel[0], channel[1])

    def forward(self, x):
        """Blend the two maps in ``x`` and return the fused result.

        Args:
            x: Sequence of exactly two tensors, ``(input1, input2)``.

        Returns:
            Output of ``self.conv`` applied to the weighted sum of both inputs.
        """
        input1, input2 = x
        if self.level == 0:
            input2 = self.upsample(input2)
        elif self.level == 1:
            input1 = self.downsample(input1)

        levels_weight_v = torch.cat(
            (self.weight_level_1(input1), self.weight_level_2(input2)), 1)
        # Softmax over dim 1 makes the two weights sum to one at every location.
        levels_weight = F.softmax(self.weight_levels(levels_weight_v), dim=1)

        fused_out_reduced = (input1 * levels_weight[:, 0:1, :, :]
                             + input2 * levels_weight[:, 1:2, :, :])
        return self.conv(fused_out_reduced)
class ASFF_3(nn.Module):
    """Fuse three feature maps with learned, softmax-normalised per-location weights.

    Two of the three inputs are resampled to the resolution of the target
    level, each map is reduced to ``compress_c`` channels, the reductions are
    concatenated and mapped to three weight channels, and the inputs are
    blended with the softmax of those weights before a final ``Conv``.

    Args:
        inter_dim: Channel count the weight branches and the output ``Conv``
            are built for.
        level: Index (0, 1 or 2) of the input whose resolution is the target;
            the other two inputs are up- or downsampled to match. Any other
            value creates no resamplers and ``forward`` fuses the inputs as-is.
        channel: Channel counts of the three inputs, ``(first, second, third)``.
    """

    def __init__(self, inter_dim=512, level=0, channel=(64, 128, 256)):
        super().__init__()
        self.inter_dim = inter_dim
        compress_c = 8  # width of each per-input weight branch

        self.weight_level_1 = Conv(self.inter_dim, compress_c, 1, 1)
        self.weight_level_2 = Conv(self.inter_dim, compress_c, 1, 1)
        self.weight_level_3 = Conv(self.inter_dim, compress_c, 1, 1)
        self.weight_levels = nn.Conv2d(compress_c * 3, 3, kernel_size=1, stride=1, padding=0)
        self.conv = Conv(self.inter_dim, self.inter_dim, 3, 1)

        self.level = level
        # Only the resamplers needed for this level are created.
        if self.level == 0:
            self.upsample4x = Upsample(channel[2], channel[0], scale_factor=4)
            self.upsample2x = Upsample(channel[1], channel[0], scale_factor=2)
        elif self.level == 1:
            self.upsample2x1 = Upsample(channel[2], channel[1], scale_factor=2)
            self.downsample2x1 = Downsample(channel[0], channel[1], scale_factor=2)
        elif self.level == 2:
            self.downsample2x = Downsample(channel[1], channel[2], scale_factor=2)
            self.downsample4x = Downsample(channel[0], channel[2], scale_factor=4)

    def forward(self, x):
        """Blend the three maps in ``x`` and return the fused result.

        Args:
            x: Sequence of exactly three tensors, ``(input1, input2, input3)``.

        Returns:
            Output of ``self.conv`` applied to the weighted sum of the inputs.
        """
        input1, input2, input3 = x
        if self.level == 0:
            input2 = self.upsample2x(input2)
            input3 = self.upsample4x(input3)
        elif self.level == 1:
            input3 = self.upsample2x1(input3)
            input1 = self.downsample2x1(input1)
        elif self.level == 2:
            input1 = self.downsample4x(input1)
            input2 = self.downsample2x(input2)

        levels_weight_v = torch.cat(
            (self.weight_level_1(input1),
             self.weight_level_2(input2),
             self.weight_level_3(input3)), 1)
        # Softmax over dim 1 makes the three weights sum to one at every location.
        levels_weight = F.softmax(self.weight_levels(levels_weight_v), dim=1)

        fused_out_reduced = (input1 * levels_weight[:, 0:1, :, :]
                             + input2 * levels_weight[:, 1:2, :, :]
                             + input3 * levels_weight[:, 2:, :, :])
        return self.conv(fused_out_reduced)
在 models/yolo.py 中修改 parse_model 函数,新增 ASFF_2 / ASFF_3 的解析分支(elif m in {ASFF_2, ASFF_3}):
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Parse a YOLOv5 model.yaml dictionary into a layer stack.

    Args:
        d: Model dictionary. Reads the keys 'anchors', 'nc', 'depth_multiple',
            'width_multiple', 'backbone', 'head' and the optional 'activation'.
            The per-layer ``args`` lists inside ``d`` are modified in place
            (string entries are evaluated, ASFF/Detect entries are extended).
        ch: List of input channel counts; ``ch[-1]`` seeds the running output
            channel count and the list is restarted after the first layer.

    Returns:
        Tuple ``(nn.Sequential(*layers), sorted(save))`` where ``save`` holds
        the indices of layers referenced through a non ``-1`` 'from' entry.
    """
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'], d.get('activation')
    if act:
        # NOTE(security): eval() runs text taken from the config; only load trusted model files.
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE(security): module names and string args are resolved with eval() as well.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            # Strings that are not defined names (NameError) are left untouched.
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
                Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, nn.ConvTranspose2d, DWConvTranspose2d, C3x}:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect, Segment}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, 8)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        elif m in {ASFF_2, ASFF_3}:
            # args arrive as [inter_dim, level]: scale inter_dim by the width gain and
            # append the channel counts of every input layer as the `channel` argument.
            c2 = args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)
            args[0] = c2
            args.append([ch[x] for x in f])
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f'{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}')  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []  # drop the input-channel seed; from here on ch[k] is layer k's output width
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)
在yolo.py中配置--cfg为yolov5s-AFPN.yaml,点击运行,可见下图:
论文中提到 AFPN 的效果要比 PAN 好,但我暂时还没有验证;这里先给出代码的最初版本,后续会继续优化。从最上面的图可以看到,参数量确实减少了。