- 🍨 本文为🔗365天深度学习训练营 中的学习记录博客
- 🍖 原作者:K同学啊 | 接辅导、项目定制
目录
- 环境
- 代码在之前的章节都有,我后续只贴出模型设计
- 构建过程如下
- 打印模型结构
- 打印参数量
- 训练过程与结果
- 总结
环境
- 系统: Linux
- 语言: Python3.8.10
- 深度学习框架: PyTorch 2.0.0+cu118
- 显卡: RTX 2080 Ti
代码在之前的章节都有,我后续只贴出模型设计
对比 ResNet 的 Block 和 ResNeXt 的 Block 可以发现,最重要的改动就是中间的 3×3 卷积变成了分组卷积(本文取 groups=32)。
构建过程如下
- 创建Block
class Block(nn.Module):
    """Bottleneck residual block with a grouped 3x3 convolution (ResNeXt style).

    Main path: 1x1 conv -> BN -> ReLU -> 3x3 grouped conv -> BN -> ReLU
    -> 1x1 conv -> BN.  The shortcut is added to the main-path output and
    the final ReLU is applied to the sum.

    Args:
        input_size: Number of input channels.
        hidden_size: Number of channels of the two inner convolutions; the
            block outputs ``hidden_size * 2`` channels.
        strides: Stride of the last 1x1 convolution and of the projection
            shortcut (when one is used).
        groups: Number of groups of the 3x3 convolution.
        conv_shortcut: If True the shortcut is a 1x1 conv + BN projection;
            otherwise it is the identity, in which case the input must
            already have the shape of the main-path output.
    """

    def __init__(self, input_size, hidden_size, strides=1, groups=32, conv_shortcut=True):
        super().__init__()
        output_size = hidden_size * 2

        # Shortcut branch: project to the output shape, or pass through unchanged.
        if conv_shortcut:
            self.start = nn.Sequential(
                nn.Conv2d(input_size, output_size, 1, stride=strides, bias=False),
                nn.BatchNorm2d(output_size, eps=1.001e-5),
            )
        else:
            self.start = nn.Identity()

        # 1x1 convolution: input_size -> hidden_size channels.
        self.conv1 = nn.Conv2d(input_size, hidden_size, 1, padding='same', bias=False)
        self.bn1 = nn.BatchNorm2d(hidden_size, eps=1.001e-5)
        self.relu1 = nn.ReLU()

        # 3x3 grouped convolution: the change that distinguishes ResNeXt from ResNet.
        self.conv2 = nn.Conv2d(hidden_size, hidden_size, 3, padding='same', groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden_size, eps=1.001e-5)
        self.relu2 = nn.ReLU()

        # 1x1 convolution: hidden_size -> output_size channels, carrying the block's stride.
        self.conv3 = nn.Conv2d(hidden_size, output_size, 1, stride=strides, bias=False)
        self.bn3 = nn.BatchNorm2d(output_size, eps=1.001e-5)
        self.relu3 = nn.ReLU()

    def forward(self, inputs):
        """Return ``relu(main_path(inputs) + shortcut(inputs))``."""
        short = self.start(inputs)

        x = self.relu1(self.bn1(self.conv1(inputs)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))

        # The last activation goes AFTER the residual addition; applying it
        # before the add would leave the block output un-rectified.
        return self.relu3(x + short)
- 创建Stack
class Stack(nn.Module):
    """A stage of residual blocks operating at one channel width.

    The stage starts with one block that uses a projection shortcut (and
    ``strides``), followed by ``blocks`` blocks with identity shortcuts, so
    it contains ``blocks + 1`` blocks in total and outputs
    ``hidden_size * 2`` channels.

    Args:
        input_size: Number of channels fed to the first block.
        hidden_size: Inner channel width passed to every block.
        blocks: Number of identity-shortcut blocks appended after the first
            (projection) block.
        strides: Stride passed to the first block only.
        groups: Number of convolution groups passed to every block.
    """

    def __init__(self, input_size, hidden_size, blocks, strides, groups=32):
        super().__init__()
        self.layers = nn.Sequential()
        # First block changes the channel count (and resolution when strides > 1).
        self.layers.add_module('first', Block(input_size, hidden_size, strides=strides, groups=groups))
        # Remaining blocks keep the shape, so an identity shortcut is sufficient.
        for index in range(1, blocks + 1):
            self.layers.add_module(
                'layer%d' % index,
                Block(hidden_size * 2, hidden_size, groups=groups, conv_shortcut=False),
            )

    def forward(self, inputs):
        """Run the input through all blocks of the stage in order."""
        return self.layers(inputs)
- 创建模型
class ResNeXt50(nn.Module):
    """ResNeXt-50 style image classifier.

    Layout: a 7x7/2 convolution stem with 3x3/2 max pooling, four stages of
    grouped-convolution bottleneck blocks, adaptive average pooling to a
    5x5 map, and a linear classifier over the flattened 5*5*2048 features.

    ``forward`` returns raw, unnormalized class scores (logits).  Losses such
    as ``nn.CrossEntropyLoss`` apply log-softmax themselves, so feeding them
    probabilities would normalize twice and flatten the gradients.
    NOTE(review): this assumes the training loop uses ``nn.CrossEntropyLoss``
    - confirm against the training code.  Callers that need probabilities
    can apply ``model.softmax(logits)``.

    Args:
        num_classes: Number of output classes.
    """

    def __init__(self, num_classes):
        super().__init__()
        # Stem: explicit zero padding followed by un-padded conv / pooling.
        self.pre = nn.Sequential(
            nn.ZeroPad2d(3),
            nn.Conv2d(3, 64, 7, stride=2),
            nn.BatchNorm2d(64, eps=1.001e-5),
            nn.ReLU(),
            nn.ZeroPad2d(1),
            nn.MaxPool2d(3, stride=2),
        )
        # Each Stack is built with `blocks` as its block-count argument.
        self.stack1 = Stack(64, 128, blocks=2, strides=1)
        self.stack2 = Stack(256, 256, blocks=3, strides=2)
        self.stack3 = Stack(512, 512, blocks=5, strides=2)
        self.stack4 = Stack(1024, 1024, blocks=2, strides=2)
        self.avg = nn.AdaptiveAvgPool2d(5)
        self.classifier = nn.Linear(5 * 5 * 2048, num_classes)
        # Kept as an attribute for callers that want probabilities; it is
        # deliberately not applied inside forward().
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        """Return the class logits, one row of ``num_classes`` scores per sample."""
        x = self.pre(inputs)
        x = self.stack1(x)
        x = self.stack2(x)
        x = self.stack3(x)
        x = self.stack4(x)
        x = self.avg(x)
        # flatten(1) keeps the batch dimension and also handles
        # non-contiguous tensors, which view() would reject.
        x = x.flatten(1)
        return self.classifier(x)
打印模型结构
# Build the 2-class model and move it to `device` (`device` is not defined in
# this post; presumably it comes from the earlier chapters' setup code).
model = ResNeXt50(2).to(device)
# Bare expression: presumably a notebook cell, where this displays the module
# structure reproduced below.
model
ResNeXt50(
(pre): Sequential(
(0): ZeroPad2d((3, 3, 3, 3))
(1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2))
(2): BatchNorm2d(64, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): ZeroPad2d((1, 1, 1, 1))
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(stack1): Stack(
(layers): Sequential(
(first): Block(
(start): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer1): Block(
(start): Identity()
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer2): Block(
(start): Identity()
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
)
)
(stack2): Stack(
(layers): Sequential(
(first): Block(
(start): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer1): Block(
(start): Identity()
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer2): Block(
(start): Identity()
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer3): Block(
(start): Identity()
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
)
)
(stack3): Stack(
(layers): Sequential(
(first): Block(
(start): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer1): Block(
(start): Identity()
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer2): Block(
(start): Identity()
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer3): Block(
(start): Identity()
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer4): Block(
(start): Identity()
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer5): Block(
(start): Identity()
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
)
)
(stack4): Stack(
(layers): Sequential(
(first): Block(
(start): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
)
(conv1): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(bn3): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer1): Block(
(start): Identity()
(conv1): Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
(layer2): Block(
(start): Identity()
(conv1): Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
(bn1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu1): ReLU()
(conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
(bn2): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu2): ReLU()
(conv3): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu3): ReLU()
)
)
)
(avg): AdaptiveAvgPool2d(output_size=5)
(classifier): Linear(in_features=51200, out_features=2, bias=True)
(softmax): Softmax(dim=1)
)
打印参数量
# Report per-layer output shapes and parameter counts for an input of shape
# (32, 3, 224, 224). `summary` is not imported in this post; presumably it is
# torchinfo's `summary` from the earlier chapters' code - confirm.
summary(model, input_size=(32, 3, 224, 224))
===============================================================================================
Layer (type:depth-idx) Output Shape Param #
===============================================================================================
ResNeXt50 [32, 2] --
├─Sequential: 1-1 [32, 64, 56, 56] --
│ └─ZeroPad2d: 2-1 [32, 3, 230, 230] --
│ └─Conv2d: 2-2 [32, 64, 112, 112] 9,472
│ └─BatchNorm2d: 2-3 [32, 64, 112, 112] 128
│ └─ReLU: 2-4 [32, 64, 112, 112] --
│ └─ZeroPad2d: 2-5 [32, 64, 114, 114] --
│ └─MaxPool2d: 2-6 [32, 64, 56, 56] --
├─Stack: 1-2 [32, 256, 56, 56] --
│ └─Sequential: 2-7 [32, 256, 56, 56] --
│ │ └─Block: 3-1 [32, 256, 56, 56] 63,488
│ │ └─Block: 3-2 [32, 256, 56, 56] 71,168
│ │ └─Block: 3-3 [32, 256, 56, 56] 71,168
├─Stack: 1-3 [32, 512, 28, 28] --
│ └─Sequential: 2-8 [32, 512, 28, 28] --
│ │ └─Block: 3-4 [32, 512, 28, 28] 349,184
│ │ └─Block: 3-5 [32, 512, 28, 28] 282,624
│ │ └─Block: 3-6 [32, 512, 28, 28] 282,624
│ │ └─Block: 3-7 [32, 512, 28, 28] 282,624
├─Stack: 1-4 [32, 1024, 14, 14] --
│ └─Sequential: 2-9 [32, 1024, 14, 14] --
│ │ └─Block: 3-8 [32, 1024, 14, 14] 1,390,592
│ │ └─Block: 3-9 [32, 1024, 14, 14] 1,126,400
│ │ └─Block: 3-10 [32, 1024, 14, 14] 1,126,400
│ │ └─Block: 3-11 [32, 1024, 14, 14] 1,126,400
│ │ └─Block: 3-12 [32, 1024, 14, 14] 1,126,400
│ │ └─Block: 3-13 [32, 1024, 14, 14] 1,126,400
├─Stack: 1-5 [32, 2048, 7, 7] --
│ └─Sequential: 2-10 [32, 2048, 7, 7] --
│ │ └─Block: 3-14 [32, 2048, 7, 7] 5,550,080
│ │ └─Block: 3-15 [32, 2048, 7, 7] 4,497,408
│ │ └─Block: 3-16 [32, 2048, 7, 7] 4,497,408
├─AdaptiveAvgPool2d: 1-6 [32, 2048, 5, 5] --
├─Linear: 1-7 [32, 2] 102,402
├─Softmax: 1-8 [32, 2] --
===============================================================================================
Total params: 23,082,370
Trainable params: 23,082,370
Non-trainable params: 0
Total mult-adds (G): 139.50
===============================================================================================
Input size (MB): 19.27
Forward/backward pass size (MB): 7912.56
Params size (MB): 92.33
Estimated Total Size (MB): 8024.15
===============================================================================================
训练过程与结果
Epoch: 1, TrainLoss: 0.653, TrainAcc: 63.2, TestLoss: 0.611, TestAcc: 66.7, Lr: 1.00e-05
Epoch: 2, TrainLoss: 0.573, TrainAcc: 73.4, TestLoss: 0.578, TestAcc: 74.1, Lr: 1.00e-05
Epoch: 3, TrainLoss: 0.519, TrainAcc: 81.7, TestLoss: 0.557, TestAcc: 75.8, Lr: 1.00e-05
Epoch: 4, TrainLoss: 0.483, TrainAcc: 86.4, TestLoss: 0.553, TestAcc: 78.6, Lr: 1.00e-05
Epoch: 5, TrainLoss: 0.453, TrainAcc: 89.5, TestLoss: 0.536, TestAcc: 78.8, Lr: 1.00e-05
Epoch: 6, TrainLoss: 0.428, TrainAcc: 93.2, TestLoss: 0.538, TestAcc: 79.3, Lr: 1.00e-05
Epoch: 7, TrainLoss: 0.413, TrainAcc: 94.4, TestLoss: 0.516, TestAcc: 78.8, Lr: 1.00e-05
Epoch: 8, TrainLoss: 0.402, TrainAcc: 95.0, TestLoss: 0.506, TestAcc: 81.8, Lr: 1.00e-05
Epoch: 9, TrainLoss: 0.389, TrainAcc: 96.0, TestLoss: 0.506, TestAcc: 81.8, Lr: 1.00e-05
Epoch: 10, TrainLoss: 0.375, TrainAcc: 96.9, TestLoss: 0.493, TestAcc: 82.5, Lr: 1.00e-05
Epoch: 11, TrainLoss: 0.364, TrainAcc: 98.0, TestLoss: 0.488, TestAcc: 83.2, Lr: 1.00e-05
Epoch: 12, TrainLoss: 0.358, TrainAcc: 98.2, TestLoss: 0.485, TestAcc: 84.1, Lr: 1.00e-05
Epoch: 13, TrainLoss: 0.356, TrainAcc: 98.1, TestLoss: 0.477, TestAcc: 83.7, Lr: 1.00e-05
Epoch: 14, TrainLoss: 0.351, TrainAcc: 98.7, TestLoss: 0.473, TestAcc: 85.8, Lr: 1.00e-05
Epoch: 15, TrainLoss: 0.346, TrainAcc: 98.9, TestLoss: 0.471, TestAcc: 84.4, Lr: 1.00e-05
Epoch: 16, TrainLoss: 0.342, TrainAcc: 99.1, TestLoss: 0.470, TestAcc: 86.0, Lr: 1.00e-05
Epoch: 17, TrainLoss: 0.337, TrainAcc: 99.2, TestLoss: 0.465, TestAcc: 86.2, Lr: 1.00e-05
Epoch: 18, TrainLoss: 0.335, TrainAcc: 99.4, TestLoss: 0.459, TestAcc: 86.7, Lr: 1.00e-05
Epoch: 19, TrainLoss: 0.334, TrainAcc: 99.2, TestLoss: 0.465, TestAcc: 84.4, Lr: 1.00e-05
Epoch: 20, TrainLoss: 0.329, TrainAcc: 99.5, TestLoss: 0.470, TestAcc: 84.4, Lr: 1.00e-05
Epoch: 21, TrainLoss: 0.331, TrainAcc: 99.2, TestLoss: 0.458, TestAcc: 86.9, Lr: 1.00e-05
Epoch: 22, TrainLoss: 0.326, TrainAcc: 99.6, TestLoss: 0.458, TestAcc: 86.5, Lr: 1.00e-05
Epoch: 23, TrainLoss: 0.325, TrainAcc: 99.6, TestLoss: 0.462, TestAcc: 84.8, Lr: 1.00e-05
Epoch: 24, TrainLoss: 0.325, TrainAcc: 99.5, TestLoss: 0.461, TestAcc: 85.8, Lr: 1.00e-05
Epoch: 25, TrainLoss: 0.323, TrainAcc: 99.8, TestLoss: 0.465, TestAcc: 84.6, Lr: 1.00e-05
Epoch: 26, TrainLoss: 0.324, TrainAcc: 99.7, TestLoss: 0.458, TestAcc: 86.2, Lr: 1.00e-05
Epoch: 27, TrainLoss: 0.321, TrainAcc: 99.9, TestLoss: 0.468, TestAcc: 83.4, Lr: 1.00e-05
Epoch: 28, TrainLoss: 0.319, TrainAcc: 99.9, TestLoss: 0.453, TestAcc: 86.2, Lr: 1.00e-05
Epoch: 29, TrainLoss: 0.320, TrainAcc: 99.8, TestLoss: 0.459, TestAcc: 85.3, Lr: 1.00e-05
Epoch: 30, TrainLoss: 0.318, TrainAcc: 99.8, TestLoss: 0.459, TestAcc: 85.5, Lr: 1.00e-05
Epoch: 31, TrainLoss: 0.318, TrainAcc: 99.9, TestLoss: 0.460, TestAcc: 85.3, Lr: 1.00e-05
Epoch: 32, TrainLoss: 0.318, TrainAcc: 100.0, TestLoss: 0.459, TestAcc: 83.9, Lr: 1.00e-05
Epoch: 33, TrainLoss: 0.318, TrainAcc: 99.9, TestLoss: 0.448, TestAcc: 88.3, Lr: 1.00e-05
Epoch: 34, TrainLoss: 0.318, TrainAcc: 99.9, TestLoss: 0.454, TestAcc: 85.5, Lr: 1.00e-05
Epoch: 35, TrainLoss: 0.317, TrainAcc: 99.9, TestLoss: 0.451, TestAcc: 86.5, Lr: 1.00e-05
Epoch: 36, TrainLoss: 0.317, TrainAcc: 99.9, TestLoss: 0.448, TestAcc: 86.7, Lr: 1.00e-05
Epoch: 37, TrainLoss: 0.318, TrainAcc: 99.8, TestLoss: 0.449, TestAcc: 86.7, Lr: 1.00e-05
Epoch: 38, TrainLoss: 0.316, TrainAcc: 100.0, TestLoss: 0.441, TestAcc: 87.2, Lr: 1.00e-05
Epoch: 39, TrainLoss: 0.316, TrainAcc: 99.9, TestLoss: 0.452, TestAcc: 86.0, Lr: 1.00e-05
Epoch: 40, TrainLoss: 0.317, TrainAcc: 99.9, TestLoss: 0.454, TestAcc: 85.8, Lr: 1.00e-05
done, best acc: 88.3
总结
从结果来看,模型的过拟合问题比较严重:训练集准确率接近 100%,而测试集准确率只在 85% 左右徘徊(最高 88.3%)。当参数量较大而数据集较小时,模型很可能只是记住了全部训练样本,因此训练集上的准确率很高,在测试集上的表现却并不突出。