J6 - ResNeXt50模型的实现

🍨 本文为🔗365天深度学习训练营中的学习记录博客
🍖 原作者：K同学啊 | 接辅导、项目定制

环境

系统: Linux
语言: Python 3.8.10
深度学习框架: PyTorch 2.0.0+cu118
显卡: RTX 2080 Ti

代码在之前的章节都有，我后续只贴出模型设计

Block对比
对比ResNet的Block和ResNeXt的Block可以发现，最重要的改动是中间的3×3卷积变成了分组卷积（本文取groups=32）。

构建过程如下

创建Block

class Block(nn.Module):
    """ResNeXt bottleneck block: 1x1 conv -> grouped 3x3 conv -> 1x1 conv, plus a shortcut.

    Args:
        input_size: Number of channels of the incoming feature map.
        hidden_size: Bottleneck width (channels of the first 1x1 conv and of
            the grouped 3x3 conv). The block outputs ``hidden_size * 2`` channels.
        strides: Stride of the last 1x1 conv and of the projection shortcut.
        groups: Number of groups of the 3x3 conv; ``hidden_size`` must be
            divisible by it (``nn.Conv2d`` requirement).
        conv_shortcut: If True, the input is projected by a strided 1x1 conv
            followed by BatchNorm so that it matches the main branch. If
            False, the input is added unchanged, so its shape must already
            match the main-branch output.
    """

    def __init__(self, input_size, hidden_size, strides=1, groups=32, conv_shortcut=True):
        super().__init__()

        # Shortcut branch: projection (conv + BN) or plain identity.
        if conv_shortcut:
            self.start = nn.Sequential(
                nn.Conv2d(input_size, hidden_size * 2, 1, stride=strides, bias=False),
                nn.BatchNorm2d(hidden_size * 2, eps=1.001e-5)
            )
        else:
            self.start = nn.Identity()

        # 1x1 conv to the bottleneck width.
        self.conv1 = nn.Conv2d(input_size, hidden_size, 1, padding='same', bias=False)
        self.bn1 = nn.BatchNorm2d(hidden_size, eps=1.001e-5)
        self.relu1 = nn.ReLU()

        # Grouped 3x3 conv: the ResNeXt-specific part of the block.
        self.conv2 = nn.Conv2d(hidden_size, hidden_size, 3, padding='same', groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(hidden_size, eps=1.001e-5)
        self.relu2 = nn.ReLU()

        # 1x1 conv that doubles the channels; it also carries the block's stride.
        self.conv3 = nn.Conv2d(hidden_size, hidden_size * 2, 1, stride=strides, bias=False)
        self.bn3 = nn.BatchNorm2d(hidden_size * 2, eps=1.001e-5)
        self.relu3 = nn.ReLU()

    def forward(self, inputs):
        """Return ``ReLU(main_branch(inputs) + shortcut(inputs))``."""
        short = self.start(inputs)

        x = self.conv1(inputs)
        x = self.bn1(x)
        x = self.relu1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)

        x = self.conv3(x)
        x = self.bn3(x)

        # Add the shortcut BEFORE the last activation. Applying ReLU to the
        # main branch first would leave the block output un-rectified and
        # clip the residual to non-negative values only.
        x = x + short
        x = self.relu3(x)
        return x

创建Stack

class Stack(nn.Module):
    """One ResNeXt stage: a projection ``Block`` followed by identity-shortcut ``Block``s.

    Args:
        input_size: Channels of the feature map entering the stage.
        hidden_size: Bottleneck width of every block in the stage; the stage
            outputs ``hidden_size * 2`` channels.
        blocks: Number of identity-shortcut blocks appended AFTER the first
            projection block, so the stage holds ``blocks + 1`` blocks in total.
        strides: Stride passed to the first block only; the following blocks
            use the ``Block`` default.
        groups: Group count forwarded to every block.
    """

    def __init__(self, input_size, hidden_size, blocks, strides, groups=32):
        super().__init__()

        self.layers = nn.Sequential()
        # The first block changes the channel count (and resolution, if strided).
        self.layers.add_module('first', Block(input_size, hidden_size, strides=strides, groups=groups))
        # The remaining blocks keep the shape, so an identity shortcut is enough.
        for index in range(1, blocks + 1):
            self.layers.add_module(
                'layer%d' % index,
                Block(hidden_size * 2, hidden_size, groups=groups, conv_shortcut=False),
            )

    def forward(self, inputs):
        """Run ``inputs`` through every block of the stage in order."""
        return self.layers(inputs)

创建模型

class ResNeXt50(nn.Module):
    """ResNeXt-50 style classifier: stem, four ``Stack`` stages, pooled linear head.

    Layout as built below:
      * stem: zero-pad 3 -> 7x7 stride-2 conv (3 -> 64 channels) -> BatchNorm
        -> ReLU -> zero-pad 1 -> 3x3 stride-2 max-pool;
      * stages with bottleneck widths 128 / 256 / 512 / 1024, ending at
        2048 channels;
      * head: adaptive average pool to 5x5, flatten, linear layer to
        ``num_classes``, softmax over dim 1.

    Args:
        num_classes: Size of the output of the final linear layer.

    NOTE(review): ``forward`` returns softmax probabilities, not logits. If
    the training loss is ``nn.CrossEntropyLoss`` (which applies log-softmax
    itself), the softmax would be applied twice - confirm against the
    training code, which is not visible here.
    """

    def __init__(self, num_classes):
        super().__init__()

        stem_layers = [
            nn.ZeroPad2d(3),
            nn.Conv2d(3, 64, 7, stride=2),
            nn.BatchNorm2d(64, eps=1.001e-5),
            nn.ReLU(),
            nn.ZeroPad2d(1),
            nn.MaxPool2d(3, stride=2),
        ]
        self.pre = nn.Sequential(*stem_layers)

        # (input channels, bottleneck width, identity blocks after the first, stride)
        stage_specs = (
            (64, 128, 2, 1),
            (256, 256, 3, 2),
            (512, 512, 5, 2),
            (1024, 1024, 2, 2),
        )
        for idx, (in_ch, width, extra, stride) in enumerate(stage_specs, start=1):
            setattr(self, 'stack%d' % idx, Stack(in_ch, width, blocks=extra, strides=stride))

        pooled_side = 5
        self.avg = nn.AdaptiveAvgPool2d(pooled_side)
        self.classifier = nn.Linear(pooled_side * pooled_side * 2048, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        """Return a ``(batch, num_classes)`` tensor of softmax probabilities."""
        features = self.pre(inputs)
        for stage in (self.stack1, self.stack2, self.stack3, self.stack4):
            features = stage(features)

        pooled = self.avg(features)
        # Collapse channel and spatial dimensions into one feature vector per sample.
        flat = pooled.view(pooled.size(0), -1)
        return self.softmax(self.classifier(flat))

打印模型结构

# Build a 2-class ResNeXt50 and move it to `device`
# (`device` is not defined in this snippet - presumably set in an earlier cell).
model = ResNeXt50(2).to(device)
# Bare expression: presumably a notebook cell, where it displays the module tree shown below.
model

ResNeXt50(
  (pre): Sequential(
    (0): ZeroPad2d((3, 3, 3, 3))
    (1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2))
    (2): BatchNorm2d(64, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): ZeroPad2d((1, 1, 1, 1))
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (stack1): Stack(
    (layers): Sequential(
      (first): Block(
        (start): Sequential(
          (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer1): Block(
        (start): Identity()
        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer2): Block(
        (start): Identity()
        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(128, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
    )
  )
  (stack2): Stack(
    (layers): Sequential(
      (first): Block(
        (start): Sequential(
          (0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (conv1): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer1): Block(
        (start): Identity()
        (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer2): Block(
        (start): Identity()
        (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer3): Block(
        (start): Identity()
        (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(256, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(256, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
    )
  )
  (stack3): Stack(
    (layers): Sequential(
      (first): Block(
        (start): Sequential(
          (0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (conv1): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer1): Block(
        (start): Identity()
        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer2): Block(
        (start): Identity()
        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer3): Block(
        (start): Identity()
        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer4): Block(
        (start): Identity()
        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer5): Block(
        (start): Identity()
        (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(512, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(512, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
    )
  )
  (stack4): Stack(
    (layers): Sequential(
      (first): Block(
        (start): Sequential(
          (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (conv1): Conv2d(1024, 1024, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
        (bn3): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer1): Block(
        (start): Identity()
        (conv1): Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
      (layer2): Block(
        (start): Identity()
        (conv1): Conv2d(2048, 1024, kernel_size=(1, 1), stride=(1, 1), padding=same, bias=False)
        (bn1): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (conv2): Conv2d(1024, 1024, kernel_size=(3, 3), stride=(1, 1), padding=same, groups=32, bias=False)
        (bn2): BatchNorm2d(1024, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (conv3): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(2048, eps=1.001e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu3): ReLU()
      )
    )
  )
  (avg): AdaptiveAvgPool2d(output_size=5)
  (classifier): Linear(in_features=51200, out_features=2, bias=True)
  (softmax): Softmax(dim=1)
)

打印参数量

# Report per-layer output shapes and parameter counts for an input of shape (32, 3, 224, 224).
# `summary` is imported elsewhere (presumably torchinfo.summary - confirm).
summary(model, input_size=(32, 3, 224, 224))

===============================================================================================
Layer (type:depth-idx)                        Output Shape              Param #
===============================================================================================
ResNeXt50                                     [32, 2]                   --
├─Sequential: 1-1                             [32, 64, 56, 56]          --
│    └─ZeroPad2d: 2-1                         [32, 3, 230, 230]         --
│    └─Conv2d: 2-2                            [32, 64, 112, 112]        9,472
│    └─BatchNorm2d: 2-3                       [32, 64, 112, 112]        128
│    └─ReLU: 2-4                              [32, 64, 112, 112]        --
│    └─ZeroPad2d: 2-5                         [32, 64, 114, 114]        --
│    └─MaxPool2d: 2-6                         [32, 64, 56, 56]          --
├─Stack: 1-2                                  [32, 256, 56, 56]         --
│    └─Sequential: 2-7                        [32, 256, 56, 56]         --
│    │    └─Block: 3-1                        [32, 256, 56, 56]         63,488
│    │    └─Block: 3-2                        [32, 256, 56, 56]         71,168
│    │    └─Block: 3-3                        [32, 256, 56, 56]         71,168
├─Stack: 1-3                                  [32, 512, 28, 28]         --
│    └─Sequential: 2-8                        [32, 512, 28, 28]         --
│    │    └─Block: 3-4                        [32, 512, 28, 28]         349,184
│    │    └─Block: 3-5                        [32, 512, 28, 28]         282,624
│    │    └─Block: 3-6                        [32, 512, 28, 28]         282,624
│    │    └─Block: 3-7                        [32, 512, 28, 28]         282,624
├─Stack: 1-4                                  [32, 1024, 14, 14]        --
│    └─Sequential: 2-9                        [32, 1024, 14, 14]        --
│    │    └─Block: 3-8                        [32, 1024, 14, 14]        1,390,592
│    │    └─Block: 3-9                        [32, 1024, 14, 14]        1,126,400
│    │    └─Block: 3-10                       [32, 1024, 14, 14]        1,126,400
│    │    └─Block: 3-11                       [32, 1024, 14, 14]        1,126,400
│    │    └─Block: 3-12                       [32, 1024, 14, 14]        1,126,400
│    │    └─Block: 3-13                       [32, 1024, 14, 14]        1,126,400
├─Stack: 1-5                                  [32, 2048, 7, 7]          --
│    └─Sequential: 2-10                       [32, 2048, 7, 7]          --
│    │    └─Block: 3-14                       [32, 2048, 7, 7]          5,550,080
│    │    └─Block: 3-15                       [32, 2048, 7, 7]          4,497,408
│    │    └─Block: 3-16                       [32, 2048, 7, 7]          4,497,408
├─AdaptiveAvgPool2d: 1-6                      [32, 2048, 5, 5]          --
├─Linear: 1-7                                 [32, 2]                   102,402
├─Softmax: 1-8                                [32, 2]                   --
===============================================================================================
Total params: 23,082,370
Trainable params: 23,082,370
Non-trainable params: 0
Total mult-adds (G): 139.50
===============================================================================================
Input size (MB): 19.27
Forward/backward pass size (MB): 7912.56
Params size (MB): 92.33
Estimated Total Size (MB): 8024.15
===============================================================================================

训练过程与结果

Epoch: 1, TrainLoss: 0.653, TrainAcc: 63.2, TestLoss: 0.611, TestAcc: 66.7, Lr: 1.00e-05
Epoch: 2, TrainLoss: 0.573, TrainAcc: 73.4, TestLoss: 0.578, TestAcc: 74.1, Lr: 1.00e-05
Epoch: 3, TrainLoss: 0.519, TrainAcc: 81.7, TestLoss: 0.557, TestAcc: 75.8, Lr: 1.00e-05
Epoch: 4, TrainLoss: 0.483, TrainAcc: 86.4, TestLoss: 0.553, TestAcc: 78.6, Lr: 1.00e-05
Epoch: 5, TrainLoss: 0.453, TrainAcc: 89.5, TestLoss: 0.536, TestAcc: 78.8, Lr: 1.00e-05
Epoch: 6, TrainLoss: 0.428, TrainAcc: 93.2, TestLoss: 0.538, TestAcc: 79.3, Lr: 1.00e-05
Epoch: 7, TrainLoss: 0.413, TrainAcc: 94.4, TestLoss: 0.516, TestAcc: 78.8, Lr: 1.00e-05
Epoch: 8, TrainLoss: 0.402, TrainAcc: 95.0, TestLoss: 0.506, TestAcc: 81.8, Lr: 1.00e-05
Epoch: 9, TrainLoss: 0.389, TrainAcc: 96.0, TestLoss: 0.506, TestAcc: 81.8, Lr: 1.00e-05
Epoch: 10, TrainLoss: 0.375, TrainAcc: 96.9, TestLoss: 0.493, TestAcc: 82.5, Lr: 1.00e-05
Epoch: 11, TrainLoss: 0.364, TrainAcc: 98.0, TestLoss: 0.488, TestAcc: 83.2, Lr: 1.00e-05
Epoch: 12, TrainLoss: 0.358, TrainAcc: 98.2, TestLoss: 0.485, TestAcc: 84.1, Lr: 1.00e-05
Epoch: 13, TrainLoss: 0.356, TrainAcc: 98.1, TestLoss: 0.477, TestAcc: 83.7, Lr: 1.00e-05
Epoch: 14, TrainLoss: 0.351, TrainAcc: 98.7, TestLoss: 0.473, TestAcc: 85.8, Lr: 1.00e-05
Epoch: 15, TrainLoss: 0.346, TrainAcc: 98.9, TestLoss: 0.471, TestAcc: 84.4, Lr: 1.00e-05
Epoch: 16, TrainLoss: 0.342, TrainAcc: 99.1, TestLoss: 0.470, TestAcc: 86.0, Lr: 1.00e-05
Epoch: 17, TrainLoss: 0.337, TrainAcc: 99.2, TestLoss: 0.465, TestAcc: 86.2, Lr: 1.00e-05
Epoch: 18, TrainLoss: 0.335, TrainAcc: 99.4, TestLoss: 0.459, TestAcc: 86.7, Lr: 1.00e-05
Epoch: 19, TrainLoss: 0.334, TrainAcc: 99.2, TestLoss: 0.465, TestAcc: 84.4, Lr: 1.00e-05
Epoch: 20, TrainLoss: 0.329, TrainAcc: 99.5, TestLoss: 0.470, TestAcc: 84.4, Lr: 1.00e-05
Epoch: 21, TrainLoss: 0.331, TrainAcc: 99.2, TestLoss: 0.458, TestAcc: 86.9, Lr: 1.00e-05
Epoch: 22, TrainLoss: 0.326, TrainAcc: 99.6, TestLoss: 0.458, TestAcc: 86.5, Lr: 1.00e-05
Epoch: 23, TrainLoss: 0.325, TrainAcc: 99.6, TestLoss: 0.462, TestAcc: 84.8, Lr: 1.00e-05
Epoch: 24, TrainLoss: 0.325, TrainAcc: 99.5, TestLoss: 0.461, TestAcc: 85.8, Lr: 1.00e-05
Epoch: 25, TrainLoss: 0.323, TrainAcc: 99.8, TestLoss: 0.465, TestAcc: 84.6, Lr: 1.00e-05
Epoch: 26, TrainLoss: 0.324, TrainAcc: 99.7, TestLoss: 0.458, TestAcc: 86.2, Lr: 1.00e-05
Epoch: 27, TrainLoss: 0.321, TrainAcc: 99.9, TestLoss: 0.468, TestAcc: 83.4, Lr: 1.00e-05
Epoch: 28, TrainLoss: 0.319, TrainAcc: 99.9, TestLoss: 0.453, TestAcc: 86.2, Lr: 1.00e-05
Epoch: 29, TrainLoss: 0.320, TrainAcc: 99.8, TestLoss: 0.459, TestAcc: 85.3, Lr: 1.00e-05
Epoch: 30, TrainLoss: 0.318, TrainAcc: 99.8, TestLoss: 0.459, TestAcc: 85.5, Lr: 1.00e-05
Epoch: 31, TrainLoss: 0.318, TrainAcc: 99.9, TestLoss: 0.460, TestAcc: 85.3, Lr: 1.00e-05
Epoch: 32, TrainLoss: 0.318, TrainAcc: 100.0, TestLoss: 0.459, TestAcc: 83.9, Lr: 1.00e-05
Epoch: 33, TrainLoss: 0.318, TrainAcc: 99.9, TestLoss: 0.448, TestAcc: 88.3, Lr: 1.00e-05
Epoch: 34, TrainLoss: 0.318, TrainAcc: 99.9, TestLoss: 0.454, TestAcc: 85.5, Lr: 1.00e-05
Epoch: 35, TrainLoss: 0.317, TrainAcc: 99.9, TestLoss: 0.451, TestAcc: 86.5, Lr: 1.00e-05
Epoch: 36, TrainLoss: 0.317, TrainAcc: 99.9, TestLoss: 0.448, TestAcc: 86.7, Lr: 1.00e-05
Epoch: 37, TrainLoss: 0.318, TrainAcc: 99.8, TestLoss: 0.449, TestAcc: 86.7, Lr: 1.00e-05
Epoch: 38, TrainLoss: 0.316, TrainAcc: 100.0, TestLoss: 0.441, TestAcc: 87.2, Lr: 1.00e-05
Epoch: 39, TrainLoss: 0.316, TrainAcc: 99.9, TestLoss: 0.452, TestAcc: 86.0, Lr: 1.00e-05
Epoch: 40, TrainLoss: 0.317, TrainAcc: 99.9, TestLoss: 0.454, TestAcc: 85.8, Lr: 1.00e-05
done, best acc: 88.3