PaddleVideo:Squeeze Time算法移植

 参考
PaddleVideo/docs/zh-CN/contribute/add_new_algorithm.md at develop · PaddlePaddle/PaddleVideo · GitHubAwesome video understanding toolkits based on PaddlePaddle. It supports video data annotation tools, lightweight RGB and skeleton based action recognition model, practical applications for video tagging and sport action detection. - PaddleVideo/docs/zh-CN/contribute/add_new_algorithm.md at develop · PaddlePaddle/PaddleVideoicon-default.png?t=N7T8https://github.com/PaddlePaddle/PaddleVideo/blob/develop/docs/zh-CN/contribute/add_new_algorithm.md

1:添加backbone:(网络我自己砍了几刀,目的是想和ppTSM-v2做对比)

paddlevideo/modeling/backbones/squeezetime.py

from __future__ import absolute_import, division, print_function

import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D, Dropout, Linear, BatchNorm2D
from paddle.regularizer import L2Decay
from paddle.nn.initializer import KaimingNormal,Constant
import paddle.nn.functional as F

from ..registry import BACKBONES


def get_inplanes():
    return [64, 128, 256, 512]

class SpatialConv(nn.Layer):
    """
    Inter-temporal Object Interaction Module (IOI)
    """
    def __init__(self, dim_in, dim_out, pos_dim=7):
        super(SpatialConv, self).__init__()

        self.short_conv = nn.Conv2D(dim_in, dim_out, kernel_size=3, stride=1, padding=1, groups=1)

        self.glo_conv = nn.Sequential(
            nn.Conv2D(dim_in, 16, kernel_size=3, stride=1, padding=1, groups=1),
            nn.BatchNorm2D(16), nn.ReLU(),
            nn.Conv2D(16, 16, kernel_size=7, stride=1, padding=3),
            nn.BatchNorm2D(16), nn.ReLU(),
            nn.Conv2D(16, dim_out, kernel_size=3, stride=1, padding=1, groups=1), nn.Sigmoid()
        )

        self.pos_embed = self.create_parameter(shape=[1, 16, pos_dim, pos_dim], default_initializer=nn.initializer.KaimingNormal())

    def forward(self, x, param):
        x_short = self.short_conv(x)
        x = x * param

        for i in range(len(self.glo_conv)):
            if i == 3:
                _, _, H, W = x.shape
                if self.pos_embed.shape[2] != H or self.pos_embed.shape[3] != W:
                    pos_embed = F.interpolate(self.pos_embed, size=(H, W), mode='bilinear', align_corners=True)
                else:
                    pos_embed = self.pos_embed
                x = x + pos_embed

            x = self.glo_conv[i](x)

        return x_short * x

class Conv2d(nn.Layer):
    """
    Channel-Time Learning Module (CTL)
    """
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        dilation: int = 1,
        groups: int = 1,
        bias: bool = True,
        padding_mode: str = 'zeros', 
        pos_dim = 7):
        super(Conv2d, self).__init__()

        self.stride = stride

        self.param_conv = nn.Sequential(
            nn.AdaptiveAvgPool2D((1, 1)),
            nn.Conv2D(in_channels, in_channels, 1, stride=1, padding=1 // 2, bias_attr=False),
            nn.BatchNorm2D(in_channels),
            nn.ReLU(),
            nn.Conv2D(in_channels, in_channels, 1, bias_attr=False),
            nn.Sigmoid()
        )

        self.temporal_conv = nn.Conv2D(
            in_channels=in_channels, 
            out_channels=out_channels, 
            kernel_size=kernel_size, 
            stride=1, 
            padding=padding, 
            dilation=dilation, 
            groups=groups, 
            bias_attr=bias, 
            padding_mode=padding_mode
        )

        self.spatial_conv = SpatialConv(dim_in=in_channels, dim_out=out_channels, pos_dim=pos_dim)

    def forward(self, x):
        param = self.param_conv(x)
        x = self.temporal_conv(param * x) + self.spatial_conv(x, param)
        return x

def conv3x3x3(in_planes, out_planes, stride=1, pos_dim=7):
    return Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, padding=0, bias=False, pos_dim=pos_dim)

def conv1x1x1(in_planes, out_planes, stride=1):
    return nn.Conv2D(in_planes, out_planes, kernel_size=1, stride=stride, bias_attr=False)

class BasicBlock(nn.Layer):
    """
    Channel-Time Learning (CTL) Block
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, shortcut_conv=None, pos_dim=7):
        super().__init__()

        self.conv1 = conv3x3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2D(planes)
        self.relu = nn.ReLU()

        self.conv2 = conv3x3x3(planes, planes, pos_dim=pos_dim)
        self.bn2 = nn.BatchNorm2D(planes)

        self.shortcut_conv = shortcut_conv

        self.stride = stride
        if stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2D(in_planes, in_planes, kernel_size=2, stride=2, groups=in_planes),
                nn.BatchNorm2D(in_planes)
            )

    def forward(self, x):
        if self.stride != 1:
            x = self.downsample(x)

        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.shortcut_conv is not None:
            residual = self.shortcut_conv(x)

        out += residual
        out = self.relu(out)

        return out

class Bottleneck(nn.Layer):
    """
    Channel-Time Learning (CTL) Block
    """
    expansion = 4

    def __init__(self, in_planes, planes, stride=1, shortcut_conv=None, pos_dim=7):
        super().__init__()

        self.conv1 = conv1x1x1(in_planes, planes)
        self.bn1 = nn.BatchNorm2D(planes)

        self.conv2 = conv3x3x3(planes, planes, pos_dim=pos_dim)
        self.bn2 = nn.BatchNorm2D(planes)

        self.conv3 = conv1x1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2D(planes * self.expansion)

        self.relu = nn.ReLU()

        self.shortcut_conv = shortcut_conv

        self.stride = stride

        if stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2D(in_planes, in_planes, kernel_size=2, stride=2, groups=in_planes),
                nn.BatchNorm2D(in_planes)
            )

    def forward(self, x):
        if self.stride != 1:
            x = self.downsample(x)

        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.shortcut_conv is not None:
            residual = self.shortcut_conv(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Layer):
    def __init__(self,
                 block,
                 layers,
                 block_inplanes,
                 n_input_channels=3,
                 no_max_pool=False,
                 shortcut_type='B',
                 widen_factor=1.0,
                 dropout=0.2, 
                 freeze_bn=False, 
                 spatial_stride=[1,2,2,2], 
                 pos_dim=[64,32,16,8]):
        super().__init__()

        self.freeze_bn = freeze_bn
        block_inplanes = [int(x * widen_factor) for x in block_inplanes]

        self.in_planes = block_inplanes[0]
        self.no_max_pool = no_max_pool
        self.dropout = dropout

        self.conv1 = nn.Conv2D(n_input_channels,
                               self.in_planes,
                               kernel_size=5,
                               stride=2,
                               padding=2,
                               groups=1,
                               bias_attr=False)

        self.bn1 = nn.BatchNorm2D(self.in_planes)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, block_inplanes[0], layers[0],
                                       shortcut_type, stride=spatial_stride[0], pos_dim=pos_dim[0])

        self.layer2 = self._make_layer(block,
                                       block_inplanes[1],
                                       layers[1],
                                       shortcut_type,
                                       stride=spatial_stride[1], pos_dim=pos_dim[1])

        self.layer3 = self._make_layer(block,
                                       block_inplanes[2],
                                       layers[2],
                                       shortcut_type,
                                       stride=spatial_stride[2], pos_dim=pos_dim[2])

        self.layer4 = self._make_layer(block,
                                       block_inplanes[3],
                                       layers[3],
                                       shortcut_type,
                                       stride=spatial_stride[3], pos_dim=pos_dim[3])


    def _downsample_basic_block(self, x, planes, stride):
        out = F.avg_pool2d(x, kernel_size=1, stride=stride)
        zero_pads = paddle.zeros([out.shape[0], planes - out.shape[1], out.shape[2], out.shape[3]])

        if isinstance(out, paddle.CUDAPlace):
            zero_pads = zero_pads.cuda()

        out = paddle.concat([out, zero_pads], axis=1)

        return out

    def _make_layer(self, block, planes, blocks, shortcut_type, stride=1, pos_dim=7):
        shortcut = None
        if self.in_planes != planes * block.expansion:
            shortcut = nn.Sequential(
                conv1x1x1(self.in_planes, planes * block.expansion, stride=1),
                nn.BatchNorm2D(planes * block.expansion)
            )

        layers = []
        layers.append(
            block(in_planes=self.in_planes,
                  planes=planes,
                  stride=stride, shortcut_conv=shortcut, pos_dim=pos_dim)
        )

        self.in_planes = planes * block.expansion

        for i in range(1, blocks):
            layers.append(block(self.in_planes, planes, pos_dim=pos_dim))

        return nn.Sequential(*layers)

    def forward(self, x):
        print('##################', x.shape)
        if len(x.shape) == 3:
            x = paddle.unsqueeze(x, axis=0)
        N, C, H, W = x.shape
        x = x.reshape([int(N/16), -1, H, W])

        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)

        if not self.no_max_pool:
            x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x

    def train(self, mode=True):
        freeze_bn = self.freeze_bn
        freeze_bn_affine = self.freeze_bn
        super(ResNet, self).train(mode)

        if freeze_bn:
            print("Freezing Mean/Var of BatchNorm2D.")
            for m in self.sublayers():
                if isinstance(m, nn.BatchNorm2D):
                    m.eval()

        if freeze_bn_affine:
            print("Freezing Weight/Bias of BatchNorm2D.")
            for m in self.sublayers():
                if isinstance(m, nn.BatchNorm2D):
                    m.weight.stop_gradient = True
                    m.bias.stop_gradient = True

def SqueezeTime_model(**kwargs):
    model = ResNet(Bottleneck, [2, 2, 2, 2], get_inplanes(), **kwargs)
    return model


@BACKBONES.register()
def SqueezeTime(pretrained=None, use_ssld=False, **kwargs):
    """
    Build SqueezeTime Model
    """

    model = SqueezeTime_model(widen_factor=0.5, dropout=0.5, n_input_channels=48, freeze_bn=False, spatial_stride=[1, 2, 2, 2], pos_dim=[64, 32, 16, 8])
    return  model

2:导入backbone:

paddlevideo/modeling/backbones/__init__.py

# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .actbert import BertForMultiModalPreTraining
from .adds import ADDS_DepthNet
from .agcn import AGCN
from .asrf import ASRF
from .bmn import BMN
from .cfbi import CFBI
from .movinet import MoViNet
from .ms_tcn import MSTCN
from .resnet import ResNet
from .resnet_slowfast import ResNetSlowFast
from .resnet_slowfast_MRI import ResNetSlowFast_MRI
from .resnet_tsm import ResNetTSM
from .resnet_tsm_MRI import ResNetTSM_MRI
from .resnet_tsn_MRI import ResNetTSN_MRI
from .resnet_tweaks_tsm import ResNetTweaksTSM
from .resnet_tweaks_tsn import ResNetTweaksTSN
from .stgcn import STGCN
from .swin_transformer import SwinTransformer3D
from .transnetv2 import TransNetV2
from .vit import VisionTransformer
from .vit_tweaks import VisionTransformer_tweaks
from .ms_tcn import MSTCN
from .asrf import ASRF
from .resnet_tsn_MRI import ResNetTSN_MRI
from .resnet_tsm_MRI import ResNetTSM_MRI
from .resnet_slowfast_MRI import ResNetSlowFast_MRI
from .cfbi import CFBI
from .ctrgcn import CTRGCN
from .agcn2s import AGCN2s
from .movinet import MoViNet
from .resnet3d_slowonly import ResNet3dSlowOnly
from .toshift_vit import TokenShiftVisionTransformer
from .pptsm_mv2 import PPTSM_MobileNetV2
from .pptsm_mv3 import PPTSM_MobileNetV3
from .pptsm_v2 import PPTSM_v2
from .yowo import YOWO
from .squeezetime import SqueezeTime

__all__ = [
    'ResNet', 'ResNetTSM', 'ResNetTweaksTSM', 'ResNetSlowFast', 'BMN',
    'ResNetTweaksTSN', 'VisionTransformer', 'STGCN', 'AGCN', 'TransNetV2',
    'ADDS_DepthNet', 'VisionTransformer_tweaks', 'BertForMultiModalPreTraining',
    'ResNetTSN_MRI', 'ResNetTSM_MRI', 'ResNetSlowFast_MRI', 'CFBI', 'MSTCN',
    'ASRF', 'MoViNet', 'SwinTransformer3D', 'CTRGCN',
    'TokenShiftVisionTransformer', 'AGCN2s', 'PPTSM_MobileNetV2',
    'PPTSM_MobileNetV3', 'PPTSM_v2', 'ResNet3dSlowOnly', 'YOWO', 'SqueezeTime'
]

3:添加head:

paddlevideo/modeling/heads/i2d_head.py

# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
from paddle import ParamAttr

from ..registry import HEADS
from ..weight_init import weight_init_
from .base import BaseHead


@HEADS.register()
class I2DHead(BaseHead):
    """Classification head for I2D.

    Args:
        num_classes (int): Number of classes to be classified.
        in_channels (int): Number of channels in input feature.
        loss_cls (dict): Config for building loss.
            Default: dict(name='CrossEntropyLoss')
        spatial_type (str): Pooling type in spatial dimension. Default: 'avg'.
        drop_ratio (float): Probability of dropout layer. Default: 0.5.
        std (float): Std value for Initiation. Default: 0.01.
        kwargs (dict, optional): Any keyword argument to be used to initialize
            the head.
    """
    def __init__(self,
                 num_classes,
                 in_channels,
                 loss_cfg=dict(name='CrossEntropyLoss'),
                 spatial_type='avg',
                 drop_ratio=0.5,
                 std=0.01,
                 **kwargs):

        super().__init__(num_classes, in_channels, loss_cfg, **kwargs)

        self.spatial_type = spatial_type
        self.dropout_ratio = drop_ratio
        self.init_std = std
                     
        if self.dropout_ratio != 0:
            self.dropout = nn.Dropout(p=self.dropout_ratio)
        else:
            self.dropout = None
            
        self.fc_cls = nn.Linear(self.in_channels, self.num_classes)

        if self.spatial_type == 'avg':
            self.avg_pool = nn.AdaptiveAvgPool2D((1, 1))
        else:
            self.avg_pool = nn.AdaptiveMaxPool2D((1,1))


    def forward(self, x, num_segs = None):
        """Defines the computation performed at every call.

        Args:
            x (Tensor): The input data.

        Returns:
            Tensor: The classification scores for input samples.
        """   
        
        # [N, in_channels, 4, 7, 7]
        if self.avg_pool is not None:
            x = self.avg_pool(x)
            
        # [N, in_channels, 1, 1, 1]
        if self.dropout is not None:
            x = self.dropout(x)
            
        # [N, in_channels, 1, 1, 1]
        x = paddle.reshape(x, [x.shape[0], -1])
        
        # [N, in_channels]
        cls_score = self.fc_cls(x)
        
        # [N, num_classes]
        return cls_score


    # def forward_new(self, x, num_segs = None):
    #     """Defines the computation performed at every call.

    #     Args:
    #         x (Tensor): The input data.

    #     Returns:
    #         Tensor: The classification scores for input samples.
    #     """      
        
    #     # [N, in_channels, 4, 7, 7]
    #     if self.avg_pool is not None:
    #         x = self.avg_pool(x)
            
    #     # [N, in_channels, 1, 1, 1]
    #     if self.dropout is not None:
    #         x = self.dropout(x)
            
    #     # [N, in_channels, 1, 1, 1]
    #     x = paddle.reshape(x, [x.shape[0], -1])
        
    #     # [N, in_channels]
    #     cls_score = self.fc_cls(x)
        
    #     # [N, num_classes]
    #     return cls_score



4:导入head:

paddlevideo/modeling/heads/__init__.py

# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .adds_head import AddsHead
from .asrf_head import ASRFHead
from .attention_lstm_head import AttentionLstmHead, ActionAttentionLstmHead
from .base import BaseHead
from .bbox_head import BBoxHeadAVA
from .cfbi_head import CollaborativeEnsemblerMS
from .i3d_head import I3DHead
from .movinet_head import MoViNetHead
from .ms_tcn_head import MSTCNHead
from .pptimesformer_head import ppTimeSformerHead
from .pptsm_head import ppTSMHead
from .pptsn_head import ppTSNHead
from .roi_head import AVARoIHead
from .single_straight3d import SingleRoIExtractor3D
from .slowfast_head import SlowFastHead
from .stgcn_head import STGCNHead
from .timesformer_head import TimeSformerHead
from .transnetv2_head import TransNetV2Head
from .tsm_head import TSMHead
from .tsn_head import TSNHead
from .ms_tcn_head import MSTCNHead
from .asrf_head import ASRFHead
from .ctrgcn_head import CTRGCNHead
from .movinet_head import MoViNetHead
from .agcn2s_head import AGCN2sHead
from .token_shift_head import TokenShiftHead
from .i2d_head import I2DHead

__all__ = [
    'BaseHead', 'TSNHead', 'TSMHead', 'ppTSMHead', 'ppTSNHead', 'SlowFastHead',
    'AttentionLstmHead', 'TimeSformerHead', 'STGCNHead', 'TransNetV2Head',
    'I3DHead', 'SingleRoIExtractor3D', 'AVARoIHead', 'BBoxHeadAVA', 'AddsHead',
    'ppTimeSformerHead', 'CollaborativeEnsemblerMS', 'MSTCNHead', 'ASRFHead',
    'MoViNetHead', 'CTRGCNHead', 'TokenShiftHead', 'ActionAttentionLstmHead',
    'AGCN2sHead', 'I2DHead'
]

5:训练配置文件:

configs/recognition/pptsm/v2/md_ppsqt_16frames_uniform.yaml

MODEL: #MODEL field
    framework: "Recognizer2D" #Mandatory, indicate the type of network, associate to the 'paddlevideo/modeling/framework/' .
    backbone: #Mandatory, indicate the type of backbone, associate to the 'paddlevideo/modeling/backbones/' .
        name: "SqueezeTime" #Mandatory, The name of backbone.
    head:
        name: "I2DHead" #Mandatory, indicate the type of head, associate to the 'paddlevideo/modeling/heads'
        #pretrained: "" #Optional, pretrained model path.
        num_classes: 2
        in_channels: 1024

DATASET: #DATASET field
    batch_size: 16  #Mandatory, bacth size
    num_workers: 4 #Mandatory, the number of subprocess on each GPU.
    train:
        format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset'
        data_prefix: "/home/mnt/sdd/Data/data_fights/rawframes" #Mandatory, train data root path
        file_path: "/home/mnt/sdd/Data/data_fights/train_list.txt" #Mandatory, train data index file path
        suffix: 'img_{:06}.jpg'
    valid:
        format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset'
        data_prefix: "/home/mnt/sdd/Data/data_fights/rawframes" #Mandatory, valid data root path
        file_path: "/home/mnt/sdd/Data/data_fights/test_list.txt" #Mandatory, valid data index file path
        suffix: 'img_{:06}.jpg'
    test:
        format: "FrameDataset" #Mandatory, indicate the type of dataset, associate to the 'paddlevidel/loader/dateset'
        data_prefix: "/home/mnt/sdd/Data/data_fights/rawframes" #Mandatory, valid data root path
        file_path: "/home/mnt/sdd/Data/data_fights/test_list.txt" #Mandatory, valid data index file path
        suffix: 'img_{:06}.jpg'

PIPELINE: #PIPELINE field
    train: #Mandotary, indicate the pipeline to deal with the training data, associate to the 'paddlevideo/loader/pipelines/'
        decode:
            name: "FrameDecoder"
        sample:
            name: "Sampler"
            num_seg: 16
            seg_len: 1
            valid_mode: False
        transform: #Mandotary, image transfrom operator
            - Scale:
                short_size: 256
            - MultiScaleCrop:
                target_size: 256
            - RandomCrop:
                target_size: 224
            - RandomFlip:
            - Image2Array:
            - Normalization:
                mean: [0.485, 0.456, 0.406]
                std: [0.229, 0.224, 0.225]
    valid: #Mandatory, indicate the pipeline to deal with the validing data. associate to the 'paddlevideo/loader/pipelines/'
        decode:
            name: "FrameDecoder"
        sample:
            name: "Sampler"
            num_seg: 16
            seg_len: 1
            valid_mode: True
        transform:
            - Scale:
                short_size: 256
            - CenterCrop:
                target_size: 224
            - Image2Array:
            - Normalization:
                mean: [0.485, 0.456, 0.406]
                std: [0.229, 0.224, 0.225]
    test:  #Mandatory, indicate the pipeline to deal with the validing data. associate to the 'paddlevideo/loader/pipelines/'
        decode:
            name: "FrameDecoder"
        sample:
            name: "Sampler"
            num_seg: 16
            seg_len: 1
            valid_mode: True
        transform:
            - Scale:
                short_size: 256
            - CenterCrop:
                target_size: 224
            - Image2Array:
            - Normalization:
                mean: [0.485, 0.456, 0.406]
                std: [0.229, 0.224, 0.225]

OPTIMIZER: #OPTIMIZER field
  name: 'Momentum'
  momentum: 0.9
  learning_rate:
    iter_step: True
    name: 'CustomWarmupCosineDecay'
    max_epoch: 120
    warmup_epochs: 10
    warmup_start_lr: 0.005
    cosine_base_lr: 0.01
  weight_decay:
    name: 'L2'
    value: 1e-4
  use_nesterov: True

MIX:
    name: "Mixup"
    alpha: 0.2


METRIC:
    name: 'CenterCropMetric'

INFERENCE:
    name: 'ppSQT_Inference_helper'
    num_seg: 16
    target_size: 224

model_name: "ppSQT"
log_interval: 10 #Optional, the interal of logger, default:10
epochs: 120  #Mandatory, total epoch
log_level: "INFO" #Optional, the logger level. default: "INFO"

6:训练:

# multi-gpu-st
export CUDA_VISIBLE_DEVICES=0,1
python -B -m paddle.distributed.launch --gpus="0,1"  --log_dir=./log/log_sqt_frame_16  main.py  --validate -c configs/recognition/pptsm/v2/ppsqt_lcnet_md_16frames_uniform.yaml

7:结果:精度比ppTSM-v2低8个点左右。有可能是没有预训练权重的问题。 

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mfbz.cn/a/786448.html

如若内容造成侵权/违法违规/事实不符,请联系我们进行投诉反馈qq邮箱809451989@qq.com,一经查实,立即删除!

相关文章

[数仓]七、离线数仓(PrestoKylin即席查询)

第1章 Presto 1.1 Presto简介 1.1.1 Presto概念 1.1.2 Presto架构 1.1.4 Presto、Impala性能比较 Presto、Impala性能比较_presto和impala对比-CSDN博客 测试结论:Impala性能稍领先于Presto,但是Presto在数据源支持上非常丰富,包括Hive、图数据库、传统关系型数据库、Re…

Codeforces Round 956 F. array-value 【01Trie查询异或最小值】

题意 给定一个非负整数数组 a a a 对每个长度至少为 2 2 2 的子数组&#xff0c;定义其权值为&#xff1a;子数组内两两异或值最小值 即 b ⊂ a [ l , r ] , w ( b ) min ⁡ l ≤ i < j ≤ r { a i ⨁ a j } b \subset a[l, r], \quad w(b) \min_{l \leq i < j \le…

谷歌账号被停用怎么办?立刻申诉!申诉流程和经验、中英文申诉信模板

很多鞥有这两年新注册的Google账号或者购买的谷歌账号&#xff0c;在使用时可能都遇到过被停用的情况。极端的还有刚注册号&#xff0c;反手就被谷歌 停用了&#xff0c;或者被连续停用。 今天我们就来聊一聊&#xff0c;谷歌账号为什么会被停用&#xff0c;以及谷歌账号被停用…

走拼箱货必看海运拼箱的实用技巧

在国际海运运输中&#xff0c;海运拼箱适用于货物数量较少或体积不足以填满整个集装箱的情况。 海运拼箱货物通常由物流公司或货代进行组织和管理。多个货主的货物通过合理拼装&#xff0c;使集装箱空间得到充分利用。 那么&#xff0c;在海运拼箱和整柜有哪些不同&#xff0c…

淘宝商品历史价格查询(免费)

当前资料来源于网络&#xff0c;禁止用于商用&#xff0c;仅限于学习。 淘宝联盟里面就可以看到历史价格 并且没有加密 淘宝商品历史价格查询可以通过以下步骤进行&#xff1a; 先下载后&#xff0c;登录app注册账户 打开淘宝网站或淘宝手机App。在搜索框中输入你想要查询的商…

Qt 线程 QThread类详解

Qt 线程中QThread的使用 在进行桌面应用程序开发的时候&#xff0c; 假设应用程序在某些情况下需要处理比较复杂的逻辑&#xff0c; 如果只有一个线程去处理&#xff0c;就会导致窗口卡顿&#xff0c;无法处理用户的相关操作。这种情况下就需要使用多线程&#xff0c;其中一个…

亚马逊云科技EC2简明教程

&#x1f4a1; 完全适用于新手操作的Amazon EC2引导教程 简述 在亚马逊云科技中&#xff0c;存在多种计算服务&#xff0c;在此&#xff0c;我们将会着重讨论Amazon EC2(以下简称EC2)&#xff0c;EC2作为亚马逊云科技的明星产品、核心产品&#xff0c;是大多数开发者和企业用…

基于JAVA+SpringBoot+Vue的自动阅卷分析系统

✌全网粉丝20W,csdn特邀作者、博客专家、CSDN新星计划导师、java领域优质创作者,博客之星、掘金/华为云/阿里云/InfoQ等平台优质作者、专注于Java技术领域和毕业项目实战✌ &#x1f345;文末获取项目下载方式&#x1f345; 一、项目背景介绍&#xff1a; 在当前教育评估体系中…

网络安全高级工具软件100套

1、 Nessus&#xff1a;最好的UNIX漏洞扫描工具 Nessus 是最好的免费网络漏洞扫描器&#xff0c;它可以运行于几乎所有的UNIX平台之上。它不止永久升级&#xff0c;还免费提供多达11000种插件&#xff08;但需要注册并接受EULA-acceptance–终端用户授权协议&#xff09;。 它…

【面试八股总结】面向对象三大特性、虚函数、纯虚函数、虚继承

参考资料&#xff1a;阿秀 一、面向对象三大特性 封装&#xff1a;将数据和代码捆绑在一起&#xff0c;避免外界干扰和不确定性访问 继承&#xff1a;让某种类型对象获得另一个类型对象的属性和方法 多态&#xff1a;同一种事务表现出不同事务的能力&#xff0c;即&#xf…

算法小练之 位运算基础

前言 今天正式走入&#xff0c;位运算这个章节&#xff0c;关于这一部分我会先介绍几个重要的知识点&#xff0c;然后再根据几个力扣上的题来讲解。 了解6种位操作 总所周知&#xff0c;变量在计算机中都是二进制存储的&#xff0c;比如一个变量int a 1&#xff1b; 它的存…

Halcon 模糊圆边的找圆案例

Halcon 模糊圆边的找圆案例 基本思路 1.将图像转成灰度图像 2.再观察要找到的区域的灰度值变化&#xff0c;找到前景与背景的具体数值。 3.根据找到的前景与背景的具体数值&#xff0c;增强图像对比度。&#xff08;使图像变成黑白图片&#xff09; 4.使用灰度直图工具进行阈值…

gRPC 接口测试最佳实践

gRPC 是由谷歌开发的现代开源高性能 RPC 远程过程调用框架&#xff0c;由于采用了HTTP/2 作为底层传输协议&#xff0c;它特别适用于高性能应用场景。gRPC 在视频流传输等大规模数据传输场景以及密集的服务间通讯的微服务架构中表现出色。 数据交换使用轻量级的 Protobuf 序列…

18.按键消抖模块设计(使用状态机,独热码编码)

&#xff08;1&#xff09;设计意义&#xff1a;按键消抖主要针对的时机械弹性开关&#xff0c;当机械触点断开、闭合时&#xff0c;由于机械触点的弹性作用&#xff0c;一个按键开关在闭合时不会马上稳定地接通&#xff0c;在断开时也不会一下子就断开。因而在闭合以及断开的瞬…

Jmeter-接口测试-GET请求

简介 Jmeter 是 apache 公司基于 java 开发的一款开源压力测试工具&#xff0c;体积小&#xff0c;功能全&#xff0c;使用方便&#xff0c;是一个比较轻量级的测试工具&#xff0c;使用起来非常简 单。因为 jmeter 是 java 开发的&#xff0c;所以运行的时候必须先要安装 jdk…

数据结构——Trie

题目&#xff1a; 维护一个字符串集合&#xff0c;支持两种操作&#xff1a; I x 向集合中插入一个字符串 x&#x1d465;&#xff1b;Q x 询问一个字符串在集合中出现了多少次。 共有 N&#x1d441; 个操作&#xff0c;所有输入的字符串总长度不超过 10^5&#xff0c;字符串仅…

(HAL)stm32f407+freertos通过usb驱动移远4G模块-EC600U

概述 本篇文章主要介绍: 如何使用STM32CubeMX创建stm32F407+freertos+usb host的基础工程。USB-HOST-CDC驱动运行过程。如何根据4G模块的具体信息修改usb相关代码。MCU如何通过usb与4G模块通信,收发数据。调试过程中遇到的问题以及解决办法。 整个过程中在网上搜罗了很多参考…

Test-Time Adaptation via Conjugate Pseudo-labels--论文笔记

论文笔记 资料 1.代码地址 https://github.com/locuslab/tta_conjugate 2.论文地址 https://arxiv.org/abs/2207.09640 3.数据集地址 论文摘要的翻译 测试时间适应(TTA)指的是使神经网络适应分布变化&#xff0c;在测试时间仅访问来自新领域的未标记测试样本。以前的TT…

STM32(二):STM32工作原理

这里写目录标题 0、参考1、寄存器和存储器基本概念&#xff08;1&#xff09;基本概念&#xff08;2&#xff09;主要区别&#xff08;3&#xff09;联系&#xff08;4&#xff09;实际应用中的案例&#xff08;5&#xff09;总结&#xff08;6&#xff09;一些名词解释 2、STM…

实时监测、智能预警:电缆光纤测温系统|原理、应用与前景

实时监测、智能预警&#xff1a;电缆光纤测温系统|原理、应用与前景 电缆光纤测温系统&#xff0c;作为现代电力系统中不可或缺的一部分&#xff0c;以其独特的优势在电缆安全监控领域发挥着日益重要的作用。该系统利用光纤传感技术&#xff0c;实时监测电缆的运行温度&#x…