H.266/VVC 变换编码中大尺寸变换块高频系数置零技术

大尺寸变换块高频系数置零

近年来视频技术有了飞速的变化，视频的分辨率从 1080P 过渡到 4K，并逐渐向 8K 发展。为了适应日益增长的视频分辨率，新的编码技术采用了更大尺寸的变换块来提高编码效率，最大变换块大小变成 64x64。变换块的增大对高分辨率的视频图像非常有用，但由此引入的计算复杂度、存储带宽及空间的要求也不可忽略。同时，在大变换块的计算完成后，高频系数都基本趋于 0，在解码中对图像质量的影响不大。为减小计算的复杂度，VVC 针对宽或高为 64 的变换块进行了高频置 0 的操作，仅保留低频的系数部分，可大大减小变换操作的计算复杂度和存储带宽及空间。
在 VVC 中，最大的变换块被限制为 64x64，由于存在 MTT 分割，可能存在矩形的变换块，因此变换块高频置 0 的条件为：
- 当变换块的宽高均等于 64，或者宽或高等于 64 时，进行高频置 0 操作，仅保留低频系数部分。
如下图所示，变换块大小为 MxN（M 为宽，N 为高），当 M、N 均等于 64 时，仅保留左上角 32x32 的变换系数，即右方 32 列和下方 32 行的变换系数均置 0；当变换块为矩形块，M>N 且 M 等于 64 时，保留左方 32 列的变换系数，右方 32 列变换系数置 0；当 M<N 且 N 等于 64 时，保留上方 32 行变换系数，下方 32 行变换系数置 0。
当 transform skip 模式使用时，所有的变换块均不执行高频置 0 操作，包括宽或高等于 64 的情况。
用户可在 sps 语法中修改最大变换块的大小，这样编码器可根据自身资源情况灵活选择 32 或 64 的最大变换块。
高频变换系数置 0 的操作大大减少了变换块的计算复杂度、存储带宽及空间的使用，同时带来的编码损失影响可忽略。
在 2020 年 1 月的 JVET-Q2002 [Algorithm description for Versatile Video Coding and Test Model 8 (VTM 8)] 提案中对大尺寸变换块高频置零技术进行了总结描述。
在 VVenC 编码器的 TrQuant.cpp 文件中，transformNxN 函数是对残差数据进行变换的核心函数。

// Forward-transforms (unless pre-computed coefficients are loaded) and quantizes
// the residual of one component of a transform unit, then updates its CBF.
//
//   tu        transform unit being processed; its mtsIdx[compID] is forced to
//             MTS_SKIP when the CU's bdpcmM flag is set for this channel type
//   compID    component whose area tu.blocks[compID] is processed
//   cQP       quantization parameters, forwarded to xQuant()
//   uiAbsSum  output; reset to 0 here and then handed to xQuant(). The coded
//             block flag is set from (uiAbsSum > 0).
//   ctx       context object, forwarded to xQuant()
//   loadTr    when true no forward transform is run and the coefficient buffer
//             is m_mtsCoeffs[tu.mtsIdx[compID]]; otherwise the transform output
//             is written to m_plTempCoeff
void TrQuant::transformNxN(TransformUnit &tu, const ComponentID compID, const QpParam &cQP, TCoeff &uiAbsSum, const Ctx &ctx, const bool loadTr)
{
  CodingStructure& codingStruct = *tu.cs;
  const CompArea&  compArea     = tu.blocks[compID];
  const uint32_t   blkWidth     = compArea.width;
  const uint32_t   blkHeight    = compArea.height;
  const CPelBuf    residual     = codingStruct.getResiBuf( compArea );

  // The sum starts at zero on every path through this function.
  uiAbsSum = 0;

  // Nothing to code: clear the CBF and leave.
  if( tu.noResidual )
  {
    TU::setCbfAtDepth( tu, compID, tu.depth, false );
    return;
  }

  // BDPCM blocks always take the transform-skip path.
  if( tu.cu->bdpcmM[toChannelType( compID )] )
  {
    tu.mtsIdx[compID] = MTS_SKIP;
  }

  CHECK( blkWidth > codingStruct.sps->getMaxTbSize(), "Unsupported transformation size" );

  CoeffBuf coeffs( loadTr ? m_mtsCoeffs[tu.mtsIdx[compID]] : m_plTempCoeff, compArea );

  if( !loadTr )
  {
    DTRACE_PEL_BUF( D_RESIDUALS, residual, tu, tu.cu->predMode, compID );

    if( tu.mtsIdx[compID] != MTS_SKIP )
    {
      // Primary transform.
      xT( tu, compID, residual, coeffs, blkWidth, blkHeight );
    }
    else
    {
      xTransformSkip( tu, compID, residual, coeffs.buf );
    }
  }

  // Forward LFNST stage, only when enabled in the SPS.
  if( codingStruct.sps->LFNST )
  {
    xFwdLfnst( tu, compID, loadTr );
  }

  DTRACE_COEFF_BUF( D_TCOEFF, coeffs, tu, tu.cu->predMode, compID );

  xQuant( tu, compID, coeffs, uiAbsSum, cQP, ctx );

  DTRACE_COEFF_BUF( D_TCOEFF, tu.getCoeffs( compID ), tu, tu.cu->predMode, compID );

  // Coded block flag (CBF) reflects whether xQuant() reported a non-zero sum.
  TU::setCbfAtDepth( tu, compID, tu.depth, uiAbsSum > 0 );
}

在 VVenC 编码器的 TrQuant.cpp 文件中，xT 函数是进行主变换的函数，里面有关于大尺寸块高频置零的操作逻辑。

void TrQuant::xT( const TransformUnit& tu, const ComponentID compID, const CPelBuf& resi, CoeffBuf& dstCoeff, const int width, const int height )
{
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_TRAFO );

  const unsigned maxLog2TrDynamicRange  = tu.cs->sps->getMaxLog2TrDynamicRange( toChannelType( compID ) );
  const unsigned bitDepth               = tu.cs->sps->bitDepths[               toChannelType( compID ) ];
  const int      TRANSFORM_MATRIX_SHIFT = g_transformMatrixShift[TRANSFORM_FORWARD];
  const uint32_t transformWidthIndex    = Log2(width ) - 1;  // nLog2WidthMinus1, since transform start from 2-point
  const uint32_t transformHeightIndex   = Log2(height) - 1;  // nLog2HeightMinus1, since transform start from 2-point

  int trTypeHor = DCT2;
  int trTypeVer = DCT2;

  xSetTrTypes( tu, compID, width, height, trTypeHor, trTypeVer );

  int  skipWidth  = ( trTypeHor != DCT2 && width  == 32 ) ? 16 : width  > JVET_C0024_ZERO_OUT_TH ? width  - JVET_C0024_ZERO_OUT_TH : 0;
  int  skipHeight = ( trTypeVer != DCT2 && height == 32 ) ? 16 : height > JVET_C0024_ZERO_OUT_TH ? height - JVET_C0024_ZERO_OUT_TH : 0;

  if( tu.cu->lfnstIdx )
  {
    if ((width == 4 && height > 4) || (width > 4 && height == 4))
    {
      skipWidth  = width - 4;
      skipHeight = height - 4;
    }
    else if ((width >= 8 && height >= 8))
    {
      skipWidth  = width - 8;
      skipHeight = height - 8;
    }
  }

  TCoeff* block = m_blk;
  TCoeff* tmp   = m_tmp;

  const Pel* resiBuf    = resi.buf;
  const int  resiStride = resi.stride;

#if ENABLE_SIMD_TRAFO
  if( width & 3 )
#endif
  {
    for( int y = 0; y < height; y++ )
    {
      for( int x = 0; x < width; x++ )
      {
        block[( y * width ) + x] = resiBuf[( y * resiStride ) + x];
      }
    }
  }
#if ENABLE_SIMD_TRAFO
  else if( width & 7 )
  {
    g_tCoeffOps.cpyCoeff4( resiBuf, resiStride, block, width, height );
  }
  else
  {
    g_tCoeffOps.cpyCoeff8( resiBuf, resiStride, block, width, height );
  }
#endif //ENABLE_SIMD_TRAFO

  if (width > 1 && height > 1)
  {
    const int shift_1st = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
    const int shift_2nd =  (Log2(height))            + TRANSFORM_MATRIX_SHIFT;
    CHECK( shift_1st < 0, "Negative shift" );
    CHECK( shift_2nd < 0, "Negative shift" );
    fastFwdTrans[trTypeHor][transformWidthIndex](block, tmp, shift_1st, height, 0, skipWidth);
    fastFwdTrans[trTypeVer][transformHeightIndex](tmp, dstCoeff.buf, shift_2nd, width, skipWidth, skipHeight);
  }
  else if (height == 1)   // 1-D horizontal transform
  {
    const int shift = ((Log2(width )) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
    CHECK( shift < 0, "Negative shift" );
    fastFwdTrans[trTypeHor][transformWidthIndex](block, dstCoeff.buf, shift, 1, 0, skipWidth);
  }
  else   // if (iWidth == 1) //1-D vertical transform
  {
    int shift = ((floorLog2(height)) + bitDepth + TRANSFORM_MATRIX_SHIFT) - maxLog2TrDynamicRange;
    CHECK(shift < 0, "Negative shift");
    CHECKD((transformHeightIndex < 0), "There is a problem with the height.");
    fastFwdTrans[trTypeVer][transformHeightIndex](block, dstCoeff.buf, shift, 1, 0, skipHeight);
  }
}