Combined inter and intra prediction (CIIP)

Megre模式是HEVC编码标准引入的一项帧间预测编码技术，在VVC编码标准中对Merge模式进行了扩展。包括对候选列表的扩展，增加了HMVP和 Pair-wise average candidates。编码工具新增CIIP (combined inter-intra prediction)、MMVD(merge mode with MV difference)、GEO(geometric partitioning mode)。VVC新提出的affine也存在merge模式，不过affine merge的候选推导和上面几个编码工具是不一样的。本文主要讨论CIIP技术。

1 原理

在VVC中，当一个CU在merge模式下编码时，如果CU包含至少64个亮度样本（即CU宽度乘以CU高度等于或大于64），并且如果CU宽度和CU高度都小于 128 ，可以选择使用CIIP模式，当前CU是否使用CIIP会被写入码流中。

顾名思义，CIIP 就是将将帧间预测信号与帧内预测信号相结合。 CIIP模式下的帧间预测信号\(P_{inter}\)是使用与Nomal Megre模式（不是MMVD和Affine merge）相同的帧间预测过程导出的；并且帧内预测信号\(P_{intra}\)是按照Planar模式的常规帧内预测过程导出的。然后，使用加权平均来组合帧内和帧间预测信号，其中根据上方和左侧相邻块的编码模式（如图1所示）计算权重值，如下所示：

如果上方相邻CU可用且预测模式为帧内预测，则将 isIntraTop 设置为 1，否则将 isIntraTop 设置为 0；
如果左侧相邻CU可用且预测模式为帧内预测，则将 isIntraLeft 设置为 1，否则将 isIntraLeft 设置为 0；
如果 (isIntraLeft + isIntraTop) = 2，则 wt = 3；
否则，如果 (isIntraLeft + isIntraTop) = 1，则 wt = 2；
否则，将 wt = 1。

CIIP加权预测公式如下：

\(P_{CIIP}=((4-wt)*P_{inter}+wt*P_{intra}+2)>>2\)

一般来说，Nomal merge有最多有6个候选，CIIP也最多有6个候选，MMVD最多有64个候选，Affine merge最多有5个候选，如果这些merge模式每个都做RDO的话，时间复杂度肯定极高，所以一般编码器的解决方案是和Intra预测类似，先使用SATD cost进行一遍粗选，选择几个最优的候选最后才做RDO，这样可以节省很多的时间。CIIP的粗选过程可以放在Nomal merge粗选的后面，只选择其中几个比较好的候选来check CIIP的候选，也可以节省一定的时间。

值得注意的是，在编码标准中，CIIP的merge是不可以判断为skip模式的，也就是说选择CIIP模式，残差系数一定不为0，遇到CIIP模式且cbf = 0可以根据需求决定是否要丢掉这个CIIP候选。

2 VTM代码

2.1 CIIP粗选

if (isIntrainterEnabled)
{
  // prepare for Intra bits calculation
  pu.ciipFlag = true;  // 先将ciip flage设置成true，用来计算bits

  // save the to-be-tested merge candidates
  uint32_t CiipMergeCand[NUM_MRG_SATD_CAND];
  for (uint32_t mergeCnt = 0; mergeCnt < std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand); mergeCnt++)
  {
    CiipMergeCand[mergeCnt] = RdModeList[mergeCnt].mergeCand;
  }
  for (uint32_t mergeCnt = 0; mergeCnt < std::min(std::min(NUM_MRG_SATD_CAND, (const int)mergeCtx.numValidMergeCand), 4); mergeCnt++)
  {
    uint32_t mergeCand = CiipMergeCand[mergeCnt];
    acMergeTmpBuffer[mergeCand] = m_acMergeTmpBuffer[mergeCand].getBuf(localUnitArea); // 复用nomal merge的插值buffer

    // estimate merge bits
    mergeCtx.setMergeInfo(pu, mergeCand); 

    // first round
    pu.intraDir[0] = PLANAR_IDX;
    uint32_t intraCnt = 0;
    // generate intrainter Y prediction
    if (mergeCnt == 0)
    {
      m_pcIntraSearch->initIntraPatternChType(*pu.cu, pu.Y()); // 获取参考像素
      m_pcIntraSearch->predIntraAng(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu); // Planar预测
      m_pcIntraSearch->switchBuffer(pu, COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt));
    }
    pu.cs->getPredBuf(pu).copyFrom(acMergeTmpBuffer[mergeCand]);
    if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
    {
      pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT());
    }
    m_pcIntraSearch->geneWeightedPred(COMPONENT_Y, pu.cs->getPredBuf(pu).Y(), pu, m_pcIntraSearch->getPredictorPtr2(COMPONENT_Y, intraCnt)); //加权组合

    // calculate cost
    if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
    {
      pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getInvLUT());
    }
    distParam.cur = pu.cs->getPredBuf(pu).Y();
    Distortion sadValue = distParam.distFunc(distParam); // 计算 dist
    if (pu.cs->slice->getLmcsEnabledFlag() && m_pcReshape->getCTUFlag())
    {
      pu.cs->getPredBuf(pu).Y().rspSignal(m_pcReshape->getFwdLUT());
    }
    m_CABACEstimator->getCtx() = ctxStart;
    pu.regularMergeFlag = false;
    uint64_t fracBits = m_pcInterSearch->xCalcPuMeBits(pu); //计算 bits
    double cost = (double)sadValue + (double)fracBits * sqrtLambdaForFirstPassIntra; // 计算 cost

    insertPos = -1;
    updateCandList(ModeInfo(mergeCand, false, false, true), cost, RdModeList, candCostList, uiNumMrgSATDCand, &insertPos); // 更新候选排序列表
    if (insertPos != -1)
    {
      for (int i = int(RdModeList.size()) - 1; i > insertPos; i--)
      {
        swap(acMergeTempBuffer[i - 1], acMergeTempBuffer[i]);
      }
      swap(singleMergeTempBuffer, acMergeTempBuffer[insertPos]);
    }
  }
  pu.ciipFlag = false;
}

2.2 组合预测

这一块的代码涉及到像素的加权组合，因此可以使用SIMD来实现，VTM这里是使用C++实现的。

void IntraPrediction::geneWeightedPred(const ComponentID compId, PelBuf &pred, const PredictionUnit &pu, Pel *srcBuf)
{
  const int width = pred.width;
  CHECK(width == 2, "Width of 2 is not supported");
  const int height    = pred.height;
  const int srcStride = width;
  const int dstStride = pred.stride;

  Pel *dstBuf = pred.buf;
  int wIntra, wMerge;

  const Position posBL = pu.Y().bottomLeft();
  const Position posTR = pu.Y().topRight();
  const PredictionUnit *neigh0 = pu.cs->getPURestricted(posBL.offset(-1, 0), pu, CHANNEL_TYPE_LUMA);
  const PredictionUnit *neigh1 = pu.cs->getPURestricted(posTR.offset(0, -1), pu, CHANNEL_TYPE_LUMA);
  bool isNeigh0Intra = neigh0 && (CU::isIntra(*neigh0->cu));
  bool isNeigh1Intra = neigh1 && (CU::isIntra(*neigh1->cu));

  if (isNeigh0Intra && isNeigh1Intra)
  {
    wIntra = 3; wMerge = 1;
  }
  else
  {
    if (!isNeigh0Intra && !isNeigh1Intra)
    {
      wIntra = 1; wMerge = 3;
    }
    else
    {
      wIntra = 2; wMerge = 2;
    }
  }

  for (int y = 0; y < height; y++)
  {
    for (int x = 0; x < width; x++)
    {
      dstBuf[y*dstStride + x] = (wMerge * dstBuf[y*dstStride + x] + wIntra * srcBuf[y*srcStride + x] + 2) >> 2;
    }
  }
}

2.3 encoder

首选判断是否ciipAvailable，CIIP和GEO都不是regular merge，如果有一个是true，则需要编是否为regular merge，如果不是，则还需要编一个flag来判断是CIIP还是GEO。

const bool ciipAvailable = pu.cs->sps->getUseCiip() && !pu.cu->skip && pu.cu->lwidth() < MAX_CU_SIZE && pu.cu->lheight() < MAX_CU_SIZE && pu.cu->lwidth() * pu.cu->lheight() >= 64;
  const bool geoAvailable = pu.cu->cs->slice->getSPS()->getUseGeo() && pu.cu->cs->slice->isInterB() &&
    pu.cs->sps->getMaxNumGeoCand() > 1
                                                                    && pu.cu->lwidth() >= GEO_MIN_CU_SIZE && pu.cu->lheight() >= GEO_MIN_CU_SIZE
                                                                    && pu.cu->lwidth() <= GEO_MAX_CU_SIZE && pu.cu->lheight() <= GEO_MAX_CU_SIZE
                                                                    && pu.cu->lwidth() < 8 * pu.cu->lheight() && pu.cu->lheight() < 8 * pu.cu->lwidth();
  if (geoAvailable || ciipAvailable)
  {
    m_BinEncoder.encodeBin(pu.regularMergeFlag, Ctx::RegularMergeFlag(pu.cu->skip ? 0 : 1));
  }
  if (pu.regularMergeFlag)
  {
    if (pu.cs->sps->getUseMMVD())
    {
      m_BinEncoder.encodeBin(pu.mmvdMergeFlag, Ctx::MmvdFlag(0));
      DTRACE(g_trace_ctx, D_SYNTAX, "mmvd_merge_flag() mmvd_merge=%d pos=(%d,%d) size=%dx%d\n", pu.mmvdMergeFlag ? 1 : 0, pu.lumaPos().x, pu.lumaPos().y, pu.lumaSize().width, pu.lumaSize().height);
    }
    if (pu.mmvdMergeFlag || pu.cu->mmvdSkip)
    {
      mmvd_merge_idx(pu);
    }
    else
    {
      merge_idx(pu);
    }
  }
  else
  {
    if (geoAvailable && ciipAvailable)
    {
      Ciip_flag(pu);
    }
    merge_idx(pu);
  }