Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/InterPrediction.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     InterPrediction.cpp
45
    \brief    inter prediction class
46
*/
47
48
#include "InterPrediction.h"
49
#include "Unit.h"
50
#include "UnitTools.h"
51
#include "dtrace_next.h"
52
#include "dtrace_buffer.h"
53
#include "CommonLib/TimeProfiler.h"
54
55
#include <memory.h>
56
#include <algorithm>
57
58
//! \ingroup CommonLib
59
//! \{
60
61
namespace vvenc {
62
63
void addBDOFAvgCore(const Pel* src0, const ptrdiff_t  src0Stride, const Pel* src1, const ptrdiff_t  src1Stride, Pel* dst, const ptrdiff_t dstStride, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel*gradY1, const ptrdiff_t  gradStride, int width, int height, int tmpx, int tmpy, unsigned shift, int offset, const ClpRng& clpRng)
64
0
{
65
0
  int b = 0;
66
67
0
  for (int y = 0; y < height; y++)
68
0
  {
69
0
    for (int x = 0; x < width; x += 4)
70
0
    {
71
0
      b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]);
72
0
      dst[x] = ClipPel((int16_t)rightShiftU((src0[x] + src1[x] + b + offset), shift), clpRng);
73
74
0
      b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]);
75
0
      dst[x + 1] = ClipPel((int16_t)rightShiftU((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
76
77
0
      b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]);
78
0
      dst[x + 2] = ClipPel((int16_t)rightShiftU((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
79
80
0
      b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]);
81
0
      dst[x + 3] = ClipPel((int16_t)rightShiftU((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
82
0
    }
83
0
    dst += dstStride;       src0 += src0Stride;     src1 += src1Stride;
84
0
    gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride;
85
0
  }
86
0
}
87
88
/** Adds a per-sample refinement term to a prediction block.
 *
 *  The refinement dI = dMvX * gradX + dMvY * gradY is clipped to
 *  [-dILimit, dILimit - 1] with dILimit = 1 << max(clpRng.bd + 1, 13) and added
 *  to the source sample. When \p bi is false the result is additionally rounded
 *  with \p offset, right-shifted by \p shiftNum and clipped with \p clpRng.
 *
 *  The delta-MV arrays are read consecutively (width entries per row);
 *  \p dMvStride is not used by this implementation.
 */
void applyPROFCore(Pel* dst, int dstStride, const Pel* src, int srcStride, int width, int height, const Pel* gradX, const Pel* gradY, int gradStride, const int* dMvX, const int* dMvY, int dMvStride, const bool& bi, int shiftNum, Pel offset, const ClpRng& clpRng)
{
  const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);

  for (int row = 0; row < height; row++)
  {
    // delta MVs are stored densely, one entry per sample
    const int* mvXRow = dMvX + row * width;
    const int* mvYRow = dMvY + row * width;

    for (int col = 0; col < width; col++)
    {
      int32_t dI = mvXRow[col] * gradX[col] + mvYRow[col] * gradY[col];
      dI = Clip3(-dILimit, dILimit - 1, dI);

      // intermediate value is kept in Pel precision, exactly as it would be in dst
      Pel sample = src[col] + dI;
      if (!bi)
      {
        sample = (sample + offset) >> shiftNum;
        sample = ClipPel(sample, clpRng);
      }
      dst[col] = sample;
    }

    gradX += gradStride;
    gradY += gradStride;
    dst   += dstStride;
    src   += srcStride;
  }
}
112
113
template<bool PAD = true>
114
void gradFilterCore(const Pel* pSrc, int srcStride, int width, int height, int gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
115
0
{
116
0
  const Pel* srcTmp = pSrc + srcStride + 1;
117
0
  Pel* gradXTmp = gradX + gradStride + 1;
118
0
  Pel* gradYTmp = gradY + gradStride + 1;
119
0
  int  shift1 = 6;
120
121
0
  for (int y = 0; y < (height - 2 * BDOF_EXTEND_SIZE); y++)
122
0
  {
123
0
    for (int x = 0; x < (width - 2 * BDOF_EXTEND_SIZE); x++)
124
0
    {
125
0
      gradYTmp[x] = ( srcTmp[x + srcStride] >> shift1 ) - ( srcTmp[x - srcStride] >> shift1 );
126
0
      gradXTmp[x] = ( srcTmp[x + 1] >> shift1 ) - ( srcTmp[x - 1] >> shift1 );
127
0
    }
128
0
    gradXTmp += gradStride;
129
0
    gradYTmp += gradStride;
130
0
    srcTmp += srcStride;
131
0
  }
132
133
0
  if (PAD)
134
0
  {
135
0
    gradXTmp = gradX + gradStride + 1;
136
0
    gradYTmp = gradY + gradStride + 1;
137
0
    for (int y = 0; y < (height - 2 * BDOF_EXTEND_SIZE); y++)
138
0
    {
139
0
      gradXTmp[-1] = gradXTmp[0];
140
0
      gradXTmp[width - 2 * BDOF_EXTEND_SIZE] = gradXTmp[width - 2 * BDOF_EXTEND_SIZE - 1];
141
0
      gradXTmp += gradStride;
142
143
0
      gradYTmp[-1] = gradYTmp[0];
144
0
      gradYTmp[width - 2 * BDOF_EXTEND_SIZE] = gradYTmp[width - 2 * BDOF_EXTEND_SIZE - 1];
145
0
      gradYTmp += gradStride;
146
0
    }
147
148
0
    gradXTmp = gradX + gradStride;
149
0
    gradYTmp = gradY + gradStride;
150
0
    ::memcpy(gradXTmp - gradStride, gradXTmp, sizeof(Pel)*(width));
151
0
    ::memcpy(gradXTmp + (height - 2 * BDOF_EXTEND_SIZE)*gradStride, gradXTmp + (height - 2 * BDOF_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
152
0
    ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
153
0
    ::memcpy(gradYTmp + (height - 2 * BDOF_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BDOF_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
154
0
  }
155
0
}
Unexecuted instantiation: void vvenc::gradFilterCore<true>(short const*, int, int, int, int, short*, short*, int)
Unexecuted instantiation: void vvenc::gradFilterCore<false>(short const*, int, int, int, int, short*, short*, int)
156
157
/** Accumulates the five sums of a 6x6 window into the caller-provided counters.
 *
 *  Per sample:
 *    gx = (gradX0 + gradX1) >> 1,  gy = (gradY0 + gradY1) >> 1,
 *    dI = (srcY1 >> 4) - (srcY0 >> 4)
 *  and the counters receive |gx|, |gy|, sign(gx) * dI, sign(gy) * dI and
 *  sign(gy) * gx respectively. The counters are only incremented, never reset
 *  here. \p xu, \p yu and \p bitDepth are not used by this implementation.
 */
void calcBDOFSumsCore( const Pel* srcY0Tmp, const Pel* srcY1Tmp, const Pel* gradX0, const Pel* gradX1,
                       const Pel* gradY0, const Pel* gradY1, int xu, int yu, const ptrdiff_t src0Stride, const ptrdiff_t src1Stride,
                       const ptrdiff_t widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY,
                       int* sumSignGY_GX )
{
  const int shift4 = 4;
  const int shift5 = 1;

  int& accAbsGX  = *sumAbsGX;
  int& accAbsGY  = *sumAbsGY;
  int& accDIX    = *sumDIX;
  int& accDIY    = *sumDIY;
  int& accSignGX = *sumSignGY_GX;

  for (int row = 0; row < 6; row++)
  {
    for (int col = 0; col < 6; col++)
    {
      const int gx = (gradX0[col] + gradX1[col]) >> shift5;
      const int gy = (gradY0[col] + gradY1[col]) >> shift5;
      const int dI = (int)((srcY1Tmp[col] >> shift4) - (srcY0Tmp[col] >> shift4));

      // a zero gradient contributes nothing to the sums it controls
      if (gx > 0)
      {
        accAbsGX += gx;
        accDIX   += dI;
      }
      else if (gx < 0)
      {
        accAbsGX -= gx;
        accDIX   -= dI;
      }

      if (gy > 0)
      {
        accAbsGY  += gy;
        accDIY    += dI;
        accSignGX += gx;
      }
      else if (gy < 0)
      {
        accAbsGY  -= gy;
        accDIY    -= dI;
        accSignGX -= gx;
      }
    }

    srcY1Tmp += src1Stride;
    srcY0Tmp += src0Stride;
    gradX0   += widthG;
    gradX1   += widthG;
    gradY0   += widthG;
    gradY1   += widthG;
  }
}
187
188
189
template<int padSize>
190
void paddingCore(Pel *ptr, int stride, int width, int height)
191
0
{
192
  /*left and right padding*/
193
0
  Pel *ptrTemp1 = ptr;
194
0
  Pel *ptrTemp2 = ptr + (width - 1);
195
0
  ptrdiff_t offset = 0;
196
0
  for (int i = 0; i < height; i++)
197
0
  {
198
0
    offset = stride * i;
199
0
    for (int j = 1; j <= padSize; j++)
200
0
    {
201
0
      *(ptrTemp1 - j + offset) = *(ptrTemp1 + offset);
202
0
      *(ptrTemp2 + j + offset) = *(ptrTemp2 + offset);
203
0
    }
204
0
  }
205
  /*Top and Bottom padding*/
206
0
  int numBytes = (width + padSize + padSize) * sizeof(Pel);
207
0
  ptrTemp1 = (ptr - padSize);
208
0
  ptrTemp2 = (ptr + (stride * (height - 1)) - padSize);
209
0
  for (int i = 1; i <= padSize; i++)
210
0
  {
211
0
    memcpy(ptrTemp1 - (i * stride), (ptrTemp1), numBytes);
212
0
    memcpy(ptrTemp2 + (i * stride), (ptrTemp2), numBytes);
213
0
  }
214
0
}
Unexecuted instantiation: void vvenc::paddingCore<1>(short*, int, int, int)
Unexecuted instantiation: void vvenc::paddingCore<2>(short*, int, int, int)
215
216
void padDmvrCore( const Pel* src, const int srcStride, Pel* dst, const int dstStride, int width, int height, int padSize )
217
0
{
218
0
  g_pelBufOP.copyBuffer( ( const char* ) src, srcStride * sizeof( Pel ), ( char* ) dst, dstStride * sizeof( Pel ), width * sizeof( Pel ), height );
219
0
  if( padSize == 1 )
220
0
    paddingCore<1>( dst, dstStride, width, height );
221
0
  else
222
0
    paddingCore<2>( dst, dstStride, width, height );
223
0
}
224
225
// ====================================================================================================================
226
// Constructor / destructor / initialize
227
// ====================================================================================================================
228
229
/** Default constructor: no chroma format selected yet (NUM_CHROMA_FORMAT),
 *  sub-PU motion compensation inactive, IBC buffer width zero.
 *  No buffers are allocated here.
 */
InterPrediction::InterPrediction()
  : m_currChromaFormat( NUM_CHROMA_FORMAT )
  , m_subPuMC         ( false )
  , m_IBCBufferWidth  ( 0 )
{
}
235
236
/** Destructor: releases the buffers owned by this class via destroy(). */
InterPrediction::~InterPrediction()
{
  destroy();
}
240
241
void InterPrediction::destroy()
242
0
{
243
0
  for( uint32_t i = 0; i < NUM_REF_PIC_LIST_01; i++ )
244
0
  {
245
0
    m_yuvPred[i].destroy();
246
0
  }
247
0
  m_geoPartBuf[0].destroy();
248
0
  m_geoPartBuf[1].destroy();
249
0
  m_IBCBuffer.destroy();
250
0
}
251
252
/** (Re-)initialises the inter prediction buffers.
 *
 *  \param pcRdCost  forwarded to DMVR::init
 *  \param chFormat  chroma format the buffers are created for
 *  \param ctuSize   CTU size; the IBC buffer is (g_IBCBufferSize / ctuSize) x ctuSize
 *  \param ifpLines  stored in InterPredInterpolation::m_ifpLines
 *
 *  If buffers already exist for a different chroma format everything is torn
 *  down first. The IBC buffer is recreated whenever its width changes.
 */
void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chFormat, const int ctuSize, const int ifpLines )
{
  // already initialised for another chroma format: release everything and start over
  const bool hasBuffers = m_yuvPred[L0].getOrigin( COMP_Y ) != nullptr;
  if( hasBuffers && m_currChromaFormat != chFormat )
  {
    destroy();
    DMVR::destroy();
    InterPredInterpolation::destroy();
  }

  m_currChromaFormat = chFormat;

  if( m_yuvPred[L0].getOrigin( COMP_Y ) == nullptr )
  {
    for( uint32_t list = 0; list < NUM_REF_PIC_LIST_01; ++list )
    {
      m_yuvPred[list].create( chFormat, Area{ 0, 0, (int)MAX_CU_SIZE, (int)MAX_CU_SIZE }, 0, 0, 32 );
    }

    InterPredInterpolation::init();
    DMVR::init( pcRdCost, chFormat );

    m_geoPartBuf[0].create( UnitArea( chFormat, Area( 0, 0, MAX_CU_SIZE, MAX_CU_SIZE ) ) );
    m_geoPartBuf[1].create( UnitArea( chFormat, Area( 0, 0, MAX_CU_SIZE, MAX_CU_SIZE ) ) );
  }

  // the IBC buffer depends on the CTU size: drop it when the width no longer matches
  const auto ibcWidth = g_IBCBufferSize / ctuSize;
  if( m_IBCBufferWidth != ibcWidth )
  {
    m_IBCBuffer.destroy();
  }

  if( m_IBCBuffer.bufs.empty() )
  {
    m_IBCBufferWidth = ibcWidth;
    m_IBCBuffer.create( UnitArea( chFormat, Area( 0, 0, m_IBCBufferWidth, ctuSize ) ) );
  }

  InterPredInterpolation::m_ifpLines = ifpLines;
}
287
288
// ====================================================================================================================
289
// Public member functions
290
// ====================================================================================================================
291
292
bool InterPrediction::xCheckIdenticalMotion( const CodingUnit& cu ) const
293
0
{
294
0
  const Slice &slice = *cu.cs->slice;
295
296
0
  if( slice.isInterB() && !cu.cs->pps->weightedBiPred )
297
0
  {
298
0
    if( cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 )
299
0
    {
300
0
      int RefPOCL0 = slice.getRefPic( REF_PIC_LIST_0, cu.refIdx[0] )->getPOC();
301
0
      int RefPOCL1 = slice.getRefPic( REF_PIC_LIST_1, cu.refIdx[1] )->getPOC();
302
303
0
      if( RefPOCL0 == RefPOCL1 )
304
0
      {
305
0
        if( !cu.affine )
306
0
        {
307
0
          if( cu.mv[0][0] == cu.mv[1][0] )
308
0
          {
309
0
            return true;
310
0
          }
311
0
        }
312
0
        else
313
0
        {
314
0
          if( cu.mv[0][0] == cu.mv[1][0] && cu.mv[0][1] == cu.mv[1][1] && ( cu.affineType == AFFINEMODEL_4PARAM || cu.mv[0][2] == cu.mv[1][2] ) )
315
0
          {
316
0
            return true;
317
0
          }
318
0
        }
319
0
      }
320
0
    }
321
0
  }
322
323
0
  return false;
324
0
}
325
326
void InterPrediction::xSubPuBDOF( const CodingUnit& cu, PelUnitBuf& predBuf, const RefPicList& refPicList /*= REF_PIC_LIST_X*/)
327
0
{
328
0
  Position puPos = cu.lumaPos();
329
0
  Size puSize = cu.lumaSize();
330
331
0
  CodingUnit subCu = cu;  // th we do not need all that stuff 
332
0
  subCu.cs             = cu.cs;
333
0
  subCu.mergeType      = cu.mergeType;
334
0
  subCu.mmvdMergeFlag  = cu.mmvdMergeFlag;
335
0
  subCu.mcControl      = cu.mcControl;
336
0
  subCu.mergeFlag      = cu.mergeFlag;
337
0
  subCu.ciip           = cu.ciip;
338
0
  subCu.mvRefine       = cu.mvRefine;
339
0
  subCu.refIdx[0]      = cu.refIdx[0];
340
0
  subCu.refIdx[1]      = cu.refIdx[1];
341
342
0
  const int  yEnd      = puPos.y + puSize.height;
343
0
  const int  xEnd      = puPos.x + puSize.width;
344
0
  const int  dy        = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.height);
345
0
  const int  dx        = std::min((int)MAX_BDOF_APPLICATION_REGION, (int)puSize.width);
346
0
  for (int y = puPos.y; y < yEnd; y += dy)
347
0
  {
348
0
    for (int x = puPos.x; x < xEnd; x += dx)
349
0
    {
350
0
      const MotionInfo &curMi = cu.getMotionInfo(Position{ x, y });
351
352
0
      subCu.UnitArea::operator=(UnitArea(cu.chromaFormat, Area(x, y, dx, dy)));
353
0
      subCu = curMi;
354
0
      PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(cu, subCu));
355
356
0
      motionCompensation(subCu, subPredBuf, refPicList);
357
0
    }
358
0
  }
359
0
}
360
void InterPrediction::xPredInterUni( const CodingUnit &cu, const RefPicList &refPicList, PelUnitBuf &pcYuvPred, const bool bi, const bool bdofApplied )
361
0
{
362
0
  int iRefIdx = cu.refIdx[refPicList];
363
0
  Mv mv[3];
364
0
  bool isIBC = false;
365
0
  CHECK(!CU::isIBC(cu) && cu.lwidth() == 4 && cu.lheight() == 4, "invalid 4x4 inter blocks");
366
0
  if (CU::isIBC(cu))
367
0
  {
368
0
    isIBC = true;
369
0
  }
370
0
  if (cu.affine)
371
0
  {
372
0
    CHECK(iRefIdx < 0, "iRefIdx incorrect.");
373
374
0
    mv[0] = cu.mv[refPicList][0];
375
0
    mv[1] = cu.mv[refPicList][1];
376
0
    mv[2] = cu.mv[refPicList][2];
377
0
  }
378
0
  else
379
0
  {
380
0
    mv[0] = cu.mv[refPicList][0];
381
0
    if (!isIBC )
382
0
      clipMv(mv[0], cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
383
0
  }
384
385
0
  for( uint32_t comp = COMP_Y; comp < pcYuvPred.bufs.size(); comp++ )
386
0
  {
387
0
    const ComponentID compID = ComponentID( comp );
388
389
0
    bool luma   = !cu.mccNoLuma  ();
390
0
    bool chroma = !cu.mccNoChroma();
391
392
0
    if( compID == COMP_Y && !luma )
393
0
      continue;
394
0
    if( compID != COMP_Y && !chroma )
395
0
      continue;
396
397
0
    if( cu.affine )
398
0
    {
399
0
      xPredAffineBlk( compID, cu, cu.slice->getRefPic( refPicList, iRefIdx ), mv, pcYuvPred, bi, cu.slice->clpRngs[ compID ], refPicList );
400
0
    }
401
0
    else
402
0
    {
403
0
      if( isIBC )
404
0
      {
405
0
        xPredInterBlk( compID, cu, cu.slice->pic, mv[ 0 ], pcYuvPred, bi, cu.slice->clpRngs[ compID ], bdofApplied, isIBC );
406
0
      }
407
0
      else
408
0
      {
409
0
        xPredInterBlk( compID, cu, cu.slice->getRefPic( refPicList, iRefIdx ), mv[ 0 ], pcYuvPred, bi, cu.slice->clpRngs[ compID ], bdofApplied, isIBC, refPicList );
410
0
      }
411
0
    }
412
0
  }
413
0
}
414
415
void InterPrediction::xPredInterBi( const CodingUnit& cu, PelUnitBuf& yuvPred, const bool bdofApplied, PelUnitBuf *yuvPredTmp )
416
0
{
417
0
  CHECK( !cu.affine && cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 && ( cu.lwidth() + cu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" );
418
419
0
  PelUnitBuf puBuf[NUM_REF_PIC_LIST_01];
420
0
  for (uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++)
421
0
  {
422
0
    if( cu.refIdx[refList] < 0)
423
0
    {
424
0
      continue;
425
0
    }
426
427
0
    RefPicList refPicList = (refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0);
428
429
0
    CHECK(CU::isIBC(cu) && refPicList != REF_PIC_LIST_0, "Invalid interdir for ibc mode");
430
0
    CHECK(CU::isIBC(cu) && cu.refIdx[refList] != MAX_NUM_REF, "Invalid reference index for ibc mode");
431
0
    CHECK((CU::isInter(cu) && cu.refIdx[refList] >= cu.cs->slice->numRefIdx[ refPicList ]), "Invalid reference index");
432
433
0
    puBuf[refList] = m_yuvPred[refList].getCompactBuf( cu );
434
435
0
    if( cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 )
436
0
    {
437
0
      xPredInterUni ( cu, refPicList, puBuf[refList], true, bdofApplied );
438
0
    }
439
0
    else
440
0
    {
441
0
      xPredInterUni( cu, refPicList, puBuf[refList], cu.geo, bdofApplied );
442
0
    }
443
0
  }
444
445
0
  xWeightedAverage( cu, puBuf[0], puBuf[1], yuvPred, bdofApplied, yuvPredTmp );
446
0
}
447
448
/** Motion compensation entry point for IBC: a plain uni-directional prediction
 *  from list 0 without bi-prediction rounding and without BDOF.
 */
void InterPrediction::motionCompensationIBC( CodingUnit& cu, PelUnitBuf& predBuf )
{
  // dual tree handling for IBC as the only ref
  const bool bi          = false;
  const bool bdofApplied = false;
  xPredInterUni( cu, REF_PIC_LIST_0, predBuf, bi, bdofApplied );
}
453
454
/** Performs motion compensation for \p cu into \p predBuf.
 *
 *  With an explicit \p refPicList a uni-directional prediction from that list
 *  is made. With REF_PIC_LIST_X the function decides whether BDOF and DMVR are
 *  applied and dispatches to sub-block BDOF, sub-PU MC, a single list-0
 *  prediction (identical motion in both lists), DMVR or regular bi-prediction.
 *
 *  \param predBufDfltWght  optional buffer; receives a copy of the prediction
 *                          in the identical-motion case and is forwarded to
 *                          xPredInterBi otherwise
 *  \return true if BDOF or DMVR was applied, false otherwise (always false
 *          when a specific reference list was requested)
 */
bool InterPrediction::motionCompensation( CodingUnit& cu, PelUnitBuf& predBuf, const RefPicList& refPicList, PelUnitBuf* predBufDfltWght )
{
  bool refinementUsed = false;

  if( refPicList == REF_PIC_LIST_X )
  {
    CHECK( !cu.affine && cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 && ( cu.lwidth() + cu.lheight() == 12 ), "Invalid 4x8/8x4 bi-predicted blocks" );

    // BDOF needs to be enabled on sequence and picture level and is only used for
    // plain (non-affine, non-sub-PU, non-CIIP) CUs fulfilling all conditions below
    const bool bdofApplied =    cu.cs->sps->BDOF
                             && ( !cu.cs->picHeader->disBdofFlag )
                             && !( cu.affine || m_subPuMC || cu.ciip )
                             && CU::isBiPredFromDifferentDirEqDistPoc( cu )
                             && cu.Y().minDim() >= 8
                             && cu.Y().area() >= 128
                             && !cu.smvdMode
                             && !( cu.cs->sps->BCW && cu.BcwIdx != BCW_DEFAULT )
                             && !( cu.mccNoBdof() && cu.mmvdMergeFlag );

    const bool dmvrApplied = cu.mvRefine && CU::checkDMVRCondition( cu );

    if( cu.lumaSize().maxDim() > MAX_BDOF_APPLICATION_REGION && cu.mergeType != MRG_TYPE_SUBPU_ATMVP && bdofApplied && !dmvrApplied )
    {
      xSubPuBDOF( cu, predBuf, refPicList );
    }
    else if (cu.mergeType != MRG_TYPE_DEFAULT_N && cu.mergeType != MRG_TYPE_IBC)
    {
      xSubPuMC(cu, predBuf, refPicList);
    }
    else if( xCheckIdenticalMotion( cu ) )
    {
      // both lists describe the same block: one uni-prediction is sufficient
      xPredInterUni( cu, REF_PIC_LIST_0, predBuf, false, false );

      if( predBufDfltWght )
      {
        predBufDfltWght->copyFrom( predBuf );
      }
    }
    else if( dmvrApplied )
    {
      xProcessDMVR( cu, predBuf, cu.slice->clpRngs, bdofApplied );
    }
    else
    {
      xPredInterBi( cu, predBuf, bdofApplied, predBufDfltWght );
    }

    DTRACE( g_trace_ctx, D_MOT_COMP, "BDOF=%d, DMVR=%d\n", bdofApplied, dmvrApplied );
    refinementUsed = bdofApplied || dmvrApplied;
  }
  else
  {
    xPredInterUni( cu, refPicList, predBuf, false, false );
  }

  DTRACE( g_trace_ctx, D_MOT_COMP, "MV=%d,%d\n", cu.mv[0][0].hor, cu.mv[0][0].ver );
  DTRACE( g_trace_ctx, D_MOT_COMP, "MV=%d,%d\n", cu.mv[1][0].hor, cu.mv[1][0].ver );
  DTRACE_PEL_BUF( D_MOT_COMP, predBuf.Y(), cu, cu.predMode, COMP_Y );
  if( cu.chromaFormat != VVENC_CHROMA_400 )
  {
    DTRACE_PEL_BUF( D_MOT_COMP, predBuf.Cb(), cu, cu.predMode, COMP_Cb );
    DTRACE_PEL_BUF( D_MOT_COMP, predBuf.Cr(), cu, cu.predMode, COMP_Cr );
  }

  return refinementUsed;
}
528
529
/** Sub-PU motion compensation.
 *
 *  The CU is traversed in sub-blocks of (1 << ATMVP_SUB_BLOCK_SIZE) samples per
 *  dimension (or the full CU dimension where only one partition fits).
 *  Neighbouring sub-blocks along the longer CU dimension that carry identical
 *  motion info are merged and predicted with one motionCompensation() call on
 *  a temporary, non-affine copy of \p cu.
 *
 *  Side effects visible in the code: cu.refIdx[0] is set to 0, cu.refIdx[1] to
 *  0 for B slices and -1 otherwise; m_subPuMC is true while the sub-blocks are
 *  processed.
 */
void InterPrediction::xSubPuMC(CodingUnit& cu, PelUnitBuf& predBuf, const RefPicList& eRefPicList /*= REF_PIC_LIST_X*/)
{
  const Position puPos  = cu.lumaPos();
  const Size     puSize = cu.lumaSize();

  const int numPartLine = std::max( puSize.width  >> ATMVP_SUB_BLOCK_SIZE, 1u );
  const int numPartCol  = std::max( puSize.height >> ATMVP_SUB_BLOCK_SIZE, 1u );
  const int puHeight    = numPartCol  == 1 ? puSize.height : 1 << ATMVP_SUB_BLOCK_SIZE;
  const int puWidth     = numPartLine == 1 ? puSize.width  : 1 << ATMVP_SUB_BLOCK_SIZE;

  CodingUnit subCu = cu;
  subCu.cs        = cu.cs;
  subCu.mergeType = MRG_TYPE_DEFAULT_N;

  const bool isAffine = cu.affine;
  subCu.affine = false;

  // join sub-pus containing the same motion: the inner ("sec") loop runs along
  // the longer CU dimension, the outer ("fst") loop along the other one
  const bool verMC = puSize.height > puSize.width;
  int fstStart, secStart, fstEnd, secEnd, fstStep, secStep;
  if (verMC)
  {
    fstStart = puPos.x;
    secStart = puPos.y;
    fstEnd   = puPos.x + puSize.width;
    secEnd   = puPos.y + puSize.height;
    fstStep  = puWidth;
    secStep  = puHeight;
  }
  else
  {
    fstStart = puPos.y;
    secStart = puPos.x;
    fstEnd   = puPos.y + puSize.height;
    secEnd   = puPos.x + puSize.width;
    fstStep  = puHeight;
    secStep  = puWidth;
  }

  cu.refIdx[0] = 0;
  cu.refIdx[1] = cu.cs->slice->sliceType == VVENC_B_SLICE ? 0 : -1;
  const bool scaled = false;//!CU::isRefPicSameSize(cu);

  m_subPuMC = true;

  for (int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep)
  {
    for (int secDim = secStart; secDim < secEnd; secDim += secStep)
    {
      const int x = verMC ? fstDim : secDim;
      const int y = verMC ? secDim : fstDim;
      const MotionInfo &curMi = cu.getMotionInfo(Position{ x, y });

      // extend the run as long as the following sub-blocks share curMi
      int length = secStep;
      int later  = secDim + secStep;
      for (; later < secEnd; later += secStep)
      {
        const Position laterPos = verMC ? Position{ fstDim, later } : Position{ later, fstDim };
        const MotionInfo &laterMi = cu.getMotionInfo(laterPos);
        if (scaled || !(laterMi == curMi))
        {
          break;
        }
        length += secStep;
      }

      const int dx = verMC ? puWidth : length;
      const int dy = verMC ? length  : puHeight;

      subCu.UnitArea::operator=(UnitArea(cu.chromaFormat, Area(x, y, dx, dy)));
      subCu = curMi;
      PelUnitBuf subPredBuf = predBuf.subBuf(UnitAreaRelative(cu, subCu));
      subCu.mcControl = (cu.mcControl >> 1) << 1;
      subCu.mvRefine  = false;
      motionCompensation(subCu, subPredBuf, eRefPicList);

      // continue behind the merged run (the loop header adds secStep again)
      secDim = later - secStep;
    }
  }

  m_subPuMC = false;

  cu.affine = isAffine;
}
601
602
static inline int xRightShiftMSB( int numer, int denom )
603
0
{
604
0
  return numer >> floorLog2( denom );
605
0
}
606
607
void xFpBiDirOptFlowCore( const Pel* srcY0, const Pel* srcY1, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0,
608
                          const Pel* gradY1, const int width, const int height, Pel* dstY, const ptrdiff_t dstStride,
609
                          const int shiftNum, const int offset, const int limit, const ClpRng& clpRng,
610
                          const int bitDepth )
611
0
{
612
0
  int xUnit = width >> 2;
613
0
  int yUnit = height >> 2;
614
0
  ptrdiff_t widthG = width + 2 * BDOF_EXTEND_SIZE;
615
616
0
  ptrdiff_t offsetPos = widthG * BDOF_EXTEND_SIZE + BDOF_EXTEND_SIZE;
617
0
  ptrdiff_t stridePredMC = widthG + 2;
618
619
0
  const ptrdiff_t  src0Stride = stridePredMC;
620
0
  const ptrdiff_t  src1Stride = stridePredMC;
621
622
0
  const Pel* srcY0Temp = srcY0;
623
0
  const Pel* srcY1Temp = srcY1;
624
625
0
  for( int yu = 0; yu < yUnit; yu++ )
626
0
  {
627
0
    for( int xu = 0; xu < xUnit; xu++ )
628
0
    {
629
0
      int tmpx = 0, tmpy = 0;
630
0
      int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0;
631
0
      int sumSignGY_GX = 0;
632
633
0
      const Pel* pGradX0Tmp = gradX0 + ( xu << 2 ) + ( yu << 2 ) * widthG;
634
0
      const Pel* pGradX1Tmp = gradX1 + ( xu << 2 ) + ( yu << 2 ) * widthG;
635
0
      const Pel* pGradY0Tmp = gradY0 + ( xu << 2 ) + ( yu << 2 ) * widthG;
636
0
      const Pel* pGradY1Tmp = gradY1 + ( xu << 2 ) + ( yu << 2 ) * widthG;
637
0
      const Pel* SrcY1Tmp = srcY1 + ( xu << 2 ) + ( yu << 2 ) * src1Stride;
638
0
      const Pel* SrcY0Tmp = srcY0 + ( xu << 2 ) + ( yu << 2 ) * src0Stride;
639
640
0
      calcBDOFSumsCore( SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, src0Stride,
641
0
                        src1Stride, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX );
642
0
      tmpx = ( sumAbsGX == 0 ? 0 : xRightShiftMSB( 4 * sumDIX, sumAbsGX ) );
643
0
      tmpx = Clip3( -limit, limit, tmpx );
644
645
0
      const int tmpData = sumSignGY_GX * tmpx >> 1;
646
0
      tmpy = ( sumAbsGY == 0 ? 0 : xRightShiftMSB( ( 4 * sumDIY - tmpData ), sumAbsGY ) );
647
0
      tmpy = Clip3( -limit, limit, tmpy );
648
649
0
      srcY0Temp = srcY0 + ( stridePredMC + 1 ) + ( ( yu * src0Stride + xu ) << 2 );
650
0
      srcY1Temp = srcY1 + ( stridePredMC + 1 ) + ( ( yu * src0Stride + xu ) << 2 );
651
0
      pGradX0Tmp = gradX0 + offsetPos + ( ( yu * widthG + xu ) << 2 );
652
0
      pGradX1Tmp = gradX1 + offsetPos + ( ( yu * widthG + xu ) << 2 );
653
0
      pGradY0Tmp = gradY0 + offsetPos + ( ( yu * widthG + xu ) << 2 );
654
0
      pGradY1Tmp = gradY1 + offsetPos + ( ( yu * widthG + xu ) << 2 );
655
656
0
      Pel* dstY0 = dstY + ( ( yu * dstStride + xu ) << 2 );
657
0
      addBDOFAvgCore( srcY0Temp, src0Stride, srcY1Temp, src1Stride, dstY0, dstStride, pGradX0Tmp, pGradX1Tmp,
658
0
                      pGradY0Tmp, pGradY1Tmp, widthG, ( 1 << 2 ), ( 1 << 2 ), tmpx, tmpy, shiftNum, offset, clpRng );
659
0
    } // xu
660
0
  } // yu
661
0
}
662
663
/** Default constructor: no stored-MV buffer yet, all flags cleared and no
 *  IFP line restriction. Buffers are allocated later by init().
 */
InterPredInterpolation::InterPredInterpolation()
  : m_storedMv( nullptr )
  , m_skipPROF( false )
  , m_encOnly ( false )
  , m_isBi    ( false )
  , m_ifpLines( 0 )
{
}
672
673
/** Destructor: frees all interpolation buffers via destroy(). */
InterPredInterpolation::~InterPredInterpolation()
{
  destroy();
}
677
678
void InterPredInterpolation::destroy()
679
0
{
680
0
  for( uint32_t c = 0; c < MAX_NUM_COMP; c++ )
681
0
  {
682
0
    for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ )
683
0
    {
684
0
      for( uint32_t j = 0; j < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; j++ )
685
0
      {
686
0
        xFree( m_filteredBlock[i][j][c] );
687
0
        m_filteredBlock[i][j][c] = nullptr;
688
0
      }
689
690
0
      xFree( m_filteredBlockTmp[i][c] );
691
0
      m_filteredBlockTmp[i][c] = nullptr;
692
0
    }
693
0
  }
694
0
  xFree(m_gradX0);   m_gradX0 = nullptr;
695
0
  xFree(m_gradY0);   m_gradY0 = nullptr;
696
0
  xFree(m_gradX1);   m_gradX1 = nullptr;
697
0
  xFree(m_gradY1);   m_gradY1 = nullptr;
698
699
0
  if (m_storedMv != nullptr)
700
0
  {
701
0
    delete[] m_storedMv;
702
0
    m_storedMv = nullptr;
703
0
  }
704
0
}
705
706
void InterPredInterpolation::init( bool enableOpt )
707
0
{
708
0
  for( uint32_t c = 0; c < MAX_NUM_COMP; c++ )
709
0
  {
710
0
    int extWidth = MAX_CU_SIZE + (2 * BDOF_EXTEND_SIZE + 2) + 16;
711
0
    int extHeight = MAX_CU_SIZE + (2 * BDOF_EXTEND_SIZE + 2) + 1;
712
0
    extWidth = extWidth > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16) ? extWidth : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 16;
713
0
    extHeight = extHeight > (MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1) ? extHeight : MAX_CU_SIZE + (2 * DMVR_NUM_ITERATION) + 1;
714
0
    for( uint32_t i = 0; i < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; i++ )
715
0
    {
716
0
      m_filteredBlockTmp[i][c] = ( Pel* ) xMalloc( Pel, ( extWidth + 4 ) * ( extHeight + 7 + 4 ) );
717
0
      VALGRIND_MEMCLEAR( m_filteredBlockTmp[i][c], sizeof( Pel ) * (extWidth + 4) * (extHeight + 7 + 4) );
718
719
0
      for( uint32_t j = 0; j < LUMA_INTERPOLATION_FILTER_SUB_SAMPLE_POSITIONS_SIGNAL; j++ )
720
0
      {
721
0
        m_filteredBlock[i][j][c] = ( Pel* ) xMalloc( Pel, extWidth * extHeight );
722
0
        VALGRIND_MEMCLEAR( m_filteredBlock[i][j][c], sizeof( Pel ) * extWidth * extHeight );
723
0
      }
724
0
    }
725
0
  }
726
727
0
  m_gradX0 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE);
728
0
  m_gradY0 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE);
729
0
  m_gradX1 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE);
730
0
  m_gradY1 = (Pel*)xMalloc(Pel, BDOF_TEMP_BUFFER_SIZE);
731
732
0
  VALGRIND_MEMCLEAR( m_gradX0, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE );
733
0
  VALGRIND_MEMCLEAR( m_gradY0, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE );
734
0
  VALGRIND_MEMCLEAR( m_gradX1, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE );
735
0
  VALGRIND_MEMCLEAR( m_gradY1, sizeof( Pel ) * BDOF_TEMP_BUFFER_SIZE );
736
737
0
  m_if.initInterpolationFilter( true );
738
739
0
  xFpBiDirOptFlow   = xFpBiDirOptFlowCore;
740
0
  xFpBDOFGradFilter = gradFilterCore;
741
0
  xFpProfGradFilter = gradFilterCore<false>;
742
0
  xFpApplyPROF      = applyPROFCore;
743
0
  xFpPadDmvr        = padDmvrCore;
744
745
0
  if( enableOpt )
746
0
  {
747
0
#if ENABLE_SIMD_OPT_BDOF && defined( TARGET_SIMD_X86 )
748
0
    initInterPredictionX86();
749
0
#endif
750
#if ENABLE_SIMD_OPT_BDOF && defined( TARGET_SIMD_ARM )
751
    initInterPredictionARM();
752
#endif
753
0
  }
754
755
0
  if (m_storedMv == nullptr)
756
0
  {
757
0
    const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE;
758
0
    m_storedMv = new Mv[MVBUFFER_SIZE*MVBUFFER_SIZE];
759
#if ENABLE_VALGRIND_CODE
760
    for( int i = 0; i < MVBUFFER_SIZE * MVBUFFER_SIZE; i++ )
761
    {
762
      m_storedMv[i].setZero();
763
    }
764
#endif
765
0
  }
766
0
}
767
768
void InterPredInterpolation::xPredInterBlk( const ComponentID compID, const CodingUnit &cu,
769
                                            const Picture *refPic, const Mv &_mv, PelUnitBuf &dstPic,
770
                                            const bool bi, const ClpRng &clpRng, const bool bdofApplied, const bool isIBC, const RefPicList refPicList,
771
                                            const SizeType dmvrWidth, const SizeType dmvrHeight,
772
                                            const bool bilinearMC, const Pel *srcPadBuf, const int32_t srcPadStride )
773
0
{
774
0
  const ChromaFormat  chFmt = cu.chromaFormat;
775
0
  const bool          rndRes = !bi;
776
777
0
  int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleX(compID, chFmt);
778
0
  int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleY(compID, chFmt);
779
780
0
  Mv    mv(_mv);
781
782
0
  CHECKD( m_ifpLines && !srcPadBuf && cu.cs->picture != refPic && !CU::isMvInRangeFPP( cu[compID].y, cu[compID].height, mv.ver, m_ifpLines, *cu.cs->pcv, getComponentScaleY(compID, chFmt) ), "xPredInterBlk: CTU line-wise FPP MV restriction failed!\n" );
783
784
0
  int xFrac = mv.hor & ((1 << shiftHor) - 1);
785
0
  int yFrac = mv.ver & ((1 << shiftVer) - 1);
786
0
  if (isIBC)
787
0
  {
788
0
    xFrac = yFrac = 0;
789
0
  }
790
791
0
  PelBuf& dstBuf  = dstPic.bufs[compID];
792
0
  unsigned width  = dstBuf.width;
793
0
  unsigned height = dstBuf.height;
794
795
0
  const Pel* refBufPtr;
796
0
  int        refBufStride;
797
798
0
  if( srcPadBuf )
799
0
  {
800
0
    refBufPtr    = srcPadBuf;
801
0
    refBufStride = srcPadStride;
802
0
  }
803
0
  else
804
0
  {
805
0
    Position offset = cu.blocks[compID].pos().offset( mv.hor >> shiftHor, mv.ver >> shiftVer );
806
0
    refBufPtr       = refPic->getRecoBufPtr   ( compID );
807
0
    refBufStride    = refPic->getRecoBufStride( compID );
808
0
    refBufPtr      += offset.x;
809
0
    refBufPtr      += offset.y * refBufStride;
810
0
  }
811
812
0
  if( dmvrWidth )
813
0
  {
814
0
    width  = dmvrWidth;
815
0
    height = dmvrHeight;
816
0
  }
817
  // backup data
818
0
  const int backupWidth = width;
819
0
  const int backupHeight = height;
820
0
  Pel* backupDstBufPtr = dstBuf.buf;
821
0
  int backupDstBufStride = dstBuf.stride;
822
0
  if( bdofApplied && compID == COMP_Y )
823
0
  {
824
0
    width = width + 2 * BDOF_EXTEND_SIZE + 2;
825
0
    height = height + 2 * BDOF_EXTEND_SIZE + 2;
826
827
    // change MC output
828
0
    CHECK( refPicList >= NUM_REF_PIC_LIST_01, "Wrong refpiclist" );
829
0
    dstBuf.stride = width;
830
0
    dstBuf.buf = m_filteredBlockTmp[2 + refPicList][compID] + 2 * dstBuf.stride + 2;
831
0
  }
832
0
  bool useAltHpelIf = cu.imv == IMV_HPEL;
833
834
0
  if( bilinearMC )
835
0
  {
836
0
    m_if.filterN2_2D( compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, width, height, xFrac, yFrac, clpRng );
837
0
  }
838
0
  else if( yFrac == 0 )
839
0
  {
840
0
    m_if.filterHor(compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, xFrac, rndRes, chFmt, clpRng, useAltHpelIf, 0);
841
0
  }
842
0
  else if( xFrac == 0 )
843
0
  {
844
0
    m_if.filterVer(compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, true, rndRes, chFmt, clpRng, useAltHpelIf, 0);
845
0
  }
846
0
  else if( backupWidth == 4 && backupHeight == 4 )
847
0
  {
848
0
    m_if.filter4x4( compID, refBufPtr, refBufStride ,(Pel*)dstBuf.buf, dstBuf.stride, 4, 4, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf );
849
0
  }
850
0
  else if( backupWidth == 16 )
851
0
  {
852
0
    m_if.filter16xH( compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, 16, backupHeight, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf );
853
0
  }
854
0
  else if( backupWidth == 8 )
855
0
  {
856
0
    m_if.filter8xH( compID, refBufPtr, refBufStride, dstBuf.buf, dstBuf.stride, 8, backupHeight, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf );
857
0
  }
858
0
  else
859
0
  {
860
0
    const int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA;
861
862
0
    PelBuf tmpBuf( m_filteredBlockTmp[0][compID], dmvrWidth ? dmvrWidth : dstBuf.stride, dmvrWidth ? Size( dmvrWidth, dmvrHeight ) : cu.blocks[compID].size() );
863
864
0
    m_if.filterHor(compID, refBufPtr -  ((vFilterSize >> 1) - 1) * refBufStride,  refBufStride,  tmpBuf.buf, tmpBuf.stride, backupWidth, backupHeight + vFilterSize - 1, xFrac, false, chFmt, clpRng, useAltHpelIf, 0);
865
0
    m_if.filterVer(compID, tmpBuf.buf + ((vFilterSize >> 1) - 1) * tmpBuf.stride, tmpBuf.stride, dstBuf.buf, dstBuf.stride, backupWidth, backupHeight, yFrac, false, rndRes, chFmt, clpRng, useAltHpelIf, 0);
866
0
  }
867
868
0
  if (bdofApplied && compID == COMP_Y)
869
0
  {
870
0
    const unsigned shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
871
0
    int xOffset = (xFrac < 8) ? 1 : 0;
872
0
    int yOffset = (yFrac < 8) ? 1 : 0;
873
0
    const Pel* refPel = refBufPtr - yOffset * refBufStride - xOffset;
874
0
    Pel* dstPel = m_filteredBlockTmp[2 + refPicList][compID] + dstBuf.stride + 1;
875
0
    for (int w = 0; w < (width - 2 * BDOF_EXTEND_SIZE); w++)
876
0
    {
877
0
      Pel val = leftShiftU(refPel[w], shift);
878
0
      dstPel[w] = val - (Pel)IF_INTERNAL_OFFS;
879
0
    }
880
881
0
    refPel = refBufPtr + (1 - yOffset)*refBufStride - xOffset;
882
0
    dstPel = m_filteredBlockTmp[2 + refPicList][compID] + 2 * dstBuf.stride + 1;
883
0
    for (int h = 0; h < (height - 2 * BDOF_EXTEND_SIZE - 2); h++)
884
0
    {
885
0
      Pel val = leftShiftU(refPel[0], shift);
886
0
      dstPel[0] = val - (Pel)IF_INTERNAL_OFFS;
887
888
0
      val = leftShiftU(refPel[width - 3], shift);
889
0
      dstPel[width - 3] = val - (Pel)IF_INTERNAL_OFFS;
890
891
0
      refPel += refBufStride;
892
0
      dstPel += dstBuf.stride;
893
0
    }
894
895
0
    refPel = refBufPtr + (height - 2 * BDOF_EXTEND_SIZE - 2 + 1 - yOffset)*refBufStride - xOffset;
896
0
    dstPel = m_filteredBlockTmp[2 + refPicList][compID] + (height - 2 * BDOF_EXTEND_SIZE)*dstBuf.stride + 1;
897
0
    for (int w = 0; w < (width - 2 * BDOF_EXTEND_SIZE); w++)
898
0
    {
899
0
      Pel val = leftShiftU(refPel[w], shift);
900
0
      dstPel[w] = val - (Pel)IF_INTERNAL_OFFS;
901
0
    }
902
903
    // restore data
904
0
    width         = backupWidth;
905
0
    height        = backupHeight;
906
0
    dstBuf.buf    = backupDstBufPtr;
907
0
    dstBuf.stride = backupDstBufStride;
908
0
  }
909
0
}
910
911
/// Runs bi-directional optical flow on the two luma predictions stored in
/// m_filteredBlockTmp[2] and m_filteredBlockTmp[3] and writes the result to \p yuvDst.
///
/// For each reference list the gradients are computed first, then the extended
/// prediction block is padded by replicating its outermost columns and rows.
void InterPredInterpolation::xApplyBDOF( PelBuf& yuvDst, const ClpRng& clpRng )
{
  const int bitDepth = clpRng.bd;

  const int width   = yuvDst.width;
  const int height  = yuvDst.height;
  const int widthG  = width  + 2 * BDOF_EXTEND_SIZE;
  const int heightG = height + 2 * BDOF_EXTEND_SIZE;

  // stride of the extended MC buffers (gradient width plus one sample on each side)
  const int stridePredMC = widthG + 2;

  for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ )
  {
    Pel* const predBase = m_filteredBlockTmp[2 + refList][COMP_Y];
    Pel* const gradX    = ( refList == 0 ) ? m_gradX0 : m_gradX1;
    Pel* const gradY    = ( refList == 0 ) ? m_gradY0 : m_gradY1;

    xFpBDOFGradFilter( predBase + stridePredMC + 1, stridePredMC, widthG, heightG, widthG, gradX, gradY, bitDepth );

    // replicate the first and last sample of every row into the left/right margin
    Pel* row = predBase + 2 * stridePredMC + 2;
    for( int y = 0; y < height; y++, row += stridePredMC )
    {
      row[-1]    = row[0];
      row[width] = row[width - 1];
    }

    // replicate the first and last row into the top/bottom margin
    Pel* const firstRow = predBase + 2 * stridePredMC + 1;
    ::memcpy( firstRow - stridePredMC,          firstRow,                                  sizeof( Pel ) * ( widthG ) );
    ::memcpy( firstRow + height * stridePredMC, firstRow + ( height - 1 ) * stridePredMC, sizeof( Pel ) * ( widthG ) );
  }

  const Pel* const srcY0 = m_filteredBlockTmp[2][COMP_Y] + stridePredMC + 1;
  const Pel* const srcY1 = m_filteredBlockTmp[3][COMP_Y] + stridePredMC + 1;

  const unsigned shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
  const int      offset   = ( 1 << ( shiftNum - 1 ) ) + 2 * IF_INTERNAL_OFFS;
  const int      limit    = ( 1 << 4 ) - 1;

  xFpBiDirOptFlow( srcY0, srcY1, m_gradX0, m_gradX1, m_gradY0, m_gradY1, width, height, yuvDst.buf, yuvDst.stride, shiftNum, offset,
                   limit, clpRng, bitDepth );
}
959
960
void InterPredInterpolation::xWeightedAverage( const CodingUnit& cu, const CPelUnitBuf& pcYuvSrc0, const CPelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const bool bdofApplied, PelUnitBuf *yuvPredTmp )
961
0
{
962
0
  const bool lumaOnly     = cu.mccNoChroma();
963
0
  const bool chromaOnly   = cu.mccNoLuma  ();
964
965
0
  CHECK( chromaOnly && lumaOnly, "should not happen" );
966
967
0
  const ClpRngs& clpRngs  = cu.slice->clpRngs;
968
0
  const int      iRefIdx0 = cu.refIdx[0];
969
0
  const int      iRefIdx1 = cu.refIdx[1];
970
971
0
  if( iRefIdx0 >= 0 && iRefIdx1 >= 0 )
972
0
  {
973
0
    if( cu.BcwIdx != BCW_DEFAULT && ( yuvPredTmp || !cu.ciip ) )
974
0
    {
975
0
      CHECK( bdofApplied, "BCW is disallowed with BIO" );
976
977
0
      pcYuvDst.addWeightedAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, cu.BcwIdx, chromaOnly, lumaOnly );
978
979
0
      if( yuvPredTmp )
980
0
      {
981
0
        yuvPredTmp->addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly );
982
0
      }
983
0
      return;
984
0
    }
985
    
986
0
    if( bdofApplied && !chromaOnly )
987
0
    {
988
0
      xApplyBDOF( pcYuvDst.Y(), clpRngs[COMP_Y] );
989
0
    }
990
0
    if( !bdofApplied && ( lumaOnly || chromaOnly ) )
991
0
    {
992
0
      pcYuvDst.addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, chromaOnly, lumaOnly );
993
0
    }
994
0
    else
995
0
    {
996
0
      pcYuvDst.addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, bdofApplied );
997
0
    }
998
0
  }
999
0
  else
1000
0
  {
1001
0
    if( cu.geo )
1002
0
    {
1003
0
      pcYuvDst.copyFrom( iRefIdx0 >= 0 ? pcYuvSrc0 : pcYuvSrc1 );
1004
0
    }
1005
0
    else
1006
0
    {
1007
0
      pcYuvDst.copyClip( iRefIdx0 >= 0 ? pcYuvSrc0 : pcYuvSrc1, clpRngs, lumaOnly, chromaOnly );
1008
0
    }
1009
0
  }
1010
0
}
1011
1012
/// GEO prediction: motion-compensates both geometric partitions of \p cu into the
/// two m_geoPartBuf scratch buffers and blends them into \p predBuf.
void InterPrediction::motionCompensationGeo( CodingUnit &cu, PelUnitBuf &predBuf, const MergeCtx &geoMrgCtx )
{
  const ClpRngs &clpRngs = cu.slice->clpRngs;
  const UnitArea localUnitArea( cu.chromaFormat, Area( 0, 0, cu.lwidth(), cu.lheight() ) );

  PelUnitBuf geoPartPred[2] = { m_geoPartBuf[0].getBuf( localUnitArea ), m_geoPartBuf[1].getBuf( localUnitArea ) };

  // one uni-directional MC pass per partition, each with its own merge candidate
  for( int part = 0; part < 2; part++ )
  {
    geoMrgCtx.setMergeInfo( cu, cu.geoMergeIdx[part] );
    CU::spanMotionInfo    ( cu );
    motionCompensation    ( cu, geoPartPred[part], REF_PIC_LIST_X );   // TODO: check 4:0:0 interaction with weighted prediction.
  }

  const int32_t channels = isChromaEnabled( cu.chromaFormat ) ? MAX_NUM_CH : CH_L;
  weightedGeoBlk( clpRngs, cu, cu.geoSplitDir, channels, predBuf, geoPartPred[0], geoPartPred[1] );
}
1030
1031
void InterPredInterpolation::weightedGeoBlk(const ClpRngs &clpRngs, CodingUnit& cu, const uint8_t splitDir,
1032
                                            int32_t channel, PelUnitBuf &predDst, PelUnitBuf &predSrc0, PelUnitBuf &predSrc1)
1033
0
{
1034
0
  if( channel != CH_C )
1035
0
  {
1036
0
    m_if.weightedGeoBlk( clpRngs, cu, cu.lumaSize().width, cu.lumaSize().height, COMP_Y, splitDir, predDst, predSrc0, predSrc1 );
1037
0
  }
1038
1039
0
  if( channel != CH_L && isChromaEnabled( cu.chromaFormat ) )
1040
0
  {
1041
0
    m_if.weightedGeoBlk( clpRngs, cu, cu.chromaSize().width, cu.chromaSize().height, COMP_Cb, splitDir, predDst, predSrc0, predSrc1 );
1042
0
    m_if.weightedGeoBlk( clpRngs, cu, cu.chromaSize().width, cu.chromaSize().height, COMP_Cr, splitDir, predDst, predSrc0, predSrc1 );
1043
0
  }
1044
0
}
1045
1046
0
/// Constructs an uninitialised DMVR object; init() keys off the null m_pcRdCost.
DMVR::DMVR()
  : m_pcRdCost( nullptr )
{
}
1049
1050
/// Releases all DMVR buffers by delegating to destroy().
DMVR::~DMVR()
{
  this->destroy();
}
1054
1055
void DMVR::destroy()
1056
0
{
1057
0
  for( int i = 0; i < NUM_REF_PIC_LIST_01; i++ )
1058
0
  {
1059
0
    m_yuvPred[i].destroy();
1060
0
    m_yuvPad[i].destroy();
1061
0
    m_yuvTmp[i].destroy();
1062
0
  }
1063
0
  m_pcRdCost = nullptr;
1064
0
}
1065
1066
/// One-time setup of the DMVR working buffers.
/// Does nothing when m_pcRdCost is already set (i.e. init() ran and destroy() did not).
void DMVR::init( RdCost* pcRdCost, const ChromaFormat chFormat )
{
  if( m_pcRdCost != nullptr )
  {
    return;
  }

  m_pcRdCost = pcRdCost;

  Area predArea( 0, 0, DMVR_SUBCU_SIZE, DMVR_SUBCU_SIZE );
  Area refArea ( 0, 0, MAX_CU_SIZE, MAX_CU_SIZE );

  for( int list = 0; list < NUM_REF_PIC_LIST_01; ++list )
  {
    m_yuvPred[list].create( chFormat, predArea );
    m_yuvTmp [list].create( CHROMA_400, refArea, 0, DMVR_NUM_ITERATION );
    m_yuvPad [list].create( chFormat, predArea, 0, DMVR_NUM_ITERATION + (NTAPS_LUMA>>1), 32 );

    // m_yuvPad[list].bufs[0].buf is aligned to 32, but writing actually starts at
    // buf - 3 * stride - 3 = buf - 99, which is not aligned with int. The left margin
    // is one sample larger than needed, so shifting the luma pointer one sample to the
    // left stays inside the allocated memory.
    m_yuvPad[list].bufs[0].buf--;
  }
}
1087
1088
void DMVR::xCopyAndPad( const CodingUnit& cu, PelUnitBuf& pcPad, RefPicList refId, bool forLuma)
1089
0
{
1090
0
  int width, height;
1091
0
  Mv cMv;
1092
1093
0
  const Picture* refPic = cu.slice->getRefPic(refId, cu.refIdx[refId]);
1094
1095
0
  static constexpr int mvShift = MV_FRACTIONAL_BITS_INTERNAL;
1096
1097
0
  const int start = forLuma ? 0 : 1;
1098
0
  const int end   = forLuma ? 1 : MAX_NUM_COMP;
1099
1100
0
  for (int compID = start; compID < end; compID++)
1101
0
  {
1102
0
    int filtersize = compID == COMP_Y ? NTAPS_LUMA : NTAPS_CHROMA;
1103
0
    cMv            = cu.mv[refId][0];
1104
0
    width          = pcPad.bufs[compID].width;
1105
0
    height         = pcPad.bufs[compID].height;
1106
1107
0
    int mvshiftTemp = mvShift + getComponentScaleX((ComponentID)compID, cu.chromaFormat);
1108
1109
0
    width  += filtersize - 1;
1110
0
    height += filtersize - 1;
1111
0
    cMv    += Mv(-(((filtersize >> 1) - 1) << mvshiftTemp), -(((filtersize >> 1) - 1) << mvshiftTemp));
1112
1113
0
    clipMv(cMv, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
1114
1115
    /* Pre-fetch similar to HEVC*/
1116
0
    {
1117
0
      CPelBuf refBuf      = refPic->getRecoBuf(ComponentID(compID));
1118
0
      Position Rec_offset = cu.blocks[compID].pos().offset(cMv.hor >> mvshiftTemp, cMv.ver >> mvshiftTemp);
1119
0
      const Pel* refBufPtr = refBuf.bufAt(Rec_offset);
1120
1121
0
      PelBuf& dstBuf = pcPad.bufs[compID];
1122
1123
0
      const int leftTopFilterExt = ((filtersize >> 1) - 1);
1124
0
      const int padOffset        = leftTopFilterExt * dstBuf.stride + leftTopFilterExt;
1125
0
      const int padSize          = (DMVR_NUM_ITERATION) >> getComponentScaleX((ComponentID)compID, cu.chromaFormat);
1126
1127
0
      xFpPadDmvr( refBufPtr, refBuf.stride, dstBuf.buf - padOffset, dstBuf.stride, width, height, padSize );
1128
0
    }
1129
0
  }
1130
0
}
1131
1132
/// Three-step restoring division: returns sign(N) * min(7, floor(|N| / (2 * D)))
/// for the operand ranges used by the caller (quotient magnitude is capped at 7).
inline int32_t div_for_maxq7(int64_t N, int64_t D)
{
  const bool negative = ( N < 0 );
  if( negative )
  {
    N = -N;
  }

  int32_t q = 0;
  D <<= 3;

  // two most significant quotient bits: compare against 8*D, then 4*D
  for( int step = 0; step < 2; ++step )
  {
    if( N >= D )
    {
      N -= D;
      ++q;
    }
    q <<= 1;
    D >>= 1;
  }

  // least significant quotient bit: compare against 2*D
  if( N >= D )
  {
    ++q;
  }

  return negative ? -q : q;
}
1166
1167
void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
1168
0
{
1169
0
  for( int hv = 0; hv < 2; hv++)
1170
0
  {
1171
0
    const int32_t mvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/
1172
0
    int64_t numerator   = (int64_t)((sadBuffer[hv+1] - sadBuffer[hv+3]) << mvSubPelLvl);
1173
0
    int64_t denominator = (int64_t)((sadBuffer[hv+1] + sadBuffer[hv+3] - (sadBuffer[0] << 1)));
1174
1175
0
    if (0 != denominator)
1176
0
    {
1177
0
      if ((sadBuffer[hv+1] != sadBuffer[0]) && (sadBuffer[hv+3] != sadBuffer[0]))
1178
0
      {
1179
0
        deltaMv[hv] = div_for_maxq7(numerator, denominator);
1180
0
      }
1181
0
      else
1182
0
      {
1183
0
        deltaMv[hv] = (sadBuffer[hv+1] == sadBuffer[0]) ? -8 : 8;
1184
0
      }
1185
0
    }
1186
0
  }
1187
0
}
1188
1189
/// Final motion compensation of a DMVR sub-block for both reference lists.
/// The refined MVs are mergeMv[L0] + refMv and mergeMv[L1] - refMv. Per component,
/// when the integer MV part moved relative to the merge MV the prediction is read
/// from the padded buffer \p refBuf, otherwise directly from the reference picture.
void DMVR::xFinalPaddedMCForDMVR( const CodingUnit& cu, PelUnitBuf* dstBuf, const PelUnitBuf *refBuf, const bool bioApplied, const Mv mergeMv[NUM_REF_PIC_LIST_01], const Mv& refMv )
{
  const int mvShift = MV_FRACTIONAL_BITS_INTERNAL;

  Mv mv[2];
  mv[L0] = mergeMv[L0] + refMv;
  mv[L0].clipToStorageBitDepth();
  mv[L1] = mergeMv[L1] - refMv;
  mv[L1].clipToStorageBitDepth();

  for( int k = 0; k < NUM_REF_PIC_LIST_01; k++ )
  {
    const RefPicList refId   = (RefPicList)k;
    const Mv&        cMv     = mv[refId];
    const Mv&        startMv = mergeMv[refId];

    Mv cMvClipped( cMv );
    clipMv( cMvClipped, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv );

    const Picture* refPic = cu.slice->getRefPic( refId, cu.refIdx[refId] );

    for( int compID = 0; compID < getNumberValidComponents( cu.chromaFormat ); compID++ )
    {
      const ComponentID comp        = (ComponentID)compID;
      const int         mvshiftTemp = mvShift + getComponentScaleX( comp, cu.chromaFormat );

      // integer-sample displacement of the refined MV relative to the merge MV
      const int deltaIntMvX = ( cMv.hor >> mvshiftTemp ) - ( startMv.hor >> mvshiftTemp );
      const int deltaIntMvY = ( cMv.ver >> mvshiftTemp ) - ( startMv.ver >> mvshiftTemp );

      CHECK((abs(deltaIntMvX) > DMVR_NUM_ITERATION) || (abs(deltaIntMvY) > DMVR_NUM_ITERATION), "not expected DMVR movement");

      const bool movedIntPel = deltaIntMvX || deltaIntMvY;

      if( !movedIntPel )
      {
        xPredInterBlk( comp, cu, refPic, cMvClipped, dstBuf[refId], true, cu.cs->slice->clpRngs[compID], bioApplied, false, refId );
        continue;
      }

      const PelBuf& srcBuf = refBuf[refId].bufs[compID];
      const int     offset = ( deltaIntMvY ) * srcBuf.stride + ( deltaIntMvX );

      xPredInterBlk( comp, cu, nullptr, cMvClipped, dstBuf[refId], true, cu.cs->slice->clpRngs[compID], bioApplied, false, refId, 0, 0, 0, srcBuf.buf + offset, srcBuf.stride );
    }
  }
}
1226
1227
static void xDMVRSubPixelErrorSurface( int16_t *totalDeltaMV, int16_t *deltaMV, uint64_t *pSADsArray )
1228
0
{
1229
0
  int sadStride = (((2 * DMVR_NUM_ITERATION) + 1));
1230
0
  uint64_t sadbuffer[5];
1231
0
  if( ( abs( totalDeltaMV[ 0 ] ) != ( 2 << MV_FRACTIONAL_BITS_INTERNAL ) )
1232
0
   && ( abs( totalDeltaMV[ 1 ] ) != ( 2 << MV_FRACTIONAL_BITS_INTERNAL ) ) )
1233
0
  {
1234
0
    int32_t tempDeltaMv[2] = { 0,0 };
1235
0
    sadbuffer[0] = pSADsArray[0];
1236
0
    sadbuffer[1] = pSADsArray[-1];
1237
0
    sadbuffer[2] = pSADsArray[-sadStride];
1238
0
    sadbuffer[3] = pSADsArray[1];
1239
0
    sadbuffer[4] = pSADsArray[sadStride];
1240
0
    xSubPelErrorSrfc(sadbuffer, tempDeltaMv);
1241
0
    totalDeltaMV[0] += tempDeltaMv[0];
1242
0
    totalDeltaMV[1] += tempDeltaMv[1];
1243
0
  }
1244
0
}
1245
1246
void DMVR::xProcessDMVR( const CodingUnit& cu, PelUnitBuf& pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied )
1247
0
{
1248
0
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTER_MRG_DMVR, cu.cs, CH_L );
1249
  /*Always High Precision*/
1250
0
  const int csx      = getChannelTypeScaleX( CH_C, cu.chromaFormat );
1251
0
  const int csy      = getChannelTypeScaleY( CH_C, cu.chromaFormat );
1252
0
  const int mvShift  = MV_FRACTIONAL_BITS_INTERNAL;
1253
0
  const int mvShiftC = mvShift + csx;
1254
1255
  /*use merge MV as starting MV*/
1256
0
  const Mv mergeMv[] = { cu.mv[REF_PIC_LIST_0][0], cu.mv[REF_PIC_LIST_1][0] };
1257
1258
1259
0
  const int dy = std::min<int>(cu.lumaSize().height, DMVR_SUBCU_SIZE);
1260
0
  const int dx = std::min<int>(cu.lumaSize().width,  DMVR_SUBCU_SIZE);
1261
1262
0
  const Position& puPos = cu.lumaPos();
1263
1264
0
  bool bioAppliedType[MAX_NUM_SUBCU_DMVR];
1265
1266
  // Do refinement search
1267
0
  {
1268
0
    const int bilinearBufStride = (cu.Y().width + (2 * DMVR_NUM_ITERATION));
1269
0
    const int padSize = DMVR_NUM_ITERATION << 1;
1270
0
    const int dstOffset = -( DMVR_NUM_ITERATION * bilinearBufStride + DMVR_NUM_ITERATION );
1271
1272
    /*use merge MV as starting MV*/
1273
0
    Mv mergeMVL0 = cu.mv[L0][0];
1274
0
    Mv mergeMVL1 = cu.mv[L1][0];
1275
1276
    /*Clip the starting MVs*/
1277
0
    clipMv(mergeMVL0, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
1278
0
    clipMv(mergeMVL1, cu.lumaPos(), cu.lumaSize(), *cu.cs->pcv);
1279
1280
    /*L0 MC for refinement*/
1281
0
    {
1282
0
      const Picture* refPic = cu.slice->getRefPic(L0, cu.refIdx[L0]);
1283
1284
0
      PelUnitBuf yuvTmp = PelUnitBuf(cu.chromaFormat, PelBuf(m_yuvTmp[L0].getBuf(COMP_Y).buf + dstOffset, bilinearBufStride, cu.lwidth() + padSize, cu.lheight() + padSize));
1285
1286
0
      mergeMVL0.hor -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL);
1287
0
      mergeMVL0.ver -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL);
1288
1289
0
      xPredInterBlk(COMP_Y, cu, refPic, mergeMVL0, yuvTmp, true, clpRngs[COMP_Y], false, false, L0, cu.lwidth() + padSize, cu.lheight() + padSize, true);
1290
0
    }
1291
1292
    /*L1 MC for refinement*/
1293
0
    {
1294
0
      const Picture* refPic = cu.slice->getRefPic(L1, cu.refIdx[L1]);
1295
1296
0
      PelUnitBuf yuvTmp = PelUnitBuf(cu.chromaFormat, PelBuf(m_yuvTmp[L1].getBuf(COMP_Y).buf + dstOffset, bilinearBufStride, cu.lwidth() + padSize, cu.lheight() + padSize));
1297
1298
0
      mergeMVL1.hor -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL);
1299
0
      mergeMVL1.ver -= (DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL);
1300
1301
0
      xPredInterBlk(COMP_Y, cu, refPic, mergeMVL1, yuvTmp, true, clpRngs[COMP_Y], false, false, L1, cu.lwidth() + padSize, cu.lheight() + padSize, true);
1302
0
    }
1303
1304
    // point mc buffer to center point to avoid multiplication to reach each iteration to the beginning
1305
0
    const Pel* biLinearPredL0 = m_yuvTmp[0].getBuf( COMP_Y ).buf;
1306
0
    const Pel* biLinearPredL1 = m_yuvTmp[1].getBuf( COMP_Y ).buf;
1307
0
    const int bioEnabledThres = 2 * dy * dx;
1308
0
    const int bd = cu.cs->slice->clpRngs[COMP_Y].bd;
1309
1310
0
    DistParam distParam = m_pcRdCost->setDistParam( nullptr, nullptr, bilinearBufStride, bilinearBufStride, bd, COMP_Y, dx, dy, 1, true );
1311
1312
0
    int num = 0;
1313
0
    int yStart = 0;
1314
0
    uint64_t sadArray[((2 * DMVR_NUM_ITERATION) + 1) * ((2 * DMVR_NUM_ITERATION) + 1)];
1315
1316
0
    for( int y = puPos.y; y < ( puPos.y + cu.lumaSize().height ); y = y + dy, yStart = yStart + dy )
1317
0
    {
1318
0
      for( int x = puPos.x, xStart = 0; x < ( puPos.x + cu.lumaSize().width ); x = x + dx, xStart = xStart + dx )
1319
0
      {
1320
0
        uint64_t minCost        = MAX_UINT64;
1321
1322
        // set all entries to MAX_UNIT64
1323
0
        uint64_t *pSADsArray = &sadArray[( ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) * ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) ) >> 1];
1324
1325
0
        const Pel* addrL0Centre = biLinearPredL0 + yStart * bilinearBufStride + xStart;
1326
0
        const Pel* addrL1Centre = biLinearPredL1 + yStart * bilinearBufStride + xStart;
1327
1328
0
        const Pel* addrL0 = addrL0Centre;
1329
0
        const Pel* addrL1 = addrL1Centre;
1330
1331
0
        distParam.org.buf = addrL0;
1332
0
        distParam.cur.buf = addrL1;
1333
0
        minCost  = distParam.distFunc( distParam ) >> 1;
1334
0
        minCost -= ( minCost >> 2 );
1335
1336
0
        if( minCost < ( dx * dy ) )
1337
0
        {
1338
0
          cu.mvdL0SubPu[num] = Mv( 0, 0 );
1339
0
        }
1340
0
        else
1341
0
        {
1342
0
          int16_t totalDeltaMV[2] = { 0, 0 };
1343
0
          int16_t deltaMV[2]      = { 0, 0 };
1344
1345
0
          pSADsArray[0] = minCost;
1346
0
          pSADsArray    = sadArray;
1347
1348
0
          for( int ver = -2; ver <= 2; ver++ )
1349
0
          {
1350
0
            const int initHor = -2;
1351
0
            const ptrdiff_t offset = initHor + ver * bilinearBufStride;
1352
              
1353
0
            distParam.org.buf = addrL0 + offset;
1354
0
            distParam.cur.buf = addrL1 - offset;
1355
              
1356
0
            distParam.dmvrSadX5( distParam, pSADsArray, ver != 0 );
1357
1358
0
            for( int hor = -2; hor <= 2; hor++, pSADsArray++ )
1359
0
            {
1360
0
              Distortion cost = *pSADsArray;
1361
1362
0
              if( cost < minCost )
1363
0
              {
1364
0
                minCost    = cost;
1365
0
                deltaMV[0] = hor;
1366
0
                deltaMV[1] = ver;
1367
0
              }
1368
0
            }
1369
0
          }
1370
1371
0
          pSADsArray = &sadArray[( ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) * ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) ) >> 1];
1372
1373
0
          totalDeltaMV[0] += deltaMV[0];
1374
0
          totalDeltaMV[1] += deltaMV[1];
1375
0
          pSADsArray      += ( ( deltaMV[1] * ( ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) ) ) + deltaMV[0] );
1376
0
          totalDeltaMV[0]  = totalDeltaMV[0] * ( 1 << mvShift );
1377
0
          totalDeltaMV[1]  = totalDeltaMV[1] * ( 1 << mvShift );
1378
1379
0
          xDMVRSubPixelErrorSurface( totalDeltaMV, deltaMV, pSADsArray );
1380
1381
0
          cu.mvdL0SubPu[num] = Mv( totalDeltaMV[0], totalDeltaMV[1] );
1382
0
        }
1383
1384
0
        bioAppliedType[num] = ( minCost < bioEnabledThres ) ? false : bioApplied;
1385
1386
0
        num++;
1387
0
      }
1388
0
    }
1389
0
  }
1390
1391
  // Final MC
1392
0
  CodingUnit subCu = cu;
1393
0
  subCu.UnitArea::operator=(UnitArea(cu.chromaFormat, Area(puPos.x, puPos.y, dx, dy)));
1394
0
  PelUnitBuf subPredBuf = pcYuvDst.subBuf(UnitAreaRelative(cu, subCu));
1395
1396
0
  PelUnitBuf predBuf[NUM_REF_PIC_LIST_01];
1397
0
  predBuf[L0] = m_yuvPred[L0].getCompactBuf( subCu );
1398
0
  predBuf[L1] = m_yuvPred[L1].getCompactBuf( subCu );
1399
  /* For padding */
1400
0
  PelUnitBuf padBuf[NUM_REF_PIC_LIST_01];
1401
0
  padBuf[L0] = m_yuvPad[L0].getBufPart(subCu);
1402
0
  padBuf[L1] = m_yuvPad[L1].getBufPart(subCu);
1403
1404
0
  int x = 0, y = 0;
1405
0
  int xStart = 0, yStart = 0;
1406
0
  int num = 0;
1407
0
  const int scaleX = getComponentScaleX(COMP_Cb, cu.chromaFormat);
1408
0
  const int scaleY = getComponentScaleY(COMP_Cb, cu.chromaFormat);
1409
1410
0
  const ptrdiff_t dstStride[MAX_NUM_COMP] = { pcYuvDst.bufs[COMP_Y].stride, cu.chromaFormat != CHROMA_400 ? pcYuvDst.bufs[COMP_Cb].stride : 0, cu.chromaFormat != CHROMA_400 ? pcYuvDst.bufs[COMP_Cr].stride : 0 };
1411
0
  for( y = puPos.y; y < ( puPos.y + cu.lumaSize().height ); y = y + dy, yStart = yStart + dy )
1412
0
  {
1413
0
    for( x = puPos.x, xStart = 0; x < ( puPos.x + cu.lumaSize().width ); x = x + dx, xStart = xStart + dx )
1414
0
    {
1415
0
      subCu.Y().x = x;
1416
0
      subCu.Y().y = y;
1417
1418
0
      if( cu.chromaFormat != CHROMA_400 )
1419
0
      {
1420
0
        subCu.Cb().x = subCu.Cr().x = x >> csx;
1421
0
        subCu.Cb().y = subCu.Cr().y = y >> csy;
1422
0
      }
1423
1424
0
      Mv mv0 = mergeMv[REF_PIC_LIST_0] + cu.mvdL0SubPu[num]; mv0.clipToStorageBitDepth();
1425
0
      Mv mv1 = mergeMv[REF_PIC_LIST_1] - cu.mvdL0SubPu[num]; mv1.clipToStorageBitDepth();
1426
1427
0
      bool padBufL0  = (mv0.hor >> mvShift)  != (mergeMv[0].hor >> mvShift)  || (mv0.ver >> mvShift)  != (mergeMv[0].ver >> mvShift);
1428
0
      bool padBufL0C = (mv0.hor >> mvShiftC) != (mergeMv[0].hor >> mvShiftC) || (mv0.ver >> mvShiftC) != (mergeMv[0].ver >> mvShiftC);
1429
        
1430
0
      bool padBufL1  = (mv1.hor >> mvShift)  != (mergeMv[1].hor >> mvShift)  || (mv1.ver >> mvShift)  != (mergeMv[1].ver >> mvShift);
1431
0
      bool padBufL1C = (mv1.hor >> mvShiftC) != (mergeMv[1].hor >> mvShiftC) || (mv1.ver >> mvShiftC) != (mergeMv[1].ver >> mvShiftC);
1432
1433
0
      padBufL0C &= cu.chromaFormat != CHROMA_400;
1434
0
      padBufL1C &= cu.chromaFormat != CHROMA_400;
1435
1436
0
      if (padBufL0)  xCopyAndPad(subCu, padBuf[L0], L0, true);
1437
0
      if (padBufL0C) xCopyAndPad(subCu, padBuf[L0], L0, false);
1438
0
      if (padBufL1)  xCopyAndPad(subCu, padBuf[L1], L1, true);
1439
0
      if (padBufL1C) xCopyAndPad(subCu, padBuf[L1], L1, false);
1440
1441
0
      xFinalPaddedMCForDMVR( subCu, predBuf, padBuf, bioAppliedType[num], mergeMv, cu.mvdL0SubPu[num] );
1442
1443
0
      subPredBuf.bufs[COMP_Y].buf  = pcYuvDst.bufs[COMP_Y].buf + xStart + yStart * dstStride[COMP_Y];
1444
0
      if( cu.chromaFormat != CHROMA_400 )
1445
0
      {
1446
0
        subPredBuf.bufs[COMP_Cb].buf = pcYuvDst.bufs[COMP_Cb].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMP_Cb]);
1447
0
        subPredBuf.bufs[COMP_Cr].buf = pcYuvDst.bufs[COMP_Cr].buf + (xStart >> scaleX) + ((yStart >> scaleY) * dstStride[COMP_Cr]);
1448
0
      }
1449
1450
0
      xWeightedAverage( subCu, predBuf[L0], predBuf[L1], subPredBuf, bioAppliedType[num] );
1451
1452
0
      num++;
1453
0
    }
1454
0
  }
1455
0
}
1456
1457
/// Checks whether the reference area spanned by an affine sub-block exceeds a fixed
/// limit. \p a, \p b, \p c, \p d are the affine MV deltas; \p predType == 3 selects
/// the single combined test, any other value runs two separate tests.
/// \return true if any evaluated area is larger than its limit.
bool InterPredInterpolation::isSubblockVectorSpreadOverLimit(int a, int b, int c, int d, int predType)
{
  const int s4        = ( 4 << 11 );
  const int filterTap = 6;

  if( predType == 3 )
  {
    // extent over all four corner displacements
    const int maxHor = std::max( std::max( 0, 4 * a + s4 ), std::max( 4 * c, 4 * a + 4 * c + s4 ) );
    const int minHor = std::min( std::min( 0, 4 * a + s4 ), std::min( 4 * c, 4 * a + 4 * c + s4 ) );
    const int maxVer = std::max( std::max( 0, 4 * b ), std::max( 4 * d + s4, 4 * b + 4 * d + s4 ) );
    const int minVer = std::min( std::min( 0, 4 * b ), std::min( 4 * d + s4, 4 * b + 4 * d + s4 ) );

    const int refBlkWidth  = ( ( maxHor - minHor ) >> 11 ) + filterTap + 3;
    const int refBlkHeight = ( ( maxVer - minVer ) >> 11 ) + filterTap + 3;

    return refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 9 );
  }

  // first test: extent derived from a and b
  int refBlkWidth  = std::max( 0, 4 * a + s4 ) - std::min( 0, 4 * a + s4 );
  int refBlkHeight = std::max( 0, 4 * b ) - std::min( 0, 4 * b );
  refBlkWidth      = ( refBlkWidth  >> 11 ) + filterTap + 3;
  refBlkHeight     = ( refBlkHeight >> 11 ) + filterTap + 3;

  if( refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 5 ) )
  {
    return true;
  }

  // second test: extent derived from c and d
  refBlkWidth  = std::max( 0, 4 * c ) - std::min( 0, 4 * c );
  refBlkHeight = std::max( 0, 4 * d + s4 ) - std::min( 0, 4 * d + s4 );
  refBlkWidth  = ( refBlkWidth  >> 11 ) + filterTap + 3;
  refBlkHeight = ( refBlkHeight >> 11 ) + filterTap + 3;

  return refBlkWidth * refBlkHeight > ( filterTap + 5 ) * ( filterTap + 9 );
}
1496
1497
/** Affine motion-compensated prediction of one component of a CU, processed in
 *  AFFINE_MIN_BLOCK_SIZE x AFFINE_MIN_BLOCK_SIZE sub-blocks.
 *
 *  For every sub-block a motion vector is derived from the three MVs in _mv, the reference
 *  samples are interpolated, and (when enablePROF ends up true) the result is refined through
 *  xFpProfGradFilter / xFpApplyPROF before being written to the destination.
 *
 *  \param compID      component to predict
 *  \param cu          coding unit; supplies size, position, affineType, chromaFormat, interDir and cs
 *  \param refPic      reference picture; samples are read through getRecoBuf(compID)
 *  \param _mv         three MVs, read as mvLT = _mv[0], mvRT = _mv[1], mvLB = _mv[2]
 *                     (mvLB only enters the model for AFFINEMODEL_6PARAM)
 *  \param dstPic      destination; only dstPic.bufs[compID] is written
 *  \param bi          forwarded to xFpApplyPROF; without PROF the filters get isLast = !bi
 *  \param clpRng      clipping range forwarded to the filters and to xFpApplyPROF
 *  \param refPicList  selects the row of m_dMvBuf holding the PROF delta-MV table
 *
 *  Side effects visible here: writes m_storedMv (per sub-block MVs), m_dMvBuf[refPicList],
 *  m_gradBuf[0..1] and m_filteredBlockTmp[1][compID] as scratch.
 */
void InterPredInterpolation::xPredAffineBlk(const ComponentID compID, const CodingUnit& cu, const Picture* refPic, const Mv* _mv, PelUnitBuf& dstPic, const bool bi, const ClpRng& clpRng, const RefPicList refPicList)
1498
0
{
1499
0
  const ChromaFormat chFmt = cu.chromaFormat;
1500
0
  int iScaleX = getComponentScaleX(compID, chFmt);
1501
0
  int iScaleY = getComponentScaleY(compID, chFmt);
1502
1503
0
  Mv mvLT = _mv[0];
1504
0
  Mv mvRT = _mv[1];
1505
0
  Mv mvLB = _mv[2];
1506
1507
  // get affine sub-block width and height
1508
0
  const int width = cu.Y().width;
1509
0
  const int height = cu.Y().height;
1510
0
  int blockWidth = AFFINE_MIN_BLOCK_SIZE;
1511
0
  int blockHeight = AFFINE_MIN_BLOCK_SIZE;
1512
1513
0
  CHECK(blockWidth  > (width >> iScaleX), "Sub Block width  > Block width");
1514
0
  CHECK(blockHeight > (height >> iScaleY), "Sub Block height > Block height");
1515
  // row pitch (in entries) of the m_storedMv grid indexed below
0
  const int MVBUFFER_SIZE = MAX_CU_SIZE / MIN_PU_SIZE;
1516
1517
  // block size in samples of the current component
0
  const int cxWidth = width >> iScaleX;
1518
0
  const int cxHeight = height >> iScaleY;
1519
0
  const int iHalfBW = blockWidth >> 1;
1520
0
  const int iHalfBH = blockHeight >> 1;
1521
1522
  // the affine model below is evaluated with iBit extra fractional bits
0
  const int iBit = MAX_CU_DEPTH;
1523
0
  int iDMvHorX = 0;
1524
0
  int iDMvHorY = 0;
1525
0
  int iDMvVerX = 0;
1526
0
  int iDMvVerY = 0;
1527
1528
  // MV change per sample in x: (mvRT - mvLT) * 2^iBit / cxWidth
0
  iDMvHorX = (mvRT - mvLT).hor * (1 <<(iBit - Log2(cxWidth)));
1529
0
  iDMvHorY = (mvRT - mvLT).ver * (1 <<(iBit - Log2(cxWidth)));
1530
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
1531
0
  {
    // MV change per sample in y: (mvLB - mvLT) * 2^iBit / cxHeight
1532
0
    iDMvVerX = (mvLB - mvLT).hor * (1 <<(iBit - Log2(cxHeight)));
1533
0
    iDMvVerY = (mvLB - mvLT).ver * (1 <<(iBit - Log2(cxHeight)));
1534
0
  }
1535
0
  else
1536
0
  {
    // otherwise the vertical gradients are derived from the horizontal ones; mvLB is not used
1537
0
    iDMvVerX = -iDMvHorY;
1538
0
    iDMvVerY = iDMvHorX;
1539
0
  }
1540
1541
0
  int iMvScaleHor = mvLT.hor * (1 << iBit);
1542
0
  int iMvScaleVer = mvLT.ver * (1 << iBit);
1543
0
  const PPS &pps = *cu.cs->pps;
1544
0
  const SPS &sps = *cu.cs->sps;
  // clamping range for the final sub-block MVs, derived from the picture size, the CU position,
  // maxCUSize and an 8-sample offset, expressed with iMvShift fractional bits
1545
0
  const int iMvShift = 4;
1546
0
  const int iOffset = 8;
1547
0
  const int iHorMax = (pps.picWidthInLumaSamples + iOffset - cu.Y().x - 1) << iMvShift;
1548
0
  const int iHorMin = (-(int)cu.cs->pcv->maxCUSize - iOffset - (int)cu.Y().x + 1) * (1 << iMvShift);
1549
0
  const int iVerMax = (pps.picHeightInLumaSamples + iOffset - cu.Y().y - 1) << iMvShift;
1550
0
  const int iVerMin = (-(int)cu.cs->pcv->maxCUSize - iOffset - (int)cu.Y().y + 1) * (1 << iMvShift);
1551
  
1552
  // right shift applied by roundAffineMv to the scaled model output
0
  const int shift = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
1553
0
  const bool subblkMVSpreadOverLimit = isSubblockVectorSpreadOverLimit(iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, cu.interDir);
1554
1555
  // PROF is used only if all of the following conditions hold
0
  bool enablePROF = sps.PROF && (!m_skipPROF) && (compID == COMP_Y);
1556
0
  enablePROF &= (!cu.cs->picHeader->disProfFlag);
1557
  // not when all MVs that enter the model are identical
0
  enablePROF &= !((cu.affineType == AFFINEMODEL_6PARAM && _mv[0] == _mv[1] && _mv[0] == _mv[2]) || (cu.affineType == AFFINEMODEL_4PARAM && _mv[0] == _mv[1]));
1558
0
  enablePROF &= !subblkMVSpreadOverLimit;
1559
  // with m_encOnly set and checkLDC not set, additionally require one gradient magnitude above profThres
0
  const int profThres = 1 << (iBit + (m_isBi ? 1 : 0));
1560
0
  enablePROF &= !m_encOnly || cu.slice->checkLDC || iDMvHorX > profThres || iDMvHorY > profThres || iDMvVerX > profThres || iDMvVerY > profThres || iDMvHorX < -profThres || iDMvHorY < -profThres || iDMvVerX < -profThres || iDMvVerY < -profThres;
1561
  // current and reference picture must have the same luma dimensions
0
  enablePROF &= pps.picWidthInLumaSamples == refPic->cs->pps->picWidthInLumaSamples && pps.picHeightInLumaSamples == refPic->cs->pps->picHeightInLumaSamples;
1562
1563
  // with PROF the interpolation filters never produce the final output themselves
0
  bool isLast = enablePROF ? false : !bi;
1564
1565
0
  const int cuExtW = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_W * 2;
1566
0
  const int cuExtH = AFFINE_MIN_BLOCK_SIZE + PROF_BORDER_EXT_H * 2;
1567
1568
  // gradient scratch buffers sized for one sub-block plus border
0
  PelBuf gradXExt(m_gradBuf[0], cuExtW, cuExtH);
1569
0
  PelBuf gradYExt(m_gradBuf[1], cuExtW, cuExtH);
1570
1571
  // width of the bordered sub-block, rounded up to a multiple of 8
0
  int dstExtW = (((blockWidth + PROF_BORDER_EXT_W * 2 + 7) >> 3) << 3);
1572
0
  int dstExtH = (blockHeight + PROF_BORDER_EXT_H * 2);
1573
1574
  // temporary that receives the interpolated sub-block (plus border) when PROF is on
0
  PelBuf dstExtBuf(m_filteredBlockTmp[1][compID], dstExtW, dstExtH);
1575
1576
0
  PelBuf& dstBuf = dstPic.bufs[compID];
1577
1578
  // PROF delta-MV tables: horizontal part first, vertical part 16 entries later
0
  int *dMvScaleHor = m_dMvBuf[refPicList];
1579
0
  int *dMvScaleVer = m_dMvBuf[refPicList] + 16;
1580
1581
0
  if (enablePROF)
1582
0
  {
    // Build the blockWidth x blockHeight table of per-sample delta MVs. The table does not
    // depend on w/h, so it is computed once and reused for every sub-block.
1583
0
    int* dMvH = dMvScaleHor;
1584
0
    int* dMvV = dMvScaleVer;
1585
0
    int quadHorX = 4 * iDMvHorX ;
1586
0
    int quadHorY = 4 * iDMvHorY ;
1587
0
    int quadVerX = 4 * iDMvVerX ;
1588
0
    int quadVerY = 4 * iDMvVerY ;
1589
1590
    // entry for the top-left sample
0
    dMvH[0] = ((iDMvHorX + iDMvVerX) * 2) - ((quadHorX + quadVerX)  * 2);
1591
0
    dMvV[0] = ((iDMvHorY + iDMvVerY) * 2) - ((quadHorY + quadVerY)  * 2);
1592
1593
    // first row: step horizontally
0
    for (int w = 1; w < blockWidth; w++)
1594
0
    {
1595
0
      dMvH[w] = dMvH[w - 1] + quadHorX;
1596
0
      dMvV[w] = dMvV[w - 1] + quadHorY;
1597
0
    }
1598
1599
0
    dMvH += blockWidth;
1600
0
    dMvV += blockWidth;
    // remaining rows: previous row plus the vertical step
1601
0
    for (int h = 1; h < blockHeight; h++)
1602
0
    {
1603
0
      for (int w = 0; w < blockWidth; w++)
1604
0
      {
1605
0
        dMvH[w] = dMvH[w - blockWidth] + quadVerX;
1606
0
        dMvV[w] = dMvV[w - blockWidth] + quadVerY;
1607
0
      }
1608
0
      dMvH += blockWidth;
1609
0
      dMvV += blockWidth;
1610
0
    }
1611
1612
0
    const int mvShift  = 8;
1613
0
    const int dmvLimit = ( 1 << 5 ) - 1;
1614
1615
    // round by mvShift and clip to [-dmvLimit, dmvLimit]; the function pointer, when set, is
    // presumably an optimized equivalent of the scalar loop -- TODO confirm
0
    if (!g_pelBufOP.roundIntVector)
1616
0
    {
1617
0
      for (int idx = 0; idx < blockWidth * blockHeight; idx++)
1618
0
      {
1619
0
        roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift);
1620
0
        dMvScaleHor[idx] = Clip3(-dmvLimit, dmvLimit, dMvScaleHor[idx]);
1621
0
        dMvScaleVer[idx] = Clip3(-dmvLimit, dmvLimit, dMvScaleVer[idx]);
1622
0
      }
1623
0
    }
1624
0
    else
1625
0
    {
1626
0
      int sz = blockWidth * blockHeight;
1627
0
      g_pelBufOP.roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit);
1628
0
      g_pelBufOP.roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit);
1629
0
    }
1630
0
  }
1631
1632
0
  int scaleXLuma = getComponentScaleX(COMP_Y, chFmt);
1633
0
  int scaleYLuma = getComponentScaleY(COMP_Y, chFmt);
  // When cu.mccNoLuma() is true, the luma sub-block MVs are derived here (on the Cb call, for
  // non-4:4:4 formats) and written to m_storedMv, which the chroma path of the main loop reads.
1634
0
  if( cu.mccNoLuma() && ( compID == COMP_Cb ) && cu.chromaFormat != CHROMA_444 )
1635
0
  {
1636
0
    CHECK(compID == COMP_Y, "Chroma only subblock MV calculation should not apply to Luma");
1637
0
    int lumaBlockWidth = AFFINE_MIN_BLOCK_SIZE;
1638
0
    int lumaBlockHeight = AFFINE_MIN_BLOCK_SIZE;
1639
1640
0
    CHECK(lumaBlockWidth > (width >> scaleXLuma), "Sub Block width  > Block width");
1641
0
    CHECK(lumaBlockHeight > (height >> scaleYLuma), "Sub Block height > Block height");
1642
1643
0
    const int cxWidthLuma = width >> scaleXLuma;
1644
0
    const int cxHeightLuma = height >> scaleYLuma;
1645
0
    const int halfBWLuma = lumaBlockWidth >> 1;
1646
0
    const int halfBHLuma = lumaBlockHeight >> 1;
1647
1648
    // same gradient derivation as above, but using the luma block dimensions
0
    int dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma;
1649
0
    dMvHorXLuma = (mvRT - mvLT).hor * (1 << (iBit - floorLog2(cxWidthLuma)));
1650
0
    dMvHorYLuma = (mvRT - mvLT).ver * (1 <<  (iBit - floorLog2(cxWidthLuma)));
1651
0
    if (cu.affineType == AFFINEMODEL_6PARAM)
1652
0
    {
1653
0
      dMvVerXLuma = (mvLB - mvLT).hor * (1 << (iBit - floorLog2(cxHeightLuma)));
1654
0
      dMvVerYLuma = (mvLB - mvLT).ver * (1 << (iBit - floorLog2(cxHeightLuma)));
1655
0
    }
1656
0
    else
1657
0
    {
1658
0
      dMvVerXLuma = -dMvHorYLuma;
1659
0
      dMvVerYLuma = dMvHorXLuma;
1660
0
    }
1661
1662
0
    const bool subblkMVSpreadOverLimitLuma = isSubblockVectorSpreadOverLimit(dMvHorXLuma, dMvHorYLuma, dMvVerXLuma, dMvVerYLuma, cu.interDir);
1663
1664
    // get luma MV block by block
1665
0
    for (int h = 0; h < cxHeightLuma; h += lumaBlockHeight)
1666
0
    {
1667
0
      for (int w = 0; w < cxWidthLuma; w += lumaBlockWidth)
1668
0
      {
1669
0
        int mvScaleTmpHor, mvScaleTmpVer;
1670
0
        if (!subblkMVSpreadOverLimitLuma)
1671
0
        {
          // model evaluated at the sub-block centre
1672
0
          mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (halfBWLuma + w) + dMvVerXLuma * (halfBHLuma + h);
1673
0
          mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (halfBWLuma + w) + dMvVerYLuma * (halfBHLuma + h);
1674
0
        }
1675
0
        else
1676
0
        {
          // spread over limit: every sub-block uses the MV evaluated at the block centre
1677
0
          mvScaleTmpHor = iMvScaleHor + dMvHorXLuma * (cxWidthLuma >> 1) + dMvVerXLuma * (cxHeightLuma >> 1);
1678
0
          mvScaleTmpVer = iMvScaleVer + dMvHorYLuma * (cxWidthLuma >> 1) + dMvVerYLuma * (cxHeightLuma >> 1);
1679
0
        }
1680
1681
0
        roundAffineMv(mvScaleTmpHor, mvScaleTmpVer, shift);
1682
0
        Mv tmpMv(mvScaleTmpHor, mvScaleTmpVer);
1683
0
        tmpMv.clipToStorageBitDepth();
1684
0
        mvScaleTmpHor = tmpMv.hor;
1685
0
        mvScaleTmpVer = tmpMv.ver;
1686
1687
0
        m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(mvScaleTmpHor, mvScaleTmpVer);
1688
0
      }
1689
0
    }
1690
0
  }
1691
  // get prediction block by block
1692
0
  const CPelBuf refBuf     = refPic->getRecoBuf(compID);
1693
1694
0
  const int puX = cu.blocks[compID].x;
1695
0
  const int puY = cu.blocks[compID].y;
1696
1697
0
  for (int h = 0; h < cxHeight; h += blockHeight)
1698
0
  {
1699
0
    for (int w = 0; w < cxWidth; w += blockWidth)
1700
0
    {
1701
0
      int iMvScaleTmpHor, iMvScaleTmpVer;
      // luma (or any component in 4:4:4): derive the MV from the model and remember it in m_storedMv
1702
0
      if (compID == COMP_Y || cu.chromaFormat == CHROMA_444)
1703
0
      {
1704
0
        if (!subblkMVSpreadOverLimit)
1705
0
        {
1706
0
          iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (iHalfBW + w) + iDMvVerX * (iHalfBH + h);
1707
0
          iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h);
1708
0
        }
1709
0
        else
1710
0
        {
1711
0
          iMvScaleTmpHor = iMvScaleHor + iDMvHorX * (cxWidth >> 1) + iDMvVerX * (cxHeight >> 1);
1712
0
          iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (cxWidth >> 1) + iDMvVerY * (cxHeight >> 1);
1713
0
        }
1714
1715
0
        roundAffineMv(iMvScaleTmpHor, iMvScaleTmpVer, shift);
1716
0
        Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
1717
0
        tmpMv.clipToStorageBitDepth();
1718
0
        iMvScaleTmpHor = tmpMv.hor;
1719
0
        iMvScaleTmpVer = tmpMv.ver;
1720
1721
        // clip and scale
        // (the MV is stored before the picture-boundary clamp below is applied)
1722
0
        m_storedMv[h / AFFINE_MIN_BLOCK_SIZE * MVBUFFER_SIZE + w / AFFINE_MIN_BLOCK_SIZE].set(iMvScaleTmpHor, iMvScaleTmpVer);
1723
        //   if( scalingRatio == SCALE_1X ) 
1724
0
        {
1725
0
          iMvScaleTmpHor = std::min<int>(iHorMax, std::max<int>(iHorMin, iMvScaleTmpHor));
1726
0
          iMvScaleTmpVer = std::min<int>(iVerMax, std::max<int>(iVerMin, iMvScaleTmpVer));
1727
0
        }
1728
0
      }
1729
0
      else
1730
0
      {
        // subsampled chroma: sum two stored MVs (the entry at the scaled-up position and the
        // entry offset by iScaleX columns / iScaleY rows), then halve with rounding
1731
0
        Mv curMv = m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE) * MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE)] +
1732
0
          m_storedMv[((h << iScaleY) / AFFINE_MIN_BLOCK_SIZE + iScaleY)* MVBUFFER_SIZE + ((w << iScaleX) / AFFINE_MIN_BLOCK_SIZE + iScaleX)];
1733
0
        roundAffineMv(curMv.hor, curMv.ver, 1);
1734
1735
0
        curMv.hor = std::min<int>(iHorMax, std::max<int>(iHorMin, curMv.hor));
1736
0
        curMv.ver = std::min<int>(iVerMax, std::max<int>(iVerMin, curMv.ver));
1737
1738
0
        iMvScaleTmpHor = curMv.hor;
1739
0
        iMvScaleTmpVer = curMv.ver;
1740
0
      }
1741
1742
0
      CHECKD( m_ifpLines && !CU::isMvInRangeFPP( puY + h, blockHeight, iMvScaleTmpVer, m_ifpLines, *pps.pcv, iScaleY ), "xPredAffineBlk: FPP MV restriction failed!\n" );
1743
      // get the MV in high precision
      // (4 fractional bits without subsampling, 5 with subsampling in that direction)
1744
0
      int xFrac, yFrac, xInt, yInt;
1745
1746
0
      if (!iScaleX)
1747
0
      {
1748
0
        xInt = iMvScaleTmpHor >> 4;
1749
0
        xFrac = iMvScaleTmpHor & 15;
1750
0
      }
1751
0
      else
1752
0
      {
1753
0
        xInt = iMvScaleTmpHor >> 5;
1754
0
        xFrac = iMvScaleTmpHor & 31;
1755
0
      }
1756
0
      if (!iScaleY)
1757
0
      {
1758
0
        yInt = iMvScaleTmpVer >> 4;
1759
0
        yFrac = iMvScaleTmpVer & 15;
1760
0
      }
1761
0
      else
1762
0
      {
1763
0
        yInt = iMvScaleTmpVer >> 5;
1764
0
        yFrac = iMvScaleTmpVer & 31;
1765
0
      }
1766
1767
      // reference sample addressed by the integer MV part; destination sub-block in dstBuf
0
      const Pel* ref = refBuf.buf;
1768
0
      ref           +=   puX + xInt + w;
1769
0
      ref           += ( puY + yInt + h ) * refBuf.stride;
1770
0
      Pel* dst       = dstBuf.buf + w + h * dstBuf.stride;
1771
1772
0
      int refStride = refBuf.stride;
1773
0
      int dstStride = dstBuf.stride;
1774
1775
0
      int bw = blockWidth;
1776
0
      int bh = blockHeight;
1777
1778
0
      if( enablePROF )
1779
0
      {
        // interpolate into the interior of the bordered temporary instead of dstBuf
1780
0
        dst = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
1781
0
        dstStride = dstExtBuf.stride;
1782
0
      }
1783
1784
      // filter selection: both fractions non-zero -> filter4x4; yFrac == 0 -> horizontal filter
      // (also taken when both fractions are zero); otherwise vertical filter
0
      if( xFrac && yFrac )
1785
0
      {
1786
0
        m_if.filter4x4( compID, ref, refStride, dst, dstStride, 4, 4, xFrac, yFrac, isLast, chFmt, clpRng );
1787
0
      }
1788
0
      else if( !yFrac )
1789
0
      {
1790
0
        m_if.filterHor( compID, ref, refStride, dst, dstStride, bw, bh, xFrac, isLast, chFmt, clpRng );
1791
0
      }
1792
0
      else if( xFrac == 0 )
1793
0
      {
1794
0
        m_if.filterVer( compID, ref, refStride, dst, dstStride, bw, bh, yFrac, true, isLast, chFmt, clpRng );
1795
0
      }
1796
      
1797
0
      if (enablePROF)
1798
0
      {
        // note: this local 'shift' shadows the function-level 'shift' used for roundAffineMv
1799
0
        const unsigned shift = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
        // offsets selecting which integer reference sample is copied into the border
1800
0
        const int xOffset = xFrac >> 3;
1801
0
        const int yOffset = yFrac >> 3;
1802
1803
0
        const int refOffset = (blockHeight + 1) * refStride;
1804
0
        const int dstOffset = (blockHeight + 1)* dstStride;
1805
1806
        // fill the border row above and the border row below the sub-block (blockWidth + 2 samples each)
0
        const Pel* refPel = ref - (1 - yOffset) * refStride + xOffset - 1;
1807
0
        Pel* dstPel = dst - dstStride - 1;
1808
0
        for (int pw = 0; pw < blockWidth + 2; pw++)
1809
0
        {
1810
0
          dstPel[pw] = leftShiftU(refPel[pw], shift) - (Pel)IF_INTERNAL_OFFS;
1811
0
          dstPel[pw + dstOffset] = leftShiftU(refPel[pw + refOffset], shift) - (Pel)IF_INTERNAL_OFFS;
1812
0
        }
1813
1814
        // fill the border column left and right of every sub-block row
0
        refPel = ref + yOffset * refBuf.stride + xOffset;
1815
0
        dstPel = dst;
1816
0
        for (int ph = 0; ph < blockHeight; ph++, refPel += refStride, dstPel += dstStride)
1817
0
        {
1818
0
          dstPel[-1] = leftShiftU(refPel[-1], shift) - (Pel)IF_INTERNAL_OFFS;
1819
0
          dstPel[blockWidth] = leftShiftU(refPel[blockWidth], shift) - (Pel)IF_INTERNAL_OFFS;
1820
0
        }
1821
1822
0
        PelBuf gradXBuf = gradXExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2);
1823
0
        PelBuf gradYBuf = gradYExt.subBuf(0, 0, blockWidth + 2, blockHeight + 2);
1824
1825
        // gradients over the bordered sub-block
0
        xFpProfGradFilter(dstExtBuf.buf, dstExtBuf.stride, blockWidth + 2, blockHeight + 2, gradXBuf.stride, gradXBuf.buf, gradYBuf.buf, clpRng.bd);
1826
1827
0
        const int shiftNum = std::max<int>(2, (IF_INTERNAL_PREC - clpRng.bd));
1828
0
        const Pel offset = (1 << (shiftNum - 1)) + IF_INTERNAL_OFFS;
1829
0
        Pel* src = dstExtBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
1830
0
        Pel* gX = gradXBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
1831
0
        Pel* gY = gradYBuf.bufAt(PROF_BORDER_EXT_W, PROF_BORDER_EXT_H);
1832
1833
0
        Pel*  dstY = dstBuf.bufAt(w, h);
1834
1835
        // combine interpolated samples, gradients and the delta-MV table into dstBuf at (w, h)
0
        xFpApplyPROF(dstY, dstBuf.stride, src, dstExtBuf.stride, blockWidth, blockHeight, gX, gY, gradXBuf.stride, dMvScaleHor, dMvScaleVer, blockWidth, bi, shiftNum, offset, clpRng);
1836
0
      }
1837
0
    }
1838
0
  }
1839
0
}
1840
1841
bool InterPredInterpolation::xIsAffineMvInRangeFPP( const CodingUnit &cu, const Mv* _mv, const int ifpLines, const int mvPrecShift )
1842
0
{
1843
0
  const PreCalcValues& pcv = *cu.cs->pcv;
1844
0
  if( cu.ly() >= ( ( pcv.heightInCtus - 1 - ifpLines ) << pcv.maxCUSizeLog2 ) )
1845
0
    return true;
1846
1847
0
  const ChromaFormat chFmt = cu.chromaFormat;
1848
0
  const int width       = cu.Y().width;
1849
0
  const int height      = cu.Y().height;
1850
0
  const int nBW  = AFFINE_MIN_BLOCK_SIZE;
1851
0
  const int nBH = AFFINE_MIN_BLOCK_SIZE;
1852
0
  const int iHalfBW     = nBW >> 1;
1853
0
  const int iHalfBH     = nBH >> 1;
1854
0
  const int iBit        = MAX_CU_DEPTH;
1855
0
  const int shift       = iBit - 4 + MV_FRACTIONAL_BITS_INTERNAL;
1856
0
  Mv mvLT = _mv[0];
1857
0
  Mv mvRT = _mv[1];
1858
0
  Mv mvLB = _mv[2];
1859
1860
0
  int iDMvHorX = 0;
1861
0
  int iDMvHorY = 0;
1862
0
  int iDMvVerX = 0;
1863
0
  int iDMvVerY = 0;
1864
1865
0
  const int iMvScaleVer = mvLT.ver * (1 << iBit);
1866
1867
0
  int iScaleX = getChannelTypeScaleX(CH_C, chFmt);
1868
0
  int iScaleY = getChannelTypeScaleY(CH_C, chFmt);
1869
0
  const int cxWidth  = width;
1870
0
  const int cxHeight = height;
1871
1872
0
  iDMvHorX = (mvRT - mvLT).hor * (1 << (iBit - Log2(cxWidth)));
1873
0
  iDMvHorY = (mvRT - mvLT).ver * (1 <<(iBit - Log2(cxWidth)));
1874
0
  if (cu.affineType == AFFINEMODEL_6PARAM)
1875
0
  {
1876
0
    iDMvVerX = (mvLB - mvLT).hor * (1 <<(iBit - Log2(cxHeight)));
1877
0
    iDMvVerY = (mvLB - mvLT).ver * (1 <<(iBit - Log2(cxHeight)));
1878
0
  }
1879
0
  else
1880
0
  {
1881
0
    iDMvVerX = -iDMvHorY;
1882
0
    iDMvVerY = iDMvHorX;
1883
0
  }
1884
0
  const bool subblkMVSpreadOverLimit = InterPrediction::isSubblockVectorSpreadOverLimit(iDMvHorX, iDMvHorY, iDMvVerX, iDMvVerY, cu.interDir);
1885
1886
0
  const int yRefMax     = ( ( ( cu.ly() >> pcv.maxCUSizeLog2 ) + ifpLines + 1 ) << pcv.maxCUSizeLog2 ) - 1;
1887
0
  const int dctifMarginVerBot = 4;
1888
1889
0
  auto roundMvVal = [&](int mvVal, int shift)
1890
0
  {
1891
0
    const int nOffset = 1 << (shift - 1);
1892
0
    mvVal = (mvVal + nOffset - (mvVal >= 0)) >> shift;
1893
0
    return mvVal;
1894
0
  };
1895
0
  auto calcAffineMv = [&](int w, int h)
1896
0
  {
1897
0
    int iMvScaleTmpVer;
1898
0
    if(!subblkMVSpreadOverLimit)
1899
0
    {
1900
0
      iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (iHalfBW + w) + iDMvVerY * (iHalfBH + h);
1901
0
    }
1902
0
    else
1903
0
    {
1904
0
      iMvScaleTmpVer = iMvScaleVer + iDMvHorY * (cxWidth >> 1) + iDMvVerY * (cxHeight >> 1);
1905
0
    }
1906
0
    iMvScaleTmpVer = roundMvVal(iMvScaleTmpVer, shift);
1907
0
    return iMvScaleTmpVer;
1908
0
  };
1909
1910
0
  auto checkMvLineSync = [&](int yMv, int blkBot, const int scaleVer = 0 )
1911
0
  {
1912
0
    if( ( ( blkBot ) + (yMv >> ( mvPrecShift + scaleVer) ) > ( yRefMax >> scaleVer ) ) )
1913
0
      return false;
1914
0
    return true;
1915
0
  };
1916
1917
0
  const int filterMargin = dctifMarginVerBot - 1;
1918
0
  int x = cu.lx();
1919
0
  int y = cu.ly();
1920
1921
  // luma
1922
0
  Position off00 (0, 0);
1923
0
  Position blk00 (x + off00.x, y + off00.y);
1924
0
  int mvVer00 = calcAffineMv(off00.x, off00.y);
1925
0
  if( !checkMvLineSync( mvVer00, blk00.y + filterMargin + nBH ) ) 
1926
0
    return false;
1927
1928
0
  Position off01 (width - nBW, 0);
1929
0
  Position blk01 (x + off01.x, y + off01.y);
1930
0
  int mvVer01 = calcAffineMv(off01.x, off01.y);
1931
0
  if( !checkMvLineSync( mvVer01, blk00.y + filterMargin + nBH ) ) 
1932
0
    return false;
1933
1934
0
  Position off10 (0, height - nBH);
1935
0
  Position blk10 (x + off10.x, y + off10.y);
1936
0
  int mvVer10 = calcAffineMv(off10.x, off10.y);
1937
0
  if( !checkMvLineSync( mvVer10, blk10.y + filterMargin + nBH  ) ) 
1938
0
    return false;
1939
1940
0
  Position off11 (width - nBW, height - nBH);
1941
0
  Position blk11 (x + off11.x, y + off11.y);
1942
0
  int mvVer11 = calcAffineMv(off11.x, off11.y);
1943
0
  if( !checkMvLineSync( mvVer11, blk10.y + filterMargin + nBH  ) ) 
1944
0
    return false;
1945
1946
  // chroma
1947
0
  if( cu.lwidth() == 8 && cu.lheight() == 8 )
1948
0
  {
1949
0
    if(iScaleX || iScaleY)
1950
0
    {
1951
0
      if(iScaleY)
1952
0
      {
1953
0
        const int blkBot = ((blk00.y + filterMargin) >> iScaleY) + nBH;
1954
0
        if(!checkMvLineSync( roundMvVal( mvVer00 + mvVer11, 1 ), blkBot, iScaleY ))
1955
0
          return false;
1956
0
      }
1957
0
      else
1958
0
      {
1959
0
        if(!checkMvLineSync( roundMvVal( mvVer00 + mvVer01, 1 ), blk00.y + filterMargin + nBH, iScaleY ))
1960
0
          return false;
1961
0
        if(!checkMvLineSync( roundMvVal( mvVer10 + mvVer11, 1 ), blk10.y + filterMargin + nBH, iScaleY ))
1962
0
          return false;
1963
0
      }
1964
0
    }
1965
0
  }
1966
0
  else
1967
0
  {
1968
0
    if(iScaleX || iScaleY)
1969
0
    {
1970
0
      int blkBot = ((blk00.y + filterMargin) >> iScaleY) + nBH;
1971
0
      int mvVer00_ = calcAffineMv(off00.x + nBW, off00.y + (iScaleY ? nBH: 0) );
1972
0
      if(!checkMvLineSync( roundMvVal( mvVer00 + mvVer00_, 1 ), blkBot, iScaleY ))
1973
0
        return false;
1974
1975
0
      int _mvVer01 =           calcAffineMv( off01.x - nBW, off01.y       );
1976
0
      int mvVer01_ = iScaleY ? calcAffineMv( off01.x      , off01.y + nBH ): mvVer01;
1977
0
      if(!checkMvLineSync( roundMvVal( _mvVer01 + mvVer01_, 1 ), blkBot, iScaleY ))
1978
0
        return false;
1979
1980
0
      blkBot = ((blk10.y - (iScaleY ? nBH: 0) + filterMargin) >> iScaleY) + nBH;
1981
0
      int _mvVer10 = iScaleY ? calcAffineMv( off10.x      , off10.y - nBH ): mvVer10;
1982
0
      int mvVer10_ =           calcAffineMv( off10.x + nBW, off10.y       );
1983
0
      if(!checkMvLineSync( roundMvVal( _mvVer10 + mvVer10_, 1 ), blkBot, iScaleY ))
1984
0
        return false;
1985
1986
0
      int _mvVer11 = calcAffineMv( off11.x - nBW, off11.y - (iScaleY ? nBH: 0) );
1987
0
      if(!checkMvLineSync( roundMvVal( _mvVer11 + mvVer11, 1 ), blkBot, iScaleY ))
1988
0
        return false;
1989
0
    }
1990
0
  }
1991
1992
0
  return true;
1993
0
}
1994
1995
/** Copies the reconstructed samples of every valid block of the CU's TUs into m_IBCBuffer.
 *
 *  The destination position is the block position wrapped horizontally by the (component-scaled)
 *  buffer width and vertically by the (component-scaled) CTU size.
 *
 *  \param cu  coding unit whose reconstruction is copied
 */
void InterPrediction::xFillIBCBuffer(CodingUnit& cu)
{
  for( auto& tu : CU::traverseTUs( cu ) )
  {
    for( const CompArea& blk : tu.blocks )
    {
      if( blk.valid() )
      {
        const unsigned int ctuSize = cu.cs->slice->sps->CTUSize;
        const int scaleHor         = getComponentScaleX( blk.compID, cu.chromaFormat );
        const int scaleVer         = getComponentScaleY( blk.compID, cu.chromaFormat );
        const int ctuLog2Ver       = floorLog2( ctuSize ) - scaleVer;

        // wrap the block position into the buffer
        const int wrappedX = blk.x & ( ( m_IBCBufferWidth >> scaleHor ) - 1 );
        const int wrappedY = blk.y & ( ( 1 << ctuLog2Ver ) - 1 );

        const CompArea target = CompArea( blk.compID, cu.chromaFormat, Position( wrappedX, wrappedY ), Size( blk.width, blk.height ) );
        CPelBuf reco          = cu.cs->getRecoBuf( blk );
        PelBuf  ibcBuf        = m_IBCBuffer.getBuf( target );

        ibcBuf.copyFrom( reco );
      }
    }
  }
}
2019
2020
/** Fills the prediction buffer of one component from m_IBCBuffer using the CU's block vector.
 *
 *  The reference position is wrapped into the buffer. If the block crosses the right edge of
 *  the buffer, it is copied in two pieces, the second one starting at column 0.
 *
 *  \param cu       coding unit; its REF_PIC_LIST_0 MV (index 0) is used as block vector
 *  \param predBuf  prediction buffers; only predBuf.bufs[compID] is written
 *  \param compID   component to predict
 */
void InterPrediction::xIntraBlockCopyIBC(CodingUnit& cu, PelUnitBuf& predBuf, const ComponentID compID)
{
  const unsigned int ctuSize = cu.cs->slice->sps->CTUSize;
  const int scaleHor         = getComponentScaleX( compID, cu.chromaFormat );
  const int scaleVer         = getComponentScaleY( compID, cu.chromaFormat );
  const int ctuLog2Ver       = floorLog2( ctuSize ) - scaleVer;

  // block vector in integer-sample precision
  Mv blockVec = cu.mv[REF_PIC_LIST_0][0];
  blockVec.changePrecision( MV_PRECISION_INTERNAL, MV_PRECISION_INT );

  // reference position: luma uses the vector as is, chroma uses the scaled-down vector
  int refX = ( compID == COMP_Y ) ? cu.Y().x + blockVec.hor : cu.Cb().x + ( blockVec.hor >> scaleHor );
  int refY = ( compID == COMP_Y ) ? cu.Y().y + blockVec.ver : cu.Cb().y + ( blockVec.ver >> scaleVer );

  const auto bufWidth = m_IBCBufferWidth >> scaleHor;
  refX &= ( bufWidth - 1 );
  refY &= ( ( 1 << ctuLog2Ver ) - 1 );

  PelBuf& pred = predBuf.bufs[compID];

  if( refX + pred.width <= bufWidth )
  {
    // the whole block lies inside the buffer: single copy
    const CompArea from = CompArea( compID, cu.chromaFormat, Position( refX, refY ), Size( pred.width, pred.height ) );
    const CPelBuf  src  = m_IBCBuffer.getBuf( from );
    pred.copyFrom( src );
    return;
  }

  // wrap around: left part of the prediction comes from the right end of the buffer ...
  const int headWidth = bufWidth - refX;
  const CompArea headArea = CompArea( compID, cu.chromaFormat, Position( refX, refY ), Size( headWidth, pred.height ) );
  const CPelBuf  headSrc  = m_IBCBuffer.getBuf( headArea );
  PelBuf headDst = PelBuf( pred.bufAt( Position( 0, 0 ) ), pred.stride, Size( headWidth, pred.height ) );
  headDst.copyFrom( headSrc );

  // ... and the remaining part comes from the start of the buffer
  const int tailWidth = refX + pred.width - bufWidth;
  const CompArea tailArea = CompArea( compID, cu.chromaFormat, Position( 0, refY ), Size( tailWidth, pred.height ) );
  const CPelBuf  tailSrc  = m_IBCBuffer.getBuf( tailArea );
  PelBuf tailDst = PelBuf( pred.bufAt( Position( bufWidth - refX, 0 ) ), pred.stride, Size( tailWidth, pred.height ) );
  tailDst.copyFrom( tailSrc );
}
2063
2064
void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize)
2065
0
{
2066
0
  const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize));
2067
0
  m_IBCBuffer.getBuf(area).fill(-1);
2068
0
}
2069
2070
void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos)
2071
0
{
2072
0
  const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_IBCBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize));
2073
0
  m_IBCBuffer.getBuf(area).fill(-1);
2074
0
}
2075
bool InterPrediction::isLumaBvValidIBC(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv)
2076
0
{
2077
0
  if (((yCb + yBv) & (ctuSize - 1)) + height > ctuSize)
2078
0
  {
2079
0
    return false;
2080
0
  }
2081
0
  int refTLx = xCb + xBv;
2082
0
  int refTLy = (yCb + yBv) & (ctuSize - 1);
2083
0
  PelBuf buf = m_IBCBuffer.Y();
2084
0
  for (int x = 0; x < width; x += 4)
2085
0
  {
2086
0
    for (int y = 0; y < height; y += 4)
2087
0
    {
2088
0
      if (buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
2089
0
      if (buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
2090
0
      if (buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
2091
0
      if (buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
2092
0
    }
2093
0
  }
2094
0
  return true;
2095
0
}
2096
2097
} // namespace vvenc
2098
2099
//! \}
2100