Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvdec/source/Lib/CommonLib/InterPrediction.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/** \file     InterPrediction.cpp
44
    \brief    inter prediction class
45
*/
46
47
#include "InterPrediction.h"
48
49
#include "Buffer.h"
50
#include "UnitTools.h"
51
#include "CommonLib/TimeProfiler.h"
52
53
#include <memory.h>
54
#include <algorithm>
55
#include <cmath>
56
57
namespace vvdec
58
{
59
60
template<bool bi>
61
void applyPROFCore( Pel* dst, ptrdiff_t dstStride, const Pel* src, const Pel* gradX, const Pel* gradY, const int* dMvX, const int* dMvY, int shiftNum, Pel offset, const ClpRng& clpRng )
62
0
{
63
0
  static constexpr ptrdiff_t srcStride = 6;
64
0
  static constexpr int width = 4;
65
0
  static constexpr int height = 4;
66
67
0
  int idx = 0;
68
0
  const int dILimit = 1 << std::max<int>(clpRng.bd + 1, 13);
69
70
0
  for (int h = 0; h < height; h++)
71
0
  {
72
0
    for (int w = 0; w < width; w++)
73
0
    {
74
0
      int32_t dI = dMvX[idx] * gradX[w] + dMvY[idx] * gradY[w];
75
0
      dI = Clip3(-dILimit, dILimit - 1, dI);
76
0
      dst[w] = src[w] + dI;
77
0
      if (!bi)
78
0
      {
79
0
        dst[w] = (dst[w] + offset) >> shiftNum;
80
0
        dst[w] = ClipPel(dst[w], clpRng);
81
0
      }
82
0
      idx++;
83
0
    }
84
0
    gradX += 4;
85
0
    gradY += 4;
86
0
    dst += dstStride;
87
0
    src += srcStride;
88
0
  }
89
0
}
Unexecuted instantiation: void vvdec::applyPROFCore<false>(short*, long, short const*, short const*, short const*, int const*, int const*, int, short, vvdec::ClpRngTemplate<short> const&)
Unexecuted instantiation: void vvdec::applyPROFCore<true>(short*, long, short const*, short const*, short const*, int const*, int const*, int, short, vvdec::ClpRngTemplate<short> const&)
90
91
92
// Shifts `numer` right by the index of the most significant set bit of `denom`,
// i.e. returns numer >> floor( log2( denom ) ).
//
//   numer : value to be shifted (signed, shifted as-is)
//   denom : value whose MSB position selects the shift amount
//
// A non-positive `denom` has no MSB index; 0 is returned in that case instead of
// performing a shift by a negative amount. The MSB search runs on an unsigned
// copy so that no signed `1 << 31` is ever formed, which also keeps the shift at
// 30 for denom >= 2^30.
static inline int rightShiftMSB(int numer, int denom)
{
  if (denom <= 0)
  {
    return 0;
  }

  int shiftIdx = 0;
  for (unsigned int rest = static_cast<unsigned int>(denom) >> 1; rest != 0; rest >>= 1)
  {
    ++shiftIdx;
  }

  return numer >> shiftIdx;
}
108
109
static inline void addBIOAvg4(const Pel* src0, ptrdiff_t src0Stride, const Pel* src1, ptrdiff_t src1Stride, Pel *dst, ptrdiff_t dstStride, const Pel *gradX0, const Pel *gradX1, const Pel *gradY0, const Pel*gradY1, ptrdiff_t gradStride, int width, int height, int tmpx, int tmpy, int shift, int offset, const ClpRng& clpRng)
110
0
{
111
0
  int b = 0;
112
113
0
  for (int y = 0; y < height; y++)
114
0
  {
115
0
    for (int x = 0; x < width; x += 4)
116
0
    {
117
0
      b = tmpx * (gradX0[x] - gradX1[x]) + tmpy * (gradY0[x] - gradY1[x]);
118
0
      dst[x] = ClipPel((int16_t)rightShift((src0[x] + src1[x] + b + offset), shift), clpRng);
119
120
0
      b = tmpx * (gradX0[x + 1] - gradX1[x + 1]) + tmpy * (gradY0[x + 1] - gradY1[x + 1]);
121
0
      dst[x + 1] = ClipPel((int16_t)rightShift((src0[x + 1] + src1[x + 1] + b + offset), shift), clpRng);
122
123
0
      b = tmpx * (gradX0[x + 2] - gradX1[x + 2]) + tmpy * (gradY0[x + 2] - gradY1[x + 2]);
124
0
      dst[x + 2] = ClipPel((int16_t)rightShift((src0[x + 2] + src1[x + 2] + b + offset), shift), clpRng);
125
126
0
      b = tmpx * (gradX0[x + 3] - gradX1[x + 3]) + tmpy * (gradY0[x + 3] - gradY1[x + 3]);
127
0
      dst[x + 3] = ClipPel((int16_t)rightShift((src0[x + 3] + src1[x + 3] + b + offset), shift), clpRng);
128
0
    }
129
0
    dst += dstStride;       src0 += src0Stride;     src1 += src1Stride;
130
0
    gradX0 += gradStride; gradX1 += gradStride; gradY0 += gradStride; gradY1 += gradStride;
131
0
  }
132
0
}
133
134
// Accumulates five sums over a 6x6 window into the caller-provided counters
// (the counters are added to, not reset).
//
// Per sample:
//   gx = ( gradX0 + gradX1 ) >> 1
//   gy = ( gradY0 + gradY1 ) >> 1
//   di = ( srcY1 >> 4 ) - ( srcY0 >> 4 )
// and
//   *sumAbsGX     += |gx|
//   *sumAbsGY     += |gy|
//   *sumDIX       += sign( gx ) * di
//   *sumDIY       += sign( gy ) * di
//   *sumSignGY_GX += sign( gy ) * gx
//
// xu, yu and bitDepth are not used by this implementation.
void calcBIOSums(const Pel* srcY0Tmp, const Pel* srcY1Tmp, const Pel* gradX0, const Pel* gradX1, const Pel* gradY0, const Pel* gradY1, int xu, int yu, const int src0Stride, const int src1Stride, const int widthG, const int bitDepth, int* sumAbsGX, int* sumAbsGY, int* sumDIX, int* sumDIY, int* sumSignGY_GX)
{
  const int sampleShift = 4;
  const int gradShift   = 1;
  const int windowSize  = 6;

  for (int row = 0; row < windowSize; row++)
  {
    for (int col = 0; col < windowSize; col++)
    {
      const int gx = (gradX0[col] + gradX1[col]) >> gradShift;
      const int gy = (gradY0[col] + gradY1[col]) >> gradShift;
      const int di = (int)((srcY1Tmp[col] >> sampleShift) - (srcY0Tmp[col] >> sampleShift));

      // -1, 0 or +1; multiplying by it selects -v, 0 or v
      const int signGX = (gx > 0) - (gx < 0);
      const int signGY = (gy > 0) - (gy < 0);

      *sumAbsGX     += signGX * gx;
      *sumAbsGY     += signGY * gy;
      *sumDIX       += signGX * di;
      *sumDIY       += signGY * di;
      *sumSignGY_GX += signGY * gx;
    }

    srcY1Tmp += src1Stride;
    srcY0Tmp += src0Stride;
    gradX0   += widthG;
    gradX1   += widthG;
    gradY0   += widthG;
    gradY1   += widthG;
  }
}
161
162
static void BiOptFlowCore(const Pel* srcY0,const Pel* srcY1,const Pel* gradX0,const Pel* gradX1,const Pel* gradY0,const Pel* gradY1,const int width,const int height,Pel* dstY,const ptrdiff_t dstStride,const int shiftNum,const int  offset,const int  limit, const ClpRng& clpRng, const int bitDepth)
163
0
{
164
0
  int           widthG  = width      + BIO_ALIGN_SIZE;
165
0
  int           stridePredMC = width + BIO_ALIGN_SIZE;
166
0
  int           offsetPos = widthG*BIO_EXTEND_SIZE + BIO_EXTEND_SIZE;
167
0
  int xUnit = (width >> 2);
168
0
  int yUnit = (height >> 2);
169
170
0
  const Pel*    srcY0Temp;
171
0
  const Pel*    srcY1Temp;
172
0
  int OffPos;
173
0
  Pel *dstY0;
174
0
  for (int yu = 0; yu < yUnit; yu++)
175
0
  {
176
0
    for (int xu = 0; xu < xUnit; xu++)
177
0
    {
178
0
      OffPos=offsetPos + ((yu*widthG + xu) << 2);
179
180
0
      {
181
0
        int tmpx = 0, tmpy = 0;
182
0
        int sumAbsGX = 0, sumAbsGY = 0, sumDIX = 0, sumDIY = 0;
183
0
        int sumSignGY_GX = 0;
184
185
0
        const Pel* pGradX0Tmp = gradX0 + (xu << 2) + (yu << 2) * widthG;
186
0
        const Pel* pGradX1Tmp = gradX1 + (xu << 2) + (yu << 2) * widthG;
187
0
        const Pel* pGradY0Tmp = gradY0 + (xu << 2) + (yu << 2) * widthG;
188
0
        const Pel* pGradY1Tmp = gradY1 + (xu << 2) + (yu << 2) * widthG;
189
0
        const Pel* SrcY1Tmp = srcY1 + (xu << 2) + (yu << 2) * stridePredMC;
190
0
        const Pel* SrcY0Tmp = srcY0 + (xu << 2) + (yu << 2) * stridePredMC;
191
192
0
        calcBIOSums(SrcY0Tmp, SrcY1Tmp, pGradX0Tmp, pGradX1Tmp, pGradY0Tmp, pGradY1Tmp, xu, yu, stridePredMC, stridePredMC, widthG, bitDepth, &sumAbsGX, &sumAbsGY, &sumDIX, &sumDIY, &sumSignGY_GX);
193
0
        tmpx = (sumAbsGX == 0 ? 0 : rightShiftMSB(sumDIX *4, sumAbsGX));
194
0
        tmpx = Clip3(-limit, limit, tmpx);
195
0
        int     mainsGxGy = sumSignGY_GX >> 12;
196
0
        int     secsGxGy = sumSignGY_GX & ((1 << 12) - 1);
197
0
        int     tmpData = tmpx * mainsGxGy;
198
0
        tmpData = ((tmpData *(1<< 12)) + tmpx*secsGxGy) >> 1;
199
0
        tmpy = (sumAbsGY == 0 ? 0 : rightShiftMSB(((sumDIY *4) - tmpData), sumAbsGY));
200
0
        tmpy = Clip3(-limit, limit, tmpy);
201
0
        srcY0Temp = srcY0 + (stridePredMC + 1) + ((yu*stridePredMC + xu) << 2);
202
0
        srcY1Temp = srcY1 + (stridePredMC + 1) + ((yu*stridePredMC + xu) << 2);
203
0
        dstY0 = dstY + ((yu*dstStride + xu) << 2);
204
0
        addBIOAvg4(srcY0Temp, stridePredMC, srcY1Temp, stridePredMC, dstY0, dstStride, gradX0 + OffPos, gradX1 + OffPos, gradY0 + OffPos, gradY1 + OffPos, widthG, (1 << 2), (1 << 2), (int)tmpx, (int)tmpy, shiftNum, offset, clpRng);
205
0
      }
206
0
    }  // xu
207
0
  }  // yu
208
209
210
0
}
211
212
template<bool PAD = true>
213
void gradFilterCore(Pel* pSrc, ptrdiff_t srcStride, int width, int height, ptrdiff_t gradStride, Pel* gradX, Pel* gradY, const int bitDepth)
214
0
{
215
0
  Pel* srcTmp   = PAD ? pSrc  + srcStride  + 1 : pSrc;
216
0
  Pel* gradXTmp = PAD ? gradX + gradStride + 1 : gradX;
217
0
  Pel* gradYTmp = PAD ? gradY + gradStride + 1 : gradY;
218
0
  int  shift1 = 6;
219
  
220
0
  const int widthInside  = PAD ? width  - 2 * BIO_EXTEND_SIZE : width;
221
0
  const int heightInside = PAD ? height - 2 * BIO_EXTEND_SIZE : height;
222
223
0
  for (int y = 0; y < heightInside; y++)
224
0
  {
225
0
    for (int x = 0; x < widthInside; x++)
226
0
    {
227
0
      gradYTmp[x] = ( srcTmp[x + srcStride] >> shift1 ) - ( srcTmp[x - srcStride] >> shift1 );
228
0
      gradXTmp[x] = ( srcTmp[x + 1] >> shift1 ) - ( srcTmp[x - 1] >> shift1 );
229
0
    }
230
0
    gradXTmp += gradStride;
231
0
    gradYTmp += gradStride;
232
0
    srcTmp += srcStride;
233
0
  }
234
235
0
  if (PAD)
236
0
  {
237
0
    gradXTmp = gradX + gradStride + 1;
238
0
    gradYTmp = gradY + gradStride + 1;
239
0
    srcTmp   = pSrc  + srcStride  + 1;
240
241
0
    for (int y = 0; y < heightInside; y++)
242
0
    {
243
0
      gradXTmp[-1] = gradXTmp[0];
244
0
      gradXTmp[width - 2 * BIO_EXTEND_SIZE] = gradXTmp[width - 2 * BIO_EXTEND_SIZE - 1];
245
0
      gradXTmp += gradStride;
246
247
0
      gradYTmp[-1] = gradYTmp[0];
248
0
      gradYTmp[width - 2 * BIO_EXTEND_SIZE] = gradYTmp[width - 2 * BIO_EXTEND_SIZE - 1];
249
0
      gradYTmp += gradStride;
250
      
251
0
      srcTmp[-1] = srcTmp[0];
252
0
      srcTmp[width - 2 * BIO_EXTEND_SIZE] = srcTmp[width - 2 * BIO_EXTEND_SIZE - 1];
253
0
      srcTmp += srcStride;
254
0
    }
255
256
0
    gradXTmp = gradX + gradStride;
257
0
    gradYTmp = gradY + gradStride;
258
0
    srcTmp   = pSrc  + srcStride;
259
260
0
    ::memcpy(gradXTmp - gradStride, gradXTmp, sizeof(Pel)*(width));
261
0
    ::memcpy(gradXTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradXTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
262
0
    ::memcpy(gradYTmp - gradStride, gradYTmp, sizeof(Pel)*(width));
263
0
    ::memcpy(gradYTmp + (height - 2 * BIO_EXTEND_SIZE)*gradStride, gradYTmp + (height - 2 * BIO_EXTEND_SIZE - 1)*gradStride, sizeof(Pel)*(width));
264
0
    ::memcpy(srcTmp   - srcStride , srcTmp, sizeof(Pel)*(width));
265
0
    ::memcpy(srcTmp   + (height - 2 * BIO_EXTEND_SIZE)*srcStride , srcTmp   + (height - 2 * BIO_EXTEND_SIZE - 1)*srcStride , sizeof(Pel)*(width));
266
0
  }
267
0
}
Unexecuted instantiation: void vvdec::gradFilterCore<true>(short*, long, int, int, long, short*, short*, int)
Unexecuted instantiation: void vvdec::gradFilterCore<false>(short*, long, int, int, long, short*, short*, int)
268
269
void PaddBIOCore(const Pel* refPel,Pel* dstPel,unsigned width,const int shift)
270
0
{
271
0
#define LFTSHFT(y,shift) y<<shift           // simplification because shift is never < 0
272
273
0
  for( int w = 0; w < width + 2 * BIO_EXTEND_SIZE; w++ )
274
0
  {
275
0
    Pel val   = LFTSHFT( refPel[w], shift );
276
0
    dstPel[w] = val - (Pel)IF_INTERNAL_OFFS;
277
0
  }
278
279
280
0
}
281
282
template<int padSize>
283
void paddingCore(Pel *ptr, ptrdiff_t stride, int width, int height)
284
0
{
285
  /*left and right padding*/
286
0
  Pel *ptrTemp1 = ptr;
287
0
  Pel *ptrTemp2 = ptr + (width - 1);
288
0
  ptrdiff_t offset = 0;
289
0
  for (int i = 0; i < height; i++)
290
0
  {
291
0
    offset = stride * i;
292
0
    for (int j = 1; j <= padSize; j++)
293
0
    {
294
0
      *(ptrTemp1 - j + offset) = *(ptrTemp1 + offset);
295
0
      *(ptrTemp2 + j + offset) = *(ptrTemp2 + offset);
296
0
    }
297
0
  }
298
  /*Top and Bottom padding*/
299
0
  int numBytes = (width + padSize + padSize) * sizeof(Pel);
300
0
  ptrTemp1 = (ptr - padSize);
301
0
  ptrTemp2 = (ptr + (stride * (height - 1)) - padSize);
302
0
  for (int i = 1; i <= padSize; i++)
303
0
  {
304
0
    memcpy(ptrTemp1 - (i * stride), (ptrTemp1), numBytes);
305
0
    memcpy(ptrTemp2 + (i * stride), (ptrTemp2), numBytes);
306
0
  }
307
0
}
Unexecuted instantiation: void vvdec::paddingCore<2>(short*, long, int, int)
Unexecuted instantiation: void vvdec::paddingCore<1>(short*, long, int, int)
308
309
template<int padSize>
310
void prefetchPadCore( const Pel* src, const ptrdiff_t srcStride, Pel* dst, const ptrdiff_t dstStride, int width, int height )
311
0
{
312
0
  g_pelBufOP.copyBuffer( ( const char* ) src, srcStride * sizeof( Pel ), ( char* ) dst, dstStride * sizeof( Pel ), width * sizeof( Pel ), height );
313
314
0
  paddingCore<padSize>( dst, dstStride, width, height );
315
0
}
Unexecuted instantiation: void vvdec::prefetchPadCore<2>(short const*, long, short*, long, int, int)
Unexecuted instantiation: void vvdec::prefetchPadCore<1>(short const*, long, short*, long, int, int)
316
317
// ====================================================================================================================
318
// Constructor / destructor / initialize
319
// ====================================================================================================================
320
321
322
// Installs the plain C++ kernels for the gradient filters and the optical-flow
// step, and selects clipMvInPic as the motion vector clipping function.
// The chroma format starts as NUM_CHROMA_FORMAT, which init() checks for to
// detect that nothing has been set up yet.
InterPrediction::InterPrediction()
  : BioGradFilter ( gradFilterCore<true> )
  , profGradFilter( gradFilterCore<false> )
  , BiOptFlow     ( BiOptFlowCore )
  , roundIntVector( nullptr )
{
  m_currChromaFormat = NUM_CHROMA_FORMAT;

  clipMv = clipMvInPic;
}
332
333
// All cleanup is delegated to destroy().
InterPrediction::~InterPrediction()
{
  this->destroy();
}
337
338
void InterPrediction::destroy()
339
0
{
340
0
  m_IBCBuffer.destroy();
341
0
}
342
343
// (Re-)initialises the predictor.
//
//   pcRdCost        : stored in m_pcRdCost
//   chromaFormatIDC : chroma format to set up for
//   ctuSize         : used to size the IBC buffer (g_IBCBufferSize / ctuSize wide, ctuSize high)
//   enableOpt       : when true, the SIMD init routine of the compiled-in target
//                     (x86 and/or ARM) is called after the C++ kernels were installed
//
// The kernel tables are only set up when no format is active yet or when the
// chroma format differs from the active one. The IBC buffer is created whenever
// it is currently empty.
void InterPrediction::init( RdCost* pcRdCost, ChromaFormat chromaFormatIDC, const int ctuSize, bool enableOpt )
{
  m_pcRdCost = pcRdCost;

  // a different chroma format than last time: drop what exists and start over
  const bool formatChanged = ( m_currChromaFormat != chromaFormatIDC );
  if( formatChanged )
  {
    destroy();
    m_currChromaFormat = NUM_CHROMA_FORMAT;
  }

  // NUM_CHROMA_FORMAT means nothing has been initialised yet
  const bool needsSetup = ( m_currChromaFormat == NUM_CHROMA_FORMAT );
  if( needsSetup )
  {
    VALGRIND_MEMCLEAR( m_bdofBlock );
    VALGRIND_MEMCLEAR( m_tmpBlock  );

    m_iRefListIdx = -1;

    VALGRIND_MEMCLEAR( m_gradX0 );
    VALGRIND_MEMCLEAR( m_gradY0 );
    VALGRIND_MEMCLEAR( m_gradX1 );
    VALGRIND_MEMCLEAR( m_gradY1 );

    m_if.initInterpolationFilter( true );

    // default (non-SIMD) kernels
    PaddBIO        = PaddBIOCore;
    applyPROF[0]   = applyPROFCore<false>;
    applyPROF[1]   = applyPROFCore<true>;
    prefetchPad[0] = prefetchPadCore<2>; // luma
    prefetchPad[1] = prefetchPadCore<2>; // chroma for 444 and 422
    prefetchPad[2] = prefetchPadCore<1>; // chroma for 420

    if( enableOpt )
    {
#if ENABLE_SIMD_OPT_INTER && defined( TARGET_SIMD_X86 )
      initInterPredictionX86();
#endif
#if ENABLE_SIMD_OPT_INTER && defined( TARGET_SIMD_ARM )
      initInterPredictionARM();
#endif
    }
  }

  if( m_IBCBuffer.bufs.empty() )
  {
    m_IBCBufferWidth = g_IBCBufferSize / ctuSize;

    const Area ibcArea( 0, 0, m_IBCBufferWidth, ctuSize );
    m_IBCBuffer.create( UnitArea( chromaFormatIDC, ibcArea ) );
  }

  m_currChromaFormat = chromaFormatIDC;
}
392
393
// ====================================================================================================================
394
// Public member functions
395
// ====================================================================================================================
396
397
bool InterPrediction::xCheckIdenticalMotion( const CodingUnit &cu )
398
0
{
399
0
  const Slice &slice = *cu.slice;
400
401
0
  if( slice.isInterB() && !cu.pps->getWPBiPred() )
402
0
  {
403
0
    if( cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 )
404
0
    {
405
0
      int RefPOCL0 = slice.getRefPOC( REF_PIC_LIST_0, cu.refIdx[0] );
406
0
      int RefPOCL1 = slice.getRefPOC( REF_PIC_LIST_1, cu.refIdx[1] );
407
408
0
      if( RefPOCL0 == RefPOCL1 )
409
0
      {
410
0
        if( !cu.affineFlag() )
411
0
        {
412
0
          if( cu.mv[0][0] == cu.mv[1][0] )
413
0
          {
414
0
            return true;
415
0
          }
416
0
        }
417
0
        else
418
0
        {
419
0
          if( cu.mv[0][0] == cu.mv[1][0] && cu.mv[0][1] == cu.mv[1][1] && ( cu.affineType() == AFFINEMODEL_4PARAM || cu.mv[0][2] == cu.mv[1][2] ) )
420
0
          {
421
0
            return true;
422
0
          }
423
0
        }
424
0
      }
425
0
    }
426
0
  }
427
428
0
  return false;
429
0
}
430
431
// Motion compensation for a CU whose motion is stored per sub-block.
//
// The CU is walked in sub-blocks of ( 1 << ATMVP_SUB_BLOCK_SIZE ) samples per
// dimension (or the full CU dimension if it holds only one sub-block). Along the
// longer CU dimension, consecutive sub-blocks with equal motion info are joined
// into one run so that motionCompensation() is called once per run. A run longer
// than 16 that is not a multiple of 16 is predicted in two calls: first the
// multiple-of-16 part, then the remainder.
//
// m_subPuMC is true for the duration of the loop and false afterwards.
void InterPrediction::xSubPuMC( CodingUnit& cu, PelUnitBuf& predBuf )
{
  // location and size of the current CU (luma)
  const Position cuOrigin = cu.lumaPos();
  const Size     cuExtent = cu.lumaSize();

  const int partsPerRow = std::max<SizeType>( cuExtent.width  >> ATMVP_SUB_BLOCK_SIZE, 1u );
  const int partsPerCol = std::max<SizeType>( cuExtent.height >> ATMVP_SUB_BLOCK_SIZE, 1u );
  const int subHeight   = partsPerCol == 1 ? cuExtent.height : 1 << ATMVP_SUB_BLOCK_SIZE;
  const int subWidth    = partsPerRow == 1 ? cuExtent.width  : 1 << ATMVP_SUB_BLOCK_SIZE;

  // scratch CU that receives area and motion of each run before it is predicted
  CodingUnit subCu;

  subCu.cs           = cu.cs;
  subCu.slice        = cu.slice;
  subCu.pps          = cu.pps;
  subCu.sps          = cu.sps;
  subCu.setChType    ( cu.chType() );
  subCu.setPredMode  ( cu.predMode() );
  subCu.UnitArea::operator=( cu );

  subCu.setMergeType ( MRG_TYPE_DEFAULT_N );
  subCu.setAffineFlag( false );
  subCu.setGeoFlag   ( false );
  subCu.setBcwIdx    ( cu.BcwIdx() );
  subCu.setImv       ( cu.imv() );
  subCu.setSmvdMode  ( cu.smvdMode() );

  // join sub-pus containing the same motion: "sec" is the dimension along which
  // runs are formed, "fst" the other one
  const bool verMC    = cuExtent.height > cuExtent.width;
  const int  fstStart = ( verMC ? cuOrigin.x : cuOrigin.y );
  const int  secStart = ( verMC ? cuOrigin.y : cuOrigin.x );
  const int  fstEnd   = ( verMC ? cuOrigin.x + cuExtent.width  : cuOrigin.y + cuExtent.height );
  const int  secEnd   = ( verMC ? cuOrigin.y + cuExtent.height : cuOrigin.x + cuExtent.width  );
  const int  fstStep  = ( verMC ? subWidth  : subHeight );
  const int  secStep  = ( verMC ? subHeight : subWidth  );

#if RPR_FIX
  bool scaled = cu.slice->getRefPic( REF_PIC_LIST_0, 0 )->isRefScaled( cu.pps ) || ( cu.slice->getSliceType() == B_SLICE ? cu.slice->getRefPic( REF_PIC_LIST_1, 0 )->isRefScaled( cu.pps ) : false );
#endif

  // points the scratch CU at the given luma rectangle and predicts it into the
  // matching part of predBuf
  const auto predictArea = [&]( int posX, int posY, int areaWidth, int areaHeight )
  {
    new ( &static_cast< UnitArea& >( subCu ) ) UnitArea( cu.chromaFormat, Area( posX, posY, areaWidth, areaHeight ) );
    PelUnitBuf areaPredBuf = predBuf.subBuf( UnitAreaRelative( cu, subCu ) );

    motionCompensation( subCu, areaPredBuf );
  };

  m_subPuMC = true;

  for( int fstDim = fstStart; fstDim < fstEnd; fstDim += fstStep )
  {
    for( int secDim = secStart; secDim < secEnd; secDim += secStep )
    {
      int x = verMC ? fstDim : secDim;
      int y = verMC ? secDim : fstDim;
      const MotionInfo &curMi = cu.getMotionInfo( Position{ x, y } );

      // extend the run while the following sub-blocks carry the same motion
      int later = secDim + secStep;
      for( ; later < secEnd; later += secStep )
      {
        const MotionInfo &laterMi = verMC ? cu.getMotionInfo( Position{ fstDim, later } ) : cu.getMotionInfo( Position{ later, fstDim } );
#if RPR_FIX
        if( scaled || !( laterMi == curMi ) )
#else
        if( !( laterMi == curMi ) )
#endif
        {
          break;
        }
      }
      const int length = later - secDim;

      int dx = verMC ? subWidth : length;
      int dy = verMC ? length   : subHeight;

      subCu = curMi;

      if( verMC )
      {
        if( ( dy & 15 ) && dy > 16 )
        {
          const int dyPart = dy & ~15;

          predictArea( x, y, dx, dyPart );

          y  += dyPart;
          dy -= dyPart;
        }
      }
      else if( ( dx & 15 ) && dx > 16 )
      {
        const int dxPart = dx & ~15;

        predictArea( x, y, dxPart, dy );

        x  += dxPart;
        dx -= dxPart;
      }

      predictArea( x, y, dx, dy );

      // continue behind the run (the loop header adds secStep again)
      secDim = later - secStep;
    }
  }

  m_subPuMC = false;
}
543
544
// Bi-prediction of a CU in tiles of at most
// MAX_BDOF_APPLICATION_REGION x MAX_BDOF_APPLICATION_REGION luma samples.
//
// For every tile a scratch CU is given the tile's area and the motion info
// stored at the tile's top-left position. List 0 is predicted directly into the
// tile's part of predBuf, list 1 into a buffer on top of m_acYuvPred, and
// xWeightedAverage() (called with its last argument true) combines both into
// predBuf.
void InterPrediction::xSubPuBio(CodingUnit& cu, PelUnitBuf& predBuf )
{
  // location and size of the current CU (luma)
  const Position cuOrigin = cu.lumaPos();
  const Size     cuExtent = cu.lumaSize();

  // scratch CU carrying the properties of cu that the prediction calls read
  CodingUnit subCu;

  subCu.cs           = cu.cs;
  subCu.slice        = cu.slice;
  subCu.pps          = cu.pps;
  subCu.sps          = cu.sps;
  subCu.setChType    ( cu.chType() );
  subCu.setPredMode  ( cu.predMode() );

  subCu.setMergeType ( cu.mergeType() );
  subCu.setMmvdFlag  ( cu.mmvdFlag() );
  subCu.setMergeFlag ( cu.mergeFlag() );
  subCu.setCiipFlag  ( cu.ciipFlag() );
  subCu.setGeoFlag   ( cu.geoFlag() );
//  subCu.mvRefine = cu.mvRefine;
  subCu.setAffineFlag( cu.affineFlag() );
  subCu.refIdx[0]    = cu.refIdx[0];
  subCu.refIdx[1]    = cu.refIdx[1];
  subCu.setBcwIdx    ( cu.BcwIdx() );
  subCu.setImv       ( cu.imv() );
  subCu.setSmvdMode  ( cu.smvdMode() );

  const int  tileHeight = std::min<int>( MAX_BDOF_APPLICATION_REGION, cuExtent.height );
  const int  tileWidth  = std::min<int>( MAX_BDOF_APPLICATION_REGION, cuExtent.width );

  const int  csy = getChannelTypeScaleY( CH_C, cu.chromaFormat );
  const int  csx = getChannelTypeScaleX( CH_C, cu.chromaFormat );

  const int  tileHeightC = tileHeight >> csy;
  const int  tileWidthC  = tileWidth  >> csx;

  // list 1 prediction of one tile
  PelUnitBuf list1Buf( cu.chromaFormat, PelBuf( m_acYuvPred[0], tileWidth, tileHeight ), PelBuf( m_acYuvPred[1], tileWidthC, tileHeightC ), PelBuf( m_acYuvPred[2], tileWidthC, tileHeightC ) );

  // tile-sized view into predBuf; its plane pointers are re-aimed for each tile
  PelUnitBuf tileBuf = predBuf.subBuf( UnitAreaRelative( cu, UnitArea( cu.chromaFormat, Area( 0, 0, tileWidth, tileHeight ) ) ) );

  for( int y = cuOrigin.y, dy = 0; y < cuOrigin.y + cuExtent.height; y += tileHeight, dy += tileHeight )
  {
    for( int x = cuOrigin.x, dx = 0; x < cuOrigin.x + cuExtent.width; x += tileWidth, dx += tileWidth )
    {
      const MotionInfo &tileMi = cu.getMotionInfo( Position{ x, y } );

      new ( &static_cast< UnitArea& >( subCu ) ) UnitArea( cu.chromaFormat, Area( x, y, tileWidth, tileHeight ) );
      subCu = tileMi;

      tileBuf.bufs[0].buf = GET_OFFSET( predBuf.bufs[0].buf, predBuf.bufs[0].stride, dx, dy );
      if( isChromaEnabled( cu.chromaFormat ) )
      {
        const int dxC = dx >> csx;
        const int dyC = dy >> csy;

        tileBuf.bufs[1].buf = GET_OFFSET( predBuf.bufs[1].buf, predBuf.bufs[1].stride, dxC, dyC );
        tileBuf.bufs[2].buf = GET_OFFSET( predBuf.bufs[2].buf, predBuf.bufs[2].stride, dxC, dyC );
      }

      CHECKD( cu.refIdx[0] < 0 || cu.refIdx[1] < 0, "Bi-prediction required for BDOF!" );

      // list 0 goes straight into the output tile, list 1 into the side buffer
      m_iRefListIdx = REF_PIC_LIST_0;
      xPredInterUni( subCu, REF_PIC_LIST_0, tileBuf,  true, true, true, true );
      m_iRefListIdx = REF_PIC_LIST_1;
      xPredInterUni( subCu, REF_PIC_LIST_1, list1Buf, true, true, true, true );

      xWeightedAverage( subCu, tileBuf, list1Buf, tileBuf, cu.sps->getBitDepths(), cu.slice->clpRngs(), true );
    }
  }
}
615
616
void InterPrediction::xPredInterUni( const CodingUnit &cu, const RefPicList &eRefPicList, PelUnitBuf &pcYuvPred, const bool &bi, const bool &bioApplied, const bool luma, const bool chroma )
617
0
{
618
0
  const SPS &    sps     = *cu.sps;
619
0
  const int      iRefIdx = cu.refIdx[eRefPicList];
620
0
  const bool     isIBC   = CU::isIBC( cu );
621
0
  const Picture *refPic  = isIBC ? cu.slice->getPic() : cu.slice->getRefPic( eRefPicList, iRefIdx );
622
0
        bool     affine  = cu.affineFlag();
623
0
  Mv             mv[3];
624
0
  const bool scaled       = refPic ? refPic->isRefScaled( cu.pps ) : false;
625
0
  const auto scalingRatio = cu.slice->getScalingRatio( eRefPicList, iRefIdx );
626
627
0
  CHECKD( !CU::isIBC( cu ) && cu.lwidth() == 4 && cu.lheight() == 4, "invalid 4x4 inter blocks" );
628
629
0
  if( affine )
630
0
  {
631
0
    CHECK( iRefIdx < 0, "iRefIdx incorrect." );
632
633
0
    mv[0] = cu.mv[eRefPicList][0];
634
0
    mv[1] = cu.mv[eRefPicList][1];
635
0
    mv[2] = cu.mv[eRefPicList][2];
636
0
  }
637
0
  else
638
0
  {
639
0
    mv[0] = cu.mv[eRefPicList][0];
640
641
0
    CHECK( !refPic, "xPredInterUni missing ref pic" );
642
643
0
    if( !isIBC && !scaled )
644
0
    {
645
0
      clipMv( mv[0], m_currCuArea.lumaPos(), m_currCuArea.lumaSize(), sps, *cu.pps );
646
0
    }
647
0
  }
648
649
0
  const bool wrapRef = !isIBC && cu.sps->getUseWrapAround() && wrapClipMv( mv[0], cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );
650
651
0
  for( uint32_t comp = COMPONENT_Y; comp < pcYuvPred.bufs.size(); comp++ )
652
0
  {
653
0
    const ComponentID compID = ComponentID( comp );
654
655
0
    if( compID == COMPONENT_Y && !luma   ) continue;
656
0
    if( compID != COMPONENT_Y && !chroma ) continue;
657
658
0
    if( affine )
659
0
    {
660
0
      CHECK( bioApplied, "BIO is not allowed with affine" );
661
0
      m_iRefListIdx = eRefPicList;
662
0
      xPredAffineBlk( compID, cu, refPic, eRefPicList, pcYuvPred, bi, cu.slice->clpRng( compID ), cu.slice->getScalingRatio( eRefPicList, iRefIdx ) );
663
0
    }
664
0
    else
665
0
    {
666
0
      if( !isIBC && scaled )
667
0
      {
668
0
        xPredInterBlkRPR( scalingRatio, *cu.pps, compID, cu.chromaFormat, refPic, mv[0], cu.blocks[compID], pcYuvPred.bufs[compID].width, pcYuvPred.bufs[compID].height, pcYuvPred.bufs[compID].buf, pcYuvPred.bufs[compID].stride, bi, wrapRef, cu.slice->clpRng( compID ), 0, cu.imv() == IMV_HPEL );
669
0
        CHECKD( bioApplied, "BDOF should be disabled with RPR" );
670
0
      }
671
0
      else
672
0
      {
673
0
        xPredInterBlk<false, false>( compID, cu, refPic, mv[0], pcYuvPred.bufs[compID], bi, cu.slice->clpRng( compID ), bioApplied, isIBC, wrapRef );
674
0
      }
675
0
    }
676
0
  }
677
0
}
678
679
// Predicts a CU from one or both reference lists and combines the result in
// pcYuvPred.
//
// With two valid reference indices, list 0 is predicted into pcYuvPred and
// list 1 into a buffer on top of m_acYuvPred. With one valid index only that
// list is predicted into pcYuvPred.
//
// Unless the CU has the geo flag set, the final samples are then produced by
// (first match wins):
//   - xWeightedPredictionBi   if the PPS enables weighted bi-prediction, the
//                             slice is a B slice and BcwIdx is BCW_DEFAULT,
//   - xWeightedPredictionUni  if the PPS enables weighted prediction and the
//                             slice is a P slice,
//   - xWeightedAverage        if both lists were predicted.
void InterPrediction::xPredInterBi( CodingUnit& cu, PelUnitBuf &pcYuvPred )
{
  const Slice &slice = *cu.slice;
  const PPS   &pps   = *cu.pps;

  // list 0 (or the only list) is written in place; list 1 gets its own storage
  PelUnitBuf& predL0 = pcYuvPred;
  PelUnitBuf  predL1 = isChromaEnabled( cu.chromaFormat ) ? PelUnitBuf( cu.chromaFormat, PelBuf( m_acYuvPred[0], pcYuvPred.Y() ), PelBuf( m_acYuvPred[1], pcYuvPred.Cb() ), PelBuf( m_acYuvPred[2], pcYuvPred.Cr() ) ) : PelUnitBuf( cu.chromaFormat, PelBuf( m_acYuvPred[0], pcYuvPred.Y() ) );

  const bool isBiPred = cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0;

  if( isBiPred )
  {
    m_iRefListIdx = REF_PIC_LIST_0;
    xPredInterUni( cu, REF_PIC_LIST_0, predL0, true, false, true, true );
    m_iRefListIdx = REF_PIC_LIST_1;
    xPredInterUni( cu, REF_PIC_LIST_1, predL1, true, false, true, true );
  }
  else
  {
    m_iRefListIdx = cu.refIdx[0] >= 0 ? REF_PIC_LIST_0 : REF_PIC_LIST_1;

    // the `bi` argument is true when a weighted prediction step follows,
    // otherwise it takes the value of the geo flag
    const bool weightedFollows = !cu.geoFlag() && ( ( pps.getUseWP() && slice.getSliceType() == P_SLICE ) || ( pps.getWPBiPred() && slice.getSliceType() == B_SLICE ) );
    const bool biArg           = weightedFollows || cu.geoFlag();

    xPredInterUni( cu, RefPicList( m_iRefListIdx ), pcYuvPred, biArg, false, true, true );
  }

#ifndef NDEBUG
  for( uint32_t refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ )
  {
    if( cu.refIdx[refList] >= 0 )
    {
      RefPicList eRefPicList = refList ? REF_PIC_LIST_1 : REF_PIC_LIST_0;

      CHECKD( CU::isIBC( cu ) && eRefPicList != REF_PIC_LIST_0, "Invalid interdir for ibc mode" );
      CHECKD( CU::isIBC( cu ) && cu.refIdx[refList] != MAX_NUM_REF, "Invalid reference index for ibc mode" );
      CHECKD( CU::isInter( cu ) && cu.refIdx[refList] >= slice.getNumRefIdx( eRefPicList ), "Invalid reference index" );
    }
  }

#endif
  if( cu.geoFlag() )
  {
    return;
  }

  if( pps.getWPBiPred() && slice.getSliceType() == B_SLICE && cu.BcwIdx() == BCW_DEFAULT )
  {
    xWeightedPredictionBi( cu, predL0, isBiPred ? predL1 : predL0, pcYuvPred );
  }
  else if( pps.getUseWP() && slice.getSliceType() == P_SLICE )
  {
    xWeightedPredictionUni( cu, predL0, REF_PIC_LIST_0, pcYuvPred, -1 );
  }
  else if( isBiPred )
  {
    xWeightedAverage( cu, predL0, predL1, pcYuvPred, slice.getSPS()->getBitDepths(), slice.clpRngs(), false );
  }
}
742
743
template<bool altSrc, bool altSize>
744
void InterPrediction::xPredInterBlk( const ComponentID&    compID,
745
                                     const CodingUnit& cu,
746
                                     const Picture*        refPic,
747
                                     Mv                    mv,
748
                                     PelBuf&               dstPic,
749
                                     bool                  bi,
750
                                     const ClpRng&         clpRng,
751
                                     bool                  bioApplied,
752
                                     bool                  isIBC,
753
                                     bool                  wrapRef,
754
                                     SizeType              dmvrWidth,
755
                                     SizeType              dmvrHeight,
756
                                     bool                  bilinearMC,
757
                                     Pel*                  srcPadBuf,
758
                                     ptrdiff_t             srcPadStride )
759
0
{
760
0
  CHECK( srcPadBuf == NULL && altSrc, "wrong" );
761
  
762
0
  const ChromaFormat  chFmt = cu.chromaFormat;
763
0
  const bool          rndRes = !bi;
764
765
0
  const int shiftHor = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleX(compID, chFmt);
766
0
  const int shiftVer = MV_FRACTIONAL_BITS_INTERNAL + getComponentScaleY(compID, chFmt);
767
  
768
0
  const bool useAltHpelIf = cu.imv() == IMV_HPEL;
769
770
0
  const int xFrac     = !isIBC ? mv.hor & ( ( 1 << shiftHor ) - 1 ) : 0;
771
0
  const int yFrac     = !isIBC ? mv.ver & ( ( 1 << shiftVer ) - 1 ) : 0;
772
773
0
  const Pel* refPtr    = nullptr;
774
0
  ptrdiff_t  refStride = 0;
775
0
  if( cu.pps->getNumSubPics() > 1 && cu.pps->getSubPicFromCU( cu ).getTreatedAsPicFlag() )
776
0
  {
777
0
    const int subPicIdx = cu.pps->getSubPicFromCU( cu ).getSubPicIdx();
778
0
    refPtr              = altSrc ? srcPadBuf    : refPic->getSubPicBufPtr   ( subPicIdx, compID, wrapRef );
779
0
    refStride           = altSrc ? srcPadStride : refPic->getSubPicBufStride( subPicIdx, compID, wrapRef );
780
0
  }
781
0
  else
782
0
  {
783
0
    refPtr    = altSrc ? srcPadBuf    : refPic->getRecoBufPtr   ( compID, wrapRef );
784
0
    refStride = altSrc ? srcPadStride : refPic->getRecoBufStride( compID, wrapRef );
785
0
  }
786
787
0
  if( !altSrc )
788
0
  {
789
0
    OFFSET( refPtr, refStride, cu.blocks[compID].x + ( mv.hor >> shiftHor ), cu.blocks[compID].y + ( mv.ver >> shiftVer ) );
790
0
  }
791
792
0
  unsigned width, height;
793
794
0
  if( altSize )
795
0
  {
796
0
    width  = dmvrWidth;
797
0
    height = dmvrHeight;
798
0
  }
799
0
  else
800
0
  {
801
0
    width  = dstPic.width;
802
0
    height = dstPic.height;
803
0
  }
804
805
0
  Pel* dstBuf;
806
0
  ptrdiff_t dstStride;
807
808
0
  if( bioApplied && compID == COMPONENT_Y )
809
0
  {
810
    // change MC output
811
0
    dstStride = width + BIO_ALIGN_SIZE;
812
0
    dstBuf    = m_bdofBlock[m_iRefListIdx] + 2 * dstStride + 1;
813
0
  }
814
0
  else
815
0
  {
816
0
    dstBuf    = dstPic.buf;
817
0
    dstStride = dstPic.stride;
818
0
  }
819
820
    
821
0
  if( yFrac == 0 )
822
0
  {
823
0
    m_if.filterHor( compID, refPtr, refStride, dstBuf, dstStride, width, height, xFrac, rndRes, chFmt, clpRng, bilinearMC ? 1 : 0, useAltHpelIf );
824
0
  }
825
0
  else if( xFrac == 0 )
826
0
  {
827
0
    m_if.filterVer( compID, refPtr, refStride, dstBuf, dstStride, width, height, yFrac, true, rndRes, chFmt, clpRng, bilinearMC ? 1 : 0, useAltHpelIf );
828
0
  }
829
0
  else if( bilinearMC )
830
0
  {
831
0
    m_if.filterN2_2D( compID, refPtr, refStride, dstBuf, dstStride, width, height, xFrac, yFrac, chFmt, clpRng );
832
0
  }
833
0
  else if( width == 4 && height == 4 )
834
0
  {
835
0
    m_if.filter4x4( compID, refPtr, refStride, dstBuf, dstStride, 4, 4, xFrac, yFrac, rndRes, chFmt, clpRng );
836
0
  }
837
0
  else if( width == 16 )
838
0
  {
839
0
    m_if.filter16xH( compID, refPtr, refStride, dstBuf, dstStride, 16, height, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf );
840
0
  }
841
0
  else if( width == 8 )
842
0
  {
843
0
    m_if.filter8xH( compID, refPtr, refStride, dstBuf, dstStride, 8, height, xFrac, yFrac, rndRes, chFmt, clpRng, useAltHpelIf );
844
0
  }
845
0
  else
846
0
  {
847
0
    Pel *tmpBuf = m_tmpBlock;
848
0
    ptrdiff_t tmpStride = dmvrWidth ? dmvrWidth : width;
849
850
0
    int vFilterSize = bilinearMC ? NTAPS_BILINEAR : isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA;
851
852
0
    m_if.filterHor( compID, GET_OFFSETY( refPtr, refStride, -( ( vFilterSize >> 1 ) - 1 ) ), refStride, tmpBuf, tmpStride, width, height + vFilterSize - 1, xFrac, false,         chFmt, clpRng, bilinearMC ? 1 : 0, useAltHpelIf );
853
0
    m_if.filterVer( compID, GET_OFFSETY( tmpBuf, tmpStride,    ( vFilterSize >> 1 ) - 1 ),   tmpStride, dstBuf, dstStride, width, height,                   yFrac, false, rndRes, chFmt, clpRng, bilinearMC ? 1 : 0, useAltHpelIf );
854
0
  }
855
856
0
  if( bioApplied && compID == COMPONENT_Y )
857
0
  {
858
0
    const int   shift   = std::max<int>( 2, ( IF_INTERNAL_PREC - clpRng.bd ) );
859
0
    const int   xOffset = ( xFrac < 8 ) ? 1 : 0;
860
0
    const int   yOffset = ( yFrac < 8 ) ? 1 : 0;
861
0
    const Pel*  refPel  = refPtr + ( 1 - yOffset ) * refStride - xOffset;
862
0
    Pel*        dstPel  = m_bdofBlock[m_iRefListIdx] + 2 * dstStride;
863
864
0
    for( int h = 0; h < height; h++ )
865
0
    {
866
0
      dstPel[0]         = ( refPel[0        ] << shift ) - ( Pel ) IF_INTERNAL_OFFS;
867
0
      dstPel[width + 1] = ( refPel[width + 1] << shift ) - ( Pel ) IF_INTERNAL_OFFS;
868
869
0
      refPel += refStride;
870
0
      dstPel += dstStride;
871
0
    }
872
873
0
    refPel = refPtr - yOffset * refStride - xOffset;
874
0
    dstPel = m_bdofBlock[m_iRefListIdx] + dstStride;
875
876
0
    PaddBIO( refPel, dstPel, width, shift );
877
    
878
0
    refPel = refPtr + ( height + 1 - yOffset ) * refStride - xOffset;
879
0
    dstPel = m_bdofBlock[m_iRefListIdx] + ( height + 2 * BIO_EXTEND_SIZE ) * dstStride;
880
881
0
    PaddBIO( refPel, dstPel, width, shift );
882
0
  }
883
0
}
Unexecuted instantiation: void vvdec::InterPrediction::xPredInterBlk<false, false>(vvdec::ComponentID const&, vvdec::CodingUnit const&, vvdec::Picture const*, vvdec::Mv, vvdec::AreaBuf<short>&, bool, vvdec::ClpRngTemplate<short> const&, bool, bool, bool, unsigned int, unsigned int, bool, short*, long)
Unexecuted instantiation: void vvdec::InterPrediction::xPredInterBlk<true, false>(vvdec::ComponentID const&, vvdec::CodingUnit const&, vvdec::Picture const*, vvdec::Mv, vvdec::AreaBuf<short>&, bool, vvdec::ClpRngTemplate<short> const&, bool, bool, bool, unsigned int, unsigned int, bool, short*, long)
Unexecuted instantiation: void vvdec::InterPrediction::xPredInterBlk<false, true>(vvdec::ComponentID const&, vvdec::CodingUnit const&, vvdec::Picture const*, vvdec::Mv, vvdec::AreaBuf<short>&, bool, vvdec::ClpRngTemplate<short> const&, bool, bool, bool, unsigned int, unsigned int, bool, short*, long)
884
885
// Returns true when the reference area spanned by the affine model exceeds the allowed size.
//
// a, b are the horizontal/vertical components of the per-column MV delta, c, d those of the
// per-row MV delta (xPredAffineBlk passes deltaMvHorX, deltaMvHorY, deltaMvVerX, deltaMvVerY).
// predType is the inter direction of the CU; the value 3 uses one combined check
// (presumably bi-prediction - confirm against the interDir definition), any other value
// uses two separate checks with a smaller limit.
bool InterPrediction::isSubblockVectorSpreadOverLimit( int a, int b, int c, int d, int predType )
{
  const int s4        = ( 4 << 11 );
  const int filterTap = 6;

  // converts a spread in 1/2048 units into a reference block dimension in samples
  const auto toRefSize = [filterTap]( int spread ) { return ( spread >> 11 ) + filterTap + 3; };

  if( predType == 3 )
  {
    const auto horRange = std::minmax( { 0, 4 * a + s4, 4 * c,      4 * a + 4 * c + s4 } );
    const auto verRange = std::minmax( { 0, 4 * b,      4 * d + s4, 4 * b + 4 * d + s4 } );

    const int refBlkWidth  = toRefSize( horRange.second - horRange.first );
    const int refBlkHeight = toRefSize( verRange.second - verRange.first );

    return refBlkWidth * refBlkHeight > ( filterTap + 9 ) * ( filterTap + 9 );
  }

  // distance between the largest and smallest of { 0, v }
  const auto spreadFromZero = []( int v ) { return std::max( 0, v ) - std::min( 0, v ); };

  const int areaLimit = ( filterTap + 9 ) * ( filterTap + 5 );

  // first check uses the per-column delta (a, b)
  if( toRefSize( spreadFromZero( 4 * a + s4 ) ) * toRefSize( spreadFromZero( 4 * b ) ) > areaLimit )
  {
    return true;
  }

  // second check uses the per-row delta (c, d)
  return toRefSize( spreadFromZero( 4 * c ) ) * toRefSize( spreadFromZero( 4 * d + s4 ) ) > areaLimit;
}
924
925
#define CALC_AFFINE_MV_ON_THE_FLY 0
926
927
void InterPrediction::xPredAffineBlk( const ComponentID&        compID,
928
                                      const CodingUnit&     cu,
929
                                      const Picture*            refPic,
930
                                      const RefPicList          refPicList,
931
                                      PelUnitBuf&               dstPic,
932
                                      bool                      bi,
933
                                      const ClpRng&             clpRng,
934
                                      const std::pair<int, int> scalingRatio
935
                                      )
936
0
{
937
0
  const ChromaFormat chFmt = cu.chromaFormat;
938
0
  const int iScaleX = getComponentScaleX( compID, chFmt );
939
0
  const int iScaleY = getComponentScaleY( compID, chFmt );
940
941
0
  const int chromaScaleX = getChannelTypeScaleX( CH_C, chFmt );
942
0
  const int chromaScaleY = getChannelTypeScaleY( CH_C, chFmt );
943
944
0
  const int shiftX = 4 + iScaleX;
945
0
  const int shiftY = 4 + iScaleY;
946
0
  const int maskX  = ( 1 << shiftX ) - 1;
947
0
  const int maskY  = ( 1 << shiftY ) - 1;
948
949
  // get affine sub-block width and height
950
0
  const int width  = cu.lwidth();
951
0
  const int height = cu.lheight();
952
953
0
  static constexpr int blockWidth  = AFFINE_MIN_BLOCK_SIZE;
954
0
  static constexpr int blockHeight = AFFINE_MIN_BLOCK_SIZE;
955
956
0
  const int MVBUFFER_SIZE = ( width / AFFINE_MIN_BLOCK_SIZE ) >> chromaScaleX;
957
958
0
  const int cxWidth  = width  >> iScaleX;
959
0
  const int cxHeight = height >> iScaleY;
960
0
  const SPS &sps    = *cu.sps;
961
0
  const int iMvShift = 4;
962
0
  const int iOffset  = 8;
963
0
  const int iHorMax = ( cu.pps->getPicWidthInLumaSamples()  + iOffset -       cu.lx() - 1 ) *(1<< iMvShift);
964
0
  const int iHorMin = (  -(int)cu.cs->pcv->maxCUWidth       - iOffset -  (int)cu.lx() + 1 ) *(1<< iMvShift);
965
0
  const int iVerMax = ( cu.pps->getPicHeightInLumaSamples() + iOffset -       cu.ly() - 1 ) *(1<< iMvShift);
966
0
  const int iVerMin = (  -(int)cu.cs->pcv->maxCUHeight      - iOffset -  (int)cu.ly() + 1 ) *(1<< iMvShift);
967
0
  const bool clipSubPic = clipMv == clipMvInSubpic;
968
969
0
  const int shift = MAX_CU_DEPTH;
970
971
0
  const Mv &affLT = cu.mv[refPicList][0];
972
0
  const Mv &affRT = cu.mv[refPicList][1];
973
0
  const Mv &affLB = cu.mv[refPicList][2];
974
975
0
  int deltaMvHorX, deltaMvHorY, deltaMvVerX, deltaMvVerY;
976
977
0
  deltaMvHorX = ( affRT - affLT ).getHor() *(1<< ( shift - getLog2( width )));
978
0
  deltaMvHorY = ( affRT - affLT ).getVer() *(1<< ( shift - getLog2( width )));
979
980
0
  if( cu.affineType() == AFFINEMODEL_6PARAM )
981
0
  {
982
0
    deltaMvVerX = ( affLB - affLT ).getHor() *(1<< ( shift - getLog2( height )));
983
0
    deltaMvVerY = ( affLB - affLT ).getVer() *(1<< ( shift - getLog2( height )));
984
0
  }
985
0
  else
986
0
  {
987
0
    deltaMvVerX = -deltaMvHorY;
988
0
    deltaMvVerY =  deltaMvHorX;
989
0
  }
990
991
#if CALC_AFFINE_MV_ON_THE_FLY
992
  const int mvScaleHor = affLT.getHor() << shift;
993
  const int mvScaleVer = affLT.getVer() << shift;
994
995
  static const int halfBW = AFFINE_MIN_BLOCK_SIZE >> 1;
996
  static const int halfBH = AFFINE_MIN_BLOCK_SIZE >> 1;
997
998
#endif
999
0
  const bool subblkMVSpreadOverLimit = InterPrediction::isSubblockVectorSpreadOverLimit( deltaMvHorX, deltaMvHorY, deltaMvVerX, deltaMvVerY, cu.interDir() );
1000
1001
0
  const bool refPicScaled = refPic->isRefScaled( cu.pps );
1002
1003
0
  PelBuf &dstBuf = dstPic.bufs[compID];
1004
1005
0
#if !CALC_AFFINE_MV_ON_THE_FLY
1006
0
  const CMotionBuf mb       = cu.getMotionBuf();
1007
0
  const MotionInfo* curMi   = mb.buf;
1008
0
  const ptrdiff_t miStride  = mb.stride;
1009
0
#endif
1010
0
  Mv* chromaMvFld = m_storedMv;
1011
1012
0
  if( isLuma( compID ) )
1013
0
  {
1014
0
    memset( NO_WARNING_class_memaccess( m_storedMv ), 0, MVBUFFER_SIZE * ( g_miScaling.scaleVer( height ) >> chromaScaleY ) * sizeof( Mv ) );
1015
0
  }
1016
1017
0
  bool enablePROF = ( sps.getUsePROF() ) && ( compID == COMPONENT_Y );
1018
0
  enablePROF &= (! cu.cs->picHeader->getDisProfFlag() );
1019
0
  enablePROF &= !( ( cu.affineType() == AFFINEMODEL_6PARAM && affLT == affRT && affLT == affLB ) || ( cu.affineType() == AFFINEMODEL_4PARAM && affLT == affRT ) );
1020
0
  enablePROF &= !subblkMVSpreadOverLimit;
1021
0
  enablePROF &= !refPicScaled;
1022
1023
0
  bool isLast = enablePROF ? false : !bi;
1024
1025
0
  Pel gradX[36];
1026
0
  Pel gradY[36];
1027
1028
0
  static constexpr int dstExtW = blockWidth  + PROF_BORDER_EXT_W * 2;
1029
0
  static constexpr int dstExtH = blockHeight + PROF_BORDER_EXT_H * 2;
1030
0
  PelBuf dstExtBuf( m_bdofBlock[0], dstExtW, dstExtH );
1031
1032
0
  int dMvScaleHor[16];
1033
0
  int dMvScaleVer[16];
1034
1035
0
  if (enablePROF)
1036
0
  {
1037
0
    int* dMvH = dMvScaleHor;
1038
0
    int* dMvV = dMvScaleVer;
1039
0
    int quadHorX = deltaMvHorX *(1<< 2);
1040
0
    int quadHorY = deltaMvHorY *(1<< 2);
1041
0
    int quadVerX = deltaMvVerX *(1<< 2);
1042
0
    int quadVerY = deltaMvVerY *(1<< 2);
1043
1044
0
    dMvH[0] = ((deltaMvHorX + deltaMvVerX) *2) - ((quadHorX + quadVerX) *2);
1045
0
    dMvV[0] = ((deltaMvHorY + deltaMvVerY) *2) - ((quadHorY + quadVerY) *2);
1046
1047
0
    for (int w = 1; w < blockWidth; w++)
1048
0
    {
1049
0
      dMvH[w] = dMvH[w - 1] + quadHorX;
1050
0
      dMvV[w] = dMvV[w - 1] + quadHorY;
1051
0
    }
1052
1053
0
    dMvH += blockWidth;
1054
0
    dMvV += blockWidth;
1055
0
    for (int h = 1; h < blockHeight; h++)
1056
0
    {
1057
0
      for (int w = 0; w < blockWidth; w++)
1058
0
      {
1059
0
        dMvH[w] = dMvH[w - blockWidth] + quadVerX;
1060
0
        dMvV[w] = dMvV[w - blockWidth] + quadVerY;
1061
0
      }
1062
0
      dMvH += blockWidth;
1063
0
      dMvV += blockWidth;
1064
0
    }
1065
1066
0
    const int mvShift  = 8;
1067
0
    const int dmvLimit = ( 1 << 5 ) - 1;
1068
1069
0
    if (!roundIntVector)
1070
0
    {
1071
0
      for (int idx = 0; idx < blockWidth * blockHeight; idx++)
1072
0
      {
1073
0
        roundAffineMv(dMvScaleHor[idx], dMvScaleVer[idx], mvShift);
1074
0
        dMvScaleHor[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleHor[idx] );
1075
0
        dMvScaleVer[idx] = Clip3( -dmvLimit, dmvLimit, dMvScaleVer[idx] );
1076
0
      }
1077
0
    }
1078
0
    else
1079
0
    {
1080
0
      int sz = blockWidth * blockHeight;
1081
0
      roundIntVector(dMvScaleHor, sz, mvShift, dmvLimit);
1082
0
      roundIntVector(dMvScaleVer, sz, mvShift, dmvLimit);
1083
0
    }
1084
0
  }
1085
  
1086
#if CALC_AFFINE_MV_ON_THE_FLY
1087
  int mvhor, mvver;
1088
1089
  if( subblkMVSpreadOverLimit )
1090
  {
1091
    mvhor = mvScaleHor + deltaMvHorX * ( width >> 1 ) + deltaMvVerX * ( height >> 1 );
1092
    mvver = mvScaleVer + deltaMvHorY * ( width >> 1 ) + deltaMvVerY * ( height >> 1 );
1093
    roundAffineMv( mvhor, mvver, shift );
1094
    mv.hor = mvhor; mv.ver = mvver;
1095
    mv.clipToStorageBitDepth();
1096
  }
1097
#endif
1098
1099
0
  std::array<const Pel*, 2> refBuf{ nullptr, nullptr };
1100
0
  std::array<ptrdiff_t, 2>  refBufStride{ 0, 0 };
1101
0
  if( cu.pps->getNumSubPics() > 1 && cu.pps->getSubPicFromCU( cu ).getTreatedAsPicFlag() )
1102
0
  {
1103
0
    const int subPicIdx = cu.pps->getSubPicFromCU( cu ).getSubPicIdx();
1104
0
    refBuf              = { refPic->getSubPicBufPtr   ( subPicIdx, compID, false ), 0 /*refPic->getSubPicBufPtr   ( subPicIdx, compID, true )*/ };
1105
0
    refBufStride        = { refPic->getSubPicBufStride( subPicIdx, compID, false ), 0 /*refPic->getSubPicBufStride( subPicIdx, compID, true )*/ };
1106
0
  }
1107
0
  else
1108
0
  {
1109
0
    if( cu.sps->getUseWrapAround() )
1110
0
    {
1111
0
      refBuf       = { refPic->getRecoBufPtr   ( compID, false ), refPic->getRecoBufPtr   ( compID, true ) };
1112
0
      refBufStride = { refPic->getRecoBufStride( compID, false ), refPic->getRecoBufStride( compID, true ) };
1113
0
    }
1114
0
    else
1115
0
    {
1116
0
      refBuf      [0] = refBuf      [1] = refPic->getRecoBufPtr   ( compID, false );
1117
0
      refBufStride[0] = refBufStride[1] = refPic->getRecoBufStride( compID, false );
1118
0
    }
1119
0
  }
1120
1121
0
  const int puPosX = cu.blocks[compID].x, puPosY = cu.blocks[compID].y;
1122
1123
  // get prediction block by block
1124
0
  for ( int h = 0; h < cxHeight; h += blockHeight )
1125
0
  {
1126
0
#if !CALC_AFFINE_MV_ON_THE_FLY
1127
0
    const MotionInfo* lineMi = curMi;
1128
1129
0
#endif
1130
0
    for ( int w = 0; w < cxWidth; w += blockWidth )
1131
0
    {
1132
0
      Position mvPos{ w >> 2, h >> 2 };
1133
      
1134
0
      int iMvScaleTmpHor;
1135
0
      int iMvScaleTmpVer;
1136
1137
0
      if( isLuma( compID ) || chFmt == CHROMA_444 )
1138
0
      {
1139
#if CALC_AFFINE_MV_ON_THE_FLY
1140
        if( !subblkMVSpreadOverLimit )
1141
        {
1142
          mvhor = mvScaleHor + deltaMvHorX * ( halfBW + w ) + deltaMvVerX * ( halfBH + h );
1143
          mvver = mvScaleVer + deltaMvHorY * ( halfBW + w ) + deltaMvVerY * ( halfBH + h );
1144
          roundAffineMv( mvhor, mvver, shift );
1145
          mv.hor = mvhor; mv.ver = mvver;
1146
          mv.clipToStorageBitDepth();
1147
        }
1148
#else   
1149
0
        const Mv& mv = lineMi->mv[refPicList];
1150
1151
0
        iMvScaleTmpHor = mv.hor;
1152
0
        iMvScaleTmpVer = mv.ver;
1153
0
#endif
1154
1155
0
        if( chFmt != CHROMA_400 && chFmt != CHROMA_444 && ( ( ( mvPos.x ^ mvPos.y ) & 1 ) == 0 || chFmt != CHROMA_420 ) )
1156
0
        {
1157
0
          Mv &chromaMv = *GET_OFFSET( chromaMvFld, MVBUFFER_SIZE, mvPos.x >> chromaScaleX, mvPos.y >> chromaScaleY );
1158
0
          chromaMv.hor += iMvScaleTmpHor;
1159
0
          chromaMv.ver += iMvScaleTmpVer;
1160
0
        }
1161
0
      }
1162
0
      else
1163
0
      {
1164
0
        Mv& mv = *GET_OFFSET( chromaMvFld, MVBUFFER_SIZE, mvPos.x, mvPos.y );
1165
0
        iMvScaleTmpHor = mv.hor *(1<< ( 1 - ( chromaScaleX | chromaScaleY ) ));
1166
0
        iMvScaleTmpVer = mv.ver *(1<< ( 1 - ( chromaScaleX | chromaScaleY ) ));
1167
0
        roundAffineMv( iMvScaleTmpHor, iMvScaleTmpVer, 1 );
1168
0
      }
1169
1170
0
      bool wrapRef = false;
1171
1172
0
      if ( refPic->isWrapAroundEnabled( cu.pps ) )
1173
0
      {
1174
0
        Mv tmpMv(iMvScaleTmpHor, iMvScaleTmpVer);
1175
0
        wrapRef = wrapClipMv( tmpMv, Position( cu.Y().x + ( w << iScaleX ), cu.Y().y + ( h << iScaleY ) ), Size( blockWidth << iScaleX, blockHeight << iScaleY ), sps, *cu.pps );
1176
0
        iMvScaleTmpHor = tmpMv.getHor();
1177
0
        iMvScaleTmpVer = tmpMv.getVer();
1178
0
      }
1179
0
      else if( !refPicScaled && clipSubPic )
1180
0
      {
1181
0
        Mv mv{ iMvScaleTmpHor, iMvScaleTmpVer };
1182
0
        clipMv( mv, cu.lumaPos(), cu.lumaSize(), sps, *cu.pps );
1183
0
        iMvScaleTmpHor = mv.hor;
1184
0
        iMvScaleTmpVer = mv.ver;
1185
0
      }
1186
0
      else
1187
0
      {
1188
0
        iMvScaleTmpHor = std::min<int>( iHorMax, std::max<int>( iHorMin, iMvScaleTmpHor ) );
1189
0
        iMvScaleTmpVer = std::min<int>( iVerMax, std::max<int>( iVerMin, iMvScaleTmpVer ) );
1190
0
      }
1191
1192
0
      CHECKD( !refPic, "Should not be null" );
1193
0
      if( refPicScaled )
1194
0
      {
1195
0
        xPredInterBlkRPR( scalingRatio, *cu.pps, compID, cu.chromaFormat, refPic, Mv( iMvScaleTmpHor, iMvScaleTmpVer ), cu.blocks[compID].offset( w, h ), blockWidth, blockHeight, dstPic.bufs[compID].buf + w + h * dstPic.bufs[compID].stride, dstPic.bufs[compID].stride, bi, wrapRef, clpRng, 2 );
1196
0
        CHECKD( enablePROF, "PROF should be disabled with RPR" );
1197
0
      }
1198
0
      else
1199
0
      {
1200
0
        const int xInt  = iMvScaleTmpHor >> shiftX;
1201
0
        const int xFrac = iMvScaleTmpHor &  maskX;
1202
0
        const int yInt  = iMvScaleTmpVer >> shiftY;
1203
0
        const int yFrac = iMvScaleTmpVer &  maskY;
1204
1205
0
        const Pel*      refBufPtr = refBuf      [wrapRef];
1206
0
        const ptrdiff_t refStride = refBufStride[wrapRef];
1207
0
        OFFSET( refBufPtr, refStride, puPosX + xInt + w, puPosY + yInt + h );
1208
1209
0
        Pel* dst;
1210
1211
0
        ptrdiff_t dstStride;
1212
1213
0
        if( enablePROF )
1214
0
        {
1215
0
          dst       = dstExtBuf.buf + 1 + dstExtBuf.stride;
1216
0
          dstStride = dstExtBuf.stride;
1217
0
        }
1218
0
        else
1219
0
        {
1220
0
          dst       = dstBuf.bufAt( w, h );
1221
0
          dstStride = dstBuf.stride;
1222
0
        }
1223
1224
0
        if( xFrac && yFrac )
1225
0
        {
1226
0
          m_if.filter4x4( compID, refBufPtr, refStride, dst, dstStride, 4, 4, xFrac, yFrac, isLast, chFmt, clpRng );
1227
0
        }
1228
0
        else if( yFrac == 0 )
1229
0
        {
1230
0
          m_if.filterHor( compID, refBufPtr, refStride, dst, dstStride, blockWidth, blockHeight, xFrac, isLast, chFmt, clpRng );
1231
0
        }
1232
0
        else// if( xFrac == 0 )
1233
0
        {
1234
0
          m_if.filterVer( compID, refBufPtr, refStride, dst, dstStride, blockWidth, blockHeight, yFrac, true, isLast, chFmt, clpRng );
1235
0
        }
1236
1237
0
        if( enablePROF )
1238
0
        {
1239
0
          const Pel shift   = std::max<int>( 2, ( IF_INTERNAL_PREC - clpRng.bd ) );
1240
0
          const int xOffset = xFrac >> 3;
1241
0
          const int yOffset = yFrac >> 3;
1242
1243
0
          CHECKD( shift < 0, "Shift need to be positive!" );
1244
0
          static_assert( PROF_BORDER_EXT_H == BIO_EXTEND_SIZE, "PROF and BIO extension need to be equal!" );
1245
0
          static_assert( PROF_BORDER_EXT_W == BIO_EXTEND_SIZE, "PROF and BIO extension need to be equal!" );
1246
1247
0
          const ptrdiff_t refOffset = ( blockHeight + 1 ) * refStride;
1248
0
          const ptrdiff_t dstOffset = ( blockHeight + 1 ) * dstStride;
1249
1250
0
          const Pel* refPel = refBufPtr - ( 1 - yOffset ) * refStride + xOffset - 1;
1251
0
                Pel* dstPel = dst - 1 - dstStride;
1252
1253
0
          PaddBIO( refPel,             dstPel,             blockWidth, shift );
1254
0
          PaddBIO( refPel + refOffset, dstPel + dstOffset, blockWidth, shift );
1255
1256
0
          refPel = refBufPtr + yOffset * refStride + xOffset;
1257
0
          dstPel = dst;
1258
0
          for( int ph = 0; ph < 4; ph++, refPel += refStride, dstPel += dstStride )
1259
0
          {
1260
0
            dstPel[        -1] = ( refPel[        -1] << shift ) - Pel( IF_INTERNAL_OFFS );
1261
0
            dstPel[blockWidth] = ( refPel[blockWidth] << shift ) - Pel( IF_INTERNAL_OFFS );
1262
0
          }
1263
1264
0
          profGradFilter( dst, dstStride, blockWidth, blockHeight, AFFINE_MIN_BLOCK_SIZE, gradX, gradY, clpRng.bd );
1265
1266
0
          Pel *dstY = dstBuf.buf + w + dstBuf.stride * h;
1267
0
          const Pel offset   = ( 1 << ( shift- 1 ) ) + IF_INTERNAL_OFFS;
1268
0
          applyPROF[bi]( dstY, dstBuf.stride, dst, gradX, gradY, dMvScaleHor, dMvScaleVer, shift, offset, clpRng );
1269
0
        }
1270
0
      }
1271
0
#if !CALC_AFFINE_MV_ON_THE_FLY
1272
1273
0
      INCX( lineMi, miStride );
1274
0
#endif
1275
0
    }
1276
0
#if !CALC_AFFINE_MV_ON_THE_FLY
1277
1278
0
    INCY( curMi, miStride );
1279
0
#endif
1280
0
  }
1281
0
}
1282
1283
void InterPrediction::applyBiOptFlow( const CodingUnit &cu,
1284
                                      const PelUnitBuf &    yuvSrc0,
1285
                                      const PelUnitBuf &    yuvSrc1,
1286
                                      const int &           refIdx0,
1287
                                      const int &           refIdx1,
1288
                                      PelUnitBuf &          yuvDst,
1289
                                      const BitDepths &     clipBitDepths )
1290
0
{
1291
0
  const int height  = yuvDst.Y().height;
1292
0
  const int width   = yuvDst.Y().width;
1293
0
  int       heightG = height + 2 * BIO_EXTEND_SIZE;
1294
0
  int       widthG  = width  + 2 * BIO_EXTEND_SIZE;
1295
1296
0
  Pel *gradX0 = m_gradX0;
1297
0
  Pel *gradX1 = m_gradX1;
1298
0
  Pel *gradY0 = m_gradY0;
1299
0
  Pel *gradY1 = m_gradY1;
1300
1301
0
  int        stridePredMC = width + BIO_ALIGN_SIZE;
1302
0
  const Pel *srcY0        = m_bdofBlock[0] + stridePredMC;
1303
0
  const Pel *srcY1        = m_bdofBlock[1] + stridePredMC;
1304
1305
0
  Pel *           dstY      = yuvDst.Y().buf;
1306
0
  const ptrdiff_t dstStride = yuvDst.Y().stride;
1307
1308
0
  const int       bitDepth  = clipBitDepths.recon;
1309
1310
0
  for( int refList = 0; refList < NUM_REF_PIC_LIST_01; refList++ )
1311
0
  {
1312
0
    Pel *dstTempPtr = m_bdofBlock[refList] + stridePredMC;
1313
0
    Pel *gradY      = ( refList == 0 ) ? m_gradY0 : m_gradY1;
1314
0
    Pel *gradX      = ( refList == 0 ) ? m_gradX0 : m_gradX1;
1315
0
    BioGradFilter( dstTempPtr, stridePredMC, widthG, heightG, width + BIO_ALIGN_SIZE, gradX, gradY, bitDepth );
1316
0
  }
1317
1318
0
  const ClpRng &clpRng   = cu.slice->clpRng( COMPONENT_Y );
1319
0
  const int     shiftNum = IF_INTERNAL_PREC + 1 - bitDepth;
1320
0
  const int     offset   = ( 1 << ( shiftNum - 1 ) ) + 2 * IF_INTERNAL_OFFS;
1321
0
  const int     limit    = ( 1 << 4 ) - 1;
1322
1323
1324
0
  BiOptFlow( srcY0,
1325
0
             srcY1,
1326
0
             gradX0,
1327
0
             gradX1,
1328
0
             gradY0,
1329
0
             gradY1,
1330
0
             width,
1331
0
             height,
1332
0
             dstY,
1333
0
             dstStride,
1334
0
             shiftNum,
1335
0
             offset,
1336
0
             limit,
1337
0
             clpRng,
1338
0
             bitDepth
1339
0
            );
1340
0
}
1341
1342
void InterPrediction::xWeightedAverage(const CodingUnit& cu, const PelUnitBuf& pcYuvSrc0, const PelUnitBuf& pcYuvSrc1, PelUnitBuf& pcYuvDst, const BitDepths& clipBitDepths, const ClpRngs& clpRngs, const bool& bioApplied )
1343
0
{
1344
0
  const int iRefIdx0 = cu.refIdx[0];
1345
0
  const int iRefIdx1 = cu.refIdx[1];
1346
1347
0
  CHECKD( !( iRefIdx0 >= 0 && iRefIdx1 >= 0 ), "xWeightedAverage should only be called for BI-predicted blocks!" );
1348
1349
0
  if( cu.BcwIdx() != BCW_DEFAULT && !cu.ciipFlag() )
1350
0
  {
1351
0
    CHECK( bioApplied, "Bcw is disallowed with BIO" );
1352
0
    pcYuvDst.addWeightedAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, g_BcwInternBcw[cu.BcwIdx()] );
1353
0
    return;
1354
0
  }
1355
1356
0
  if( bioApplied )
1357
0
  {
1358
0
    applyBiOptFlow( cu, pcYuvSrc0, pcYuvSrc1, iRefIdx0, iRefIdx1, pcYuvDst, clipBitDepths );
1359
0
  }
1360
1361
0
  pcYuvDst.addAvg( pcYuvSrc0, pcYuvSrc1, clpRngs, bioApplied );
1362
0
}
1363
1364
1365
// Entry point for the motion compensation of one CU into predBuf.
//
// Selects the MV clipping function, handles IBC CUs directly, decides whether BDOF and
// DMVR apply and then dispatches to the matching prediction routine:
// DMVR, sub-PU ATMVP, BDOF, uni-prediction for identical motion, or regular prediction.
void InterPrediction::motionCompensation( CodingUnit &cu, PelUnitBuf &predBuf, const bool luma, const bool chroma )
{
  PROFILER_SCOPE_AND_STAGE_EXT( 1, g_timeProfiler, P_MOTCOMP, *cu.cs, luma ? CH_L: CH_C );
  m_currCuArea = cu;

  // MVs are clipped per sub-picture if the first list-0 reference has more than one sub-picture
  const bool refHasSubPics = cu.slice->getSliceType() != I_SLICE && cu.slice->getRefPic( REF_PIC_LIST_0, 0 )->subPictures.size() > 1;

  if( refHasSubPics )
  {
    clipMv = clipMvInSubpic;
  }
  else
  {
    clipMv = clipMvInPic;
  }

  if( CU::isIBC( cu ) )
  {
    CHECK( !luma, "IBC only for Chroma is not allowed." );
    xIntraBlockCopy( cu, predBuf, COMPONENT_Y );
    if( chroma )
    {
      xIntraBlockCopy( cu, predBuf, COMPONENT_Cb );
      xIntraBlockCopy( cu, predBuf, COMPONENT_Cr );
    }
    return;
  }

  // regular inter prediction from here on
  const PPS&   pps   = *cu.pps;
  const Slice& slice = *cu.slice;

  CHECKD( !cu.affineFlag() && cu.refIdx[0] >= 0 && cu.refIdx[1] >= 0 && ( cu.lwidth() + cu.lheight() == 12 ), "invalid 4x8/8x4 bi-predicted blocks" );

  int refIdx0 = cu.refIdx[REF_PIC_LIST_0];
  int refIdx1 = cu.refIdx[REF_PIC_LIST_1];

  // NOTE(review): the weights are requested even if a reference index is negative and are
  // read below before the bi-prediction test; confirm getWpScaling yields a usable pointer then.
  const WPScalingParam* wp0 = nullptr;
  const WPScalingParam* wp1 = nullptr;
  cu.slice->getWpScaling( REF_PIC_LIST_0, refIdx0, wp0 );
  cu.slice->getWpScaling( REF_PIC_LIST_1, refIdx1, wp1 );

  bool bioApplied = false;

  const bool bdofEnabled = cu.sps->getUseBIO() && !cu.cs->picHeader->getDisBdofFlag();

  if( bdofEnabled )
  {
    const bool modeExcludesBdof = cu.affineFlag() || m_subPuMC || cu.ciipFlag() || cu.smvdMode() || ( cu.sps->getUseBcw() && cu.BcwIdx() != BCW_DEFAULT );

    if( !modeExcludesBdof )
    {
      const bool anyWeightPresent = wp0[COMPONENT_Y].bPresentFlag || wp0[COMPONENT_Cb].bPresentFlag || wp0[COMPONENT_Cr].bPresentFlag
                                 || wp1[COMPONENT_Y].bPresentFlag || wp1[COMPONENT_Cb].bPresentFlag || wp1[COMPONENT_Cr].bPresentFlag;

      const bool weightedBi  = anyWeightPresent && slice.getSliceType() == B_SLICE;
      const bool weightedUni = pps.getUseWP()   && slice.getSliceType() == P_SLICE;

      bioApplied = !weightedBi && !weightedUni && PU::isBiPredFromDifferentDirEqDistPoc( cu ) && cu.Y().height >= 8 && cu.Y().width >= 8 && cu.Y().area() >= 128;
    }
  }

  const bool dmvrCandidate = !m_subPuMC && PU::checkDMVRCondition( cu );

  // neither DMVR nor BDOF is used with a scaled reference picture
  const bool refIsScaled = ( refIdx0 >= 0 && cu.slice->getRefPic( REF_PIC_LIST_0, refIdx0 )->isRefScaled( cu.pps ) ) ||
                           ( refIdx1 >= 0 && cu.slice->getRefPic( REF_PIC_LIST_1, refIdx1 )->isRefScaled( cu.pps ) );

  const bool dmvrApplied = dmvrCandidate && !refIsScaled;
  bioApplied             = bioApplied    && !refIsScaled;

  if( dmvrApplied )
  {
    cu.setDmvrCondition( true );
    xProcessDMVR( cu, predBuf, slice.clpRngs(), bioApplied );
    return;
  }

  if( cu.mergeType() == MRG_TYPE_SUBPU_ATMVP )
  {
    xSubPuMC( cu, predBuf );
    return;
  }

  if( bioApplied )
  {
    xSubPuBio( cu, predBuf );
    return;
  }

  if( xCheckIdenticalMotion( cu ) )
  {
    // both lists reference the same data, a single uni-prediction is sufficient
    xPredInterUni( cu, REF_PIC_LIST_0, predBuf, false, false , true, true );
    return;
  }

  CHECKD( bioApplied, "BIO should not be applied here!" );
  xPredInterBi( cu, predBuf );
}
1453
1454
/// Motion compensation for a CU coded with geometric partitioning (GEO).
///
/// Each of the two partitions is predicted as a uni-directional block over the
/// whole CU area: the first one into a temporary buffer backed by m_acYuvPred,
/// the second one into predBuf. Both predictions are then blended into predBuf
/// by weightedGeoBlk().
///
/// Side effects: cu.mv[..][0], cu.refIdx[..] and cu.mvpIdx[..] are overwritten
/// (they hold the second partition's motion on return) and the clipMv member is
/// re-selected.
void InterPrediction::motionCompensationGeo( CodingUnit &cu, PelUnitBuf &predBuf )
{
  PROFILER_SCOPE_AND_STAGE_EXT( 1, g_timeProfiler, P_MOTCOMP, *cu.cs, CH_L );

  // Sub-picture clipping is used only for non-intra slices whose first L0 reference has several sub-pictures.
  if( cu.slice->getSliceType() == I_SLICE || cu.slice->getRefPic( REF_PIC_LIST_0, 0 )->subPictures.size() <= 1 )
  {
    clipMv = clipMvInPic;
  }
  else
  {
    clipMv = clipMvInSubpic;
  }

  const bool     hasChroma = isChromaEnabled( cu.chromaFormat );
  const UnitArea localUnitArea( cu.cs->area.chromaFormat, Area( 0, 0, cu.lwidth(), cu.lheight() ) );

  // Temporary destination for the first partition.
  PelUnitBuf tmpGeoBuf0 = hasChroma
                        ? PelUnitBuf( cu.chromaFormat, PelBuf( m_acYuvPred[0], localUnitArea.Y() ), PelBuf( m_acYuvPred[1], localUnitArea.Cb() ), PelBuf( m_acYuvPred[2], localUnitArea.Cr() ) )
                        : PelUnitBuf( cu.chromaFormat, PelBuf( m_acYuvPred[0], localUnitArea.Y() ) );

  // First partition: upper nibble of the packed value is the inter direction, lower nibble the reference index.
  {
    const uint8_t locInterDir = cu.interDirrefIdxGeo0() >> 4;
    CHECKD( !( locInterDir == 1 || locInterDir == 2 ), "Should not happen" );

    if( locInterDir == 1 )
    {
      cu.mv    [REF_PIC_LIST_0][0] = cu.mv[0][1];
      cu.mv    [REF_PIC_LIST_1][0] = Mv();
      cu.refIdx[REF_PIC_LIST_0]    = cu.interDirrefIdxGeo0() & 15;
      cu.refIdx[REF_PIC_LIST_1]    = -1;
    }
    else
    {
      cu.mv    [REF_PIC_LIST_0][0] = Mv();
      cu.mv    [REF_PIC_LIST_1][0] = cu.mv[0][1];
      cu.refIdx[REF_PIC_LIST_0]    = -1;
      cu.refIdx[REF_PIC_LIST_1]    = cu.interDirrefIdxGeo0() & 15;
    }
    cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
    cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;

    motionCompensation( cu, tmpGeoBuf0, true, hasChroma );
  }

  // Second partition: same unpacking, predicted straight into the output buffer.
  {
    const uint8_t locInterDir = cu.interDirrefIdxGeo1() >> 4;
    CHECKD( !( locInterDir == 1 || locInterDir == 2 ), "Should not happen" );

    if( locInterDir == 1 )
    {
      cu.mv    [REF_PIC_LIST_0][0] = cu.mv[1][1];
      cu.mv    [REF_PIC_LIST_1][0] = Mv();
      cu.refIdx[REF_PIC_LIST_0]    = cu.interDirrefIdxGeo1() & 15;
      cu.refIdx[REF_PIC_LIST_1]    = -1;
    }
    else
    {
      cu.mv    [REF_PIC_LIST_0][0] = Mv();
      cu.mv    [REF_PIC_LIST_1][0] = cu.mv[1][1];
      cu.refIdx[REF_PIC_LIST_0]    = -1;
      cu.refIdx[REF_PIC_LIST_1]    = cu.interDirrefIdxGeo1() & 15;
    }
    cu.mvpIdx[REF_PIC_LIST_0] = NOT_VALID;
    cu.mvpIdx[REF_PIC_LIST_1] = NOT_VALID;

    motionCompensation( cu, predBuf, true, hasChroma );
  }

  // Blend: predBuf (second partition) and tmpGeoBuf0 (first partition) are combined in place into predBuf.
  weightedGeoBlk( cu, cu.geoSplitDir, hasChroma ? MAX_NUM_CHANNEL_TYPE : CHANNEL_TYPE_LUMA, predBuf, tmpGeoBuf0, predBuf );
}
1494
1495
/// Blends two GEO partition predictions into predDst for the requested channel(s).
///
/// \param channel  CHANNEL_TYPE_LUMA: luma only; CHANNEL_TYPE_CHROMA: Cb and Cr only;
///                 any other value: luma, plus Cb and Cr when the chroma format has chroma.
///
/// The actual per-component blending is delegated to m_if.weightedGeoBlk().
void InterPrediction::weightedGeoBlk( CodingUnit &cu, const uint8_t splitDir, int32_t channel, PelUnitBuf& predDst, PelUnitBuf& predSrc0, PelUnitBuf& predSrc1)
{
  const bool lumaOnly   = channel == CHANNEL_TYPE_LUMA;
  const bool chromaOnly = channel == CHANNEL_TYPE_CHROMA;

  const bool blendLuma   = !chromaOnly;
  // The explicit chroma request is not guarded by the chroma-format check; the "all channels" request is.
  const bool blendChroma = chromaOnly || ( !lumaOnly && isChromaEnabled( cu.chromaFormat ) );

  if( blendLuma )
  {
    m_if.weightedGeoBlk( cu, cu.lumaSize().width, cu.lumaSize().height, COMPONENT_Y, splitDir, predDst, predSrc0, predSrc1, cu.slice->clpRngs() );
  }

  if( blendChroma )
  {
    m_if.weightedGeoBlk( cu, cu.chromaSize().width, cu.chromaSize().height, COMPONENT_Cb, splitDir, predDst, predSrc0, predSrc1, cu.slice->clpRngs() );
    m_if.weightedGeoBlk( cu, cu.chromaSize().width, cu.chromaSize().height, COMPONENT_Cr, splitDir, predDst, predSrc0, predSrc1, cu.slice->clpRngs() );
  }
}
1516
1517
1518
/// Fills the DMVR padding buffer pcPad with reference samples for one reference list.
///
/// The CU's motion vector for refId is moved back by the interpolation filter lead
/// ((taps / 2) - 1 samples), clipped (or wrap-clipped), reduced to integer sample
/// units and used to locate the source block in the reference picture. That block,
/// enlarged by (filter taps - 1) in each dimension, is handed to the prefetchPad
/// routine, which writes into pcPad at an offset of DMVR_NUM_ITERATION rows and columns.
///
/// \param forLuma  true: process the luma plane; false: process Cb and Cr.
///
/// Side effect: the stride of the processed plane(s) in pcPad is overwritten.
void InterPrediction::xPrefetchPad( CodingUnit& cu, PelUnitBuf &pcPad, RefPicList refId, bool forLuma )
{
  static constexpr int mvShift = MV_FRACTIONAL_BITS_INTERNAL;

  const Picture* refPic = cu.slice->getRefPic( refId, cu.refIdx[refId] );

  const bool wrapRefEnbld = refPic->isWrapAroundEnabled( cu.pps );
  const bool subPicAsPic  = cu.pps->getNumSubPics() > 1 && cu.pps->getSubPicFromCU( cu ).getTreatedAsPicFlag();

  const ChannelType chType     = forLuma ? CHANNEL_TYPE_LUMA : CHANNEL_TYPE_CHROMA;
  const int         filtersize = isLuma( chType ) ? NTAPS_LUMA : NTAPS_CHROMA;
  const int         filterLead = ( filtersize >> 1 ) - 1;

  const int mvshiftTempHor = mvShift + getChannelTypeScaleX( chType, cu.chromaFormat );
  const int mvshiftTempVer = mvShift + getChannelTypeScaleY( chType, cu.chromaFormat );

  // Start from the block MV and step back to the first sample the interpolation filter touches.
  Mv cMv = cu.mv[refId][0];
  cMv   += Mv( -( filterLead << mvshiftTempHor ), -( filterLead << mvshiftTempVer ) );

  bool wrapRef = false;
  if( !wrapRefEnbld )
  {
    clipMv( cMv, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );
  }
  else
  {
    wrapRef = wrapClipMv( cMv, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );
  }

  // Integer sample displacement in the processed channel.
  cMv.hor >>= mvshiftTempHor;
  cMv.ver >>= mvshiftTempVer;

  if( isLuma( chType ) )
  {
    auto& padY  = pcPad.bufs[COMPONENT_Y];
    padY.stride = padY.width + ( 2 * DMVR_NUM_ITERATION ) + filtersize;

    const ptrdiff_t dstOffset = DMVR_NUM_ITERATION * ( padY.stride + 1 );
    const int       width     = static_cast<int>( padY.width )  + filtersize - 1;
    const int       height    = static_cast<int>( padY.height ) + filtersize - 1;

    CPelBuf refBuf = subPicAsPic ? refPic->getSubPicBuf( cu.pps->getSubPicFromCU( cu ).getSubPicIdx(), COMPONENT_Y, wrapRef ) : refPic->getRecoBuf( COMPONENT_Y, wrapRef );

    const Position srcPos = cu.lumaPos().offset( cMv.hor, cMv.ver );
    const Pel*     srcPtr = refBuf.bufAt( srcPos );

    PelBuf& dstBuf = pcPad.Y();
    prefetchPad[0]( srcPtr, refBuf.stride, dstBuf.buf + dstOffset, dstBuf.stride, width, height );
  }
  else
  {
    auto& padCb = pcPad.bufs[COMPONENT_Cb];
    auto& padCr = pcPad.bufs[COMPONENT_Cr];

    // Both chroma planes share the stride derived from the Cb width.
    padCb.stride = padCb.width + ( 2 * DMVR_NUM_ITERATION ) + filtersize;
    padCr.stride = padCb.stride;

    const ptrdiff_t offsetCb = DMVR_NUM_ITERATION * ( padCb.stride + 1 );
    const ptrdiff_t offsetCr = DMVR_NUM_ITERATION * ( padCr.stride + 1 );
    const int       width    = static_cast<int>( padCb.width )  + filtersize - 1;
    const int       height   = static_cast<int>( padCb.height ) + filtersize - 1;

    CPelBuf refBufCb = subPicAsPic ? refPic->getSubPicBuf( cu.pps->getSubPicFromCU( cu ).getSubPicIdx(), COMPONENT_Cb, wrapRef ) : refPic->getRecoBuf( COMPONENT_Cb, wrapRef );
    CPelBuf refBufCr = subPicAsPic ? refPic->getSubPicBuf( cu.pps->getSubPicFromCU( cu ).getSubPicIdx(), COMPONENT_Cr, wrapRef ) : refPic->getRecoBuf( COMPONENT_Cr, wrapRef );

    const Position srcPos   = cu.blocks[COMPONENT_Cb].pos().offset( cMv.hor, cMv.ver );
    const Pel*     srcPtrCb = refBufCb.bufAt( srcPos );
    const Pel*     srcPtrCr = refBufCr.bufAt( srcPos );

    Pel*            dstPtrCb    = pcPad.Cb().buf + offsetCb;
    Pel*            dstPtrCr    = pcPad.Cr().buf + offsetCr;
    const ptrdiff_t dstStrideCb = pcPad.Cb().stride;
    const ptrdiff_t dstStrideCr = pcPad.Cr().stride;

    // The chroma routine is selected by the vertical chroma scale of the format.
    const int idx = getChannelTypeScaleY( CH_C, cu.chromaFormat );

    prefetchPad[1+idx]( srcPtrCb, refBufCb.stride, dstPtrCb, dstStrideCb, width, height );
    prefetchPad[1+idx]( srcPtrCr, refBufCr.stride, dstPtrCr, dstStrideCr, width, height );
  }
}
1604
1605
/// Three-step shift-and-subtract division producing a signed quotient of at most 3 bits.
///
/// The magnitude of N is compared against 8*D, 4*D and 2*D in turn (subtracting on
/// a hit), so the result is min( |N| / (2*D), 7 ) for positive D, carrying the sign of N.
///
/// \param N  numerator (may be negative)
/// \param D  denominator
/// \return   quotient in the range [-7, 7]
inline int32_t div_for_maxq7(int64_t N, int64_t D)
{
  const bool negative  = N < 0;
  int64_t    remainder = negative ? -N : N;

  int64_t divisor  = D << 3;
  int32_t quotient = 0;

  // One quotient bit per step, most significant first; the divisor halves each time.
  for( int step = 0; step < 3; ++step, divisor >>= 1 )
  {
    quotient <<= 1;
    if( remainder >= divisor )
    {
      remainder -= divisor;
      ++quotient;
    }
  }

  return negative ? -quotient : quotient;
}
1639
1640
void xSubPelErrorSrfc(uint64_t *sadBuffer, int32_t *deltaMv)
1641
0
{
1642
0
  int64_t numerator, denominator;
1643
0
  int32_t mvDeltaSubPel;
1644
0
  int32_t mvSubPelLvl = 4;/*1: half pel, 2: Qpel, 3:1/8, 4: 1/16*/
1645
                                                        /*horizontal*/
1646
0
    numerator = (int64_t)((sadBuffer[1] - sadBuffer[3]) << mvSubPelLvl);
1647
0
    denominator = (int64_t)((sadBuffer[1] + sadBuffer[3] - (sadBuffer[0] << 1)));
1648
1649
0
    if (0 != denominator)
1650
0
    {
1651
0
      if ((sadBuffer[1] != sadBuffer[0]) && (sadBuffer[3] != sadBuffer[0]))
1652
0
      {
1653
0
        mvDeltaSubPel = div_for_maxq7(numerator, denominator);
1654
0
        deltaMv[0] = (mvDeltaSubPel);
1655
0
      }
1656
0
      else
1657
0
      {
1658
0
        if (sadBuffer[1] == sadBuffer[0])
1659
0
        {
1660
0
          deltaMv[0] = -8;// half pel
1661
0
        }
1662
0
        else
1663
0
        {
1664
0
          deltaMv[0] = 8;// half pel
1665
0
        }
1666
0
      }
1667
0
    }
1668
1669
    /*vertical*/
1670
0
    numerator = (int64_t)((sadBuffer[2] - sadBuffer[4]) << mvSubPelLvl);
1671
0
    denominator = (int64_t)((sadBuffer[2] + sadBuffer[4] - (sadBuffer[0] << 1)));
1672
0
    if (0 != denominator)
1673
0
    {
1674
0
      if ((sadBuffer[2] != sadBuffer[0]) && (sadBuffer[4] != sadBuffer[0]))
1675
0
      {
1676
0
        mvDeltaSubPel = div_for_maxq7(numerator, denominator);
1677
0
        deltaMv[1] = (mvDeltaSubPel);
1678
0
      }
1679
0
      else
1680
0
      {
1681
0
        if (sadBuffer[2] == sadBuffer[0])
1682
0
        {
1683
0
          deltaMv[1] = -8;// half pel
1684
0
        }
1685
0
        else
1686
0
        {
1687
0
          deltaMv[1] = 8;// half pel
1688
0
        }
1689
0
      }
1690
0
    }
1691
1692
0
  return;
1693
0
}
1694
1695
/// Integer search over the 5x5 window of mirrored displacements (-2..2 in both directions).
///
/// For every row, the L0 pointer is moved by (-2, ver) and the L1 pointer by the
/// opposite displacement; distFuncX5 then writes five costs into pSADsArray. The
/// lowest cost found (strictly below the incoming minCost) updates minCost and deltaMV.
///
/// \param minCost     in: cost to beat; out: best cost found
/// \param deltaMV     out: {hor, ver} of the best position; unchanged if nothing beats minCost
/// \param pSADsArray  out: costs in row-major order, five per row
void InterPrediction::xBIPMVRefine( DistParam &cDistParam, const Pel *pRefL0, const Pel *pRefL1, Distortion& minCost, int16_t *deltaMV, Distortion *pSADsArray)
{
  const ptrdiff_t refStride = m_biLinearBufStride;

  for( int ver = -2; ver <= 2; ver++ )
  {
    // Leftmost candidate of this row; L1 moves in the opposite direction.
    const ptrdiff_t rowStart = -2 + ver * refStride;

    cDistParam.org.buf = pRefL0 + rowStart;
    cDistParam.cur.buf = pRefL1 - rowStart;

    cDistParam.distFuncX5( cDistParam, pSADsArray, ver != 0 );

    for( int hor = -2; hor <= 2; hor++ )
    {
      const Distortion cost = *pSADsArray++;

      if( cost < minCost )
      {
        minCost    = cost;
        deltaMV[0] = hor;
        deltaMV[1] = ver;
      }
    }
  }
}
1723
1724
/// Final motion compensation of a DMVR sub-block for both reference lists.
///
/// For each list and component, the integer displacement between the refined MV
/// (cu.mv) and the starting merge MV decides the sample source: if it is non-zero,
/// prediction reads from the pre-filled padding buffer (pcPad0 / pcPad1); otherwise
/// the regular xPredInterBlk path is used.
///
/// \param pcYuvSrc0, pcYuvSrc1  destination of the L0 / L1 prediction
/// \param mergeMV               starting MVs, indexed by reference list
///
/// Side effect: m_iRefListIdx is set to the list being processed.
void InterPrediction::xFinalPaddedMCForDMVR(CodingUnit& cu, PelUnitBuf &pcYuvSrc0, PelUnitBuf &pcYuvSrc1, PelUnitBuf &pcPad0, PelUnitBuf &pcPad1, const bool bioApplied, const Mv mergeMV[NUM_REF_PIC_LIST_01] )
{
  /*always high precision MVs are used*/
  const int     mvShift      = MV_FRACTIONAL_BITS_INTERNAL;
  const ClpRngs clp          = cu.slice->clpRngs();
  const int     numValidComp = getNumberValidComponents( cu.chromaFormat );

  PelUnitBuf* const dstPerList[] = { &pcYuvSrc0, &pcYuvSrc1 };
  PelUnitBuf* const padPerList[] = { &pcPad0,    &pcPad1    };

  for( int k = 0; k < NUM_REF_PIC_LIST_01; k++ )
  {
    PelUnitBuf &pcYUVTemp = *dstPerList[k == 0 ? 0 : 1];
    PelUnitBuf &pcPadTemp = *padPerList[k == 0 ? 0 : 1];

    const RefPicList refId = static_cast<RefPicList>( k );
    m_iRefListIdx          = refId;

    Mv cMv     = cu.mv[refId][0];
    Mv startMv = mergeMV[refId];

    const Picture* refPic = cu.slice->getRefPic( refId, cu.refIdx[refId] );

    // Clipped copy of the refined MV; wrapClipMv may adjust it further when wrap-around is in use.
    Mv cMvClipped( cMv );
    clipMv( cMvClipped, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );
    const bool wrapRef = cu.pps->getUseWrapAround() && wrapClipMv( cMvClipped, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );

    for( int compID = 0; compID < numValidComp; compID++ )
    {
      const ComponentID comp           = ComponentID( compID );
      const int         mvshiftTempHor = mvShift + getComponentScaleX( comp, cu.chromaFormat );
      const int         mvshiftTempVer = mvShift + getComponentScaleY( comp, cu.chromaFormat );

      const int deltaIntMvX = ( cMv.getHor() >> mvshiftTempHor ) - ( startMv.getHor() >> mvshiftTempHor );
      const int deltaIntMvY = ( cMv.getVer() >> mvshiftTempVer ) - ( startMv.getVer() >> mvshiftTempVer );

      if( !deltaIntMvX && !deltaIntMvY )
      {
        // Same integer position as the merge MV: regular prediction path.
        xPredInterBlk<false, false>( comp, cu, refPic, cMvClipped, pcYUVTemp.bufs[compID], true, clp, bioApplied, false, wrapRef, 0, 0, 0 );
        continue;
      }

      CHECKD( ( abs( deltaIntMvX ) > DMVR_NUM_ITERATION ) || ( abs( deltaIntMvY ) > DMVR_NUM_ITERATION ), "not expected DMVR movement" );

      const ptrdiff_t pcPadstride    = pcPadTemp.bufs[compID].stride;
      const int       leftPixelExtra = ( ( compID == COMPONENT_Y ? NTAPS_LUMA : NTAPS_CHROMA ) >> 1 ) - 1;

      // Skip the search margin and filter lead, then apply the integer displacement.
      ptrdiff_t offset = ( DMVR_NUM_ITERATION + leftPixelExtra ) * ( pcPadstride + 1 );
      offset          += deltaIntMvY * pcPadstride + deltaIntMvX;

      Pel *srcBufPelPtr = pcPadTemp.bufs[compID].buf + offset;

      xPredInterBlk<true , false>( comp, cu, refPic, cMvClipped, pcYUVTemp.bufs[compID], true, clp, bioApplied, false, wrapRef, 0, 0, 0, srcBufPelPtr, pcPadstride );
    }
  }
}
1777
1778
void xDMVRSubPixelErrorSurface( int16_t *totalDeltaMV, int16_t *deltaMV, Distortion*pSADsArray )
1779
0
{
1780
0
  static constexpr int sadStride = ( ( ( 2 * DMVR_NUM_ITERATION ) + 1 ) );
1781
0
  uint64_t sadbuffer[5];
1782
1783
0
  if( abs( totalDeltaMV[0] ) != ( 2 << MV_FRACTIONAL_BITS_INTERNAL ) && abs( totalDeltaMV[1] ) != ( 2 << MV_FRACTIONAL_BITS_INTERNAL ) )
1784
0
  {
1785
0
    int32_t tempDeltaMv[2] = { 0,0 };
1786
0
    sadbuffer[0] = pSADsArray[0];
1787
0
    sadbuffer[1] = pSADsArray[-1];
1788
0
    sadbuffer[2] = pSADsArray[-sadStride];
1789
0
    sadbuffer[3] = pSADsArray[1];
1790
0
    sadbuffer[4] = pSADsArray[sadStride];
1791
0
    xSubPelErrorSrfc(sadbuffer, tempDeltaMv);
1792
0
    totalDeltaMV[0] += tempDeltaMv[0];
1793
0
    totalDeltaMV[1] += tempDeltaMv[1];
1794
0
  }
1795
0
}
1796
1797
/// Produces the luma predictions that the DMVR search operates on.
///
/// The merge MVs of both lists are clipped, moved up and left by DMVR_NUM_ITERATION
/// full samples, and used to predict a block enlarged by 2 * DMVR_NUM_ITERATION in
/// each dimension into m_cYuvPredTempDMVRL0 / m_cYuvPredTempDMVRL1 (row pitch
/// m_biLinearBufStride, which the caller must have set).
void InterPrediction::xinitMC( CodingUnit& cu, const ClpRngs &clpRngs )
{
  static constexpr int sizeExt      = DMVR_NUM_ITERATION << 1;
  static constexpr int searchMargin = ( DMVR_NUM_ITERATION << MV_FRACTIONAL_BITS_INTERNAL );

  const int extWidth  = cu.lwidth()  + sizeExt;
  const int extHeight = cu.lheight() + sizeExt;

  /*use merge MV as starting MV*/
  Mv mergeMVL0( cu.mv[REF_PIC_LIST_0][0] );
  Mv mergeMVL1( cu.mv[REF_PIC_LIST_1][0] );

  /*Clip the starting MVs*/
  clipMv( mergeMVL0, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );
  clipMv( mergeMVL1, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );

  const bool wrapRefL0 = cu.pps->getUseWrapAround() && wrapClipMv( mergeMVL0, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );
  const bool wrapRefL1 = cu.pps->getUseWrapAround() && wrapClipMv( mergeMVL1, cu.lumaPos(), cu.lumaSize(), *cu.sps, *cu.pps );

  /*L0 MC for refinement*/
  {
    mergeMVL0.hor -= searchMargin;
    mergeMVL0.ver -= searchMargin;

    const Picture* refPic = cu.slice->getRefPic( L0, cu.refIdx[L0] );
    PelBuf         yuvPredTempL0( m_cYuvPredTempDMVRL0, m_biLinearBufStride, extWidth, extHeight );

    xPredInterBlk<false, true>( COMPONENT_Y, cu, refPic, mergeMVL0, yuvPredTempL0, true, clpRngs, false, false, wrapRefL0, extWidth, extHeight, true );
  }

  /*L1 MC for refinement*/
  {
    mergeMVL1.hor -= searchMargin;
    mergeMVL1.ver -= searchMargin;

    const Picture* refPic = cu.slice->getRefPic( L1, cu.refIdx[L1] );
    PelBuf         yuvPredTempL1( m_cYuvPredTempDMVRL1, m_biLinearBufStride, extWidth, extHeight );

    xPredInterBlk<false, true>( COMPONENT_Y, cu, refPic, mergeMVL1, yuvPredTempL1, true, clpRngs, false, false, wrapRefL1, extWidth, extHeight, true );
  }
}
1839
1840
/// Decoder-side motion vector refinement for a bi-predicted CU.
///
/// The CU is split into sub-blocks of at most DMVR_SUBCU_WIDTH x DMVR_SUBCU_HEIGHT
/// luma samples. For each sub-block:
///   1. the cost between the L0 and L1 search predictions (from xinitMC) is computed;
///   2. if the scaled cost is below the sub-block sample count, the refinement is zero;
///      otherwise an integer search (xBIPMVRefine) plus a sub-sample step
///      (xDMVRSubPixelErrorSurface) yields a delta that is added to the L0 merge MV
///      and subtracted from the L1 merge MV;
///   3. padded reference samples are fetched where the integer MV position changed;
///   4. the final predictions are generated and averaged into pcYuvDst.
///
/// The delta of every sub-block is stored in cu.cs->m_dmvrMvCache starting at
/// cu.mvdL0SubPuOff. BIO is disabled per sub-block when the cost is below
/// 2 * sub-block sample count.
void InterPrediction::xProcessDMVR( CodingUnit& cu, PelUnitBuf &pcYuvDst, const ClpRngs &clpRngs, const bool bioApplied )
{
  /*Always High Precision*/
  static constexpr int mvShift   = MV_FRACTIONAL_BITS_INTERNAL;
  static constexpr int sadStride = ( 2 * DMVR_NUM_ITERATION ) + 1;

  const bool hasChroma = isChromaEnabled( cu.chromaFormat );
  const int  mvShiftX  = mvShift + getChannelTypeScaleX( CH_C, cu.chromaFormat );
  const int  mvShiftY  = mvShift + getChannelTypeScaleY( CH_C, cu.chromaFormat );

  /*use merge MV as starting MV*/
  Mv mergeMv[] = { cu.mv[REF_PIC_LIST_0][0] , cu.mv[REF_PIC_LIST_1][0] };

  m_biLinearBufStride = ( cu.lwidth() + ( 2 * DMVR_NUM_ITERATION ) );

  xinitMC( cu, clpRngs );

  const int dy = std::min<int>( cu.lumaSize().height, DMVR_SUBCU_HEIGHT );
  const int dx = std::min<int>( cu.lumaSize().width,  DMVR_SUBCU_WIDTH );

  const Position puPos = cu.lumaPos();
  BitDepths      bds   = cu.sps->getBitDepths();

  const int bioEnabledThres = ( 2 * dy * dx );

  const int scaleX = getComponentScaleX( COMPONENT_Cb, cu.chromaFormat );
  const int scaleY = getComponentScaleY( COMPONENT_Cb, cu.chromaFormat );

  // Point the search buffers at the centre position so that search displacements can be applied directly.
  Pel *biLinearPredL0 = m_cYuvPredTempDMVRL0 + ( DMVR_NUM_ITERATION * m_biLinearBufStride ) + DMVR_NUM_ITERATION;
  Pel *biLinearPredL1 = m_cYuvPredTempDMVRL1 + ( DMVR_NUM_ITERATION * m_biLinearBufStride ) + DMVR_NUM_ITERATION;

  // Working copy of the CU, shrunk to one sub-block and moved across the CU below.
  CodingUnit subPu;
  subPu = cu;
  subPu.UnitArea::operator=( UnitArea( cu.chromaFormat, Area( puPos.x, puPos.y, dx, dy ) ) );

  PelUnitBuf cYuvRefBuffDMVRL0 = hasChroma
                               ? PelUnitBuf( cu.chromaFormat, PelBuf( m_cRefSamplesDMVRL0[0], subPu.Y() ), PelBuf( m_cRefSamplesDMVRL0[1], subPu.Cb() ), PelBuf( m_cRefSamplesDMVRL0[2], subPu.Cr() ) )
                               : PelUnitBuf( cu.chromaFormat, PelBuf( m_cRefSamplesDMVRL0[0], subPu.Y() ) );
  PelUnitBuf cYuvRefBuffDMVRL1 = hasChroma
                               ? PelUnitBuf( cu.chromaFormat, PelBuf( m_cRefSamplesDMVRL1[0], subPu.Y() ), PelBuf( m_cRefSamplesDMVRL1[1], subPu.Cb() ), PelBuf( m_cRefSamplesDMVRL1[2], subPu.Cr() ) )
                               : PelUnitBuf( cu.chromaFormat, PelBuf( m_cRefSamplesDMVRL1[0], subPu.Y() ) );

  PelUnitBuf srcPred1 = hasChroma
                      ? PelUnitBuf( cu.chromaFormat, PelBuf( m_acYuvPred[0], subPu.Y() ), PelBuf( m_acYuvPred[1], subPu.Cb() ), PelBuf( m_acYuvPred[2], subPu.Cr() ) )
                      : PelUnitBuf( cu.chromaFormat, PelBuf( m_acYuvPred[0], subPu.Y() ) );

  PelUnitBuf* const padPerList[] = { &cYuvRefBuffDMVRL0, &cYuvRefBuffDMVRL1 };

  DistParam cDistParam;
  m_pcRdCost->setDistParam( cDistParam, nullptr, nullptr, m_biLinearBufStride, m_biLinearBufStride, clpRngs.bd, dx, dy, 1 );

  // Sub-block view of the destination; only the plane pointers are updated per sub-block.
  PelUnitBuf      subPredBuf = pcYuvDst.subBuf( UnitAreaRelative( cu, subPu ) );
  const ptrdiff_t strideY    = pcYuvDst.bufs[COMPONENT_Y].stride;
  const ptrdiff_t strideCb   = hasChroma ? pcYuvDst.bufs[COMPONENT_Cb].stride : 0;
  const ptrdiff_t strideCr   = hasChroma ? pcYuvDst.bufs[COMPONENT_Cr].stride : 0;

  int num = 0;   // running sub-block index

  for( int y = puPos.y, yStart = 0; y < ( puPos.y + cu.lumaSize().height ); y += dy, yStart += dy )
  {
    for( int x = puPos.x, xStart = 0; x < ( puPos.x + cu.lumaSize().width ); x += dx, xStart += dx )
    {
      // Reset the sub-block to the merge motion and move it to the current position.
      subPu.mv[0][0]    = cu.mv[0][0];
      subPu.mv[1][0]    = cu.mv[1][0];
      subPu.blocks[0].x = x;
      subPu.blocks[0].y = y;

      subPredBuf.bufs[COMPONENT_Y].buf = pcYuvDst.bufs[COMPONENT_Y].buf + xStart + yStart * strideY;

      if( hasChroma )
      {
        subPu.blocks[1].x = x >> scaleX; subPu.blocks[1].y = y >> scaleY;
        subPu.blocks[2].x = x >> scaleX; subPu.blocks[2].y = y >> scaleY;

        subPredBuf.bufs[COMPONENT_Cb].buf = pcYuvDst.bufs[COMPONENT_Cb].buf + ( xStart >> scaleX ) + ( ( yStart >> scaleY ) * strideCb );
        subPredBuf.bufs[COMPONENT_Cr].buf = pcYuvDst.bufs[COMPONENT_Cr].buf + ( xStart >> scaleX ) + ( ( yStart >> scaleY ) * strideCr );
      }

      // Centre entry of the cost grid.
      Distortion *pSADsArray = &m_SADsArray[( sadStride * sadStride ) >> 1];

      Pel *biPredSubPuL0 = biLinearPredL0 + xStart + yStart * m_biLinearBufStride;
      Pel *biPredSubPuL1 = biLinearPredL1 + xStart + yStart * m_biLinearBufStride;

      cDistParam.cur.buf = biPredSubPuL0;
      cDistParam.org.buf = biPredSubPuL1;

      Distortion minCost = cDistParam.distFunc( cDistParam );

      // Scale the centre cost: halve it, then take off a further quarter.
      minCost >>= 1;
      minCost  -= ( minCost >> 2 );

      Mv &curDMv = cu.cs->m_dmvrMvCache[cu.mvdL0SubPuOff + num];

      if( minCost < ( dx * dy ) )
      {
        // Cost already low enough: no refinement for this sub-block.
        curDMv = Mv( 0, 0 );
      }
      else
      {
        int16_t deltaMV[2] = { 0, 0 };

        pSADsArray[0] = minCost;

        xBIPMVRefine( cDistParam, biPredSubPuL0, biPredSubPuL1, minCost, deltaMV, m_SADsArray );

        // Move to the cost entry of the selected position (no-op for a zero delta).
        pSADsArray += deltaMV[1] * sadStride + deltaMV[0];

        int16_t totalDeltaMV[2] = { static_cast<int16_t>( deltaMV[0] * ( 1 << mvShift ) ),
                                    static_cast<int16_t>( deltaMV[1] * ( 1 << mvShift ) ) };

        xDMVRSubPixelErrorSurface( totalDeltaMV, deltaMV, pSADsArray );

        curDMv = Mv( totalDeltaMV[0], totalDeltaMV[1] );

        // The refinement is mirrored: added for L0, subtracted for L1.
        Mv mv0 = mergeMv[REF_PIC_LIST_0] + curDMv; mv0.clipToStorageBitDepth();
        Mv mv1 = mergeMv[REF_PIC_LIST_1] - curDMv; mv1.clipToStorageBitDepth();

        const Mv refinedMv[] = { mv0, mv1 };

        // Fetch padded reference samples wherever the integer sample position changed.
        for( int list = 0; list < 2; list++ )
        {
          const Mv& refined = refinedMv[list];
          const Mv& merge   = mergeMv[list];

          if( ( refined.hor >> mvShift ) != ( merge.hor >> mvShift ) || ( refined.ver >> mvShift ) != ( merge.ver >> mvShift ) )
          {
            xPrefetchPad( subPu, *padPerList[list], list == 0 ? REF_PIC_LIST_0 : REF_PIC_LIST_1, true );
          }

          if( hasChroma && ( ( refined.hor >> mvShiftX ) != ( merge.hor >> mvShiftX ) || ( refined.ver >> mvShiftY ) != ( merge.ver >> mvShiftY ) ) )
          {
            xPrefetchPad( subPu, *padPerList[list], list == 0 ? REF_PIC_LIST_0 : REF_PIC_LIST_1, false );
          }
        }

        subPu.mv[0][0] = mv0;
        subPu.mv[1][0] = mv1;
      }

      // BIO is skipped for sub-blocks whose (possibly refined) cost is below the threshold.
      const bool bioAppliedSubblk = bioApplied && !( minCost < bioEnabledThres );

      // The L0 prediction is written directly into the destination sub-block, which is then averaged in place.
      xFinalPaddedMCForDMVR( subPu, subPredBuf, srcPred1, cYuvRefBuffDMVRL0, cYuvRefBuffDMVRL1, bioAppliedSubblk, mergeMv );
      xWeightedAverage     ( subPu, subPredBuf, srcPred1, subPredBuf, bds, clpRngs, bioAppliedSubblk );

      num++;
    }
  }
}
1987
1988
/// Intra block copy prediction for one component.
///
/// The block vector (cu.mv[REF_PIC_LIST_0][0], converted to integer precision) is
/// added to the block position and wrapped into the virtual IBC buffer of the CTU
/// row the CU belongs to. The referenced area is copied into predBuf; when it
/// crosses the right edge of the buffer, the copy is done in two parts, the second
/// one starting again at column 0.
void InterPrediction::xIntraBlockCopy( CodingUnit &cu, PelUnitBuf &predBuf, const ComponentID compID )
{
  const unsigned int lcuWidth       = cu.sps->getMaxCUWidth();
  const int          shiftSampleHor = getComponentScaleX( compID, cu.chromaFormat );
  const int          shiftSampleVer = getComponentScaleY( compID, cu.chromaFormat );
  const int          ctuSizeVerLog2 = getLog2(lcuWidth) - shiftSampleVer;

  // IBC buffer width in samples of this component.
  const auto ibcWidth = ( m_IBCBufferWidth >> shiftSampleHor );

  auto& dstComp = predBuf.bufs[compID];

  Mv bv = cu.mv[REF_PIC_LIST_0][0];
  bv.changePrecision(MV_PRECISION_INTERNAL, MV_PRECISION_INT);

  // Chroma components use the Cb block position and a sub-sampled block vector.
  int refx = ( compID == COMPONENT_Y ) ? cu.Y().x + bv.hor : cu.Cb().x + (bv.hor >> shiftSampleHor);
  int refy = ( compID == COMPONENT_Y ) ? cu.Y().y + bv.ver : cu.Cb().y + (bv.ver >> shiftSampleVer);

  // Wrap into the buffer: horizontally by its width, vertically by the CTU height.
  refx &= (ibcWidth - 1);
  refy &= ((1 << ctuSizeVerLog2) - 1);

  const int lineIdx = cu.lumaPos().y / cu.slice->getSPS()->getMaxCUHeight();

  if (refx + dstComp.width <= ibcWidth)
  {
    // Reference area lies entirely inside the buffer: single copy.
    const CompArea srcArea(compID, Position(refx, refy), Size(dstComp.width, dstComp.height));
    const CPelBuf  refBuf = cu.cs->m_virtualIBCbuf[lineIdx].getBuf( srcArea );
    dstComp.copyFrom(refBuf);
    return;
  }

  //wrap around: columns up to the right buffer edge first ...
  const int      headWidth = ibcWidth - refx;
  const CompArea headArea(compID, Position(refx, refy), Size(headWidth, dstComp.height));
  const CPelBuf  headSrc = cu.cs->m_virtualIBCbuf[lineIdx].getBuf( headArea );
  PelBuf         headDst = PelBuf(dstComp.bufAt(Position(0, 0)), dstComp.stride, Size(headWidth, dstComp.height));
  headDst.copyFrom(headSrc);

  // ... then the remaining columns, read from the left buffer edge.
  const int      tailWidth = refx + dstComp.width - ibcWidth;
  const CompArea tailArea(compID, Position(0, refy), Size(tailWidth, dstComp.height));
  const CPelBuf  tailSrc = cu.cs->m_virtualIBCbuf[lineIdx].getBuf( tailArea );
  PelBuf         tailDst = PelBuf(dstComp.bufAt(Position(ibcWidth - refx, 0)), dstComp.stride, Size(tailWidth, dstComp.height));
  tailDst.copyFrom(tailSrc);
}
2033
2034
#if JVET_O1170_CHECK_BV_AT_DECODER
2035
void InterPrediction::resetIBCBuffer(const ChromaFormat chromaFormatIDC, const int ctuSize)
2036
{
2037
  const UnitArea area = UnitArea(chromaFormatIDC, Area(0, 0, m_IBCBufferWidth, ctuSize));
2038
  m_IBCBuffer.getBuf(area).fill(-1);
2039
}
2040
2041
void InterPrediction::resetVPDUforIBC(const ChromaFormat chromaFormatIDC, const int ctuSize, const int vSize, const int xPos, const int yPos)
2042
{
2043
  const UnitArea area = UnitArea(chromaFormatIDC, Area(xPos & (m_IBCBufferWidth - 1), yPos & (ctuSize - 1), vSize, vSize));
2044
  m_IBCBuffer.getBuf(area).fill(-1);
2045
}
2046
2047
bool InterPrediction::isLumaBvValid(const int ctuSize, const int xCb, const int yCb, const int width, const int height, const int xBv, const int yBv)
2048
{
2049
  if(((yCb + yBv) & (ctuSize - 1)) + height > ctuSize)
2050
  {
2051
    return false;
2052
  }
2053
  int refTLx = xCb + xBv;
2054
  int refTLy = (yCb + yBv) & (ctuSize - 1);
2055
  PelBuf buf = m_IBCBuffer.Y();
2056
  for(int x = 0; x < width; x += 4)
2057
  {
2058
    for(int y = 0; y < height; y += 4)
2059
    {
2060
      if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
2061
      if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + refTLy) == -1) return false;
2062
      if(buf.at((x + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
2063
      if(buf.at((x + 3 + refTLx) & (m_IBCBufferWidth - 1), y + 3 + refTLy) == -1) return false;
2064
    }
2065
  }
2066
  return true;
2067
}
2068
#endif
2069
2070
void InterPrediction::xPredInterBlkRPR( const std::pair<int, int>& scalingRatio, const PPS& pps, const ComponentID& compID, const ChromaFormat chFmt, const Picture* refPic, const Mv& mv, const Position blkPos, const int dstWidth, const int dstHeight, Pel* dst, const ptrdiff_t dstStride, const bool bi, const bool wrapRef, const ClpRng& clpRng, const int filterIndex, const bool useAltHpelIf )
2071
0
{
2072
0
  const bool rndRes = !bi;
2073
2074
0
  const int csx = getComponentScaleX( compID, chFmt );
2075
0
  const int csy = getComponentScaleY( compID, chFmt );
2076
2077
0
  const int shiftHor  = MV_FRACTIONAL_BITS_INTERNAL + csx;
2078
0
  const int shiftVer  = MV_FRACTIONAL_BITS_INTERNAL + csy;
2079
2080
0
  const int width     = dstWidth;
2081
0
  const int height    = dstHeight;
2082
2083
0
  const int refPicWidth  = refPic->lwidth();
2084
0
  const int refPicHeight = refPic->lheight();
2085
2086
0
  const PPS* refPPS = refPic->slices[ 0 ]->getPPS();
2087
0
  const auto refBuf = refPic->getRecoBuf( compID, wrapRef );
2088
2089
0
  static constexpr int rprThreshold1 = ( 1 << SCALE_RATIO_BITS ) * 5 / 4;
2090
0
  static constexpr int rprThreshold2 = ( 1 << SCALE_RATIO_BITS ) * 7 / 4;
2091
2092
0
  int xFilter = filterIndex;
2093
0
  if     ( scalingRatio.first > rprThreshold2 ) xFilter = 4;
2094
0
  else if( scalingRatio.first > rprThreshold1 ) xFilter = 3;
2095
2096
0
  int yFilter = filterIndex;
2097
0
  if     ( scalingRatio.second > rprThreshold2 ) yFilter = 4;
2098
0
  else if( scalingRatio.second > rprThreshold1 ) yFilter = 3;
2099
2100
0
  if( isLuma( compID ) && filterIndex == 2 )
2101
0
  {
2102
0
    if( scalingRatio.first  > rprThreshold1 ) xFilter += 2;
2103
0
    if( scalingRatio.second > rprThreshold1 ) yFilter += 2;
2104
0
  }
2105
2106
0
  const int posShift = SCALE_RATIO_BITS - 4;
2107
0
  const int stepX    = ( scalingRatio.first  + 8 ) >> 4;
2108
0
  const int stepY    = ( scalingRatio.second + 8 ) >> 4;
2109
0
  const int offX     = 1 << ( posShift - shiftHor - 1 );
2110
0
  const int offY     = 1 << ( posShift - shiftVer - 1 );
2111
2112
0
  const int64_t posX = ( ( blkPos.x << csx ) - ( pps.getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) ) >> csx;
2113
0
  const int64_t posY = ( ( blkPos.y << csy ) - ( pps.getScalingWindow().getWindowTopOffset()  * SPS::getWinUnitY( chFmt ) ) ) >> csy;
2114
2115
0
  const int     addX = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getHorCollocatedChromaFlag() ) * 8 * ( scalingRatio.first  - SCALE_1X.first  );
2116
0
  const int     addY = isLuma( compID ) ? 0 : int( 1 - refPic->cs->sps->getVerCollocatedChromaFlag() ) * 8 * ( scalingRatio.second - SCALE_1X.second );
2117
2118
0
  int64_t x0Int = ( ( posX << ( 4 + csx ) ) + mv.getHor() ) * (int64_t) scalingRatio.first + addX;
2119
0
          x0Int = SIGN( x0Int ) * ( ( std::abs( x0Int ) + ( (int64_t) 1 << ( 7 + csx ) ) ) >> ( 8 + csx ) )
2120
0
                + ( ( refPPS->getScalingWindow().getWindowLeftOffset() * SPS::getWinUnitX( chFmt ) ) << ( ( posShift - csx ) ) );
2121
2122
0
  int64_t y0Int = ( ( posY << ( 4 + csy ) ) + mv.getVer() ) * (int64_t) scalingRatio.second + addY;
2123
0
          y0Int = SIGN( y0Int ) * ( ( std::abs( y0Int ) + ( (int64_t) 1 << ( 7 + csy ) ) ) >> ( 8 + csy ) )
2124
0
                  + ( ( refPPS->getScalingWindow().getWindowTopOffset() * SPS::getWinUnitY( chFmt ) ) << ( ( posShift - csy ) ) );
2125
2126
0
  const int extSize = isLuma( compID ) ? 1 : 2;
2127
2128
0
  const int vFilterSize = isLuma( compID ) ? NTAPS_LUMA : NTAPS_CHROMA;
2129
2130
0
  int yInt0 = ( (int32_t) y0Int + offY ) >> posShift;
2131
0
      yInt0 = Clip3( -( NTAPS_LUMA / 2 ), ( refPicHeight >> csy ) + ( NTAPS_LUMA / 2 ), yInt0 );
2132
2133
0
  int xInt0 = ( (int32_t) x0Int + offX ) >> posShift;
2134
0
      xInt0 = Clip3( -( NTAPS_LUMA / 2 ), ( refPicWidth >> csx ) + ( NTAPS_LUMA / 2 ), xInt0 );
2135
2136
0
  int refHeight = ( ( ( (int32_t) y0Int + ( height - 1 ) * stepY ) + offY ) >> posShift )
2137
0
                - ( ( ( (int32_t) y0Int +              0 * stepY ) + offY ) >> posShift ) + 1;
2138
0
  refHeight = std::max<int>( 1, refHeight );
2139
2140
0
  CHECK( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 < refHeight + vFilterSize - 1 + extSize,
2141
0
                     "Buffer size is not enough, scaling more than MAX_SCALING_RATIO" );
2142
2143
0
  Pel buffer[ ( MAX_CU_SIZE + 16 ) * ( MAX_CU_SIZE * MAX_SCALING_RATIO + 16 ) ];
2144
0
  int tmpStride = width;
2145
2146
0
  const int filtHeight = refHeight + vFilterSize - 1 + extSize;
2147
  // only need special case for bottom margin, because all other directions are clamped to -4/+4, which should always fit within the margin
2148
0
  const int maxFiltHeight = std::min( filtHeight, ( (int) ( refPicHeight + refPic->margin ) >> csy ) - yInt0 );
2149
2150
0
  int col;
2151
0
  for( col = 0; col < width; col++ )
2152
0
  {
2153
0
    int posX = (int32_t) x0Int + col * stepX;
2154
0
    int xInt = ( posX + offX ) >> posShift;
2155
0
        xInt = Clip3( -( NTAPS_LUMA / 2 ), ( refPicWidth >> csx ) + ( NTAPS_LUMA / 2 ), xInt );
2156
0
    int xFrac = ( ( posX + offX ) >> ( posShift - shiftHor ) ) & ( ( 1 << shiftHor ) - 1 );
2157
2158
0
    CHECK( xInt0 > xInt, "Wrong horizontal starting point" );
2159
2160
0
    const Pel* refPtr    = refBuf.bufAt( xInt, yInt0 );
2161
0
    ptrdiff_t  refStride = refBuf.stride;
2162
2163
0
    m_if.filterHor( compID,
2164
0
                    GET_OFFSETY( refPtr, refStride, -( vFilterSize / 2 - 1 ) ), refStride,
2165
0
                    GET_OFFSETX( buffer, tmpStride, col ),                      tmpStride,
2166
0
                    1, maxFiltHeight,
2167
0
                    xFrac,
2168
0
                    false,
2169
0
                    chFmt,
2170
0
                    clpRng,
2171
0
                    xFilter,
2172
0
                    useAltHpelIf && scalingRatio.first == SCALE_1X.first );
2173
0
  }
2174
2175
  // fill buffer area where source block reaches out of the source image + bottom margin using pixel values from last filtered column
2176
0
  if( filtHeight > maxFiltHeight )
2177
0
  {
2178
0
    CHECK( maxFiltHeight <= 0, "nothing filtered yet. Reference block completely outside?" );
2179
0
    for( int row = maxFiltHeight; row < filtHeight; ++row )
2180
0
    {
2181
0
      memcpy( &buffer[ tmpStride * row ], &buffer[ tmpStride * ( maxFiltHeight - 1 ) ], width * sizeof( buffer[ 0 ] ) );
2182
0
    }
2183
0
  }
2184
2185
0
  for( int row = 0; row < height; row++ )
2186
0
  {
2187
0
    int posY = (int32_t) y0Int + row * stepY;
2188
0
    int yInt = ( posY + offY ) >> posShift;
2189
0
        yInt = Clip3( -( NTAPS_LUMA / 2 ), ( refPicHeight >> csy ) + ( NTAPS_LUMA / 2 ), yInt );
2190
0
    int yFrac = ( ( posY + offY ) >> ( posShift - shiftVer ) ) & ( ( 1 << shiftVer ) - 1 );
2191
2192
0
    CHECK( yInt0 > yInt, "Wrong vertical starting point" );
2193
2194
0
    m_if.filterVer( compID,
2195
0
                    GET_OFFSETY( buffer, tmpStride, ( yInt - yInt0 ) + ( ( vFilterSize >> 1 ) - 1 ) ), tmpStride,
2196
0
                    GET_OFFSETY( dst, dstStride, row ),                                                dstStride,
2197
0
                    width, 1,
2198
0
                    yFrac,
2199
0
                    false,
2200
0
                    rndRes,
2201
0
                    chFmt,
2202
0
                    clpRng,
2203
0
                    yFilter,
2204
0
                    useAltHpelIf && scalingRatio.second == SCALE_1X.second );
2205
0
  }
2206
0
}
2207
2208
}   // namespace vvdec