Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/IntraPrediction.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     IntraPrediction.cpp
45
    \brief    intra prediction class
46
*/
47
48
#include "IntraPrediction.h"
49
#include "Unit.h"
50
#include "UnitTools.h"
51
#include "Rom.h"
52
#include "InterpolationFilter.h"
53
#include "dtrace_next.h"
54
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
// ====================================================================================================================
63
// Tables
64
// ====================================================================================================================
65
66
// MDIS thresholds. initPredIntraParams() indexes this table with Log2( width * height ) >> 1 and enables
// reference filtering / smoothing interpolation when the smaller of the mode distances to HOR_IDX and
// VER_IDX is greater than the selected entry.
const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] =
{
  24,   // index 0:   1xn
  24,   // index 1:   2xn
  24,   // index 2:   4xn
  14,   // index 3:   8xn
  2,    // index 4:  16xn
  0,    // index 5:  32xn
  0,    // index 6:  64xn
  0     // index 7: 128xn
};
77
78
//NOTE: Bit-Limit - 24-bit source
79
/** Planar intra prediction, plain C++ implementation.
 *
 *  Every output sample combines a horizontal interpolation (left reference towards the top-right reference)
 *  and a vertical interpolation (top reference towards the bottom-left reference), followed by a rounding shift.
 *
 *  \param pDst  destination block; its width, height, stride and buf members are used
 *  \param pSrc  reference samples: row 0 is read as the top reference and row 1 as the left reference,
 *               both starting at x == 1
 */
void xPredIntraPlanar_Core( PelBuf& pDst, const CPelBuf& pSrc )
{
  const uint32_t blkW    = pDst.width;
  const uint32_t blkH    = pDst.height;
  const uint32_t shiftW  = Log2( blkW );
  const uint32_t shiftH  = Log2( blkH );
  const int      numCols = static_cast<int>( blkW );
  const int      numRows = static_cast<int>( blkH );

  int leftRef[MAX_TB_SIZEY + 1], topRef[MAX_TB_SIZEY + 1], vertStep[MAX_TB_SIZEY], horStep[MAX_TB_SIZEY];
  const uint32_t rounding = 1 << ( shiftW + shiftH );

  // Fetch numCols + 1 top and numRows + 1 left reference samples
  for( int i = 0; i <= numCols; i++ )
  {
    topRef[i] = pSrc.at( i + 1, 0 );
  }

  for( int i = 0; i <= numRows; i++ )
  {
    leftRef[i] = pSrc.at( i + 1, 1 );
  }

  // The extra sample at the end of each reference is the far anchor of the interpolation
  const int bottomLeft = leftRef[numRows];
  const int topRight   = topRef[numCols];

  // with some optimizations gcc-8 gives spurious "-Wmaybe-uninitialized" warnings here (says leftRef or topRef would be uninitialized here)
  GCC_WARNING_DISABLE_maybe_uninitialized
  for( int i = 0; i < numCols; i++ )
  {
    vertStep[i] = bottomLeft - topRef[i];   // per-row increment of the vertical predictor
    topRef[i] <<= shiftH;
  }

  for( int i = 0; i < numRows; i++ )
  {
    horStep[i] = topRight - leftRef[i];     // per-column increment of the horizontal predictor
    leftRef[i] <<= shiftW;
  }
  GCC_WARNING_RESET

  const uint32_t totalShift = 1 + shiftW + shiftH;
  const uint32_t stride     = pDst.stride;
  Pel*           dstRow     = pDst.buf;

  for( int y = 0; y < numRows; y++ )
  {
    int       horAcc = leftRef[y];
    const int step   = horStep[y];

    for( int x = 0; x < numCols; x++ )
    {
      horAcc    += step;
      topRef[x] += vertStep[x];   // topRef doubles as the running vertical predictor across rows

      dstRow[x] = ( ( horAcc << shiftH ) + ( topRef[x] << shiftW ) + rounding ) >> totalShift;
    }

    dstRow += stride;
  }
}
136
137
/** Position dependent filtering (PDPC) of an already predicted block; predIntraAng() applies it after
 *  planar and DC prediction.
 *
 *  Each sample is pulled towards the left and top reference samples with weights that halve as the
 *  distance from the respective block edge grows.
 *
 *  \param dstBuf  prediction block, filtered in place
 *  \param pSrc    reference samples: row 0 is read as the top reference and row 1 as the left reference,
 *                 both starting at x == 1
 */
void  IntraPredSampleFilter_Core(PelBuf& dstBuf, const CPelBuf& pSrc)
{
  const int numCols = dstBuf.width;
  const int numRows = dstBuf.height;

  const int scale = ( ( Log2( numCols * numRows ) - 2 ) >> 2 );
  CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");

  for( int row = 0; row < numRows; row++ )
  {
    const int weightTop = 32 >> std::min( 31, ( ( row << 1 ) >> scale ) );
    const Pel leftRef   = pSrc.at( row + 1, 1 );

    for( int col = 0; col < numCols; col++ )
    {
      const int weightLeft = 32 >> std::min( 31, ( ( col << 1 ) >> scale ) );
      const Pel topRef     = pSrc.at( col + 1, 0 );
      const Pel cur        = dstBuf.at( col, row );

      dstBuf.at( col, row ) = cur + ( ( weightLeft * ( leftRef - cur ) + weightTop * ( topRef - cur ) + 32 ) >> 6 );
    }
  }
}
158
159
/** Pure horizontal / vertical prediction with PDPC.
 *
 *  Every row is a copy of refMain[1 .. width]; the first min( 3 << scale, width ) samples of the row are
 *  then corrected by a weighted difference between the row's side reference and refMain[0], and clipped.
 *
 *  \param pDsty      first destination row
 *  \param dstStride  distance between destination rows in samples
 *  \param refSide    side reference, sample for row y is refSide[1 + y]
 *  \param width      samples per row
 *  \param height     number of rows
 *  \param scale      PDPC scale controlling how fast the weight decays
 *  \param refMain    main reference, refMain[0] is used as the corner sample
 *  \param clpRng     clipping range applied to the corrected samples
 */
void IntraHorVerPDPC_Core(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height,int scale,const Pel* refMain, const ClpRng& clpRng)
{
  const Pel cornerRef = refMain[0];
  const int pdpcCols  = std::min( 3 << scale, width );   // loop invariant

  for( int row = 0; row < height; row++, pDsty += dstStride )
  {
    memcpy( pDsty, &refMain[1], width * sizeof( Pel ) );

    const int sideDelta = refSide[1 + row] - cornerRef;
    for( int col = 0; col < pdpcCols; col++ )
    {
      const int weight = 32 >> ( ( 2 * col ) >> scale );
      pDsty[col]       = ClipPel( pDsty[col] + ( ( weight * sideDelta + 32 ) >> 6 ), clpRng );
    }
  }
}
176
/** PDPC for angular modes, applied in place to an already predicted block.
 *
 *  Only the first min( 3 << scale, width ) samples of each row are modified. The side reference sample for
 *  column x is found by accumulating invAngle (in 1/512 units, starting from a rounding offset of 256).
 *
 *  \param pDsty      first row of the prediction block
 *  \param dstStride  distance between rows in samples
 *  \param refSide    side reference samples
 *  \param width      samples per row
 *  \param height     number of rows
 *  \param scale      PDPC scale controlling how fast the weight decays
 *  \param invAngle   inverse angle added per column
 */
void IntraAnglePDPC_Core(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height,int scale,int invAngle)
{
  const int pdpcCols = std::min( 3 << scale, width );   // loop invariant

  for( int row = 0; row < height; row++, pDsty += dstStride )
  {
    const Pel* sideRow     = refSide + row + 1;
    int        invAngleAcc = 256;

    for( int col = 0; col < pdpcCols; col++ )
    {
      invAngleAcc += invAngle;

      const int weight = 32 >> ( ( 2 * col ) >> scale );
      const Pel side   = sideRow[invAngleAcc >> 9];

      pDsty[col] = pDsty[col] + ( ( weight * ( side - pDsty[col] ) + 32 ) >> 6 );
    }
  }
}
190
191
/** Fractional-angle luma prediction with a 4-tap filter.
 *
 *  For every row the displacement deltaPos is split into an integer sample offset and a 1/32 fraction.
 *  The fraction selects either the table returned by InterpolationFilter::getChromaFilterTable()
 *  (useCubicFilter) or a smoothing filter built on the fly; the result is clipped only in the cubic case.
 *
 *  \param pDstBuf         destination block
 *  \param dstStride       distance between destination rows in samples
 *  \param refMain         main reference samples
 *  \param width           samples per row
 *  \param height          number of rows
 *  \param deltaPos        displacement of the first row in 1/32 sample units
 *  \param intraPredAngle  displacement increment per row
 *  \param ff_unused       not used by this implementation
 *  \param useCubicFilter  selects the cubic filter table instead of the smoothing filter
 *  \param clpRng          clipping range for the cubic filter output
 */
void IntraPredAngleLuma_Core(Pel* pDstBuf,const ptrdiff_t dstStride,Pel* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff_unused,const bool useCubicFilter,const ClpRng& clpRng)
{
  for( int row = 0; row < height; row++, deltaPos += intraPredAngle )
  {
    const int intOffset = deltaPos >> 5;
    const int frac      = deltaPos & ( 32 - 1 );
    const int halfFrac  = frac >> 1;

    const TFilterCoeff smoothingTaps[4] = { TFilterCoeff( 16 - halfFrac ), TFilterCoeff( 32 - halfFrac ), TFilterCoeff( 16 + halfFrac ), TFilterCoeff( halfFrac ) };
    const TFilterCoeff* taps = useCubicFilter ? InterpolationFilter::getChromaFilterTable( frac ) : smoothingTaps;

    // ref[x] is the first of the four taps for column x
    const Pel* ref    = refMain + intOffset;
    Pel*       dstRow = pDstBuf + row * dstStride;

    for( int col = 0; col < width; col++ )
    {
      const int acc = static_cast<int>( taps[0] * ref[col] ) + static_cast<int>( taps[1] * ref[col + 1] ) + static_cast<int>( taps[2] * ref[col + 2] ) + static_cast<int>( taps[3] * ref[col + 3] ) + 32;

      Pel val = static_cast<Pel>( acc >> 6 );

      if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
      {
        val = ClipPel( val, clpRng );
      }

      dstRow[col] = val;
    }
  }
}
224
225
/** Fractional-angle chroma prediction using 2-tap linear interpolation at 1/32 sample accuracy.
 *
 *  \param pDstBuf         destination block
 *  \param dstStride       distance between destination rows in samples
 *  \param pBorder         main reference samples
 *  \param width           samples per row
 *  \param height          number of rows
 *  \param deltaPos        displacement of the first row in 1/32 sample units
 *  \param intraPredAngle  displacement increment per row
 */
void IntraPredAngleChroma_Core(Pel* pDstBuf,const ptrdiff_t dstStride,int16_t* pBorder,int width,int height,int deltaPos,int intraPredAngle)
{
  for( int row = 0; row < height; row++, deltaPos += intraPredAngle, pDstBuf += dstStride )
  {
    const int intOffset = deltaPos >> 5;
    const int frac      = deltaPos & ( 32 - 1 );

    // Linear filtering between neighbouring reference samples ref[col] and ref[col + 1]
    const Pel* ref = pBorder + intOffset + 1;

    for( int col = 0; col < width; col++ )
    {
      const int leftSample  = ref[col];
      const int rightSample = ref[col + 1];

      pDstBuf[col] = static_cast<Pel>( ( ( 32 - frac ) * leftSample + frac * rightSample + 16 ) >> 5 );
    }
  }
}
246
247
// ====================================================================================================================
248
// Constructor / destructor / initialize
249
// ====================================================================================================================
250
251
/** Constructs the predictor with the plain C++ kernels installed.
 *
 *  \param enableOpt  when SIMD support is compiled in, additionally runs the platform specific
 *                    initialisation (presumably replacing the kernels with SIMD versions; see initIntraPredictionX86 / ARM)
 */
IntraPrediction::IntraPrediction( bool enableOpt )
:  m_pMdlmTemp( nullptr )
,  m_currChromaFormat( NUM_CHROMA_FORMAT )
{
  // default kernel set
  xPredIntraPlanar      = xPredIntraPlanar_Core;
  IntraPredSampleFilter = IntraPredSampleFilter_Core;
  IntraHorVerPDPC       = IntraHorVerPDPC_Core;
  IntraAnglePDPC        = IntraAnglePDPC_Core;
  IntraPredAngleChroma  = IntraPredAngleChroma_Core;
  IntraPredAngleLuma    = IntraPredAngleLuma_Core;

#if ENABLE_SIMD_OPT_INTRAPRED
  if( enableOpt )
  {
#if defined( TARGET_SIMD_X86 )
    initIntraPredictionX86();
#endif
#if defined( TARGET_SIMD_ARM )
    initIntraPredictionARM();
#endif
  }
#endif // ENABLE_SIMD_OPT_INTRAPRED
}
274
275
// Releases the temporary buffer through destroy().
IntraPrediction::~IntraPrediction()
{
  this->destroy();
}
279
280
void IntraPrediction::destroy()
281
17.3k
{
282
17.3k
  delete[] m_pMdlmTemp;
283
17.3k
  m_pMdlmTemp = nullptr;
284
17.3k
}
285
286
/** Stores the chroma format and allocates the temporary buffer on first use.
 *
 *  \param chromaFormatIDC  chroma format remembered in m_currChromaFormat
 *  \param bitDepthY        not used by this function
 */
void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
{
  m_currChromaFormat = chromaFormatIDC;

  if( m_pMdlmTemp != nullptr )
  {
    return;   // buffer already allocated
  }

  // MDLM will use top-above and left-below samples.
  m_pMdlmTemp = new Pel[(2 * MAX_TB_SIZEY + 1)*(2 * MAX_TB_SIZEY + 1)];
}
295
296
// ====================================================================================================================
297
// Public member functions
298
// ====================================================================================================================
299
300
// Function for calculating DC value of the reference samples used in Intra prediction
301
//NOTE: Bit-Limit - 25-bit source
302
/** Computes the DC value of the reference samples.
 *
 *  For square blocks the top and left references are both averaged; otherwise only the references along
 *  the longer side are used, so the divisor always stays a power of two.
 *
 *  \param pSrc     reference samples: row 0 is read as the top reference and row 1 as the left reference
 *  \param dstSize  size of the block to be predicted, must not be empty
 *  \return         rounded average of the selected reference samples
 */
Pel IntraPrediction::xGetPredValDc( const CPelBuf& pSrc, const Size& dstSize )
{
  CHECK( dstSize.width == 0 || dstSize.height == 0, "Empty area provided" );

  const int  width     = dstSize.width;
  const int  height    = dstSize.height;
  const auto denom     = (width == height) ? (width << 1) : std::max(width,height);
  const auto divShift  = Log2(denom);
  const auto divOffset = (denom >> 1);
  const int  firstRef  = m_ipaParam.multiRefIndex + 1;   // first reference sample taking the selected reference line into account

  int sum = 0;

  if( width >= height )
  {
    for( int x = 0; x < width; x++ )
    {
      sum += pSrc.at( firstRef + x, 0 );
    }
  }

  if( width <= height )
  {
    for( int y = 0; y < height; y++ )
    {
      sum += pSrc.at( firstRef + y, 1 );
    }
  }

  return static_cast<Pel>( ( sum + divOffset ) >> divShift );
}
334
335
int IntraPrediction::getWideAngle( int width, int height, int predMode )
336
1.73M
{
337
1.73M
  if ( predMode > DC_IDX && predMode <= VDIA_IDX )
338
1.20M
  {
339
1.20M
    int modeShift[] = { 0, 6, 10, 12, 14, 15 };
340
1.20M
    int deltaSize = abs(Log2(width) - Log2(height));
341
1.20M
    if (width > height && predMode < 2 + modeShift[deltaSize])
342
34.2k
    {
343
34.2k
      predMode += (VDIA_IDX - 1);
344
34.2k
    }
345
1.16M
    else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize])
346
60.9k
    {
347
60.9k
      predMode -= (VDIA_IDX - 1);
348
60.9k
    }
349
1.20M
  }
350
1.73M
  return predMode;
351
1.73M
}
352
353
void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf& piPred, const CodingUnit& cu)
354
1.54M
{
355
1.54M
  const ComponentID    compID       = compId;
356
1.54M
  const ChannelType    channelType  = toChannelType( compID );
357
1.54M
  const uint32_t       uiDirMode = cu.bdpcmM[channelType] ? BDPCM_IDX : CU::getFinalIntraMode(cu, channelType);
358
359
1.54M
  CHECK( Log2(piPred.width) > 7, "Size not allowed" );
360
361
//  const int multiRefIdx = m_ipaParam.multiRefIndex;
362
1.54M
  const int srcStride  = m_refBufferStride[compID];
363
1.54M
  const int srcHStride = 2;
364
365
1.54M
  const CPelBuf& srcBuf = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride);
366
1.54M
  const ClpRng& clpRng(cu.cs->slice->clpRngs[compID]);
367
368
1.54M
  switch (uiDirMode)
369
1.54M
  {
370
109k
    case(PLANAR_IDX): xPredIntraPlanar(piPred, srcBuf); break;
371
253k
    case(DC_IDX):     xPredIntraDc    ( piPred, srcBuf ); break;
372
76.3k
    case(BDPCM_IDX):  xPredIntraBDPCM ( piPred, srcBuf, cu.bdpcmM[channelType], clpRng); break;
373
1.10M
    default:          xPredIntraAng   ( piPred, srcBuf, channelType, clpRng); break;
374
1.54M
  }
375
376
1.54M
  if (m_ipaParam.applyPDPC)
377
1.13M
  {
378
1.13M
    if (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX)
379
324k
    {
380
324k
      IntraPredSampleFilter(piPred, srcBuf);
381
324k
    }
382
1.13M
  }
383
1.54M
}
384
385
void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf& piPred, const CodingUnit& cu, const CompArea& chromaArea, int intraDir)
386
184k
{
387
184k
  CHECK( piPred.width > MAX_TB_SIZEY || piPred.height > MAX_TB_SIZEY, "not enough memory");
388
184k
  const int iLumaStride = 2 * MAX_TB_SIZEY + 1;
389
184k
  PelBuf Temp = PelBuf(m_pMdlmTemp + iLumaStride + 1, iLumaStride, Size(chromaArea));
390
391
184k
  int a, b, iShift;
392
184k
  xGetLMParameters(cu, compID, chromaArea, a, b, iShift); // th shift result is unsigned
393
394
  ////// final prediction
395
184k
  piPred.copyFrom(Temp);
396
184k
  piPred.linearTransform(a, iShift, b, true, cu.cs->slice->clpRngs[compID]);
397
184k
}
398
399
/** Function for deriving DC intra prediction. This function fills the prediction block with the DC value of the reference samples (intra coding).
400
 */
401
402
// Fills the whole destination block with the DC value returned by xGetPredValDc().
void IntraPrediction::xPredIntraDc( PelBuf& pDst, const CPelBuf& pSrc )
{
  pDst.fill( xGetPredValDc( pSrc, pDst ) );
}
407
408
// Function for initialization of intra prediction parameters
409
void IntraPrediction::initPredIntraParams(const CodingUnit& cu, const CompArea area, const SPS& sps)
410
1.73M
{
411
1.73M
  const ComponentID compId = area.compID;
412
1.73M
  const ChannelType chType = toChannelType(compId);
413
414
1.73M
  const bool        useISP = NOT_INTRA_SUBPARTITIONS != cu.ispMode && isLuma( chType );
415
416
1.73M
  const Size   cuSize    = Size( cu.blocks[compId].width, cu.blocks[compId].height );
417
1.73M
  const Size   puSize    = Size( area.width, area.height );
418
1.73M
  const Size&  blockSize = useISP ? cuSize : puSize;
419
1.73M
  const int      dirMode = CU::getFinalIntraMode(cu, chType);
420
1.73M
  const int     predMode = getWideAngle( blockSize.width, blockSize.height, dirMode );
421
422
1.73M
  m_ipaParam.isModeVer            = predMode >= DIA_IDX;
423
1.73M
  m_ipaParam.multiRefIndex        = isLuma (chType) ? cu.multiRefIdx : 0 ;
424
1.73M
  m_ipaParam.refFilterFlag        = false;
425
1.73M
  m_ipaParam.interpolationFlag    = false;
426
1.73M
  m_ipaParam.applyPDPC            = (puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY) && m_ipaParam.multiRefIndex == 0;
427
428
1.73M
  const int    intraPredAngleMode = (m_ipaParam.isModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
429
430
431
1.73M
  int absAng = 0;
432
1.73M
  if (dirMode > DC_IDX && dirMode < NUM_LUMA_MODE) // intraPredAngle for directional modes
433
1.20M
  {
434
1.20M
    static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
435
1.20M
    static const int invAngTable[32] = {
436
1.20M
      0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
437
1.20M
      512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16
438
1.20M
    };   // (512 * 32) / Angle
439
440
1.20M
    const int     absAngMode         = abs(intraPredAngleMode);
441
1.20M
    const int     signAng            = intraPredAngleMode < 0 ? -1 : 1;
442
1.20M
                  absAng             = angTable  [absAngMode];
443
444
1.20M
    m_ipaParam.absInvAngle           = invAngTable[absAngMode];
445
1.20M
    m_ipaParam.intraPredAngle        = signAng * absAng;
446
1.20M
    if (intraPredAngleMode < 0)
447
214k
    {
448
214k
      m_ipaParam.applyPDPC = false;
449
214k
    }
450
986k
    else if (intraPredAngleMode > 0)
451
486k
    {
452
486k
      const int sideSize = m_ipaParam.isModeVer ? puSize.height : puSize.width;
453
486k
      const int maxScale = 2;
454
455
486k
      m_ipaParam.angularScale = std::min(maxScale, floorLog2(sideSize) - (floorLog2(3 * m_ipaParam.absInvAngle - 2) - 8));
456
486k
      m_ipaParam.applyPDPC &= m_ipaParam.angularScale >= 0;
457
486k
    }
458
1.20M
  }
459
460
  // high level conditions and DC intra prediction
461
1.73M
  if( !isLuma( chType )
462
715k
    || useISP
463
697k
    || CU::isMIP( cu, chType ) //th remove this
464
655k
    || m_ipaParam.multiRefIndex
465
517k
    || DC_IDX == dirMode
466
1.73M
    )
467
1.24M
  {
468
1.24M
  }
469
492k
  else if (cu.bdpcmM[chType])
470
7.02k
  {
471
7.02k
    m_ipaParam.refFilterFlag = false;
472
7.02k
  }
473
485k
  else if (dirMode == PLANAR_IDX) // Planar intra prediction
474
26.3k
  {
475
26.3k
    m_ipaParam.refFilterFlag = puSize.width * puSize.height > 32 ? true : false;
476
26.3k
  }
477
459k
  else if (!useISP)// HOR, VER and angular modes (MDIS)
478
459k
  {
479
459k
    bool filterFlag = false;
480
459k
    {
481
459k
      const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
482
459k
      const int log2Size = (Log2(puSize.width * puSize.height) >> 1);
483
459k
      CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
484
459k
      filterFlag = (diff > m_aucIntraFilter[log2Size]);
485
459k
    }
486
487
    // Selelection of either ([1 2 1] / 4 ) refrence filter OR Gaussian 4-tap interpolation filter
488
459k
    if (filterFlag)
489
367k
    {
490
367k
      const bool isRefFilter       =  isIntegerSlope(absAng);
491
367k
      CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" );
492
367k
      m_ipaParam.refFilterFlag     =  isRefFilter;
493
367k
      m_ipaParam.interpolationFlag = !isRefFilter;
494
367k
    }
495
459k
  }
496
1.73M
}
497
498
}   // namespace vvenc
499
500
#ifdef TARGET_SIMD_X86
501
#include "x86/CommonDefX86.h"
502
#endif
503
504
namespace vvenc {
505
506
/** Function for deriving the simplified angular intra predictions.
507
*
508
* This function derives the prediction samples for the angular mode based on the prediction direction indicated by
509
* the prediction mode index. The prediction direction is given by the displacement of the bottom row of the block and
510
* the reference row above the block in the case of vertical prediction or displacement of the rightmost column
511
* of the block and reference column left from the block in the case of the horizontal prediction. The displacement
512
* is signalled at 1/32 pixel accuracy. When projection of the predicted pixel falls in between reference samples,
513
* the predicted value for the pixel is linearly interpolated from the reference samples. All reference samples are taken
514
* from the extended main reference.
515
*/
516
//NOTE: Bit-Limit - 25-bit source
517
518
// Angular intra prediction (including pure horizontal / vertical) for one block.
//
//   pDst         destination block
//   pSrc         reference samples: row 0 is read as the top reference and row 1 as the left reference
//   channelType  selects the luma (4-tap) or chroma (2-tap) interpolation for fractional angles
//   clpRng       clipping range for the predicted samples
//
// Mode orientation, reference line index, angle, inverse angle and the PDPC decision are taken from
// m_ipaParam. Horizontal modes are predicted as vertical modes into a temporary array with width and height
// swapped, and the result is transposed into pDst at the end.
void IntraPrediction::xPredIntraAng( PelBuf& pDst, const CPelBuf& pSrc, const ChannelType channelType, const ClpRng& clpRng)
{
  int width  = int( pDst.width );
  int height = int( pDst.height );

  const bool bIsModeVer     = m_ipaParam.isModeVer;
  const int  multiRefIdx    = m_ipaParam.multiRefIndex;
  const int  intraPredAngle = m_ipaParam.intraPredAngle;
  const int  absInvAngle    = m_ipaParam.absInvAngle;

  Pel* refMain;
  Pel* refSide;

  Pel  refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
  Pel  refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];

  // Initialize the Main and Left reference array.
  if (intraPredAngle < 0)
  {
    // Both references are stored with an offset so that the main reference can be extended to negative indices
    memcpy(&refAbove[height],pSrc.buf,(width + 2 + multiRefIdx)*sizeof(Pel));
    for (int y = 0; y <= height + 1 + multiRefIdx; y++)
    {
      refLeft[y + width] = pSrc.at(y, 1);
    }
    refMain = bIsModeVer ? refAbove + height : refLeft + width;
    refSide = bIsModeVer ? refLeft + width : refAbove + height;

    // Extend the Main reference to the left.
    int sizeSide = bIsModeVer ? height : width;
    for (int k = -sizeSide; k <= -1; k++)
    {
      refMain[k] = refSide[std::min((-k * absInvAngle + 256) >> 9, sizeSide)];
    }
  }
  else
  {
    memcpy(&refAbove[0], pSrc.buf, ((m_topRefLength)+multiRefIdx + 1) * sizeof(Pel));
    for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++)
    {
      refLeft[y] = pSrc.at(y, 1);
    }

    refMain = bIsModeVer ? refAbove : refLeft;
    refSide = bIsModeVer ? refLeft : refAbove;

    // Extend main reference to right using replication
    const int log2Ratio = Log2(width) - Log2(height);
    const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
    const int maxIndex  = (multiRefIdx << s) + 2;
    const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
    const Pel val       = refMain[refLength + multiRefIdx];
    for (int z = 1; z <= maxIndex; z++)
    {
      refMain[refLength + multiRefIdx + z] = val;
    }
  }

  // swap width/height if we are doing a horizontal mode:
  if (!bIsModeVer)
  {
    std::swap(width, height);
  }
  Pel tempArray[MAX_CU_SIZE*MAX_CU_SIZE];
  const int dstStride = bIsModeVer ? pDst.stride : MAX_CU_SIZE;
  Pel* pDstBuf = bIsModeVer ? pDst.buf : tempArray;

  // compensate for line offset in reference line buffers
  refMain += multiRefIdx;
  refSide += multiRefIdx;

  Pel* pDsty = pDstBuf;

  if( intraPredAngle == 0 )  // pure vertical or pure horizontal
  {
    if (m_ipaParam.applyPDPC)
    {
      const int scale   = (Log2(width * height) - 2) >> 2;
      IntraHorVerPDPC(pDsty,dstStride,refSide,width,height,scale,refMain,clpRng);
    }
    else
    {
      // every row is a plain copy of the main reference
      for( int y = 0; y < height; y++ )
      {
        memcpy(pDsty,&refMain[1],width*sizeof(Pel));
        pDsty += dstStride;
      }
    }
  }
  else
  {
    if( !isIntegerSlope( abs( intraPredAngle ) ) )
    {
      // fractional displacement: interpolation between reference samples is required
      int deltaPos = intraPredAngle * ( 1 + multiRefIdx );
      if( isLuma( channelType ) )
      {
        if( width <= 2 )
        {
          // Narrow blocks are handled here instead of through the IntraPredAngleLuma kernel.
          // deltaPos is advanced directly: the slope is known to be fractional in this branch and
          // deltaPos is not read again afterwards.
          const bool useCubicFilter = !m_ipaParam.interpolationFlag;

          for( int y = 0; y < height; y++, deltaPos += intraPredAngle, pDsty += dstStride )
          {
            const int deltaInt   = deltaPos >> 5;
            const int deltaFract = deltaPos & 31;

            const TFilterCoeff intraSmoothingFilter[4] = { TFilterCoeff( 16 - ( deltaFract >> 1 ) ),
                                                           TFilterCoeff( 32 - ( deltaFract >> 1 ) ),
                                                           TFilterCoeff( 16 + ( deltaFract >> 1 ) ),
                                                           TFilterCoeff(      ( deltaFract >> 1 ) ) };
            const TFilterCoeff* const f =
              ( useCubicFilter ) ? InterpolationFilter::getChromaFilterTable( deltaFract ) : intraSmoothingFilter;

            for( int x = 0; x < width; x++ )
            {
              Pel p[4];

              p[0] = refMain[deltaInt + x + 0];
              p[1] = refMain[deltaInt + x + 1];
              p[2] = refMain[deltaInt + x + 2];
              p[3] = refMain[deltaInt + x + 3];

              Pel val = ( f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 32 ) >> 6;

              pDsty[x] = ClipPel( val, clpRng );   // always clip even though not always needed
            }
          }
        }
        else
        {
          IntraPredAngleLuma(pDstBuf, dstStride, refMain, width, height, deltaPos, intraPredAngle, nullptr, !m_ipaParam.interpolationFlag, clpRng);
        }
      }
      else
      {
        IntraPredAngleChroma(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
      }
    }
    else
    {
      for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
      {
        const int deltaInt   = deltaPos >> 5;
        // Just copy the integer samples
        memcpy(pDsty,refMain  + deltaInt + 1,width*sizeof(Pel));
      }
    }

    if (m_ipaParam.applyPDPC)
    {
      // restart at the first row, pDsty may have been advanced above
      pDsty = pDstBuf;
      IntraAnglePDPC(pDsty,dstStride,refSide,width,height,m_ipaParam.angularScale,absInvAngle);
    }
  } // else

  // Flip the block if this is the horizontal mode
  if( !bIsModeVer )
  {
    pDst.transposedFrom( CPelBuf( pDstBuf, dstStride, width, height) );
  }
}
682
683
/** BDPCM prediction: replicates the left (dirMode 1) or the top (dirMode 2) reference samples over the block.
 *
 *  \param pDst     destination block
 *  \param pSrc     reference samples: the first row is read as the top reference and the row at offset
 *                  pSrc.stride as the left reference
 *  \param dirMode  1 copies the left reference sample along every row, 2 copies the top reference row
 *                  into every row
 *  \param clpRng   not used by this function
 */
void IntraPrediction::xPredIntraBDPCM(PelBuf& pDst, const CPelBuf& pSrc, const uint32_t dirMode, const ClpRng& clpRng)
{
  const int numCols   = pDst.width;
  const int numRows   = pDst.height;

  const int dstStride = pDst.stride;
  const int refStride = pSrc.stride;

  CHECK(!(dirMode == 1 || dirMode == 2), "Incorrect BDPCM mode parameter.");

  const bool replicateLeft = ( dirMode == 1 );
  Pel*       dstRow        = pDst.buf;

  for( int row = 0; row < numRows; row++, dstRow += dstStride )
  {
    if( replicateLeft )
    {
      const Pel leftRef = pSrc.buf[( row + 1 ) + refStride];
      for( int col = 0; col < numCols; col++ )
      {
        dstRow[col] = leftRef;
      }
    }
    else
    {
      for( int col = 0; col < numCols; col++ )
      {
        dstRow[col] = pSrc.buf[col + 1];
      }
    }
  }
}
719
720
inline bool isAboveLeftAvailable  ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT );
721
inline int  isAboveAvailable      ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *validFlags );
722
inline int  isLeftAvailable       ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *validFlags );
723
inline int  isAboveRightAvailable ( const CodingUnit &cu, const ChannelType& chType, const Position& posRT, const uint32_t numUnits, const uint32_t unitHeight, bool *validFlags );
724
inline int  isBelowLeftAvailable  ( const CodingUnit &cu, const ChannelType& chType, const Position& posLB, const uint32_t numUnits, const uint32_t unitHeight, bool *validFlags );
725
726
void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea& area, const bool forceRefFilterFlag)
727
721k
{
728
721k
  const CodingStructure& cs   = *cu.cs;
729
730
721k
  if (!forceRefFilterFlag)
731
671k
  {
732
671k
    initPredIntraParams(cu, area, *cs.sps);
733
671k
  }
734
735
721k
  Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
736
721k
  Pel *refBufFiltered   = m_refBuffer[area.compID][PRED_BUF_FILTERED];
737
738
721k
  setReferenceArrayLengths(area);
739
740
  // ----- Step 1: unfiltered reference samples -----
741
721k
  xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu );
742
  // ----- Step 2: filtered reference samples -----
743
721k
  if( m_ipaParam.refFilterFlag || forceRefFilterFlag )
744
53.8k
  {
745
53.8k
    xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.multiRefIdx );
746
53.8k
  }
747
721k
}
748
749
void IntraPrediction::reset()
750
74.0k
{
751
74.0k
  m_lastCh = MAX_NUM_CH;
752
74.0k
  m_lastArea = Area(0,0,0,0);
753
74.0k
}
754
755
// Fills refBufUnfiltered with the intra reference samples of `area`, read from recoBuf.
//
// Buffer layout, as written below:
//   - top row  : indices [0, predSize + multiRefIdx], starting with the above-left sample(s)
//   - left col : indices predStride + [0, predHSize + multiRefIdx], again starting with the above-left sample(s)
//   predStride (= predSize + 1 + multiRefIdx) is also stored in m_refBufferStride[area.compID].
//
// m_neighborFlags holds one availability flag per unit: index 0 is the last below-left unit, indices grow
// upwards along the left edge to totalLeftUnits (the above-left corner) and then rightwards along the top edge.
//
// The availability analysis is only redone when area or channel type differ from the cached m_lastArea / m_lastCh;
// otherwise m_neighborFlags and m_numIntraNeighbor from the previous call are reused.
//
// Samples that are not available are padded: with 1 << (bitDepth - 1) when no unit is available, otherwise with
// the nearest available sample in the scan order below-left -> above-left -> above-right.
void IntraPrediction::xFillReferenceSamples( const CPelBuf& recoBuf, Pel* refBufUnfiltered, const CompArea& area, const CodingUnit &cu )
756
735k
{
757
735k
  const ChannelType      chType = toChannelType( area.compID );
758
735k
  const CodingStructure &cs     = *cu.cs;
759
735k
  const SPS             &sps    = *cs.sps;
760
735k
  const PreCalcValues   &pcv    = *cs.pcv;
761
762
735k
  const int multiRefIdx         = (area.compID == COMP_Y) ? cu.multiRefIdx : 0;
763
764
735k
  const int  tuWidth            = area.width;
765
735k
  const int  tuHeight           = area.height;
766
735k
  const int  predSize           = m_topRefLength;
767
735k
  const int  predHSize          = m_leftRefLength;
768
735k
  const int predStride = predSize + 1 + multiRefIdx; // offset of the left column relative to the top row inside the reference buffer
769
735k
  m_refBufferStride[area.compID] = predStride;
770
771
735k
  const int  unitWidth          = tuWidth  <= 2 && cu.ispMode && isLuma(area.compID) ? tuWidth  : pcv.minCUSize >> getComponentScaleX(area.compID, sps.chromaFormatIdc);
772
735k
  const int  unitHeight         = tuHeight <= 2 && cu.ispMode && isLuma(area.compID) ? tuHeight : pcv.minCUSize >> getComponentScaleY(area.compID, sps.chromaFormatIdc);
773
774
735k
  const int  totalAboveUnits    = (predSize + (unitWidth - 1)) / unitWidth;
775
735k
  const int  totalLeftUnits     = (predHSize + (unitHeight - 1)) / unitHeight;
776
735k
  const int  totalUnits         = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
777
778
735k
  if( m_lastArea != area || m_lastCh != chType ) // availability is only re-analyzed when block or channel changed since the last call
779
74.0k
  {
780
74.0k
    m_lastCh = chType;
781
74.0k
    m_lastArea = area;
782
74.0k
    const int  numAboveUnits      = std::max<int>( tuWidth / unitWidth, 1 );
783
74.0k
    const int  numLeftUnits       = std::max<int>( tuHeight / unitHeight, 1 );
784
74.0k
    const int  numAboveRightUnits = totalAboveUnits - numAboveUnits;
785
74.0k
    const int  numLeftBelowUnits  = totalLeftUnits - numLeftUnits;
786
787
74.0k
    CHECK( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported" );
788
789
    // ----- Step 1: analyze neighborhood -----
790
74.0k
    const Position posLT          = area;
791
74.0k
    const Position posRT          = area.topRight();
792
74.0k
    const Position posLB          = area.bottomLeft();
793
794
74.0k
    m_numIntraNeighbor = 0;
795
796
74.0k
    memset( m_neighborFlags, 0, totalUnits );
797
798
74.0k
    m_neighborFlags[totalLeftUnits] = isAboveLeftAvailable( cu, chType, posLT );
799
74.0k
    m_numIntraNeighbor += m_neighborFlags[totalLeftUnits] ? 1 : 0;
800
74.0k
    m_numIntraNeighbor += isAboveAvailable     ( cu, chType, posLT, numAboveUnits,      unitWidth,  (m_neighborFlags + totalLeftUnits + 1) );
801
74.0k
    m_numIntraNeighbor += isAboveRightAvailable( cu, chType, posRT, numAboveRightUnits, unitWidth,  (m_neighborFlags + totalLeftUnits + 1 + numAboveUnits) );
802
74.0k
    m_numIntraNeighbor += isLeftAvailable      ( cu, chType, posLT, numLeftUnits,       unitHeight, (m_neighborFlags + totalLeftUnits - 1) ); // left helpers write their flags towards lower indices
803
74.0k
    m_numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB, numLeftBelowUnits,  unitHeight, (m_neighborFlags + totalLeftUnits - 1 - numLeftUnits) );
804
74.0k
  }
805
  // ----- Step 2: fill reference samples (depending on neighborhood) -----
806
807
735k
  const Pel*  srcBuf    = recoBuf.buf;
808
735k
  const int   srcStride = recoBuf.stride;
809
735k
        Pel*  ptrDst    = refBufUnfiltered;
810
735k
  const Pel*  ptrSrc;
811
735k
  const Pel   valueDC   = 1 << (sps.bitDepths[ chType ] - 1); // mid-range value for the channel's bit depth
812
813
814
735k
  if( m_numIntraNeighbor == 0 )
815
238k
  {
816
    // Fill border with DC value
817
7.08M
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; }
818
6.81M
    for (int i = 0; i <= predHSize + multiRefIdx; i++) { ptrDst[i+predStride] = valueDC; }
819
238k
  }
820
497k
  else if( m_numIntraNeighbor == totalUnits )
821
1.00k
  {
822
    // Fill top-left border and top and top right with rec. samples
823
1.00k
    ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx); // sample (1 + multiRefIdx) rows above and columns left of the block origin
824
80.8k
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; }
825
80.8k
    for (int i = 0; i <= predHSize + multiRefIdx; i++)
826
79.8k
    {
827
79.8k
      ptrDst[i + predStride] = ptrSrc[i * srcStride];
828
79.8k
    }
829
1.00k
  }
830
496k
  else // reference samples are partially available
831
496k
  {
832
    // Fill top-left sample(s) if available
833
496k
    ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
834
496k
    ptrDst = refBufUnfiltered;
835
496k
    if (m_neighborFlags[totalLeftUnits])
836
166k
    {
837
166k
      ptrDst[0] = ptrSrc[0];
838
166k
      ptrDst[predStride] = ptrSrc[0];
839
199k
      for (int i = 1; i <= multiRefIdx; i++)
840
33.1k
      {
841
33.1k
        ptrDst[i] = ptrSrc[i];
842
33.1k
        ptrDst[i + predStride] = ptrSrc[i * srcStride];
843
33.1k
      }
844
166k
    }
845
846
    // Fill left & below-left samples if available (downwards)
847
496k
    ptrSrc += (1 + multiRefIdx) * srcStride;
848
496k
    ptrDst += (1 + multiRefIdx) + predStride;
849
7.14M
    for (int unitIdx = totalLeftUnits - 1; unitIdx > 0; unitIdx--)
850
6.65M
    {
851
6.65M
      if (m_neighborFlags[unitIdx])
852
2.88M
      {
853
9.88M
        for (int i = 0; i < unitHeight; i++)
854
6.99M
        {
855
6.99M
          ptrDst[i] = ptrSrc[i*srcStride];
856
6.99M
        }
857
2.88M
      }
858
6.65M
      ptrSrc += unitHeight * srcStride;
859
6.65M
      ptrDst += unitHeight;
860
6.65M
    }
861
    // Fill last below-left sample(s)
862
496k
    if (m_neighborFlags[0])
863
14.5k
    {
864
14.5k
      int lastSample = (predHSize % unitHeight == 0) ? unitHeight : predHSize % unitHeight; // the last unit may be only partly covered by the reference length
865
58.5k
      for (int i = 0; i < lastSample; i++)
866
44.0k
      {
867
44.0k
        ptrDst[i] = ptrSrc[i*srcStride];
868
44.0k
      }
869
14.5k
    }
870
871
    // Fill above & above-right samples if available (left-to-right)
872
496k
    ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
873
496k
    ptrDst = refBufUnfiltered + 1 + multiRefIdx;
874
7.16M
    for (int unitIdx = totalLeftUnits + 1; unitIdx < totalUnits - 1; unitIdx++)
875
6.67M
    {
876
6.67M
      if (m_neighborFlags[unitIdx])
877
3.63M
      {
878
3.63M
        memcpy(ptrDst,ptrSrc,unitWidth*sizeof(Pel));
879
3.63M
      }
880
6.67M
      ptrSrc += unitWidth;
881
6.67M
      ptrDst += unitWidth;
882
6.67M
    }
883
    // Fill last above-right sample(s)
884
496k
    if (m_neighborFlags[totalUnits - 1])
885
55.6k
    {
886
55.6k
      int lastSample = (predSize % unitWidth == 0) ? unitWidth : predSize % unitWidth;
887
55.6k
      memcpy(ptrDst,ptrSrc,lastSample*sizeof(Pel));
888
55.6k
    }
889
890
    // pad from first available down to the last below-left
891
496k
    ptrDst = refBufUnfiltered;
892
496k
    int lastAvailUnit = 0;
893
496k
    if (!m_neighborFlags[0])
894
481k
    {
895
481k
      int firstAvailUnit = 1;
896
4.42M
      while (firstAvailUnit < totalUnits && !m_neighborFlags[firstAvailUnit])
897
3.94M
      {
898
3.94M
        firstAvailUnit++;
899
3.94M
      }
900
901
      // first available sample
902
481k
      int firstAvailRow = -1; // -1 means the first available sample lies in the top row (see firstAvailCol)
903
481k
      int firstAvailCol = 0;
904
481k
      if (firstAvailUnit < totalLeftUnits)
905
301k
      {
906
301k
        firstAvailRow = (totalLeftUnits - firstAvailUnit) * unitHeight + multiRefIdx;
907
301k
      }
908
180k
      else if (firstAvailUnit == totalLeftUnits)
909
0
      {
910
0
        firstAvailRow = multiRefIdx;
911
0
      }
912
180k
      else
913
180k
      {
914
180k
        firstAvailCol = (firstAvailUnit - totalLeftUnits - 1) * unitWidth + 1 + multiRefIdx;
915
180k
      }
916
481k
      const Pel firstAvailSample = ptrDst[firstAvailRow < 0 ? firstAvailCol : firstAvailRow + predStride];
917
918
      // last sample below-left (n.a.)
919
481k
      int lastRow = predHSize + multiRefIdx;
920
921
      // fill left column
922
10.8M
      for (int i = lastRow; i > firstAvailRow; i--) // with firstAvailRow == -1 this covers the whole left column including index 0
923
10.3M
      {
924
10.3M
        ptrDst[i + predStride] = firstAvailSample;
925
10.3M
      }
926
      // fill top row
927
481k
      if (firstAvailCol > 0)
928
180k
      {
929
370k
        for (int j = 0; j < firstAvailCol; j++)
930
189k
        {
931
189k
          ptrDst[j] = firstAvailSample;
932
189k
        }
933
180k
      }
934
481k
      lastAvailUnit = firstAvailUnit;
935
481k
    }
936
937
    // pad all other reference samples.
938
496k
    int currUnit = lastAvailUnit + 1;
939
10.3M
    while (currUnit < totalUnits)
940
9.88M
    {
941
9.88M
      if (!m_neighborFlags[currUnit]) // samples not available
942
3.62M
      {
943
        // last available sample
944
3.62M
        int lastAvailRow = -1; // -1 means the last available sample lies in the top row (see lastAvailCol)
945
3.62M
        int lastAvailCol = 0;
946
3.62M
        if (lastAvailUnit < totalLeftUnits)
947
148k
        {
948
148k
          lastAvailRow = (totalLeftUnits - lastAvailUnit - 1) * unitHeight + multiRefIdx + 1;
949
148k
        }
950
3.47M
        else if (lastAvailUnit == totalLeftUnits)
951
148k
        {
952
148k
          lastAvailCol = multiRefIdx;
953
148k
        }
954
3.32M
        else
955
3.32M
        {
956
3.32M
          lastAvailCol = (lastAvailUnit - totalLeftUnits) * unitWidth + multiRefIdx;
957
3.32M
        }
958
3.62M
        const Pel lastAvailSample = ptrDst[lastAvailRow < 0 ? lastAvailCol : lastAvailRow + predStride];
959
960
        // fill current unit with last available sample
961
3.62M
        if (currUnit < totalLeftUnits)
962
0
        {
963
0
          for (int i = lastAvailRow - 1; i >= lastAvailRow - unitHeight; i--)
964
0
          {
965
0
            ptrDst[i + predStride] = lastAvailSample;
966
0
          }
967
0
        }
968
3.62M
        else if (currUnit == totalLeftUnits)
969
148k
        {
970
297k
          for (int i = 0; i < multiRefIdx + 1; i++)
971
148k
          {
972
148k
            ptrDst[i + predStride] = lastAvailSample;
973
148k
          }
974
297k
          for (int j = 0; j < multiRefIdx + 1; j++)
975
148k
          {
976
148k
            ptrDst[j] = lastAvailSample;
977
148k
          }
978
148k
        }
979
3.47M
        else
980
3.47M
        {
981
3.47M
          int numSamplesInUnit = (currUnit == totalUnits - 1) ? ((predSize % unitWidth == 0) ? unitWidth : predSize % unitWidth) : unitWidth;
982
11.4M
          for (int j = lastAvailCol + 1; j <= lastAvailCol + numSamplesInUnit; j++)
983
7.94M
          {
984
7.94M
            ptrDst[j] = lastAvailSample;
985
7.94M
          }
986
3.47M
        }
987
3.62M
      }
988
9.88M
      lastAvailUnit = currUnit; // advanced even for a padded unit, so padding propagates from unit to unit
989
9.88M
      currUnit++;
990
9.88M
    }
991
496k
  }
992
735k
}
993
994
// Writes a smoothed copy of the reference samples in refBufUnfiltered to refBufFiltered.
//   multiRefIdx - reference line offset; forced to 0 when area.compID is not COMP_Y
//   stride      - offset of the left column relative to the top row; 0 selects predSize + 1
// Both the top row (offset 0) and the left column (offset predStride) are filtered with
// (a + 2*b + c + 2) >> 2 over neighboring samples. The corner sample of both rows is the rounded average of the
// first two samples of each row, and the last sample of each row is copied unfiltered.
// The sps parameter is not used in this body.
void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea& area, const SPS &sps
995
  , int multiRefIdx
996
  , int stride
997
)
998
53.8k
{
999
53.8k
  if (area.compID != COMP_Y)
1000
0
  {
1001
0
    multiRefIdx = 0;
1002
0
  }
1003
53.8k
  const int predSize = m_topRefLength + multiRefIdx;
1004
53.8k
  const int predHSize = m_leftRefLength + multiRefIdx;
1005
53.8k
  const int predStride = stride == 0 ? predSize + 1 : stride;
1006
1007
1008
53.8k
  const Pel topLeft =
1009
53.8k
    (refBufUnfiltered[0] + refBufUnfiltered[1] + refBufUnfiltered[predStride] + refBufUnfiltered[predStride + 1] + 2)
1010
53.8k
    >> 2;
1011
1012
53.8k
  refBufFiltered[0] = topLeft;
1013
1014
3.03M
  for (int i = 1; i < predSize; i++)
1015
2.98M
  {
1016
2.98M
    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
1017
2.98M
  }
1018
53.8k
  refBufFiltered[predSize] = refBufUnfiltered[predSize];
1019
1020
53.8k
  refBufFiltered += predStride; // continue with the left column
1021
53.8k
  refBufUnfiltered += predStride;
1022
1023
53.8k
  refBufFiltered[0] = topLeft;
1024
1025
3.04M
  for (int i = 1; i < predHSize; i++)
1026
2.98M
  {
1027
2.98M
    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
1028
2.98M
  }
1029
53.8k
  refBufFiltered[predHSize] = refBufUnfiltered[predHSize];
1030
53.8k
}
1031
1032
// Returns true when cs.getCURestricted() yields a coding unit for the position one sample above and one sample
// left of posLT, i.e. when the above-left neighbor may be used as reference for cu.
bool isAboveLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT)
1033
74.0k
{
1034
74.0k
  const CodingStructure& cs = *cu.cs;
1035
74.0k
  const Position refPos = posLT.offset(-1, -1);
1036
1037
74.0k
  return (cs.getCURestricted(refPos, cu, chType) != NULL);
1038
74.0k
}
1039
1040
// Checks the row directly above posLT in steps of unitWidth, left to right, for up to numUnits units.
// Returns the number of available units and sets one flag per checked unit in bValidFlags (ascending order).
// The scan stops at the first unit for which getCURestricted() returns no CU; flags from there on are not written.
// checkPosX remembers the right end (relative to posLT.x) of the neighboring CU found last, so the lookup is
// only repeated once dx has moved past that CU.
int isAboveAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *bValidFlags)
1041
328k
{
1042
328k
  const CodingStructure& cs = *cu.cs;
1043
1044
328k
  bool *    validFlags  = bValidFlags;
1045
328k
  int       numIntra    = 0;
1046
328k
  const int maxDx       = numUnits * unitWidth;
1047
328k
  unsigned  checkPosX   = 0;
1048
328k
  bool      valid       = false;
1049
1050
1.36M
  for (int dx = 0; dx < maxDx; dx += unitWidth)
1051
1.23M
  {
1052
1.23M
    if( dx >= checkPosX )
1053
329k
    {
1054
329k
      const Position refPos = posLT.offset(dx, -1);
1055
1056
329k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1057
329k
      valid = (cuN != NULL);
1058
329k
      if( cuN ) checkPosX = chType == CH_C ? (cuN->Cb().x + cuN->Cb().width - posLT.x) : (cuN->Y().x + cuN->Y().width - posLT.x);
1059
196k
      else break;
1060
329k
    }
1061
1062
1.03M
    numIntra += valid ? 1 : 0;
1063
1.03M
    *validFlags = valid;
1064
1065
1.03M
    validFlags++;
1066
1.03M
  }
1067
1068
328k
  return numIntra;
1069
328k
}
1070
1071
// Checks the column directly left of posLT in steps of unitHeight, top to bottom, for up to numUnits units.
// Returns the number of available units and sets one flag per checked unit, starting at bValidFlags and moving
// towards lower addresses (the pointer is decremented after each unit).
// The scan stops at the first unit for which getCURestricted() returns no CU; flags from there on are not written.
// checkPosY remembers the bottom end (relative to posLT.y) of the neighboring CU found last, so the lookup is
// only repeated once dy has moved past that CU.
int isLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitHeight, bool *bValidFlags)
1072
328k
{
1073
328k
  const CodingStructure& cs = *cu.cs;
1074
1075
328k
  bool *    validFlags = bValidFlags;
1076
328k
  int       numIntra   = 0;
1077
328k
  const int maxDy      = numUnits * unitHeight;
1078
328k
  unsigned checkPosY   = 0;
1079
328k
  bool     valid       = false;
1080
1081
1.27M
  for (int dy = 0; dy < maxDy; dy += unitHeight)
1082
1.15M
  {
1083
1.15M
    if( dy >= checkPosY )
1084
331k
    {
1085
331k
      const Position refPos = posLT.offset(-1, dy);
1086
1087
331k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1088
331k
      valid = (cuN != NULL);
1089
331k
      if( cuN ) checkPosY = chType == CH_C ? (cuN->Cb().y + cuN->Cb().height - posLT.y) : (cuN->Y().y + cuN->Y().height - posLT.y);
1090
206k
      else break;
1091
331k
    }
1092
1093
949k
    numIntra += valid ? 1 : 0;
1094
949k
    *validFlags = valid;
1095
1096
949k
    validFlags--;
1097
949k
  }
1098
1099
328k
  return numIntra;
1100
328k
}
1101
1102
// Checks the row above the block to the right of posRT: positions posRT.offset(unitWidth + dx, -1) for dx in
// steps of unitWidth, for up to numUnits units.
// Returns the number of available units and sets one flag per checked unit in bValidFlags (ascending order).
// The scan stops at the first unit for which getCURestricted() returns no CU; flags from there on are not written.
// checkPosX remembers the right end of the neighboring CU found last, relative to posRT.x + unitWidth.
int isAboveRightAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posRT, const uint32_t numUnits, const uint32_t unitWidth, bool *bValidFlags )
1103
166k
{
1104
166k
  const CodingStructure& cs = *cu.cs;
1105
1106
166k
  bool *    validFlags = bValidFlags;
1107
166k
  int       numIntra   = 0;
1108
166k
  const int maxDx      = numUnits * unitWidth;
1109
166k
  unsigned  checkPosX   = 0;
1110
166k
  bool      valid       = false;
1111
1112
526k
  for (int dx = 0; dx < maxDx; dx += unitWidth)
1113
504k
  {
1114
504k
    if( dx >= checkPosX )
1115
195k
    {
1116
195k
      const Position refPos = posRT.offset(unitWidth + dx, -1);
1117
1118
195k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1119
195k
      valid = (cuN != NULL);
1120
195k
      if(cuN) checkPosX = chType == CH_C ? (cuN->Cb().x + cuN->Cb().width - (posRT.x + unitWidth)) : (cuN->Y().x + cuN->Y().width - (posRT.x + unitWidth));
1121
143k
      else break;
1122
195k
    }
1123
1124
360k
    numIntra += valid ? 1 : 0;
1125
360k
    *validFlags = valid;
1126
1127
360k
    validFlags++;
1128
360k
  }
1129
1130
166k
  return numIntra;
1131
166k
}
1132
1133
// Checks the column left of the block below posLB: positions posLB.offset(-1, unitHeight + dy) for dy in steps
// of unitHeight, for up to numUnits units.
// Returns the number of available units and sets one flag per checked unit, starting at bValidFlags and moving
// towards lower addresses (the pointer is decremented after each unit).
// The scan stops at the first unit for which getCURestricted() returns no CU; flags from there on are not written.
// checkPosY remembers the bottom end of the neighboring CU found last, relative to posLB.y + unitHeight.
int isBelowLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLB, const uint32_t numUnits, const uint32_t unitHeight, bool *bValidFlags )
1134
157k
{
1135
157k
  const CodingStructure& cs = *cu.cs;
1136
1137
157k
  bool *    validFlags = bValidFlags;
1138
157k
  int       numIntra   = 0;
1139
157k
  const int maxDy      = numUnits * unitHeight;
1140
157k
  unsigned  checkPosY   = 0;
1141
157k
  bool      valid       = false;
1142
1143
274k
  for (int dy = 0; dy < maxDy; dy += unitHeight)
1144
269k
  {
1145
269k
    if( dy >= checkPosY )
1146
173k
    {
1147
173k
      const Position refPos = posLB.offset(-1, unitHeight + dy);
1148
1149
173k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1150
173k
      valid = (cuN != NULL);
1151
173k
      if( cuN ) checkPosY = chType == CH_C ? (cuN->Cb().y + cuN->Cb().height - (posLB.y + unitHeight)) : (cuN->Y().y + cuN->Y().height - (posLB.y + unitHeight));
1152
152k
      else break;
1153
173k
    }
1154
1155
116k
    numIntra += valid ? 1 : 0;
1156
116k
    *validFlags = valid;
1157
1158
116k
    validFlags--;
1159
116k
  }
1160
1161
157k
  return numIntra;
1162
157k
}
1163
1164
// LumaRecPixels
// Loads the reconstructed luma samples that correspond to chromaArea into m_pMdlmTemp, downsampled to the chroma
// sampling grid according to cu.chromaFormat (plain copy for CHROMA_444, horizontal 3-tap filter for CHROMA_422,
// 5-tap or 6-tap filter otherwise, selected by sps->verCollocatedChroma).
//
// Destination layout: rows of iDstStride = 2 * MAX_TB_SIZEY + 1 samples; the block itself starts at
// m_pMdlmTemp + iDstStride + 1, the row before it receives the above template and the column before it the left template.
//
// The above row is only written when all above units are available, the left column only when all left units are
// available. When cu.intraDir[1] is MDLM_L_IDX or MDLM_T_IDX, both templates are extended by the available
// above-right / below-left units.
void IntraPrediction::loadLMLumaRecPels(const CodingUnit& cu, const CompArea& chromaArea )
1166
70.0k
{
1167
70.0k
  int iDstStride = 2 * MAX_TB_SIZEY + 1;
1168
70.0k
  Pel* pDst0 = m_pMdlmTemp + iDstStride + 1; // leave one row and one column in front of the block for the templates
1169
  //assert 420 chroma subsampling
1170
70.0k
  CompArea lumaArea = CompArea( COMP_Y, cu.chromaFormat, chromaArea.lumaPos(), recalcSize( cu.chromaFormat, CH_C, CH_L, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus)
1171
1172
70.0k
  CHECK(lumaArea.width == chromaArea.width && CHROMA_444 != cu.chromaFormat, "");
1173
70.0k
  CHECK(lumaArea.height == chromaArea.height && CHROMA_444 != cu.chromaFormat && CHROMA_422 != cu.chromaFormat, "");
1174
1175
70.0k
  const SizeType uiCWidth = chromaArea.width;
1176
70.0k
  const SizeType uiCHeight = chromaArea.height;
1177
1178
70.0k
  const CPelBuf Src = cu.cs->picture->getRecoBuf( lumaArea );
1179
70.0k
  Pel const* pRecSrc0   = Src.bufAt( 0, 0 );
1180
70.0k
  int iRecStride        = Src.stride;
1181
70.0k
  int logSubWidthC  = getChannelTypeScaleX(CH_C, cu.chromaFormat);
1182
70.0k
  int logSubHeightC = getChannelTypeScaleY(CH_C, cu.chromaFormat);
1183
1184
70.0k
  int iRecStride2       = iRecStride << logSubHeightC; // luma stride corresponding to one chroma row
1185
1186
70.0k
  const CompArea& area = isChroma( cu.chType ) ? chromaArea : lumaArea;
1187
1188
70.0k
  const uint32_t uiTuWidth  = area.width;
1189
70.0k
  const uint32_t uiTuHeight = area.height;
1190
1191
70.0k
  const int  unitWidthLog2  = MIN_CU_LOG2 - getComponentScaleX( area.compID, area.chromaFormat );
1192
70.0k
  const int  unitHeightLog2 = MIN_CU_LOG2 - getComponentScaleY( area.compID, area.chromaFormat );
1193
70.0k
  const int  unitWidth  = 1<<unitWidthLog2;
1194
70.0k
  const int  unitHeight = 1<<unitHeightLog2;
1195
1196
70.0k
  const int  iTUWidthInUnits  = uiTuWidth >> unitWidthLog2;
1197
70.0k
  const int  iTUHeightInUnits = uiTuHeight >> unitHeightLog2;
1198
70.0k
  const int  iAboveUnits      = iTUWidthInUnits;
1199
70.0k
  const int  iLeftUnits       = iTUHeightInUnits;
1200
1201
70.0k
  const int  chromaUnitWidthLog2  = MIN_CU_LOG2 - logSubWidthC;
1202
70.0k
  const int  chromaUnitHeightLog2 = MIN_CU_LOG2 - logSubHeightC;
1203
70.0k
  const int  chromaUnitWidth = 1<<chromaUnitWidthLog2;
1204
70.0k
  const int  chromaUnitHeight = 1<<chromaUnitHeightLog2;
1205
70.0k
  const int  topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H.
1206
70.0k
  const int  leftTemplateSampNum = 2 * uiCHeight;
1207
70.0k
  const int  totalAboveUnits = (topTemplateSampNum + (chromaUnitWidth - 1)) >> chromaUnitWidthLog2;
1208
70.0k
  const int  totalLeftUnits = (leftTemplateSampNum + (chromaUnitHeight - 1)) >> chromaUnitHeightLog2;
1209
70.0k
  const int  totalUnits = totalLeftUnits + totalAboveUnits + 1;
1210
70.0k
  const int  aboveRightUnits = totalAboveUnits - iAboveUnits;
1211
70.0k
  const int  leftBelowUnits = totalLeftUnits - iLeftUnits;
1212
1213
70.0k
  int avaiAboveRightUnits = 0;
1214
70.0k
  int avaiLeftBelowUnits = 0;
1215
70.0k
  bool  bNeighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
1216
70.0k
  memset(bNeighborFlags, 0, totalUnits); // NOTE(review): relies on totalUnits not exceeding the array size above - confirm
1217
70.0k
  bool aboveIsAvailable, leftIsAvailable;
1218
70.0k
  const ChannelType areaCh = toChannelType( area.compID );
1219
1220
70.0k
  int availlableUnit = isLeftAvailable(cu, areaCh, area.pos(), iLeftUnits, unitHeight, (bNeighborFlags + iLeftUnits + leftBelowUnits - 1));
1221
1222
70.0k
  leftIsAvailable = availlableUnit == iTUHeightInUnits;
1223
1224
70.0k
  availlableUnit = isAboveAvailable(cu, areaCh, area.pos(), iAboveUnits, unitWidth, (bNeighborFlags + iLeftUnits + leftBelowUnits + 1));
1225
1226
70.0k
  aboveIsAvailable = availlableUnit == iTUWidthInUnits;
1227
1228
70.0k
  if (leftIsAvailable)   // if left is not available, then the below left is not available
1229
23.0k
  {
1230
23.0k
    avaiLeftBelowUnits = isBelowLeftAvailable(cu, areaCh, area.bottomLeftComp(area.compID), leftBelowUnits, unitHeight, (bNeighborFlags + leftBelowUnits - 1));
1231
23.0k
  }
1232
1233
70.0k
  if (aboveIsAvailable)   // if above is not available, then  the above right is not available.
1234
25.1k
  {
1235
25.1k
    avaiAboveRightUnits = isAboveRightAvailable(cu, areaCh, area.topRightComp(area.compID), aboveRightUnits, unitWidth, (bNeighborFlags + iLeftUnits + leftBelowUnits + iAboveUnits + 1));
1236
25.1k
  }
1237
1238
70.0k
  Pel*       pDst  = nullptr;
1239
70.0k
  Pel const* piSrc = nullptr;
1240
1241
70.0k
  bool isFirstRowOfCtu = (lumaArea.y & ((cu.cs->sps)->CTUSize - 1)) == 0;
1242
1243
70.0k
  if (aboveIsAvailable)
1244
25.1k
  {
1245
25.1k
    pDst  = pDst0    - iDstStride;
1246
25.1k
    int addedAboveRight = 0;
1247
25.1k
    if ((cu.intraDir[1] == MDLM_L_IDX) || (cu.intraDir[1] == MDLM_T_IDX))
1248
21.9k
    {
1249
21.9k
      addedAboveRight = avaiAboveRightUnits*chromaUnitWidth;
1250
21.9k
    }
1251
548k
    for (int i = 0; i < uiCWidth + addedAboveRight; i++)
1252
523k
    {
1253
523k
      const bool leftPadding = i == 0 && !leftIsAvailable; // reuse the current column instead of reading left of the block
1254
523k
      if (cu.chromaFormat == CHROMA_444)
1255
0
      {
1256
0
        piSrc = pRecSrc0 - iRecStride;
1257
0
        pDst[i] = piSrc[i];
1258
0
      }
1259
523k
      else if (isFirstRowOfCtu) // only the single luma row directly above the block is read in this case
1260
115k
      {
1261
115k
        piSrc   = pRecSrc0 - iRecStride;
1262
115k
        pDst[i] = (piSrc[2 * i] * 2 + piSrc[2 * i - (leftPadding ? 0 : 1)] + piSrc[2 * i + 1] + 2) >> 2;
1263
115k
      }
1264
407k
      else if (cu.chromaFormat == CHROMA_422)
1265
0
      {
1266
0
        piSrc = pRecSrc0 - iRecStride2;
1267
1268
0
        int s = 2;
1269
0
        s += piSrc[2 * i] * 2;
1270
0
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1271
0
        s += piSrc[2 * i + 1];
1272
0
        pDst[i] = s >> 2;
1273
0
      }
1274
407k
      else if (cu.cs->sps->verCollocatedChroma )
1275
0
      {
1276
0
        piSrc = pRecSrc0 - iRecStride2;
1277
1278
0
        int s = 4;
1279
0
        s += piSrc[2 * i - iRecStride];
1280
0
        s += piSrc[2 * i] * 4;
1281
0
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1282
0
        s += piSrc[2 * i + 1];
1283
0
        s += piSrc[2 * i + iRecStride];
1284
0
        pDst[i] = s >> 3;
1285
0
      }
1286
407k
      else
1287
407k
      {
1288
407k
        piSrc = pRecSrc0 - iRecStride2;
1289
407k
        int s = 4;
1290
407k
        s += piSrc[2 * i] * 2;
1291
407k
        s += piSrc[2 * i + 1];
1292
407k
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1293
407k
        s += piSrc[2 * i + iRecStride] * 2;
1294
407k
        s += piSrc[2 * i + 1 + iRecStride];
1295
407k
        s += piSrc[2 * i + iRecStride - (leftPadding ? 0 : 1)];
1296
407k
        pDst[i] = s >> 3;
1297
407k
      }
1298
523k
    }
1299
25.1k
  }
1300
1301
70.0k
  if (leftIsAvailable)
1302
23.0k
  {
1303
23.0k
    pDst  = pDst0    - 1;
1304
23.0k
    piSrc = pRecSrc0 - 1 - logSubWidthC; // luma column (1 + logSubWidthC) samples left of the block
1305
1306
23.0k
    int addedLeftBelow = 0;
1307
23.0k
    if ((cu.intraDir[1] == MDLM_L_IDX) || (cu.intraDir[1] == MDLM_T_IDX))
1308
20.4k
    {
1309
20.4k
      addedLeftBelow = avaiLeftBelowUnits*chromaUnitHeight;
1310
20.4k
    }
1311
1312
454k
    for (int j = 0; j < uiCHeight + addedLeftBelow; j++)
1313
431k
    {
1314
431k
      if (cu.chromaFormat == CHROMA_444)
1315
0
      {
1316
0
        pDst[0] = piSrc[0];
1317
0
      }
1318
431k
      else if (cu.chromaFormat == CHROMA_422)
1319
0
      {
1320
0
        int s = 2;
1321
0
        s += piSrc[0] * 2;
1322
0
        s += piSrc[-1];
1323
0
        s += piSrc[1];
1324
0
        pDst[0] = s >> 2;
1325
0
      }
1326
431k
      else if (cu.cs->sps->verCollocatedChroma)
1327
0
      {
1328
0
        const bool abovePadding = j == 0 && !aboveIsAvailable; // reuse the current row instead of reading above the block
1329
1330
0
        int s = 4;
1331
0
        s += piSrc[-(abovePadding ? 0 : iRecStride)];
1332
0
        s += piSrc[0] * 4;
1333
0
        s += piSrc[-1];
1334
0
        s += piSrc[1];
1335
0
        s += piSrc[iRecStride];
1336
0
        pDst[0] = s >> 3;
1337
0
      }
1338
431k
      else
1339
431k
      {
1340
431k
        int s = 4;
1341
431k
        s += piSrc[0] * 2;
1342
431k
        s += piSrc[1];
1343
431k
        s += piSrc[-1];
1344
431k
        s += piSrc[iRecStride] * 2;
1345
431k
        s += piSrc[iRecStride + 1];
1346
431k
        s += piSrc[iRecStride - 1];
1347
431k
        pDst[0] = s >> 3;
1348
431k
      }
1349
1350
431k
      piSrc += iRecStride2;
1351
431k
      pDst  += iDstStride;
1352
431k
    }
1353
23.0k
  }
1354
1355
  // inner part from reconstructed picture buffer
1356
951k
  for( int j = 0; j < uiCHeight; j++ )
1357
881k
  {
1358
14.3M
    for( int i = 0; i < uiCWidth; i++ )
1359
13.4M
    {
1360
13.4M
      if (cu.chromaFormat == CHROMA_444)
1361
0
      {
1362
0
        pDst0[i] = pRecSrc0[i];
1363
0
      }
1364
13.4M
      else if (cu.chromaFormat == CHROMA_422)
1365
0
      {
1366
0
        const bool leftPadding  = i == 0 && !leftIsAvailable;
1367
1368
0
        int s = 2;
1369
0
        s += pRecSrc0[2 * i] * 2;
1370
0
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1371
0
        s += pRecSrc0[2 * i + 1];
1372
0
        pDst0[i] = s >> 2;
1373
0
      }
1374
13.4M
      else if (cu.cs->sps->verCollocatedChroma)
1375
0
      {
1376
0
        const bool leftPadding  = i == 0 && !leftIsAvailable;
1377
0
        const bool abovePadding = j == 0 && !aboveIsAvailable;
1378
1379
0
        int s = 4;
1380
0
        s += pRecSrc0[2 * i - (abovePadding ? 0 : iRecStride)];
1381
0
        s += pRecSrc0[2 * i] * 4;
1382
0
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1383
0
        s += pRecSrc0[2 * i + 1];
1384
0
        s += pRecSrc0[2 * i + iRecStride];
1385
0
        pDst0[i] = s >> 3;
1386
0
      }
1387
13.4M
      else
1388
13.4M
      {
1389
13.4M
        CHECK(cu.chromaFormat != CHROMA_420, "Chroma format must be 4:2:0 for vertical filtering");
1390
13.4M
        const bool leftPadding = i == 0 && !leftIsAvailable;
1391
1392
13.4M
        int s = 4;
1393
13.4M
        s += pRecSrc0[2 * i] * 2;
1394
13.4M
        s += pRecSrc0[2 * i + 1];
1395
13.4M
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1396
13.4M
        s += pRecSrc0[2 * i + iRecStride] * 2;
1397
13.4M
        s += pRecSrc0[2 * i + 1 + iRecStride];
1398
13.4M
        s += pRecSrc0[2 * i + iRecStride - (leftPadding ? 0 : 1)];
1399
13.4M
        pDst0[i] = s >> 3;
1400
13.4M
      }
1401
13.4M
    }
1402
1403
881k
    pDst0    += iDstStride;
1404
881k
    pRecSrc0 += iRecStride2;
1405
881k
  }
1406
70.0k
}
1407
1408
// Derives the linear-model parameters (a, b, iShift) for chroma component compID over chromaArea.
// Up to four sample pairs are picked from the row above and/or the column left of the block
// (one value from m_pMdlmTemp, one from the chroma predictor buffer). Which side is used depends on
// cu.intraDir[1] and on neighbour availability. The pairs are split into the two smallest and two
// largest by the m_pMdlmTemp value, each group is averaged, and a/b/iShift are derived from the
// resulting min and max points. Without any usable neighbour the result is a = 0, iShift = 0 and
// b = 1 << (bitDepth - 1).
// NOTE(review): CHECK presumably raises an error when its condition is true - confirm against its definition.
void IntraPrediction::xGetLMParameters(const CodingUnit& cu, const ComponentID compID,
1409
                                              const CompArea& chromaArea,
1410
                                              int& a, int& b, int& iShift)
1411
184k
{
1412
184k
  CHECK(compID == COMP_Y, "");
1413
1414
184k
  const SizeType cWidth  = chromaArea.width;
1415
184k
  const SizeType cHeight = chromaArea.height;
1416
1417
184k
  const Position posLT = chromaArea;
1418
1419
184k
  CodingStructure & cs = *(cu.cs);
1420
1421
184k
  const SPS &        sps           = *cs.sps;
1422
184k
  const uint32_t     tuWidth     = chromaArea.width;
1423
184k
  const uint32_t     tuHeight    = chromaArea.height;
1424
184k
  const ChromaFormat nChromaFormat = sps.chromaFormatIdc;
1425
1426
184k
  const int unitWidthLog2    = MIN_CU_LOG2 - getComponentScaleX(chromaArea.compID, nChromaFormat);
1427
184k
  const int unitHeightLog2   = MIN_CU_LOG2 - getComponentScaleY(chromaArea.compID, nChromaFormat);
1428
184k
  const int unitWidth    = 1<<unitWidthLog2;
1429
184k
  const int unitHeight   = 1<<unitHeightLog2;
1430
1431
184k
  const int tuWidthInUnits  = tuWidth >> unitWidthLog2;
1432
184k
  const int tuHeightInUnits = tuHeight >> unitHeightLog2;
1433
184k
  const int aboveUnits      = tuWidthInUnits;
1434
184k
  const int leftUnits       = tuHeightInUnits;
1435
184k
  int topTemplateSampNum = 2 * cWidth; // for MDLM, the template sample number is 2W or 2H;
1436
184k
  int leftTemplateSampNum = 2 * cHeight;
1437
184k
  int totalAboveUnits = (topTemplateSampNum + (unitWidth - 1)) >> unitWidthLog2;
1438
184k
  int totalLeftUnits = (leftTemplateSampNum + (unitHeight - 1)) >> unitHeightLog2;
1439
184k
  int totalUnits = totalLeftUnits + totalAboveUnits + 1; // +1: one slot sits between the left and above ranges (presumably the top-left corner)
1440
184k
  int aboveRightUnits = totalAboveUnits - aboveUnits;
1441
184k
  int leftBelowUnits = totalLeftUnits - leftUnits;
1442
184k
  int avaiAboveRightUnits = 0;
1443
184k
  int avaiLeftBelowUnits = 0;
1444
184k
  int avaiAboveUnits = 0;
1445
184k
  int avaiLeftUnits = 0;
1446
1447
184k
  const int curChromaMode = cu.intraDir[1];
1448
184k
  bool neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
1449
184k
  memset(neighborFlags, 0, totalUnits); // byte count equals element count only if sizeof(bool) == 1
1450
1451
184k
  bool aboveAvailable, leftAvailable;
1452
1453
  // flag layout: the above range starts one past index (leftUnits + leftBelowUnits), the left range one before it
184k
  int availableUnit = isAboveAvailable(cu, CH_C, posLT, aboveUnits, unitWidth,
1454
184k
    (neighborFlags + leftUnits + leftBelowUnits + 1));
1455
184k
  aboveAvailable = availableUnit == tuWidthInUnits; // the whole above row must be available
1456
1457
184k
  availableUnit = isLeftAvailable(cu, CH_C, posLT, leftUnits, unitHeight,
1458
184k
    (neighborFlags + leftUnits + leftBelowUnits - 1));
1459
184k
  leftAvailable = availableUnit == tuHeightInUnits; // the whole left column must be available
1460
184k
  if (leftAvailable) // if left is not available, then the below left is not available
1461
60.5k
  {
1462
60.5k
    avaiLeftUnits = tuHeightInUnits;
1463
60.5k
    avaiLeftBelowUnits = isBelowLeftAvailable(cu, CH_C, chromaArea.bottomLeftComp(chromaArea.compID), leftBelowUnits, unitHeight, (neighborFlags + leftBelowUnits - 1));
1464
60.5k
  }
1465
184k
  if (aboveAvailable) // if above is not available, then  the above right is not available.
1466
67.1k
  {
1467
67.1k
    avaiAboveUnits = tuWidthInUnits;
1468
67.1k
    avaiAboveRightUnits = isAboveRightAvailable(cu, CH_C, chromaArea.topRightComp(chromaArea.compID), aboveRightUnits, unitWidth, (neighborFlags + leftUnits + leftBelowUnits + aboveUnits + 1));
1469
67.1k
  }
1470
1471
184k
  const int srcStride = 2 * MAX_TB_SIZEY + 1;
1472
184k
  Pel* srcColor0 = m_pMdlmTemp + srcStride + 1; // skip one row and one column of m_pMdlmTemp
1473
1474
184k
  Pel* curChroma0 = getPredictorPtr(compID);
1475
1476
184k
  unsigned internalBitDepth = sps.bitDepths[CH_C];
1477
1478
  // index [0]: averaged value from selectLumaPix, index [1]: the matching averaged chroma value
184k
  int minLuma[2] = {  MAX_INT, 0 };
1479
184k
  int maxLuma[2] = { -MAX_INT, 0 };
1480
1481
184k
  Pel* src = srcColor0 - srcStride;
1482
184k
  int actualTopTemplateSampNum = 0;
1483
184k
  int actualLeftTemplateSampNum = 0;
1484
184k
  if (curChromaMode == MDLM_T_IDX)
1485
73.3k
  {
1486
73.3k
    leftAvailable = 0; // this mode samples only the above template
1487
73.3k
    avaiAboveRightUnits = avaiAboveRightUnits > (cHeight>>unitWidthLog2) ?  cHeight>>unitWidthLog2 : avaiAboveRightUnits;
1488
73.3k
    actualTopTemplateSampNum = unitWidth*(avaiAboveUnits + avaiAboveRightUnits);
1489
73.3k
  }
1490
110k
  else if (curChromaMode == MDLM_L_IDX)
1491
73.3k
  {
1492
73.3k
    aboveAvailable = 0; // this mode samples only the left template
1493
73.3k
    avaiLeftBelowUnits = avaiLeftBelowUnits > (cWidth>>unitHeightLog2) ? cWidth>>unitHeightLog2 : avaiLeftBelowUnits;
1494
73.3k
    actualLeftTemplateSampNum = unitHeight*(avaiLeftUnits + avaiLeftBelowUnits);
1495
73.3k
  }
1496
37.3k
  else if (curChromaMode == LM_CHROMA_IDX)
1497
37.3k
  {
1498
37.3k
    actualTopTemplateSampNum = cWidth;
1499
37.3k
    actualLeftTemplateSampNum = cHeight;
1500
37.3k
  }
1501
184k
  int startPos[2]; //0:Above, 1: Left
1502
184k
  int pickStep[2];
1503
1504
  // when only one side is usable, that side supplies up to four samples instead of two
184k
  int aboveIs4 = leftAvailable  ? 0 : 1;
1505
184k
  int leftIs4 =  aboveAvailable ? 0 : 1;
1506
1507
184k
  startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
1508
184k
  pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4));
1509
1510
184k
  startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
1511
184k
  pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4));
1512
1513
184k
  Pel selectLumaPix[4] = { 0, 0, 0, 0 };
1514
184k
  Pel selectChromaPix[4] = { 0, 0, 0, 0 };
1515
1516
184k
  int cntT, cntL;
1517
184k
  cntT = cntL = 0;
1518
184k
  int cnt = 0;
1519
184k
  if (aboveAvailable)
1520
36.7k
  {
1521
36.7k
    cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1); // 2 or 4 picks, limited by the template length
1522
36.7k
    src = srcColor0 - srcStride; // row directly above srcColor0
1523
36.7k
    const Pel *cur = curChroma0 + 1;
1524
181k
    for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
1525
144k
    {
1526
144k
      selectLumaPix[cnt] = src[pos];
1527
144k
      selectChromaPix[cnt] = cur[pos];
1528
144k
    }
1529
36.7k
  }
1530
1531
184k
  if (leftAvailable)
1532
32.8k
  {
1533
32.8k
    cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 );
1534
32.8k
    src = srcColor0 - 1; // column directly left of srcColor0, walked with srcStride
1535
32.8k
    const Pel *cur = curChroma0 + m_refBufferStride[compID] + 1;
1536
    // the loop declares its own cnt, which shadows the outer counter; left picks are appended after the cntT above picks
162k
    for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
1537
129k
    {
1538
129k
      selectLumaPix[cnt + cntT] = src[pos * srcStride];
1539
129k
      selectChromaPix[cnt + cntT] = cur[pos];
1540
129k
    }
1541
32.8k
  }
1542
184k
  cnt = cntL + cntT;
1543
1544
184k
  if (cnt == 2)
1545
20
  {
1546
    // only two pairs were picked: replicate them so slots 0..3 hold (old1, old0, old1, old0)
20
    selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
1547
20
    selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
1548
20
    selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
1549
20
    selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
1550
20
  }
1551
1552
  // four compare/swap steps on the indices: afterwards tmpMinGrp refers to the two smallest
  // selectLumaPix entries and tmpMaxGrp to the two largest
184k
  int minGrpIdx[2] = { 0, 2 };
1553
184k
  int maxGrpIdx[2] = { 1, 3 };
1554
184k
  int *tmpMinGrp = minGrpIdx;
1555
184k
  int *tmpMaxGrp = maxGrpIdx;
1556
184k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]);
1557
184k
  if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]);
1558
184k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp);
1559
184k
  if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]);
1560
1561
  // rounded averages of each group form the min and max points of the model
184k
  minLuma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1 )>>1;
1562
184k
  minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
1563
184k
  maxLuma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1 )>>1;
1564
184k
  maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
1565
1566
184k
  if (leftAvailable || aboveAvailable)
1567
68.5k
  {
1568
68.5k
    int diff = maxLuma[0] - minLuma[0];
1569
68.5k
    if (diff > 0)
1570
476
    {
1571
      // slope diffC / diff is approximated without a division, using a table indexed by
      // the four bits of diff below its leading bit
476
      int diffC = maxLuma[1] - minLuma[1];
1572
476
      int x = floorLog2( diff );
1573
476
      static const uint8_t DivSigTable[1 << 4] = {
1574
        // 4bit significands - 8 ( MSB is omitted )
1575
476
        0,  7,  6,  5,  5,  4,  4,  3,  3,  2,  2,  1,  1,  1,  1,  0
1576
476
      };
1577
476
      int normDiff = (diff << 4 >> x) & 15;
1578
476
      int v = DivSigTable[normDiff] | 8; // restore the omitted MSB
1579
476
      x += normDiff != 0;
1580
1581
476
      int y = diffC == 0 ? 0 : floorLog2( abs( diffC ) ) + 1;
1582
476
      int add = 1 << y >> 1; // rounding offset for the shift by y (0 when y == 0)
1583
476
      a = (diffC * v + add) >> y;
1584
476
      iShift = 3 + x - y;
1585
476
      if ( iShift < 1 )
1586
0
      {
1587
0
        iShift = 1;
1588
0
        a = ( (a == 0)? 0: (a < 0)? -15 : 15 );   // a=Sign(a)*15
1589
0
      }
1590
476
      b = minLuma[1] - ((a * minLuma[0]) >> iShift); // offset chosen so the model passes through the min point
1591
476
    }
1592
68.0k
    else
1593
68.0k
    {
1594
      // min and max points coincide: constant model at the min chroma value
68.0k
      a = 0;
1595
68.0k
      b = minLuma[1];
1596
68.0k
      iShift = 0;
1597
68.0k
    }
1598
68.5k
  }
1599
115k
  else
1600
115k
  {
1601
    // no usable neighbour: constant model at the mid value of the chroma bit depth
115k
    a = 0;
1602
115k
    b = 1 << (internalBitDepth - 1);
1603
115k
    iShift = 0;
1604
115k
  }
1605
184k
}
1606
1607
// Prepares the MIP input for cu: hands the luma reference buffer returned by getPredictorPtr(COMP_Y)
// to m_matrixIntraPred.prepareInputForPred together with the luma area and luma bit depth.
// NOTE(review): CHECK presumably raises an error when its condition is true - confirm against its definition.
void IntraPrediction::initIntraMip( const CodingUnit& cu )
1608
38.6k
{
1609
38.6k
  CHECK( cu.lwidth() > cu.cs->sps->getMaxTbSize() || cu.lheight() > cu.cs->sps->getMaxTbSize(), "Error: block size not supported for MIP" );
1610
1611
  // prepare input (boundary) data for prediction
1612
38.6k
  CHECK(m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP");
1613
38.6k
  Pel *ptrSrc = getPredictorPtr(COMP_Y);
1614
38.6k
  const int srcStride  = m_refBufferStride[COMP_Y];
1615
38.6k
  const int srcHStride = 2; // the reference buffer is wrapped as a CPelBuf of width srcStride and height 2
1616
1617
38.6k
  m_matrixIntraPred.prepareInputForPred(CPelBuf(ptrSrc, srcStride, srcHStride), cu.Y(), cu.slice->sps->bitDepths[CH_L]);
1618
38.6k
}
1619
1620
// Writes the MIP prediction for cu into piPred.buf using mode cu.intraDir[CH_L] and cu.mipTransposedFlag.
// The checks require: luma width/height no larger than the SPS max TB size, both powers of two,
// and piPred.stride equal to the luma width.
// NOTE(review): CHECK presumably raises an error when its condition is true - confirm against its definition.
void IntraPrediction::predIntraMip( PelBuf &piPred, const CodingUnit& cu )
1621
231k
{
1622
231k
  CHECK( cu.lwidth() > cu.cs->sps->getMaxTbSize() || cu.lheight() > cu.cs->sps->getMaxTbSize(), "Error: block size not supported for MIP" );
1623
231k
  CHECK( cu.lwidth() != (1 << floorLog2(cu.lwidth())) || cu.lheight() != (1 << floorLog2(cu.lheight())), "Error: expecting blocks of size 2^M x 2^N" );
1624
1625
  // generate mode-specific prediction
1626
231k
  const int bitDepth = cu.slice->sps->bitDepths[CH_L];
1627
1628
231k
  CHECK( cu.lwidth() != piPred.stride, " no support yet" ); // predBlock receives only the raw buffer, no stride
1629
 
1630
231k
  m_matrixIntraPred.predBlock(piPred.buf, cu.intraDir[CH_L], cu.mipTransposedFlag, bitDepth);
1631
231k
}
1632
1633
// Sets up the intra reference samples for one ISP sub-partition (area) of cu.
// - If area starts at the top-left of cu.blocks[area.compID], the references of the whole CU are
//   fetched with xFillReferenceSamples and the reference lengths are then reduced to the sub-partition.
// - Otherwise the unfiltered buffer is updated in place: one side is shifted (or padded when the
//   neighbour is unavailable) and the other side is re-read from recBuf next to the sub-partition.
// Unfiltered buffer layout as used below: the top row starts at the buffer base, the left column at
// base + m_refBufferStride[compID]; entry 0 of each is the corner sample.
// When forceRefFilterFlag is set, initPredIntraParams is skipped and the filtered buffer is always produced.
void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf,
1634
  const bool forceRefFilterFlag)
1635
18.1k
{
1636
18.1k
  const CodingStructure& cs = *cu.cs;
1637
1638
18.1k
  if (!forceRefFilterFlag)
1639
18.1k
  {
1640
18.1k
    initPredIntraParams(cu, area, *cs.sps);
1641
18.1k
  }
1642
1643
18.1k
  const Position posLT = area;
1644
  // availability is queried for the positions directly left of and above the sub-partition's top-left sample
18.1k
  bool           isLeftAvail =
1645
18.1k
    (cs.getCURestricted(posLT.offset(-1, 0), cu, CH_L) != NULL);
1646
18.1k
  bool isAboveAvail =
1647
18.1k
    (cs.getCURestricted(posLT.offset(0, -1), cu, CH_L) != NULL);
1648
  // ----- Step 1: unfiltered reference samples -----
1649
18.1k
  if (cu.blocks[area.compID].x == area.x && cu.blocks[area.compID].y == area.y)
1650
13.9k
  {
1651
13.9k
    Pel* refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1652
    // With the first subpartition all the CU reference samples are fetched at once in a single call to
1653
    // xFillReferenceSamples
1654
13.9k
    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
1655
6.69k
    {
1656
6.69k
      m_leftRefLength = cu.Y().height << 1;
1657
6.69k
      m_topRefLength = cu.Y().width + area.width;
1658
6.69k
    }
1659
7.28k
    else   // if (cu.ispMode == VER_INTRA_SUBPARTITIONS)
1660
7.28k
    {
1661
7.28k
      m_leftRefLength = cu.Y().height + area.height;
1662
7.28k
      m_topRefLength = cu.Y().width << 1;
1663
7.28k
    }
1664
1665
13.9k
    xFillReferenceSamples(cs.picture->getRecoBuf(cu.Y()), refBufUnfiltered, cu.Y(), cu);
1666
1667
    // After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the
1668
    // current subpartition
1669
13.9k
    m_topRefLength = cu.blocks[area.compID].width + area.width;
1670
13.9k
    m_leftRefLength = cu.blocks[area.compID].height + area.height;
1671
13.9k
  }
1672
4.15k
  else
1673
4.15k
  {
1674
    // later sub-partition: reuse the buffer filled earlier and refresh it in place
4.15k
    m_topRefLength = cu.blocks[area.compID].width + area.width;
1675
4.15k
    m_leftRefLength = cu.blocks[area.compID].height + area.height;
1676
1677
4.15k
    const int predSizeHor = m_topRefLength;
1678
4.15k
    const int predSizeVer = m_leftRefLength;
1679
4.15k
    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
1680
3.19k
    {
1681
3.19k
      Pel* src = recBuf.bufAt(0, -1); // reconstructed row at y = -1 relative to recBuf
1682
3.19k
      Pel* ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID]; // left column
1683
3.19k
      if (isLeftAvail)
1684
0
      {
1685
        // move the stored left column forward by area.height entries
0
        for (int i = 0; i <= 2 * cu.blocks[area.compID].height - area.height; i++)
1686
0
        {
1687
0
          ref[i] = ref[i + area.height];
1688
0
        }
1689
0
      }
1690
3.19k
      else
1691
3.19k
      {
1692
        // no left neighbour: fill the whole left column (including the corner) with the first sample of the row above
66.0k
        for (int i = 0; i <= predSizeVer; i++)
1693
62.8k
        {
1694
62.8k
          ref[i] = src[0];
1695
62.8k
        }
1696
3.19k
      }
1697
3.19k
      Pel* dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + 1; // top row, first entry after the corner
1698
3.19k
      dst[-1] = ref[0]; // the corner of the top row mirrors the first left-column entry
1699
66.0k
      for (int i = 0; i < area.width; i++)
1700
62.8k
      {
1701
62.8k
        dst[i] = src[i];
1702
62.8k
      }
1703
3.19k
      Pel sample = src[area.width - 1];
1704
3.19k
      dst += area.width;
1705
      // the rest of the top row is padded with the last sample copied from the row above
66.0k
      for (int i = 0; i < predSizeHor - area.width; i++)
1706
62.8k
      {
1707
62.8k
        dst[i] = sample;
1708
62.8k
      }
1709
3.19k
    }
1710
957
    else
1711
957
    {
1712
957
      Pel* src = recBuf.bufAt(-1, 0); // reconstructed column at x = -1 relative to recBuf
1713
957
      Pel* ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED]; // top row
1714
957
      if (isAboveAvail)
1715
0
      {
1716
        // move the stored top row forward by area.width entries
0
        for (int i = 0; i <= 2 * cu.blocks[area.compID].width - area.width; i++)
1717
0
        {
1718
0
          ref[i] = ref[i + area.width];
1719
0
        }
1720
0
      }
1721
957
      else
1722
957
      {
1723
        // no above neighbour: fill the whole top row (including the corner) with the first sample of the left column
21.0k
        for (int i = 0; i <= predSizeHor; i++)
1724
20.0k
        {
1725
20.0k
          ref[i] = src[0];
1726
20.0k
        }
1727
957
      }
1728
957
      Pel* dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID] + 1; // left column, first entry after the corner
1729
957
      dst[-1] = ref[0]; // the corner of the left column mirrors the first top-row entry
1730
21.3k
      for (int i = 0; i < area.height; i++)
1731
20.3k
      {
1732
20.3k
        *dst = *src;
1733
20.3k
        src += recBuf.stride; // step down one row in the reconstruction buffer
1734
20.3k
        dst++;
1735
20.3k
      }
1736
957
      Pel sample = src[-recBuf.stride]; // last sample copied above (src is one row past it)
1737
21.3k
      for (int i = 0; i < predSizeVer - area.height; i++)
1738
20.3k
      {
1739
20.3k
        *dst = sample;
1740
20.3k
        dst++;
1741
20.3k
      }
1742
957
    }
1743
4.15k
  }
1744
  // ----- Step 2: filtered reference samples -----
1745
18.1k
  if (m_ipaParam.refFilterFlag || forceRefFilterFlag)
1746
0
  {
1747
0
    Pel* refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1748
0
    Pel* refBufFiltered = m_refBuffer[area.compID][PRED_BUF_FILTERED];
1749
0
    xFilterReferenceSamples(refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.multiRefIdx);
1750
0
  }
1751
18.1k
}
1752
1753
// Sets m_leftRefLength and m_topRefLength to twice the height and twice the width of area, respectively.
void IntraPrediction::setReferenceArrayLengths(const CompArea& area)
1754
721k
{
1755
  // set Top and Left reference samples length
1756
721k
  const int width = area.width;
1757
721k
  const int height = area.height;
1758
1759
721k
  m_leftRefLength = (height << 1);
1760
721k
  m_topRefLength = (width << 1);
1761
721k
}
1762
1763
} // namespace vvenc
1764
1765
//! \}
1766