Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/IntraPrediction.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     IntraPrediction.cpp
45
    \brief    intra prediction class
46
*/
47
48
#include "IntraPrediction.h"
49
#include "Unit.h"
50
#include "UnitTools.h"
51
#include "Rom.h"
52
#include "InterpolationFilter.h"
53
#include "dtrace_next.h"
54
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
// ====================================================================================================================
63
// Tables
64
// ====================================================================================================================
65
66
// Thresholds for the mode-dependent reference-sample filtering decision.
// initPredIntraParams() indexes this table with (Log2(width * height) >> 1) and
// enables filtering when the distance of the prediction mode to the nearer of
// HOR_IDX / VER_IDX is strictly greater than the selected entry.
const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] =
67
{
68
  24, //   1xn
69
  24, //   2xn
70
  24, //   4xn
71
  14, //   8xn
72
  2,  //  16xn
73
  0,  //  32xn
74
  0,  //  64xn
75
  0   // 128xn
76
};
77
78
//NOTE: Bit-Limit - 24-bit source
79
/**
 * Planar intra prediction, plain C++ implementation.
 *
 * Each output sample is the rounded average of a horizontal interpolation (left reference
 * sample of the row towards the top-right reference sample) and a vertical interpolation
 * (top reference sample of the column towards the bottom-left reference sample).
 *
 * \param pDst  destination block; width/height define the block size, the buffer is overwritten
 * \param pSrc  reference samples: row 0 is read at x = 1..width+1 (top),
 *              row 1 is read at x = 1..height+1 (left)
 */
void xPredIntraPlanar_Core( PelBuf& pDst, const CPelBuf& pSrc )
{
  const uint32_t width  = pDst.width;
  const uint32_t height = pDst.height;
  const uint32_t log2W  = Log2(width);
  const uint32_t log2H  = Log2(height);

  int refLeft  [MAX_TB_SIZEY + 1];
  int refTop   [MAX_TB_SIZEY + 1];
  int stepDown [MAX_TB_SIZEY];
  int stepRight[MAX_TB_SIZEY];
  const uint32_t rounding = 1 << (log2W + log2H);

  // Fetch the top reference row and the left reference column (one extra sample each).
  for( int i = 0; i < width + 1; i++ )
  {
    refTop[i] = pSrc.at( i + 1, 0 );
  }
  for( int i = 0; i < height + 1; i++ )
  {
    refLeft[i] = pSrc.at( i + 1, 1 );
  }

  // The two far corners are the targets of the interpolation.
  const int cornerBottomLeft = refLeft[height];
  const int cornerTopRight   = refTop[width];

  // with some optimizations gcc-8 gives spurious "-Wmaybe-uninitialized" warnings here (says refLeft or refTop would be uninitialized here)
  GCC_WARNING_DISABLE_maybe_uninitialized
  for( int i = 0; i < width; i++ )
  {
    stepDown[i] = cornerBottomLeft - refTop[i];   // per-row increment of the vertical predictor
    refTop[i] <<= log2H;                          // scale the start value to the working precision
  }

  for( int i = 0; i < height; i++ )
  {
    stepRight[i] = cornerTopRight - refLeft[i];   // per-column increment of the horizontal predictor
    refLeft[i] <<= log2W;
  }
  GCC_WARNING_RESET

  const uint32_t totalShift = 1 + log2W + log2H;
  const uint32_t stride     = pDst.stride;
  Pel*           row        = pDst.buf;

  for( int y = 0; y < height; y++ )
  {
    int       horAcc  = refLeft[y];
    const int horStep = stepRight[y];

    for( int x = 0; x < width; x++ )
    {
      horAcc    += horStep;
      refTop[x] += stepDown[x];   // running vertical predictor, carried over from row to row

      row[x] = ( ( horAcc << log2H ) + ( refTop[x] << log2W ) + rounding ) >> totalShift;
    }

    row += stride;
  }
}
136
137
/**
 * Position-dependent blending of an already predicted block with its reference samples
 * (called from predIntraAng() for planar and DC modes when PDPC is enabled).
 *
 * Each sample is pulled towards the left reference sample of its row and the top reference
 * sample of its column; the weights halve every (1 << scale) / 2 positions away from the edge.
 *
 * \param dstBuf  predicted block, modified in place
 * \param pSrc    reference samples: row 0 read at x + 1 (top), row 1 read at y + 1 (left)
 */
void  IntraPredSampleFilter_Core(PelBuf& dstBuf, const CPelBuf& pSrc)
{
  const int blkWidth  = dstBuf.width;
  const int blkHeight = dstBuf.height;

  const int scale = ((Log2(blkWidth*blkHeight) - 2) >> 2);
  CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");

  for (int row = 0; row < blkHeight; row++)
  {
    // Weight of the top reference depends only on the row, so compute it once per row.
    const int weightTop = 32 >> std::min(31, ((row << 1) >> scale));
    const Pel leftRef   = pSrc.at(row + 1, 1);

    for (int col = 0; col < blkWidth; col++)
    {
      const int weightLeft = 32 >> std::min(31, ((col << 1) >> scale));
      const Pel topRef     = pSrc.at(col + 1, 0);
      const Pel pred       = dstBuf.at(col, row);

      dstBuf.at(col, row) = pred + ((weightLeft * (leftRef - pred) + weightTop * (topRef - pred) + 32) >> 6);
    }
  }
}
158
159
/**
 * Pure horizontal/vertical prediction with PDPC gradient correction, plain C++ implementation.
 *
 * Every row is first filled with a copy of refMain[1 .. width]; then the first
 * min(3 << scale, width) samples of the row are corrected by a weighted difference between
 * the side reference sample of that row and the top-left reference sample, and clipped.
 *
 * \param pDsty      first destination row
 * \param dstStride  distance between destination rows, in samples
 * \param refSide    side reference; refSide[1 + y] is used for row y
 * \param width      number of samples per row
 * \param height     number of rows
 * \param scale      PDPC scale; the weight for column x is 32 >> (2 * x >> scale)
 * \param refMain    main reference; refMain[0] is the top-left sample, refMain[1..] the row to copy
 * \param clpRng     clipping range applied to the corrected samples
 */
void IntraHorVerPDPC_Core(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height,int scale,const Pel* refMain, const ClpRng& clpRng)
{
  const Pel topLeft = refMain[0];

  // Loop-invariant: from column (3 << scale) on the weight 32 >> (2 * x >> scale) is zero.
  const int numCorrected = std::min(3 << scale, width);

  for( int y = 0; y < height; y++ )
  {
    memcpy(pDsty,&refMain[1],width*sizeof(Pel));

    // The gradient is the same for every sample of the row.
    const int gradient = refSide[1 + y] - topLeft;

    for (int x = 0; x < numCorrected; x++)
    {
      const int wL  = 32 >> (2 * x >> scale);
      const Pel val = pDsty[x];
      pDsty[x]      = ClipPel(val + ((wL * gradient + 32) >> 6), clpRng);
    }
    pDsty += dstStride;
  }
}
176
/**
 * PDPC correction for angular modes, plain C++ implementation.
 *
 * For the first min(3 << scale, width) samples of every row, the predicted value is blended
 * with a side reference sample that is located by projecting the sample position along the
 * inverse prediction angle.
 *
 * \param pDsty      first destination row, modified in place
 * \param dstStride  distance between destination rows, in samples
 * \param refSide    side reference samples
 * \param width      number of samples per row
 * \param height     number of rows
 * \param scale      PDPC scale; the weight for column x is 32 >> (2 * x >> scale)
 * \param invAngle   inverse angle step, accumulated per column and evaluated at 1/512 precision
 */
void IntraAnglePDPC_Core(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height,int scale,int invAngle)
{
  // Loop-invariant: from column (3 << scale) on the weight 32 >> (2 * x >> scale) is zero.
  const int numCorrected = std::min(3 << scale, width);

  for (int y = 0; y<height; y++, pDsty += dstStride)
  {
    int invAngleSum = 256;   // rounding offset for the >> 9 below

    for (int x = 0; x < numCorrected; x++)
    {
      invAngleSum += invAngle;

      const int wL   = 32 >> (2 * x >> scale);
      const Pel left = refSide[y + (invAngleSum >> 9) + 1];

      pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6);
    }
  }
}
190
191
/**
 * Angular luma prediction with 4-tap interpolation, plain C++ implementation.
 *
 * For every row the projected position deltaPos is split into an integer offset
 * (deltaPos >> 5) and a 1/32 fraction (deltaPos & 31). Each output sample is a 4-tap filtered
 * value of refMain around that offset. The taps come either from
 * InterpolationFilter::getChromaFilterTable() (useCubicFilter == true) or from a smoothing
 * filter derived from the fraction.
 *
 * \param pDstBuf         destination buffer
 * \param dstStride       distance between destination rows, in samples
 * \param refMain         main reference samples
 * \param width           number of samples per row
 * \param height          number of rows
 * \param deltaPos        projected position of the first row, in 1/32 sample units
 * \param intraPredAngle  increment of deltaPos per row
 * \param ff_unused       not used by this implementation
 * \param useCubicFilter  selects the table filter (with clipping) instead of the smoothing filter
 * \param clpRng          clipping range, applied only when useCubicFilter is set
 */
void IntraPredAngleLuma_Core(Pel* pDstBuf,const ptrdiff_t dstStride,Pel* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff_unused,const bool useCubicFilter,const ClpRng& clpRng)
{
  for (int y = 0; y<height; y++, deltaPos += intraPredAngle)
  {
    const int deltaInt   = deltaPos >> 5;
    const int deltaFract = deltaPos & ( 32 - 1 );
    const int halfFract  = deltaFract >> 1;

    const TFilterCoeff  intraSmoothingFilter[4] = { TFilterCoeff(16 - halfFract), TFilterCoeff(32 - halfFract), TFilterCoeff(16 + halfFract), TFilterCoeff(halfFract) };
    const TFilterCoeff* f = useCubicFilter ? InterpolationFilter::getChromaFilterTable(deltaFract) : intraSmoothingFilter;

    // ref[x] is the first of the four taps for column x (one sample before position deltaInt + 1 + x).
    const Pel* ref    = refMain + deltaInt;
    Pel*       dstRow = pDstBuf + y * dstStride;

    for( int x = 0; x < width; x++ )
    {
      Pel val = static_cast<Pel>((static_cast<int>(f[0] * ref[x    ]) +
                                  static_cast<int>(f[1] * ref[x + 1]) +
                                  static_cast<int>(f[2] * ref[x + 2]) +
                                  static_cast<int>(f[3] * ref[x + 3]) + 32) >> 6);

      if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
      {
        val = ClipPel( val, clpRng );
      }

      dstRow[x] = val;   // single store per sample
    }
  }
}
224
225
/**
 * Angular chroma prediction with 2-tap linear interpolation, plain C++ implementation.
 *
 * For every row the projected position deltaPos is split into an integer offset
 * (deltaPos >> 5) and a 1/32 fraction; every output sample is the rounded linear blend of
 * two neighbouring reference samples.
 *
 * \param pDstBuf         first destination row
 * \param dstStride       distance between destination rows, in samples
 * \param pBorder         main reference samples
 * \param width           number of samples per row
 * \param height          number of rows
 * \param deltaPos        projected position of the first row, in 1/32 sample units
 * \param intraPredAngle  increment of deltaPos per row
 */
void IntraPredAngleChroma_Core(Pel* pDstBuf,const ptrdiff_t dstStride,int16_t* pBorder,int width,int height,int deltaPos,int intraPredAngle)
{
  Pel* dstRow = pDstBuf;

  for (int row = 0; row < height; row++)
  {
    const int intOffset = deltaPos >> 5;
    const int fract     = deltaPos & (32 - 1);
    const int invFract  = 32 - fract;

    // ref[col] / ref[col + 1] are the two samples blended for column col.
    const Pel* ref  = pBorder + intOffset + 1;
    int        prev = ref[0];

    for( int col = 0; col < width; col++ )
    {
      const int next = ref[col + 1];
      dstRow[col]    = static_cast<Pel>( ( invFract * prev + fract * next + 16 ) >> 5 );
      prev           = next;
    }

    deltaPos += intraPredAngle;
    dstRow   += dstStride;
  }
}
246
247
// ====================================================================================================================
248
// Constructor / destructor / initialize
249
// ====================================================================================================================
250
251
/**
 * Sets up the kernel function pointers with the plain C++ (_Core) implementations.
 *
 * \param enableOpt  when set and ENABLE_SIMD_OPT_INTRAPRED is active, the platform specific
 *                   init routine is called afterwards (presumably to install SIMD kernels -
 *                   those routines are not part of this file)
 */
IntraPrediction::IntraPrediction( bool enableOpt )
:  m_pMdlmTemp( nullptr )
,  m_currChromaFormat( NUM_CHROMA_FORMAT )
{
  // Non-directional prediction and its PDPC post filter.
  xPredIntraPlanar      = xPredIntraPlanar_Core;
  IntraPredSampleFilter = IntraPredSampleFilter_Core;

  // Directional prediction.
  IntraPredAngleLuma    = IntraPredAngleLuma_Core;
  IntraPredAngleChroma  = IntraPredAngleChroma_Core;

  // PDPC for directional modes.
  IntraHorVerPDPC       = IntraHorVerPDPC_Core;
  IntraAnglePDPC        = IntraAnglePDPC_Core;

#if ENABLE_SIMD_OPT_INTRAPRED
  if( enableOpt )
  {
#if defined( TARGET_SIMD_X86 )
    initIntraPredictionX86();
#endif
#if defined( TARGET_SIMD_ARM )
    initIntraPredictionARM();
#endif
  }
#endif // ENABLE_SIMD_OPT_INTRAPRED
}
274
275
// Destructor: releases the buffer owned by this object by delegating to destroy().
IntraPrediction::~IntraPrediction()
276
20.7k
{
277
20.7k
  destroy();
278
20.7k
}
279
280
// Frees the temporary buffer allocated in init() and clears the pointer, so that calling
// destroy() again (e.g. from the destructor) is harmless and init() can allocate anew.
void IntraPrediction::destroy()
281
20.7k
{
282
20.7k
  delete[] m_pMdlmTemp;
283
20.7k
  m_pMdlmTemp = nullptr;
284
20.7k
}
285
286
/**
 * Stores the chroma format and allocates the temporary buffer used by predIntraChromaLM().
 *
 * \param chromaFormatIDC  chroma format to remember in m_currChromaFormat
 * \param bitDepthY        not used by this function
 */
void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
{
  m_currChromaFormat = chromaFormatIDC;

  // The buffer is allocated once and kept across repeated init() calls.
  if (m_pMdlmTemp != nullptr)
  {
    return;
  }

  //MDLM will use top-above and left-below samples.
  m_pMdlmTemp = new Pel[(2 * MAX_TB_SIZEY + 1)*(2 * MAX_TB_SIZEY + 1)];
}
295
296
// ====================================================================================================================
297
// Public member functions
298
// ====================================================================================================================
299
300
// Function for calculating DC value of the reference samples used in Intra prediction
301
//NOTE: Bit-Limit - 25-bit source
302
/**
 * Computes the DC value of the reference samples.
 *
 * For square blocks both the top and the left reference samples are averaged; for
 * non-square blocks only the samples along the longer side are used, so the divisor is
 * always the value passed to Log2() below and the division reduces to a shift.
 *
 * \param pSrc     reference samples: row 0 holds the top, row 1 the left reference
 * \param dstSize  size of the block to predict; must not be empty
 * \return         rounded average of the selected reference samples
 */
Pel IntraPrediction::xGetPredValDc( const CPelBuf& pSrc, const Size& dstSize )
{
  CHECK( dstSize.width == 0 || dstSize.height == 0, "Empty area provided" );

  const int width  = dstSize.width;
  const int height = dstSize.height;

  // First reference sample to use; shifted by the selected reference line.
  const int firstRef = m_ipaParam.multiRefIndex + 1;

  const auto denom     = (width == height) ? (width << 1) : std::max(width,height);
  const auto divShift  = Log2(denom);
  const auto divOffset = (denom >> 1);

  int sum = 0;

  if ( width >= height )
  {
    for( int i = 0; i < width; i++ )
    {
      sum += pSrc.at( firstRef + i, 0);
    }
  }

  if ( width <= height )
  {
    for( int i = 0; i < height; i++ )
    {
      sum += pSrc.at( firstRef + i, 1);
    }
  }

  return static_cast<Pel>( (sum + divOffset) >> divShift );
}
334
335
int IntraPrediction::getWideAngle( int width, int height, int predMode )
336
2.06M
{
337
2.06M
  if ( predMode > DC_IDX && predMode <= VDIA_IDX )
338
1.42M
  {
339
1.42M
    int modeShift[] = { 0, 6, 10, 12, 14, 15 };
340
1.42M
    int deltaSize = abs(Log2(width) - Log2(height));
341
1.42M
    if (width > height && predMode < 2 + modeShift[deltaSize])
342
41.0k
    {
343
41.0k
      predMode += (VDIA_IDX - 1);
344
41.0k
    }
345
1.38M
    else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize])
346
72.2k
    {
347
72.2k
      predMode -= (VDIA_IDX - 1);
348
72.2k
    }
349
1.42M
  }
350
2.06M
  return predMode;
351
2.06M
}
352
353
void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf& piPred, const CodingUnit& cu)
354
1.83M
{
355
1.83M
  const ComponentID    compID       = compId;
356
1.83M
  const ChannelType    channelType  = toChannelType( compID );
357
1.83M
  const uint32_t       uiDirMode = cu.bdpcmM[channelType] ? BDPCM_IDX : CU::getFinalIntraMode(cu, channelType);
358
359
1.83M
  CHECK( Log2(piPred.width) > 7, "Size not allowed" );
360
361
//  const int multiRefIdx = m_ipaParam.multiRefIndex;
362
1.83M
  const int srcStride  = m_refBufferStride[compID];
363
1.83M
  const int srcHStride = 2;
364
365
1.83M
  const CPelBuf& srcBuf = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride);
366
1.83M
  const ClpRng& clpRng(cu.cs->slice->clpRngs[compID]);
367
368
1.83M
  switch (uiDirMode)
369
1.83M
  {
370
129k
    case(PLANAR_IDX): xPredIntraPlanar(piPred, srcBuf); break;
371
301k
    case(DC_IDX):     xPredIntraDc    ( piPred, srcBuf ); break;
372
88.4k
    case(BDPCM_IDX):  xPredIntraBDPCM ( piPred, srcBuf, cu.bdpcmM[channelType], clpRng); break;
373
1.31M
    default:          xPredIntraAng   ( piPred, srcBuf, channelType, clpRng); break;
374
1.83M
  }
375
376
1.83M
  if (m_ipaParam.applyPDPC)
377
1.34M
  {
378
1.34M
    if (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX)
379
385k
    {
380
385k
      IntraPredSampleFilter(piPred, srcBuf);
381
385k
    }
382
1.34M
  }
383
1.83M
}
384
385
void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf& piPred, const CodingUnit& cu, const CompArea& chromaArea, int intraDir)
386
217k
{
387
217k
  CHECK( piPred.width > MAX_TB_SIZEY || piPred.height > MAX_TB_SIZEY, "not enough memory");
388
217k
  const int iLumaStride = 2 * MAX_TB_SIZEY + 1;
389
217k
  PelBuf Temp = PelBuf(m_pMdlmTemp + iLumaStride + 1, iLumaStride, Size(chromaArea));
390
391
217k
  int a, b, iShift;
392
217k
  xGetLMParameters(cu, compID, chromaArea, a, b, iShift); // th shift result is unsigned
393
394
  ////// final prediction
395
217k
  piPred.copyFrom(Temp);
396
217k
  piPred.linearTransform(a, iShift, b, true, cu.cs->slice->clpRngs[compID]);
397
217k
}
398
399
/** Function for deriving DC intra prediction. This function derives the prediction samples for DC mode (intra coding) by filling the block with the DC value of the reference samples.
400
 */
401
402
/**
 * DC intra prediction: fills the whole destination block with the DC value of the
 * reference samples as computed by xGetPredValDc().
 *
 * \param pDst  destination block (also provides the block size)
 * \param pSrc  reference samples
 */
void IntraPrediction::xPredIntraDc( PelBuf& pDst, const CPelBuf& pSrc )
{
  pDst.fill( xGetPredValDc( pSrc, pDst ) );
}
407
408
// Function for initialization of intra prediction parameters
409
void IntraPrediction::initPredIntraParams(const CodingUnit& cu, const CompArea area, const SPS& sps)
410
2.06M
{
411
2.06M
  const ComponentID compId = area.compID;
412
2.06M
  const ChannelType chType = toChannelType(compId);
413
414
2.06M
  const bool        useISP = NOT_INTRA_SUBPARTITIONS != cu.ispMode && isLuma( chType );
415
416
2.06M
  const Size   cuSize    = Size( cu.blocks[compId].width, cu.blocks[compId].height );
417
2.06M
  const Size   puSize    = Size( area.width, area.height );
418
2.06M
  const Size&  blockSize = useISP ? cuSize : puSize;
419
2.06M
  const int      dirMode = CU::getFinalIntraMode(cu, chType);
420
2.06M
  const int     predMode = getWideAngle( blockSize.width, blockSize.height, dirMode );
421
422
2.06M
  m_ipaParam.isModeVer            = predMode >= DIA_IDX;
423
2.06M
  m_ipaParam.multiRefIndex        = isLuma (chType) ? cu.multiRefIdx : 0 ;
424
2.06M
  m_ipaParam.refFilterFlag        = false;
425
2.06M
  m_ipaParam.interpolationFlag    = false;
426
2.06M
  m_ipaParam.applyPDPC            = (puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY) && m_ipaParam.multiRefIndex == 0;
427
428
2.06M
  const int    intraPredAngleMode = (m_ipaParam.isModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
429
430
431
2.06M
  int absAng = 0;
432
2.06M
  if (dirMode > DC_IDX && dirMode < NUM_LUMA_MODE) // intraPredAngle for directional modes
433
1.42M
  {
434
1.42M
    static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
435
1.42M
    static const int invAngTable[32] = {
436
1.42M
      0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
437
1.42M
      512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16
438
1.42M
    };   // (512 * 32) / Angle
439
440
1.42M
    const int     absAngMode         = abs(intraPredAngleMode);
441
1.42M
    const int     signAng            = intraPredAngleMode < 0 ? -1 : 1;
442
1.42M
                  absAng             = angTable  [absAngMode];
443
444
1.42M
    m_ipaParam.absInvAngle           = invAngTable[absAngMode];
445
1.42M
    m_ipaParam.intraPredAngle        = signAng * absAng;
446
1.42M
    if (intraPredAngleMode < 0)
447
256k
    {
448
256k
      m_ipaParam.applyPDPC = false;
449
256k
    }
450
1.17M
    else if (intraPredAngleMode > 0)
451
579k
    {
452
579k
      const int sideSize = m_ipaParam.isModeVer ? puSize.height : puSize.width;
453
579k
      const int maxScale = 2;
454
455
579k
      m_ipaParam.angularScale = std::min(maxScale, floorLog2(sideSize) - (floorLog2(3 * m_ipaParam.absInvAngle - 2) - 8));
456
579k
      m_ipaParam.applyPDPC &= m_ipaParam.angularScale >= 0;
457
579k
    }
458
1.42M
  }
459
460
  // high level conditions and DC intra prediction
461
2.06M
  if( !isLuma( chType )
462
854k
    || useISP
463
833k
    || CU::isMIP( cu, chType ) //th remove this
464
783k
    || m_ipaParam.multiRefIndex
465
617k
    || DC_IDX == dirMode
466
2.06M
    )
467
1.47M
  {
468
1.47M
  }
469
587k
  else if (cu.bdpcmM[chType])
470
7.68k
  {
471
7.68k
    m_ipaParam.refFilterFlag = false;
472
7.68k
  }
473
579k
  else if (dirMode == PLANAR_IDX) // Planar intra prediction
474
31.2k
  {
475
31.2k
    m_ipaParam.refFilterFlag = puSize.width * puSize.height > 32 ? true : false;
476
31.2k
  }
477
548k
  else if (!useISP)// HOR, VER and angular modes (MDIS)
478
548k
  {
479
548k
    bool filterFlag = false;
480
548k
    {
481
548k
      const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
482
548k
      const int log2Size = (Log2(puSize.width * puSize.height) >> 1);
483
548k
      CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
484
548k
      filterFlag = (diff > m_aucIntraFilter[log2Size]);
485
548k
    }
486
487
    // Selelection of either ([1 2 1] / 4 ) refrence filter OR Gaussian 4-tap interpolation filter
488
548k
    if (filterFlag)
489
443k
    {
490
443k
      const bool isRefFilter       =  isIntegerSlope(absAng);
491
443k
      CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" );
492
443k
      m_ipaParam.refFilterFlag     =  isRefFilter;
493
443k
      m_ipaParam.interpolationFlag = !isRefFilter;
494
443k
    }
495
548k
  }
496
2.06M
}
497
498
}   // namespace vvenc
499
500
#ifdef TARGET_SIMD_X86
501
#include "x86/CommonDefX86.h"
502
#endif
503
504
namespace vvenc {
505
506
/** Function for deriving the simplified angular intra predictions.
507
*
508
* This function derives the prediction samples for the angular mode based on the prediction direction indicated by
509
* the prediction mode index. The prediction direction is given by the displacement of the bottom row of the block and
510
* the reference row above the block in the case of vertical prediction or displacement of the rightmost column
511
* of the block and reference column left from the block in the case of the horizontal prediction. The displacement
512
* is signalled at 1/32 pixel accuracy. When projection of the predicted pixel falls in between reference samples,
513
* the predicted value for the pixel is linearly interpolated from the reference samples. All reference samples are taken
514
* from the extended main reference.
515
*/
516
//NOTE: Bit-Limit - 25-bit source
517
518
/**
 * Angular intra prediction (including the pure horizontal and vertical modes).
 *
 * Steps:
 *  1. Build the main and side reference arrays from pSrc (row 0 = top, row 1 = left). For
 *     negative angles the main reference is extended to negative indices by projecting side
 *     reference samples; otherwise it is extended at its end by sample replication.
 *  2. Horizontal modes are predicted as vertical modes with width and height swapped into a
 *     temporary block.
 *  3. Predict: plain copy (with optional PDPC) for angle 0, row-wise copy for integer slopes,
 *     interpolation otherwise; apply angular PDPC if enabled.
 *  4. Transpose the temporary block into pDst for horizontal modes.
 *
 * The mode parameters are taken from m_ipaParam.
 *
 * \param pDst         destination block
 * \param pSrc         reference samples
 * \param channelType  selects the luma or chroma interpolation kernel
 * \param clpRng       clipping range for the predicted samples
 */
void IntraPrediction::xPredIntraAng( PelBuf& pDst, const CPelBuf& pSrc, const ChannelType channelType, const ClpRng& clpRng)
{
  int width =int(pDst.width);
  int height=int(pDst.height);

  const bool bIsModeVer     = m_ipaParam.isModeVer;
  const int  multiRefIdx    = m_ipaParam.multiRefIndex;
  const int  intraPredAngle = m_ipaParam.intraPredAngle;
  const int  absInvAngle    = m_ipaParam.absInvAngle;

  Pel* refMain;
  Pel* refSide;

  Pel  refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
  Pel  refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];

  // Initialize the Main and Left reference array.
  if (intraPredAngle < 0)
  {
    // Leave room in front of both arrays for the projected extension of the main reference.
    memcpy(&refAbove[height],pSrc.buf,(width + 2 + multiRefIdx)*sizeof(Pel));
    for (int y = 0; y <= height + 1 + multiRefIdx; y++)
    {
      refLeft[y + width] = pSrc.at(y, 1);
    }
    refMain = bIsModeVer ? refAbove + height : refLeft + width;
    refSide = bIsModeVer ? refLeft + width : refAbove + height;

    // Extend the Main reference to the left.
    const int sizeSide = bIsModeVer ? height : width;
    for (int k = -sizeSide; k <= -1; k++)
    {
      refMain[k] = refSide[std::min((-k * absInvAngle + 256) >> 9, sizeSide)];
    }
  }
  else
  {
    memcpy(&refAbove[0], pSrc.buf, ((m_topRefLength)+multiRefIdx + 1) * sizeof(Pel));
    for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++)
    {
      refLeft[y] = pSrc.at(y, 1);
    }

    refMain = bIsModeVer ? refAbove : refLeft;
    refSide = bIsModeVer ? refLeft : refAbove;

    // Extend main reference to right using replication
    const int log2Ratio = Log2(width) - Log2(height);
    const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
    const int maxIndex  = (multiRefIdx << s) + 2;
    const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
    const Pel val       = refMain[refLength + multiRefIdx];
    for (int z = 1; z <= maxIndex; z++)
    {
      refMain[refLength + multiRefIdx + z] = val;
    }
  }

  // swap width/height if we are doing a horizontal mode:
  if (!bIsModeVer)
  {
    std::swap(width, height);
  }
  Pel tempArray[MAX_CU_SIZE*MAX_CU_SIZE];
  const int dstStride = bIsModeVer ? pDst.stride : MAX_CU_SIZE;
  Pel* pDstBuf = bIsModeVer ? pDst.buf : tempArray;

  // compensate for line offset in reference line buffers
  refMain += multiRefIdx;
  refSide += multiRefIdx;

  Pel* pDsty = pDstBuf;

  if( intraPredAngle == 0 )  // pure vertical or pure horizontal
  {
    if (m_ipaParam.applyPDPC)
    {
      const int scale   = (Log2(width * height) - 2) >> 2;
      IntraHorVerPDPC(pDsty,dstStride,refSide,width,height,scale,refMain,clpRng);
    }
    else
    {
      for( int y = 0; y < height; y++ )
      {
        memcpy(pDsty,&refMain[1],width*sizeof(Pel));
        pDsty += dstStride;
      }
    }
  }
  else
  {
    if( !isIntegerSlope( abs( intraPredAngle ) ) )
    {
      // Fractional slope: samples have to be interpolated.
      int deltaPos = intraPredAngle * ( 1 + multiRefIdx );
      if( isLuma( channelType ) )
      {
        if( width <= 2 )
        {
          // Narrow blocks are handled here instead of by the IntraPredAngleLuma kernel.
          const bool useCubicFilter = !m_ipaParam.interpolationFlag;

          for( int y = 0; y < height; y++, deltaPos += intraPredAngle, pDsty += dstStride )
          {
            const int deltaInt   = deltaPos >> 5;
            const int deltaFract = deltaPos & 31;

            const TFilterCoeff intraSmoothingFilter[4] = { TFilterCoeff( 16 - ( deltaFract >> 1 ) ),
                                                           TFilterCoeff( 32 - ( deltaFract >> 1 ) ),
                                                           TFilterCoeff( 16 + ( deltaFract >> 1 ) ),
                                                           TFilterCoeff(      ( deltaFract >> 1 ) ) };
            const TFilterCoeff* const f =
              ( useCubicFilter ) ? InterpolationFilter::getChromaFilterTable( deltaFract ) : intraSmoothingFilter;

            for( int x = 0; x < width; x++ )
            {
              Pel p[4];

              p[0] = refMain[deltaInt + x + 0];
              p[1] = refMain[deltaInt + x + 1];
              p[2] = refMain[deltaInt + x + 2];
              p[3] = refMain[deltaInt + x + 3];

              const Pel val = static_cast<Pel>( ( f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 32 ) >> 6 );

              pDsty[x] = ClipPel( val, clpRng );   // always clip even though not always needed
            }
          }
        }
        else
        {
          IntraPredAngleLuma(pDstBuf, dstStride, refMain, width, height, deltaPos, intraPredAngle, nullptr, !m_ipaParam.interpolationFlag, clpRng);
        }
      }
      else
      {
        IntraPredAngleChroma(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
      }
    }
    else
    {
      // Integer slope: every row is a shifted copy of the main reference.
      for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
      {
        const int deltaInt   = deltaPos >> 5;
        // Just copy the integer samples
        memcpy(pDsty,refMain  + deltaInt + 1,width*sizeof(Pel));
      }
    }

    if (m_ipaParam.applyPDPC)
    {
      pDsty = pDstBuf;
      IntraAnglePDPC(pDsty,dstStride,refSide,width,height,m_ipaParam.angularScale,absInvAngle);
    }
  } // else

  // Flip the block if this is the horizontal mode
  if( !bIsModeVer )
  {
    pDst.transposedFrom( CPelBuf( pDstBuf, dstStride, width, height) );
  }
}
682
683
/**
 * BDPCM prediction.
 *
 * dirMode 1: every row is filled with the reference sample of that row taken from the
 *            second row of pSrc (position y + 1).
 * dirMode 2: every row is a copy of the first row of pSrc starting at position 1.
 *
 * \param pDst     destination block
 * \param pSrc     reference samples
 * \param dirMode  BDPCM direction, must be 1 or 2
 * \param clpRng   not used by this function
 */
void IntraPrediction::xPredIntraBDPCM(PelBuf& pDst, const CPelBuf& pSrc, const uint32_t dirMode, const ClpRng& clpRng)
{
  const int cols      = pDst.width;
  const int rows      = pDst.height;
  const int dstStride = pDst.stride;
  const int refStride = pSrc.stride;

  CHECK(!(dirMode == 1 || dirMode == 2), "Incorrect BDPCM mode parameter.");

  Pel* dstRow = &pDst.buf[0];

  if (dirMode == 1)
  {
    // Second reference row, skipping its first sample.
    const Pel* sideRef = pSrc.buf + refStride + 1;

    for (int r = 0; r < rows; r++, dstRow += dstStride)
    {
      const Pel val = sideRef[r];
      for (int c = 0; c < cols; c++)
      {
        dstRow[c] = val;
      }
    }
  }
  else
  {
    // First reference row, skipping its first sample.
    const Pel* mainRef = pSrc.buf + 1;

    for (int r = 0; r < rows; r++, dstRow += dstStride)
    {
      for (int c = 0; c < cols; c++)
      {
        dstRow[c] = mainRef[c];
      }
    }
  }
}
719
720
// Forward declarations of the neighbour availability helpers; their definitions are not part
// of this section of the file.
// NOTE(review): judging by the signatures, the int-returning variants presumably fill
// validFlags for numUnits units and return a count - confirm against the definitions.
inline bool isAboveLeftAvailable  ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT );
721
inline int  isAboveAvailable      ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *validFlags );
722
inline int  isLeftAvailable       ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *validFlags );
723
inline int  isAboveRightAvailable ( const CodingUnit &cu, const ChannelType& chType, const Position& posRT, const uint32_t numUnits, const uint32_t unitHeight, bool *validFlags );
724
inline int  isBelowLeftAvailable  ( const CodingUnit &cu, const ChannelType& chType, const Position& posLB, const uint32_t numUnits, const uint32_t unitHeight, bool *validFlags );
725
726
void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea& area, const bool forceRefFilterFlag)
727
853k
{
728
853k
  const CodingStructure& cs   = *cu.cs;
729
730
853k
  if (!forceRefFilterFlag)
731
793k
  {
732
793k
    initPredIntraParams(cu, area, *cs.sps);
733
793k
  }
734
735
853k
  Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
736
853k
  Pel *refBufFiltered   = m_refBuffer[area.compID][PRED_BUF_FILTERED];
737
738
853k
  setReferenceArrayLengths(area);
739
740
  // ----- Step 1: unfiltered reference samples -----
741
853k
  xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu );
742
  // ----- Step 2: filtered reference samples -----
743
853k
  if( m_ipaParam.refFilterFlag || forceRefFilterFlag )
744
64.2k
  {
745
64.2k
    xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.multiRefIdx );
746
64.2k
  }
747
853k
}
748
749
void IntraPrediction::reset()
750
88.1k
{
751
88.1k
  m_lastCh = MAX_NUM_CH;
752
88.1k
  m_lastArea = Area(0,0,0,0);
753
88.1k
}
754
755
void IntraPrediction::xFillReferenceSamples( const CPelBuf& recoBuf, Pel* refBufUnfiltered, const CompArea& area, const CodingUnit &cu )
756
869k
{
757
869k
  const ChannelType      chType = toChannelType( area.compID );
758
869k
  const CodingStructure &cs     = *cu.cs;
759
869k
  const SPS             &sps    = *cs.sps;
760
869k
  const PreCalcValues   &pcv    = *cs.pcv;
761
762
869k
  const int multiRefIdx         = (area.compID == COMP_Y) ? cu.multiRefIdx : 0;
763
764
869k
  const int  tuWidth            = area.width;
765
869k
  const int  tuHeight           = area.height;
766
869k
  const int  predSize           = m_topRefLength;
767
869k
  const int  predHSize          = m_leftRefLength;
768
869k
  const int predStride = predSize + 1 + multiRefIdx;
769
869k
  m_refBufferStride[area.compID] = predStride;
770
771
869k
  const int  unitWidth          = tuWidth  <= 2 && cu.ispMode && isLuma(area.compID) ? tuWidth  : pcv.minCUSize >> getComponentScaleX(area.compID, sps.chromaFormatIdc);
772
869k
  const int  unitHeight         = tuHeight <= 2 && cu.ispMode && isLuma(area.compID) ? tuHeight : pcv.minCUSize >> getComponentScaleY(area.compID, sps.chromaFormatIdc);
773
774
869k
  const int  totalAboveUnits    = (predSize + (unitWidth - 1)) / unitWidth;
775
869k
  const int  totalLeftUnits     = (predHSize + (unitHeight - 1)) / unitHeight;
776
869k
  const int  totalUnits         = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
777
778
869k
  if( m_lastArea != area || m_lastCh != chType )
779
88.1k
  {
780
88.1k
    m_lastCh = chType;
781
88.1k
    m_lastArea = area;
782
88.1k
    const int  numAboveUnits      = std::max<int>( tuWidth / unitWidth, 1 );
783
88.1k
    const int  numLeftUnits       = std::max<int>( tuHeight / unitHeight, 1 );
784
88.1k
    const int  numAboveRightUnits = totalAboveUnits - numAboveUnits;
785
88.1k
    const int  numLeftBelowUnits  = totalLeftUnits - numLeftUnits;
786
787
88.1k
    CHECK( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported" );
788
789
    // ----- Step 1: analyze neighborhood -----
790
88.1k
    const Position posLT          = area;
791
88.1k
    const Position posRT          = area.topRight();
792
88.1k
    const Position posLB          = area.bottomLeft();
793
794
88.1k
    m_numIntraNeighbor = 0;
795
796
88.1k
    memset( m_neighborFlags, 0, totalUnits );
797
798
88.1k
    m_neighborFlags[totalLeftUnits] = isAboveLeftAvailable( cu, chType, posLT );
799
88.1k
    m_numIntraNeighbor += m_neighborFlags[totalLeftUnits] ? 1 : 0;
800
88.1k
    m_numIntraNeighbor += isAboveAvailable     ( cu, chType, posLT, numAboveUnits,      unitWidth,  (m_neighborFlags + totalLeftUnits + 1) );
801
88.1k
    m_numIntraNeighbor += isAboveRightAvailable( cu, chType, posRT, numAboveRightUnits, unitWidth,  (m_neighborFlags + totalLeftUnits + 1 + numAboveUnits) );
802
88.1k
    m_numIntraNeighbor += isLeftAvailable      ( cu, chType, posLT, numLeftUnits,       unitHeight, (m_neighborFlags + totalLeftUnits - 1) );
803
88.1k
    m_numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB, numLeftBelowUnits,  unitHeight, (m_neighborFlags + totalLeftUnits - 1 - numLeftUnits) );
804
88.1k
  }
805
  // ----- Step 2: fill reference samples (depending on neighborhood) -----
806
807
869k
  const Pel*  srcBuf    = recoBuf.buf;
808
869k
  const int   srcStride = recoBuf.stride;
809
869k
        Pel*  ptrDst    = refBufUnfiltered;
810
869k
  const Pel*  ptrSrc;
811
869k
  const Pel   valueDC   = 1 << (sps.bitDepths[ chType ] - 1);
812
813
814
869k
  if( m_numIntraNeighbor == 0 )
815
279k
  {
816
    // Fill border with DC value
817
8.46M
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; }
818
8.23M
    for (int i = 0; i <= predHSize + multiRefIdx; i++) { ptrDst[i+predStride] = valueDC; }
819
279k
  }
820
589k
  else if( m_numIntraNeighbor == totalUnits )
821
1.11k
  {
822
    // Fill top-left border and top and top right with rec. samples
823
1.11k
    ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
824
90.3k
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; }
825
90.3k
    for (int i = 0; i <= predHSize + multiRefIdx; i++)
826
89.1k
    {
827
89.1k
      ptrDst[i + predStride] = ptrSrc[i * srcStride];
828
89.1k
    }
829
1.11k
  }
830
588k
  else // reference samples are partially available
831
588k
  {
832
    // Fill top-left sample(s) if available
833
588k
    ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
834
588k
    ptrDst = refBufUnfiltered;
835
588k
    if (m_neighborFlags[totalLeftUnits])
836
199k
    {
837
199k
      ptrDst[0] = ptrSrc[0];
838
199k
      ptrDst[predStride] = ptrSrc[0];
839
239k
      for (int i = 1; i <= multiRefIdx; i++)
840
40.2k
      {
841
40.2k
        ptrDst[i] = ptrSrc[i];
842
40.2k
        ptrDst[i + predStride] = ptrSrc[i * srcStride];
843
40.2k
      }
844
199k
    }
845
846
    // Fill left & below-left samples if available (downwards)
847
588k
    ptrSrc += (1 + multiRefIdx) * srcStride;
848
588k
    ptrDst += (1 + multiRefIdx) + predStride;
849
8.63M
    for (int unitIdx = totalLeftUnits - 1; unitIdx > 0; unitIdx--)
850
8.04M
    {
851
8.04M
      if (m_neighborFlags[unitIdx])
852
3.50M
      {
853
11.9M
        for (int i = 0; i < unitHeight; i++)
854
8.49M
        {
855
8.49M
          ptrDst[i] = ptrSrc[i*srcStride];
856
8.49M
        }
857
3.50M
      }
858
8.04M
      ptrSrc += unitHeight * srcStride;
859
8.04M
      ptrDst += unitHeight;
860
8.04M
    }
861
    // Fill last below-left sample(s)
862
588k
    if (m_neighborFlags[0])
863
17.9k
    {
864
17.9k
      int lastSample = (predHSize % unitHeight == 0) ? unitHeight : predHSize % unitHeight;
865
72.1k
      for (int i = 0; i < lastSample; i++)
866
54.1k
      {
867
54.1k
        ptrDst[i] = ptrSrc[i*srcStride];
868
54.1k
      }
869
17.9k
    }
870
871
    // Fill above & above-right samples if available (left-to-right)
872
588k
    ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
873
588k
    ptrDst = refBufUnfiltered + 1 + multiRefIdx;
874
8.66M
    for (int unitIdx = totalLeftUnits + 1; unitIdx < totalUnits - 1; unitIdx++)
875
8.08M
    {
876
8.08M
      if (m_neighborFlags[unitIdx])
877
4.39M
      {
878
4.39M
        memcpy(ptrDst,ptrSrc,unitWidth*sizeof(Pel));
879
4.39M
      }
880
8.08M
      ptrSrc += unitWidth;
881
8.08M
      ptrDst += unitWidth;
882
8.08M
    }
883
    // Fill last above-right sample(s)
884
588k
    if (m_neighborFlags[totalUnits - 1])
885
68.9k
    {
886
68.9k
      int lastSample = (predSize % unitWidth == 0) ? unitWidth : predSize % unitWidth;
887
68.9k
      memcpy(ptrDst,ptrSrc,lastSample*sizeof(Pel));
888
68.9k
    }
889
890
    // pad from first available down to the last below-left
891
588k
    ptrDst = refBufUnfiltered;
892
588k
    int lastAvailUnit = 0;
893
588k
    if (!m_neighborFlags[0])
894
570k
    {
895
570k
      int firstAvailUnit = 1;
896
5.32M
      while (firstAvailUnit < totalUnits && !m_neighborFlags[firstAvailUnit])
897
4.75M
      {
898
4.75M
        firstAvailUnit++;
899
4.75M
      }
900
901
      // first available sample
902
570k
      int firstAvailRow = -1;
903
570k
      int firstAvailCol = 0;
904
570k
      if (firstAvailUnit < totalLeftUnits)
905
357k
      {
906
357k
        firstAvailRow = (totalLeftUnits - firstAvailUnit) * unitHeight + multiRefIdx;
907
357k
      }
908
212k
      else if (firstAvailUnit == totalLeftUnits)
909
0
      {
910
0
        firstAvailRow = multiRefIdx;
911
0
      }
912
212k
      else
913
212k
      {
914
212k
        firstAvailCol = (firstAvailUnit - totalLeftUnits - 1) * unitWidth + 1 + multiRefIdx;
915
212k
      }
916
570k
      const Pel firstAvailSample = ptrDst[firstAvailRow < 0 ? firstAvailCol : firstAvailRow + predStride];
917
918
      // last sample below-left (n.a.)
919
570k
      int lastRow = predHSize + multiRefIdx;
920
921
      // fill left column
922
13.0M
      for (int i = lastRow; i > firstAvailRow; i--)
923
12.4M
      {
924
12.4M
        ptrDst[i + predStride] = firstAvailSample;
925
12.4M
      }
926
      // fill top row
927
570k
      if (firstAvailCol > 0)
928
212k
      {
929
436k
        for (int j = 0; j < firstAvailCol; j++)
930
223k
        {
931
223k
          ptrDst[j] = firstAvailSample;
932
223k
        }
933
212k
      }
934
570k
      lastAvailUnit = firstAvailUnit;
935
570k
    }
936
937
    // pad all other reference samples.
938
588k
    int currUnit = lastAvailUnit + 1;
939
12.5M
    while (currUnit < totalUnits)
940
11.9M
    {
941
11.9M
      if (!m_neighborFlags[currUnit]) // samples not available
942
4.37M
      {
943
        // last available sample
944
4.37M
        int lastAvailRow = -1;
945
4.37M
        int lastAvailCol = 0;
946
4.37M
        if (lastAvailUnit < totalLeftUnits)
947
175k
        {
948
175k
          lastAvailRow = (totalLeftUnits - lastAvailUnit - 1) * unitHeight + multiRefIdx + 1;
949
175k
        }
950
4.20M
        else if (lastAvailUnit == totalLeftUnits)
951
175k
        {
952
175k
          lastAvailCol = multiRefIdx;
953
175k
        }
954
4.02M
        else
955
4.02M
        {
956
4.02M
          lastAvailCol = (lastAvailUnit - totalLeftUnits) * unitWidth + multiRefIdx;
957
4.02M
        }
958
4.37M
        const Pel lastAvailSample = ptrDst[lastAvailRow < 0 ? lastAvailCol : lastAvailRow + predStride];
959
960
        // fill current unit with last available sample
961
4.37M
        if (currUnit < totalLeftUnits)
962
0
        {
963
0
          for (int i = lastAvailRow - 1; i >= lastAvailRow - unitHeight; i--)
964
0
          {
965
0
            ptrDst[i + predStride] = lastAvailSample;
966
0
          }
967
0
        }
968
4.37M
        else if (currUnit == totalLeftUnits)
969
175k
        {
970
351k
          for (int i = 0; i < multiRefIdx + 1; i++)
971
175k
          {
972
175k
            ptrDst[i + predStride] = lastAvailSample;
973
175k
          }
974
351k
          for (int j = 0; j < multiRefIdx + 1; j++)
975
175k
          {
976
175k
            ptrDst[j] = lastAvailSample;
977
175k
          }
978
175k
        }
979
4.20M
        else
980
4.20M
        {
981
4.20M
          int numSamplesInUnit = (currUnit == totalUnits - 1) ? ((predSize % unitWidth == 0) ? unitWidth : predSize % unitWidth) : unitWidth;
982
13.7M
          for (int j = lastAvailCol + 1; j <= lastAvailCol + numSamplesInUnit; j++)
983
9.59M
          {
984
9.59M
            ptrDst[j] = lastAvailSample;
985
9.59M
          }
986
4.20M
        }
987
4.37M
      }
988
11.9M
      lastAvailUnit = currUnit;
989
11.9M
      currUnit++;
990
11.9M
    }
991
588k
  }
992
869k
}
993
994
void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea& area, const SPS &sps
995
  , int multiRefIdx
996
  , int stride
997
)
998
64.2k
{
999
64.2k
  if (area.compID != COMP_Y)
1000
0
  {
1001
0
    multiRefIdx = 0;
1002
0
  }
1003
64.2k
  const int predSize = m_topRefLength + multiRefIdx;
1004
64.2k
  const int predHSize = m_leftRefLength + multiRefIdx;
1005
64.2k
  const int predStride = stride == 0 ? predSize + 1 : stride;
1006
1007
1008
64.2k
  const Pel topLeft =
1009
64.2k
    (refBufUnfiltered[0] + refBufUnfiltered[1] + refBufUnfiltered[predStride] + refBufUnfiltered[predStride + 1] + 2)
1010
64.2k
    >> 2;
1011
1012
64.2k
  refBufFiltered[0] = topLeft;
1013
1014
3.67M
  for (int i = 1; i < predSize; i++)
1015
3.61M
  {
1016
3.61M
    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
1017
3.61M
  }
1018
64.2k
  refBufFiltered[predSize] = refBufUnfiltered[predSize];
1019
1020
64.2k
  refBufFiltered += predStride;
1021
64.2k
  refBufUnfiltered += predStride;
1022
1023
64.2k
  refBufFiltered[0] = topLeft;
1024
1025
3.67M
  for (int i = 1; i < predHSize; i++)
1026
3.61M
  {
1027
3.61M
    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
1028
3.61M
  }
1029
64.2k
  refBufFiltered[predHSize] = refBufUnfiltered[predHSize];
1030
64.2k
}
1031
1032
bool isAboveLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT)
1033
88.1k
{
1034
88.1k
  const CodingStructure& cs = *cu.cs;
1035
88.1k
  const Position refPos = posLT.offset(-1, -1);
1036
1037
88.1k
  return (cs.getCURestricted(refPos, cu, chType) != NULL);
1038
88.1k
}
1039
1040
int isAboveAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *bValidFlags)
1041
389k
{
1042
389k
  const CodingStructure& cs = *cu.cs;
1043
1044
389k
  bool *    validFlags  = bValidFlags;
1045
389k
  int       numIntra    = 0;
1046
389k
  const int maxDx       = numUnits * unitWidth;
1047
389k
  unsigned  checkPosX   = 0;
1048
389k
  bool      valid       = false;
1049
1050
1.64M
  for (int dx = 0; dx < maxDx; dx += unitWidth)
1051
1.49M
  {
1052
1.49M
    if( dx >= checkPosX )
1053
391k
    {
1054
391k
      const Position refPos = posLT.offset(dx, -1);
1055
1056
391k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1057
391k
      valid = (cuN != NULL);
1058
391k
      if( cuN ) checkPosX = chType == CH_C ? (cuN->Cb().x + cuN->Cb().width - posLT.x) : (cuN->Y().x + cuN->Y().width - posLT.x);
1059
231k
      else break;
1060
391k
    }
1061
1062
1.25M
    numIntra += valid ? 1 : 0;
1063
1.25M
    *validFlags = valid;
1064
1065
1.25M
    validFlags++;
1066
1.25M
  }
1067
1068
389k
  return numIntra;
1069
389k
}
1070
1071
int isLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitHeight, bool *bValidFlags)
1072
389k
{
1073
389k
  const CodingStructure& cs = *cu.cs;
1074
1075
389k
  bool *    validFlags = bValidFlags;
1076
389k
  int       numIntra   = 0;
1077
389k
  const int maxDy      = numUnits * unitHeight;
1078
389k
  unsigned checkPosY   = 0;
1079
389k
  bool     valid       = false;
1080
1081
1.54M
  for (int dy = 0; dy < maxDy; dy += unitHeight)
1082
1.39M
  {
1083
1.39M
    if( dy >= checkPosY )
1084
392k
    {
1085
392k
      const Position refPos = posLT.offset(-1, dy);
1086
1087
392k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1088
392k
      valid = (cuN != NULL);
1089
392k
      if( cuN ) checkPosY = chType == CH_C ? (cuN->Cb().y + cuN->Cb().height - posLT.y) : (cuN->Y().y + cuN->Y().height - posLT.y);
1090
244k
      else break;
1091
392k
    }
1092
1093
1.15M
    numIntra += valid ? 1 : 0;
1094
1.15M
    *validFlags = valid;
1095
1096
1.15M
    validFlags--;
1097
1.15M
  }
1098
1099
389k
  return numIntra;
1100
389k
}
1101
1102
int isAboveRightAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posRT, const uint32_t numUnits, const uint32_t unitWidth, bool *bValidFlags )
1103
198k
{
1104
198k
  const CodingStructure& cs = *cu.cs;
1105
1106
198k
  bool *    validFlags = bValidFlags;
1107
198k
  int       numIntra   = 0;
1108
198k
  const int maxDx      = numUnits * unitWidth;
1109
198k
  unsigned  checkPosX   = 0;
1110
198k
  bool      valid       = false;
1111
1112
634k
  for (int dx = 0; dx < maxDx; dx += unitWidth)
1113
606k
  {
1114
606k
    if( dx >= checkPosX )
1115
231k
    {
1116
231k
      const Position refPos = posRT.offset(unitWidth + dx, -1);
1117
1118
231k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1119
231k
      valid = (cuN != NULL);
1120
231k
      if(cuN) checkPosX = chType == CH_C ? (cuN->Cb().x + cuN->Cb().width - (posRT.x + unitWidth)) : (cuN->Y().x + cuN->Y().width - (posRT.x + unitWidth));
1121
170k
      else break;
1122
231k
    }
1123
1124
436k
    numIntra += valid ? 1 : 0;
1125
436k
    *validFlags = valid;
1126
1127
436k
    validFlags++;
1128
436k
  }
1129
1130
198k
  return numIntra;
1131
198k
}
1132
1133
int isBelowLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLB, const uint32_t numUnits, const uint32_t unitHeight, bool *bValidFlags )
1134
187k
{
1135
187k
  const CodingStructure& cs = *cu.cs;
1136
1137
187k
  bool *    validFlags = bValidFlags;
1138
187k
  int       numIntra   = 0;
1139
187k
  const int maxDy      = numUnits * unitHeight;
1140
187k
  unsigned  checkPosY   = 0;
1141
187k
  bool      valid       = false;
1142
1143
332k
  for (int dy = 0; dy < maxDy; dy += unitHeight)
1144
326k
  {
1145
326k
    if( dy >= checkPosY )
1146
207k
    {
1147
207k
      const Position refPos = posLB.offset(-1, unitHeight + dy);
1148
1149
207k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1150
207k
      valid = (cuN != NULL);
1151
207k
      if( cuN ) checkPosY = chType == CH_C ? (cuN->Cb().y + cuN->Cb().height - (posLB.y + unitHeight)) : (cuN->Y().y + cuN->Y().height - (posLB.y + unitHeight));
1152
181k
      else break;
1153
207k
    }
1154
1155
145k
    numIntra += valid ? 1 : 0;
1156
145k
    *validFlags = valid;
1157
1158
145k
    validFlags--;
1159
145k
  }
1160
1161
187k
  return numIntra;
1162
187k
}
1163
1164
// LumaRecPixels
1165
void IntraPrediction::loadLMLumaRecPels(const CodingUnit& cu, const CompArea& chromaArea )
1166
83.0k
{
1167
83.0k
  int iDstStride = 2 * MAX_TB_SIZEY + 1;
1168
83.0k
  Pel* pDst0 = m_pMdlmTemp + iDstStride + 1;
1169
  //assert 420 chroma subsampling
1170
83.0k
  CompArea lumaArea = CompArea( COMP_Y, cu.chromaFormat, chromaArea.lumaPos(), recalcSize( cu.chromaFormat, CH_C, CH_L, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus)
1171
1172
83.0k
  CHECK(lumaArea.width == chromaArea.width && CHROMA_444 != cu.chromaFormat, "");
1173
83.0k
  CHECK(lumaArea.height == chromaArea.height && CHROMA_444 != cu.chromaFormat && CHROMA_422 != cu.chromaFormat, "");
1174
1175
83.0k
  const SizeType uiCWidth = chromaArea.width;
1176
83.0k
  const SizeType uiCHeight = chromaArea.height;
1177
1178
83.0k
  const CPelBuf Src = cu.cs->picture->getRecoBuf( lumaArea );
1179
83.0k
  Pel const* pRecSrc0   = Src.bufAt( 0, 0 );
1180
83.0k
  int iRecStride        = Src.stride;
1181
83.0k
  int logSubWidthC  = getChannelTypeScaleX(CH_C, cu.chromaFormat);
1182
83.0k
  int logSubHeightC = getChannelTypeScaleY(CH_C, cu.chromaFormat);
1183
1184
83.0k
  int iRecStride2       = iRecStride << logSubHeightC;
1185
1186
83.0k
  const CompArea& area = isChroma( cu.chType ) ? chromaArea : lumaArea;
1187
1188
83.0k
  const uint32_t uiTuWidth  = area.width;
1189
83.0k
  const uint32_t uiTuHeight = area.height;
1190
1191
83.0k
  const int  unitWidthLog2  = MIN_CU_LOG2 - getComponentScaleX( area.compID, area.chromaFormat );
1192
83.0k
  const int  unitHeightLog2 = MIN_CU_LOG2 - getComponentScaleY( area.compID, area.chromaFormat );
1193
83.0k
  const int  unitWidth  = 1<<unitWidthLog2;
1194
83.0k
  const int  unitHeight = 1<<unitHeightLog2;
1195
1196
83.0k
  const int  iTUWidthInUnits  = uiTuWidth >> unitWidthLog2;
1197
83.0k
  const int  iTUHeightInUnits = uiTuHeight >> unitHeightLog2;
1198
83.0k
  const int  iAboveUnits      = iTUWidthInUnits;
1199
83.0k
  const int  iLeftUnits       = iTUHeightInUnits;
1200
1201
83.0k
  const int  chromaUnitWidthLog2  = MIN_CU_LOG2 - logSubWidthC;
1202
83.0k
  const int  chromaUnitHeightLog2 = MIN_CU_LOG2 - logSubHeightC;
1203
83.0k
  const int  chromaUnitWidth = 1<<chromaUnitWidthLog2;
1204
83.0k
  const int  chromaUnitHeight = 1<<chromaUnitHeightLog2;
1205
83.0k
  const int  topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H.
1206
83.0k
  const int  leftTemplateSampNum = 2 * uiCHeight;
1207
83.0k
  const int  totalAboveUnits = (topTemplateSampNum + (chromaUnitWidth - 1)) >> chromaUnitWidthLog2;
1208
83.0k
  const int  totalLeftUnits = (leftTemplateSampNum + (chromaUnitHeight - 1)) >> chromaUnitHeightLog2;
1209
83.0k
  const int  totalUnits = totalLeftUnits + totalAboveUnits + 1;
1210
83.0k
  const int  aboveRightUnits = totalAboveUnits - iAboveUnits;
1211
83.0k
  const int  leftBelowUnits = totalLeftUnits - iLeftUnits;
1212
1213
83.0k
  int avaiAboveRightUnits = 0;
1214
83.0k
  int avaiLeftBelowUnits = 0;
1215
83.0k
  bool  bNeighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
1216
83.0k
  memset(bNeighborFlags, 0, totalUnits);
1217
83.0k
  bool aboveIsAvailable, leftIsAvailable;
1218
83.0k
  const ChannelType areaCh = toChannelType( area.compID );
1219
1220
83.0k
  int availlableUnit = isLeftAvailable(cu, areaCh, area.pos(), iLeftUnits, unitHeight, (bNeighborFlags + iLeftUnits + leftBelowUnits - 1));
1221
1222
83.0k
  leftIsAvailable = availlableUnit == iTUHeightInUnits;
1223
1224
83.0k
  availlableUnit = isAboveAvailable(cu, areaCh, area.pos(), iAboveUnits, unitWidth, (bNeighborFlags + iLeftUnits + leftBelowUnits + 1));
1225
1226
83.0k
  aboveIsAvailable = availlableUnit == iTUWidthInUnits;
1227
1228
83.0k
  if (leftIsAvailable)   // if left is not available, then the below left is not available
1229
27.4k
  {
1230
27.4k
    avaiLeftBelowUnits = isBelowLeftAvailable(cu, areaCh, area.bottomLeftComp(area.compID), leftBelowUnits, unitHeight, (bNeighborFlags + leftBelowUnits - 1));
1231
27.4k
  }
1232
1233
83.0k
  if (aboveIsAvailable)   // if above is not available, then  the above right is not available.
1234
29.9k
  {
1235
29.9k
    avaiAboveRightUnits = isAboveRightAvailable(cu, areaCh, area.topRightComp(area.compID), aboveRightUnits, unitWidth, (bNeighborFlags + iLeftUnits + leftBelowUnits + iAboveUnits + 1));
1236
29.9k
  }
1237
1238
83.0k
  Pel*       pDst  = nullptr;
1239
83.0k
  Pel const* piSrc = nullptr;
1240
1241
83.0k
  bool isFirstRowOfCtu = (lumaArea.y & ((cu.cs->sps)->CTUSize - 1)) == 0;
1242
1243
83.0k
  if (aboveIsAvailable)
1244
29.9k
  {
1245
29.9k
    pDst  = pDst0    - iDstStride;
1246
29.9k
    int addedAboveRight = 0;
1247
29.9k
    if ((cu.intraDir[1] == MDLM_L_IDX) || (cu.intraDir[1] == MDLM_T_IDX))
1248
26.1k
    {
1249
26.1k
      addedAboveRight = avaiAboveRightUnits*chromaUnitWidth;
1250
26.1k
    }
1251
664k
    for (int i = 0; i < uiCWidth + addedAboveRight; i++)
1252
634k
    {
1253
634k
      const bool leftPadding = i == 0 && !leftIsAvailable;
1254
634k
      if (cu.chromaFormat == CHROMA_444)
1255
0
      {
1256
0
        piSrc = pRecSrc0 - iRecStride;
1257
0
        pDst[i] = piSrc[i];
1258
0
      }
1259
634k
      else if (isFirstRowOfCtu)
1260
138k
      {
1261
138k
        piSrc   = pRecSrc0 - iRecStride;
1262
138k
        pDst[i] = (piSrc[2 * i] * 2 + piSrc[2 * i - (leftPadding ? 0 : 1)] + piSrc[2 * i + 1] + 2) >> 2;
1263
138k
      }
1264
495k
      else if (cu.chromaFormat == CHROMA_422)
1265
0
      {
1266
0
        piSrc = pRecSrc0 - iRecStride2;
1267
1268
0
        int s = 2;
1269
0
        s += piSrc[2 * i] * 2;
1270
0
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1271
0
        s += piSrc[2 * i + 1];
1272
0
        pDst[i] = s >> 2;
1273
0
      }
1274
495k
      else if (cu.cs->sps->verCollocatedChroma )
1275
0
      {
1276
0
        piSrc = pRecSrc0 - iRecStride2;
1277
1278
0
        int s = 4;
1279
0
        s += piSrc[2 * i - iRecStride];
1280
0
        s += piSrc[2 * i] * 4;
1281
0
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1282
0
        s += piSrc[2 * i + 1];
1283
0
        s += piSrc[2 * i + iRecStride];
1284
0
        pDst[i] = s >> 3;
1285
0
      }
1286
495k
      else
1287
495k
      {
1288
495k
        piSrc = pRecSrc0 - iRecStride2;
1289
495k
        int s = 4;
1290
495k
        s += piSrc[2 * i] * 2;
1291
495k
        s += piSrc[2 * i + 1];
1292
495k
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1293
495k
        s += piSrc[2 * i + iRecStride] * 2;
1294
495k
        s += piSrc[2 * i + 1 + iRecStride];
1295
495k
        s += piSrc[2 * i + iRecStride - (leftPadding ? 0 : 1)];
1296
495k
        pDst[i] = s >> 3;
1297
495k
      }
1298
634k
    }
1299
29.9k
  }
1300
1301
83.0k
  if (leftIsAvailable)
1302
27.4k
  {
1303
27.4k
    pDst  = pDst0    - 1;
1304
27.4k
    piSrc = pRecSrc0 - 1 - logSubWidthC;
1305
1306
27.4k
    int addedLeftBelow = 0;
1307
27.4k
    if ((cu.intraDir[1] == MDLM_L_IDX) || (cu.intraDir[1] == MDLM_T_IDX))
1308
24.3k
    {
1309
24.3k
      addedLeftBelow = avaiLeftBelowUnits*chromaUnitHeight;
1310
24.3k
    }
1311
1312
551k
    for (int j = 0; j < uiCHeight + addedLeftBelow; j++)
1313
523k
    {
1314
523k
      if (cu.chromaFormat == CHROMA_444)
1315
0
      {
1316
0
        pDst[0] = piSrc[0];
1317
0
      }
1318
523k
      else if (cu.chromaFormat == CHROMA_422)
1319
0
      {
1320
0
        int s = 2;
1321
0
        s += piSrc[0] * 2;
1322
0
        s += piSrc[-1];
1323
0
        s += piSrc[1];
1324
0
        pDst[0] = s >> 2;
1325
0
      }
1326
523k
      else if (cu.cs->sps->verCollocatedChroma)
1327
0
      {
1328
0
        const bool abovePadding = j == 0 && !aboveIsAvailable;
1329
1330
0
        int s = 4;
1331
0
        s += piSrc[-(abovePadding ? 0 : iRecStride)];
1332
0
        s += piSrc[0] * 4;
1333
0
        s += piSrc[-1];
1334
0
        s += piSrc[1];
1335
0
        s += piSrc[iRecStride];
1336
0
        pDst[0] = s >> 3;
1337
0
      }
1338
523k
      else
1339
523k
      {
1340
523k
        int s = 4;
1341
523k
        s += piSrc[0] * 2;
1342
523k
        s += piSrc[1];
1343
523k
        s += piSrc[-1];
1344
523k
        s += piSrc[iRecStride] * 2;
1345
523k
        s += piSrc[iRecStride + 1];
1346
523k
        s += piSrc[iRecStride - 1];
1347
523k
        pDst[0] = s >> 3;
1348
523k
      }
1349
1350
523k
      piSrc += iRecStride2;
1351
523k
      pDst  += iDstStride;
1352
523k
    }
1353
27.4k
  }
1354
1355
  // inner part from reconstructed picture buffer
1356
1.15M
  for( int j = 0; j < uiCHeight; j++ )
1357
1.06M
  {
1358
17.6M
    for( int i = 0; i < uiCWidth; i++ )
1359
16.5M
    {
1360
16.5M
      if (cu.chromaFormat == CHROMA_444)
1361
0
      {
1362
0
        pDst0[i] = pRecSrc0[i];
1363
0
      }
1364
16.5M
      else if (cu.chromaFormat == CHROMA_422)
1365
0
      {
1366
0
        const bool leftPadding  = i == 0 && !leftIsAvailable;
1367
1368
0
        int s = 2;
1369
0
        s += pRecSrc0[2 * i] * 2;
1370
0
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1371
0
        s += pRecSrc0[2 * i + 1];
1372
0
        pDst0[i] = s >> 2;
1373
0
      }
1374
16.5M
      else if (cu.cs->sps->verCollocatedChroma)
1375
0
      {
1376
0
        const bool leftPadding  = i == 0 && !leftIsAvailable;
1377
0
        const bool abovePadding = j == 0 && !aboveIsAvailable;
1378
1379
0
        int s = 4;
1380
0
        s += pRecSrc0[2 * i - (abovePadding ? 0 : iRecStride)];
1381
0
        s += pRecSrc0[2 * i] * 4;
1382
0
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1383
0
        s += pRecSrc0[2 * i + 1];
1384
0
        s += pRecSrc0[2 * i + iRecStride];
1385
0
        pDst0[i] = s >> 3;
1386
0
      }
1387
16.5M
      else
1388
16.5M
      {
1389
16.5M
        CHECK(cu.chromaFormat != CHROMA_420, "Chroma format must be 4:2:0 for vertical filtering");
1390
16.5M
        const bool leftPadding = i == 0 && !leftIsAvailable;
1391
1392
16.5M
        int s = 4;
1393
16.5M
        s += pRecSrc0[2 * i] * 2;
1394
16.5M
        s += pRecSrc0[2 * i + 1];
1395
16.5M
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1396
16.5M
        s += pRecSrc0[2 * i + iRecStride] * 2;
1397
16.5M
        s += pRecSrc0[2 * i + 1 + iRecStride];
1398
16.5M
        s += pRecSrc0[2 * i + iRecStride - (leftPadding ? 0 : 1)];
1399
16.5M
        pDst0[i] = s >> 3;
1400
16.5M
      }
1401
16.5M
    }
1402
1403
1.06M
    pDst0    += iDstStride;
1404
1.06M
    pRecSrc0 += iRecStride2;
1405
1.06M
  }
1406
83.0k
}
1407
1408
void IntraPrediction::xGetLMParameters(const CodingUnit& cu, const ComponentID compID,
1409
                                              const CompArea& chromaArea,
1410
                                              int& a, int& b, int& iShift)
1411
217k
{
1412
217k
  CHECK(compID == COMP_Y, "");
1413
1414
217k
  const SizeType cWidth  = chromaArea.width;
1415
217k
  const SizeType cHeight = chromaArea.height;
1416
1417
217k
  const Position posLT = chromaArea;
1418
1419
217k
  CodingStructure & cs = *(cu.cs);
1420
1421
217k
  const SPS &        sps           = *cs.sps;
1422
217k
  const uint32_t     tuWidth     = chromaArea.width;
1423
217k
  const uint32_t     tuHeight    = chromaArea.height;
1424
217k
  const ChromaFormat nChromaFormat = sps.chromaFormatIdc;
1425
1426
217k
  const int unitWidthLog2    = MIN_CU_LOG2 - getComponentScaleX(chromaArea.compID, nChromaFormat);
1427
217k
  const int unitHeightLog2   = MIN_CU_LOG2 - getComponentScaleY(chromaArea.compID, nChromaFormat);
1428
217k
  const int unitWidth    = 1<<unitWidthLog2;
1429
217k
  const int unitHeight   = 1<<unitHeightLog2;
1430
1431
217k
  const int tuWidthInUnits  = tuWidth >> unitWidthLog2;
1432
217k
  const int tuHeightInUnits = tuHeight >> unitHeightLog2;
1433
217k
  const int aboveUnits      = tuWidthInUnits;
1434
217k
  const int leftUnits       = tuHeightInUnits;
1435
217k
  int topTemplateSampNum = 2 * cWidth; // for MDLM, the template sample number is 2W or 2H;
1436
217k
  int leftTemplateSampNum = 2 * cHeight;
1437
217k
  int totalAboveUnits = (topTemplateSampNum + (unitWidth - 1)) >> unitWidthLog2;
1438
217k
  int totalLeftUnits = (leftTemplateSampNum + (unitHeight - 1)) >> unitHeightLog2;
1439
217k
  int totalUnits = totalLeftUnits + totalAboveUnits + 1;
1440
217k
  int aboveRightUnits = totalAboveUnits - aboveUnits;
1441
217k
  int leftBelowUnits = totalLeftUnits - leftUnits;
1442
217k
  int avaiAboveRightUnits = 0;
1443
217k
  int avaiLeftBelowUnits = 0;
1444
217k
  int avaiAboveUnits = 0;
1445
217k
  int avaiLeftUnits = 0;
1446
1447
217k
  const int curChromaMode = cu.intraDir[1];
1448
217k
  bool neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
1449
217k
  memset(neighborFlags, 0, totalUnits);
1450
1451
217k
  bool aboveAvailable, leftAvailable;
1452
1453
217k
  int availableUnit = isAboveAvailable(cu, CH_C, posLT, aboveUnits, unitWidth,
1454
217k
    (neighborFlags + leftUnits + leftBelowUnits + 1));
1455
217k
  aboveAvailable = availableUnit == tuWidthInUnits;
1456
1457
217k
  availableUnit = isLeftAvailable(cu, CH_C, posLT, leftUnits, unitHeight,
1458
217k
    (neighborFlags + leftUnits + leftBelowUnits - 1));
1459
217k
  leftAvailable = availableUnit == tuHeightInUnits;
1460
217k
  if (leftAvailable) // if left is not available, then the below left is not available
1461
71.9k
  {
1462
71.9k
    avaiLeftUnits = tuHeightInUnits;
1463
71.9k
    avaiLeftBelowUnits = isBelowLeftAvailable(cu, CH_C, chromaArea.bottomLeftComp(chromaArea.compID), leftBelowUnits, unitHeight, (neighborFlags + leftBelowUnits - 1));
1464
71.9k
  }
1465
217k
  if (aboveAvailable) // if above is not available, then  the above right is not available.
1466
79.9k
  {
1467
79.9k
    avaiAboveUnits = tuWidthInUnits;
1468
79.9k
    avaiAboveRightUnits = isAboveRightAvailable(cu, CH_C, chromaArea.topRightComp(chromaArea.compID), aboveRightUnits, unitWidth, (neighborFlags + leftUnits + leftBelowUnits + aboveUnits + 1));
1469
79.9k
  }
1470
1471
217k
  const int srcStride = 2 * MAX_TB_SIZEY + 1;
1472
217k
  Pel* srcColor0 = m_pMdlmTemp + srcStride + 1;
1473
1474
217k
  Pel* curChroma0 = getPredictorPtr(compID);
1475
1476
217k
  unsigned internalBitDepth = sps.bitDepths[CH_C];
1477
1478
217k
  int minLuma[2] = {  MAX_INT, 0 };
1479
217k
  int maxLuma[2] = { -MAX_INT, 0 };
1480
1481
217k
  Pel* src = srcColor0 - srcStride;
1482
217k
  int actualTopTemplateSampNum = 0;
1483
217k
  int actualLeftTemplateSampNum = 0;
1484
217k
  if (curChromaMode == MDLM_T_IDX)
1485
86.7k
  {
1486
86.7k
    leftAvailable = 0;
1487
86.7k
    avaiAboveRightUnits = avaiAboveRightUnits > (cHeight>>unitWidthLog2) ?  cHeight>>unitWidthLog2 : avaiAboveRightUnits;
1488
86.7k
    actualTopTemplateSampNum = unitWidth*(avaiAboveUnits + avaiAboveRightUnits);
1489
86.7k
  }
1490
131k
  else if (curChromaMode == MDLM_L_IDX)
1491
86.7k
  {
1492
86.7k
    aboveAvailable = 0;
1493
86.7k
    avaiLeftBelowUnits = avaiLeftBelowUnits > (cWidth>>unitHeightLog2) ? cWidth>>unitHeightLog2 : avaiLeftBelowUnits;
1494
86.7k
    actualLeftTemplateSampNum = unitHeight*(avaiLeftUnits + avaiLeftBelowUnits);
1495
86.7k
  }
1496
44.3k
  else if (curChromaMode == LM_CHROMA_IDX)
1497
44.3k
  {
1498
44.3k
    actualTopTemplateSampNum = cWidth;
1499
44.3k
    actualLeftTemplateSampNum = cHeight;
1500
44.3k
  }
1501
217k
  int startPos[2]; //0:Above, 1: Left
1502
217k
  int pickStep[2];
1503
1504
217k
  int aboveIs4 = leftAvailable  ? 0 : 1;
1505
217k
  int leftIs4 =  aboveAvailable ? 0 : 1;
1506
1507
217k
  startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
1508
217k
  pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4));
1509
1510
217k
  startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
1511
217k
  pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4));
1512
1513
217k
  Pel selectLumaPix[4] = { 0, 0, 0, 0 };
1514
217k
  Pel selectChromaPix[4] = { 0, 0, 0, 0 };
1515
1516
217k
  int cntT, cntL;
1517
217k
  cntT = cntL = 0;
1518
217k
  int cnt = 0;
1519
217k
  if (aboveAvailable)
1520
43.8k
  {
1521
43.8k
    cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
1522
43.8k
    src = srcColor0 - srcStride;
1523
43.8k
    const Pel *cur = curChroma0 + 1;
1524
216k
    for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
1525
172k
    {
1526
172k
      selectLumaPix[cnt] = src[pos];
1527
172k
      selectChromaPix[cnt] = cur[pos];
1528
172k
    }
1529
43.8k
  }
1530
1531
217k
  if (leftAvailable)
1532
39.0k
  {
1533
39.0k
    cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 );
1534
39.0k
    src = srcColor0 - 1;
1535
39.0k
    const Pel *cur = curChroma0 + m_refBufferStride[compID] + 1;
1536
192k
    for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
1537
153k
    {
1538
153k
      selectLumaPix[cnt + cntT] = src[pos * srcStride];
1539
153k
      selectChromaPix[cnt + cntT] = cur[pos];
1540
153k
    }
1541
39.0k
  }
1542
217k
  cnt = cntL + cntT;
1543
1544
217k
  if (cnt == 2)
1545
20
  {
1546
20
    selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
1547
20
    selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
1548
20
    selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
1549
20
    selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
1550
20
  }
1551
1552
217k
  int minGrpIdx[2] = { 0, 2 };
1553
217k
  int maxGrpIdx[2] = { 1, 3 };
1554
217k
  int *tmpMinGrp = minGrpIdx;
1555
217k
  int *tmpMaxGrp = maxGrpIdx;
1556
217k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]);
1557
217k
  if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]);
1558
217k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp);
1559
217k
  if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]);
1560
1561
217k
  minLuma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1 )>>1;
1562
217k
  minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
1563
217k
  maxLuma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1 )>>1;
1564
217k
  maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
1565
1566
217k
  if (leftAvailable || aboveAvailable)
1567
81.6k
  {
1568
81.6k
    int diff = maxLuma[0] - minLuma[0];
1569
81.6k
    if (diff > 0)
1570
512
    {
1571
512
      int diffC = maxLuma[1] - minLuma[1];
1572
512
      int x = floorLog2( diff );
1573
512
      static const uint8_t DivSigTable[1 << 4] = {
1574
        // 4bit significands - 8 ( MSB is omitted )
1575
512
        0,  7,  6,  5,  5,  4,  4,  3,  3,  2,  2,  1,  1,  1,  1,  0
1576
512
      };
1577
512
      int normDiff = (diff << 4 >> x) & 15;
1578
512
      int v = DivSigTable[normDiff] | 8;
1579
512
      x += normDiff != 0;
1580
1581
512
      int y = diffC == 0 ? 0 : floorLog2( abs( diffC ) ) + 1;
1582
512
      int add = 1 << y >> 1;
1583
512
      a = (diffC * v + add) >> y;
1584
512
      iShift = 3 + x - y;
1585
512
      if ( iShift < 1 )
1586
0
      {
1587
0
        iShift = 1;
1588
0
        a = ( (a == 0)? 0: (a < 0)? -15 : 15 );   // a=Sign(a)*15
1589
0
      }
1590
512
      b = minLuma[1] - ((a * minLuma[0]) >> iShift);
1591
512
    }
1592
81.1k
    else
1593
81.1k
    {
1594
81.1k
      a = 0;
1595
81.1k
      b = minLuma[1];
1596
81.1k
      iShift = 0;
1597
81.1k
    }
1598
81.6k
  }
1599
136k
  else
1600
136k
  {
1601
136k
    a = 0;
1602
136k
    b = 1 << (internalBitDepth - 1);
1603
136k
    iShift = 0;
1604
136k
  }
1605
217k
}
1606
1607
void IntraPrediction::initIntraMip( const CodingUnit& cu )
1608
46.1k
{
1609
46.1k
  CHECK( cu.lwidth() > cu.cs->sps->getMaxTbSize() || cu.lheight() > cu.cs->sps->getMaxTbSize(), "Error: block size not supported for MIP" );
1610
1611
  // prepare input (boundary) data for prediction
1612
46.1k
  CHECK(m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP");
1613
46.1k
  Pel *ptrSrc = getPredictorPtr(COMP_Y);
1614
46.1k
  const int srcStride  = m_refBufferStride[COMP_Y];
1615
46.1k
  const int srcHStride = 2;
1616
1617
46.1k
  m_matrixIntraPred.prepareInputForPred(CPelBuf(ptrSrc, srcStride, srcHStride), cu.Y(), cu.slice->sps->bitDepths[CH_L]);
1618
46.1k
}
1619
1620
void IntraPrediction::predIntraMip( PelBuf &piPred, const CodingUnit& cu )
1621
276k
{
1622
276k
  CHECK( cu.lwidth() > cu.cs->sps->getMaxTbSize() || cu.lheight() > cu.cs->sps->getMaxTbSize(), "Error: block size not supported for MIP" );
1623
276k
  CHECK( cu.lwidth() != (1 << floorLog2(cu.lwidth())) || cu.lheight() != (1 << floorLog2(cu.lheight())), "Error: expecting blocks of size 2^M x 2^N" );
1624
1625
  // generate mode-specific prediction
1626
276k
  const int bitDepth = cu.slice->sps->bitDepths[CH_L];
1627
1628
276k
  CHECK( cu.lwidth() != piPred.stride, " no support yet" );
1629
 
1630
276k
  m_matrixIntraPred.predBlock(piPred.buf, cu.intraDir[CH_L], cu.mipTransposedFlag, bitDepth);
1631
276k
}
1632
1633
void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf,
1634
  const bool forceRefFilterFlag)
1635
20.7k
{
1636
20.7k
  const CodingStructure& cs = *cu.cs;
1637
1638
20.7k
  if (!forceRefFilterFlag)
1639
20.7k
  {
1640
20.7k
    initPredIntraParams(cu, area, *cs.sps);
1641
20.7k
  }
1642
1643
20.7k
  const Position posLT = area;
1644
20.7k
  bool           isLeftAvail =
1645
20.7k
    (cs.getCURestricted(posLT.offset(-1, 0), cu, CH_L) != NULL);
1646
20.7k
  bool isAboveAvail =
1647
20.7k
    (cs.getCURestricted(posLT.offset(0, -1), cu, CH_L) != NULL);
1648
  // ----- Step 1: unfiltered reference samples -----
1649
20.7k
  if (cu.blocks[area.compID].x == area.x && cu.blocks[area.compID].y == area.y)
1650
16.2k
  {
1651
16.2k
    Pel* refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1652
    // With the first subpartition all the CU reference samples are fetched at once in a single call to
1653
    // xFillReferenceSamples
1654
16.2k
    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
1655
7.84k
    {
1656
7.84k
      m_leftRefLength = cu.Y().height << 1;
1657
7.84k
      m_topRefLength = cu.Y().width + area.width;
1658
7.84k
    }
1659
8.45k
    else   // if (cu.ispMode == VER_INTRA_SUBPARTITIONS)
1660
8.45k
    {
1661
8.45k
      m_leftRefLength = cu.Y().height + area.height;
1662
8.45k
      m_topRefLength = cu.Y().width << 1;
1663
8.45k
    }
1664
1665
16.2k
    xFillReferenceSamples(cs.picture->getRecoBuf(cu.Y()), refBufUnfiltered, cu.Y(), cu);
1666
1667
    // After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the
1668
    // current subpartition
1669
16.2k
    m_topRefLength = cu.blocks[area.compID].width + area.width;
1670
16.2k
    m_leftRefLength = cu.blocks[area.compID].height + area.height;
1671
16.2k
  }
1672
4.41k
  else
1673
4.41k
  {
1674
4.41k
    m_topRefLength = cu.blocks[area.compID].width + area.width;
1675
4.41k
    m_leftRefLength = cu.blocks[area.compID].height + area.height;
1676
1677
4.41k
    const int predSizeHor = m_topRefLength;
1678
4.41k
    const int predSizeVer = m_leftRefLength;
1679
4.41k
    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
1680
3.40k
    {
1681
3.40k
      Pel* src = recBuf.bufAt(0, -1);
1682
3.40k
      Pel* ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID];
1683
3.40k
      if (isLeftAvail)
1684
0
      {
1685
0
        for (int i = 0; i <= 2 * cu.blocks[area.compID].height - area.height; i++)
1686
0
        {
1687
0
          ref[i] = ref[i + area.height];
1688
0
        }
1689
0
      }
1690
3.40k
      else
1691
3.40k
      {
1692
70.3k
        for (int i = 0; i <= predSizeVer; i++)
1693
66.9k
        {
1694
66.9k
          ref[i] = src[0];
1695
66.9k
        }
1696
3.40k
      }
1697
3.40k
      Pel* dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + 1;
1698
3.40k
      dst[-1] = ref[0];
1699
69.9k
      for (int i = 0; i < area.width; i++)
1700
66.5k
      {
1701
66.5k
        dst[i] = src[i];
1702
66.5k
      }
1703
3.40k
      Pel sample = src[area.width - 1];
1704
3.40k
      dst += area.width;
1705
69.9k
      for (int i = 0; i < predSizeHor - area.width; i++)
1706
66.5k
      {
1707
66.5k
        dst[i] = sample;
1708
66.5k
      }
1709
3.40k
    }
1710
1.01k
    else
1711
1.01k
    {
1712
1.01k
      Pel* src = recBuf.bufAt(-1, 0);
1713
1.01k
      Pel* ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1714
1.01k
      if (isAboveAvail)
1715
0
      {
1716
0
        for (int i = 0; i <= 2 * cu.blocks[area.compID].width - area.width; i++)
1717
0
        {
1718
0
          ref[i] = ref[i + area.width];
1719
0
        }
1720
0
      }
1721
1.01k
      else
1722
1.01k
      {
1723
22.2k
        for (int i = 0; i <= predSizeHor; i++)
1724
21.2k
        {
1725
21.2k
          ref[i] = src[0];
1726
21.2k
        }
1727
1.01k
      }
1728
1.01k
      Pel* dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID] + 1;
1729
1.01k
      dst[-1] = ref[0];
1730
23.1k
      for (int i = 0; i < area.height; i++)
1731
22.1k
      {
1732
22.1k
        *dst = *src;
1733
22.1k
        src += recBuf.stride;
1734
22.1k
        dst++;
1735
22.1k
      }
1736
1.01k
      Pel sample = src[-recBuf.stride];
1737
23.1k
      for (int i = 0; i < predSizeVer - area.height; i++)
1738
22.1k
      {
1739
22.1k
        *dst = sample;
1740
22.1k
        dst++;
1741
22.1k
      }
1742
1.01k
    }
1743
4.41k
  }
1744
  // ----- Step 2: filtered reference samples -----
1745
20.7k
  if (m_ipaParam.refFilterFlag || forceRefFilterFlag)
1746
0
  {
1747
0
    Pel* refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1748
0
    Pel* refBufFiltered = m_refBuffer[area.compID][PRED_BUF_FILTERED];
1749
0
    xFilterReferenceSamples(refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.multiRefIdx);
1750
0
  }
1751
20.7k
}
1752
1753
void IntraPrediction::setReferenceArrayLengths(const CompArea& area)
1754
853k
{
1755
  // set Top and Left reference samples length
1756
853k
  const int width = area.width;
1757
853k
  const int height = area.height;
1758
1759
853k
  m_leftRefLength = (height << 1);
1760
853k
  m_topRefLength = (width << 1);
1761
853k
}
1762
1763
} // namespace vvenc
1764
1765
//! \}
1766