Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/IntraPrediction.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     IntraPrediction.cpp
45
    \brief    intra prediction class
46
*/
47
48
#include "IntraPrediction.h"
49
#include "Unit.h"
50
#include "UnitTools.h"
51
#include "Rom.h"
52
#include "InterpolationFilter.h"
53
#include "dtrace_next.h"
54
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
// ====================================================================================================================
63
// Tables
64
// ====================================================================================================================
65
66
// Mode-distance thresholds for reference filtering of angular modes.
// initPredIntraParams() indexes this table with Log2( width * height ) >> 1 and enables
// filtering when the distance of the mode to the nearer of HOR_IDX / VER_IDX exceeds the entry.
const uint8_t IntraPrediction::m_aucIntraFilter[MAX_INTRA_FILTER_DEPTHS] =
{
  24,   // index 0:   1xn
  24,   // index 1:   2xn
  24,   // index 2:   4xn
  14,   // index 3:   8xn
   2,   // index 4:  16xn
   0,   // index 5:  32xn
   0,   // index 6:  64xn
   0    // index 7: 128xn
};
77
78
//NOTE: Bit-Limit - 24-bit source
79
/**
 * Planar intra prediction, portable C++ kernel.
 *
 * The above reference row is read through pSrc.at( x + 1, 0 ) and the left reference column
 * through pSrc.at( y + 1, 1 ); width + 1 resp. height + 1 samples are consumed, the last of
 * each being the top-right / bottom-left corner sample. The width x height prediction is
 * written to pDst.
 *
 * \param pDst  destination block (width, height, stride and buf are used)
 * \param pSrc  reference sample buffer
 */
void xPredIntraPlanar_Core( PelBuf& pDst, const CPelBuf& pSrc )
{
  const uint32_t width      = pDst.width;
  const uint32_t height     = pDst.height;
  const uint32_t log2W      = Log2( width );
  const uint32_t log2H      = Log2( height );
  const uint32_t offset     = 1 << ( log2W + log2H );
  const uint32_t finalShift = 1 + log2W + log2H;
  const uint32_t stride     = pDst.stride;

  const int numCols = static_cast<int>( width );
  const int numRows = static_cast<int>( height );

  int above   [MAX_TB_SIZEY + 1];
  int left    [MAX_TB_SIZEY + 1];
  int vertStep[MAX_TB_SIZEY];
  int horStep [MAX_TB_SIZEY];

  // Fetch the reference row / column, including the corner sample that follows the block.
  for( int i = 0; i <= numCols; i++ )
  {
    above[i] = pSrc.at( i + 1, 0 );
  }
  for( int i = 0; i <= numRows; i++ )
  {
    left[i] = pSrc.at( i + 1, 1 );
  }

  const int bottomLeft = left[numRows];
  const int topRight   = above[numCols];

  // with some optimizations gcc-8 gives spurious "-Wmaybe-uninitialized" warnings here
  GCC_WARNING_DISABLE_maybe_uninitialized
  // Per-column increment towards the bottom-left sample; the running vertical term starts
  // at above[] scaled by the block height.
  for( int i = 0; i < numCols; i++ )
  {
    vertStep[i] = bottomLeft - above[i];
    above[i]    = above[i] << log2H;
  }

  // Per-row increment towards the top-right sample; the running horizontal term starts
  // at left[] scaled by the block width.
  for( int i = 0; i < numRows; i++ )
  {
    horStep[i] = topRight - left[i];
    left[i]    = left[i] << log2W;
  }
  GCC_WARNING_RESET

  Pel* row = pDst.buf;
  for( int y = 0; y < numRows; y++, row += stride )
  {
    int horAcc = left[y];

    for( int x = 0; x < numCols; x++ )
    {
      horAcc   += horStep[y];
      above[x] += vertStep[x];

      const int vertAcc = above[x];
      // Cross-scale both terms so they share one denominator, then round and normalise.
      row[x] = ( ( horAcc << log2H ) + ( vertAcc << log2W ) + offset ) >> finalShift;
    }
  }
}
136
137
/**
 * Position-dependent blending of an existing prediction with its left / above reference
 * samples (called from predIntraAng() for planar and DC when PDPC applies).
 *
 * Every sample of dstBuf is pulled towards the left reference of its row and the above
 * reference of its column; the weights start at 32 and halve with growing distance from the
 * respective block edge, at a rate controlled by a block-size dependent scale.
 *
 * \param dstBuf  prediction block, modified in place
 * \param pSrc    reference samples; row 0 holds the above row, row 1 the left column
 */
void  IntraPredSampleFilter_Core(PelBuf& dstBuf, const CPelBuf& pSrc)
{
  const int iWidth  = dstBuf.width;
  const int iHeight = dstBuf.height;
  const int scale   = ( ( Log2( iWidth * iHeight ) - 2 ) >> 2 );

  CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");

  for( int row = 0; row < iHeight; row++ )
  {
    const Pel leftRef   = pSrc.at( row + 1, 1 );
    const int topWeight = 32 >> std::min( 31, ( ( row << 1 ) >> scale ) );

    for( int col = 0; col < iWidth; col++ )
    {
      const Pel topRef     = pSrc.at( col + 1, 0 );
      const int leftWeight = 32 >> std::min( 31, ( ( col << 1 ) >> scale ) );

      Pel&      sample = dstBuf.at( col, row );
      const Pel cur    = sample;

      sample = cur + ( ( leftWeight * ( leftRef - cur ) + topWeight * ( topRef - cur ) + 32 ) >> 6 );
    }
  }
}
158
159
/**
 * Pure horizontal / vertical prediction combined with PDPC.
 *
 * Each output row starts as a copy of refMain[1 .. width]. The first min( 3 << scale, width )
 * samples of the row are then offset by a weighted difference between the row's side
 * reference sample and refMain[0], and clipped to clpRng.
 *
 * \param pDsty      first destination row
 * \param dstStride  distance between destination rows, in samples
 * \param refSide    side reference; refSide[1 + y] is used for row y
 * \param width      samples per row
 * \param height     number of rows
 * \param scale      controls how fast the weight decays along the row
 * \param refMain    main reference; refMain[0] is the corner sample
 * \param clpRng     clipping range for the output samples
 */
void IntraHorVerPDPC_Core(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height,int scale,const Pel* refMain, const ClpRng& clpRng)
{
  const Pel corner = refMain[0];

  for( int row = 0; row < height; row++, pDsty += dstStride )
  {
    memcpy( pDsty, &refMain[1], width * sizeof( Pel ) );

    const Pel side = refSide[1 + row];

    for( int col = 0; col < std::min( 3 << scale, width ); col++ )
    {
      const int weight = 32 >> ( 2 * col >> scale );
      const Pel cur    = pDsty[col];

      pDsty[col] = ClipPel( cur + ( ( weight * ( side - corner ) + 32 ) >> 6 ), clpRng );
    }
  }
}
176
/**
 * PDPC for angular modes, applied in place to an already predicted block.
 *
 * For the first min( 3 << scale, width ) samples of every row the prediction is blended with
 * a side reference sample. The side sample is located by stepping an inverse-angle
 * accumulator (1/512 units, started at 256 for rounding) once per column.
 *
 * \param pDsty      first row of the block, modified in place
 * \param dstStride  distance between rows, in samples
 * \param refSide    side reference samples
 * \param width      samples per row
 * \param height     number of rows
 * \param scale      controls how fast the weight decays along the row
 * \param invAngle   per-column increment of the inverse-angle accumulator
 */
void IntraAnglePDPC_Core(Pel* pDsty,const int dstStride,Pel* refSide,const int width,const int height,int scale,int invAngle)
{
  for( int row = 0; row < height; row++, pDsty += dstStride )
  {
    int acc = 256;

    for( int col = 0; col < std::min( 3 << scale, width ); col++ )
    {
      acc += invAngle;

      const int weight = 32 >> ( 2 * col >> scale );
      const Pel side   = refSide[row + ( acc >> 9 ) + 1];
      const Pel cur    = pDsty[col];

      pDsty[col] = cur + ( ( weight * ( side - cur ) + 32 ) >> 6 );
    }
  }
}
190
191
/**
 * Fractional-angle luma prediction with a 4-tap interpolation filter.
 *
 * For every row the displacement deltaPos is split into an integer part (deltaPos >> 5) and
 * a 1/32 fractional part. The four reference samples around each position are filtered
 * either with the table returned by InterpolationFilter::getChromaFilterTable() (when
 * useCubicFilter is set) or with a smoothing filter built from the fractional part.
 *
 * \param pDstBuf         destination block
 * \param dstStride       distance between destination rows, in samples
 * \param refMain         main reference array
 * \param width           samples per row
 * \param height          number of rows
 * \param deltaPos        displacement of the first row, in 1/32 sample units
 * \param intraPredAngle  displacement increment per row
 * \param ff_unused       not used by this implementation
 * \param useCubicFilter  selects the table-based filter; its result is clipped to clpRng
 * \param clpRng          clipping range
 */
void IntraPredAngleLuma_Core(Pel* pDstBuf,const ptrdiff_t dstStride,Pel* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff_unused,const bool useCubicFilter,const ClpRng& clpRng)
{
  for( int row = 0; row < height; row++, deltaPos += intraPredAngle )
  {
    const int intPart   = deltaPos >> 5;
    const int fracPart  = deltaPos & ( 32 - 1 );
    const int halfFrac  = fracPart >> 1;

    const TFilterCoeff smoothing[4] = { TFilterCoeff( 16 - halfFrac ),
                                        TFilterCoeff( 32 - halfFrac ),
                                        TFilterCoeff( 16 + halfFrac ),
                                        TFilterCoeff( halfFrac ) };
    const TFilterCoeff* taps = useCubicFilter ? InterpolationFilter::getChromaFilterTable( fracPart ) : smoothing;

    Pel*       dstRow = pDstBuf + row * dstStride;
    const Pel* ref    = refMain + intPart;   // ref[col .. col + 3] are the four filter inputs

    for( int col = 0; col < width; col++ )
    {
      const int acc = static_cast<int>( taps[0] * ref[col    ] )
                    + static_cast<int>( taps[1] * ref[col + 1] )
                    + static_cast<int>( taps[2] * ref[col + 2] )
                    + static_cast<int>( taps[3] * ref[col + 3] );

      Pel val = static_cast<Pel>( ( acc + 32 ) >> 6 );

      if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
      {
        val = ClipPel( val, clpRng );
      }
      dstRow[col] = val;
    }
  }
}
224
225
/**
 * Fractional-angle prediction with 2-tap linear interpolation (installed as
 * IntraPredAngleChroma by the constructor).
 *
 * For every row the displacement deltaPos is split into an integer part (deltaPos >> 5) and
 * a 1/32 fractional part; each output sample is the rounded linear blend of two neighbouring
 * reference samples.
 *
 * \param pDstBuf         destination block
 * \param dstStride       distance between destination rows, in samples
 * \param pBorder         main reference array
 * \param width           samples per row
 * \param height          number of rows
 * \param deltaPos        displacement of the first row, in 1/32 sample units
 * \param intraPredAngle  displacement increment per row
 */
void IntraPredAngleChroma_Core(Pel* pDstBuf,const ptrdiff_t dstStride,int16_t* pBorder,int width,int height,int deltaPos,int intraPredAngle)
{
  for( int row = 0; row < height; row++, deltaPos += intraPredAngle, pDstBuf += dstStride )
  {
    const int intPart  = deltaPos >> 5;
    const int fracPart = deltaPos & ( 32 - 1 );

    // Do linear filtering between ref[col] and ref[col + 1]
    const Pel* ref  = pBorder + intPart + 1;
    int        prev = ref[0];

    for( int col = 0; col < width; col++ )
    {
      const int next = ref[col + 1];
      pDstBuf[col]   = static_cast<Pel>( ( ( 32 - fracPart ) * prev + fracPart * next + 16 ) >> 5 );
      prev           = next;
    }
  }
}
246
247
// ====================================================================================================================
248
// Constructor / destructor / initialize
249
// ====================================================================================================================
250
251
/**
 * Constructs the predictor without allocating the MDLM scratch buffer (see init()).
 *
 * All kernel function pointers are first bound to the portable *_Core implementations.
 * When SIMD support is compiled in and enableOpt is set, the platform init hooks are called
 * afterwards (presumably to rebind the pointers to SIMD variants - defined outside this file).
 *
 * \param enableOpt  request the platform-specific initialisation
 */
IntraPrediction::IntraPrediction( bool enableOpt )
  : m_pMdlmTemp       ( nullptr )
  , m_currChromaFormat( NUM_CHROMA_FORMAT )
{
  xPredIntraPlanar      = xPredIntraPlanar_Core;
  IntraPredSampleFilter = IntraPredSampleFilter_Core;
  IntraHorVerPDPC       = IntraHorVerPDPC_Core;
  IntraAnglePDPC        = IntraAnglePDPC_Core;
  IntraPredAngleChroma  = IntraPredAngleChroma_Core;
  IntraPredAngleLuma    = IntraPredAngleLuma_Core;

#if ENABLE_SIMD_OPT_INTRAPRED
  if( enableOpt )
  {
#if defined( TARGET_SIMD_X86 )
    initIntraPredictionX86();
#endif
#if defined( TARGET_SIMD_ARM )
    initIntraPredictionARM();
#endif
  }
#endif // ENABLE_SIMD_OPT_INTRAPRED
}
274
275
/** Releases the buffer owned by this object by delegating to destroy(). */
IntraPrediction::~IntraPrediction()
{
  destroy();
}
279
280
void IntraPrediction::destroy()
281
17.7k
{
282
17.7k
  delete[] m_pMdlmTemp;
283
17.7k
  m_pMdlmTemp = nullptr;
284
17.7k
}
285
286
/**
 * Records the chroma format and allocates the MDLM scratch buffer on first use.
 *
 * \param chromaFormatIDC  chroma format stored in m_currChromaFormat
 * \param bitDepthY        not used by this function
 */
void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
{
  m_currChromaFormat = chromaFormatIDC;

  if( m_pMdlmTemp != nullptr )
  {
    return;   // buffer survives repeated init() calls
  }

  // MDLM will use top-above and left-below samples.
  m_pMdlmTemp = new Pel[(2 * MAX_TB_SIZEY + 1)*(2 * MAX_TB_SIZEY + 1)];
}
295
296
// ====================================================================================================================
297
// Public member functions
298
// ====================================================================================================================
299
300
// Function for calculating DC value of the reference samples used in Intra prediction
301
//NOTE: Bit-Limit - 25-bit source
302
/**
 * Computes the DC predictor from the reference samples.
 *
 * Only the longer side of the block contributes (both sides for square blocks), so the
 * number of summed samples - and therefore the divisor - is always what Log2() is applied to
 * below, and the division reduces to a shift with rounding.
 * Samples are read starting at index m_ipaParam.multiRefIndex + 1 of the respective row.
 *
 * \param pSrc     reference samples; row 0 holds the above row, row 1 the left column
 * \param dstSize  size of the block to predict, must not be empty
 * \return         rounded mean of the selected reference samples
 */
Pel IntraPrediction::xGetPredValDc( const CPelBuf& pSrc, const Size& dstSize )
{
  CHECK( dstSize.width == 0 || dstSize.height == 0, "Empty area provided" );

  const int width  = dstSize.width;
  const int height = dstSize.height;
  const int first  = m_ipaParam.multiRefIndex + 1;

  const auto denom     = (width == height) ? (width << 1) : std::max(width,height);
  const auto divShift  = Log2(denom);
  const auto divOffset = (denom >> 1);

  const bool useAbove = width >= height;
  const bool useLeft  = width <= height;

  int sum = 0;

  if( useAbove )
  {
    for( int i = 0; i < width; i++ )
    {
      sum += pSrc.at( first + i, 0 );
    }
  }
  if( useLeft )
  {
    for( int i = 0; i < height; i++ )
    {
      sum += pSrc.at( first + i, 1 );
    }
  }

  const Pel dcVal = (sum + divOffset) >> divShift;
  return dcVal;
}
334
335
int IntraPrediction::getWideAngle( int width, int height, int predMode )
336
1.76M
{
337
1.76M
  if ( predMode > DC_IDX && predMode <= VDIA_IDX )
338
1.21M
  {
339
1.21M
    int modeShift[] = { 0, 6, 10, 12, 14, 15 };
340
1.21M
    int deltaSize = abs(Log2(width) - Log2(height));
341
1.21M
    if (width > height && predMode < 2 + modeShift[deltaSize])
342
35.9k
    {
343
35.9k
      predMode += (VDIA_IDX - 1);
344
35.9k
    }
345
1.18M
    else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize])
346
61.2k
    {
347
61.2k
      predMode -= (VDIA_IDX - 1);
348
61.2k
    }
349
1.21M
  }
350
1.76M
  return predMode;
351
1.76M
}
352
353
void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf& piPred, const CodingUnit& cu)
354
1.56M
{
355
1.56M
  const ComponentID    compID       = compId;
356
1.56M
  const ChannelType    channelType  = toChannelType( compID );
357
1.56M
  const uint32_t       uiDirMode = cu.bdpcmM[channelType] ? BDPCM_IDX : CU::getFinalIntraMode(cu, channelType);
358
359
1.56M
  CHECK( Log2(piPred.width) > 7, "Size not allowed" );
360
361
//  const int multiRefIdx = m_ipaParam.multiRefIndex;
362
1.56M
  const int srcStride  = m_refBufferStride[compID];
363
1.56M
  const int srcHStride = 2;
364
365
1.56M
  const CPelBuf& srcBuf = CPelBuf(getPredictorPtr(compID), srcStride, srcHStride);
366
1.56M
  const ClpRng& clpRng(cu.cs->slice->clpRngs[compID]);
367
368
1.56M
  switch (uiDirMode)
369
1.56M
  {
370
111k
    case(PLANAR_IDX): xPredIntraPlanar(piPred, srcBuf); break;
371
257k
    case(DC_IDX):     xPredIntraDc    ( piPred, srcBuf ); break;
372
76.9k
    case(BDPCM_IDX):  xPredIntraBDPCM ( piPred, srcBuf, cu.bdpcmM[channelType], clpRng); break;
373
1.12M
    default:          xPredIntraAng   ( piPred, srcBuf, channelType, clpRng); break;
374
1.56M
  }
375
376
1.56M
  if (m_ipaParam.applyPDPC)
377
1.15M
  {
378
1.15M
    if (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX)
379
329k
    {
380
329k
      IntraPredSampleFilter(piPred, srcBuf);
381
329k
    }
382
1.15M
  }
383
1.56M
}
384
385
void IntraPrediction::predIntraChromaLM(const ComponentID compID, PelBuf& piPred, const CodingUnit& cu, const CompArea& chromaArea, int intraDir)
386
186k
{
387
186k
  CHECK( piPred.width > MAX_TB_SIZEY || piPred.height > MAX_TB_SIZEY, "not enough memory");
388
186k
  const int iLumaStride = 2 * MAX_TB_SIZEY + 1;
389
186k
  PelBuf Temp = PelBuf(m_pMdlmTemp + iLumaStride + 1, iLumaStride, Size(chromaArea));
390
391
186k
  int a, b, iShift;
392
186k
  xGetLMParameters(cu, compID, chromaArea, a, b, iShift); // th shift result is unsigned
393
394
  ////// final prediction
395
186k
  piPred.copyFrom(Temp);
396
186k
  piPred.linearTransform(a, iShift, b, true, cu.cs->slice->clpRngs[compID]);
397
186k
}
398
399
/** Function for deriving DC intra prediction. This function derives the prediction samples for DC mode (intra coding).
400
 */
401
402
// Fills the whole destination block with the DC value derived from the reference samples.
void IntraPrediction::xPredIntraDc( PelBuf& pDst, const CPelBuf& pSrc )
{
  pDst.fill( xGetPredValDc( pSrc, pDst ) );
}
407
408
// Function for initialization of intra prediction parameters
409
void IntraPrediction::initPredIntraParams(const CodingUnit& cu, const CompArea area, const SPS& sps)
410
1.76M
{
411
1.76M
  const ComponentID compId = area.compID;
412
1.76M
  const ChannelType chType = toChannelType(compId);
413
414
1.76M
  const bool        useISP = NOT_INTRA_SUBPARTITIONS != cu.ispMode && isLuma( chType );
415
416
1.76M
  const Size   cuSize    = Size( cu.blocks[compId].width, cu.blocks[compId].height );
417
1.76M
  const Size   puSize    = Size( area.width, area.height );
418
1.76M
  const Size&  blockSize = useISP ? cuSize : puSize;
419
1.76M
  const int      dirMode = CU::getFinalIntraMode(cu, chType);
420
1.76M
  const int     predMode = getWideAngle( blockSize.width, blockSize.height, dirMode );
421
422
1.76M
  m_ipaParam.isModeVer            = predMode >= DIA_IDX;
423
1.76M
  m_ipaParam.multiRefIndex        = isLuma (chType) ? cu.multiRefIdx : 0 ;
424
1.76M
  m_ipaParam.refFilterFlag        = false;
425
1.76M
  m_ipaParam.interpolationFlag    = false;
426
1.76M
  m_ipaParam.applyPDPC            = (puSize.width >= MIN_TB_SIZEY && puSize.height >= MIN_TB_SIZEY) && m_ipaParam.multiRefIndex == 0;
427
428
1.76M
  const int    intraPredAngleMode = (m_ipaParam.isModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
429
430
431
1.76M
  int absAng = 0;
432
1.76M
  if (dirMode > DC_IDX && dirMode < NUM_LUMA_MODE) // intraPredAngle for directional modes
433
1.21M
  {
434
1.21M
    static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
435
1.21M
    static const int invAngTable[32] = {
436
1.21M
      0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
437
1.21M
      512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16
438
1.21M
    };   // (512 * 32) / Angle
439
440
1.21M
    const int     absAngMode         = abs(intraPredAngleMode);
441
1.21M
    const int     signAng            = intraPredAngleMode < 0 ? -1 : 1;
442
1.21M
                  absAng             = angTable  [absAngMode];
443
444
1.21M
    m_ipaParam.absInvAngle           = invAngTable[absAngMode];
445
1.21M
    m_ipaParam.intraPredAngle        = signAng * absAng;
446
1.21M
    if (intraPredAngleMode < 0)
447
218k
    {
448
218k
      m_ipaParam.applyPDPC = false;
449
218k
    }
450
1.00M
    else if (intraPredAngleMode > 0)
451
494k
    {
452
494k
      const int sideSize = m_ipaParam.isModeVer ? puSize.height : puSize.width;
453
494k
      const int maxScale = 2;
454
455
494k
      m_ipaParam.angularScale = std::min(maxScale, floorLog2(sideSize) - (floorLog2(3 * m_ipaParam.absInvAngle - 2) - 8));
456
494k
      m_ipaParam.applyPDPC &= m_ipaParam.angularScale >= 0;
457
494k
    }
458
1.21M
  }
459
460
  // high level conditions and DC intra prediction
461
1.76M
  if( !isLuma( chType )
462
727k
    || useISP
463
709k
    || CU::isMIP( cu, chType ) //th remove this
464
666k
    || m_ipaParam.multiRefIndex
465
526k
    || DC_IDX == dirMode
466
1.76M
    )
467
1.26M
  {
468
1.26M
  }
469
500k
  else if (cu.bdpcmM[chType])
470
7.00k
  {
471
7.00k
    m_ipaParam.refFilterFlag = false;
472
7.00k
  }
473
493k
  else if (dirMode == PLANAR_IDX) // Planar intra prediction
474
26.7k
  {
475
26.7k
    m_ipaParam.refFilterFlag = puSize.width * puSize.height > 32 ? true : false;
476
26.7k
  }
477
467k
  else if (!useISP)// HOR, VER and angular modes (MDIS)
478
467k
  {
479
467k
    bool filterFlag = false;
480
467k
    {
481
467k
      const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
482
467k
      const int log2Size = (Log2(puSize.width * puSize.height) >> 1);
483
467k
      CHECK( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
484
467k
      filterFlag = (diff > m_aucIntraFilter[log2Size]);
485
467k
    }
486
487
    // Selelection of either ([1 2 1] / 4 ) refrence filter OR Gaussian 4-tap interpolation filter
488
467k
    if (filterFlag)
489
374k
    {
490
374k
      const bool isRefFilter       =  isIntegerSlope(absAng);
491
374k
      CHECK( puSize.width * puSize.height <= 32, "DCT-IF interpolation filter is always used for 4x4, 4x8, and 8x4 luma CB" );
492
374k
      m_ipaParam.refFilterFlag     =  isRefFilter;
493
374k
      m_ipaParam.interpolationFlag = !isRefFilter;
494
374k
    }
495
467k
  }
496
1.76M
}
497
498
}   // namespace vvenc
499
500
#ifdef TARGET_SIMD_X86
501
#include "x86/CommonDefX86.h"
502
#endif
503
504
namespace vvenc {
505
506
/** Function for deriving the simplified angular intra predictions.
507
*
508
* This function derives the prediction samples for the angular mode based on the prediction direction indicated by
509
* the prediction mode index. The prediction direction is given by the displacement of the bottom row of the block and
510
* the reference row above the block in the case of vertical prediction or displacement of the rightmost column
511
* of the block and reference column left from the block in the case of the horizontal prediction. The displacement
512
* is signalled at 1/32 pixel accuracy. When projection of the predicted pixel falls inbetween reference samples,
513
* the predicted value for the pixel is linearly interpolated from the reference samples. All reference samples are taken
514
* from the extended main reference.
515
*/
516
//NOTE: Bit-Limit - 25-bit source
517
518
/**
 * Angular intra prediction, including pure horizontal / vertical modes and PDPC.
 *
 * The reference samples are copied into local main / side arrays. For negative angles the
 * main array is extended to the left by projecting side samples onto it; otherwise it is
 * extended to the right by replicating its last sample. Horizontal modes are predicted as
 * vertical modes on swapped dimensions into a temporary block that is transposed into pDst
 * at the end.
 *
 * \param pDst         destination block
 * \param pSrc         reference samples; row 0 holds the above row, row 1 the left column
 * \param channelType  selects the luma (4-tap) or chroma (2-tap) fractional interpolation
 * \param clpRng       clipping range for the predicted samples
 */
void IntraPrediction::xPredIntraAng( PelBuf& pDst, const CPelBuf& pSrc, const ChannelType channelType, const ClpRng& clpRng)
{
  int width  = int( pDst.width );
  int height = int( pDst.height );

  const bool bIsModeVer     = m_ipaParam.isModeVer;
  const int  multiRefIdx    = m_ipaParam.multiRefIndex;
  const int  intraPredAngle = m_ipaParam.intraPredAngle;
  const int  absInvAngle    = m_ipaParam.absInvAngle;

  Pel* refMain;
  Pel* refSide;

  Pel  refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
  Pel  refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];

  // Initialize the main and side reference arrays.
  if (intraPredAngle < 0)
  {
    // Leave room in front of both arrays so the main reference can be indexed negatively.
    memcpy(&refAbove[height],pSrc.buf,(width + 2 + multiRefIdx)*sizeof(Pel));
    for (int y = 0; y <= height + 1 + multiRefIdx; y++)
    {
      refLeft[y + width] = pSrc.at(y, 1);
    }
    refMain = bIsModeVer ? refAbove + height : refLeft + width;
    refSide = bIsModeVer ? refLeft + width : refAbove + height;

    // Extend the Main reference to the left.
    const int sizeSide = bIsModeVer ? height : width;
    for (int k = -sizeSide; k <= -1; k++)
    {
      refMain[k] = refSide[std::min((-k * absInvAngle + 256) >> 9, sizeSide)];
    }
  }
  else
  {
    memcpy(&refAbove[0], pSrc.buf, ((m_topRefLength)+multiRefIdx + 1) * sizeof(Pel));
    for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++)
    {
      refLeft[y] = pSrc.at(y, 1);
    }

    refMain = bIsModeVer ? refAbove : refLeft;
    refSide = bIsModeVer ? refLeft : refAbove;

    // Extend main reference to right using replication
    const int log2Ratio = Log2(width) - Log2(height);
    const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
    const int maxIndex  = (multiRefIdx << s) + 2;
    const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
    const Pel val       = refMain[refLength + multiRefIdx];
    for (int z = 1; z <= maxIndex; z++)
    {
      refMain[refLength + multiRefIdx + z] = val;
    }
  }

  // swap width/height if we are doing a horizontal mode:
  if (!bIsModeVer)
  {
    std::swap(width, height);
  }
  Pel tempArray[MAX_CU_SIZE*MAX_CU_SIZE];
  const int dstStride = bIsModeVer ? pDst.stride : MAX_CU_SIZE;
  Pel* pDstBuf = bIsModeVer ? pDst.buf : tempArray;

  // compensate for line offset in reference line buffers
  refMain += multiRefIdx;
  refSide += multiRefIdx;

  Pel* pDsty = pDstBuf;

  if( intraPredAngle == 0 )  // pure vertical or pure horizontal
  {
    if (m_ipaParam.applyPDPC)
    {
      const int scale   = (Log2(width * height) - 2) >> 2;
      IntraHorVerPDPC(pDsty,dstStride,refSide,width,height,scale,refMain,clpRng);
    }
    else
    {
      for( int y = 0; y < height; y++ )
      {
        memcpy(pDsty,&refMain[1],width*sizeof(Pel));
        pDsty += dstStride;
      }
    }
  }
  else
  {
    if( !isIntegerSlope( abs( intraPredAngle ) ) )
    {
      int deltaPos = intraPredAngle * ( 1 + multiRefIdx );
      if( isLuma( channelType ) )
      {
        if( width <= 2 )
        {
          // Narrow blocks are filtered inline instead of through the IntraPredAngleLuma kernel.
          const bool useCubicFilter = !m_ipaParam.interpolationFlag;

          for( int y = 0; y < height; y++, deltaPos += intraPredAngle, pDsty += dstStride )
          {
            const int deltaInt   = deltaPos >> 5;
            const int deltaFract = deltaPos & 31;

            const TFilterCoeff intraSmoothingFilter[4] = { TFilterCoeff( 16 - ( deltaFract >> 1 ) ),
                                                           TFilterCoeff( 32 - ( deltaFract >> 1 ) ),
                                                           TFilterCoeff( 16 + ( deltaFract >> 1 ) ),
                                                           TFilterCoeff(      ( deltaFract >> 1 ) ) };
            const TFilterCoeff* const f =
              ( useCubicFilter ) ? InterpolationFilter::getChromaFilterTable( deltaFract ) : intraSmoothingFilter;

            for( int x = 0; x < width; x++ )
            {
              Pel p[4];

              p[0] = refMain[deltaInt + x + 0];
              p[1] = refMain[deltaInt + x + 1];
              p[2] = refMain[deltaInt + x + 2];
              p[3] = refMain[deltaInt + x + 3];

              Pel val = ( f[0] * p[0] + f[1] * p[1] + f[2] * p[2] + f[3] * p[3] + 32 ) >> 6;

              pDsty[x] = ClipPel( val, clpRng );   // always clip even though not always needed
            }
          }
        }
        else
        {
          IntraPredAngleLuma(pDstBuf, dstStride, refMain, width, height, deltaPos, intraPredAngle, nullptr, !m_ipaParam.interpolationFlag, clpRng);
        }
      }
      else
      {
        IntraPredAngleChroma(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
      }
    }
    else
    {
      for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
      {
        const int deltaInt   = deltaPos >> 5;
        // Just copy the integer samples
        memcpy(pDsty,refMain  + deltaInt + 1,width*sizeof(Pel));
      }
    }

    if (m_ipaParam.applyPDPC)
    {
      // restart at the first row; pDsty may have been advanced by the loops above
      pDsty = pDstBuf;
      IntraAnglePDPC(pDsty,dstStride,refSide,width,height,m_ipaParam.angularScale,absInvAngle);
    }
  } // else

  // Flip the block if this is the horizontal mode
  if( !bIsModeVer )
  {
    pDst.transposedFrom( CPelBuf( pDstBuf, dstStride, width, height) );
  }
}
682
683
/**
 * BDPCM prediction: plain replication of the neighbouring reference samples.
 *
 * dirMode 1 fills every row with the left reference sample of that row, dirMode 2 copies the
 * above reference row into every row. Any other value is rejected.
 *
 * \param pDst     destination block
 * \param pSrc     reference samples; the left column starts at offset pSrc.stride
 * \param dirMode  BDPCM direction, 1 or 2
 * \param clpRng   not used by this function
 */
void IntraPrediction::xPredIntraBDPCM(PelBuf& pDst, const CPelBuf& pSrc, const uint32_t dirMode, const ClpRng& clpRng)
{
  const int wdt     = pDst.width;
  const int hgt     = pDst.height;
  const int strideP = pDst.stride;
  const int strideS = pSrc.stride;

  CHECK(!(dirMode == 1 || dirMode == 2), "Incorrect BDPCM mode parameter.");

  const bool fromLeft = ( dirMode == 1 );

  Pel* row = &pDst.buf[0];
  for( int y = 0; y < hgt; y++, row += strideP )
  {
    if( fromLeft )
    {
      const Pel leftVal = pSrc.buf[(y + 1) + strideS];
      for( int x = 0; x < wdt; x++ )
      {
        row[x] = leftVal;
      }
    }
    else
    {
      for( int x = 0; x < wdt; x++ )
      {
        row[x] = pSrc.buf[x + 1];
      }
    }
  }
}
719
720
inline bool isAboveLeftAvailable  ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT );
721
inline int  isAboveAvailable      ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *validFlags );
722
inline int  isLeftAvailable       ( const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *validFlags );
723
inline int  isAboveRightAvailable ( const CodingUnit &cu, const ChannelType& chType, const Position& posRT, const uint32_t numUnits, const uint32_t unitHeight, bool *validFlags );
724
inline int  isBelowLeftAvailable  ( const CodingUnit &cu, const ChannelType& chType, const Position& posLB, const uint32_t numUnits, const uint32_t unitHeight, bool *validFlags );
725
726
void IntraPrediction::initIntraPatternChType(const CodingUnit &cu, const CompArea& area, const bool forceRefFilterFlag)
727
731k
{
728
731k
  const CodingStructure& cs   = *cu.cs;
729
730
731k
  if (!forceRefFilterFlag)
731
680k
  {
732
680k
    initPredIntraParams(cu, area, *cs.sps);
733
680k
  }
734
735
731k
  Pel *refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
736
731k
  Pel *refBufFiltered   = m_refBuffer[area.compID][PRED_BUF_FILTERED];
737
738
731k
  setReferenceArrayLengths(area);
739
740
  // ----- Step 1: unfiltered reference samples -----
741
731k
  xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, cu );
742
  // ----- Step 2: filtered reference samples -----
743
731k
  if( m_ipaParam.refFilterFlag || forceRefFilterFlag )
744
54.6k
  {
745
54.6k
    xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.multiRefIdx );
746
54.6k
  }
747
731k
}
748
749
void IntraPrediction::reset()
750
75.2k
{
751
75.2k
  m_lastCh = MAX_NUM_CH;
752
75.2k
  m_lastArea = Area(0,0,0,0);
753
75.2k
}
754
755
// Collects the unfiltered intra reference samples of `area` into refBufUnfiltered.
//   recoBuf          - reconstructed samples; recoBuf.buf is used as the top-left sample of the
//                      block and neighbours are read at negative offsets from it.
//   refBufUnfiltered - destination. The top reference row occupies entries
//                      [0 .. predSize + multiRefIdx]; the left reference column occupies
//                      [predStride .. predStride + predHSize + multiRefIdx], where
//                      predStride = predSize + 1 + multiRefIdx (also stored in
//                      m_refBufferStride[area.compID]).
//   area             - component area being predicted.
//   cu               - coding unit; supplies multiRefIdx (luma only), ispMode and the coding
//                      structure used for the availability lookups.
// Availability is tracked per "unit" in m_neighborFlags. Three cases are handled: no neighbour
// available (fill with the mid-range value), all neighbours available (plain copy), and the
// partial case (copy what exists, then pad the gaps from the nearest available sample).
void IntraPrediction::xFillReferenceSamples( const CPelBuf& recoBuf, Pel* refBufUnfiltered, const CompArea& area, const CodingUnit &cu )
756
746k
{
757
746k
  const ChannelType      chType = toChannelType( area.compID );
758
746k
  const CodingStructure &cs     = *cu.cs;
759
746k
  const SPS             &sps    = *cs.sps;
760
746k
  const PreCalcValues   &pcv    = *cs.pcv;
761
762
  // The multi-reference-line index of the CU is only honoured for the luma component.
746k
  const int multiRefIdx         = (area.compID == COMP_Y) ? cu.multiRefIdx : 0;
763
764
746k
  const int  tuWidth            = area.width;
765
746k
  const int  tuHeight           = area.height;
766
746k
  const int  predSize           = m_topRefLength;
767
746k
  const int  predHSize          = m_leftRefLength;
768
746k
  const int predStride = predSize + 1 + multiRefIdx;
769
746k
  m_refBufferStride[area.compID] = predStride;
770
771
  // Granularity of the availability flags: the minimum CU size scaled to this component,
  // except for luma ISP blocks of width/height <= 2, which use the block dimension itself.
746k
  const int  unitWidth          = tuWidth  <= 2 && cu.ispMode && isLuma(area.compID) ? tuWidth  : pcv.minCUSize >> getComponentScaleX(area.compID, sps.chromaFormatIdc);
772
746k
  const int  unitHeight         = tuHeight <= 2 && cu.ispMode && isLuma(area.compID) ? tuHeight : pcv.minCUSize >> getComponentScaleY(area.compID, sps.chromaFormatIdc);
773
774
  // Rounded-up unit counts, so the last unit on either side may be only partially used.
746k
  const int  totalAboveUnits    = (predSize + (unitWidth - 1)) / unitWidth;
775
746k
  const int  totalLeftUnits     = (predHSize + (unitHeight - 1)) / unitHeight;
776
746k
  const int  totalUnits         = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
777
778
  // The neighbourhood analysis (m_neighborFlags, m_numIntraNeighbor) is cached and only redone
  // when the area or the channel type differs from the previous call (see reset()).
746k
  if( m_lastArea != area || m_lastCh != chType )
779
75.2k
  {
780
75.2k
    m_lastCh = chType;
781
75.2k
    m_lastArea = area;
782
75.2k
    const int  numAboveUnits      = std::max<int>( tuWidth / unitWidth, 1 );
783
75.2k
    const int  numLeftUnits       = std::max<int>( tuHeight / unitHeight, 1 );
784
75.2k
    const int  numAboveRightUnits = totalAboveUnits - numAboveUnits;
785
75.2k
    const int  numLeftBelowUnits  = totalLeftUnits - numLeftUnits;
786
787
75.2k
    CHECK( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported" );
788
789
    // ----- Step 1: analyze neighborhood -----
790
75.2k
    const Position posLT          = area;
791
75.2k
    const Position posRT          = area.topRight();
792
75.2k
    const Position posLB          = area.bottomLeft();
793
794
75.2k
    m_numIntraNeighbor = 0;
795
796
75.2k
    memset( m_neighborFlags, 0, totalUnits );
797
798
    // Flag layout: index totalLeftUnits is the above-left corner; higher indices run
    // left-to-right along the top (above, then above-right); lower indices run downwards along
    // the left edge (left, then below-left), so index 0 is the last below-left unit.
75.2k
    m_neighborFlags[totalLeftUnits] = isAboveLeftAvailable( cu, chType, posLT );
799
75.2k
    m_numIntraNeighbor += m_neighborFlags[totalLeftUnits] ? 1 : 0;
800
75.2k
    m_numIntraNeighbor += isAboveAvailable     ( cu, chType, posLT, numAboveUnits,      unitWidth,  (m_neighborFlags + totalLeftUnits + 1) );
801
75.2k
    m_numIntraNeighbor += isAboveRightAvailable( cu, chType, posRT, numAboveRightUnits, unitWidth,  (m_neighborFlags + totalLeftUnits + 1 + numAboveUnits) );
802
75.2k
    m_numIntraNeighbor += isLeftAvailable      ( cu, chType, posLT, numLeftUnits,       unitHeight, (m_neighborFlags + totalLeftUnits - 1) );
803
75.2k
    m_numIntraNeighbor += isBelowLeftAvailable ( cu, chType, posLB, numLeftBelowUnits,  unitHeight, (m_neighborFlags + totalLeftUnits - 1 - numLeftUnits) );
804
75.2k
  }
805
  // ----- Step 2: fill reference samples (depending on neighborhood) -----
806
807
746k
  const Pel*  srcBuf    = recoBuf.buf;
808
746k
  const int   srcStride = recoBuf.stride;
809
746k
        Pel*  ptrDst    = refBufUnfiltered;
810
746k
  const Pel*  ptrSrc;
811
  // Mid-range value for the channel's bit depth, used when nothing is available.
746k
  const Pel   valueDC   = 1 << (sps.bitDepths[ chType ] - 1);
812
813
814
746k
  if( m_numIntraNeighbor == 0 )
815
241k
  {
816
    // Fill border with DC value
817
7.23M
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; }
818
6.98M
    for (int i = 0; i <= predHSize + multiRefIdx; i++) { ptrDst[i+predStride] = valueDC; }
819
241k
  }
820
504k
  else if( m_numIntraNeighbor == totalUnits )
821
1.01k
  {
822
    // Fill top-left border and top and top right with rec. samples
823
    // ptrSrc is the sample (1 + multiRefIdx) rows above and columns left of the block origin.
1.01k
    ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
824
81.9k
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; }
825
81.9k
    for (int i = 0; i <= predHSize + multiRefIdx; i++)
826
80.8k
    {
827
80.8k
      ptrDst[i + predStride] = ptrSrc[i * srcStride];
828
80.8k
    }
829
1.01k
  }
830
503k
  else // reference samples are partially available
831
503k
  {
832
    // Fill top-left sample(s) if available
833
503k
    ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
834
503k
    ptrDst = refBufUnfiltered;
835
503k
    if (m_neighborFlags[totalLeftUnits])
836
168k
    {
837
      // The corner sample is stored twice: as first entry of the top row and of the left column.
168k
      ptrDst[0] = ptrSrc[0];
838
168k
      ptrDst[predStride] = ptrSrc[0];
839
202k
      for (int i = 1; i <= multiRefIdx; i++)
840
33.4k
      {
841
33.4k
        ptrDst[i] = ptrSrc[i];
842
33.4k
        ptrDst[i + predStride] = ptrSrc[i * srcStride];
843
33.4k
      }
844
168k
    }
845
846
    // Fill left & below-left samples if available (downwards)
847
503k
    ptrSrc += (1 + multiRefIdx) * srcStride;
848
503k
    ptrDst += (1 + multiRefIdx) + predStride;
849
    // Unit 0 is excluded here because it may be only partially covered; it is handled below.
7.26M
    for (int unitIdx = totalLeftUnits - 1; unitIdx > 0; unitIdx--)
850
6.76M
    {
851
6.76M
      if (m_neighborFlags[unitIdx])
852
2.93M
      {
853
10.0M
        for (int i = 0; i < unitHeight; i++)
854
7.11M
        {
855
7.11M
          ptrDst[i] = ptrSrc[i*srcStride];
856
7.11M
        }
857
2.93M
      }
858
6.76M
      ptrSrc += unitHeight * srcStride;
859
6.76M
      ptrDst += unitHeight;
860
6.76M
    }
861
    // Fill last below-left sample(s)
862
503k
    if (m_neighborFlags[0])
863
15.1k
    {
864
      // Number of samples in the last unit: a full unit unless predHSize is not a multiple
      // of unitHeight.
15.1k
      int lastSample = (predHSize % unitHeight == 0) ? unitHeight : predHSize % unitHeight;
865
60.8k
      for (int i = 0; i < lastSample; i++)
866
45.6k
      {
867
45.6k
        ptrDst[i] = ptrSrc[i*srcStride];
868
45.6k
      }
869
15.1k
    }
870
871
    // Fill above & above-right samples if available (left-to-right)
872
503k
    ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
873
503k
    ptrDst = refBufUnfiltered + 1 + multiRefIdx;
874
    // The final unit (index totalUnits - 1) may be partial and is copied separately below.
7.36M
    for (int unitIdx = totalLeftUnits + 1; unitIdx < totalUnits - 1; unitIdx++)
875
6.85M
    {
876
6.85M
      if (m_neighborFlags[unitIdx])
877
3.71M
      {
878
3.71M
        memcpy(ptrDst,ptrSrc,unitWidth*sizeof(Pel));
879
3.71M
      }
880
6.85M
      ptrSrc += unitWidth;
881
6.85M
      ptrDst += unitWidth;
882
6.85M
    }
883
    // Fill last above-right sample(s)
884
503k
    if (m_neighborFlags[totalUnits - 1])
885
57.9k
    {
886
57.9k
      int lastSample = (predSize % unitWidth == 0) ? unitWidth : predSize % unitWidth;
887
57.9k
      memcpy(ptrDst,ptrSrc,lastSample*sizeof(Pel));
888
57.9k
    }
889
890
    // pad from first available down to the last below-left
891
503k
    ptrDst = refBufUnfiltered;
892
503k
    int lastAvailUnit = 0;
893
503k
    if (!m_neighborFlags[0])
894
488k
    {
895
      // Scan the flags in index order (below-left -> left -> corner -> above -> above-right)
      // for the first available unit.
488k
      int firstAvailUnit = 1;
896
4.49M
      while (firstAvailUnit < totalUnits && !m_neighborFlags[firstAvailUnit])
897
4.01M
      {
898
4.01M
        firstAvailUnit++;
899
4.01M
      }
900
901
      // first available sample
902
      // firstAvailRow >= 0 means the sample lives in the left column (index relative to
      // predStride); otherwise firstAvailCol is its index in the top row.
488k
      int firstAvailRow = -1;
903
488k
      int firstAvailCol = 0;
904
488k
      if (firstAvailUnit < totalLeftUnits)
905
304k
      {
906
304k
        firstAvailRow = (totalLeftUnits - firstAvailUnit) * unitHeight + multiRefIdx;
907
304k
      }
908
183k
      else if (firstAvailUnit == totalLeftUnits)
909
0
      {
910
0
        firstAvailRow = multiRefIdx;
911
0
      }
912
183k
      else
913
183k
      {
914
183k
        firstAvailCol = (firstAvailUnit - totalLeftUnits - 1) * unitWidth + 1 + multiRefIdx;
915
183k
      }
916
488k
      const Pel firstAvailSample = ptrDst[firstAvailRow < 0 ? firstAvailCol : firstAvailRow + predStride];
917
918
      // last sample below-left (n.a.)
919
488k
      int lastRow = predHSize + multiRefIdx;
920
921
      // fill left column
922
      // With firstAvailRow == -1 (sample found in the top row) this covers the whole column.
11.0M
      for (int i = lastRow; i > firstAvailRow; i--)
923
10.5M
      {
924
10.5M
        ptrDst[i + predStride] = firstAvailSample;
925
10.5M
      }
926
      // fill top row
927
488k
      if (firstAvailCol > 0)
928
183k
      {
929
377k
        for (int j = 0; j < firstAvailCol; j++)
930
193k
        {
931
193k
          ptrDst[j] = firstAvailSample;
932
193k
        }
933
183k
      }
934
488k
      lastAvailUnit = firstAvailUnit;
935
488k
    }
936
937
    // pad all other reference samples.
938
    // Walk the remaining units in index order; every unavailable unit is filled with the last
    // sample of the unit before it (which is either original or already padded).
503k
    int currUnit = lastAvailUnit + 1;
939
10.6M
    while (currUnit < totalUnits)
940
10.1M
    {
941
10.1M
      if (!m_neighborFlags[currUnit]) // samples not available
942
3.73M
      {
943
        // last available sample
944
3.73M
        int lastAvailRow = -1;
945
3.73M
        int lastAvailCol = 0;
946
3.73M
        if (lastAvailUnit < totalLeftUnits)
947
151k
        {
948
151k
          lastAvailRow = (totalLeftUnits - lastAvailUnit - 1) * unitHeight + multiRefIdx + 1;
949
151k
        }
950
3.58M
        else if (lastAvailUnit == totalLeftUnits)
951
151k
        {
952
151k
          lastAvailCol = multiRefIdx;
953
151k
        }
954
3.43M
        else
955
3.43M
        {
956
3.43M
          lastAvailCol = (lastAvailUnit - totalLeftUnits) * unitWidth + multiRefIdx;
957
3.43M
        }
958
3.73M
        const Pel lastAvailSample = ptrDst[lastAvailRow < 0 ? lastAvailCol : lastAvailRow + predStride];
959
960
        // fill current unit with last available sample
961
3.73M
        if (currUnit < totalLeftUnits)
962
0
        {
963
0
          for (int i = lastAvailRow - 1; i >= lastAvailRow - unitHeight; i--)
964
0
          {
965
0
            ptrDst[i + predStride] = lastAvailSample;
966
0
          }
967
0
        }
968
3.73M
        else if (currUnit == totalLeftUnits)
969
151k
        {
970
          // Corner unit: write the multiRefIdx + 1 leading entries of both the left column
          // and the top row.
302k
          for (int i = 0; i < multiRefIdx + 1; i++)
971
151k
          {
972
151k
            ptrDst[i + predStride] = lastAvailSample;
973
151k
          }
974
302k
          for (int j = 0; j < multiRefIdx + 1; j++)
975
151k
          {
976
151k
            ptrDst[j] = lastAvailSample;
977
151k
          }
978
151k
        }
979
3.58M
        else
980
3.58M
        {
981
          // The very last top unit may hold fewer than unitWidth samples.
3.58M
          int numSamplesInUnit = (currUnit == totalUnits - 1) ? ((predSize % unitWidth == 0) ? unitWidth : predSize % unitWidth) : unitWidth;
982
11.7M
          for (int j = lastAvailCol + 1; j <= lastAvailCol + numSamplesInUnit; j++)
983
8.18M
          {
984
8.18M
            ptrDst[j] = lastAvailSample;
985
8.18M
          }
986
3.58M
        }
987
3.73M
      }
988
10.1M
      lastAvailUnit = currUnit;
989
10.1M
      currUnit++;
990
10.1M
    }
991
503k
  }
992
746k
}
993
994
// Produces the smoothed reference samples from the unfiltered ones.
//   refBufUnfiltered - source buffer: top row first, left column starting at predStride.
//   refBufFiltered   - destination buffer, written with the same layout.
//   area             - component area; for anything but COMP_Y the multiRefIdx is forced to 0.
//   sps              - not referenced in this function body.
//   multiRefIdx      - extends both reference lengths by this many samples (luma only).
//   stride           - offset of the left column inside both buffers; 0 selects predSize + 1.
// Interior samples get a 3-tap (1, 2, 1) filter with rounding; the last sample of the row and
// of the column is copied unfiltered.
void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea& area, const SPS &sps
995
  , int multiRefIdx
996
  , int stride
997
)
998
54.6k
{
999
54.6k
  if (area.compID != COMP_Y)
1000
0
  {
1001
0
    multiRefIdx = 0;
1002
0
  }
1003
54.6k
  const int predSize = m_topRefLength + multiRefIdx;
1004
54.6k
  const int predHSize = m_leftRefLength + multiRefIdx;
1005
54.6k
  const int predStride = stride == 0 ? predSize + 1 : stride;
1006
1007
1008
  // Rounded average of the first two entries of the top row and of the left column.
54.6k
  const Pel topLeft =
1009
54.6k
    (refBufUnfiltered[0] + refBufUnfiltered[1] + refBufUnfiltered[predStride] + refBufUnfiltered[predStride + 1] + 2)
1010
54.6k
    >> 2;
1011
1012
54.6k
  refBufFiltered[0] = topLeft;
1013
1014
  // Top row.
3.10M
  for (int i = 1; i < predSize; i++)
1015
3.05M
  {
1016
3.05M
    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
1017
3.05M
  }
1018
54.6k
  refBufFiltered[predSize] = refBufUnfiltered[predSize];
1019
1020
  // Switch both pointers to the left column.
54.6k
  refBufFiltered += predStride;
1021
54.6k
  refBufUnfiltered += predStride;
1022
1023
54.6k
  refBufFiltered[0] = topLeft;
1024
1025
3.08M
  for (int i = 1; i < predHSize; i++)
1026
3.02M
  {
1027
3.02M
    refBufFiltered[i] = (refBufUnfiltered[i - 1] + 2 * refBufUnfiltered[i] + refBufUnfiltered[i + 1] + 2) >> 2;
1028
3.02M
  }
1029
54.6k
  refBufFiltered[predHSize] = refBufUnfiltered[predHSize];
1030
54.6k
}
1031
1032
// Returns true when the coding structure of `cu` yields a coding unit for the position one
// sample left of and one sample above posLT (as decided by getCURestricted for chType).
bool isAboveLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT)
1033
75.2k
{
1034
75.2k
  const CodingStructure& cs = *cu.cs;
1035
75.2k
  const Position refPos = posLT.offset(-1, -1);
1036
1037
75.2k
  return (cs.getCURestricted(refPos, cu, chType) != NULL);
1038
75.2k
}
1039
1040
// Marks which units in the row directly above the block are available.
//   posLT       - top-left position of the block; the row at y - 1 is probed.
//   numUnits    - number of units to examine, each unitWidth samples wide.
//   bValidFlags - receives one flag per examined unit, written in ascending order
//                 (left to right).
// Returns the number of units flagged available. The scan stops at the first unit for which
// getCURestricted returns no coding unit; flags from that unit onwards are left untouched.
int isAboveAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitWidth, bool *bValidFlags)
1041
332k
{
1042
332k
  const CodingStructure& cs = *cu.cs;
1043
1044
332k
  bool *    validFlags  = bValidFlags;
1045
332k
  int       numIntra    = 0;
1046
332k
  const int maxDx       = numUnits * unitWidth;
1047
  // Horizontal offset (relative to posLT.x) up to which the most recently found neighbour CU
  // extends; units below this offset reuse `valid` without another lookup.
332k
  unsigned  checkPosX   = 0;
1048
332k
  bool      valid       = false;
1049
1050
1.39M
  for (int dx = 0; dx < maxDx; dx += unitWidth)
1051
1.26M
  {
1052
1.26M
    if( dx >= checkPosX )
1053
334k
    {
1054
334k
      const Position refPos = posLT.offset(dx, -1);
1055
1056
334k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1057
334k
      valid = (cuN != NULL);
1058
334k
      if( cuN ) checkPosX = chType == CH_C ? (cuN->Cb().x + cuN->Cb().width - posLT.x) : (cuN->Y().x + cuN->Y().width - posLT.x);
1059
199k
      else break;
1060
334k
    }
1061
1062
1.06M
    numIntra += valid ? 1 : 0;
1063
1.06M
    *validFlags = valid;
1064
1065
1.06M
    validFlags++;
1066
1.06M
  }
1067
1068
332k
  return numIntra;
1069
332k
}
1070
1071
// Marks which units in the column directly left of the block are available.
//   posLT       - top-left position of the block; the column at x - 1 is probed.
//   numUnits    - number of units to examine, each unitHeight samples tall.
//   bValidFlags - points at the flag of the topmost unit; flags are written in DESCENDING
//                 address order while the scan moves downwards.
// Returns the number of units flagged available. The scan stops at the first unit for which
// getCURestricted returns no coding unit; flags from that unit onwards are left untouched.
int isLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLT, const uint32_t numUnits, const uint32_t unitHeight, bool *bValidFlags)
1072
332k
{
1073
332k
  const CodingStructure& cs = *cu.cs;
1074
1075
332k
  bool *    validFlags = bValidFlags;
1076
332k
  int       numIntra   = 0;
1077
332k
  const int maxDy      = numUnits * unitHeight;
1078
  // Vertical offset (relative to posLT.y) up to which the most recently found neighbour CU
  // extends; units below this offset reuse `valid` without another lookup.
332k
  unsigned checkPosY   = 0;
1079
332k
  bool     valid       = false;
1080
1081
1.30M
  for (int dy = 0; dy < maxDy; dy += unitHeight)
1082
1.18M
  {
1083
1.18M
    if( dy >= checkPosY )
1084
335k
    {
1085
335k
      const Position refPos = posLT.offset(-1, dy);
1086
1087
335k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1088
335k
      valid = (cuN != NULL);
1089
335k
      if( cuN ) checkPosY = chType == CH_C ? (cuN->Cb().y + cuN->Cb().height - posLT.y) : (cuN->Y().y + cuN->Y().height - posLT.y);
1090
209k
      else break;
1091
335k
    }
1092
1093
970k
    numIntra += valid ? 1 : 0;
1094
970k
    *validFlags = valid;
1095
1096
970k
    validFlags--;
1097
970k
  }
1098
1099
332k
  return numIntra;
1100
332k
}
1101
1102
// Marks which units above and to the right of the block are available.
//   posRT       - position passed by the caller as the block's top-right reference; probing
//                 starts unitWidth samples to its right, in the row at y - 1.
//   numUnits    - number of units to examine, each unitWidth samples wide.
//   bValidFlags - receives one flag per examined unit, written in ascending order.
// Returns the number of units flagged available. The scan stops at the first unit for which
// getCURestricted returns no coding unit; flags from that unit onwards are left untouched.
int isAboveRightAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posRT, const uint32_t numUnits, const uint32_t unitWidth, bool *bValidFlags )
1103
168k
{
1104
168k
  const CodingStructure& cs = *cu.cs;
1105
1106
168k
  bool *    validFlags = bValidFlags;
1107
168k
  int       numIntra   = 0;
1108
168k
  const int maxDx      = numUnits * unitWidth;
1109
  // Horizontal offset (relative to posRT.x + unitWidth) up to which the most recently found
  // neighbour CU extends; units below this offset reuse `valid` without another lookup.
168k
  unsigned  checkPosX   = 0;
1110
168k
  bool      valid       = false;
1111
1112
534k
  for (int dx = 0; dx < maxDx; dx += unitWidth)
1113
511k
  {
1114
511k
    if( dx >= checkPosX )
1115
196k
    {
1116
196k
      const Position refPos = posRT.offset(unitWidth + dx, -1);
1117
1118
196k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1119
196k
      valid = (cuN != NULL);
1120
196k
      if(cuN) checkPosX = chType == CH_C ? (cuN->Cb().x + cuN->Cb().width - (posRT.x + unitWidth)) : (cuN->Y().x + cuN->Y().width - (posRT.x + unitWidth));
1121
145k
      else break;
1122
196k
    }
1123
1124
365k
    numIntra += valid ? 1 : 0;
1125
365k
    *validFlags = valid;
1126
1127
365k
    validFlags++;
1128
365k
  }
1129
1130
168k
  return numIntra;
1131
168k
}
1132
1133
// Marks which units below and to the left of the block are available.
//   posLB       - position passed by the caller as the block's bottom-left reference; probing
//                 starts unitHeight samples below it, in the column at x - 1.
//   numUnits    - number of units to examine, each unitHeight samples tall.
//   bValidFlags - points at the flag of the topmost examined unit; flags are written in
//                 DESCENDING address order while the scan moves downwards.
// Returns the number of units flagged available. The scan stops at the first unit for which
// getCURestricted returns no coding unit; flags from that unit onwards are left untouched.
int isBelowLeftAvailable(const CodingUnit &cu, const ChannelType& chType, const Position& posLB, const uint32_t numUnits, const uint32_t unitHeight, bool *bValidFlags )
1134
160k
{
1135
160k
  const CodingStructure& cs = *cu.cs;
1136
1137
160k
  bool *    validFlags = bValidFlags;
1138
160k
  int       numIntra   = 0;
1139
160k
  const int maxDy      = numUnits * unitHeight;
1140
  // Vertical offset (relative to posLB.y + unitHeight) up to which the most recently found
  // neighbour CU extends; units below this offset reuse `valid` without another lookup.
160k
  unsigned  checkPosY   = 0;
1141
160k
  bool      valid       = false;
1142
1143
282k
  for (int dy = 0; dy < maxDy; dy += unitHeight)
1144
276k
  {
1145
276k
    if( dy >= checkPosY )
1146
176k
    {
1147
176k
      const Position refPos = posLB.offset(-1, unitHeight + dy);
1148
1149
176k
      const CodingUnit* cuN = cs.getCURestricted(refPos, cu, chType);
1150
176k
      valid = (cuN != NULL);
1151
176k
      if( cuN ) checkPosY = chType == CH_C ? (cuN->Cb().y + cuN->Cb().height - (posLB.y + unitHeight)) : (cuN->Y().y + cuN->Y().height - (posLB.y + unitHeight));
1152
154k
      else break;
1153
176k
    }
1154
1155
121k
    numIntra += valid ? 1 : 0;
1156
121k
    *validFlags = valid;
1157
1158
121k
    validFlags--;
1159
121k
  }
1160
1161
160k
  return numIntra;
1162
160k
}
1163
1164
// LumaRecPixels
1165
// Fills m_pMdlmTemp with reconstructed luma samples brought to chroma resolution for the
// chroma block `chromaArea` of `cu`.
// Buffer layout: rows are iDstStride = 2 * MAX_TB_SIZEY + 1 entries apart and pDst0 points one
// row down and one column in, so the row above pDst0 receives the above template, the column
// left of pDst0 receives the left template, and the block interior starts at pDst0.
// Filter selection per sample, in the order tested by the code:
//   CHROMA_444                 - plain copy
//   CHROMA_422                 - horizontal (1, 2, 1) / 4
//   sps->verCollocatedChroma   - 5-tap cross (centre weight 4) / 8
//   otherwise                  - 6-tap over two luma rows (1, 2, 1 ; 1, 2, 1) / 8
// For the above template, a block on the first row of a CTU uses the single luma row directly
// above with the horizontal (1, 2, 1) / 4 filter instead (checked after the 4:4:4 case).
void IntraPrediction::loadLMLumaRecPels(const CodingUnit& cu, const CompArea& chromaArea )
1166
71.1k
{
1167
71.1k
  int iDstStride = 2 * MAX_TB_SIZEY + 1;
1168
71.1k
  Pel* pDst0 = m_pMdlmTemp + iDstStride + 1;
1169
  //assert 420 chroma subsampling
1170
71.1k
  CompArea lumaArea = CompArea( COMP_Y, cu.chromaFormat, chromaArea.lumaPos(), recalcSize( cu.chromaFormat, CH_C, CH_L, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus)
1171
1172
71.1k
  CHECK(lumaArea.width == chromaArea.width && CHROMA_444 != cu.chromaFormat, "");
1173
71.1k
  CHECK(lumaArea.height == chromaArea.height && CHROMA_444 != cu.chromaFormat && CHROMA_422 != cu.chromaFormat, "");
1174
1175
71.1k
  const SizeType uiCWidth = chromaArea.width;
1176
71.1k
  const SizeType uiCHeight = chromaArea.height;
1177
1178
71.1k
  const CPelBuf Src = cu.cs->picture->getRecoBuf( lumaArea );
1179
71.1k
  Pel const* pRecSrc0   = Src.bufAt( 0, 0 );
1180
71.1k
  int iRecStride        = Src.stride;
1181
71.1k
  int logSubWidthC  = getChannelTypeScaleX(CH_C, cu.chromaFormat);
1182
71.1k
  int logSubHeightC = getChannelTypeScaleY(CH_C, cu.chromaFormat);
1183
1184
  // Luma row step that corresponds to one chroma row.
71.1k
  int iRecStride2       = iRecStride << logSubHeightC;
1185
1186
  // Availability is evaluated on the chroma area when the CU's channel type is chroma,
  // otherwise on the co-located luma area.
71.1k
  const CompArea& area = isChroma( cu.chType ) ? chromaArea : lumaArea;
1187
1188
71.1k
  const uint32_t uiTuWidth  = area.width;
1189
71.1k
  const uint32_t uiTuHeight = area.height;
1190
1191
71.1k
  const int  unitWidthLog2  = MIN_CU_LOG2 - getComponentScaleX( area.compID, area.chromaFormat );
1192
71.1k
  const int  unitHeightLog2 = MIN_CU_LOG2 - getComponentScaleY( area.compID, area.chromaFormat );
1193
71.1k
  const int  unitWidth  = 1<<unitWidthLog2;
1194
71.1k
  const int  unitHeight = 1<<unitHeightLog2;
1195
1196
71.1k
  const int  iTUWidthInUnits  = uiTuWidth >> unitWidthLog2;
1197
71.1k
  const int  iTUHeightInUnits = uiTuHeight >> unitHeightLog2;
1198
71.1k
  const int  iAboveUnits      = iTUWidthInUnits;
1199
71.1k
  const int  iLeftUnits       = iTUHeightInUnits;
1200
1201
71.1k
  const int  chromaUnitWidthLog2  = MIN_CU_LOG2 - logSubWidthC;
1202
71.1k
  const int  chromaUnitHeightLog2 = MIN_CU_LOG2 - logSubHeightC;
1203
71.1k
  const int  chromaUnitWidth = 1<<chromaUnitWidthLog2;
1204
71.1k
  const int  chromaUnitHeight = 1<<chromaUnitHeightLog2;
1205
71.1k
  const int  topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H.
1206
71.1k
  const int  leftTemplateSampNum = 2 * uiCHeight;
1207
71.1k
  const int  totalAboveUnits = (topTemplateSampNum + (chromaUnitWidth - 1)) >> chromaUnitWidthLog2;
1208
71.1k
  const int  totalLeftUnits = (leftTemplateSampNum + (chromaUnitHeight - 1)) >> chromaUnitHeightLog2;
1209
71.1k
  const int  totalUnits = totalLeftUnits + totalAboveUnits + 1;
1210
71.1k
  const int  aboveRightUnits = totalAboveUnits - iAboveUnits;
1211
71.1k
  const int  leftBelowUnits = totalLeftUnits - iLeftUnits;
1212
1213
71.1k
  int avaiAboveRightUnits = 0;
1214
71.1k
  int avaiLeftBelowUnits = 0;
1215
  // Local flag array; only the counts returned by the availability helpers are used below.
71.1k
  bool  bNeighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
1216
71.1k
  memset(bNeighborFlags, 0, totalUnits);
1217
71.1k
  bool aboveIsAvailable, leftIsAvailable;
1218
71.1k
  const ChannelType areaCh = toChannelType( area.compID );
1219
1220
71.1k
  int availlableUnit = isLeftAvailable(cu, areaCh, area.pos(), iLeftUnits, unitHeight, (bNeighborFlags + iLeftUnits + leftBelowUnits - 1));
1221
1222
  // The left (resp. above) template counts as available only if every unit along that edge is.
71.1k
  leftIsAvailable = availlableUnit == iTUHeightInUnits;
1223
1224
71.1k
  availlableUnit = isAboveAvailable(cu, areaCh, area.pos(), iAboveUnits, unitWidth, (bNeighborFlags + iLeftUnits + leftBelowUnits + 1));
1225
1226
71.1k
  aboveIsAvailable = availlableUnit == iTUWidthInUnits;
1227
1228
71.1k
  if (leftIsAvailable)   // if left is not available, then the below left is not available
1229
23.4k
  {
1230
23.4k
    avaiLeftBelowUnits = isBelowLeftAvailable(cu, areaCh, area.bottomLeftComp(area.compID), leftBelowUnits, unitHeight, (bNeighborFlags + leftBelowUnits - 1));
1231
23.4k
  }
1232
1233
71.1k
  if (aboveIsAvailable)   // if above is not available, then  the above right is not available.
1234
25.5k
  {
1235
25.5k
    avaiAboveRightUnits = isAboveRightAvailable(cu, areaCh, area.topRightComp(area.compID), aboveRightUnits, unitWidth, (bNeighborFlags + iLeftUnits + leftBelowUnits + iAboveUnits + 1));
1236
25.5k
  }
1237
1238
71.1k
  Pel*       pDst  = nullptr;
1239
71.1k
  Pel const* piSrc = nullptr;
1240
1241
  // True when the luma block starts on a CTU row boundary (mask assumes CTUSize is a power
  // of two).
71.1k
  bool isFirstRowOfCtu = (lumaArea.y & ((cu.cs->sps)->CTUSize - 1)) == 0;
1242
1243
  // ----- above (and, for the MDLM modes, above-right) template row -----
71.1k
  if (aboveIsAvailable)
1244
25.5k
  {
1245
25.5k
    pDst  = pDst0    - iDstStride;
1246
25.5k
    int addedAboveRight = 0;
1247
25.5k
    if ((cu.intraDir[1] == MDLM_L_IDX) || (cu.intraDir[1] == MDLM_T_IDX))
1248
22.3k
    {
1249
22.3k
      addedAboveRight = avaiAboveRightUnits*chromaUnitWidth;
1250
22.3k
    }
1251
562k
    for (int i = 0; i < uiCWidth + addedAboveRight; i++)
1252
536k
    {
1253
      // At the first column with no left neighbour, the tap that would read one sample to the
      // left reuses the current sample instead (offset 0 rather than 1 below).
536k
      const bool leftPadding = i == 0 && !leftIsAvailable;
1254
536k
      if (cu.chromaFormat == CHROMA_444)
1255
0
      {
1256
0
        piSrc = pRecSrc0 - iRecStride;
1257
0
        pDst[i] = piSrc[i];
1258
0
      }
1259
536k
      else if (isFirstRowOfCtu)
1260
118k
      {
1261
118k
        piSrc   = pRecSrc0 - iRecStride;
1262
118k
        pDst[i] = (piSrc[2 * i] * 2 + piSrc[2 * i - (leftPadding ? 0 : 1)] + piSrc[2 * i + 1] + 2) >> 2;
1263
118k
      }
1264
417k
      else if (cu.chromaFormat == CHROMA_422)
1265
0
      {
1266
0
        piSrc = pRecSrc0 - iRecStride2;
1267
1268
0
        int s = 2;
1269
0
        s += piSrc[2 * i] * 2;
1270
0
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1271
0
        s += piSrc[2 * i + 1];
1272
0
        pDst[i] = s >> 2;
1273
0
      }
1274
417k
      else if (cu.cs->sps->verCollocatedChroma )
1275
0
      {
1276
0
        piSrc = pRecSrc0 - iRecStride2;
1277
1278
0
        int s = 4;
1279
0
        s += piSrc[2 * i - iRecStride];
1280
0
        s += piSrc[2 * i] * 4;
1281
0
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1282
0
        s += piSrc[2 * i + 1];
1283
0
        s += piSrc[2 * i + iRecStride];
1284
0
        pDst[i] = s >> 3;
1285
0
      }
1286
417k
      else
1287
417k
      {
1288
417k
        piSrc = pRecSrc0 - iRecStride2;
1289
417k
        int s = 4;
1290
417k
        s += piSrc[2 * i] * 2;
1291
417k
        s += piSrc[2 * i + 1];
1292
417k
        s += piSrc[2 * i - (leftPadding ? 0 : 1)];
1293
417k
        s += piSrc[2 * i + iRecStride] * 2;
1294
417k
        s += piSrc[2 * i + 1 + iRecStride];
1295
417k
        s += piSrc[2 * i + iRecStride - (leftPadding ? 0 : 1)];
1296
417k
        pDst[i] = s >> 3;
1297
417k
      }
1298
536k
    }
1299
25.5k
  }
1300
1301
  // ----- left (and, for the MDLM modes, below-left) template column -----
71.1k
  if (leftIsAvailable)
1302
23.4k
  {
1303
23.4k
    pDst  = pDst0    - 1;
    // Source column sits (1 + logSubWidthC) luma samples left of the block origin.
1304
23.4k
    piSrc = pRecSrc0 - 1 - logSubWidthC;
1305
1306
23.4k
    int addedLeftBelow = 0;
1307
23.4k
    if ((cu.intraDir[1] == MDLM_L_IDX) || (cu.intraDir[1] == MDLM_T_IDX))
1308
20.7k
    {
1309
20.7k
      addedLeftBelow = avaiLeftBelowUnits*chromaUnitHeight;
1310
20.7k
    }
1311
1312
462k
    for (int j = 0; j < uiCHeight + addedLeftBelow; j++)
1313
439k
    {
1314
439k
      if (cu.chromaFormat == CHROMA_444)
1315
0
      {
1316
0
        pDst[0] = piSrc[0];
1317
0
      }
1318
439k
      else if (cu.chromaFormat == CHROMA_422)
1319
0
      {
1320
0
        int s = 2;
1321
0
        s += piSrc[0] * 2;
1322
0
        s += piSrc[-1];
1323
0
        s += piSrc[1];
1324
0
        pDst[0] = s >> 2;
1325
0
      }
1326
439k
      else if (cu.cs->sps->verCollocatedChroma)
1327
0
      {
1328
        // In the first row with no above neighbour, the upper tap reuses the current sample.
0
        const bool abovePadding = j == 0 && !aboveIsAvailable;
1329
1330
0
        int s = 4;
1331
0
        s += piSrc[-(abovePadding ? 0 : iRecStride)];
1332
0
        s += piSrc[0] * 4;
1333
0
        s += piSrc[-1];
1334
0
        s += piSrc[1];
1335
0
        s += piSrc[iRecStride];
1336
0
        pDst[0] = s >> 3;
1337
0
      }
1338
439k
      else
1339
439k
      {
1340
439k
        int s = 4;
1341
439k
        s += piSrc[0] * 2;
1342
439k
        s += piSrc[1];
1343
439k
        s += piSrc[-1];
1344
439k
        s += piSrc[iRecStride] * 2;
1345
439k
        s += piSrc[iRecStride + 1];
1346
439k
        s += piSrc[iRecStride - 1];
1347
439k
        pDst[0] = s >> 3;
1348
439k
      }
1349
1350
439k
      piSrc += iRecStride2;
1351
439k
      pDst  += iDstStride;
1352
439k
    }
1353
23.4k
  }
1354
1355
  // inner part from reconstructed picture buffer
1356
971k
  for( int j = 0; j < uiCHeight; j++ )
1357
900k
  {
1358
14.7M
    for( int i = 0; i < uiCWidth; i++ )
1359
13.8M
    {
1360
13.8M
      if (cu.chromaFormat == CHROMA_444)
1361
0
      {
1362
0
        pDst0[i] = pRecSrc0[i];
1363
0
      }
1364
13.8M
      else if (cu.chromaFormat == CHROMA_422)
1365
0
      {
1366
0
        const bool leftPadding  = i == 0 && !leftIsAvailable;
1367
1368
0
        int s = 2;
1369
0
        s += pRecSrc0[2 * i] * 2;
1370
0
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1371
0
        s += pRecSrc0[2 * i + 1];
1372
0
        pDst0[i] = s >> 2;
1373
0
      }
1374
13.8M
      else if (cu.cs->sps->verCollocatedChroma)
1375
0
      {
1376
0
        const bool leftPadding  = i == 0 && !leftIsAvailable;
1377
0
        const bool abovePadding = j == 0 && !aboveIsAvailable;
1378
1379
0
        int s = 4;
1380
0
        s += pRecSrc0[2 * i - (abovePadding ? 0 : iRecStride)];
1381
0
        s += pRecSrc0[2 * i] * 4;
1382
0
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1383
0
        s += pRecSrc0[2 * i + 1];
1384
0
        s += pRecSrc0[2 * i + iRecStride];
1385
0
        pDst0[i] = s >> 3;
1386
0
      }
1387
13.8M
      else
1388
13.8M
      {
1389
13.8M
        CHECK(cu.chromaFormat != CHROMA_420, "Chroma format must be 4:2:0 for vertical filtering");
1390
13.8M
        const bool leftPadding = i == 0 && !leftIsAvailable;
1391
1392
13.8M
        int s = 4;
1393
13.8M
        s += pRecSrc0[2 * i] * 2;
1394
13.8M
        s += pRecSrc0[2 * i + 1];
1395
13.8M
        s += pRecSrc0[2 * i - (leftPadding ? 0 : 1)];
1396
13.8M
        s += pRecSrc0[2 * i + iRecStride] * 2;
1397
13.8M
        s += pRecSrc0[2 * i + 1 + iRecStride];
1398
13.8M
        s += pRecSrc0[2 * i + iRecStride - (leftPadding ? 0 : 1)];
1399
13.8M
        pDst0[i] = s >> 3;
1400
13.8M
      }
1401
13.8M
    }
1402
1403
    // Advance one chroma row in the destination and the matching luma rows in the source.
900k
    pDst0    += iDstStride;
1404
900k
    pRecSrc0 += iRecStride2;
1405
900k
  }
1406
71.1k
}
1407
1408
void IntraPrediction::xGetLMParameters(const CodingUnit& cu, const ComponentID compID,
1409
                                              const CompArea& chromaArea,
1410
                                              int& a, int& b, int& iShift)
1411
186k
{
1412
186k
  CHECK(compID == COMP_Y, "");
1413
1414
186k
  const SizeType cWidth  = chromaArea.width;
1415
186k
  const SizeType cHeight = chromaArea.height;
1416
1417
186k
  const Position posLT = chromaArea;
1418
1419
186k
  CodingStructure & cs = *(cu.cs);
1420
1421
186k
  const SPS &        sps           = *cs.sps;
1422
186k
  const uint32_t     tuWidth     = chromaArea.width;
1423
186k
  const uint32_t     tuHeight    = chromaArea.height;
1424
186k
  const ChromaFormat nChromaFormat = sps.chromaFormatIdc;
1425
1426
186k
  const int unitWidthLog2    = MIN_CU_LOG2 - getComponentScaleX(chromaArea.compID, nChromaFormat);
1427
186k
  const int unitHeightLog2   = MIN_CU_LOG2 - getComponentScaleY(chromaArea.compID, nChromaFormat);
1428
186k
  const int unitWidth    = 1<<unitWidthLog2;
1429
186k
  const int unitHeight   = 1<<unitHeightLog2;
1430
1431
186k
  const int tuWidthInUnits  = tuWidth >> unitWidthLog2;
1432
186k
  const int tuHeightInUnits = tuHeight >> unitHeightLog2;
1433
186k
  const int aboveUnits      = tuWidthInUnits;
1434
186k
  const int leftUnits       = tuHeightInUnits;
1435
186k
  int topTemplateSampNum = 2 * cWidth; // for MDLM, the template sample number is 2W or 2H;
1436
186k
  int leftTemplateSampNum = 2 * cHeight;
1437
186k
  int totalAboveUnits = (topTemplateSampNum + (unitWidth - 1)) >> unitWidthLog2;
1438
186k
  int totalLeftUnits = (leftTemplateSampNum + (unitHeight - 1)) >> unitHeightLog2;
1439
186k
  int totalUnits = totalLeftUnits + totalAboveUnits + 1;
1440
186k
  int aboveRightUnits = totalAboveUnits - aboveUnits;
1441
186k
  int leftBelowUnits = totalLeftUnits - leftUnits;
1442
186k
  int avaiAboveRightUnits = 0;
1443
186k
  int avaiLeftBelowUnits = 0;
1444
186k
  int avaiAboveUnits = 0;
1445
186k
  int avaiLeftUnits = 0;
1446
1447
186k
  const int curChromaMode = cu.intraDir[1];
1448
186k
  bool neighborFlags[4 * MAX_NUM_PART_IDXS_IN_CTU_WIDTH + 1];
1449
186k
  memset(neighborFlags, 0, totalUnits);
1450
1451
186k
  bool aboveAvailable, leftAvailable;
1452
1453
186k
  int availableUnit = isAboveAvailable(cu, CH_C, posLT, aboveUnits, unitWidth,
1454
186k
    (neighborFlags + leftUnits + leftBelowUnits + 1));
1455
186k
  aboveAvailable = availableUnit == tuWidthInUnits;
1456
1457
186k
  availableUnit = isLeftAvailable(cu, CH_C, posLT, leftUnits, unitHeight,
1458
186k
    (neighborFlags + leftUnits + leftBelowUnits - 1));
1459
186k
  leftAvailable = availableUnit == tuHeightInUnits;
1460
186k
  if (leftAvailable) // if left is not available, then the below left is not available
1461
61.4k
  {
1462
61.4k
    avaiLeftUnits = tuHeightInUnits;
1463
61.4k
    avaiLeftBelowUnits = isBelowLeftAvailable(cu, CH_C, chromaArea.bottomLeftComp(chromaArea.compID), leftBelowUnits, unitHeight, (neighborFlags + leftBelowUnits - 1));
1464
61.4k
  }
1465
186k
  if (aboveAvailable) // if above is not available, then  the above right is not available.
1466
68.0k
  {
1467
68.0k
    avaiAboveUnits = tuWidthInUnits;
1468
68.0k
    avaiAboveRightUnits = isAboveRightAvailable(cu, CH_C, chromaArea.topRightComp(chromaArea.compID), aboveRightUnits, unitWidth, (neighborFlags + leftUnits + leftBelowUnits + aboveUnits + 1));
1469
68.0k
  }
1470
1471
186k
  const int srcStride = 2 * MAX_TB_SIZEY + 1;
1472
186k
  Pel* srcColor0 = m_pMdlmTemp + srcStride + 1;
1473
1474
186k
  Pel* curChroma0 = getPredictorPtr(compID);
1475
1476
186k
  unsigned internalBitDepth = sps.bitDepths[CH_C];
1477
1478
186k
  int minLuma[2] = {  MAX_INT, 0 };
1479
186k
  int maxLuma[2] = { -MAX_INT, 0 };
1480
1481
186k
  Pel* src = srcColor0 - srcStride;
1482
186k
  int actualTopTemplateSampNum = 0;
1483
186k
  int actualLeftTemplateSampNum = 0;
1484
186k
  if (curChromaMode == MDLM_T_IDX)
1485
74.2k
  {
1486
74.2k
    leftAvailable = 0;
1487
74.2k
    avaiAboveRightUnits = avaiAboveRightUnits > (cHeight>>unitWidthLog2) ?  cHeight>>unitWidthLog2 : avaiAboveRightUnits;
1488
74.2k
    actualTopTemplateSampNum = unitWidth*(avaiAboveUnits + avaiAboveRightUnits);
1489
74.2k
  }
1490
112k
  else if (curChromaMode == MDLM_L_IDX)
1491
74.2k
  {
1492
74.2k
    aboveAvailable = 0;
1493
74.2k
    avaiLeftBelowUnits = avaiLeftBelowUnits > (cWidth>>unitHeightLog2) ? cWidth>>unitHeightLog2 : avaiLeftBelowUnits;
1494
74.2k
    actualLeftTemplateSampNum = unitHeight*(avaiLeftUnits + avaiLeftBelowUnits);
1495
74.2k
  }
1496
37.9k
  else if (curChromaMode == LM_CHROMA_IDX)
1497
37.9k
  {
1498
37.9k
    actualTopTemplateSampNum = cWidth;
1499
37.9k
    actualLeftTemplateSampNum = cHeight;
1500
37.9k
  }
1501
186k
  int startPos[2]; //0:Above, 1: Left
1502
186k
  int pickStep[2];
1503
1504
186k
  int aboveIs4 = leftAvailable  ? 0 : 1;
1505
186k
  int leftIs4 =  aboveAvailable ? 0 : 1;
1506
1507
186k
  startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
1508
186k
  pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4));
1509
1510
186k
  startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
1511
186k
  pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4));
1512
1513
186k
  Pel selectLumaPix[4] = { 0, 0, 0, 0 };
1514
186k
  Pel selectChromaPix[4] = { 0, 0, 0, 0 };
1515
1516
186k
  int cntT, cntL;
1517
186k
  cntT = cntL = 0;
1518
186k
  int cnt = 0;
1519
186k
  if (aboveAvailable)
1520
37.2k
  {
1521
37.2k
    cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
1522
37.2k
    src = srcColor0 - srcStride;
1523
37.2k
    const Pel *cur = curChroma0 + 1;
1524
184k
    for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
1525
147k
    {
1526
147k
      selectLumaPix[cnt] = src[pos];
1527
147k
      selectChromaPix[cnt] = cur[pos];
1528
147k
    }
1529
37.2k
  }
1530
1531
186k
  if (leftAvailable)
1532
33.4k
  {
1533
33.4k
    cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 );
1534
33.4k
    src = srcColor0 - 1;
1535
33.4k
    const Pel *cur = curChroma0 + m_refBufferStride[compID] + 1;
1536
164k
    for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
1537
131k
    {
1538
131k
      selectLumaPix[cnt + cntT] = src[pos * srcStride];
1539
131k
      selectChromaPix[cnt + cntT] = cur[pos];
1540
131k
    }
1541
33.4k
  }
1542
186k
  cnt = cntL + cntT;
1543
1544
186k
  if (cnt == 2)
1545
16
  {
1546
16
    selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
1547
16
    selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
1548
16
    selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
1549
16
    selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
1550
16
  }
1551
1552
186k
  int minGrpIdx[2] = { 0, 2 };
1553
186k
  int maxGrpIdx[2] = { 1, 3 };
1554
186k
  int *tmpMinGrp = minGrpIdx;
1555
186k
  int *tmpMaxGrp = maxGrpIdx;
1556
186k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]);
1557
186k
  if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]);
1558
186k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp);
1559
186k
  if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]);
1560
1561
186k
  minLuma[0] = (selectLumaPix[tmpMinGrp[0]] + selectLumaPix[tmpMinGrp[1]] + 1 )>>1;
1562
186k
  minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
1563
186k
  maxLuma[0] = (selectLumaPix[tmpMaxGrp[0]] + selectLumaPix[tmpMaxGrp[1]] + 1 )>>1;
1564
186k
  maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
1565
1566
186k
  if (leftAvailable || aboveAvailable)
1567
69.6k
  {
1568
69.6k
    int diff = maxLuma[0] - minLuma[0];
1569
69.6k
    if (diff > 0)
1570
490
    {
1571
490
      int diffC = maxLuma[1] - minLuma[1];
1572
490
      int x = floorLog2( diff );
1573
490
      static const uint8_t DivSigTable[1 << 4] = {
1574
        // 4bit significands - 8 ( MSB is omitted )
1575
490
        0,  7,  6,  5,  5,  4,  4,  3,  3,  2,  2,  1,  1,  1,  1,  0
1576
490
      };
1577
490
      int normDiff = (diff << 4 >> x) & 15;
1578
490
      int v = DivSigTable[normDiff] | 8;
1579
490
      x += normDiff != 0;
1580
1581
490
      int y = diffC == 0 ? 0 : floorLog2( abs( diffC ) ) + 1;
1582
490
      int add = 1 << y >> 1;
1583
490
      a = (diffC * v + add) >> y;
1584
490
      iShift = 3 + x - y;
1585
490
      if ( iShift < 1 )
1586
0
      {
1587
0
        iShift = 1;
1588
0
        a = ( (a == 0)? 0: (a < 0)? -15 : 15 );   // a=Sign(a)*15
1589
0
      }
1590
490
      b = minLuma[1] - ((a * minLuma[0]) >> iShift);
1591
490
    }
1592
69.1k
    else
1593
69.1k
    {
1594
69.1k
      a = 0;
1595
69.1k
      b = minLuma[1];
1596
69.1k
      iShift = 0;
1597
69.1k
    }
1598
69.6k
  }
1599
116k
  else
1600
116k
  {
1601
116k
    a = 0;
1602
116k
    b = 1 << (internalBitDepth - 1);
1603
116k
    iShift = 0;
1604
116k
  }
1605
186k
}
1606
1607
void IntraPrediction::initIntraMip( const CodingUnit& cu )
1608
39.4k
{
1609
39.4k
  CHECK( cu.lwidth() > cu.cs->sps->getMaxTbSize() || cu.lheight() > cu.cs->sps->getMaxTbSize(), "Error: block size not supported for MIP" );
1610
1611
  // prepare input (boundary) data for prediction
1612
39.4k
  CHECK(m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP");
1613
39.4k
  Pel *ptrSrc = getPredictorPtr(COMP_Y);
1614
39.4k
  const int srcStride  = m_refBufferStride[COMP_Y];
1615
39.4k
  const int srcHStride = 2;
1616
1617
39.4k
  m_matrixIntraPred.prepareInputForPred(CPelBuf(ptrSrc, srcStride, srcHStride), cu.Y(), cu.slice->sps->bitDepths[CH_L]);
1618
39.4k
}
1619
1620
/**
 * Writes the matrix-based intra prediction of the luma block of \p cu into
 * \p piPred.
 *
 * \param piPred  destination buffer; its stride must equal the luma width of \p cu
 * \param cu      coding unit; supplies the MIP mode (cu.intraDir[CH_L]) and the
 *                transpose flag. Both luma dimensions must be powers of two and
 *                must not exceed the SPS maximum transform size.
 */
void IntraPrediction::predIntraMip( PelBuf &piPred, const CodingUnit& cu )
{
  CHECK( cu.lwidth() > cu.cs->sps->getMaxTbSize() || cu.lheight() > cu.cs->sps->getMaxTbSize(), "Error: block size not supported for MIP" );
  CHECK( cu.lwidth() != (1 << floorLog2(cu.lwidth())) || cu.lheight() != (1 << floorLog2(cu.lheight())), "Error: expecting blocks of size 2^M x 2^N" );

  // the predictor writes rows back to back, so the buffer must be densely packed
  CHECK( cu.lwidth() != piPred.stride, " no support yet" );

  // generate mode-specific prediction
  const int lumaBitDepth = cu.slice->sps->bitDepths[CH_L];
  m_matrixIntraPred.predBlock(piPred.buf, cu.intraDir[CH_L], cu.mipTransposedFlag, lumaBitDepth);
}
1632
1633
void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf,
1634
  const bool forceRefFilterFlag)
1635
18.4k
{
1636
18.4k
  const CodingStructure& cs = *cu.cs;
1637
1638
18.4k
  if (!forceRefFilterFlag)
1639
18.4k
  {
1640
18.4k
    initPredIntraParams(cu, area, *cs.sps);
1641
18.4k
  }
1642
1643
18.4k
  const Position posLT = area;
1644
18.4k
  bool           isLeftAvail =
1645
18.4k
    (cs.getCURestricted(posLT.offset(-1, 0), cu, CH_L) != NULL);
1646
18.4k
  bool isAboveAvail =
1647
18.4k
    (cs.getCURestricted(posLT.offset(0, -1), cu, CH_L) != NULL);
1648
  // ----- Step 1: unfiltered reference samples -----
1649
18.4k
  if (cu.blocks[area.compID].x == area.x && cu.blocks[area.compID].y == area.y)
1650
14.3k
  {
1651
14.3k
    Pel* refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1652
    // With the first subpartition all the CU reference samples are fetched at once in a single call to
1653
    // xFillReferenceSamples
1654
14.3k
    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
1655
6.85k
    {
1656
6.85k
      m_leftRefLength = cu.Y().height << 1;
1657
6.85k
      m_topRefLength = cu.Y().width + area.width;
1658
6.85k
    }
1659
7.44k
    else   // if (cu.ispMode == VER_INTRA_SUBPARTITIONS)
1660
7.44k
    {
1661
7.44k
      m_leftRefLength = cu.Y().height + area.height;
1662
7.44k
      m_topRefLength = cu.Y().width << 1;
1663
7.44k
    }
1664
1665
14.3k
    xFillReferenceSamples(cs.picture->getRecoBuf(cu.Y()), refBufUnfiltered, cu.Y(), cu);
1666
1667
    // After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the
1668
    // current subpartition
1669
14.3k
    m_topRefLength = cu.blocks[area.compID].width + area.width;
1670
14.3k
    m_leftRefLength = cu.blocks[area.compID].height + area.height;
1671
14.3k
  }
1672
4.13k
  else
1673
4.13k
  {
1674
4.13k
    m_topRefLength = cu.blocks[area.compID].width + area.width;
1675
4.13k
    m_leftRefLength = cu.blocks[area.compID].height + area.height;
1676
1677
4.13k
    const int predSizeHor = m_topRefLength;
1678
4.13k
    const int predSizeVer = m_leftRefLength;
1679
4.13k
    if (cu.ispMode == HOR_INTRA_SUBPARTITIONS)
1680
3.14k
    {
1681
3.14k
      Pel* src = recBuf.bufAt(0, -1);
1682
3.14k
      Pel* ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID];
1683
3.14k
      if (isLeftAvail)
1684
0
      {
1685
0
        for (int i = 0; i <= 2 * cu.blocks[area.compID].height - area.height; i++)
1686
0
        {
1687
0
          ref[i] = ref[i + area.height];
1688
0
        }
1689
0
      }
1690
3.14k
      else
1691
3.14k
      {
1692
64.4k
        for (int i = 0; i <= predSizeVer; i++)
1693
61.3k
        {
1694
61.3k
          ref[i] = src[0];
1695
61.3k
        }
1696
3.14k
      }
1697
3.14k
      Pel* dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + 1;
1698
3.14k
      dst[-1] = ref[0];
1699
64.5k
      for (int i = 0; i < area.width; i++)
1700
61.4k
      {
1701
61.4k
        dst[i] = src[i];
1702
61.4k
      }
1703
3.14k
      Pel sample = src[area.width - 1];
1704
3.14k
      dst += area.width;
1705
64.5k
      for (int i = 0; i < predSizeHor - area.width; i++)
1706
61.4k
      {
1707
61.4k
        dst[i] = sample;
1708
61.4k
      }
1709
3.14k
    }
1710
987
    else
1711
987
    {
1712
987
      Pel* src = recBuf.bufAt(-1, 0);
1713
987
      Pel* ref = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1714
987
      if (isAboveAvail)
1715
0
      {
1716
0
        for (int i = 0; i <= 2 * cu.blocks[area.compID].width - area.width; i++)
1717
0
        {
1718
0
          ref[i] = ref[i + area.width];
1719
0
        }
1720
0
      }
1721
987
      else
1722
987
      {
1723
21.7k
        for (int i = 0; i <= predSizeHor; i++)
1724
20.7k
        {
1725
20.7k
          ref[i] = src[0];
1726
20.7k
        }
1727
987
      }
1728
987
      Pel* dst = m_refBuffer[area.compID][PRED_BUF_UNFILTERED] + m_refBufferStride[area.compID] + 1;
1729
987
      dst[-1] = ref[0];
1730
22.0k
      for (int i = 0; i < area.height; i++)
1731
21.0k
      {
1732
21.0k
        *dst = *src;
1733
21.0k
        src += recBuf.stride;
1734
21.0k
        dst++;
1735
21.0k
      }
1736
987
      Pel sample = src[-recBuf.stride];
1737
22.0k
      for (int i = 0; i < predSizeVer - area.height; i++)
1738
21.0k
      {
1739
21.0k
        *dst = sample;
1740
21.0k
        dst++;
1741
21.0k
      }
1742
987
    }
1743
4.13k
  }
1744
  // ----- Step 2: filtered reference samples -----
1745
18.4k
  if (m_ipaParam.refFilterFlag || forceRefFilterFlag)
1746
0
  {
1747
0
    Pel* refBufUnfiltered = m_refBuffer[area.compID][PRED_BUF_UNFILTERED];
1748
0
    Pel* refBufFiltered = m_refBuffer[area.compID][PRED_BUF_FILTERED];
1749
0
    xFilterReferenceSamples(refBufUnfiltered, refBufFiltered, area, *cs.sps, cu.multiRefIdx);
1750
0
  }
1751
18.4k
}
1752
1753
void IntraPrediction::setReferenceArrayLengths(const CompArea& area)
1754
731k
{
1755
  // set Top and Left reference samples length
1756
731k
  const int width = area.width;
1757
731k
  const int height = area.height;
1758
1759
731k
  m_leftRefLength = (height << 1);
1760
731k
  m_topRefLength = (width << 1);
1761
731k
}
1762
1763
} // namespace vvenc
1764
1765
//! \}
1766