Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvdec/source/Lib/CommonLib/IntraPrediction.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/** \file     Prediction.cpp
44
    \brief    prediction class
45
*/
46
47
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
48
49
#include "IntraPrediction.h"
50
51
#include "Unit.h"
52
#include "UnitTools.h"
53
54
#include "Buffer.h"
55
56
#include "dtrace_next.h"
57
#include "Rom.h"
58
59
#include <memory.h>
60
#include <array>
61
62
#include "CommonLib/InterpolationFilter.h"
63
#include "CommonLib/TimeProfiler.h"
64
65
namespace vvdec
66
{
67
68
// ====================================================================================================================
69
// Tables
70
// ====================================================================================================================
71
72
const uint8_t IntraPrediction::m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS] =
73
{
74
  { // Luma
75
    24, //   1xn
76
    24, //   2xn
77
    24, //   4xn
78
    14, //   8xn
79
    2,  //  16xn
80
    0,  //  32xn
81
    0,  //  64xn
82
    0,  // 128xn
83
  },
84
  { // Chroma
85
    40, //   1xn
86
    40, //   2xn
87
    40, //   4xn
88
    28, //   8xn
89
    4,  //  16xn
90
    0,  //  32xn
91
    0,  //  64xn
92
    0,  // 128xn
93
  }
94
};
95
96
const TFilterCoeff g_intraGaussFilter[32][4] = {
97
  { 16, 32, 16,  0 },
98
  { 16, 32, 16,  0 },
99
  { 15, 31, 17,  1 },
100
  { 15, 31, 17,  1 },
101
  { 14, 30, 18,  2 },
102
  { 14, 30, 18,  2 },
103
  { 13, 29, 19,  3 },
104
  { 13, 29, 19,  3 },
105
  { 12, 28, 20,  4 },
106
  { 12, 28, 20,  4 },
107
  { 11, 27, 21,  5 },
108
  { 11, 27, 21,  5 },
109
  { 10, 26, 22,  6 },
110
  { 10, 26, 22,  6 },
111
  {  9, 25, 23,  7 },
112
  {  9, 25, 23,  7 },
113
  {  8, 24, 24,  8 },
114
  {  8, 24, 24,  8 },
115
  {  7, 23, 25,  9 },
116
  {  7, 23, 25,  9 },
117
  {  6, 22, 26, 10 },
118
  {  6, 22, 26, 10 },
119
  {  5, 21, 27, 11 },
120
  {  5, 21, 27, 11 },
121
  {  4, 20, 28, 12 },
122
  {  4, 20, 28, 12 },
123
  {  3, 19, 29, 13 },
124
  {  3, 19, 29, 13 },
125
  {  2, 18, 30, 14 },
126
  {  2, 18, 30, 14 },
127
  {  1, 17, 31, 15 },
128
  {  1, 17, 31, 15 },
129
};
130
131
void GetLumaRecPixel420Core (const int width,const int height, const Pel* pRecSrc0,const ptrdiff_t iRecStride,Pel* pDst0,const ptrdiff_t iDstStride)
132
0
{
133
0
  for( int y = 0; y < height; y++ )
134
0
    {
135
0
      for( int x = 0; x < width; x ++ )
136
0
      {
137
0
        pDst0[x + 0 ] = (   pRecSrc0[( (x + 0 ) << 1 )    ] * 2
138
0
                          + pRecSrc0[( (x + 0 ) << 1 ) + 1] * 1
139
0
                          + pRecSrc0[( (x + 0 ) << 1 ) - 1] * 1
140
0
                          + pRecSrc0[( (x + 0 ) << 1 ) + iRecStride] * 2
141
0
                          + pRecSrc0[( (x + 0 ) << 1 ) + 1 + iRecStride] * 1
142
0
                          + pRecSrc0[( (x + 0 ) << 1 ) - 1 + iRecStride] * 1
143
0
                          + 4 ) >> 3;
144
0
      }
145
0
      pDst0 += iDstStride;
146
0
      pRecSrc0 += (iRecStride<<1);
147
0
    }
148
0
}
149
150
/** Function for deriving planar intra prediction. This function derives the prediction samples for planar mode (intra coding).
151
 */
152
153
//NOTE: Bit-Limit - 24-bit source
154
void xPredIntraPlanarCore( const CPelBuf &pSrc, PelBuf &pDst, const SPS& sps )
155
0
{
156
  // with some optimizations gcc gives spurious "-Wmaybe-uninitialized" warnings here
157
  GCC_WARNING_DISABLE_maybe_uninitialized
158
159
0
  const uint32_t width  = pDst.width;
160
0
  const uint32_t height = pDst.height;
161
0
  const uint32_t log2W  = getLog2( width );
162
0
  const uint32_t log2H  = getLog2( height );
163
0
  int leftColumn[MAX_CU_SIZE + 1], topRow[MAX_CU_SIZE + 1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];
164
0
  const uint32_t offset = 1 << (log2W + log2H);
165
  // Get left and above reference column and row
166
0
  for( int k = 0; k < width + 1; k++ )
167
0
  {
168
0
    topRow[k] = pSrc.at( k + 1, 0 );
169
0
  }
170
171
0
  for( int k = 0; k < height + 1; k++ )
172
0
  {
173
0
    leftColumn[k] = pSrc.at( 0, k + 1 );
174
0
  }
175
176
  // Prepare intermediate variables used in interpolation
177
0
  int bottomLeft = leftColumn[height];
178
0
  int topRight = topRow[width];
179
180
0
  for( int k = 0; k < width; k++ )
181
0
  {
182
0
    bottomRow[k] = bottomLeft - topRow[k];
183
0
    topRow[k]    = topRow[k] << log2H;
184
0
  }
185
186
0
  for( int k = 0; k < height; k++ )
187
0
  {
188
0
    rightColumn[k] = topRight - leftColumn[k];
189
0
    leftColumn[k]  = leftColumn[k] << log2W;
190
0
  }
191
192
0
  const uint32_t finalShift = 1 + log2W + log2H;
193
0
  const ptrdiff_t stride     = pDst.stride;
194
0
  Pel*       pred       = pDst.buf;
195
0
  for( int y = 0; y < height; y++, pred += stride )
196
0
  {
197
0
    int horPred = leftColumn[y];
198
199
0
    for( int x = 0; x < width; x++ )
200
0
    {
201
0
      horPred += rightColumn[y];
202
0
      topRow[x] += bottomRow[x];
203
204
0
      int vertPred = topRow[x];
205
0
      pred[x]      = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift;
206
207
0
    }
208
0
  }
209
0
  GCC_WARNING_RESET
210
0
}
211
212
void  IntraPredSampleFilterCore(Pel *ptrSrc,const ptrdiff_t  srcStride,PelBuf &piPred,const uint32_t uiDirMode,const ClpRng& clpRng)
213
0
{
214
0
  const CPelBuf srcBuf  ( ptrSrc, ( SizeType ) srcStride, ( SizeType ) srcStride );
215
0
  const int     iWidth  = piPred.width;
216
0
  const int     iHeight = piPred.height;
217
0
  PelBuf        dstBuf  = piPred;
218
219
0
  const int scale = ((getLog2(iWidth) - 2 + getLog2(iHeight) - 2 + 2) >> 2);
220
0
  CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");
221
222
0
#if 1
223
0
  if( uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX )
224
0
  {
225
0
    for( int y = 0; y < iHeight; y++ )
226
0
    {
227
0
      const int wT   = 32 >> std::min(31, ((y << 1) >> scale));
228
//      const Pel left = srcBuf.at(y + 1, 1);
229
0
      const Pel left = srcBuf.at(0, y + 1 );
230
0
      for (int x = 0; x < iWidth; x++)
231
0
      {
232
0
        const int wL    = 32 >> std::min(31, ((x << 1) >> scale));
233
0
        const Pel top   = srcBuf.at(x + 1, 0);
234
0
        const Pel val   = dstBuf.at(x, y);
235
0
        dstBuf.at(x, y) = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6);
236
0
      }
237
0
    }
238
0
  }
239
#else
240
  const int lev[4]={std::min(3,iWidth),std::min(6,iWidth),std::min(12,iWidth),std::min(24,iWidth)};
241
  if (uiDirMode == PLANAR_IDX)
242
  {
243
    for (int y = 0; y < iHeight; y++)
244
    {
245
      int wT = 32 >> std::min(31, ((y << 1) >> scale));
246
      const Pel left = srcBuf.at(0, y + 1);
247
      if (wT)
248
      {
249
        for (int x = 0; x < iWidth; x++)
250
        {
251
          const Pel top = srcBuf.at(x + 1, 0);
252
          int wL = 32 >> std::min(31, ((x << 1) >> scale));
253
          dstBuf.at(x, y) = ClipPel((wL * left + wT * top + (64 - wL - wT) * dstBuf.at(x, y) + 32) >> 6, clpRng);
254
255
        }
256
      }
257
      else
258
      {
259
        for (int x = 0; x < lev[scale]; x++)   // bis wL 0 ist, das ist bei x lev[scale]
260
        {
261
          int wL = 32 >> std::min(31, ((x << 1) >> scale));
262
          dstBuf.at(x, y) = ClipPel((wL * left + (64 - wL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
263
        }
264
      }
265
    }
266
  }
267
  else if (uiDirMode == DC_IDX)
268
  {
269
    const Pel topLeft = srcBuf.at(0, 0);
270
    for (int y = 0; y < iHeight; y++)
271
    {
272
      int wT = 32 >> std::min(31, ((y << 1) >> scale));
273
      const Pel left = srcBuf.at(0, y + 1);
274
      if (wT)
275
      {
276
277
        for (int x = 0; x < iWidth; x++)
278
        {
279
          const Pel top = srcBuf.at(x + 1, 0);
280
          int wL = 32 >> std::min(31, ((x << 1) >> scale));
281
          int wTL = (wL >> 4) + (wT >> 4);
282
          dstBuf.at(x, y) = ClipPel((wL * left + wT * top - wTL * topLeft + (64 - wL - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
283
          }
284
      }
285
      else
286
      {
287
        for (int x = 0; x < lev[scale]; x++)
288
        {
289
          const Pel top = srcBuf.at(x + 1, 0);
290
          int wL = 32 >> std::min(31, ((x << 1) >> scale));
291
          int wTL = (wL >> 4) + (wT >> 4);
292
          dstBuf.at(x, y) = ClipPel((wL * left + wT * top - wTL * topLeft + (64 - wL - wT + wTL) * dstBuf.at(x, y) + 32) >> 6, clpRng);
293
        }
294
295
      }
296
    }
297
  }
298
#endif
299
0
}
300
301
template<typename T>
302
void IntraPredAngleCore(T* pDstBuf,const ptrdiff_t dstStride,T* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff,const bool useCubicFilter,const ClpRng& clpRng)
303
0
{
304
0
    for (int y = 0; y<height; y++ )
305
0
    {
306
0
      const int deltaInt   = deltaPos >> 5;
307
0
      const int deltaFract = deltaPos & ( 32 - 1 );
308
309
0
      Pel p[4];
310
311
0
      int refMainIndex = deltaInt + 1;
312
313
0
      const TFilterCoeff *f = &ff[deltaFract << 2];
314
315
0
      for( int x = 0; x < width; x++, refMainIndex++ )
316
0
      {
317
0
        p[0] = refMain[refMainIndex - 1];
318
0
        p[1] = refMain[refMainIndex    ];
319
0
        p[2] = refMain[refMainIndex + 1];
320
0
        p[3] = refMain[refMainIndex + 2];
321
322
0
        pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6);
323
324
0
        if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
325
0
        {
326
0
          pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng );
327
0
        }
328
0
      }
329
0
      deltaPos += intraPredAngle;
330
0
    }
331
0
}
332
333
template<typename T>
334
void IntraPredAngleChroma(T* pDstBuf,const ptrdiff_t dstStride,int16_t* pBorder,int width,int height,int deltaPos,int intraPredAngle)
335
22
{
336
262
  for (int y = 0; y<height; y++)
337
240
  {
338
240
    const int deltaInt   = deltaPos >> 5;
339
240
    const int deltaFract = deltaPos & (32 - 1);
340
341
    // Do linear filtering
342
240
    const Pel *pRM = pBorder + deltaInt + 1;
343
240
    int lastRefMainPel = *pRM++;
344
345
720
    for( int x = 0; x < width; pRM++, x++ )
346
480
    {
347
480
      int thisRefMainPel = *pRM;
348
480
      pDstBuf[x + 0] = ( Pel ) ( ( ( 32 - deltaFract )*lastRefMainPel + deltaFract*thisRefMainPel + 16 ) >> 5 );
349
480
      lastRefMainPel = thisRefMainPel;
350
480
    }
351
240
    deltaPos += intraPredAngle;
352
240
    pDstBuf += dstStride;
353
240
  }
354
355
22
}
356
357
// ====================================================================================================================
358
// Constructor / destructor / initialize
359
// ====================================================================================================================
360
361
59.8k
IntraPrediction::IntraPrediction() : m_currChromaFormat( NUM_CHROMA_FORMAT )
362
59.8k
{
363
59.8k
  IntraPredAngleCore4 = IntraPredAngleCore;
364
59.8k
  IntraPredAngleCore8 = IntraPredAngleCore;
365
59.8k
  IntraPredAngleChroma4 = IntraPredAngleChroma;
366
59.8k
  IntraPredAngleChroma8 = IntraPredAngleChroma;
367
368
59.8k
  IntraPredSampleFilter8 = IntraPredSampleFilterCore;
369
59.8k
  IntraPredSampleFilter16 = IntraPredSampleFilterCore;
370
371
59.8k
  xPredIntraPlanar = xPredIntraPlanarCore;
372
373
59.8k
  GetLumaRecPixel420 = GetLumaRecPixel420Core;
374
59.8k
}
375
376
IntraPrediction::~IntraPrediction()
377
59.8k
{
378
59.8k
  destroy();
379
59.8k
}
380
381
void IntraPrediction::destroy()
382
82.6k
{
383
82.6k
}
384
385
void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
386
22.8k
{
387
  // if it has been initialised before, but the chroma format has changed, release the memory and start again.
388
22.8k
  if (m_currChromaFormat != chromaFormatIDC)
389
22.8k
  {
390
22.8k
    destroy();
391
22.8k
  }
392
393
22.8k
  m_currChromaFormat = chromaFormatIDC;
394
395
22.8k
  std::fill_n( m_neighborSize, 3, 0 );
396
22.8k
  m_lastCUidx = -1;
397
398
22.8k
#if ENABLE_SIMD_OPT_INTRAPRED && defined( TARGET_SIMD_X86 )
399
22.8k
  initIntraPredictionX86();
400
22.8k
#endif
401
22.8k
}
402
403
// ====================================================================================================================
404
// Public member functions
405
// ====================================================================================================================
406
407
// Function for calculating DC value of the reference samples used in Intra prediction
408
//NOTE: Bit-Limit - 25-bit source
409
Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize, const int mrlIdx )
410
5.34k
{
411
5.34k
  CHECK( dstSize.width == 0 || dstSize.height == 0, "Empty area provided" );
412
413
5.34k
  int idx, sum = 0;
414
5.34k
  Pel dcVal;
415
5.34k
  const int  width     = dstSize.width;
416
5.34k
  const int  height    = dstSize.height;
417
5.34k
  const auto denom     = (width == height) ? (width << 1) : std::max(width,height);
418
5.34k
  const auto divShift  = getLog2(denom);
419
5.34k
  const auto divOffset = (denom >> 1);
420
421
5.34k
  if( width >= height )
422
3.86k
  {
423
95.8k
    for( idx = 0; idx < width; idx++ )
424
91.9k
    {
425
91.9k
      sum += pSrc.at( mrlIdx + 1 + idx, 0 );
426
91.9k
    }
427
3.86k
  }
428
5.34k
  if( width <= height )
429
3.77k
  {
430
102k
    for( idx = 0; idx < height; idx++ )
431
98.6k
    {
432
98.6k
      sum += pSrc.at( 0, mrlIdx + 1 + idx );
433
98.6k
    }
434
3.77k
  }
435
436
5.34k
  dcVal = (sum + divOffset) >> divShift;
437
5.34k
  return dcVal;
438
5.34k
}
439
440
int IntraPrediction::getWideAngle( int width, int height, int predMode )
441
29.1k
{
442
29.1k
  if ( predMode > DC_IDX && predMode <= VDIA_IDX )
443
29.1k
  {
444
29.1k
    int modeShift[] = { 0, 6, 10, 12, 14, 15 };
445
29.1k
    int deltaSize = abs(getLog2(width) - getLog2(height));
446
29.1k
    if (width > height && predMode < 2 + modeShift[deltaSize])
447
625
    {
448
625
      predMode += (VDIA_IDX - 1);
449
625
    }
450
28.5k
    else if (height > width && predMode > VDIA_IDX - modeShift[deltaSize])
451
1.06k
    {
452
1.06k
      predMode -= (VDIA_IDX - 1);
453
1.06k
    }
454
29.1k
  }
455
29.1k
  return predMode;
456
29.1k
}
457
458
void IntraPrediction::setReferenceArrayLengths( const CompArea &area )
459
48.2k
{
460
  // set Top and Left reference samples length
461
48.2k
  const int  width    = area.width;
462
48.2k
  const int  height   = area.height;
463
464
48.2k
  m_leftRefLength     = (height << 1);
465
48.2k
  m_topRefLength      = (width << 1);
466
467
48.2k
}
468
469
470
471
void IntraPrediction::predIntraAng( const ComponentID compID, PelBuf &piPred, const CodingUnit &cu, const bool useFilteredPredSamples )
472
38.2k
{
473
38.2k
  const ChannelType    channelType  = toChannelType( compID );
474
38.2k
  const int            iWidth       = piPred.width;
475
38.2k
  const int            iHeight      = piPred.height;
476
38.2k
  const Size           cuSize       = Size( cu.blocks[compID].width, cu.blocks[compID].height );
477
38.2k
  CHECK( CU::isMIP(cu, toChannelType(compID)), "We should not get here for MIP." );
478
38.2k
  const uint32_t       uiDirMode    = isLuma( compID ) && cu.bdpcmMode() ? BDPCM_IDX : !isLuma(compID) && cu.bdpcmModeChroma() ? BDPCM_IDX : PU::getFinalIntraMode(cu, channelType);
479
480
38.2k
  CHECKD( iWidth == 2, "Width of 2 is not supported" );
481
482
38.2k
  const int     multiRefIdx = ( compID == COMPONENT_Y ) ? cu.multiRefIdx() : 0;
483
38.2k
  const bool    useISP      = cu.ispMode() && isLuma( compID );
484
38.2k
  const int     srcStride   = m_topRefLength  + 1 + multiRefIdx;
485
38.2k
  const int     srcHStride  = m_leftRefLength + 1 + multiRefIdx;
486
38.2k
  const ClpRng& clpRng      ( cu.slice->clpRng( compID ) );
487
38.2k
        bool    doPDPC      = ( iWidth >= MIN_TB_SIZEY && iHeight >= MIN_TB_SIZEY ) && multiRefIdx == 0;
488
489
38.2k
  const PelBuf& srcBuf = cu.ispMode() && isLuma(compID) ? getISPBuffer( useFilteredPredSamples ) : PelBuf(getPredictorPtr(compID, useFilteredPredSamples), srcStride, srcHStride);
490
491
38.2k
  switch (uiDirMode)
492
38.2k
  {
493
8.92k
    case(PLANAR_IDX): xPredIntraPlanar(srcBuf, piPred, *cu.sps); break;
494
5.34k
    case(DC_IDX):     xPredIntraDc    (srcBuf, piPred, channelType, false, multiRefIdx); break;
495
2.10k
    case(BDPCM_IDX):  xPredIntraBDPCM(srcBuf, piPred, isLuma(compID) ? cu.bdpcmMode() : cu.bdpcmModeChroma(), clpRng); break;
496
233
    case(2):
497
288
    case(DIA_IDX):
498
1.37k
    case(VDIA_IDX):
499
1.37k
      if (getWideAngle(useISP ? cuSize.width : iWidth, useISP ? cuSize.height : iHeight, uiDirMode) == static_cast<int>(uiDirMode)) // check if uiDirMode is not wide-angle
500
994
      {
501
994
        xPredIntraAng(srcBuf, piPred, channelType, uiDirMode, clpRng, *cu.sps, multiRefIdx, useFilteredPredSamples, doPDPC, useISP, cuSize );
502
994
        break;
503
994
      }
504
20.8k
    default:          xPredIntraAng(srcBuf, piPred, channelType, uiDirMode, clpRng, *cu.sps, multiRefIdx, useFilteredPredSamples, doPDPC, useISP, cuSize); break;
505
38.2k
  }
506
507
38.2k
  if( doPDPC && (uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX ) )
508
13.5k
  {
509
13.5k
    if (iWidth>8)
510
8.55k
      IntraPredSampleFilter16(srcBuf.buf,srcBuf.stride,piPred,uiDirMode,clpRng);
511
4.94k
    else
512
4.94k
      IntraPredSampleFilter8(srcBuf.buf,srcBuf.stride,piPred,uiDirMode,clpRng);
513
13.5k
  }
514
38.2k
}
515
516
void IntraPrediction::predIntraChromaLM( const ComponentID compID, PelBuf& piPred, const CodingUnit& cu, const CompArea& chromaArea, int intraDir )
517
12.4k
{
518
12.4k
  int  iLumaStride = 0;
519
12.4k
  PelBuf Temp;
520
12.4k
  if( (intraDir == MDLM_L_IDX) || (intraDir == MDLM_T_IDX) )
521
3.71k
  {
522
3.71k
    iLumaStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1;
523
3.71k
    Temp = PelBuf( m_piYuvExt[1] + iLumaStride + 1, iLumaStride, Size( chromaArea ) );
524
3.71k
  }
525
8.72k
  else
526
8.72k
  {
527
8.72k
    iLumaStride = MAX_TU_SIZE_FOR_PROFILE + 1;
528
8.72k
    Temp = PelBuf( m_piYuvExt[1] + iLumaStride + 1, iLumaStride, Size( chromaArea ) );
529
8.72k
  }
530
12.4k
  int a, b, iShift;
531
12.4k
  xGetLMParameters( cu, compID, chromaArea, a, b, iShift );
532
533
  ////// final prediction
534
12.4k
  piPred.copyFrom( Temp );
535
12.4k
  piPred.linearTransform( a, iShift, b, true, cu.slice->clpRng( compID ) );
536
12.4k
}
537
538
void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter, const int mrlIdx )
539
5.34k
{
540
5.34k
  const Pel dcval = xGetPredValDc( pSrc, pDst, mrlIdx );
541
5.34k
  pDst.fill( dcval );
542
5.34k
}
543
544
// Function for deriving the angular Intra predictions
545
void IntraPredAngleCore(Pel *pDstBuf,const int dstStride,Pel* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff,const bool useCubicFilter,const ClpRng& clpRng)
546
0
{
547
0
  for (int y = 0; y<height; y++ )
548
0
  {
549
0
    const int deltaInt   = deltaPos >> 5;
550
0
    const int deltaFract = deltaPos & ( 32 - 1 );
551
552
0
    Pel p[4];
553
554
0
    int refMainIndex = deltaInt + 1;
555
556
0
    const TFilterCoeff *f = &ff[deltaFract << 2];
557
558
0
    for( int x = 0; x < width; x++, refMainIndex++ )
559
0
    {
560
0
      p[0] = refMain[refMainIndex - 1];
561
0
      p[1] = refMain[refMainIndex    ];
562
0
      p[2] = refMain[refMainIndex + 1];
563
0
      p[3] = refMain[refMainIndex + 2];
564
565
0
      pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6);
566
567
0
      if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
568
0
      {
569
0
        pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng );
570
0
      }
571
0
    }
572
0
    deltaPos += intraPredAngle;
573
0
  }
574
0
}
575
576
577
/** Function for deriving the simplified angular intra predictions.
578
 *
579
 * This function derives the prediction samples for the angular mode based on the prediction direction indicated by
580
 * the prediction mode index. The prediction direction is given by the displacement of the bottom row of the block and
581
 * the reference row above the block in the case of vertical prediction or displacement of the rightmost column
582
 * of the block and reference column left from the block in the case of the horizontal prediction. The displacement
583
 * is signalled at 1/32 pixel accuracy. When projection of the predicted pixel falls inbetween reference samples,
584
 * the predicted value for the pixel is linearly interpolated from the reference samples. All reference samples are taken
585
 * from the extended main reference.
586
 */
587
//NOTE: Bit-Limit - 25-bit source
588
589
void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps,
590
                                           int      multiRefIdx,
591
                                     const bool     useFilteredPredSamples ,
592
                                           bool     &doPDPC,
593
                                     const bool     useISP,
594
                                     const Size     cuSize
595
                                    )
596
21.8k
{
597
21.8k
  int width =int(pDst.width);
598
21.8k
  int height=int(pDst.height);
599
600
21.8k
  CHECK( !( dirMode > DC_IDX && dirMode < NUM_LUMA_MODE ), "Invalid intra dir" );
601
21.8k
  int              predMode           = useISP ? getWideAngle( cuSize.width, cuSize.height, dirMode ) : getWideAngle( width, height, dirMode );
602
21.8k
  const bool       bIsModeVer         = predMode >= DIA_IDX;
603
21.8k
  const int        intraPredAngleMode = (bIsModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
604
21.8k
  const int        absAngMode         = abs(intraPredAngleMode);
605
21.8k
  const int        signAng            = intraPredAngleMode < 0 ? -1 : 1;
606
607
  // Set bitshifts and scale the angle parameter to block size
608
21.8k
  static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
609
21.8k
  static const int invAngTable[32] = {
610
21.8k
    0,   16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
611
21.8k
    512, 468,   420,  364,  321,  287,  256,  224,  191,  161,  128,  96,  64,  48,  32,  16
612
21.8k
  };   // (512 * 32) / Angle
613
21.8k
  int invAngle                    = invAngTable[absAngMode];
614
21.8k
  int absAng                      = angTable   [absAngMode];
615
21.8k
  int intraPredAngle              = signAng * absAng;
616
617
21.8k
  Pel* refMain;
618
21.8k
  Pel* refSide;
619
620
21.8k
  Pel  refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
621
21.8k
  Pel  refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
622
623
  // Initialize the Main and Left reference array.
624
21.8k
  if (intraPredAngle < 0)
625
4.38k
  {
626
93.0k
    for (int x = 0; x <= width + 1 + multiRefIdx; x++)
627
88.6k
    {
628
88.6k
      refAbove[x + height] = pSrc.at(x, 0);
629
88.6k
    }
630
94.4k
    for (int y = 0; y <= height + 1 + multiRefIdx; y++)
631
90.0k
    {
632
90.0k
      refLeft[y + width] = pSrc.at(0, y);
633
90.0k
    }
634
4.38k
    refMain = bIsModeVer ? refAbove + height : refLeft + width;
635
4.38k
    refSide = bIsModeVer ? refLeft  + width : refAbove + height;
636
637
    // Extend the Main reference to the left.
638
4.38k
    int sizeSide = bIsModeVer ? height : width;
639
86.3k
    for (int k = -sizeSide; k <= -1; k++)
640
81.9k
    {
641
81.9k
      refMain[k] = refSide[std::min((-k * invAngle + 256) >> 9, sizeSide)];
642
81.9k
    }
643
4.38k
  }
644
17.4k
  else
645
17.4k
  {
646
    //for (int x = 0; x <= m_topRefLength + multiRefIdx; x++)
647
    //{
648
    //  refAbove[x] = pSrc.at(x, 0);
649
    //}
650
17.4k
    memcpy( refAbove, pSrc.buf, ( m_topRefLength + multiRefIdx + 1 ) * sizeof( Pel ) );
651
829k
    for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++)
652
812k
    {
653
812k
      refLeft[y] = pSrc.at(0, y);
654
812k
    }
655
656
17.4k
    refMain = bIsModeVer ? refAbove : refLeft;
657
17.4k
    refSide = bIsModeVer ? refLeft : refAbove;
658
659
    // Extend main reference to right using replication
660
17.4k
    const int log2Ratio = getLog2(width) - getLog2(height);
661
17.4k
    const int s         = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
662
17.4k
    const int maxIndex  = (multiRefIdx << s) + 2;
663
17.4k
    const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
664
17.4k
    const Pel val       = refMain[refLength + multiRefIdx];
665
53.6k
    for (int z = 1; z <= maxIndex; z++)
666
36.1k
    {
667
36.1k
      refMain[refLength + multiRefIdx + z] = val;
668
36.1k
    }
669
17.4k
  }
670
671
  // swap width/height if we are doing a horizontal mode:
672
21.8k
  Pel tempArray[MAX_TB_SIZEY*MAX_TB_SIZEY];
673
21.8k
  const ptrdiff_t dstStride = bIsModeVer ? pDst.stride : MAX_TB_SIZEY;
674
21.8k
  Pel *pDstBuf = bIsModeVer ? pDst.buf : tempArray;
675
21.8k
  if (!bIsModeVer)
676
12.1k
  {
677
12.1k
    std::swap(width, height);
678
12.1k
  }
679
680
  // compensate for line offset in reference line buffers
681
21.8k
  refMain += multiRefIdx;
682
21.8k
  refSide += multiRefIdx;
683
684
21.8k
  if( intraPredAngle == 0 )  // pure vertical or pure horizontal
685
11.3k
  {
686
11.3k
    if( doPDPC )
687
10.9k
    {
688
10.9k
      const int scale = ( ( getLog2( width ) - 2 + getLog2( height ) - 2 + 2 ) >> 2 );
689
10.9k
      CHECK(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");
690
10.9k
      const int lev[4]={std::min(3,width),std::min(6,width),std::min(12,width),std::min(24,width)};
691
692
10.9k
      const Pel topLeft = pSrc.at(0, 0);
693
289k
      for( int y = 0; y < height; y++ )
694
278k
      {
695
278k
        const Pel  left =  refSide[y + 1];
696
278k
              Pel *line = &pDstBuf[y * dstStride];
697
3.25M
        for( int x = 0; x < lev[scale]; x++ )
698
2.97M
        {
699
2.97M
          int wL = 32 >> std::min( 31, ( ( x << 1 ) >> scale ) );
700
2.97M
          *line++ = ClipPel( ( wL * ( left - topLeft ) + refMain[x + 1] * ( 1 << 6 ) + 32 ) >> 6, clpRng );
701
2.97M
        }
702
278k
        memcpy( line, refMain + lev[scale] + 1, ( width - lev[scale] ) * sizeof( Pel ) );
703
278k
      }
704
10.9k
    }
705
381
    else
706
381
    {
707
5.93k
      for( int y = 0; y < height; y++ )
708
5.55k
      {
709
5.55k
        memcpy( pDstBuf + y * dstStride, refMain + 1, width * sizeof( Pel ) );
710
5.55k
      }
711
712
381
    }
713
11.3k
  }
714
10.5k
  else
715
10.5k
  {
716
10.5k
    Pel *pDsty=pDstBuf;
717
718
10.5k
    if( !(0 == (absAng & 0x1F)) )
719
9.45k
    {
720
9.45k
      if( isLuma(channelType) )
721
6.71k
      {
722
6.71k
        int deltaPos = intraPredAngle * (1 + multiRefIdx);
723
6.71k
        bool interpolationFlag = false, filterFlag = false;
724
6.71k
        const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
725
6.71k
        const int log2Size = ((getLog2(width) + getLog2(height)) >> 1);
726
6.71k
        CHECKD( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
727
6.71k
        filterFlag = (diff > m_aucIntraFilter[channelType][log2Size]);
728
729
6.71k
        if( filterFlag )
730
3.16k
        {
731
3.16k
          const bool isRefFilter = 0 == ( absAng & 0x1F );
732
3.16k
          interpolationFlag = !isRefFilter;
733
3.16k
        }
734
6.71k
        const bool useCubicFilter = useISP ? true : ( !interpolationFlag || multiRefIdx > 0 );
735
6.71k
        const TFilterCoeff *f              = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(0) : g_intraGaussFilter[0];
736
6.71k
        if( ( width & 7 ) == 0 )
737
5.71k
        {
738
5.71k
          IntraPredAngleCore8(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle,f,useCubicFilter,clpRng);
739
740
5.71k
        }
741
996
        else if( ( width & 3 ) == 0 )
742
870
        {
743
870
          IntraPredAngleCore4(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle,f,useCubicFilter,clpRng);
744
870
        }
745
126
        else
746
126
        {
747
126
          CHECK( !useISP, "should not happen" );
748
749
2.99k
          for (int y = 0; y<height; y++ )
750
2.86k
          {
751
2.86k
            const int deltaInt   = deltaPos >> 5;
752
2.86k
            const int deltaFract = deltaPos & ( 32 - 1 );
753
754
2.86k
            Pel p[4];
755
756
2.86k
            int refMainIndex = deltaInt + 1;
757
758
2.86k
            const TFilterCoeff *ff = &f[deltaFract << 2];
759
760
7.76k
            for( int x = 0; x < width; x++, refMainIndex++ )
761
4.89k
            {
762
4.89k
              p[0] = refMain[refMainIndex - 1];
763
4.89k
              p[1] = refMain[refMainIndex    ];
764
4.89k
              p[2] = refMain[refMainIndex + 1];
765
4.89k
              p[3] = refMain[refMainIndex + 2];
766
767
4.89k
              pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(ff[0] * p[0]) + static_cast<int>(ff[1] * p[1]) + static_cast<int>(ff[2] * p[2]) + static_cast<int>(ff[3] * p[3]) + 32) >> 6);
768
769
4.89k
              if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
770
4.89k
              {
771
4.89k
                pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng );
772
4.89k
              }
773
4.89k
            }
774
2.86k
            deltaPos += intraPredAngle;
775
2.86k
          }
776
126
        }
777
778
6.71k
      }
779
2.74k
      else
780
2.74k
      {
781
2.74k
        int deltaPos = intraPredAngle * (1 + multiRefIdx);
782
2.74k
        if ( width >=8 )
783
2.29k
        {
784
2.29k
          IntraPredAngleChroma8(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
785
2.29k
        }
786
446
        else if( width == 4 )
787
424
        {
788
424
          IntraPredAngleChroma4(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
789
424
        }
790
22
        else
791
22
        {
792
22
          IntraPredAngleChroma(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
793
22
        }
794
2.74k
      }
795
796
9.45k
    }
797
1.06k
    else
798
1.06k
    {
799
18.6k
      for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
800
17.6k
      {
801
17.6k
        const int deltaInt   = deltaPos >> 5;
802
        // Just copy the integer samples
803
17.6k
        memcpy(pDsty,refMain  + deltaInt + 1,width*sizeof(Pel));
804
17.6k
      }
805
1.06k
    }
806
807
10.5k
    pDsty=pDstBuf;
808
189k
    for (int y = 0; y<height; y++, pDsty += dstStride)
809
178k
    {
810
178k
      int angularScale = 0;
811
178k
      if( intraPredAngle < 0 )
812
81.9k
      {
813
81.9k
        doPDPC = false;
814
81.9k
      }
815
96.6k
      else if( intraPredAngle > 0 )
816
96.6k
      {
817
96.6k
        const int sideSize = predMode >= DIA_IDX ? pDst.height : pDst.width;
818
96.6k
        const int maxScale = 2;
819
820
96.6k
        angularScale = std::min(maxScale, getLog2(sideSize) - (getLog2(3 * invAngle - 2) - 8));
821
96.6k
        doPDPC &= angularScale >= 0;
822
96.6k
      }
823
824
178k
      if( doPDPC )
825
65.0k
      {
826
65.0k
        int invAngleSum = 256;
827
828
622k
        for (int x = 0; x < std::min(3 << angularScale, width); x++)
829
557k
        {
830
557k
          invAngleSum += invAngle;
831
832
557k
          int wL   = 32 >> (2 * x >> angularScale);
833
557k
          Pel left = refSide[y + (invAngleSum >> 9) + 1];
834
557k
          pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6);
835
557k
        }
836
65.0k
      }
837
178k
    }
838
10.5k
  }
839
840
  // Flip the block if this is the horizontal mode
841
21.8k
  if( !bIsModeVer )
842
12.1k
  {
843
12.1k
    pDst.transposedFrom( CPelBuf( pDstBuf, dstStride, width, height ) );
844
12.1k
  }
845
21.8k
}
846
847
void IntraPrediction::xPredIntraBDPCM(const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng )
848
2.10k
{
849
2.10k
  const int wdt = pDst.width;
850
2.10k
  const int hgt = pDst.height;
851
852
2.10k
  const ptrdiff_t strideP = pDst.stride;
853
2.10k
  const ptrdiff_t strideS = pSrc.stride;
854
855
2.10k
  CHECK( !( dirMode == 1 || dirMode == 2 ), "Incorrect BDPCM mode parameter." );
856
857
2.10k
  Pel* pred = &pDst.buf[0];
858
2.10k
  if( dirMode == 1 )
859
1.16k
  {
860
1.16k
    Pel  val;
861
13.4k
    for( int y = 0; y < hgt; y++ )
862
12.2k
    {
863
12.2k
      val = pSrc.buf[(y + 1) * strideS];
864
150k
      for( int x = 0; x < wdt; x++ )
865
137k
      {
866
137k
        pred[x] = val;
867
137k
      }
868
12.2k
      pred += strideP;
869
12.2k
    }
870
1.16k
  }
871
943
  else
872
943
  {
873
10.5k
    for( int y = 0; y < hgt; y++ )
874
9.60k
    {
875
116k
      for( int x = 0; x < wdt; x++ )
876
106k
      {
877
106k
        pred[x] = pSrc.buf[x + 1];
878
106k
      }
879
9.60k
      pred += strideP;
880
9.60k
    }
881
943
  }
882
2.10k
}
883
884
/// Blends an intra prediction into the prediction already stored in predUnit.
///
/// For each processed component an intra prediction is generated into one of the m_piYuvExt
/// buffers and then mixed, sample by sample, with the content of predUnit using weights that
/// depend on whether the left and above neighbouring CUs are intra coded. The two weights
/// always add up to 4 and the result is rounded with a shift by 2.
///
/// @param predUnit in: existing prediction; out: blended prediction.
/// @param cu       coding unit being predicted.
void IntraPrediction::predBlendIntraCiip( PelUnitBuf &predUnit, const CodingUnit &cu )
{
  // Luma is always processed; chroma only if it is enabled and wider than two samples.
  const bool withChroma    = isChromaEnabled( cu.chromaFormat ) && cu.chromaSize().width > 2;
  const int  numComponents = withChroma ? MAX_NUM_COMPONENT : 1;

  for( int compIdx = 0; compIdx < numComponents; compIdx++ )
  {
    PelBuf&         mergePred    = predUnit.bufs[ compIdx ];
    const int       blkWidth     = mergePred.width;
    const int       blkHeight    = mergePred.height;
    const ptrdiff_t intraStride  = blkWidth;
    const ptrdiff_t mergeStride  = mergePred.stride;
    Pel*            mergeSamples = mergePred.buf;
    const bool      isUseFilter  = compIdx == 0 && IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, cu, cu );
    Pel*            intraSamples = m_piYuvExt[!isUseFilter];
    PelBuf          intraPred( intraSamples, intraStride, blkWidth, blkHeight );

    {
      PROFILER_SCOPE_AND_STAGE_EXT( 1, g_timeProfiler, P_INTRAPRED, *cu.cs, compID );

      initIntraPatternChType( cu.firstTU, cu.blocks[compIdx], isUseFilter );
      predIntraAng( ComponentID( compIdx ), intraPred, cu, isUseFilter );
    }

    CHECKD( width == 2, "Width of 2 is not supported" );

    // Neighbours are always evaluated on the luma block: left of the bottom-left sample and
    // above the top-right sample.
    const Position    lumaBL  = cu.Y().bottomLeft();
    const Position    lumaTR  = cu.Y().topRight();
    const CodingUnit* cuLeft  = cu.cs->getCURestricted( lumaBL.offset( -1, 0 ), cu, CHANNEL_TYPE_LUMA, cu.left );
    const CodingUnit* cuAbove = cu.cs->getCURestricted( lumaTR.offset( 0, -1 ), cu, CHANNEL_TYPE_LUMA, cu.above );

    int numIntraNeighbours = 0;
    if( cuLeft  && CU::isIntra( *cuLeft  ) ) numIntraNeighbours++;
    if( cuAbove && CU::isIntra( *cuAbove ) ) numIntraNeighbours++;

    // 0 intra neighbours -> 1:3, one -> 2:2, two -> 3:1 (intra:merge).
    const int wIntra = 1 + numIntraNeighbours;
    const int wMerge = 3 - numIntraNeighbours;

    for( int y = 0; y < blkHeight; y++ )
    {
      Pel*       mergeRow = mergeSamples + y * mergeStride;
      const Pel* intraRow = intraSamples + y * intraStride;

      // Samples are processed in groups of four, so a full group is always written.
      for( int x = 0; x < blkWidth; x += 4 )
      {
        for( int k = 0; k < 4; k++ )
        {
          mergeRow[x + k] = ( wMerge * mergeRow[x + k] + wIntra * intraRow[x + k] + 2 ) >> 2;
        }
      }
    }
  }
}
938
939
// Forward declarations of the neighbour availability helpers. Both are defined further down
// in this file and are called from xFillReferenceSamples before their definitions.
// Each returns a count of available units that is capped at uiNumUnitsInPU.
// Note: the definition of isLeftAvailable names its last parameter unitHeight.
inline int isAboveAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
940
                            const uint32_t uiNumUnitsInPU, const uint32_t unitWidth);
941
inline int isLeftAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
942
                           const uint32_t uiNumUnitsInPU, const uint32_t unitWidth);
943
944
void IntraPrediction::initIntraPatternChType(const TransformUnit &tu, const CompArea &area, const bool bFilterRefSamples)
945
48.2k
{
946
48.2k
  CHECK( area.width == 2, "Width of 2 is not supported" );
947
48.2k
  const CodingStructure& cs   = *tu.cu->cs;
948
949
48.2k
  Pel *refBufUnfiltered   = m_piYuvExt[PRED_BUF_UNFILTERED];
950
48.2k
  Pel *refBufFiltered     = m_piYuvExt[PRED_BUF_FILTERED];
951
952
48.2k
  setReferenceArrayLengths( area );
953
954
  // ----- Step 1: unfiltered reference samples -----
955
48.2k
  xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, tu );
956
  // ----- Step 2: filtered reference samples -----
957
48.2k
  if( bFilterRefSamples )
958
3.30k
  {
959
3.30k
    xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps , tu.cu->multiRefIdx() );
960
3.30k
  }
961
48.2k
}
962
963
void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf)
964
5.23k
{
965
5.23k
  const CodingStructure& cs = *cu.cs;
966
967
5.23k
  const Position &posLT = area.pos();
968
5.23k
  bool isLeftAvail  = nullptr != cs.getCURestricted( posLT.offset( -1, 0 ), cu, CH_L, posLT.x == cu.lx() ? cu.left : &cu );
969
5.23k
  bool isAboveAvail = nullptr != cs.getCURestricted( posLT.offset( 0, -1 ), cu, CH_L, posLT.y == cu.ly() ? cu.left : &cu );
970
971
  // ----- Step 1: unfiltered reference samples -----
972
5.23k
  if( cu.blocks[area.compID()].x == area.x && cu.blocks[area.compID()].y == area.y )
973
1.47k
  {
974
1.47k
    Pel* refBufUnfiltered = m_piYuvExt[PRED_BUF_UNFILTERED];
975
    // With the first subpartition all the CU reference samples are fetched at once in a single call to xFillReferenceSamples
976
1.47k
    if( cu.ispMode() == HOR_INTRA_SUBPARTITIONS )
977
738
    {
978
738
      m_leftRefLength = cu.Y().height << 1;
979
738
      m_topRefLength  = cu.Y().width + area.width;
980
738
    }
981
737
    else //if (cu.ispMode() == VER_INTRA_SUBPARTITIONS)
982
737
    {
983
737
      m_leftRefLength = cu.Y().height + area.height;
984
737
      m_topRefLength  = cu.Y().width << 1;
985
737
    }
986
987
1.47k
    const int srcStride = m_topRefLength + 1;
988
1.47k
    const int srcHStride = m_leftRefLength + 1;
989
990
1.47k
    m_pelBufISP[0] = m_pelBufISPBase[0] = PelBuf(m_piYuvExt[PRED_BUF_UNFILTERED], srcStride, srcHStride);
991
1.47k
    m_pelBufISP[1] = m_pelBufISPBase[1] = PelBuf(m_piYuvExt[PRED_BUF_FILTERED], srcStride, srcHStride);
992
993
1.47k
    xFillReferenceSamples( cs.picture->getRecoBuf( cu.Y() ), refBufUnfiltered, cu.Y(), isLuma( area.compID() ) ? cu.firstTU : *cu.lastTU );
994
995
    // After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the current subpartition
996
1.47k
    m_topRefLength = cu.blocks[area.compID()].width + area.width;
997
1.47k
    m_leftRefLength = cu.blocks[area.compID()].height + area.height;
998
1.47k
  }
999
3.76k
  else
1000
3.76k
  {
1001
    //Now we only need to fetch the newly available reconstructed samples from the previously coded TU
1002
3.76k
    Position tuPos = area;
1003
3.76k
    tuPos.relativeTo(cu.Y());
1004
3.76k
    m_pelBufISP[0] = m_pelBufISPBase[0].subBuf(tuPos, area.size());
1005
3.76k
    m_pelBufISP[1] = m_pelBufISPBase[1].subBuf(tuPos, area.size());
1006
1007
3.76k
    PelBuf& dstBuf = m_pelBufISP[0];
1008
1009
3.76k
    m_topRefLength = cu.blocks[area.compID()].width + area.width;
1010
3.76k
    m_leftRefLength = cu.blocks[area.compID()].height + area.height;
1011
1012
3.76k
    const int predSizeHor = m_topRefLength;
1013
3.76k
    const int predSizeVer = m_leftRefLength;
1014
3.76k
    if (cu.ispMode() == HOR_INTRA_SUBPARTITIONS)
1015
2.14k
    {
1016
2.14k
      Pel* src = recBuf.bufAt(0, -1);
1017
2.14k
      Pel* dst = dstBuf.bufAt(1, 0);
1018
53.2k
      for (int i = 0; i < area.width; i++)
1019
51.1k
      {
1020
51.1k
        dst[i] = src[i];
1021
51.1k
      }
1022
2.14k
      Pel sample = src[area.width - 1];
1023
2.14k
      dst += area.width;
1024
53.2k
      for (int i = 0; i < predSizeHor - area.width; i++)
1025
51.1k
      {
1026
51.1k
        dst[i] = sample;
1027
51.1k
      }
1028
2.14k
      if (!isLeftAvail) //if left is not avaible, then it is necessary to fetch these samples for each subpartition
1029
123
      {
1030
123
        Pel* dst = dstBuf.bufAt(0, 0);
1031
123
        Pel  sample = src[0];
1032
5.03k
        for (int i = 0; i < predSizeVer + 1; i++)
1033
4.90k
        {
1034
4.90k
          *dst = sample;
1035
4.90k
          dst += dstBuf.stride;
1036
4.90k
        }
1037
123
      }
1038
2.14k
    }
1039
1.61k
    else
1040
1.61k
    {
1041
1.61k
      Pel* src = recBuf.bufAt(-1, 0);
1042
1.61k
      Pel* dst = dstBuf.bufAt(0, 1);
1043
47.0k
      for (int i = 0; i < area.height; i++)
1044
45.4k
      {
1045
45.4k
        *dst = *src;
1046
45.4k
        src += recBuf.stride;
1047
45.4k
        dst += dstBuf.stride;
1048
45.4k
      }
1049
1.61k
      Pel sample = src[-recBuf.stride];
1050
47.0k
      for (int i = 0; i < predSizeVer - area.height; i++)
1051
45.4k
      {
1052
45.4k
        *dst = sample;
1053
45.4k
        dst += dstBuf.stride;
1054
45.4k
      }
1055
1056
1.61k
      if (!isAboveAvail) //if above is not avaible, then it is necessary to fetch these samples for each subpartition
1057
266
      {
1058
266
        Pel* dst = dstBuf.bufAt(0, 0);
1059
266
        Pel  sample = recBuf.at(-1, 0);
1060
15.9k
        for (int i = 0; i < predSizeHor + 1; i++)
1061
15.6k
        {
1062
15.6k
          dst[i] = sample;
1063
15.6k
        }
1064
266
      }
1065
1.61k
    }
1066
3.76k
  }
1067
5.23k
}
1068
1069
void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const TransformUnit &tu )
1070
49.6k
{
1071
49.6k
  const ChannelType      chType = toChannelType( area.compID() );
1072
49.6k
  const CodingUnit      &cu     = *tu.cu;
1073
49.6k
  const CodingStructure &cs     = *cu.cs;
1074
49.6k
  const SPS             &sps    = *cs.sps;
1075
49.6k
  const PreCalcValues   &pcv    = *cs.pcv;
1076
1077
49.6k
  const int multiRefIdx         = (area.compID() == COMPONENT_Y) ? cu.multiRefIdx() : 0;
1078
1079
49.6k
  const int  tuWidth            = area.width;
1080
49.6k
  const int  tuHeight           = area.height;
1081
49.6k
  const int  predSize           = m_topRefLength;
1082
49.6k
  const int  predHSize          = m_leftRefLength;
1083
49.6k
  const int  predStride         = predSize + 1 + multiRefIdx;
1084
1085
49.6k
  const int  csx                = getChannelTypeScaleX( chType, pcv.chrFormat );
1086
49.6k
  const int  csy                = getChannelTypeScaleY( chType, pcv.chrFormat );
1087
1088
49.6k
  const int  unitWidth          = pcv.minCUWidth  >> csx;
1089
49.6k
  const int  unitHeight         = pcv.minCUHeight >> csy;
1090
1091
49.6k
  const int  totalAboveUnits    = (predSize + (unitWidth - 1)) / unitWidth;
1092
49.6k
  const int  totalLeftUnits     = (predHSize + (unitHeight - 1)) / unitHeight;
1093
49.6k
  const int  totalUnits         = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
1094
49.6k
  const int  numAboveUnits      = tuWidth / unitWidth;
1095
49.6k
  const int  numLeftUnits       = tuHeight / unitHeight;
1096
49.6k
  const int  numAboveRightUnits = totalAboveUnits - numAboveUnits;
1097
49.6k
  const int  numLeftBelowUnits  = totalLeftUnits - numLeftUnits;
1098
1099
49.6k
  CHECK( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported" );
1100
1101
  // ----- Step 1: analyze neighborhood -----
1102
49.6k
  if( m_lastCUidx == tu.cu->idx && area.compID() != getFirstComponentOfChannel( tu.cu->chType() ) )
1103
16.3k
  {
1104
16.3k
  }
1105
33.3k
  else
1106
33.3k
  {
1107
33.3k
    const Position posLT = area.pos();
1108
  
1109
33.3k
    const bool sameCTUx  = !!( posLT.x & ( pcv.maxCUWidthMask  >> csx ) );
1110
33.3k
    const bool sameCTUy  = !!( posLT.y & ( pcv.maxCUHeightMask >> csy ) );
1111
33.3k
    const bool sameCTU   = sameCTUx && sameCTUy;
1112
1113
33.3k
    m_neighborSize[0]    = sameCTU ? 1 : !!cu.cs->getCURestricted( posLT.offset( -1, -1 ), cu, chType, cu.left ? cu.left : cu.above );
1114
1115
33.3k
    if( cu.above || area.y > tu.cu->blocks[chType].y )
1116
26.9k
    {
1117
26.9k
      m_neighborSize[1] = numAboveUnits;
1118
1119
26.9k
      Position posAR{ posLT.x + ( PosType ) area.width, posLT.y };
1120
26.9k
      m_neighborSize[1] += isAboveAvailable( tu, chType, posAR, numAboveRightUnits, unitWidth );
1121
26.9k
    }
1122
6.33k
    else
1123
6.33k
      m_neighborSize[1] = 0;
1124
1125
33.3k
    if( cu.left || area.x > tu.cu->blocks[chType].x )
1126
29.9k
    {
1127
29.9k
      m_neighborSize[2] = numLeftUnits;
1128
1129
29.9k
      Position posLB{ posLT.x, posLT.y + ( PosType ) area.height };
1130
29.9k
      m_neighborSize[2] += isLeftAvailable( tu, chType, posLB, numLeftBelowUnits, unitHeight );
1131
29.9k
    }
1132
3.33k
    else
1133
3.33k
      m_neighborSize[2] = 0;
1134
1135
33.3k
    m_lastCUidx = tu.cu->idx;
1136
33.3k
  }
1137
1138
49.6k
  int numIntraNeighbor = m_neighborSize[0] + m_neighborSize[1] + m_neighborSize[2];
1139
1140
  // ----- Step 2: fill reference samples (depending on neighborhood) -----
1141
49.6k
  const Pel*  srcBuf    = recoBuf.buf;
1142
49.6k
  const ptrdiff_t srcStride = recoBuf.stride;
1143
49.6k
        Pel*  ptrDst    = refBufUnfiltered;
1144
49.6k
  const Pel*  ptrSrc;
1145
49.6k
  const Pel   valueDC   = 1 << (sps.getBitDepth() - 1);
1146
1147
49.6k
  if( numIntraNeighbor == 0 )
1148
1.59k
  {
1149
    // Fill border with DC value
1150
86.6k
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; }
1151
77.2k
    for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = valueDC; }
1152
1.59k
  }
1153
48.0k
  else if( numIntraNeighbor == totalUnits )
1154
7.03k
  {
1155
    // Fill top-left border and top and top right with rec. samples
1156
7.03k
    ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
1157
215k
    for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; }
1158
7.03k
    ptrSrc = srcBuf - multiRefIdx * srcStride - (1 + multiRefIdx);
1159
179k
    for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = *(ptrSrc); ptrSrc += srcStride; }
1160
7.03k
  }
1161
41.0k
  else // reference samples are partially available
1162
41.0k
  {
1163
    // Fill top-left sample(s) if available
1164
41.0k
    if ( m_neighborSize[2] > 0) {  // left is available
1165
      // Fill left & below-left samples if available (downwards)
1166
37.4k
      ptrSrc = srcBuf - (1 + multiRefIdx);
1167
37.4k
      ptrDst = refBufUnfiltered + (1 + multiRefIdx) * predStride;
1168
37.4k
      int tmpSize = m_neighborSize[2] * unitHeight;
1169
37.4k
      tmpSize = std::min(tmpSize, predHSize);
1170
916k
      for (int i = 0; i < tmpSize; i++) {
1171
878k
        ptrDst[i * predStride] = ptrSrc[i * srcStride];
1172
878k
      }
1173
1174
      // pad
1175
37.4k
      Pel tmpPixel = ptrDst[(tmpSize - 1) * predStride];
1176
653k
      for (int i = tmpSize; i < predHSize; i++) {
1177
616k
        ptrDst[i * predStride] = tmpPixel;
1178
616k
      }
1179
1180
      // Fill top-left sample(s) if available
1181
37.4k
      if ( m_neighborSize[0]) {
1182
29.3k
        ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
1183
29.3k
        ptrDst = refBufUnfiltered;
1184
29.3k
        memcpy(ptrDst, ptrSrc, sizeof(Pel) * (multiRefIdx + 1));
1185
30.3k
        for (int i = 1; i <= multiRefIdx; i++) {
1186
974
          ptrDst[i * predStride] = ptrSrc[i * srcStride];
1187
974
        }
1188
29.3k
      } else {                                // pad
1189
8.12k
        ptrSrc = srcBuf - (1 + multiRefIdx);  // left pixel
1190
8.12k
        ptrDst = refBufUnfiltered;
1191
8.12k
        tmpPixel = ptrSrc[0];
1192
8.12k
        ptrDst[0] = tmpPixel;
1193
8.12k
        for (int i = 1; i <= multiRefIdx; i++) {
1194
0
          ptrDst[i] = tmpPixel;
1195
0
          ptrDst[i * predStride] = tmpPixel;
1196
0
        }
1197
8.12k
      }
1198
1199
      // Fill above & above-right samples if available (left-to-right)
1200
37.4k
      if ( m_neighborSize[1]) {
1201
29.3k
        ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
1202
29.3k
        ptrDst = refBufUnfiltered + 1 + multiRefIdx;
1203
29.3k
        tmpSize = m_neighborSize[1] * unitWidth;
1204
29.3k
        tmpSize = std::min(tmpSize, predSize);
1205
29.3k
        memcpy(ptrDst, ptrSrc, tmpSize * sizeof(Pel));
1206
        // pad
1207
29.3k
        Pel tmpPixel = ptrDst[tmpSize - 1];
1208
385k
        for (int i = tmpSize; i < predSize; i++) {
1209
356k
          ptrDst[i] = tmpPixel;
1210
356k
        }
1211
29.3k
      } else {  // all not available, pad
1212
8.11k
        ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
1213
8.11k
        ptrDst = refBufUnfiltered + 1 + multiRefIdx;
1214
8.11k
        Pel tmpPixel = ptrDst[-1];
1215
8.11k
        std::fill_n(ptrDst, predSize, tmpPixel);
1216
8.11k
      }
1217
37.4k
    } else {  // left is not available, top must be available
1218
      // Fill above & above-right samples (left-to-right)
1219
3.60k
      ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
1220
3.60k
      ptrDst = refBufUnfiltered + 1 + multiRefIdx;
1221
3.60k
      int tmpSize = m_neighborSize[1] * unitWidth;
1222
3.60k
      tmpSize = std::min(tmpSize, predSize);
1223
3.60k
      memcpy(ptrDst, ptrSrc, tmpSize * sizeof(Pel));
1224
      // pad
1225
3.60k
      Pel tmpPixel = ptrDst[tmpSize - 1];
1226
16.4k
      for (int i = tmpSize; i < predSize; i++) {
1227
12.8k
        ptrDst[i] = tmpPixel;
1228
12.8k
      }
1229
1230
3.60k
      tmpPixel = ptrSrc[0];
1231
      // pad top-left sample(s)
1232
3.60k
      ptrDst = refBufUnfiltered;
1233
3.60k
      ptrDst[0] = tmpPixel;
1234
3.70k
      for (int i = 1; i <= multiRefIdx; i++) {
1235
98
        ptrDst[i] = tmpPixel;
1236
98
        ptrDst[i * predStride] = tmpPixel;
1237
98
      }
1238
1239
      // pad left sample(s)
1240
3.60k
      ptrDst = refBufUnfiltered + (1 + multiRefIdx) * predStride;
1241
184k
      for (int i = 0; i < predHSize; i++) {
1242
180k
        ptrDst[i * predStride] = tmpPixel;
1243
180k
      }
1244
3.60k
    }
1245
41.0k
  }
1246
49.6k
}
1247
1248
void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea &area, const SPS &sps, int multiRefIdx, ptrdiff_t stride ) const
1249
3.30k
{
1250
3.30k
  if (area.compID() != COMPONENT_Y)
1251
0
  {
1252
0
    multiRefIdx = 0;
1253
0
  }
1254
3.30k
  const int       predSize   = m_topRefLength  + multiRefIdx;
1255
3.30k
  const int       predHSize  = m_leftRefLength + multiRefIdx;
1256
3.30k
  const ptrdiff_t predStride = stride == 0 ? predSize + 1 : stride;
1257
1258
1259
1260
  // Regular reference sample filter
1261
3.30k
  const Pel *piSrcPtr  = refBufUnfiltered + (predStride * predHSize); // bottom left
1262
3.30k
        Pel *piDestPtr = refBufFiltered   + (predStride * predHSize); // bottom left
1263
1264
  // bottom left (not filtered)
1265
3.30k
  *piDestPtr = *piSrcPtr;
1266
3.30k
  piDestPtr -= predStride;
1267
3.30k
  piSrcPtr  -= predStride;
1268
  //left column (bottom to top)
1269
159k
  for( int i = 1; i < predHSize; i++, piDestPtr -= predStride, piSrcPtr -= predStride)
1270
155k
  {
1271
155k
    *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[-predStride] + 2) >> 2;
1272
155k
  }
1273
  //top-left
1274
3.30k
  *piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[1] + 2) >> 2;
1275
3.30k
  piDestPtr++;
1276
3.30k
  piSrcPtr++;
1277
  //top row (left-to-right)
1278
165k
  for( uint32_t i=1; i < predSize; i++, piDestPtr++, piSrcPtr++ )
1279
162k
  {
1280
162k
    *piDestPtr = (piSrcPtr[1] + 2 * piSrcPtr[0] + piSrcPtr[-1] + 2) >> 2;
1281
162k
  }
1282
  // top right (not filtered)
1283
3.30k
  *piDestPtr=*piSrcPtr;
1284
3.30k
}
1285
1286
static bool getUseFilterRef( const int predMode )
1287
2.55k
{
1288
2.55k
  static const int angTable[32]    = { 0,    1,    2,    3,    4,    6,     8,   10,   12,   14,   16,   18,   20,   23,   26,   29,   32,   35,   39,  45,  51,  57,  64,  73,  86, 102, 128, 171, 256, 341, 512, 1024 };
1289
1290
2.55k
  const int     intraPredAngleMode = ( predMode >= DIA_IDX ) ? predMode - VER_IDX : -( predMode - HOR_IDX );
1291
1292
2.55k
  const int     absAngMode         = abs(intraPredAngleMode);
1293
2.55k
  const int     absAng             = angTable   [absAngMode];
1294
1295
2.55k
  return 0 == ( absAng & 0x1F );
1296
2.55k
}
1297
1298
bool IntraPrediction::useFilteredIntraRefSamples( const ComponentID &compID, const CodingUnit &cu, const UnitArea &tuArea )
1299
12.7k
{
1300
  //const SPS         &sps    = *cu.sps;
1301
12.7k
  const ChannelType  chType = toChannelType( compID );
1302
1303
  // high level conditions
1304
  //if( sps.getSpsRangeExtension().getIntraSmoothingDisabledFlag() )  { return false; }
1305
  //if( !isLuma( chType ) )                                           { return false; }
1306
  //if( cu.ispMode() && isLuma(compID) )                              { return false; }
1307
  //if( CU::isMIP( cu, chType ) )                                     { return false; }
1308
12.7k
  if( cu.multiRefIdx() )                                            { return false; }
1309
11.9k
  if( cu.bdpcmMode() )                                              { return false; }
1310
1311
  // pred. mode related conditions
1312
10.0k
  const int dirMode = PU::getFinalIntraMode( cu, chType );
1313
10.0k
  if (dirMode == DC_IDX)                                            { return false; }
1314
9.24k
  if (dirMode == PLANAR_IDX)
1315
3.33k
  {
1316
3.33k
    return tuArea.blocks[compID].area() > 32 ? true : false;
1317
3.33k
  }
1318
1319
5.90k
  const int predMode = getWideAngle(tuArea.blocks[compID].width, tuArea.blocks[compID].height, dirMode);
1320
5.90k
  const int diff     = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
1321
5.90k
  const int log2Size = ( ( getLog2( tuArea.blocks[compID].width ) + getLog2( tuArea.blocks[compID].height ) ) >> 1 );
1322
5.90k
  CHECKD( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
1323
5.90k
  return diff > m_aucIntraFilter[ chType ][ log2Size ] && getUseFilterRef( predMode );
1324
9.24k
}
1325
1326
/// Walks the transform unit list of a CU and returns the first TU whose block of the given
/// channel type extends beyond pos in both x and y.
///
/// A CU with a single TU returns that TU without any position test. The walk does not test
/// for the end of the list.
/// NOTE(review): presumably callers only pass positions that lie inside the CU - confirm.
static inline TransformUnit const* getTU( const CodingUnit& cu, const Position& pos, const ChannelType chType )
{
  const TransformUnit* candidate = &cu.firstTU;

  if( candidate->next )
  {
    for( ;; candidate = candidate->next )
    {
      const auto& blk = candidate->blocks[chType];

      if( blk.x + blk.width > pos.x && blk.y + blk.height > pos.y )
      {
        break;
      }
    }
  }

  return candidate;
}
1339
1340
int isAboveAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
1341
                     const uint32_t uiNumUnitsInPU, const uint32_t unitWidth)
1342
28.6k
{
1343
28.6k
  const CodingUnit      &cu = *tu.cu;
1344
28.6k
  const CodingStructure &cs = *cu.cs;
1345
1346
28.6k
  int maxDx = uiNumUnitsInPU * unitWidth;
1347
28.6k
  Position refPos = posLT.offset(0, -1);
1348
28.6k
  const TransformUnit *pcTUAbove = nullptr;
1349
28.6k
  const int currTUIdx = tu.idx;
1350
28.6k
  int dx = 0;
1351
1352
47.0k
  while( dx < maxDx )
1353
32.4k
  {
1354
32.4k
    const CodingUnit* cuAbove = cs.getCURestricted( refPos, cu, chType, pcTUAbove ? nullptr : cu.above );
1355
1356
32.4k
    if( !cuAbove ) break;
1357
18.4k
    pcTUAbove = getTU( *cuAbove, refPos, chType );
1358
18.4k
    if( cuAbove->ctuData == cu.ctuData && pcTUAbove->idx >= currTUIdx ) break;
1359
1360
18.4k
    int diff  = ( int ) pcTUAbove->blocks[chType].width - refPos.x + pcTUAbove->blocks[chType].x;
1361
18.4k
    dx       += diff;
1362
18.4k
    refPos.x += diff;
1363
18.4k
  }
1364
1365
28.6k
  int neighborSize = dx / unitWidth;
1366
28.6k
  neighborSize = std::min<int>( neighborSize, uiNumUnitsInPU );
1367
28.6k
  return neighborSize;
1368
28.6k
}
1369
1370
int isLeftAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
1371
                    const uint32_t uiNumUnitsInPU, const uint32_t unitHeight)
1372
31.6k
{
1373
31.6k
  const CodingUnit      &cu = *tu.cu;
1374
31.6k
  const CodingStructure &cs = *cu.cs;
1375
1376
31.6k
  int maxDy = uiNumUnitsInPU * unitHeight;
1377
31.6k
  Position refPos = posLT.offset(-1, 0);
1378
31.6k
  const TransformUnit *pcTULeft = nullptr;
1379
31.6k
  int currTUIdx = tu.idx;
1380
31.6k
  int dy = 0;
1381
1382
45.6k
  while( dy < maxDy )
1383
34.6k
  {
1384
34.6k
    const CodingUnit* cuLeft = cs.getCURestricted( refPos, cu, chType, pcTULeft ? nullptr : cu.left );
1385
1386
34.6k
    if( !cuLeft ) break;
1387
13.9k
    pcTULeft = getTU( *cuLeft, refPos, chType );
1388
13.9k
    if( cuLeft->ctuData == cu.ctuData && pcTULeft->idx >= currTUIdx ) break;
1389
1390
13.9k
    int diff  = ( int ) pcTULeft->blocks[chType].height - refPos.y + pcTULeft->blocks[chType].y;
1391
13.9k
    dy       += diff;
1392
13.9k
    refPos.y += diff;
1393
13.9k
  }
1394
1395
31.6k
  int neighborSize = dy / unitHeight;
1396
31.6k
  neighborSize = std::min<int>( neighborSize, uiNumUnitsInPU );
1397
31.6k
  return neighborSize;
1398
31.6k
}
1399
// LumaRecPixels
1400
// Fills the temporary buffer m_piYuvExt[1] with filtered reconstructed luma samples
// for the given chroma area: first the row above the block (if available), then the
// column left of it (if available), and finally the block itself.
//
//  - cu          coding unit; supplies the chroma mode (intraDir[1]), chroma format,
//                SPS flags and the picture whose reconstruction buffer is read
//  - chromaArea  chroma block for which the luma samples are gathered
//
// The destination stride is 2 * MAX_TU_SIZE_FOR_PROFILE + 1 for the two MDLM modes
// and MAX_TU_SIZE_FOR_PROFILE + 1 otherwise. Which filter is used (3-, 5- or 6-tap)
// depends on whether the luma area starts a CTU row and on the SPS
// CclmCollocatedChromaFlag; the tap weights depend on the chroma format.
NO_THREAD_SANITIZE void IntraPrediction::xGetLumaRecPixels(const CodingUnit &cu, CompArea chromaArea)
{
  const int  curChromaMode = cu.intraDir[1];
  const bool isMdlmMode    = ( curChromaMode == MDLM_L_IDX ) || ( curChromaMode == MDLM_T_IDX );

  // both mode groups place the block origin one row and one column into the buffer
  int  iDstStride = isMdlmMode ? 2 * MAX_TU_SIZE_FOR_PROFILE + 1 : MAX_TU_SIZE_FOR_PROFILE + 1;
  Pel* pDst0      = m_piYuvExt[1] + iDstStride + 1;

  //assert 420 chroma subsampling
  CompArea lumaArea = CompArea( COMPONENT_Y, chromaArea.lumaPos( cu.chromaFormat),
                                recalcSize( cu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus)

  CHECK( lumaArea.width  == chromaArea.width  && CHROMA_444 != cu.chromaFormat, "" );
  CHECK( lumaArea.height == chromaArea.height && CHROMA_444 != cu.chromaFormat && CHROMA_422 != cu.chromaFormat, "" );

  const SizeType uiCWidth  = chromaArea.width;
  const SizeType uiCHeight = chromaArea.height;

  CPelBuf    Src         = cu.cs->picture->getRecoBuf( lumaArea );
  Pel const* pRecSrc0    = Src.bufAt( 0, 0 );
  ptrdiff_t  iRecStride  = Src.stride;
  int logSubWidthC  = getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, cu.chromaFormat);
  int logSubHeightC = getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, cu.chromaFormat);

  ptrdiff_t iRecStride2 = iRecStride << logSubHeightC;    // TODO: really Height here? not Width?
  const int mult        =          1 << logSubWidthC ;

  const CompArea& area = isChroma( cu.chType() ) ? chromaArea : lumaArea;

  const uint32_t uiTuWidth  = area.width;
  const uint32_t uiTuHeight = area.height;

  int iBaseUnitSize = ( 1 << MIN_CU_LOG2 );

  const int  iUnitWidth       = iBaseUnitSize >> getComponentScaleX( area.compID(), cu.chromaFormat );
  const int  iUnitHeight      = iBaseUnitSize >> getComponentScaleY( area.compID(), cu.chromaFormat );
  const int  iTUWidthInUnits  = uiTuWidth  / iUnitWidth;
  const int  iTUHeightInUnits = uiTuHeight / iUnitHeight;
  const int  chromaUnitWidth  = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, cu.chromaFormat);
  const int  chromaUnitHeight = iBaseUnitSize >> getComponentScaleY(COMPONENT_Cb, cu.chromaFormat);
  const int  topTemplateSampNum  = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H.
  const int  leftTemplateSampNum = 2 * uiCHeight;
  CHECKD( !( m_topRefLength >= topTemplateSampNum ), "Error!" );
  CHECKD( !( m_leftRefLength >= leftTemplateSampNum ), "Error!" );

  // MDLM_T / MDLM_L use the extended (2W / 2H) template, everything else the TU extent
  int totalAboveUnits = iTUWidthInUnits;
  if( curChromaMode == MDLM_T_IDX )
  {
    totalAboveUnits = (topTemplateSampNum + (chromaUnitWidth - 1)) / chromaUnitWidth;
  }
  int totalLeftUnits = iTUHeightInUnits;
  if( curChromaMode == MDLM_L_IDX )
  {
    totalLeftUnits = (leftTemplateSampNum + (chromaUnitHeight - 1)) / chromaUnitHeight;
  }

  const int  availlableLeftUnit = ( cu.left || chromaArea.x > cu.blocks[CH_C].x ) ? totalLeftUnits : 0;
  const bool bLeftAvaillable    = availlableLeftUnit >= iTUHeightInUnits;

  const int  availlableAboveUnit = ( cu.above || chromaArea.y > cu.blocks[CH_C].y ) ? totalAboveUnits : 0;
  const bool bAboveAvaillable    = availlableAboveUnit >= iTUWidthInUnits;

  Pel*       pDst  = nullptr;
  Pel const* piSrc = nullptr;

  bool isFirstRowOfCtu            = ( lumaArea.y & ( cu.sps->getCTUSize() - 1) ) == 0;
  const ptrdiff_t strOffset       = ( CHROMA_444 == cu.chromaFormat ) ? 0 : iRecStride;

  // default tap weights; replaced below for 4:2:2 and 4:4:4
  int c0_3tap = 2;
  int c1_3tap = 1;
  int c2_3tap = 1;
  int offset_3tap = 2;
  int shift_3tap  = 2; //sum = 4

  int c0_5tap = 1;
  int c1_5tap = 4;
  int c2_5tap = 1;
  int c3_5tap = 1;
  int c4_5tap = 1;
  int offset_5tap = 4;
  int shift_5tap  = 3; //sum = 8

  int c0_6tap = 2;
  int c1_6tap = 1;
  int c2_6tap = 1;
  int c3_6tap = 2;
  int c4_6tap = 1;
  int c5_6tap = 1;
  int offset_6tap = 4;
  int shift_6tap  = 3; //sum = 8

  if( cu.chromaFormat == CHROMA_422 )
  {
    //overwrite filter coefficient values for 422
    c0_3tap = 2; c1_3tap = 1; c2_3tap = 1;
    offset_3tap = 2; shift_3tap = 2; //sum = 4

    c0_5tap = 0; c1_5tap = 2; c2_5tap = 1; c3_5tap = 1; c4_5tap = 0;
    offset_5tap = 2; shift_5tap = 2; //sum = 4

    c0_6tap = 2; c1_6tap = 1; c2_6tap = 1; c3_6tap = 0; c4_6tap = 0; c5_6tap = 0;
    offset_6tap = 2; shift_6tap = 2; //sum = 4
  }
  else if( cu.chromaFormat == CHROMA_444 )
  {
    //overwrite filter coefficient values for 444
    c0_3tap = 1; c1_3tap = 0; c2_3tap = 0;
    offset_3tap = 0; shift_3tap = 0; //sum = 1

    c0_5tap = 0; c1_5tap = 1; c2_5tap = 0; c3_5tap = 0; c4_5tap = 0;
    offset_5tap = 0; shift_5tap = 0; //sum = 1

    c0_6tap = 1; c1_6tap = 0; c2_6tap = 0; c3_6tap = 0; c4_6tap = 0; c5_6tap = 0;
    offset_6tap = 0; shift_6tap = 0; //sum = 1
  }

  // ---- row above the block ----
  if( bAboveAvaillable )
  {
    pDst = pDst0 - iDstStride;

    const int avaiAboveSizes = availlableAboveUnit * chromaUnitWidth;

    for( int i = 0; i < avaiAboveSizes; i++ )
    {
      // at a padded position the "left neighbour" tap re-reads the centre sample
      const bool padLeft = ( i == 0 && !bLeftAvaillable ) || ( i == uiCWidth + avaiAboveSizes - 1 + logSubWidthC );
      const int  ctr     = mult * i;
      const int  lft     = padLeft ? ctr : ctr - 1;

      if( isFirstRowOfCtu )
      {
        // only the single luma row directly above is read here
        piSrc   = pRecSrc0 - iRecStride;
        pDst[i] = ( piSrc[ctr] * c0_3tap + piSrc[lft] * c1_3tap + piSrc[ctr + 1] * c2_3tap + offset_3tap ) >> shift_3tap;
      }
      else if( cu.sps->getCclmCollocatedChromaFlag() )
      {
        piSrc   = pRecSrc0 - iRecStride2;
        pDst[i] = ( piSrc[ctr - strOffset] * c0_5tap
                  + piSrc[ctr]             * c1_5tap + piSrc[lft] * c2_5tap + piSrc[ctr + 1] * c3_5tap
                  + piSrc[ctr + strOffset] * c4_5tap
                  + offset_5tap ) >> shift_5tap;
      }
      else
      {
        piSrc   = pRecSrc0 - iRecStride2;
        pDst[i] = ( ( piSrc[ctr]             * c0_6tap + piSrc[lft]             * c1_6tap + piSrc[ctr + 1]             * c2_6tap )
                  + ( piSrc[ctr + strOffset] * c3_6tap + piSrc[lft + strOffset] * c4_6tap + piSrc[ctr + 1 + strOffset] * c5_6tap )
                  + offset_6tap ) >> shift_6tap;
      }
    }
  }

  // ---- column left of the block ----
  if( bLeftAvaillable )
  {
    pDst  = pDst0    - 1;
    piSrc = pRecSrc0 - 2 - logSubWidthC;

    const int availlableLeftSizes = availlableLeftUnit * chromaUnitHeight;

    for( int j = 0; j < availlableLeftSizes; j++ )
    {
      if( cu.sps->getCclmCollocatedChromaFlag() )
      {
        // at a padded position the "upper neighbour" tap re-reads the centre sample
        const bool      padTop = ( j == 0 && !bAboveAvaillable ) || ( j == uiCHeight + availlableLeftSizes - 1 + logSubWidthC );
        const ptrdiff_t top    = padTop ? 1 : 1 - strOffset;

        pDst[0] = ( piSrc[top          ] * c0_5tap
                  + piSrc[1            ] * c1_5tap + piSrc[0] * c2_5tap + piSrc[2] * c3_5tap
                  + piSrc[1 + strOffset] * c4_5tap
                  + offset_5tap ) >> shift_5tap;
      }
      else
      {
        pDst[0] = ( ( piSrc[1]             * c0_6tap + piSrc[0]         * c1_6tap + piSrc[2]             * c2_6tap )
                  + ( piSrc[1 + strOffset] * c3_6tap + piSrc[strOffset] * c4_6tap + piSrc[2 + strOffset] * c5_6tap )
                  +   offset_6tap ) >> shift_6tap;
      }

      piSrc += iRecStride2;
      pDst  += iDstStride;
    }
  }

  // ---- block interior, 5-tap variant ----
  if( cu.sps->getCclmCollocatedChromaFlag() )
  {
    // TODO: unroll loop
    for( int j = 0; j < uiCHeight; j++ )
    {
      const bool padTop = ( j == 0 && !bAboveAvaillable );

      for( int i = 0; i < uiCWidth; i++ )
      {
        const bool      padLeft = ( i == 0 && !bLeftAvaillable );
        const int       ctr     = mult * i;
        const ptrdiff_t top     = padTop  ? ctr : ctr - strOffset;
        const int       lft     = padLeft ? ctr : ctr - 1;

        pDst0[i] = ( pRecSrc0[top]             * c0_5tap
                   + pRecSrc0[ctr]             * c1_5tap + pRecSrc0[lft] * c2_5tap + pRecSrc0[ctr + 1] * c3_5tap
                   + pRecSrc0[ctr + strOffset] * c4_5tap
                   + offset_5tap ) >> shift_5tap;
      }
      pDst0    += iDstStride;
      pRecSrc0 += iRecStride2;
    }
    return;
  }

  // ---- block interior, 6-tap variant ----
  // The helper macros below refer to the surrounding locals by name.

#define GET_LUMA_REC_PIX_INC   \
  pDst0 += iDstStride;         \
  pRecSrc0 += iRecStride2

#define GET_LUMA_REC_PIX_OP2(ADDR)                                                    \
  pDst0[ADDR] = (   pRecSrc0[( (ADDR) << logSubWidthC )    ]              * c0_6tap   \
                  + pRecSrc0[( (ADDR) << logSubWidthC ) + 1]              * c1_6tap   \
                  + pRecSrc0[( (ADDR) << logSubWidthC ) - 1]              * c2_6tap   \
                  + pRecSrc0[( (ADDR) << logSubWidthC )     + iRecStride] * c3_6tap   \
                  + pRecSrc0[( (ADDR) << logSubWidthC ) + 1 + iRecStride] * c4_6tap   \
                  + pRecSrc0[( (ADDR) << logSubWidthC ) - 1 + iRecStride] * c5_6tap   \
                  + offset_6tap ) >> shift_6tap

#define GET_LUMA_REC_PIX_OP1(ADDR)                          \
  if( !(ADDR) )                                             \
  {                                                         \
    pDst0[0] = (   pRecSrc0[0    ]              * c0_6tap   \
                 + pRecSrc0[0 + 1]              * c1_6tap   \
                 + pRecSrc0[0]                  * c2_6tap   \
                 + pRecSrc0[0     + iRecStride] * c3_6tap   \
                 + pRecSrc0[0 + 1 + iRecStride] * c4_6tap   \
                 + pRecSrc0[0     + iRecStride] * c5_6tap   \
                 + offset_6tap ) >> shift_6tap;             \
  }                                                         \
  else                                                      \
  {                                                         \
    GET_LUMA_REC_PIX_OP2(ADDR);                             \
  }

  int width  = uiCWidth;
  int height = uiCHeight;

  if( !bLeftAvaillable )
  {
    // no left column: OP1 avoids reading the sample left of column 0
    SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP1, GET_LUMA_REC_PIX_INC );
  }
  else if( cu.chromaFormat == CHROMA_420 )
  {
    GetLumaRecPixel420( width, height, pRecSrc0, iRecStride, pDst0, iDstStride );
    //      SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP2, GET_LUMA_REC_PIX_INC );
  }
  else  //TODO add SIMD for 422,444
  {
    SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP2, GET_LUMA_REC_PIX_INC );
  }
}
1685
1686
#undef GET_LUMA_REC_PIX_INC
1687
#undef GET_LUMA_REC_PIX_OP1
1688
#undef GET_LUMA_REC_PIX_OP2
1689
#undef SIZE_AWARE_PER_EL_OP
1690
1691
void IntraPrediction::xGetLMParameters(const CodingUnit &cu, const ComponentID compID,
1692
                                              const CompArea &chromaArea,
1693
                                              int &a, int &b, int &iShift)
1694
12.4k
{
1695
12.4k
  CHECK(compID == COMPONENT_Y, "");
1696
1697
12.4k
  const SizeType cWidth  = chromaArea.width;
1698
12.4k
  const SizeType cHeight = chromaArea.height;
1699
1700
12.4k
  const Position posLT = chromaArea;
1701
1702
12.4k
  const CodingStructure &cs = *cu.cs;
1703
1704
12.4k
  const SPS &        sps           = *cs.sps;
1705
12.4k
  const uint32_t     tuWidth     = chromaArea.width;
1706
12.4k
  const uint32_t     tuHeight    = chromaArea.height;
1707
12.4k
  const ChromaFormat nChromaFormat = sps.getChromaFormatIdc();
1708
1709
12.4k
  const int baseUnitSize = 1 << MIN_CU_LOG2;
1710
12.4k
  const int unitWidth    = baseUnitSize >> getComponentScaleX(chromaArea.compID(), nChromaFormat);
1711
12.4k
  const int unitHeight   = baseUnitSize >> getComponentScaleX(chromaArea.compID(), nChromaFormat);
1712
1713
12.4k
  const int tuWidthInUnits  = tuWidth / unitWidth;
1714
12.4k
  const int tuHeightInUnits = tuHeight / unitHeight;
1715
12.4k
  const int aboveUnits      = tuWidthInUnits;
1716
12.4k
  const int leftUnits       = tuHeightInUnits;
1717
12.4k
  int topTemplateSampNum = 2 * cWidth; // for MDLM, the template sample number is 2W or 2H;
1718
12.4k
  int leftTemplateSampNum = 2 * cHeight;
1719
12.4k
  CHECKD( !(m_topRefLength >= topTemplateSampNum),   "Error!" );
1720
12.4k
  CHECKD( !(m_leftRefLength >= leftTemplateSampNum), "Error!" );
1721
12.4k
  int totalAboveUnits = (topTemplateSampNum + (unitWidth - 1)) / unitWidth;
1722
12.4k
  int totalLeftUnits = (leftTemplateSampNum + (unitHeight - 1)) / unitHeight;
1723
12.4k
  int aboveRightUnits = totalAboveUnits - aboveUnits;
1724
12.4k
  int leftBelowUnits = totalLeftUnits - leftUnits;
1725
1726
12.4k
  int curChromaMode = cu.intraDir[1];
1727
12.4k
  bool aboveAvailable = 0, leftAvailable = 0;
1728
1729
12.4k
  const TransformUnit& tu = *getTU( cu, chromaArea.pos(), CH_C );
1730
1731
12.4k
  Pel *srcColor0, *curChroma0;
1732
12.4k
  int  srcStride, curStride;
1733
1734
12.4k
  PelBuf temp;
1735
12.4k
  if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
1736
3.71k
  {
1737
3.71k
    srcStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1;
1738
3.71k
    temp      = PelBuf(m_piYuvExt[1] + srcStride + 1, srcStride, Size(chromaArea));
1739
3.71k
  }
1740
8.71k
  else
1741
8.71k
  {
1742
8.71k
    srcStride = MAX_TU_SIZE_FOR_PROFILE + 1;
1743
8.71k
    temp      = PelBuf(m_piYuvExt[1] + srcStride + 1, srcStride, Size(chromaArea));
1744
8.71k
  }
1745
12.4k
  srcColor0 = temp.bufAt(0, 0);
1746
12.4k
  curChroma0 = getPredictorPtr(compID);
1747
1748
12.4k
  curStride = m_topRefLength + 1;
1749
1750
12.4k
  curChroma0 += curStride + 1;
1751
1752
12.4k
  unsigned internalBitDepth = sps.getBitDepth();
1753
1754
12.4k
  int minLuma[2] = {  MAX_INT, 0 };
1755
12.4k
  int maxLuma[2] = { -MAX_INT, 0 };
1756
1757
12.4k
  Pel *src = srcColor0 - srcStride;
1758
12.4k
  Pel *cur = curChroma0 - curStride;
1759
12.4k
  int actualTopTemplateSampNum = 0;
1760
12.4k
  int actualLeftTemplateSampNum = 0;
1761
12.4k
  if( curChromaMode == MDLM_T_IDX )
1762
1.91k
  {
1763
1.91k
    int avaiAboveUnits = 0;
1764
1765
1.91k
    if( tu.cu->above || chromaArea.y > tu.cu->blocks[CH_C].y )
1766
1.67k
    {
1767
1.67k
      avaiAboveUnits  = aboveUnits;
1768
1.67k
      aboveRightUnits = aboveRightUnits > ( cHeight / unitWidth ) ? cHeight / unitWidth : aboveRightUnits;
1769
1.67k
      avaiAboveUnits += isAboveAvailable( tu, CHANNEL_TYPE_CHROMA, { posLT.x + ( PosType ) cWidth, posLT.y }, aboveRightUnits, unitWidth );
1770
1.67k
    }
1771
1772
1.91k
    aboveAvailable           = avaiAboveUnits >= tuWidthInUnits;
1773
1.91k
    actualTopTemplateSampNum = unitWidth * avaiAboveUnits;
1774
1.91k
  }
1775
10.5k
  else if( curChromaMode == MDLM_L_IDX )
1776
1.79k
  {
1777
1.79k
    int avaiLeftUnits = 0;
1778
    
1779
1.79k
    if( tu.cu->left || chromaArea.x > tu.cu->blocks[CH_C].x )
1780
1.70k
    {
1781
1.70k
      avaiLeftUnits  = leftUnits;
1782
1.70k
      leftBelowUnits = leftBelowUnits > ( cWidth / unitHeight ) ? cWidth / unitHeight : leftBelowUnits;
1783
1.70k
      avaiLeftUnits += isLeftAvailable( tu, CHANNEL_TYPE_CHROMA, { posLT.x, posLT.y + ( PosType ) cHeight }, leftBelowUnits, unitHeight );
1784
1.70k
    }
1785
    
1786
1.79k
    leftAvailable             = avaiLeftUnits >= tuHeightInUnits;
1787
1.79k
    actualLeftTemplateSampNum = unitHeight * avaiLeftUnits;
1788
1.79k
  }
1789
8.71k
  else if( curChromaMode == LM_CHROMA_IDX )
1790
8.72k
  {
1791
8.72k
    aboveAvailable = tu.cu->above || chromaArea.y > tu.cu->blocks[CH_C].y;
1792
8.72k
    leftAvailable  = tu.cu->left || chromaArea.x > tu.cu->blocks[CH_C].x;
1793
8.72k
    actualTopTemplateSampNum =  cWidth;
1794
8.72k
    actualLeftTemplateSampNum = cHeight;
1795
8.72k
  }
1796
12.4k
  int startPos[2]; //0:Above, 1: Left
1797
12.4k
  int pickStep[2];
1798
1799
12.4k
  int aboveIs4 = leftAvailable  ? 0 : 1;
1800
12.4k
  int leftIs4 =  aboveAvailable ? 0 : 1;
1801
1802
12.4k
  startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
1803
12.4k
  pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4));
1804
1805
12.4k
  startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
1806
12.4k
  pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4));
1807
1808
12.4k
  Pel selectLumaPix[4] = { 0, 0, 0, 0 };
1809
12.4k
  Pel selectChromaPix[4] = { 0, 0, 0, 0 };
1810
1811
12.4k
  int cntT, cntL;
1812
12.4k
  cntT = cntL = 0;
1813
12.4k
  int cnt = 0;
1814
12.4k
  if (aboveAvailable)
1815
8.38k
  {
1816
8.38k
    cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
1817
8.38k
    src = srcColor0 - srcStride;
1818
8.38k
    cur = curChroma0 - curStride;
1819
29.4k
    for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
1820
21.1k
    {
1821
21.1k
      selectLumaPix[cnt] = src[pos];
1822
21.1k
      selectChromaPix[cnt] = cur[pos];
1823
21.1k
    }
1824
8.38k
  }
1825
1826
12.4k
  if (leftAvailable)
1827
8.95k
  {
1828
8.95k
    cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 );
1829
8.95k
    src = srcColor0 - 1;
1830
8.95k
    cur = curChroma0 - 1;
1831
32.3k
    for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
1832
23.3k
    {
1833
23.3k
      selectLumaPix[cnt + cntT] = src[pos * srcStride];
1834
23.3k
      selectChromaPix[cnt+ cntT] = cur[pos * curStride];
1835
23.3k
    }
1836
8.95k
  }
1837
12.4k
  cnt = cntL + cntT;
1838
1839
12.4k
  if (cnt == 2)
1840
16
  {
1841
16
    selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
1842
16
    selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
1843
16
    selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
1844
16
    selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
1845
16
  }
1846
1847
12.4k
  int minGrpIdx[2] = { 0, 2 };
1848
12.4k
  int maxGrpIdx[2] = { 1, 3 };
1849
12.4k
  int *tmpMinGrp = minGrpIdx;
1850
12.4k
  int *tmpMaxGrp = maxGrpIdx;
1851
12.4k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]);
1852
12.4k
  if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]);
1853
12.4k
  if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp);       // TODO: really? not std::swap(tmpMinGrp[0], tmpMaxGrp[1]); ?
1854
12.4k
  if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]);
1855
1856
12.4k
  minLuma[0] = (selectLumaPix  [tmpMinGrp[0]] + selectLumaPix  [tmpMinGrp[1]] + 1) >> 1;
1857
12.4k
  minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
1858
12.4k
  maxLuma[0] = (selectLumaPix  [tmpMaxGrp[0]] + selectLumaPix  [tmpMaxGrp[1]] + 1) >> 1;
1859
12.4k
  maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
1860
1861
12.4k
  if (leftAvailable || aboveAvailable)
1862
11.1k
  {
1863
11.1k
    int diff = maxLuma[0] - minLuma[0];
1864
11.1k
    if (diff > 0)
1865
9.57k
    {
1866
9.57k
      int diffC = maxLuma[1] - minLuma[1];
1867
9.57k
      int x = getLog2( diff );
1868
9.57k
      static const uint8_t DivSigTable[1 << 4] = {
1869
        // 4bit significands - 8 ( MSB is omitted )
1870
9.57k
        0,  7,  6,  5,  5,  4,  4,  3,  3,  2,  2,  1,  1,  1,  1,  0
1871
9.57k
      };
1872
9.57k
      int normDiff = (diff << 4 >> x) & 15;
1873
9.57k
      int v = DivSigTable[normDiff] | 8;
1874
9.57k
      x += normDiff != 0;
1875
1876
9.57k
      int y = diffC == 0 ? 0 : getLog2( abs( diffC ) ) + 1;
1877
9.57k
      int add = 1 << y >> 1;
1878
9.57k
      a = (diffC * v + add) >> y;
1879
9.57k
      iShift = 3 + x - y;
1880
9.57k
      if ( iShift < 1 ) {
1881
1.07k
        iShift = 1;
1882
1.07k
        a = ( (a == 0)? 0: (a < 0)? -15 : 15 );   // a=Sign(a)*15
1883
1.07k
      }
1884
9.57k
      b = minLuma[1] - ((a * minLuma[0]) >> iShift);
1885
9.57k
    }
1886
1.54k
    else
1887
1.54k
    {
1888
1.54k
      a = 0;
1889
1.54k
      b = minLuma[1];
1890
1.54k
      iShift = 0;
1891
1.54k
    }
1892
11.1k
  }
1893
1.30k
  else
1894
1.30k
  {
1895
1.30k
    a = 0;
1896
1897
1.30k
    b = 1 << (internalBitDepth - 1);
1898
1899
1.30k
    iShift = 0;
1900
1.30k
  }
1901
12.4k
}
1902
1903
void IntraPrediction::initIntraMip( const CodingUnit &cu, const CompArea &area )
1904
2.77k
{
1905
2.77k
  CHECK( area.width > MIP_MAX_WIDTH || area.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" );
1906
1907
  // prepare input (boundary) data for prediction
1908
//  CHECK( m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP" );
1909
2.77k
  Pel *ptrSrc = getPredictorPtr( area.compID() );
1910
2.77k
  const int srcStride  = m_topRefLength  + 1; //TODO: check this if correct
1911
2.77k
  const int srcHStride = m_leftRefLength + 1;
1912
1913
2.77k
  m_matrixIntraPred.prepareInputForPred( CPelBuf( ptrSrc, srcStride, srcHStride ), area, cu.sps->getBitDepth(), area.compID() );
1914
2.77k
}
1915
1916
void IntraPrediction::predIntraMip( const ComponentID compId, PelBuf &piPred, const CodingUnit &cu )
1917
2.77k
{
1918
2.77k
  CHECK( piPred.width > MIP_MAX_WIDTH || piPred.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" );
1919
2.77k
  CHECK( piPred.width != (1 << getLog2(piPred.width)) || piPred.height != (1 << getLog2(piPred.height)), "Error: expecting blocks of size 2^M x 2^N" );
1920
1921
  // generate mode-specific prediction
1922
2.77k
  uint32_t modeIdx       = MAX_NUM_MIP_MODE;
1923
2.77k
  bool     transposeFlag = false;
1924
2.77k
  if( compId == COMPONENT_Y )
1925
2.77k
  {
1926
2.77k
    modeIdx       = cu.intraDir[CHANNEL_TYPE_LUMA];
1927
2.77k
    transposeFlag = cu.mipTransposedFlag();
1928
2.77k
  }
1929
0
  else
1930
0
  {
1931
0
    const CodingUnit &coLocatedLumaPU = PU::getCoLocatedLumaPU(cu);
1932
1933
0
    CHECK(cu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "Error: MIP is only supported for chroma with DM_CHROMA.");
1934
0
    CHECK(!coLocatedLumaPU.mipFlag(), "Error: Co-located luma CU should use MIP.");
1935
1936
0
    modeIdx       = coLocatedLumaPU.intraDir[CHANNEL_TYPE_LUMA];
1937
0
    transposeFlag = coLocatedLumaPU.mipTransposedFlag();
1938
0
  }
1939
1940
2.77k
  CHECK(modeIdx >= getNumModesMip(piPred), "Error: Wrong MIP mode index");
1941
1942
2.77k
  const int bitDepth = cu.sps->getBitDepth();
1943
2.77k
  m_matrixIntraPred.predBlock( piPred, modeIdx, piPred, transposeFlag, bitDepth, compId, m_piYuvExt[0] );
1944
2.77k
}
1945
1946
}