Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/IntraSearch.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     EncSearch.cpp
45
 *  \brief    encoder intra search class
46
 */
47
48
#include "IntraSearch.h"
49
#include "EncPicture.h"
50
#include "CommonLib/CommonDef.h"
51
#include "CommonLib/Rom.h"
52
#include "CommonLib/Picture.h"
53
#include "CommonLib/UnitTools.h"
54
#include "CommonLib/dtrace_next.h"
55
#include "CommonLib/dtrace_buffer.h"
56
#include "CommonLib/Reshape.h"
57
#include <math.h>
58
#include "vvenc/vvencCfg.h"
59
60
//! \ingroup EncoderLib
61
//! \{
62
63
namespace vvenc {
64
65
#define PLTCtx(c) SubCtx( Ctx::Palette, c )
66
67
IntraSearch::IntraSearch()
68
20.7k
  : m_pSaveCS       (nullptr)
69
20.7k
  , m_pcEncCfg      (nullptr)
70
20.7k
  , m_pcTrQuant     (nullptr)
71
20.7k
  , m_pcRdCost      (nullptr)
72
20.7k
  , m_CABACEstimator(nullptr)
73
20.7k
  , m_CtxCache      (nullptr)
74
20.7k
{
75
20.7k
}
76
77
void IntraSearch::init(const VVEncCfg &encCfg, TrQuant *pTrQuant, RdCost *pRdCost, SortedPelUnitBufs<SORTED_BUFS> *pSortedPelUnitBufs, XUCache &unitCache )
78
20.7k
{
79
20.7k
  IntraPrediction::init( encCfg.m_internChromaFormat, encCfg.m_internalBitDepth[ CH_L ] );
80
81
20.7k
  m_pcEncCfg          = &encCfg;
82
20.7k
  m_pcTrQuant         = pTrQuant;
83
20.7k
  m_pcRdCost          = pRdCost;
84
20.7k
  m_SortedPelUnitBufs = pSortedPelUnitBufs;
85
86
20.7k
  const ChromaFormat chrFormat = encCfg.m_internChromaFormat;
87
20.7k
  const int maxCUSize          = encCfg.m_CTUSize;
88
89
20.7k
  Area area = Area( 0, 0, maxCUSize, maxCUSize );
90
91
20.7k
  m_pTempCS = new CodingStructure( unitCache, nullptr );
92
20.7k
  m_pBestCS = new CodingStructure( unitCache, nullptr );
93
94
20.7k
  m_pTempCS->createForSearch( chrFormat, area );
95
20.7k
  m_pBestCS->createForSearch( chrFormat, area );
96
97
20.7k
  const int uiNumSaveLayersToAllocate = 3;
98
20.7k
  m_pSaveCS = new CodingStructure*[uiNumSaveLayersToAllocate];
99
83.0k
  for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
100
62.3k
  {
101
62.3k
    m_pSaveCS[ layer ] = new CodingStructure( unitCache, nullptr );
102
62.3k
    m_pSaveCS[ layer ]->createForSearch( chrFormat, Area( 0, 0, maxCUSize, maxCUSize ) );
103
62.3k
    m_pSaveCS[ layer ]->initStructData();
104
62.3k
  }
105
106
20.7k
  CompArea chromaArea( COMP_Cb, chrFormat, area, true );
107
124k
  for( int i = 0; i < 5; i++ )
108
103k
  {
109
103k
    m_orgResiCb[i].create( chromaArea );
110
103k
    m_orgResiCr[i].create( chromaArea );
111
103k
  }
112
20.7k
}
113
114
void IntraSearch::destroy()
115
20.7k
{
116
20.7k
  if ( m_pSaveCS )
117
20.7k
  {
118
20.7k
    const int uiNumSaveLayersToAllocate = 3;
119
83.0k
    for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
120
62.3k
    {
121
62.3k
      if ( m_pSaveCS[ layer ] ) { m_pSaveCS[ layer ]->destroy(); delete m_pSaveCS[ layer ]; }
122
62.3k
    }
123
20.7k
    delete[] m_pSaveCS;
124
20.7k
    m_pSaveCS = nullptr;
125
20.7k
  }
126
127
20.7k
  if( m_pTempCS )
128
20.7k
  {
129
20.7k
    m_pTempCS->destroy();
130
20.7k
    delete m_pTempCS; m_pTempCS = nullptr;
131
20.7k
  }
132
133
20.7k
  if( m_pBestCS )
134
20.7k
  {
135
20.7k
    m_pBestCS->destroy();
136
20.7k
    delete m_pBestCS; m_pBestCS = nullptr;
137
20.7k
  }
138
20.7k
}
139
140
IntraSearch::~IntraSearch()
141
20.7k
{
142
20.7k
  destroy();
143
20.7k
}
144
145
void IntraSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache *ctxCache )
146
4.05k
{
147
4.05k
  m_CABACEstimator = cabacEstimator;
148
4.05k
  m_CtxCache       = ctxCache;
149
4.05k
}
150
151
//////////////////////////////////////////////////////////////////////////
152
// INTRA PREDICTION
153
//////////////////////////////////////////////////////////////////////////
154
static constexpr double COST_UNKNOWN = -65536.0;
155
156
double IntraSearch::xFindInterCUCost( CodingUnit &cu )
157
27.1k
{
158
27.1k
  if( CU::isConsIntra(cu) && !cu.slice->isIntra() )
159
0
  {
160
    //search corresponding inter CU cost
161
0
    for( int i = 0; i < m_numCuInSCIPU; i++ )
162
0
    {
163
0
      if( cu.lumaPos() == m_cuAreaInSCIPU[i].pos() && cu.lumaSize() == m_cuAreaInSCIPU[i].size() )
164
0
      {
165
0
        return m_cuCostInSCIPU[i];
166
0
      }
167
0
    }
168
0
  }
169
27.1k
  return COST_UNKNOWN;
170
27.1k
}
171
172
void IntraSearch::xEstimateLumaRdModeList(int& numModesForFullRD,
173
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList,
174
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& HadModeList,
175
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandCostList,
176
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandHadList, CodingUnit& cu, bool testMip )
177
27.1k
{
178
27.1k
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTRA_EST_RD_CAND, cu.cs, CH_L );
179
27.1k
  const uint16_t intra_ctx_size = Ctx::IntraLumaMpmFlag.size() + Ctx::IntraLumaPlanarFlag.size() + Ctx::MultiRefLineIdx.size() + Ctx::ISPMode.size() + Ctx::MipFlag.size();
180
27.1k
  const TempCtx  ctxStartIntraCtx(m_CtxCache, SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), m_CABACEstimator->getCtx()));
181
27.1k
  const double   sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
182
27.1k
  const int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
183
184
27.1k
  CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search");
185
186
27.1k
  const SPS& sps     = *cu.cs->sps;
187
27.1k
  const bool fastMip = sps.MIP && m_pcEncCfg->m_useFastMIP;
188
189
  // this should always be true
190
27.1k
  CHECK( !cu.Y().valid(), "CU is not valid" );
191
192
27.1k
  const CompArea& area = cu.Y();
193
194
27.1k
  const UnitArea localUnitArea(area.chromaFormat, Area(0, 0, area.width, area.height));
195
27.1k
  if( testMip)
196
20.7k
  {
197
20.7k
    numModesForFullRD += fastMip ? numModesForFullRD - std::min( m_pcEncCfg->m_useFastMIP, numModesForFullRD )
198
20.7k
                                 : numModesForFullRD;
199
20.7k
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD + 1 );
200
20.7k
  }
201
6.39k
  else
202
6.39k
  {
203
6.39k
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD );
204
6.39k
  }
205
206
27.1k
  CPelBuf piOrg   = cu.cs->getOrgBuf(COMP_Y);
207
27.1k
  PelBuf piPred  = m_SortedPelUnitBufs->getTestBuf(COMP_Y);
208
209
27.1k
  const ReshapeData& reshapeData = cu.cs->picture->reshapeData;
210
27.1k
  if (cu.cs->picHeader->lmcsEnabled && reshapeData.getCTUFlag())
211
0
  {
212
0
    piOrg = cu.cs->getRspOrgBuf();
213
0
  }
214
27.1k
  DistParam distParam    = m_pcRdCost->setDistParam( piOrg, piPred, sps.bitDepths[ CH_L ], DF_HAD_2SAD); // Use HAD (SATD) cost
215
216
27.1k
  const int numHadCand = (testMip ? 2 : 1) * 3;
217
218
  //*** Derive (regular) candidates using Hadamard
219
27.1k
  cu.mipFlag = false;
220
27.1k
  cu.multiRefIdx = 0;
221
222
  //===== init pattern for luma prediction =====
223
27.1k
  initIntraPatternChType(cu, cu.Y(), true);
224
225
27.1k
  bool satdChecked[NUM_INTRA_MODE] = { false };
226
227
27.1k
  unsigned mpmLst[NUM_MOST_PROBABLE_MODES];
228
27.1k
  CU::getIntraMPMs(cu, mpmLst);
229
230
27.1k
  const int decMsk = ( 1 << m_pcEncCfg->m_IntraEstDecBit ) - 1;
231
232
27.1k
  m_parentCandList.resize( 0 );
233
27.1k
  m_parentCandList.reserve( ( numModesAvailable >> m_pcEncCfg->m_IntraEstDecBit ) + 2 );
234
235
1.84M
  for( unsigned mode = 0; mode < numModesAvailable; mode++ )
236
1.81M
  {
237
    // Skip checking extended Angular modes in the first round of SATD
238
1.81M
    if( mode > DC_IDX && ( mode & decMsk ) )
239
1.33M
    {
240
1.33M
      continue;
241
1.33M
    }
242
243
488k
    m_parentCandList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ) );
244
488k
  }
245
   
246
108k
  for( int decDst = 1 << m_pcEncCfg->m_IntraEstDecBit; decDst > 0; decDst >>= 1 )
247
81.4k
  {
248
733k
    for( unsigned idx = 0; idx < m_parentCandList.size(); idx++ )
249
651k
    {
250
651k
      int modeParent = m_parentCandList[idx].modeId;
251
252
651k
      int off = decDst & decMsk;
253
651k
      int inc = decDst << 1;
254
255
651k
#if 1 // INTRA_AS_IN_VTM
256
651k
      if( off != 0 && ( modeParent <= ( DC_IDX + 1 ) || modeParent >= ( NUM_LUMA_MODE - 1 ) ) )
257
105k
      {
258
105k
        continue;
259
105k
      }
260
261
545k
#endif
262
1.14M
      for( int mode = modeParent - off; mode < modeParent + off + 1; mode += inc )
263
602k
      {
264
602k
        if( satdChecked[mode] || mode < 0 || mode >= NUM_LUMA_MODE )
265
2.57k
        {
266
2.57k
          continue;
267
2.57k
        }
268
269
600k
        cu.intraDir[0] = mode;
270
271
600k
        initPredIntraParams( cu, cu.Y(), sps );
272
600k
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
273
600k
        predIntraAng( COMP_Y, piPred, cu );
274
275
        // Use the min between SAD and HAD as the cost criterion
276
        // SAD is scaled by 2 to align with the scaling of HAD
277
600k
        Distortion minSadHad = distParam.distFunc( distParam );
278
279
600k
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
280
281
        //restore ctx
282
600k
        m_CABACEstimator->getCtx() = SubCtx( CtxSet( Ctx::IntraLumaMpmFlag(), intra_ctx_size ), ctxStartIntraCtx );
283
284
600k
        double cost = ( double ) minSadHad + ( double ) fracModeBits * sqrtLambdaForFirstPass;
285
600k
        DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, mode );
286
287
600k
        int insertPos = -1;
288
600k
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos );
289
600k
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), ( double ) minSadHad, HadModeList, CandHadList, numHadCand );
290
600k
        m_SortedPelUnitBufs->insert( insertPos, ( int ) RdModeList.size() );
291
292
600k
        satdChecked[mode] = true;
293
600k
      }
294
545k
    }
295
296
81.4k
    m_parentCandList.resize( RdModeList.size() );
297
81.4k
    std::copy( RdModeList.cbegin(), RdModeList.cend(), m_parentCandList.begin() );
298
81.4k
  }
299
300
27.1k
  const bool isFirstLineOfCtu = (((cu.block(COMP_Y).y)&((cu.cs->sps)->CTUSize - 1)) == 0);
301
27.1k
  if( m_pcEncCfg->m_MRL && ! isFirstLineOfCtu )
302
16.5k
  {
303
16.5k
    cu.multiRefIdx = 1;
304
16.5k
    unsigned  multiRefMPM [NUM_MOST_PROBABLE_MODES];
305
16.5k
    CU::getIntraMPMs(cu, multiRefMPM);
306
307
49.5k
    for (int mRefNum = 1; mRefNum < MRL_NUM_REF_LINES; mRefNum++)
308
33.0k
    {
309
33.0k
      int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
310
311
33.0k
      cu.multiRefIdx = multiRefIdx;
312
33.0k
      initIntraPatternChType(cu, cu.Y(), true);
313
314
198k
      for (int x = 1; x < NUM_MOST_PROBABLE_MODES; x++)
315
165k
      {
316
165k
        cu.intraDir[0] = multiRefMPM[x];
317
165k
        initPredIntraParams(cu, cu.Y(), sps);
318
165k
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
319
165k
        predIntraAng(COMP_Y, piPred, cu);
320
321
        // Use the min between SAD and SATD as the cost criterion
322
        // SAD is scaled by 2 to align with the scaling of HAD
323
165k
        Distortion minSadHad = distParam.distFunc(distParam);
324
325
        // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
326
165k
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
327
328
        //restore ctx
329
165k
        m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
330
331
165k
        double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
332
//        DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMRL: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, cu.intraDir[0]);
333
334
165k
        int insertPos = -1;
335
165k
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD, &insertPos );
336
165k
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), (double)minSadHad, HadModeList, CandHadList,  numHadCand );
337
165k
        m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
338
165k
      }
339
33.0k
    }
340
16.5k
    cu.multiRefIdx = 0;
341
16.5k
  }
342
343
27.1k
  if (testMip)
344
20.7k
  {
345
20.7k
    cu.mipFlag = true;
346
20.7k
    cu.multiRefIdx = 0;
347
348
20.7k
    double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE };
349
350
20.7k
    initIntraPatternChType(cu, cu.Y());
351
20.7k
    initIntraMip( cu );
352
353
20.7k
    const int transpOff    = getNumModesMip( cu.Y() );
354
20.7k
    const int numModesFull = (transpOff << 1);
355
271k
    for( uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++ )
356
250k
    {
357
250k
      const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
358
250k
      const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
359
360
250k
      cu.mipTransposedFlag = isTransposed;
361
250k
      cu.intraDir[CH_L] = uiMode;
362
250k
      distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
363
250k
      predIntraMip(piPred, cu);
364
365
      // Use the min between SAD and HAD as the cost criterion
366
      // SAD is scaled by 2 to align with the scaling of HAD
367
250k
      Distortion minSadHad = distParam.distFunc(distParam);
368
369
250k
      uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
370
371
      //restore ctx
372
250k
      m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
373
374
250k
      double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
375
250k
      mipHadCost[uiModeFull] = cost;
376
250k
      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiModeFull);
377
378
250k
      int insertPos = -1;
379
250k
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD+1, &insertPos );
380
250k
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), 0.8*(double)minSadHad, HadModeList, CandHadList,  numHadCand );
381
250k
      m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
382
250k
    }
383
384
20.7k
    const double thresholdHadCost = 1.0 + 1.4 / sqrt((double)(cu.lwidth()*cu.lheight()));
385
20.7k
    xReduceHadCandList(RdModeList, CandCostList, *m_SortedPelUnitBufs, numModesForFullRD, thresholdHadCost, mipHadCost, cu, fastMip);
386
20.7k
  }
387
388
27.1k
  if( m_pcEncCfg->m_bFastUDIUseMPMEnabled )
389
27.1k
  {
390
27.1k
    const int numMPMs = NUM_MOST_PROBABLE_MODES;
391
27.1k
    unsigned  intraMpms[numMPMs];
392
393
27.1k
    cu.multiRefIdx = 0;
394
395
27.1k
    const int numCand = CU::getIntraMPMs( cu, intraMpms );
396
27.1k
    ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0);
397
398
55.1k
    for( int j = 0; j < numCand; j++ )
399
27.9k
    {
400
27.9k
      bool mostProbableModeIncluded = false;
401
27.9k
      mostProbableMode.modeId = intraMpms[j];
402
403
142k
      for( int i = 0; i < numModesForFullRD; i++ )
404
114k
      {
405
114k
        mostProbableModeIncluded |= ( mostProbableMode == RdModeList[i] );
406
114k
      }
407
27.9k
      if( !mostProbableModeIncluded )
408
187
      {
409
187
        numModesForFullRD++;
410
187
        RdModeList.push_back( mostProbableMode );
411
187
        CandCostList.push_back(0);
412
187
      }
413
27.9k
    }
414
27.1k
  }
415
27.1k
}
416
417
bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, double bestCost)
418
27.1k
{
419
27.1k
  CodingStructure       &cs           = *cu.cs;
420
27.1k
  const int             width         = partitioner.currArea().lwidth();
421
27.1k
  const int             height        = partitioner.currArea().lheight();
422
423
  //===== loop over partitions =====
424
425
27.1k
  const TempCtx ctxStart           ( m_CtxCache, m_CABACEstimator->getCtx() );
426
427
  // variables for saving fast intra modes scan results across multiple LFNST passes
428
27.1k
  double costInterCU = xFindInterCUCost( cu );
429
430
27.1k
  bool validReturn = false;
431
432
  //===== determine set of modes to be tested (using prediction signal only) =====
433
27.1k
  int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
434
27.1k
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> RdModeList;
435
27.1k
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> HadModeList;
436
27.1k
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
437
27.1k
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;
438
439
27.1k
  int numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[Log2(width) - MIN_CU_LOG2][Log2(height) - MIN_CU_LOG2];
440
27.1k
  if (m_pcEncCfg->m_numIntraModesFullRD > 0)
441
0
    numModesForFullRD=m_pcEncCfg->m_numIntraModesFullRD;
442
443
#if INTRA_FULL_SEARCH
444
  numModesForFullRD = numModesAvailable;
445
#endif
446
27.1k
  const SPS& sps = *cu.cs->sps;
447
27.1k
  const bool mipAllowed = sps.MIP && cu.lwidth() <= sps.getMaxTbSize() && cu.lheight() <= sps.getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.lumaSize()));
448
27.1k
  const int SizeThr     = 8 >> std::max( 0, m_pcEncCfg->m_useFastMIP - 1 );
449
27.1k
  const bool testMip    = mipAllowed && ( cu.lwidth() <= ( SizeThr * cu.lheight() ) && cu.lheight() <= ( SizeThr * cu.lwidth() ) ) && ( cu.lwidth() <= MIP_MAX_WIDTH && cu.lheight() <= MIP_MAX_HEIGHT );
450
27.1k
  bool testISP = sps.ISP && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
451
27.1k
  if (testISP)
452
27.1k
  {
453
27.1k
    int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
454
27.1k
    int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
455
27.1k
    m_ispTestedModes[0].init(numTotalPartsHor, numTotalPartsVer, 0);
456
    // the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with
457
    // ISP due to size restrictions
458
27.1k
    numTotalPartsHor = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0;
459
27.1k
    numTotalPartsVer = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0;
460
81.4k
    for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++)
461
54.3k
    {
462
54.3k
      m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer, 0);
463
54.3k
    }
464
27.1k
    testISP = m_ispTestedModes[0].numTotalParts[0];
465
27.1k
  }
466
0
  else
467
0
  {
468
0
    m_ispTestedModes[0].init(0, 0, 0);
469
0
  }
470
471
27.1k
  xEstimateLumaRdModeList(numModesForFullRD, RdModeList, HadModeList, CandCostList, CandHadList, cu, testMip);
472
473
27.1k
  CHECK( (size_t)numModesForFullRD != RdModeList.size(), "Inconsistent state!" );
474
475
  // after this point, don't use numModesForFullRD
476
27.1k
  if( m_pcEncCfg->m_usePbIntraFast && !cs.slice->isIntra() && RdModeList.size() < numModesAvailable )
477
0
  {
478
0
    double pbintraRatio = m_pcEncCfg->m_usePbIntraFast == 1 && ( cs.area.lwidth() >= 16 && cs.area.lheight() >= 16 ) ? 1.2 : PBINTRA_RATIO;
479
480
0
    int maxSize = -1;
481
0
    ModeInfo bestMipMode;
482
0
    int bestMipIdx = -1;
483
0
    for( int idx = 0; idx < RdModeList.size(); idx++ )
484
0
    {
485
0
      if( RdModeList[idx].mipFlg )
486
0
      {
487
0
        bestMipMode = RdModeList[idx];
488
0
        bestMipIdx = idx;
489
0
        break;
490
0
      }
491
0
    }
492
0
    const int numHadCand = 3;
493
0
    for (int k = numHadCand - 1; k >= 0; k--)
494
0
    {
495
0
      if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; }
496
0
    }
497
0
    if (maxSize > 0)
498
0
    {
499
0
      RdModeList.resize(std::min<size_t>(RdModeList.size(), maxSize));
500
0
      if( bestMipIdx >= 0 )
501
0
      {
502
0
        if( RdModeList.size() <= bestMipIdx )
503
0
        {
504
0
          RdModeList.push_back(bestMipMode);
505
0
          m_SortedPelUnitBufs->swap( maxSize, bestMipIdx );
506
0
        }
507
0
      }
508
0
    }
509
0
    if (maxSize == 0)
510
0
    {
511
0
      cs.dist = MAX_DISTORTION;
512
0
      cs.interHad = 0;
513
0
      return false;
514
0
    }
515
0
  }
516
517
  //===== check modes (using r-d costs) =====
518
27.1k
  ModeInfo bestPUMode;
519
520
27.1k
  CodingStructure *csTemp = m_pTempCS;
521
27.1k
  CodingStructure *csBest = m_pBestCS;
522
523
27.1k
  csTemp->slice   = csBest->slice   = cs.slice;
524
27.1k
  csTemp->picture = csBest->picture = cs.picture;
525
27.1k
  csTemp->compactResize( cu );
526
27.1k
  csBest->compactResize( cu );
527
27.1k
  csTemp->initStructData();
528
27.1k
  csBest->initStructData();
529
530
27.1k
  int   bestLfnstIdx  = 0;
531
27.1k
  const bool useBDPCM = cs.picture->useBDPCM;
532
27.1k
  int   NumBDPCMCand  = (useBDPCM && sps.BDPCM && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType))) ? 2 : 0;
533
27.1k
  int   bestbdpcmMode = 0;
534
27.1k
  int   bestISP       = 0;
535
27.1k
  int   bestMrl       = 0;
536
27.1k
  bool  bestMip       = 0;
537
27.1k
  int   EndMode       = (int)RdModeList.size();
538
27.1k
  bool  useISPlfnst   = testISP && sps.LFNST;
539
27.1k
  bool  noLFNST_ts    = false;
540
27.1k
  double bestCostIsp[2] = { MAX_DOUBLE, MAX_DOUBLE };
541
27.1k
  bool disableMTS = false;
542
27.1k
  bool disableLFNST = false;
543
27.1k
  bool disableDCT2test = false;
544
27.1k
  if (m_pcEncCfg->m_FastIntraTools)
545
27.1k
  {
546
27.1k
    int speedIntra = 0;
547
27.1k
    xSpeedUpIntra(bestCost, EndMode, speedIntra, cu);
548
27.1k
    disableMTS = (speedIntra >> 2 ) & 0x1;
549
27.1k
    disableLFNST = (speedIntra >> 1) & 0x1;
550
27.1k
    disableDCT2test = speedIntra>>3;
551
27.1k
    if (disableLFNST)
552
24.3k
    {
553
24.3k
      noLFNST_ts = true;
554
24.3k
      useISPlfnst = false;
555
24.3k
    }
556
27.1k
    if (speedIntra & 0x1)
557
24.3k
    {
558
24.3k
      testISP = false;
559
24.3k
    }
560
27.1k
  }
561
562
145k
  for (int mode_cur = 0; mode_cur < EndMode + NumBDPCMCand; mode_cur++)
563
118k
  {
564
118k
    int mode = mode_cur;
565
118k
    if (mode_cur >= EndMode)
566
7.68k
    {
567
7.68k
      mode = mode_cur - EndMode ? -1 : -2;
568
7.68k
      testISP = false;
569
7.68k
    }
570
    // set CU/PU to luma prediction mode
571
118k
    ModeInfo testMode;
572
118k
    int noISP = 0;
573
118k
    int endISP = testISP ? 2 : 0;
574
118k
    bool noLFNST = false || noLFNST_ts;
575
118k
    if (mode && useISPlfnst)
576
9.36k
    {
577
9.36k
      noLFNST |= (bestCostIsp[0] > (bestCostIsp[1] * 1.4));
578
9.36k
      if (mode > 2)
579
2.53k
      {
580
2.53k
        endISP = 0;
581
2.53k
        testISP = false;
582
2.53k
      }
583
9.36k
    }
584
118k
    if (testISP)
585
5.95k
    {
586
5.95k
      xSpeedUpISP(1, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, bestLfnstIdx);
587
5.95k
    }
588
118k
    int startISP = 0;
589
118k
    if (disableDCT2test && mode && bestISP)
590
0
    {
591
0
      startISP = endISP ? 1 : 0;
592
0
    }
593
246k
    for (int ispM = startISP; ispM <= endISP; ispM++)
594
127k
    {
595
127k
      if (ispM && (ispM == noISP))
596
51
      {
597
51
        continue;
598
51
      }
599
600
127k
      if (mode < 0)
601
7.68k
      {
602
7.68k
        cu.bdpcmM[CH_L] = -mode;
603
7.68k
        testMode = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmM[CH_L] == 2 ? VER_IDX : HOR_IDX);
604
7.68k
      }
605
119k
      else
606
119k
      {
607
119k
        testMode = RdModeList[mode];
608
119k
        cu.bdpcmM[CH_L] = 0;
609
119k
      }
610
611
127k
      cu.ispMode = ispM;
612
127k
      cu.mipFlag = testMode.mipFlg;
613
127k
      cu.mipTransposedFlag = testMode.mipTrFlg;
614
127k
      cu.multiRefIdx = testMode.mRefId;
615
127k
      cu.intraDir[CH_L] = testMode.modeId;
616
127k
      if (cu.ispMode && xSpeedUpISP(0, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, 0) )
617
3.11k
      {
618
3.11k
        continue;
619
3.11k
      }
620
124k
      if (m_pcEncCfg->m_FastIntraTools && (cu.ispMode || sps.LFNST || sps.MTS))
621
124k
      {
622
124k
        m_ispTestedModes[0].intraWasTested = true;
623
124k
      }
624
124k
      CHECK(cu.mipFlag && cu.multiRefIdx, "Error: combination of MIP and MRL not supported");
625
124k
      CHECK(cu.multiRefIdx && (cu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
626
124k
      CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
627
124k
      CHECK(cu.ispMode && cu.multiRefIdx, "Error: combination of ISP and MRL not supported");
628
629
      // determine residual for partition
630
124k
      cs.initSubStructure(*csTemp, partitioner.chType, cs.area, true);
631
124k
      int doISP = (((cu.ispMode == 0) && noLFNST) || (useISPlfnst && mode && cu.ispMode && (bestLfnstIdx == 0)) || disableLFNST) ? -mode : mode;
632
124k
      xIntraCodingLumaQT(*csTemp, partitioner, m_SortedPelUnitBufs->getBufFromSortedList(mode), bestCost, doISP, disableMTS);
633
634
124k
      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
635
124k
        cu.blocks[0].y, width, height, csTemp->cost, testMode.modeId, testMode.ispMod,
636
124k
        cu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);
637
638
124k
      if (cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMP_Y])
639
1.91k
      {
640
1.91k
        csTemp->cost = MAX_DOUBLE;
641
1.91k
        csTemp->costDbOffset = 0;
642
1.91k
      }
643
124k
      if (useISPlfnst)
644
17.8k
      {
645
17.8k
        int n = (cu.ispMode == 0) ? 0 : 1;
646
17.8k
        bestCostIsp[n] = csTemp->cost < bestCostIsp[n] ? csTemp->cost : bestCostIsp[n];
647
17.8k
      }
648
649
      // check r-d cost
650
124k
      if (csTemp->cost < csBest->cost)
651
34.2k
      {
652
34.2k
        validReturn   = true;
653
34.2k
        std::swap(csTemp, csBest);
654
34.2k
        bestPUMode    = testMode;
655
34.2k
        bestLfnstIdx  = csBest->cus[0]->lfnstIdx;
656
34.2k
        bestISP       = csBest->cus[0]->ispMode;
657
34.2k
        bestMip       = csBest->cus[0]->mipFlag;
658
34.2k
        bestMrl       = csBest->cus[0]->multiRefIdx;
659
34.2k
        bestbdpcmMode = cu.bdpcmM[CH_L];
660
34.2k
        m_ispTestedModes[bestLfnstIdx].bestSplitSoFar = ISPType(bestISP);
661
34.2k
        if (csBest->cost < bestCost)
662
34.2k
        {
663
34.2k
          bestCost = csBest->cost;
664
34.2k
        }
665
34.2k
        if ((csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] == MTS_SKIP) && ( floorLog2(csBest->getTU(partitioner.chType)->blocks[COMP_Y].area()) >= 6 ))
666
4.62k
        {
667
4.62k
          noLFNST_ts = 1;
668
4.62k
        }
669
34.2k
      }
670
671
      // reset context models
672
124k
      m_CABACEstimator->getCtx() = ctxStart;
673
674
124k
      csTemp->releaseIntermediateData();
675
676
124k
      if (m_pcEncCfg->m_fastLocalDualTreeMode && CU::isConsIntra(cu) && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0)
677
0
      {
678
0
        if( (m_pcEncCfg->m_fastLocalDualTreeMode == 2) || (csBest->cost > costInterCU * 1.5))
679
0
        {
680
          //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
681
0
          EndMode = 0;
682
0
          break;
683
0
        }
684
0
      }
685
124k
    }
686
118k
  } // Mode loop
687
688
27.1k
  if (m_pcEncCfg->m_FastIntraTools && (sps.ISP|| sps.LFNST || sps.MTS))
689
27.1k
  {
690
27.1k
    int bestMode = csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] ? 4 : 0;
691
27.1k
    bestMode |= bestLfnstIdx ? 2 : 0;
692
27.1k
    bestMode |= bestISP ? 1 : 0;
693
27.1k
    m_ispTestedModes[0].bestIntraMode = bestMode;
694
27.1k
  }
695
27.1k
  cu.ispMode = bestISP;
696
27.1k
  if( validReturn )
697
27.1k
  {
698
27.1k
    cs.useSubStructure( *csBest, partitioner.chType, TREE_D, cu.singleChan( CH_L ), true );
699
27.1k
    const ReshapeData& reshapeData = cs.picture->reshapeData;
700
27.1k
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
701
0
    {
702
0
      cs.getRspRecoBuf().copyFrom(csBest->getRspRecoBuf());
703
0
    }
704
705
    //=== update PU data ====
706
27.1k
    cu.lfnstIdx           = bestLfnstIdx;
707
27.1k
    cu.mipTransposedFlag  = bestPUMode.mipTrFlg;
708
27.1k
    cu.intraDir[CH_L]     = bestPUMode.modeId;
709
27.1k
    cu.bdpcmM[CH_L]       = bestbdpcmMode;
710
27.1k
    cu.mipFlag            = bestMip;
711
27.1k
    cu.multiRefIdx        = bestMrl;
712
27.1k
  }
713
0
  else
714
0
  {
715
0
    THROW("fix this");
716
0
  }
717
718
27.1k
  csBest->releaseIntermediateData();
719
720
27.1k
  return validReturn;
721
27.1k
}
722
723
void IntraSearch::estIntraPredChromaQT( CodingUnit& cu, Partitioner& partitioner, const double maxCostAllowed )
724
60.9k
{
725
60.9k
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_CHROMA, cu.cs, CH_C );
726
60.9k
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );
727
60.9k
  CodingStructure &cs   = *cu.cs;
728
60.9k
  bool lumaUsesISP      = !CU::isSepTree(cu) && cu.ispMode;
729
60.9k
  PartSplit ispType     = lumaUsesISP ? CU::getISPType(cu, COMP_Y) : TU_NO_ISP;
730
60.9k
  double bestCostSoFar  = maxCostAllowed;
731
60.9k
  const uint32_t numberValidComponents = getNumberValidComponents( cu.chromaFormat );
732
60.9k
  const bool useBDPCM   = cs.picture->useBDPCM;
733
734
60.9k
  uint32_t   uiBestMode = 0;
735
60.9k
  Distortion uiBestDist = 0;
736
60.9k
  double     dBestCost  = MAX_DOUBLE;
737
738
  //----- init mode list ----
739
60.9k
  {
740
60.9k
    uint32_t  uiMinMode = 0;
741
60.9k
    uint32_t  uiMaxMode = NUM_CHROMA_MODE;
742
743
60.9k
    const int reducedModeNumber = uiMaxMode >> (m_pcEncCfg->m_reduceIntraChromaModesFullRD ? 1 : 2);
744
    //----- check chroma modes -----
745
60.9k
    uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
746
60.9k
    CU::getIntraChromaCandModes( cu, chromaCandModes );
747
748
    // create a temporary CS
749
60.9k
    CodingStructure &saveCS = *m_pSaveCS[0];
750
60.9k
    saveCS.pcv      = cs.pcv;
751
60.9k
    saveCS.picture  = cs.picture;
752
60.9k
    saveCS.area.repositionTo( cs.area );
753
60.9k
    saveCS.clearTUs();
754
755
60.9k
    if( !CU::isSepTree(cu) && cu.ispMode )
756
0
    {
757
0
      saveCS.clearCUs();
758
0
    }
759
760
60.9k
    if( CU::isSepTree(cu) )
761
60.9k
    {
762
60.9k
      if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
763
0
      {
764
0
        partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
765
766
0
        do
767
0
        {
768
0
          cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ).depth = partitioner.currTrDepth;
769
0
        } while( partitioner.nextPart( cs ) );
770
771
0
        partitioner.exitCurrSplit();
772
0
      }
773
60.9k
      else
774
60.9k
        cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu );
775
60.9k
    }
776
777
    // create a store for the TUs
778
60.9k
    std::vector<TransformUnit*> orgTUs;
779
60.9k
    for( const auto &ptu : cs.tus )
780
60.9k
    {
781
      // for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs
782
60.9k
      if (lumaUsesISP || cu.contains(*ptu, CH_C))
783
60.9k
      {
784
60.9k
        saveCS.addTU( *ptu, partitioner.chType, nullptr );
785
60.9k
        orgTUs.push_back( ptu );
786
60.9k
      }
787
60.9k
    }
788
789
    // SATD pre-selecting.
790
60.9k
    int     satdModeList  [NUM_CHROMA_MODE] = { 0 };
791
60.9k
    int64_t satdSortedCost[NUM_CHROMA_MODE] = { 0 };
792
60.9k
    bool    modeDisable[NUM_INTRA_MODE + 1] = { false }; // use intra mode idx to check whether enable
793
794
60.9k
    CodingStructure& cs = *(cu.cs);
795
60.9k
    CompArea areaCb = cu.Cb();
796
60.9k
    CompArea areaCr = cu.Cr();
797
60.9k
    CPelBuf orgCb  = cs.getOrgBuf (COMP_Cb);
798
60.9k
    PelBuf predCb  = cs.getPredBuf(COMP_Cb);
799
60.9k
    CPelBuf orgCr  = cs.getOrgBuf (COMP_Cr);
800
60.9k
    PelBuf predCr  = cs.getPredBuf(COMP_Cr);
801
802
60.9k
    DistParam distParamSadCb  = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_SAD);
803
60.9k
    DistParam distParamSatdCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_HAD);
804
60.9k
    DistParam distParamSadCr  = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_SAD);
805
60.9k
    DistParam distParamSatdCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_HAD);
806
807
60.9k
    cu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
808
809
60.9k
    initIntraPatternChType(cu, cu.Cb());
810
60.9k
    initIntraPatternChType(cu, cu.Cr());
811
60.9k
    loadLMLumaRecPels(cu, cu.Cb());
812
813
548k
    for (int idx = uiMinMode; idx < uiMaxMode; idx++)
814
487k
    {
815
487k
      int mode = chromaCandModes[idx];
816
487k
      satdModeList[idx] = mode;
817
487k
      if (CU::isLMCMode(mode) && ( !CU::isLMCModeEnabled(cu, mode) || cu.slice->lmChromaCheckDisable ) )
818
52.6k
      {
819
52.6k
        continue;
820
52.6k
      }
821
434k
      if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM
822
106k
      {
823
106k
        continue;
824
106k
      }
825
826
328k
      cu.intraDir[1]    = mode; // temporary assigned, for SATD checking.
827
828
328k
      const bool isLMCMode = CU::isLMCMode(mode);
829
328k
      if( isLMCMode )
830
86.7k
      {
831
86.7k
        predIntraChromaLM(COMP_Cb, predCb, cu, areaCb, mode);
832
86.7k
      }
833
241k
      else
834
241k
      {
835
241k
        initPredIntraParams(cu, cu.Cb(), *cs.sps);
836
241k
        predIntraAng(COMP_Cb, predCb, cu);
837
241k
      }
838
328k
      int64_t sadCb = distParamSadCb.distFunc(distParamSadCb) * 2;
839
328k
      int64_t satdCb = distParamSatdCb.distFunc(distParamSatdCb);
840
328k
      int64_t sad = std::min(sadCb, satdCb);
841
842
328k
      if( isLMCMode )
843
86.7k
      {
844
86.7k
        predIntraChromaLM(COMP_Cr, predCr, cu, areaCr, mode);
845
86.7k
      }
846
241k
      else
847
241k
      {
848
241k
        initPredIntraParams(cu, cu.Cr(), *cs.sps);
849
241k
        predIntraAng(COMP_Cr, predCr, cu);
850
241k
      }
851
328k
      int64_t sadCr = distParamSadCr.distFunc(distParamSadCr) * 2;
852
328k
      int64_t satdCr = distParamSatdCr.distFunc(distParamSatdCr);
853
328k
      sad += std::min(sadCr, satdCr);
854
328k
      satdSortedCost[idx] = sad;
855
328k
    }
856
857
    // sort the mode based on the cost from small to large.
858
548k
    for (int i = uiMinMode; i <= uiMaxMode - 1; i++)
859
487k
    {
860
2.19M
      for (int j = i + 1; j <= uiMaxMode - 1; j++)
861
1.70M
      {
862
1.70M
        if (satdSortedCost[j] < satdSortedCost[i])
863
104k
        {
864
104k
          std::swap( satdModeList[i],   satdModeList[j]);
865
104k
          std::swap( satdSortedCost[i], satdSortedCost[j]);
866
104k
        }
867
1.70M
      }
868
487k
    }
869
870
304k
    for (int i = 0; i < reducedModeNumber; i++)
871
243k
    {
872
243k
      modeDisable[satdModeList[uiMaxMode - 1 - i]] = true; // disable the last reducedModeNumber modes
873
243k
    }
874
875
60.9k
    int bestLfnstIdx = 0;
876
    // save the dist
877
60.9k
    Distortion baseDist = cs.dist;
878
60.9k
    int32_t bestbdpcmMode = 0;
879
60.9k
    uint32_t numbdpcmModes = ( useBDPCM && CU::bdpcmAllowed(cu, COMP_Cb)
880
40.3k
        && ((partitioner.chType == CH_C) || (cu.ispMode == 0 && cu.lfnstIdx == 0 && cu.firstTU->mtsIdx[COMP_Y] == MTS_SKIP))) ? 2 : 0;
881
629k
    for (int mode_cur = uiMinMode; mode_cur < (int)(uiMaxMode + numbdpcmModes); mode_cur++)
882
568k
    {
883
568k
      int mode = mode_cur;
884
568k
      if (mode_cur >= uiMaxMode)
885
80.7k
      {
886
80.7k
        mode = mode_cur > uiMaxMode ? -1 : -2; //set bdpcm mode
887
80.7k
        if ((mode == -1) && (saveCS.tus[0]->mtsIdx[COMP_Cb] != MTS_SKIP) && (saveCS.tus[0]->mtsIdx[COMP_Cr] != MTS_SKIP))
888
40.3k
        {
889
40.3k
          continue;
890
40.3k
        }
891
80.7k
      }
892
527k
      int chromaIntraMode;
893
527k
      if (mode < 0)
894
40.3k
      {
895
40.3k
        cu.bdpcmM[CH_C] = -mode;
896
40.3k
        chromaIntraMode = cu.bdpcmM[CH_C] == 2 ? chromaCandModes[1] : chromaCandModes[2];
897
40.3k
      }
898
487k
      else
899
487k
      {
900
487k
        cu.bdpcmM[CH_C] = 0;
901
487k
        chromaIntraMode = chromaCandModes[mode];
902
487k
        if (CU::isLMCMode(chromaIntraMode) && ( !CU::isLMCModeEnabled(cu, chromaIntraMode) || cu.slice->lmChromaCheckDisable ) )
903
52.6k
        {
904
52.6k
          continue;
905
52.6k
        }
906
434k
        if (modeDisable[chromaIntraMode] && CU::isLMCModeEnabled(cu, chromaIntraMode)) // when CCLM is disable, then MDLM is disable. not use satd checking
907
173k
        {
908
173k
          continue;
909
173k
        }
910
434k
      }
911
301k
      cs.dist = baseDist;
912
      //----- restore context models -----
913
301k
      m_CABACEstimator->getCtx() = ctxStart;
914
915
      //----- chroma coding -----
916
301k
      cu.intraDir[1] = chromaIntraMode;
917
301k
      m_ispTestedModes[0].IspType = ispType;
918
301k
      m_ispTestedModes[0].subTuCounter = -1;
919
301k
      xIntraChromaCodingQT( cs, partitioner );
920
301k
      if (lumaUsesISP && cs.dist == MAX_UINT)
921
0
      {
922
0
        continue;
923
0
      }
924
925
301k
      if (cs.sps->transformSkip)
926
301k
      {
927
301k
        m_CABACEstimator->getCtx() = ctxStart;
928
301k
      }
929
301k
      m_ispTestedModes[0].IspType = ispType;
930
301k
      m_ispTestedModes[0].subTuCounter = -1;
931
301k
      uint64_t fracBits   = xGetIntraFracBitsQT( cs, partitioner, false );
932
301k
      Distortion uiDist = cs.dist;
933
301k
      double    dCost   = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );
934
935
      //----- compare -----
936
301k
      if( dCost < dBestCost )
937
109k
      {
938
109k
        if (lumaUsesISP && (dCost < bestCostSoFar))
939
0
        {
940
0
          bestCostSoFar = dCost;
941
0
        }
942
327k
        for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ )
943
218k
        {
944
218k
          const CompArea& area = cu.blocks[i];
945
218k
          saveCS.getRecoBuf     ( area ).copyFrom( cs.getRecoBuf   ( area ) );
946
218k
          cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf   ( area ) );
947
437k
          for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
948
218k
          {
949
218k
            saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID );
950
218k
          }
951
218k
        }
952
109k
        dBestCost    = dCost;
953
109k
        uiBestDist   = uiDist;
954
109k
        uiBestMode   = chromaIntraMode;
955
109k
        bestLfnstIdx = cu.lfnstIdx;
956
109k
        bestbdpcmMode = cu.bdpcmM[CH_C];
957
958
109k
      }
959
301k
    }
960
60.9k
    cu.lfnstIdx = bestLfnstIdx;
961
60.9k
    cu.bdpcmM[CH_C]= bestbdpcmMode;
962
963
182k
    for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ )
964
121k
    {
965
121k
      const CompArea& area = cu.blocks[i];
966
967
121k
      cs.getRecoBuf         ( area ).copyFrom( saveCS.getRecoBuf( area ) );
968
121k
      cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf    ( area ) );
969
970
243k
      for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
971
121k
      {
972
121k
        orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID );
973
121k
      }
974
121k
    }
975
60.9k
  }
976
60.9k
  cu.intraDir[1] = uiBestMode;
977
60.9k
  cs.dist        = uiBestDist;
978
979
  //----- restore context models -----
980
60.9k
  m_CABACEstimator->getCtx() = ctxStart;
981
60.9k
  if (lumaUsesISP && bestCostSoFar >= maxCostAllowed)
982
0
  {
983
0
    cu.ispMode = 0;
984
0
  }
985
60.9k
}
986
987
void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )
988
0
{
989
0
  if( m_numCuInSCIPU < NUM_INTER_CU_INFO_SAVE )
990
0
  {
991
0
    m_cuAreaInSCIPU[m_numCuInSCIPU] = area;
992
0
    m_cuCostInSCIPU[m_numCuInSCIPU] = cost;
993
0
    m_numCuInSCIPU++;
994
0
  }
995
0
}
996
997
void IntraSearch::initCuAreaCostInSCIPU()
998
0
{
999
0
  for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ )
1000
0
  {
1001
0
    m_cuAreaInSCIPU[i] = Area();
1002
0
    m_cuCostInSCIPU[i] = 0;
1003
0
  }
1004
0
  m_numCuInSCIPU = 0;
1005
0
}
1006
// -------------------------------------------------------------------------------------------------------------------
1007
// Intra search
1008
// -------------------------------------------------------------------------------------------------------------------
1009
1010
void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool luma )
1011
502k
{
1012
502k
  CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType );
1013
1014
502k
  if (luma)
1015
200k
  {
1016
200k
    bool isFirst = cu.ispMode ? m_ispTestedModes[0].subTuCounter == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos();
1017
1018
    // CU header
1019
200k
    if( isFirst )
1020
196k
    {
1021
196k
      if ((!cs.slice->isIntra() || cs.slice->sps->IBC || cs.slice->sps->PLT) && cu.Y().valid())
1022
196k
      {
1023
196k
        m_CABACEstimator->pred_mode   ( cu );
1024
196k
      }
1025
196k
      m_CABACEstimator->bdpcm_mode  ( cu, ComponentID(partitioner.chType) );
1026
196k
    }
1027
1028
    // luma prediction mode
1029
200k
    if (isFirst)
1030
196k
    {
1031
196k
      if ( !cu.Y().valid())
1032
0
      {
1033
0
        m_CABACEstimator->pred_mode( cu );
1034
0
      }
1035
196k
      m_CABACEstimator->intra_luma_pred_mode( cu );
1036
196k
    }
1037
200k
  }
1038
301k
  else //  if (chroma)
1039
301k
  {
1040
301k
    bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos();
1041
1042
301k
    if( isFirst )
1043
301k
    {
1044
301k
      m_CABACEstimator->bdpcm_mode(cu, ComponentID(CH_C));
1045
301k
      m_CABACEstimator->intra_chroma_pred_mode(  cu );
1046
301k
    }
1047
301k
  }
1048
502k
}
1049
1050
void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool luma )
1051
502k
{
1052
502k
  const UnitArea& currArea = partitioner.currArea();
1053
502k
  int subTuCounter = m_ispTestedModes[0].subTuCounter;
1054
502k
  TransformUnit  &currTU   = *cs.getTU(currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter);
1055
502k
  CodingUnit     &currCU   = *currTU.cu;
1056
502k
  const uint32_t currDepth = partitioner.currTrDepth;
1057
502k
  const bool  subdiv = currTU.depth > currDepth;
1058
502k
  ComponentID compID = partitioner.chType == CH_L ? COMP_Y : COMP_Cb;
1059
1060
502k
  if (!luma)
1061
301k
  {
1062
301k
    const bool chromaCbfISP = currArea.blocks[COMP_Cb].valid() && currCU.ispMode && !subdiv;
1063
301k
    if (!currCU.ispMode || chromaCbfISP)
1064
301k
    {
1065
301k
      const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat);
1066
301k
      const uint32_t cbfDepth = (chromaCbfISP ? currDepth - 1 : currDepth);
1067
1068
905k
      for (uint32_t ch = COMP_Cb; ch < numberValidComponents; ch++)
1069
603k
      {
1070
603k
        const ComponentID compID = ComponentID(ch);
1071
603k
        if (currDepth == 0 || TU::getCbfAtDepth(currTU, compID, currDepth - 1) || chromaCbfISP)
1072
603k
        {
1073
603k
          const bool prevCbf = (compID == COMP_Cr ? TU::getCbfAtDepth(currTU, COMP_Cb, currDepth) : false);
1074
603k
          m_CABACEstimator->cbf_comp(currCU, TU::getCbfAtDepth(currTU, compID, currDepth), currArea.blocks[compID], cbfDepth, prevCbf);
1075
603k
        }
1076
603k
      }
1077
301k
    }
1078
301k
  }
1079
1080
502k
  if (subdiv)
1081
0
  {
1082
0
    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
1083
0
    {
1084
0
      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
1085
0
    }
1086
0
    else if (currCU.ispMode && isLuma(compID))
1087
0
    {
1088
0
      partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs);
1089
0
    }
1090
0
    else
1091
0
      THROW("Cannot perform an implicit split!");
1092
1093
0
    do
1094
0
    {
1095
0
      xEncSubdivCbfQT(cs, partitioner, luma);   //?
1096
0
      subTuCounter += subTuCounter != -1 ? 1 : 0;
1097
0
    } while (partitioner.nextPart(cs));
1098
1099
0
    partitioner.exitCurrSplit();
1100
0
  }
1101
502k
  else
1102
502k
  {
1103
    //===== Cbfs =====
1104
502k
    if (luma)
1105
200k
    {
1106
200k
      bool previousCbf = false;
1107
200k
      bool lastCbfIsInferred = false;
1108
200k
      if (m_ispTestedModes[0].IspType != TU_NO_ISP)
1109
15.1k
      {
1110
15.1k
        bool     rootCbfSoFar = false;
1111
15.1k
        uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2(currTU.lheight())
1112
15.1k
          : currCU.lwidth() >> floorLog2(currTU.lwidth());
1113
15.1k
        if (subTuCounter == nTus - 1)
1114
1.41k
        {
1115
1.41k
          TransformUnit* tuPointer = currCU.firstTU;
1116
5.66k
          for (int tuIdx = 0; tuIdx < nTus - 1; tuIdx++)
1117
4.24k
          {
1118
4.24k
            rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, currDepth);
1119
4.24k
            tuPointer = tuPointer->next;
1120
4.24k
          }
1121
1.41k
          if (!rootCbfSoFar)
1122
0
          {
1123
0
            lastCbfIsInferred = true;
1124
0
          }
1125
1.41k
        }
1126
15.1k
        if (!lastCbfIsInferred)
1127
15.1k
        {
1128
15.1k
          previousCbf = TU::getPrevTuCbfAtDepth(currTU, COMP_Y, partitioner.currTrDepth);
1129
15.1k
        }
1130
15.1k
      }
1131
200k
      if (!lastCbfIsInferred)
1132
200k
      {
1133
200k
        m_CABACEstimator->cbf_comp(currCU, TU::getCbfAtDepth(currTU, COMP_Y, currDepth), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode);
1134
200k
      }
1135
200k
    }
1136
502k
  }
1137
502k
}
1138
void IntraSearch::xEncCoeffQT(CodingStructure& cs, Partitioner& partitioner, const ComponentID compID, CUCtx* cuCtx, const int subTuIdx, const PartSplit ispType)
1139
804k
{
1140
804k
  const UnitArea& currArea  = partitioner.currArea();
1141
1142
804k
  int subTuCounter          = m_ispTestedModes[0].subTuCounter;
1143
804k
  TransformUnit& currTU     = *cs.getTU(currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter);
1144
804k
  uint32_t   currDepth      = partitioner.currTrDepth;
1145
804k
  const bool subdiv         = currTU.depth > currDepth;
1146
1147
804k
  if (subdiv)
1148
0
  {
1149
0
    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
1150
0
    {
1151
0
      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
1152
0
    }
1153
0
    else if (currTU.cu->ispMode)
1154
0
    {
1155
0
      partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs);
1156
0
    }
1157
0
    else
1158
0
      THROW("Implicit TU split not available!");
1159
1160
0
    do
1161
0
    {
1162
0
      xEncCoeffQT(cs, partitioner, compID, cuCtx, subTuCounter, m_ispTestedModes[0].IspType);
1163
0
      subTuCounter += subTuCounter != -1 ? 1 : 0;
1164
0
    } while( partitioner.nextPart( cs ) );
1165
1166
0
    partitioner.exitCurrSplit();
1167
0
  }
1168
804k
  else
1169
1170
804k
  if( currArea.blocks[compID].valid() )
1171
804k
  {
1172
804k
    if( compID == COMP_Cr )
1173
301k
    {
1174
301k
      const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 );
1175
301k
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1176
301k
    }
1177
804k
    if( TU::getCbf( currTU, compID ) )
1178
243k
    {
1179
243k
      if( isLuma(compID) )
1180
26.8k
      {
1181
26.8k
        m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
1182
26.8k
        m_CABACEstimator->mts_idx( *currTU.cu, cuCtx );
1183
26.8k
      }
1184
216k
      else
1185
216k
        m_CABACEstimator->residual_coding( currTU, compID );
1186
243k
    }
1187
804k
  }
1188
804k
}
1189
1190
uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool luma, CUCtx *cuCtx )
1191
502k
{
1192
502k
  m_CABACEstimator->resetBits();
1193
1194
502k
  xEncIntraHeader( cs, partitioner, luma );
1195
502k
  xEncSubdivCbfQT( cs, partitioner, luma );
1196
1197
502k
  if( luma )
1198
200k
  {
1199
200k
    xEncCoeffQT( cs, partitioner, COMP_Y, cuCtx );
1200
1201
200k
    CodingUnit &cu = *cs.cus[0];
1202
200k
    if (cuCtx /*&& CU::isSepTree(cu)*/
1203
126k
      && (!cu.ispMode || (cu.lfnstIdx && m_ispTestedModes[0].subTuCounter == 0)
1204
9.57k
        || (!cu.lfnstIdx
1205
8.16k
          && m_ispTestedModes[0].subTuCounter == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1)))
1206
117k
    {
1207
117k
      m_CABACEstimator->residual_lfnst_mode( cu, *cuCtx );
1208
117k
    }
1209
200k
  }
1210
301k
  else
1211
301k
  {
1212
301k
    xEncCoeffQT( cs, partitioner, COMP_Cb );
1213
301k
    xEncCoeffQT( cs, partitioner, COMP_Cr );
1214
301k
  }
1215
1216
502k
  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
1217
502k
  return fracBits;
1218
502k
}
1219
1220
uint64_t IntraSearch::xGetIntraFracBitsQTChroma(const TransformUnit& currTU, const ComponentID compID, CUCtx *cuCtx)
1221
1.88M
{
1222
1.88M
  m_CABACEstimator->resetBits();
1223
1224
1.88M
  if ( currTU.jointCbCr )
1225
279k
  {
1226
279k
    const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 );
1227
279k
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask>>1, currTU.blocks[ COMP_Cb ], currTU.depth, false );
1228
279k
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask &1, currTU.blocks[ COMP_Cr ], currTU.depth, cbfMask>>1 );
1229
279k
    if( cbfMask )
1230
279k
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1231
279k
    if (cbfMask >> 1)
1232
278k
      m_CABACEstimator->residual_coding( currTU, COMP_Cb, cuCtx );
1233
279k
    if (cbfMask & 1)
1234
279k
      m_CABACEstimator->residual_coding( currTU, COMP_Cr, cuCtx );
1235
279k
  }
1236
1.60M
  else
1237
1.60M
  {
1238
1.60M
    if ( compID == COMP_Cb )
1239
802k
      m_CABACEstimator->cbf_comp( *currTU.cu, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false );
1240
802k
    else
1241
802k
    {
1242
802k
      const bool cbCbf    = TU::getCbf( currTU, COMP_Cb );
1243
802k
      const bool crCbf    = TU::getCbf( currTU, compID );
1244
802k
      const int  cbfMask  = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 );
1245
802k
      m_CABACEstimator->cbf_comp( *currTU.cu, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf );
1246
802k
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1247
802k
    }
1248
1.60M
  }
1249
1250
1.88M
  if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) )
1251
563k
  {
1252
563k
    m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
1253
563k
  }
1254
1255
1.88M
  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
1256
1.88M
  return fracBits;
1257
1.88M
}
1258
1259
void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID compID, const bool checkCrossCPrediction, Distortion &ruiDist, uint32_t *numSig, PelUnitBuf *predBuf, const bool loadTr)
1260
2.09M
{
1261
2.09M
  if (!tu.blocks[compID].valid())
1262
0
  {
1263
0
    return;
1264
0
  }
1265
1266
2.09M
  CodingStructure &cs             = *tu.cs;
1267
2.09M
  const CompArea      &area       = tu.blocks[compID];
1268
2.09M
  const SPS           &sps        = *cs.sps;
1269
2.09M
  const ReshapeData&  reshapeData = cs.picture->reshapeData;
1270
1271
2.09M
  const ChannelType    chType     = toChannelType(compID);
1272
2.09M
  const int            bitDepth   = sps.bitDepths[chType];
1273
1274
2.09M
  CPelBuf        piOrg            = cs.getOrgBuf    (area);
1275
2.09M
  PelBuf         piPred           = cs.getPredBuf   (area);
1276
2.09M
  PelBuf         piResi           = cs.getResiBuf   (area);
1277
2.09M
  PelBuf         piReco           = cs.getRecoBuf   (area);
1278
1279
2.09M
  const CodingUnit& cu            = *tu.cu;
1280
1281
  //===== init availability pattern =====
1282
2.09M
  CHECK( tu.jointCbCr && compID == COMP_Cr, "wrong combination of compID and jointCbCr" );
1283
2.09M
  bool jointCbCr = tu.jointCbCr && compID == COMP_Cb;
1284
1285
2.09M
  if ( isLuma(compID) )
1286
206k
  {
1287
206k
    bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu );
1288
206k
    bool firstTBInPredReg  = false;
1289
206k
    CompArea areaPredReg(COMP_Y, tu.chromaFormat, area);
1290
206k
    if (tu.cu->ispMode )
1291
20.7k
    {
1292
20.7k
      firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, area);
1293
20.7k
      if (predRegDiffFromTB)
1294
0
      {
1295
0
        if (firstTBInPredReg)
1296
0
        {
1297
0
          CU::adjustPredArea(areaPredReg);
1298
0
          initIntraPatternChTypeISP(*tu.cu, areaPredReg, piReco);
1299
0
        }
1300
0
      }
1301
20.7k
      else
1302
20.7k
        initIntraPatternChTypeISP(*tu.cu, area, piReco);
1303
20.7k
    }
1304
185k
    else if( !predBuf )
1305
31.6k
    {
1306
31.6k
      initIntraPatternChType(*tu.cu, area);
1307
31.6k
    }
1308
1309
    //===== get prediction signal =====
1310
206k
    if (predRegDiffFromTB)
1311
0
    {
1312
0
      if (firstTBInPredReg)
1313
0
      {
1314
0
        PelBuf piPredReg = cs.getPredBuf(areaPredReg);
1315
0
        predIntraAng(compID, piPredReg, cu);
1316
0
      }
1317
0
    }
1318
206k
    else
1319
206k
    {
1320
206k
      if( predBuf )
1321
154k
      {
1322
154k
        piPred.copyFrom( predBuf->Y() );
1323
154k
      }
1324
52.3k
      else if( CU::isMIP( cu, CH_L ) )
1325
23.7k
      {
1326
23.7k
        initIntraMip( cu );
1327
23.7k
        predIntraMip( piPred, cu );
1328
23.7k
      }
1329
28.6k
      else
1330
28.6k
      {
1331
28.6k
        predIntraAng(compID, piPred, cu);
1332
28.6k
      }
1333
206k
    }
1334
206k
  }
1335
2.09M
  DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), CU::getFinalIntraMode(cu, chType) );
1336
2.09M
  const Slice &slice = *cs.slice;
1337
2.09M
  bool flag = cs.picHeader->lmcsEnabled && (slice.isIntra() || (!slice.isIntra() && reshapeData.getCTUFlag()));
1338
1339
2.09M
  if (isLuma(compID))
1340
206k
  {
1341
    //===== get residual signal =====
1342
206k
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() )
1343
0
    {
1344
0
      piResi.subtract(cs.getRspOrgBuf(area), piPred);
1345
0
    }
1346
206k
    else
1347
206k
    {
1348
206k
      piResi.subtract( piOrg, piPred );
1349
206k
    }
1350
206k
  }
1351
1352
  //===== transform and quantization =====
1353
  //--- init rate estimation arrays for RDOQ ---
1354
  //--- transform and quantization           ---
1355
2.09M
  TCoeff uiAbsSum = 0;
1356
2.09M
  const QpParam cQP(tu, compID);
1357
1358
2.09M
  m_pcTrQuant->selectLambda(compID);
1359
1360
2.09M
  flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4);
1361
2.09M
  if (flag && isChroma(compID) && cs.picHeader->lmcsChromaResidualScale )
1362
0
  {
1363
0
    int cResScaleInv = tu.chromaAdj;
1364
0
    double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv;
1365
0
    m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) );
1366
0
  }
1367
1368
2.09M
  if ( jointCbCr )
1369
283k
  {
1370
    // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
1371
283k
    const int    absIct = abs( TU::getICTMode(tu) );
1372
283k
    const double lfact  = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 );
1373
283k
    m_pcTrQuant->scaleLambda( lfact );
1374
283k
  }
1375
2.09M
  if ( sps.jointCbCr && isChroma(compID) && (tu.cu->cs->slice->sliceQp > 18) )
1376
1.26M
  {
1377
1.26M
    m_pcTrQuant->scaleLambda( 1.3 );
1378
1.26M
  }
1379
1380
2.09M
  if( isLuma(compID) )
1381
206k
  {
1382
206k
    m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
1383
1384
206k
    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum );
1385
206k
    if (tu.cu->ispMode && isLuma(compID) && CU::isISPLast(*tu.cu, area, area.compID) && CU::allLumaCBFsAreZero(*tu.cu))
1386
0
    {
1387
      // ISP has to have at least one non-zero CBF
1388
0
      ruiDist = MAX_INT;
1389
0
      return;
1390
0
    }
1391
    //--- inverse transform ---
1392
206k
    if (uiAbsSum > 0)
1393
32.4k
    {
1394
32.4k
      m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP);
1395
32.4k
    }
1396
174k
    else
1397
174k
    {
1398
174k
      piResi.fill(0);
1399
174k
    }
1400
206k
  }
1401
1.88M
  else // chroma
1402
1.88M
  {
1403
1.88M
    PelBuf          crPred = cs.getPredBuf ( COMP_Cr );
1404
1.88M
    PelBuf          crResi = cs.getResiBuf ( COMP_Cr );
1405
1.88M
    PelBuf          crReco = cs.getRecoBuf ( COMP_Cr );
1406
1407
1.88M
    int         codedCbfMask  = 0;
1408
1.88M
    ComponentID codeCompId    = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID);
1409
1.88M
    const QpParam qpCbCr(tu, codeCompId);
1410
1411
1.88M
    if( tu.jointCbCr )
1412
283k
    {
1413
283k
      ComponentID otherCompId = ( codeCompId==COMP_Cr ? COMP_Cb : COMP_Cr );
1414
283k
      tu.getCoeffs( otherCompId ).fill(0); // do we need that?
1415
283k
      TU::setCbfAtDepth (tu, otherCompId, tu.depth, false );
1416
283k
    }
1417
1.88M
    PelBuf& codeResi = ( codeCompId == COMP_Cr ? crResi : piResi );
1418
1.88M
    uiAbsSum = 0;
1419
1.88M
    m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
1420
1.88M
    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum );
1421
1.88M
    if( uiAbsSum > 0 )
1422
842k
    {
1423
842k
      m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
1424
842k
      codedCbfMask += ( codeCompId == COMP_Cb ? 2 : 1 );
1425
842k
    }
1426
1.04M
    else
1427
1.04M
    {
1428
1.04M
      codeResi.fill(0);
1429
1.04M
    }
1430
1431
1.88M
    if( tu.jointCbCr )
1432
283k
    {
1433
283k
      if( tu.jointCbCr == 3 && codedCbfMask == 2 )
1434
278k
      {
1435
278k
        codedCbfMask = 3;
1436
278k
        TU::setCbfAtDepth (tu, COMP_Cr, tu.depth, true );
1437
278k
      }
1438
283k
      if( tu.jointCbCr != codedCbfMask )
1439
3.55k
      {
1440
3.55k
        ruiDist = MAX_DISTORTION;
1441
3.55k
        return;
1442
3.55k
      }
1443
279k
      m_pcTrQuant->invTransformICT( tu, piResi, crResi );
1444
279k
      uiAbsSum = codedCbfMask;
1445
279k
    }
1446
1447
    //===== reconstruction =====
1448
1.88M
    if ( flag && uiAbsSum > 0 && cs.picHeader->lmcsChromaResidualScale )
1449
0
    {
1450
0
      piResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[compID]);
1451
1452
0
      if( jointCbCr )
1453
0
      {
1454
0
        crResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cr]);
1455
0
      }
1456
0
    }
1457
1458
1.88M
    if( jointCbCr )
1459
279k
    {
1460
279k
      crReco.reconstruct(crPred, crResi, cs.slice->clpRngs[ COMP_Cr ]);
1461
279k
    }
1462
1.88M
  }
1463
2.09M
  piReco.reconstruct(piPred, piResi, cs.slice->clpRngs[ compID ]);
1464
  
1465
1466
1467
  //===== update distortion =====
1468
2.09M
  const bool reshapeIntraCMD = m_pcEncCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ;
1469
2.09M
  if(((cs.picHeader->lmcsEnabled && (reshapeData.getCTUFlag() || (isChroma(compID) && reshapeIntraCMD))) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled ) )
1470
0
  {
1471
0
    const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] );
1472
0
    if( compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
1473
0
    {
1474
0
      PelBuf tmpRecLuma = cs.getRspRecoBuf(area);
1475
0
      tmpRecLuma.rspSignal( piReco, reshapeData.getInvLUT());
1476
0
      ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
1477
0
    }
1478
0
    else
1479
0
    {
1480
0
      ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma );
1481
0
      if( jointCbCr )
1482
0
      {
1483
0
        CPelBuf         crOrg  = cs.getOrgBuf  ( COMP_Cr );
1484
0
        PelBuf          crReco = cs.getRecoBuf ( COMP_Cr );
1485
0
        ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE_WTD, &orgLuma );
1486
0
      }
1487
0
    }
1488
0
  }
1489
2.09M
  else
1490
2.09M
  {
1491
2.09M
    ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE );
1492
2.09M
    if( jointCbCr )
1493
279k
    {
1494
279k
      CPelBuf         crOrg  = cs.getOrgBuf  ( COMP_Cr );
1495
279k
      PelBuf          crReco = cs.getRecoBuf ( COMP_Cr );
1496
279k
      ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE );
1497
279k
    }
1498
2.09M
  }
1499
2.09M
}
1500
1501
void IntraSearch::xIntraCodingLumaQT(CodingStructure& cs, Partitioner& partitioner, PelUnitBuf* predBuf, const double bestCostSoFar, int numMode, bool disableMTS)
1502
124k
{
1503
124k
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_RD_SEARCH_LUMA, &cs, partitioner.chType );
1504
124k
  const UnitArea& currArea  = partitioner.currArea();
1505
124k
  uint32_t        currDepth = partitioner.currTrDepth;
1506
124k
  Distortion singleDistLuma = 0;
1507
124k
  uint32_t   numSig         = 0;
1508
124k
  const SPS &sps            = *cs.sps;
1509
124k
  CodingUnit &cu            = *cs.cus[0];
1510
124k
  bool mtsAllowed = (numMode < 0) || disableMTS ? false : CU::isMTSAllowed(cu, COMP_Y);
1511
124k
  uint64_t singleFracBits   = 0;
1512
124k
  bool   splitCbfLumaSum    = false;
1513
124k
  double bestCostForISP     = bestCostSoFar;
1514
124k
  double dSingleCost        = MAX_DOUBLE;
1515
124k
  int endLfnstIdx           = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (currArea.lwidth() < 8 || currArea.lheight() < 8))
1516
124k
                           || (currArea.lwidth() > sps.getMaxTbSize() || currArea.lheight() > sps.getMaxTbSize()) || !sps.LFNST || (numMode < 0) ? 0 : 2;
1517
124k
  const bool useTS          = cs.picture->useTS;
1518
124k
  numMode                   = (numMode < 0) ? -numMode : numMode;
1519
1520
124k
  if (cu.mipFlag && !allowLfnstWithMip(cu.lumaSize()))
1521
2.08k
  {
1522
2.08k
    endLfnstIdx = 0;
1523
2.08k
  }
1524
124k
  int bestMTS = 0;
1525
124k
  int EndMTS  = mtsAllowed ? m_pcEncCfg->m_MTSIntraMaxCand : 0;
1526
124k
  if (cu.ispMode && (EndMTS || endLfnstIdx))
1527
5.63k
  {
1528
5.63k
    EndMTS = 0;
1529
5.63k
    if ((m_ispTestedModes[1].numTotalParts[cu.ispMode - 1] == 0)
1530
302
     && (m_ispTestedModes[2].numTotalParts[cu.ispMode - 1] == 0))
1531
302
    {
1532
302
      endLfnstIdx = 0;
1533
302
    }
1534
5.63k
  }
1535
124k
  if (cu.bdpcmM[CH_L])
1536
7.68k
  {
1537
7.68k
    endLfnstIdx = 0;
1538
7.68k
    EndMTS = 0;
1539
7.68k
  }
1540
124k
  bool checkTransformSkip = sps.transformSkip;
1541
1542
124k
  SizeType transformSkipMaxSize = 1 << sps.log2MaxTransformSkipBlockSize;
1543
124k
  bool tsAllowed = useTS  && cu.cs->sps->transformSkip && (!cu.ispMode) && (!cu.bdpcmM[CH_L]) && (!cu.sbtInfo);
1544
124k
  tsAllowed &= cu.blocks[COMP_Y].width <= transformSkipMaxSize && cu.blocks[COMP_Y].height <= transformSkipMaxSize;
1545
124k
  if (tsAllowed)
1546
15.4k
  {
1547
15.4k
    EndMTS += 1;
1548
15.4k
  }
1549
124k
  if (endLfnstIdx || EndMTS)
1550
49.8k
  {
1551
49.8k
    bool       splitCbfLuma  = false;
1552
49.8k
    const PartSplit ispType  = CU::getISPType(cu, COMP_Y);
1553
49.8k
    CUCtx cuCtx;
1554
49.8k
    cuCtx.isDQPCoded         = true;
1555
49.8k
    cuCtx.isChromaQpAdjCoded = true;
1556
49.8k
    cs.cost                  = 0.0;
1557
49.8k
    Distortion       singleDistTmpLuma = 0;
1558
49.8k
    uint64_t         singleTmpFracBits = 0;
1559
49.8k
    double           singleCostTmp     = 0;
1560
49.8k
    const TempCtx    ctxStart          (m_CtxCache, m_CABACEstimator->getCtx());
1561
49.8k
          TempCtx    ctxBest           (m_CtxCache);
1562
49.8k
    CodingStructure &saveCS            = *m_pSaveCS[cu.ispMode?0:1];
1563
49.8k
    TransformUnit *  tmpTU             = nullptr;
1564
49.8k
    int              bestLfnstIdx      = 0;
1565
49.8k
    int              startLfnstIdx     = 0;
1566
    // speedUps LFNST
1567
49.8k
    bool   rapidLFNST                  = false;
1568
49.8k
    bool   rapidDCT                    = false;
1569
49.8k
    double thresholdDCT                = 1;
1570
1571
49.8k
    if (m_pcEncCfg->m_MTS == 2)
1572
0
    {
1573
0
      thresholdDCT += 1.4 / sqrt(cu.lwidth() * cu.lheight());
1574
0
    }
1575
1576
49.8k
    if (m_pcEncCfg->m_LFNST > 1)
1577
0
    {
1578
0
      rapidLFNST = true;
1579
1580
0
      if (m_pcEncCfg->m_LFNST > 2)
1581
0
      {
1582
0
        rapidDCT    = true;
1583
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
1584
0
      }
1585
0
    }
1586
1587
49.8k
    saveCS.pcv              = cs.pcv;
1588
49.8k
    saveCS.picture          = cs.picture;
1589
49.8k
    saveCS.area.repositionTo( cs.area);
1590
1591
49.8k
    if (cu.ispMode)
1592
5.33k
    {
1593
5.33k
      partitioner.splitCurrArea(ispType, cs);
1594
5.33k
    }
1595
1596
49.8k
    TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1597
1598
49.8k
    if (cu.ispMode)
1599
5.33k
    {
1600
5.33k
      saveCS.clearTUs();
1601
5.33k
      do
1602
21.3k
      {
1603
21.3k
        saveCS.addTU(
1604
21.3k
          CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1605
21.3k
          partitioner.chType, cs.cus[0]);
1606
21.3k
      } while (partitioner.nextPart(cs));
1607
1608
5.33k
      partitioner.exitCurrSplit();
1609
5.33k
    }
1610
44.5k
    else
1611
44.5k
    {
1612
44.5k
      tmpTU = saveCS.tus.empty() ? &saveCS.addTU( currArea, partitioner.chType, nullptr ) : saveCS.tus.front();
1613
44.5k
      tmpTU->initData();
1614
44.5k
      tmpTU->UnitArea::operator=( currArea );
1615
44.5k
    }
1616
1617
1618
49.8k
    std::vector<TrMode> trModes{ TrMode(0, true) };
1619
49.8k
    if (tsAllowed)
1620
15.4k
    {
1621
15.4k
      trModes.push_back(TrMode(1, true));
1622
15.4k
    }
1623
49.8k
    double dct2Cost           = MAX_DOUBLE;
1624
49.8k
    double trGrpStopThreshold = 1.001;
1625
49.8k
    double trGrpBestCost      = MAX_DOUBLE;
1626
1627
49.8k
    if (mtsAllowed)
1628
0
    {
1629
0
      if (m_pcEncCfg->m_LFNST)
1630
0
      {
1631
0
        uint32_t uiIntraMode = cs.cus[0]->intraDir[partitioner.chType];
1632
0
        int MTScur           = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
1633
1634
0
        trModes.push_back(TrMode(     2, true));
1635
0
        trModes.push_back(TrMode(MTScur, true));
1636
1637
0
        MTScur = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
1638
1639
0
        trModes.push_back(TrMode(MTScur,            true));
1640
0
        trModes.push_back(TrMode(MTS_DST7_DST7 + 3, true));
1641
0
      }
1642
0
      else
1643
0
      {
1644
0
        for (int i = 2; i < 6; i++)
1645
0
        {
1646
0
          trModes.push_back(TrMode(i, true));
1647
0
        }
1648
0
      }
1649
0
    }
1650
1651
49.8k
    if ((EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed))
1652
15.4k
    {
1653
15.4k
      xPreCheckMTS(tu, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, predBuf);
1654
15.4k
      if (!mtsAllowed && !trModes[1].second)
1655
2.89k
      {
1656
2.89k
        EndMTS = 0;
1657
2.89k
      }
1658
15.4k
    }
1659
1660
49.8k
    bool NStopMTS = true;
1661
1662
99.7k
    for (int modeId = 0; modeId <= EndMTS && NStopMTS; modeId++)
1663
49.8k
    {
1664
49.8k
      if (modeId > 1)
1665
0
      {
1666
0
        trGrpBestCost = MAX_DOUBLE;
1667
0
      }
1668
177k
      for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
1669
127k
      {
1670
127k
        if (lfnstIdx && modeId)
1671
0
        {
1672
0
          continue;
1673
0
        }
1674
127k
        if (mtsAllowed || tsAllowed)
1675
24.2k
        {
1676
24.2k
          if (m_pcEncCfg->m_TS && bestMTS == MTS_SKIP)
1677
0
          {
1678
0
            break;
1679
0
          }
1680
24.2k
          if (!m_pcEncCfg->m_LFNST && !trModes[modeId].second && mtsAllowed)
1681
0
          {
1682
0
            continue;
1683
0
          }
1684
1685
24.2k
          tu.mtsIdx[COMP_Y] = trModes[modeId].first;
1686
24.2k
        }
1687
1688
127k
        if (cu.ispMode && lfnstIdx)
1689
10.6k
        {
1690
10.6k
          if (m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] == 0)
1691
0
          {
1692
0
            if (lfnstIdx == 2)
1693
0
            {
1694
0
              endLfnstIdx = 1;
1695
0
            }
1696
0
            continue;
1697
0
          }
1698
10.6k
        }
1699
1700
127k
        cu.lfnstIdx                          = lfnstIdx;
1701
127k
        cuCtx.lfnstLastScanPos               = false;
1702
127k
        cuCtx.violatesLfnstConstrained[CH_L] = false;
1703
127k
        cuCtx.violatesLfnstConstrained[CH_C] = false;
1704
1705
127k
        if ((lfnstIdx != startLfnstIdx) || (modeId))
1706
77.5k
        {
1707
77.5k
          m_CABACEstimator->getCtx() = ctxStart;
1708
77.5k
        }
1709
1710
127k
        singleDistTmpLuma = 0;
1711
1712
127k
        if (cu.ispMode)
1713
15.9k
        {
1714
15.9k
          splitCbfLuma = false;
1715
1716
15.9k
          partitioner.splitCurrArea(ispType, cs);
1717
1718
15.9k
          singleCostTmp = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLuma, singleTmpFracBits, singleDistTmpLuma, cuCtx);
1719
1720
15.9k
          partitioner.exitCurrSplit();
1721
1722
15.9k
          if (modeId && (singleCostTmp == MAX_DOUBLE))
1723
0
          {
1724
0
            m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] = 0;
1725
0
          }
1726
1727
15.9k
          bool storeCost = (numMode == 1) ? true : false;
1728
1729
15.9k
          if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1730
15.9k
          {
1731
15.9k
            storeCost = true;
1732
15.9k
          }
1733
1734
15.9k
          if (storeCost)
1735
15.9k
          {
1736
15.9k
            m_ispTestedModes[0].bestCost[cu.ispMode - 1] = singleCostTmp;
1737
15.9k
          }
1738
15.9k
        }
1739
111k
        else
1740
111k
        {
1741
111k
          bool TrLoad = (EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed && (lfnstIdx == 0)) ? true : false;
1742
1743
111k
          xIntraCodingTUBlock(tu, COMP_Y, false, singleDistTmpLuma, &numSig, predBuf, TrLoad);
1744
1745
111k
          cuCtx.mtsLastScanPos = false;
1746
          //----- determine rate and r-d cost -----
1747
111k
        if ((sps.LFNST ? (modeId == EndMTS && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1748
0
        {
1749
0
          singleCostTmp = MAX_DOUBLE;
1750
0
        }
1751
111k
        else
1752
111k
        {
1753
111k
          m_ispTestedModes[0].IspType      = TU_NO_ISP;
1754
111k
          m_ispTestedModes[0].subTuCounter = -1;
1755
111k
          singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, &cuCtx);
1756
1757
111k
          if (tu.mtsIdx[COMP_Y] > MTS_SKIP)
1758
0
          {
1759
0
            if (!cuCtx.mtsLastScanPos)
1760
0
            {
1761
0
              singleCostTmp = MAX_DOUBLE;
1762
0
            }
1763
0
            else
1764
0
            {
1765
0
              singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1766
0
            }
1767
0
          }
1768
111k
          else
1769
111k
          {
1770
111k
            singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1771
111k
          }
1772
111k
        }
1773
1774
111k
          if (((EndMTS && (m_pcEncCfg->m_MTS == 2)) || rapidLFNST) && modeId == 0 && lfnstIdx == 0)
1775
0
          {
1776
0
            if (singleCostTmp > bestCostSoFar * thresholdDCT)
1777
0
            {
1778
0
              EndMTS = 0;
1779
1780
0
              if (rapidDCT)
1781
0
              {
1782
0
                endLfnstIdx = 0;   // break the loop but do not cpy best
1783
0
              }
1784
0
            }
1785
0
          }
1786
1787
111k
          if (lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode)
1788
56.1k
          {
1789
56.1k
            bool rootCbfL = false;
1790
1791
224k
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cu.cs->pcv); t++)
1792
168k
            {
1793
168k
              rootCbfL |= tu.cbf[t] != 0;
1794
168k
            }
1795
1796
56.1k
            if (rapidLFNST && !rootCbfL)
1797
0
            {
1798
0
              endLfnstIdx = lfnstIdx; // break the loop
1799
0
            }
1800
56.1k
            bool cbfAtZeroDepth = CU::isSepTree(cu)
1801
56.1k
              ? rootCbfL
1802
56.1k
              : (cs.area.chromaFormat != CHROMA_400 && std::min(cu.firstTU->blocks[1].width, cu.firstTU->blocks[1].height) < 4)
1803
1
                ? TU::getCbfAtDepth(tu, COMP_Y, currDepth)
1804
1
                : rootCbfL;
1805
1806
56.1k
            if (cbfAtZeroDepth)
1807
408
            {
1808
408
              singleCostTmp = MAX_DOUBLE;
1809
408
            }
1810
56.1k
          }
1811
111k
        }
1812
1813
127k
        if (singleCostTmp < dSingleCost)
1814
45.9k
        {
1815
45.9k
          trGrpBestCost  = singleCostTmp;
1816
45.9k
          dSingleCost    = singleCostTmp;
1817
45.9k
          singleDistLuma = singleDistTmpLuma;
1818
45.9k
          singleFracBits = singleTmpFracBits;
1819
45.9k
          bestLfnstIdx   = lfnstIdx;
1820
45.9k
          bestMTS        = modeId;
1821
1822
45.9k
          if (dSingleCost < bestCostForISP)
1823
29.1k
          {
1824
29.1k
            bestCostForISP = dSingleCost;
1825
29.1k
          }
1826
1827
45.9k
          splitCbfLumaSum = splitCbfLuma;
1828
1829
45.9k
          if (lfnstIdx == 0 && modeId == 0 && cu.ispMode == 0)
1830
44.5k
          {
1831
44.5k
            dct2Cost = singleCostTmp;
1832
1833
44.5k
            if (!TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1834
37.7k
            {
1835
37.7k
              if (rapidLFNST)
1836
0
              {
1837
0
                 endLfnstIdx = 0;   // break the loop but do not cpy best
1838
0
              }
1839
1840
37.7k
              EndMTS = 0;
1841
37.7k
            }
1842
44.5k
          }
1843
1844
45.9k
          if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1845
34.7k
          {
1846
34.7k
            if (cu.ispMode)
1847
1.09k
            {
1848
1.09k
              saveCS.getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1849
1850
5.46k
              for (uint32_t j = 0; j < cs.tus.size(); j++)
1851
4.36k
              {
1852
4.36k
                saveCS.tus[j]->copyComponentFrom(*cs.tus[j], COMP_Y);
1853
4.36k
              }
1854
1.09k
            }
1855
33.6k
            else
1856
33.6k
            {
1857
33.6k
              saveCS.getPredBuf(tu.Y()).copyFrom(cs.getPredBuf(tu.Y()));
1858
33.6k
              saveCS.getRecoBuf(tu.Y()).copyFrom(cs.getRecoBuf(tu.Y()));
1859
1860
33.6k
              tmpTU->copyComponentFrom(tu, COMP_Y);
1861
33.6k
            }
1862
1863
34.7k
            ctxBest = m_CABACEstimator->getCtx();
1864
34.7k
          }
1865
      
1866
45.9k
        }
1867
81.4k
        else
1868
81.4k
        {
1869
81.4k
          if( rapidLFNST )
1870
0
          {
1871
0
            endLfnstIdx = lfnstIdx; // break the loop
1872
0
          }
1873
81.4k
        }
1874
127k
      }
1875
49.8k
      if (m_pcEncCfg->m_LFNST && m_pcEncCfg->m_MTS == 2 && modeId && modeId != EndMTS)
1876
0
      {
1877
0
        NStopMTS = false;
1878
1879
0
        if (bestMTS || bestLfnstIdx)
1880
0
        {
1881
0
          if ((modeId > 1 && bestMTS == modeId) || modeId == 1)
1882
0
          {
1883
0
            NStopMTS = (dct2Cost / trGrpBestCost) < trGrpStopThreshold;
1884
0
          }
1885
0
        }
1886
0
      }
1887
49.8k
    }
1888
1889
49.8k
    cu.lfnstIdx = bestLfnstIdx;
1890
49.8k
    if (dSingleCost != MAX_DOUBLE)
1891
45.4k
    {
1892
45.4k
      if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1893
34.2k
      {
1894
34.2k
        if (cu.ispMode)
1895
766
        {
1896
766
          const UnitArea& currArea = partitioner.currArea();
1897
766
          cs.getRecoBuf(currArea.Y()).copyFrom(saveCS.getRecoBuf(currArea.Y()));
1898
1899
766
          if (saveCS.tus.size() != cs.tus.size())
1900
0
          {
1901
0
            partitioner.splitCurrArea(ispType, cs);
1902
1903
0
            do
1904
0
            {
1905
0
              partitioner.nextPart(cs);
1906
0
              cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1907
0
                partitioner.chType, cs.cus[0]);
1908
0
            } while (saveCS.tus.size() != cs.tus.size());
1909
1910
0
            partitioner.exitCurrSplit();
1911
0
          }
1912
1913
3.83k
          for (uint32_t j = 0; j < saveCS.tus.size(); j++)
1914
3.06k
          {
1915
3.06k
            cs.tus[j]->copyComponentFrom(*saveCS.tus[j], COMP_Y);
1916
3.06k
          }
1917
766
        }
1918
33.4k
        else
1919
33.4k
        {
1920
33.4k
          cs.getRecoBuf(tu.Y()).copyFrom(saveCS.getRecoBuf(tu.Y()));
1921
1922
33.4k
          tu.copyComponentFrom(*tmpTU, COMP_Y);
1923
33.4k
        }
1924
1925
34.2k
        m_CABACEstimator->getCtx() = ctxBest;
1926
34.2k
      }
1927
1928
      // otherwise this would've happened in useSubStructure
1929
45.4k
      cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1930
45.4k
    }
1931
49.8k
  }
1932
74.6k
  else
1933
74.6k
  {
1934
74.6k
    if (cu.ispMode)
1935
302
    {
1936
302
      const PartSplit ispType = CU::getISPType(cu, COMP_Y);
1937
302
      partitioner.splitCurrArea(ispType, cs);
1938
1939
302
      CUCtx      cuCtx;
1940
302
      dSingleCost = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLumaSum, singleFracBits, singleDistLuma, cuCtx);
1941
302
      partitioner.exitCurrSplit();
1942
302
      bool storeCost = (numMode == 1) ? true : false;
1943
302
      if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1944
302
      {
1945
302
        storeCost = true;
1946
302
      }
1947
302
      if (storeCost)
1948
302
      {
1949
302
        m_ispTestedModes[0].bestCost[cu.ispMode - 1] = dSingleCost;
1950
302
      }
1951
302
    }
1952
74.3k
    else
1953
74.3k
    {
1954
74.3k
      TransformUnit& tu =
1955
74.3k
        cs.addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1956
74.3k
      tu.depth = currDepth;
1957
1958
74.3k
      CHECK(!tu.Y().valid(), "Invalid TU");
1959
74.3k
      xIntraCodingTUBlock(tu, COMP_Y, false, singleDistLuma, &numSig, predBuf);
1960
      //----- determine rate and r-d cost -----
1961
74.3k
      m_ispTestedModes[0].IspType = TU_NO_ISP;
1962
74.3k
      m_ispTestedModes[0].subTuCounter = -1;
1963
74.3k
      singleFracBits = xGetIntraFracBitsQT(cs, partitioner, true);
1964
74.3k
      dSingleCost = m_pcRdCost->calcRdCost(singleFracBits, singleDistLuma);
1965
74.3k
    }
1966
74.6k
  }
1967
1968
124k
  if (cu.ispMode)
1969
5.63k
  { 
1970
5.63k
    for (auto& ptu : cs.tus)
1971
9.01k
    {
1972
9.01k
      if (currArea.Y().contains(ptu->Y()))
1973
9.01k
      {
1974
9.01k
        TU::setCbfAtDepth(*ptu, COMP_Y, currDepth, splitCbfLumaSum ? 1 : 0);
1975
9.01k
      }
1976
9.01k
    }
1977
5.63k
  }
1978
124k
  cs.dist     += singleDistLuma;
1979
124k
  cs.fracBits += singleFracBits;
1980
124k
  cs.cost      = dSingleCost;
1981
1982
124k
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!cs.slice->isIntra() + cs.slice->depth] );
1983
124k
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !cs.slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( cs.area.lheight() )][Log2( cs.area.lwidth() )] );
1984
124k
}
1985
1986
ChromaCbfs IntraSearch::xIntraChromaCodingQT(CodingStructure& cs, Partitioner& partitioner)
1987
301k
{
1988
301k
  UnitArea    currArea      = partitioner.currArea();
1989
1990
301k
  if( !currArea.Cb().valid() ) 
1991
0
    return ChromaCbfs(false);
1992
1993
301k
  TransformUnit& currTU     = *cs.getTU( currArea.chromaPos(), CH_C );
1994
301k
  const CodingUnit& cu  = *cs.getCU( currArea.chromaPos(), CH_C, TREE_D );
1995
301k
  ChromaCbfs cbfs(false);
1996
301k
  uint32_t   currDepth = partitioner.currTrDepth;
1997
301k
  const bool useTS = cs.picture->useTS;
1998
301k
  if (currDepth == currTU.depth)
1999
301k
  {
2000
301k
    if (!currArea.Cb().valid() || !currArea.Cr().valid())
2001
0
    {
2002
0
      return cbfs;
2003
0
    }
2004
2005
301k
    CodingStructure& saveCS = *m_pSaveCS[1];
2006
301k
    saveCS.pcv = cs.pcv;
2007
301k
    saveCS.picture = cs.picture;
2008
301k
    saveCS.area.repositionTo(cs.area);
2009
2010
301k
    TransformUnit& tmpTU = saveCS.tus.empty() ? saveCS.addTU(currArea, partitioner.chType, nullptr) : *saveCS.tus.front();
2011
301k
    tmpTU.initData();
2012
301k
    tmpTU.UnitArea::operator=(currArea);
2013
301k
    const unsigned      numTBlocks = getNumberValidTBlocks(*cs.pcv);
2014
2015
301k
    CompArea& cbArea = currTU.blocks[COMP_Cb];
2016
301k
    CompArea& crArea = currTU.blocks[COMP_Cr];
2017
301k
    double     bestCostCb = MAX_DOUBLE;
2018
301k
    double     bestCostCr = MAX_DOUBLE;
2019
301k
    Distortion bestDistCb = 0;
2020
301k
    Distortion bestDistCr = 0;
2021
2022
301k
    TempCtx ctxStartTU(m_CtxCache);
2023
301k
    TempCtx ctxStart(m_CtxCache);
2024
301k
    TempCtx ctxBest(m_CtxCache);
2025
2026
301k
    ctxStartTU = m_CABACEstimator->getCtx();
2027
301k
    ctxStart = m_CABACEstimator->getCtx();
2028
301k
    currTU.jointCbCr = 0;
2029
2030
    // Do predictions here to avoid repeating the "default0Save1Load2" stuff
2031
301k
    int  predMode = cu.bdpcmM[CH_C] ? BDPCM_IDX : CU::getFinalIntraMode(cu, CH_C);
2032
2033
301k
    PelBuf piPredCb = cs.getPredBuf(COMP_Cb);
2034
301k
    PelBuf piPredCr = cs.getPredBuf(COMP_Cr);
2035
2036
301k
    initIntraPatternChType(*currTU.cu, cbArea);
2037
301k
    initIntraPatternChType(*currTU.cu, crArea);
2038
2039
301k
    if (CU::isLMCMode(predMode))
2040
22.1k
    {
2041
22.1k
      loadLMLumaRecPels(cu, cbArea);
2042
22.1k
      predIntraChromaLM(COMP_Cb, piPredCb, cu, cbArea, predMode);
2043
22.1k
      predIntraChromaLM(COMP_Cr, piPredCr, cu, crArea, predMode);
2044
22.1k
    }
2045
279k
    else
2046
279k
    {
2047
279k
      predIntraAng(COMP_Cb, piPredCb, cu);
2048
279k
      predIntraAng(COMP_Cr, piPredCr, cu);
2049
279k
    }
2050
2051
    // determination of chroma residuals including reshaping and cross-component prediction
2052
    //----- get chroma residuals -----
2053
301k
    PelBuf resiCb = cs.getResiBuf(COMP_Cb);
2054
301k
    PelBuf resiCr = cs.getResiBuf(COMP_Cr);
2055
301k
    resiCb.subtract(cs.getOrgBuf(COMP_Cb), piPredCb);
2056
301k
    resiCr.subtract(cs.getOrgBuf(COMP_Cr), piPredCr);
2057
2058
    //----- get reshape parameter ----
2059
301k
    ReshapeData& reshapeData = cs.picture->reshapeData;
2060
301k
    bool doReshaping = (cs.picHeader->lmcsEnabled && cs.picHeader->lmcsChromaResidualScale && (cs.slice->isIntra() || reshapeData.getCTUFlag()) && (cbArea.width * cbArea.height > 4));
2061
301k
    if (doReshaping)
2062
0
    {
2063
0
      const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].size()));
2064
0
      const CompArea& areaY = CompArea(COMP_Y, currTU.chromaFormat, area);
2065
0
      currTU.chromaAdj = reshapeData.calculateChromaAdjVpduNei(currTU, areaY, currTU.cu->treeType);
2066
0
    }
2067
2068
    //===== store original residual signals (std and crossCompPred) =====
2069
1.81M
    for( int k = 0; k < 5; k++ )
2070
1.50M
    {
2071
1.50M
      m_orgResiCb[k].compactResize( cbArea );
2072
1.50M
      m_orgResiCr[k].compactResize( crArea );
2073
1.50M
    }
2074
603k
    for (int k = 0; k < 1; k += 4)
2075
301k
    {
2076
301k
      m_orgResiCb[k].copyFrom(resiCb);
2077
301k
      m_orgResiCr[k].copyFrom(resiCr);
2078
2079
301k
      if (doReshaping)
2080
0
      {
2081
0
        int cResScaleInv = currTU.chromaAdj;
2082
0
        m_orgResiCb[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cb]);
2083
0
        m_orgResiCr[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cr]);
2084
0
      }
2085
301k
    }
2086
2087
301k
    CUCtx cuCtx;
2088
301k
    cuCtx.isDQPCoded = true;
2089
301k
    cuCtx.isChromaQpAdjCoded = true;
2090
301k
    cuCtx.lfnstLastScanPos = false;
2091
2092
301k
    CodingStructure& saveCScur = *m_pSaveCS[2];
2093
2094
301k
    saveCScur.pcv = cs.pcv;
2095
301k
    saveCScur.picture = cs.picture;
2096
301k
    saveCScur.area.repositionTo(cs.area);
2097
2098
301k
    TransformUnit& tmpTUcur = saveCScur.tus.empty() ? saveCScur.addTU(currArea, partitioner.chType, nullptr) : *saveCScur.tus.front();
2099
301k
    tmpTUcur.initData();
2100
301k
    tmpTUcur.UnitArea::operator=(currArea);
2101
2102
301k
    TempCtx ctxBestTUL(m_CtxCache);
2103
2104
301k
    const SPS& sps = *cs.sps;
2105
301k
    double     bestCostCbcur = MAX_DOUBLE;
2106
301k
    double     bestCostCrcur = MAX_DOUBLE;
2107
301k
    Distortion bestDistCbcur = 0;
2108
301k
    Distortion bestDistCrcur = 0;
2109
2110
301k
    int  endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8))
2111
288k
      || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) || !sps.LFNST ? 0 : 2;
2112
301k
    int  startLfnstIdx = 0;
2113
301k
    int  bestLfnstIdx = 0;
2114
301k
    bool testLFNST = sps.LFNST;
2115
2116
    // speedUps LFNST
2117
301k
    bool rapidLFNST = false;
2118
301k
    if (m_pcEncCfg->m_LFNST > 1)
2119
0
    {
2120
0
      rapidLFNST = true;
2121
0
      if (m_pcEncCfg->m_LFNST > 2)
2122
0
      {
2123
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
2124
0
      }
2125
0
    }
2126
301k
    int ts_used = 0;
2127
301k
    bool testTS = false;
2128
301k
    if (partitioner.chType != CH_C)
2129
0
    {
2130
0
      startLfnstIdx = currTU.cu->lfnstIdx;
2131
0
      endLfnstIdx = currTU.cu->lfnstIdx;
2132
0
      bestLfnstIdx = currTU.cu->lfnstIdx;
2133
0
      testLFNST  = false;
2134
0
      rapidLFNST = false;
2135
0
      ts_used = currTU.mtsIdx[COMP_Y];
2136
0
    }
2137
301k
    if (cu.bdpcmM[CH_C])
2138
40.3k
    {
2139
40.3k
      endLfnstIdx = 0;
2140
40.3k
      testLFNST = false;
2141
40.3k
    }
2142
2143
301k
    double dSingleCostAll = MAX_DOUBLE;
2144
301k
    double singleCostTmpAll = 0;
2145
2146
1.10M
    for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
2147
802k
    {
2148
802k
      if (rapidLFNST && lfnstIdx)
2149
0
      {
2150
0
        if ((lfnstIdx == 2) && (bestLfnstIdx == 0))
2151
0
        {
2152
0
          continue;
2153
0
        }
2154
0
      }
2155
2156
802k
      currTU.cu->lfnstIdx = lfnstIdx;
2157
802k
      if (lfnstIdx)
2158
500k
      {
2159
500k
        m_CABACEstimator->getCtx() = ctxStartTU;
2160
500k
      }
2161
2162
802k
      cuCtx.lfnstLastScanPos = false;
2163
802k
      cuCtx.violatesLfnstConstrained[CH_L] = false;
2164
802k
      cuCtx.violatesLfnstConstrained[CH_C] = false;
2165
2166
2.40M
      for (uint32_t c = COMP_Cb; c < numTBlocks; c++)
2167
1.60M
      {
2168
1.60M
        const ComponentID compID = ComponentID(c);
2169
1.60M
        const CompArea& area = currTU.blocks[compID];
2170
1.60M
        double     dSingleCost = MAX_DOUBLE;
2171
1.60M
        Distortion singleDistCTmp = 0;
2172
1.60M
        double     singleCostTmp = 0;
2173
1.60M
        bool tsAllowed = useTS && TU::isTSAllowed(currTU, compID) && m_pcEncCfg->m_useChromaTS && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2174
1.60M
        if ((partitioner.chType == CH_L) && (!ts_used))
2175
0
        {
2176
0
          tsAllowed = false;
2177
0
        }
2178
1.60M
        uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests       
2179
1.60M
        std::vector<TrMode> trModes;
2180
1.60M
        if (nNumTransformCands > 1)
2181
0
        {
2182
0
          trModes.push_back(TrMode(0, true));   // DCT2
2183
0
          trModes.push_back(TrMode(1, true));   // TS
2184
0
          testTS = true;
2185
0
        }
2186
1.60M
        bool cbfDCT2 = true;
2187
18.4E
        const bool isLastMode = testLFNST || cs.sps->jointCbCr ||  tsAllowed ? false : true;
2188
1.60M
        int bestModeId = 0;
2189
1.60M
        ctxStart = m_CABACEstimator->getCtx();
2190
3.21M
        for (int modeId = 0; modeId < nNumTransformCands; modeId++)
2191
1.60M
        {
2192
1.60M
          if (doReshaping || lfnstIdx || modeId)
2193
1.00M
          {
2194
1.00M
            resiCb.copyFrom(m_orgResiCb[0]);
2195
1.00M
            resiCr.copyFrom(m_orgResiCr[0]);
2196
1.00M
          }
2197
1.60M
          if (modeId == 0)
2198
1.60M
          {
2199
1.60M
            if ( tsAllowed)
2200
0
            {
2201
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, compID);
2202
0
            }
2203
1.60M
          }
2204
2205
1.60M
          currTU.mtsIdx[compID] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : modeId;
2206
2207
1.60M
          if (modeId)
2208
0
          {
2209
0
            if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP)
2210
0
            {
2211
0
              break;
2212
0
            }
2213
0
            m_CABACEstimator->getCtx() = ctxStart;
2214
0
          }
2215
1.60M
          singleDistCTmp = 0;
2216
1.60M
          if (tsAllowed)
2217
0
          {
2218
0
            xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp, 0, 0, true);
2219
0
            if ((modeId == 0) && (!trModes[modeId + 1].second))
2220
0
            {
2221
0
              nNumTransformCands = 1;
2222
0
            }
2223
0
          }
2224
1.60M
          else
2225
1.60M
        {
2226
1.60M
          xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp);
2227
1.60M
        }
2228
1.60M
        if (((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmM[CH_C])
2229
0
          && !TU::getCbf(currTU, compID)))   // In order not to code TS flag when cbf is zero, the case for TS with
2230
                                             // cbf being zero is forbidden.
2231
0
        {
2232
0
          singleCostTmp = MAX_DOUBLE;
2233
0
        }
2234
1.60M
        else
2235
1.60M
        {
2236
1.60M
          uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID, &cuCtx);
2237
1.60M
          singleCostTmp = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp);
2238
1.60M
        }
2239
2240
1.60M
        if (singleCostTmp < dSingleCost)
2241
1.60M
        {
2242
1.60M
          dSingleCost = singleCostTmp;
2243
2244
1.60M
          if (compID == COMP_Cb)
2245
802k
          {
2246
802k
            bestCostCb = singleCostTmp;
2247
802k
            bestDistCb = singleDistCTmp;
2248
802k
          }
2249
802k
          else
2250
802k
          {
2251
802k
            bestCostCr = singleCostTmp;
2252
802k
            bestDistCr = singleDistCTmp;
2253
802k
          }
2254
1.60M
          bestModeId = modeId;
2255
1.60M
          if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2)
2256
1.52M
          {
2257
1.52M
            cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth);
2258
1.52M
          }
2259
1.60M
          if (!isLastMode)
2260
1.60M
          {
2261
1.60M
            saveCS.getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
2262
1.60M
            tmpTU.copyComponentFrom(currTU, compID);
2263
1.60M
            ctxBest = m_CABACEstimator->getCtx();
2264
1.60M
          }
2265
1.60M
        }
2266
1.60M
        }
2267
1.60M
        if (testTS && ((c == COMP_Cb && bestModeId < (nNumTransformCands - 1)) ))
2268
0
        {
2269
0
          m_CABACEstimator->getCtx() = ctxBest;
2270
2271
0
          currTU.copyComponentFrom(tmpTU, COMP_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf
2272
0
        }
2273
1.60M
      }
2274
2275
802k
      singleCostTmpAll = bestCostCb + bestCostCr;
2276
2277
802k
      bool rootCbfL = false;
2278
802k
      if (testLFNST)
2279
762k
      {
2280
3.04M
        for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2281
2.28M
        {
2282
2.28M
          rootCbfL |= bool(tmpTU.cbf[t]);
2283
2.28M
        }
2284
762k
        if (rapidLFNST && !rootCbfL)
2285
0
        {
2286
0
          endLfnstIdx = lfnstIdx; // end this
2287
0
        }
2288
762k
      }
2289
2290
802k
      if (testLFNST && lfnstIdx && !cuCtx.lfnstLastScanPos)
2291
330k
      {
2292
330k
        bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu)
2293
330k
          ? rootCbfL : (cs.area.chromaFormat != CHROMA_400
2294
0
            && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2295
0
          ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2296
330k
        if (cbfAtZeroDepth)
2297
1.61k
        {
2298
1.61k
          singleCostTmpAll = MAX_DOUBLE;
2299
1.61k
        }
2300
330k
      }
2301
802k
      if ((testLFNST || testTS) && (singleCostTmpAll < dSingleCostAll))
2302
261k
      {
2303
261k
        bestLfnstIdx = lfnstIdx;
2304
261k
        if ((lfnstIdx != endLfnstIdx) || testTS)
2305
250k
        {
2306
250k
          dSingleCostAll = singleCostTmpAll;
2307
2308
250k
          bestCostCbcur = bestCostCb;
2309
250k
          bestCostCrcur = bestCostCr;
2310
250k
          bestDistCbcur = bestDistCb;
2311
250k
          bestDistCrcur = bestDistCr;
2312
2313
250k
          saveCScur.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2314
250k
          saveCScur.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2315
2316
250k
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cb);
2317
250k
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cr);
2318
250k
        }
2319
261k
        ctxBestTUL = m_CABACEstimator->getCtx();
2320
261k
      }
2321
802k
    }
2322
301k
    if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2323
250k
    {
2324
250k
      bestCostCb = bestCostCbcur;
2325
250k
      bestCostCr = bestCostCrcur;
2326
250k
      bestDistCb = bestDistCbcur;
2327
250k
      bestDistCr = bestDistCrcur;
2328
250k
      currTU.cu->lfnstIdx = bestLfnstIdx;
2329
250k
      if (!cs.sps->jointCbCr)
2330
0
      {
2331
0
        cs.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2332
0
        cs.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2333
2334
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2335
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2336
2337
0
        m_CABACEstimator->getCtx() = ctxBestTUL;
2338
0
      }
2339
250k
    }
2340
2341
301k
    Distortion bestDistCbCr = bestDistCb + bestDistCr;
2342
2343
301k
    if (cs.sps->jointCbCr)
2344
301k
    {
2345
301k
      if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2346
250k
      {
2347
250k
        saveCS.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2348
250k
        saveCS.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2349
2350
250k
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2351
250k
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2352
250k
        m_CABACEstimator->getCtx() = ctxBestTUL;
2353
250k
        ctxBest = m_CABACEstimator->getCtx();
2354
250k
      }
2355
      // Test using joint chroma residual coding
2356
301k
      double     bestCostCbCr = bestCostCb + bestCostCr;
2357
301k
      int        bestJointCbCr = 0;
2358
301k
      bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cr)) ||
2359
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cb)) ||
2360
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2));
2361
301k
      bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cr)) ||
2362
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cb)) ||
2363
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP));
2364
301k
      bool       lastIsBest = false;
2365
301k
      bool noLFNST1 = false;
2366
301k
      if (rapidLFNST && (startLfnstIdx != endLfnstIdx))
2367
0
      {
2368
0
        if (bestLfnstIdx == 2)
2369
0
        {
2370
0
          noLFNST1 = true;
2371
0
        }
2372
0
        else
2373
0
        {
2374
0
          endLfnstIdx = 1;
2375
0
        }
2376
0
      }
2377
2378
1.10M
      for (int lfnstIdxj = startLfnstIdx; lfnstIdxj <= endLfnstIdx; lfnstIdxj++)
2379
802k
      {
2380
802k
        if (rapidLFNST && noLFNST1 && (lfnstIdxj == 1))
2381
0
        {
2382
0
          continue;
2383
0
        }
2384
802k
        currTU.cu->lfnstIdx = lfnstIdxj;
2385
802k
        std::vector<int> jointCbfMasksToTest;
2386
802k
        if (TU::getCbf(tmpTU, COMP_Cb) || TU::getCbf(tmpTU, COMP_Cr))
2387
283k
        {
2388
283k
          jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, m_orgResiCb, m_orgResiCr);
2389
283k
        }
2390
802k
        for (int cbfMask : jointCbfMasksToTest)
2391
283k
        {
2392
283k
          currTU.jointCbCr = (uint8_t)cbfMask;
2393
283k
          ComponentID codeCompId = ((currTU.jointCbCr >> 1) ? COMP_Cb : COMP_Cr);
2394
283k
          ComponentID otherCompId = ((codeCompId == COMP_Cb) ? COMP_Cr : COMP_Cb);
2395
283k
          bool tsAllowed = useTS && TU::isTSAllowed(currTU, codeCompId) && (m_pcEncCfg->m_useChromaTS) && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2396
283k
          if ((partitioner.chType == CH_L)&& tsAllowed && (currTU.mtsIdx[COMP_Y] != MTS_SKIP))
2397
0
          {
2398
0
            tsAllowed = false;
2399
0
          }
2400
283k
          if (!tsAllowed)
2401
283k
          {
2402
283k
            checkTSOnly = false;
2403
283k
          }
2404
283k
          uint8_t     numTransformCands = 1 + (tsAllowed && !(checkDCTOnly || checkTSOnly)? 1 : 0); // DCT + TS = 2 tests
2405
283k
          std::vector<TrMode> trModes;
2406
283k
          if (numTransformCands > 1)
2407
0
          {
2408
0
            trModes.push_back(TrMode(0, true)); // DCT2
2409
0
            trModes.push_back(TrMode(1, true));//TS
2410
0
          }
2411
283k
          else
2412
283k
          {
2413
283k
            currTU.mtsIdx[codeCompId] = checkTSOnly || currTU.cu->bdpcmM[CH_C] ? 1 : 0;
2414
283k
          }
2415
2416
566k
          for (int modeId = 0; modeId < numTransformCands; modeId++)
2417
283k
          {
2418
283k
            Distortion distTmp = 0;
2419
283k
            currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : MTS_DCT2_DCT2;
2420
283k
            if (numTransformCands > 1)
2421
0
            {
2422
0
              currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : trModes[modeId].first;
2423
0
            }
2424
283k
            currTU.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
2425
2426
283k
            m_CABACEstimator->getCtx() = ctxStartTU;
2427
2428
283k
            resiCb.copyFrom(m_orgResiCb[cbfMask]);
2429
283k
            resiCr.copyFrom(m_orgResiCr[cbfMask]);
2430
283k
            if ((modeId == 0) && (numTransformCands > 1))
2431
0
            {
2432
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, COMP_Cb);
2433
0
              currTU.mtsIdx[codeCompId] = trModes[modeId].first;
2434
0
              currTU.mtsIdx[(codeCompId == COMP_Cr) ? COMP_Cb : COMP_Cr] = MTS_DCT2_DCT2;
2435
0
            }
2436
283k
            cuCtx.lfnstLastScanPos = false;
2437
283k
            cuCtx.violatesLfnstConstrained[CH_L] = false;
2438
283k
            cuCtx.violatesLfnstConstrained[CH_C] = false;
2439
283k
            if (numTransformCands > 1)
2440
0
            {
2441
0
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0, 0, true);
2442
0
              if ((modeId == 0) && !trModes[modeId + 1].second)
2443
0
              {
2444
0
                numTransformCands = 1;
2445
0
              }
2446
0
            }
2447
283k
            else
2448
283k
            {
2449
283k
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0);
2450
283k
            }
2451
2452
283k
            double costTmp = std::numeric_limits<double>::max();
2453
283k
            if (distTmp < MAX_DISTORTION)
2454
279k
            {
2455
279k
              uint64_t bits = xGetIntraFracBitsQTChroma(currTU, COMP_Cb, &cuCtx);
2456
279k
              costTmp = m_pcRdCost->calcRdCost(bits, distTmp);
2457
279k
            }
2458
3.55k
            else if (!currTU.mtsIdx[codeCompId])
2459
3.55k
            {
2460
3.55k
              numTransformCands = 1;
2461
3.55k
            }
2462
283k
            bool rootCbfL = false;
2463
1.13M
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2464
850k
            {
2465
850k
              rootCbfL |= bool(tmpTU.cbf[t]);
2466
850k
            }
2467
283k
            if (rapidLFNST && !rootCbfL)
2468
0
            {
2469
0
              endLfnstIdx = lfnstIdxj;
2470
0
            }
2471
283k
            if (testLFNST && currTU.cu->lfnstIdx && !cuCtx.lfnstLastScanPos)
2472
3.48k
            {
2473
3.48k
              bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) ? rootCbfL
2474
3.48k
                : (cs.area.chromaFormat != CHROMA_400 && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2475
0
                ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2476
3.48k
              if (cbfAtZeroDepth)
2477
3.48k
              {
2478
3.48k
                costTmp = MAX_DOUBLE;
2479
3.48k
              }
2480
3.48k
            }
2481
283k
            if (costTmp < bestCostCbCr)
2482
106k
            {
2483
106k
              bestCostCbCr = costTmp;
2484
106k
              bestDistCbCr = distTmp;
2485
106k
              bestJointCbCr = currTU.jointCbCr;
2486
2487
              // store data
2488
106k
              bestLfnstIdx = lfnstIdxj;
2489
106k
              if ((cbfMask != jointCbfMasksToTest.back() || (lfnstIdxj != endLfnstIdx)) || (modeId != (numTransformCands - 1)))
2490
86.9k
              {
2491
86.9k
                saveCS.getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
2492
86.9k
                saveCS.getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
2493
2494
86.9k
                tmpTU.copyComponentFrom(currTU, COMP_Cb);
2495
86.9k
                tmpTU.copyComponentFrom(currTU, COMP_Cr);
2496
2497
86.9k
                ctxBest = m_CABACEstimator->getCtx();
2498
86.9k
              }
2499
19.8k
              else
2500
19.8k
              {
2501
19.8k
                lastIsBest = true;
2502
19.8k
                cs.cus[0]->lfnstIdx = bestLfnstIdx;
2503
19.8k
              }
2504
106k
            }
2505
283k
          }
2506
283k
        }
2507
2508
        // Retrieve the best CU data (unless it was the very last one tested)
2509
802k
      }
2510
301k
      if (!lastIsBest)
2511
281k
      {
2512
281k
        cs.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2513
281k
        cs.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2514
2515
281k
        cs.cus[0]->lfnstIdx = bestLfnstIdx;
2516
281k
        currTU.copyComponentFrom(tmpTU, COMP_Cb);
2517
281k
        currTU.copyComponentFrom(tmpTU, COMP_Cr);
2518
281k
        m_CABACEstimator->getCtx() = ctxBest;
2519
281k
      }
2520
301k
      currTU.jointCbCr = (TU::getCbf(currTU, COMP_Cb) || TU::getCbf(currTU, COMP_Cr)) ? bestJointCbCr : 0;
2521
301k
    } // jointCbCr
2522
2523
301k
    cs.dist += bestDistCbCr;
2524
301k
    cuCtx.violatesLfnstConstrained[CH_L] = false;
2525
301k
    cuCtx.violatesLfnstConstrained[CH_C] = false;
2526
301k
    cuCtx.lfnstLastScanPos = false;
2527
301k
    cuCtx.violatesMtsCoeffConstraint = false;
2528
301k
    cuCtx.mtsLastScanPos = false;
2529
301k
    cbfs.cbf(COMP_Cb) = TU::getCbf(currTU, COMP_Cb);
2530
301k
    cbfs.cbf(COMP_Cr) = TU::getCbf(currTU, COMP_Cr);
2531
301k
  }
2532
0
  else
2533
0
  {
2534
0
    unsigned   numValidTBlocks = getNumberValidTBlocks(*cs.pcv);
2535
0
    ChromaCbfs SplitCbfs(false);
2536
2537
0
    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
2538
0
    {
2539
0
      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
2540
0
    }
2541
0
    else if (currTU.cu->ispMode)
2542
0
    {
2543
0
      partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs);
2544
0
    }
2545
0
    else
2546
0
      THROW("Implicit TU split not available");
2547
2548
0
    do
2549
0
    {
2550
0
      ChromaCbfs subCbfs = xIntraChromaCodingQT(cs, partitioner);
2551
2552
0
      for (uint32_t ch = COMP_Cb; ch < numValidTBlocks; ch++)
2553
0
      {
2554
0
        const ComponentID compID = ComponentID(ch);
2555
0
        SplitCbfs.cbf(compID) |= subCbfs.cbf(compID);
2556
0
      }
2557
0
    } while (partitioner.nextPart(cs));
2558
2559
0
    partitioner.exitCurrSplit();
2560
2561
    /*if (lumaUsesISP && cs.dist == MAX_UINT) //ahenkel
2562
    {
2563
      return cbfs;
2564
    }*/
2565
0
    {
2566
0
      cbfs.Cb |= SplitCbfs.Cb;
2567
0
      cbfs.Cr |= SplitCbfs.Cr;
2568
2569
0
      if (1)   //(!lumaUsesISP)
2570
0
      {
2571
0
        for (auto& ptu : cs.tus)
2572
0
        {
2573
0
          if (currArea.Cb().contains(ptu->Cb()) || (!ptu->Cb().valid() && currArea.Y().contains(ptu->Y())))
2574
0
          {
2575
0
            TU::setCbfAtDepth(*ptu, COMP_Cb, currDepth, SplitCbfs.Cb);
2576
0
            TU::setCbfAtDepth(*ptu, COMP_Cr, currDepth, SplitCbfs.Cr);
2577
0
          }
2578
0
        }
2579
0
      }
2580
0
    }
2581
0
  }
2582
301k
  return cbfs;
2583
301k
}
2584
2585
uint64_t IntraSearch::xFracModeBitsIntraLuma(const CodingUnit& cu, const unsigned* mpmLst)
2586
1.01M
{
2587
1.01M
  m_CABACEstimator->resetBits();
2588
2589
1.01M
  if (!cu.ciip)
2590
1.01M
  {
2591
1.01M
    m_CABACEstimator->intra_luma_pred_mode(cu, mpmLst);
2592
1.01M
  }
2593
2594
1.01M
  return m_CABACEstimator->getEstFracBits();
2595
1.01M
}
2596
2597
template<typename T, size_t N, int M>
2598
void IntraSearch::xReduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, SortedPelUnitBufs<M>& sortedPelBuffer, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const CodingUnit& cu, const bool fastMip)
2599
20.7k
{
2600
20.7k
  const int maxCandPerType = numModesForFullRD >> 1;
2601
20.7k
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList;
2602
20.7k
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList;
2603
20.7k
  const double minCost = candCostList[0];
2604
20.7k
  bool keepOneMip = candModeList.size() > numModesForFullRD;
2605
20.7k
  const int maxNumConv = 3; 
2606
2607
20.7k
  int numConv = 0;
2608
20.7k
  int numMip = 0;
2609
93.8k
  for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++)
2610
73.1k
  {
2611
73.1k
    bool addMode = false;
2612
73.1k
    const ModeInfo& orgMode = candModeList[idx];
2613
2614
73.1k
    if (!orgMode.mipFlg)
2615
52.3k
    {
2616
52.3k
      addMode = (numConv < maxNumConv);
2617
52.3k
      numConv += addMode ? 1:0;
2618
52.3k
    }
2619
20.7k
    else
2620
20.7k
    {
2621
20.7k
      addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip );
2622
20.7k
      keepOneMip = false;
2623
20.7k
      numMip += addMode ? 1:0;
2624
20.7k
    }
2625
73.1k
    if( addMode )
2626
73.0k
    {
2627
73.0k
      tempRdModeList.push_back(orgMode);
2628
73.0k
      tempCandCostList.push_back(candCostList[idx]);
2629
73.0k
    }
2630
73.1k
  }
2631
2632
  // sort Pel Buffer
2633
20.7k
  int i = -1;
2634
20.7k
  for( auto &m: tempRdModeList)
2635
73.0k
  {
2636
73.0k
    if( ! (m == candModeList.at( ++i )) )
2637
0
    {
2638
0
      for( int j = i; j < (int)candModeList.size()-1; )
2639
0
      {
2640
0
        if( m == candModeList.at( ++j ) )
2641
0
        {
2642
0
          sortedPelBuffer.swap( i, j);
2643
0
          break;
2644
0
        }
2645
0
      }
2646
0
    }
2647
73.0k
  }
2648
20.7k
  sortedPelBuffer.reduceTo( (int)tempRdModeList.size() );
2649
2650
20.7k
  if ((cu.lwidth() > 8 && cu.lheight() > 8))
2651
18.6k
  {
2652
    // Sort MIP candidates by Hadamard cost
2653
18.6k
    const int transpOff = getNumModesMip(cu.Y());
2654
18.6k
    static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0);
2655
18.6k
    static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0);
2656
18.6k
    for (uint8_t mode : { 0, 1, 2 })
2657
56.0k
    {
2658
56.0k
      uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0);
2659
56.0k
      updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3);
2660
56.0k
    }
2661
2662
    // Append MIP mode to RD mode list
2663
18.6k
    const int modeListSize = int(tempRdModeList.size());
2664
37.3k
    for (int idx = 0; idx < 3; idx++)
2665
37.3k
    {
2666
37.3k
      const bool     isTransposed = (sortedMipModes[idx] >= transpOff ? true : false);
2667
37.3k
      const uint32_t mipIdx       = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]);
2668
37.3k
      const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx );
2669
37.3k
      bool alreadyIncluded = false;
2670
149k
      for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
2671
130k
      {
2672
130k
        if (tempRdModeList[modeListIdx] == mipMode)
2673
18.6k
        {
2674
18.6k
          alreadyIncluded = true;
2675
18.6k
          break;
2676
18.6k
        }
2677
130k
      }
2678
2679
37.3k
      if (!alreadyIncluded)
2680
18.6k
      {
2681
18.6k
        tempRdModeList.push_back(mipMode);
2682
18.6k
        tempCandCostList.push_back(0);
2683
18.6k
        if( fastMip ) break;
2684
18.6k
      }
2685
37.3k
    }
2686
18.6k
  }
2687
2688
20.7k
  candModeList = tempRdModeList;
2689
20.7k
  candCostList = tempCandCostList;
2690
20.7k
  numModesForFullRD = int(candModeList.size());
2691
20.7k
}
2692
2693
void IntraSearch::xPreCheckMTS(TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, PelUnitBuf *predBuf, const ComponentID& compID)
2694
15.4k
{
2695
15.4k
  if (compID == COMP_Y)
2696
15.4k
  {
2697
15.4k
    CodingStructure&  cs = *tu.cs;
2698
15.4k
    const CompArea& area = tu.blocks[compID];
2699
15.4k
    const ReshapeData& reshapeData = cs.picture->reshapeData;
2700
15.4k
    const CodingUnit& cu = *cs.getCU(area.pos(), CH_L,TREE_D);
2701
15.4k
    PelBuf piPred = cs.getPredBuf(area);
2702
15.4k
    PelBuf piResi = cs.getResiBuf(area);
2703
2704
15.4k
    initIntraPatternChType(*tu.cu, area);
2705
15.4k
    if (predBuf)
2706
13.8k
    {
2707
13.8k
      piPred.copyFrom(predBuf->Y());
2708
13.8k
    }
2709
1.65k
    else if (CU::isMIP(cu, CH_L))
2710
1.63k
    {
2711
1.63k
      initIntraMip(cu);
2712
1.63k
      predIntraMip(piPred, cu);
2713
1.63k
    }
2714
19
    else
2715
19
    {
2716
19
      predIntraAng(COMP_Y, piPred, cu);
2717
19
    }
2718
2719
    //===== get residual signal =====
2720
15.4k
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
2721
0
    {
2722
0
      piResi.subtract(cs.getRspOrgBuf(), piPred);
2723
0
    }
2724
15.4k
    else
2725
15.4k
    {
2726
15.4k
      CPelBuf piOrg = cs.getOrgBuf(COMP_Y);
2727
15.4k
      piResi.subtract(piOrg, piPred);
2728
15.4k
    }
2729
15.4k
    m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, compID);
2730
15.4k
  }
2731
0
  else
2732
0
  {
2733
0
    ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID);
2734
0
    m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, codeCompId);
2735
0
  }
2736
15.4k
}
2737
2738
double IntraSearch::xTestISP(CodingStructure& cs, Partitioner& subTuPartitioner, double bestCostForISP, PartSplit ispType, bool& splitcbf, uint64_t& singleFracBits, Distortion& singleDistLuma, CUCtx& cuCtx)
2739
16.2k
{
2740
16.2k
  int  subTuCounter = 0;
2741
16.2k
  bool earlySkipISP = false;
2742
16.2k
  bool splitCbfLuma = false;
2743
16.2k
  CodingUnit& cu = *cs.cus[0];
2744
2745
16.2k
  Distortion singleDistTmpLumaSUM = 0;
2746
16.2k
  uint64_t   singleTmpFracBitsSUM = 0;
2747
16.2k
  double     singleCostTmpSUM = 0;
2748
16.2k
  cuCtx.isDQPCoded = true;
2749
16.2k
  cuCtx.isChromaQpAdjCoded = true;
2750
2751
16.2k
  do
2752
20.7k
  {
2753
20.7k
    Distortion singleDistTmpLuma = 0;
2754
20.7k
    uint64_t   singleTmpFracBits = 0;
2755
20.7k
    double     singleCostTmp = 0;
2756
20.7k
    TransformUnit& tmpTUcur = ((cs.tus.size() < (subTuCounter + 1)))
2757
20.7k
      ? cs.addTU(CS::getArea(cs, subTuPartitioner.currArea(), subTuPartitioner.chType,
2758
3.68k
        subTuPartitioner.treeType),
2759
3.68k
        subTuPartitioner.chType, cs.cus[0])
2760
20.7k
      : *cs.tus[subTuCounter];
2761
20.7k
    tmpTUcur.depth = subTuPartitioner.currTrDepth;
2762
2763
    // Encode TU
2764
20.7k
    xIntraCodingTUBlock(tmpTUcur, COMP_Y, false, singleDistTmpLuma, 0);
2765
20.7k
    cuCtx.mtsLastScanPos = false;
2766
2767
20.7k
    if (singleDistTmpLuma == MAX_INT)   // all zero CBF skip
2768
0
    {
2769
0
      earlySkipISP = true;
2770
0
      singleCostTmpSUM = MAX_DOUBLE;
2771
0
      break;
2772
0
    }
2773
2774
20.7k
    if (m_pcRdCost->calcRdCost(singleTmpFracBitsSUM, singleDistTmpLumaSUM + singleDistTmpLuma) > bestCostForISP)
2775
5.56k
    {
2776
5.56k
      earlySkipISP = true;
2777
5.56k
    }
2778
15.1k
    else
2779
15.1k
    {
2780
15.1k
      m_ispTestedModes[0].IspType = ispType;
2781
15.1k
      m_ispTestedModes[0].subTuCounter = subTuCounter;
2782
15.1k
      singleTmpFracBits = xGetIntraFracBitsQT(cs, subTuPartitioner, true, &cuCtx);
2783
15.1k
    }
2784
20.7k
    singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
2785
2786
20.7k
    singleCostTmpSUM     += singleCostTmp;
2787
20.7k
    singleDistTmpLumaSUM += singleDistTmpLuma;
2788
20.7k
    singleTmpFracBitsSUM += singleTmpFracBits;
2789
2790
20.7k
    subTuCounter++;
2791
2792
20.7k
    splitCbfLuma |= TU::getCbfAtDepth( *cs.getTU(subTuPartitioner.currArea().lumaPos(), subTuPartitioner.chType, subTuCounter - 1), 
2793
20.7k
                                       COMP_Y, subTuPartitioner.currTrDepth);
2794
20.7k
    int nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1];
2795
20.7k
    bool doStop = (m_pcEncCfg->m_ISP != 1) || (subTuCounter < nSubPartitions);
2796
20.7k
    if (doStop)
2797
20.7k
    {
2798
20.7k
      if (singleCostTmpSUM > bestCostForISP)
2799
13.7k
      {
2800
13.7k
        earlySkipISP = true;
2801
13.7k
        break;
2802
13.7k
      }
2803
6.93k
      if (subTuCounter < nSubPartitions)
2804
5.51k
      {
2805
5.51k
        double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91;
2806
5.51k
        if (singleCostTmpSUM > bestCostForISP * threshold)
2807
1.10k
        {
2808
1.10k
          earlySkipISP = true;
2809
1.10k
          break;
2810
1.10k
        }
2811
5.51k
      }
2812
6.93k
    }
2813
20.7k
  } while (subTuPartitioner.nextPart(cs));
2814
16.2k
  singleDistLuma = singleDistTmpLumaSUM;
2815
16.2k
  singleFracBits = singleTmpFracBitsSUM;
2816
2817
16.2k
  splitcbf = splitCbfLuma;
2818
16.2k
  return earlySkipISP ? MAX_DOUBLE : singleCostTmpSUM;
2819
16.2k
}
2820
2821
int IntraSearch::xSpeedUpISP(int speed, bool& testISP, int mode, int& noISP, int& endISP, CodingUnit& cu, static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, const ModeInfo& bestPUMode, int bestISP, int bestLfnstIdx)
2822
14.6k
{
2823
14.6k
  if (speed)
2824
5.95k
  {
2825
5.95k
    if (mode >= 1)
2826
3.12k
    {
2827
3.12k
      if (m_ispTestedModes[0].splitIsFinished[1] && m_ispTestedModes[0].splitIsFinished[0])
2828
0
      {
2829
0
        testISP = false;
2830
0
        endISP = 0;
2831
0
      }
2832
3.12k
      else
2833
3.12k
      {
2834
3.12k
        if (m_pcEncCfg->m_ISP >= 2)
2835
3.12k
        {
2836
3.12k
          if (mode == 1) //best Hor||Ver
2837
2.82k
          {
2838
2.82k
            int bestDir = 0;
2839
8.47k
            for (int d = 0; d < 2; d++)
2840
5.65k
            {
2841
5.65k
              int d2 = d ? 0 : 1;
2842
5.65k
              if ((m_ispTestedModes[0].bestCost[d] <= m_ispTestedModes[0].bestCost[d2])
2843
5.34k
                && (m_ispTestedModes[0].bestCost[d] != MAX_DOUBLE))
2844
301
              {
2845
301
                bestDir = d + 1;
2846
301
                m_ispTestedModes[0].splitIsFinished[d2] = true;
2847
301
              }
2848
5.65k
            }
2849
2.82k
            m_ispTestedModes[0].bestModeSoFar = bestDir;
2850
2.82k
            if (m_ispTestedModes[0].bestModeSoFar <= 0)
2851
2.52k
            {
2852
2.52k
              m_ispTestedModes[0].splitIsFinished[1] = true;
2853
2.52k
              m_ispTestedModes[0].splitIsFinished[0] = true;
2854
2.52k
              testISP = false;
2855
2.52k
              endISP = 0;
2856
2.52k
            }
2857
2.82k
          }
2858
3.12k
          if (m_ispTestedModes[0].bestModeSoFar == 2)
2859
74
          {
2860
74
            noISP = 1;
2861
74
          }
2862
3.05k
          else
2863
3.05k
          {
2864
3.05k
            endISP = 1;
2865
3.05k
          }
2866
3.12k
        }
2867
3.12k
      }
2868
3.12k
    }
2869
5.95k
    if (testISP)
2870
3.42k
    {
2871
3.42k
      if (mode == 2)
2872
301
      {
2873
903
        for (int d = 0; d < 2; d++)
2874
602
        {
2875
602
          int d2 = d ? 0 : 1;
2876
602
          if (m_ispTestedModes[0].bestCost[d] == MAX_DOUBLE)
2877
278
          {
2878
278
            m_ispTestedModes[0].splitIsFinished[d] = true;
2879
278
          }
2880
602
          if ((m_ispTestedModes[0].bestCost[d2] < 1.3 * m_ispTestedModes[0].bestCost[d])
2881
324
            && (int(m_ispTestedModes[0].bestSplitSoFar) != (d + 1)))
2882
248
          {
2883
248
            if (d)
2884
211
            {
2885
211
              endISP = 1;
2886
211
            }
2887
37
            else
2888
37
            {
2889
37
              noISP = 1;
2890
37
            }
2891
248
            m_ispTestedModes[0].splitIsFinished[d] = true;
2892
248
          }
2893
602
        }
2894
301
      }
2895
3.12k
      else
2896
3.12k
      {
2897
3.12k
        if (m_ispTestedModes[0].splitIsFinished[0])
2898
37
        {
2899
37
          noISP = 1;
2900
37
        }
2901
3.12k
        if (m_ispTestedModes[0].splitIsFinished[1])
2902
264
        {
2903
264
          endISP = 1;
2904
264
        }
2905
3.12k
      }
2906
3.42k
    }
2907
5.95k
    if ((noISP == 1) && (endISP == 1))
2908
23
    {
2909
23
      endISP = 0;
2910
23
    }
2911
5.95k
  }
2912
8.74k
  else
2913
8.74k
  {
2914
8.74k
    bool stopFound = false;
2915
8.74k
    if (m_pcEncCfg->m_ISP >= 3)
2916
8.74k
    {
2917
8.74k
      if (mode)
2918
3.10k
      {
2919
3.10k
        if ((bestISP == 0) || ((bestPUMode.modeId != RdModeList[mode - 1].modeId)
2920
97
          && (bestPUMode.modeId != RdModeList[mode].modeId)))
2921
2.16k
        {
2922
2.16k
          stopFound = true;
2923
2.16k
        }
2924
3.10k
      }
2925
8.74k
    }
2926
8.74k
    if (cu.mipFlag || cu.multiRefIdx)
2927
182
    {
2928
182
      cu.mipFlag = false;
2929
182
      cu.multiRefIdx = 0;
2930
182
      if (!stopFound)
2931
0
      {
2932
0
        for (int k = 0; k < mode; k++)
2933
0
        {
2934
0
          if (cu.intraDir[CH_L] == RdModeList[k].modeId)
2935
0
          {
2936
0
            stopFound = true;
2937
0
            break;
2938
0
          }
2939
0
        }
2940
0
      }
2941
182
    }
2942
8.74k
    if (stopFound)
2943
2.16k
    {
2944
2.16k
      testISP = false;
2945
2.16k
      endISP = 0;
2946
2.16k
      return 1;
2947
2.16k
    }
2948
6.58k
    if (!stopFound && (m_pcEncCfg->m_ISP >= 2) && (cu.intraDir[CH_L] == DC_IDX))
2949
948
    {
2950
948
      stopFound = true;
2951
948
      endISP = 0;
2952
948
      return 1;
2953
948
    }
2954
6.58k
  }
2955
11.5k
  return 0;
2956
14.6k
}
2957
2958
void IntraSearch::xSpeedUpIntra(double bestcost, int& EndMode, int& speedIntra, CodingUnit& cu)
2959
27.1k
{
2960
27.1k
  int bestIdxbefore = m_ispTestedModes[0].bestIntraMode;
2961
27.1k
  if (m_ispTestedModes[0].isIntra)
2962
0
  {
2963
0
    if (bestIdxbefore == 1)//ISP
2964
0
    {
2965
0
      speedIntra = 14;
2966
0
    }
2967
0
    if (bestIdxbefore == 4)//MTS
2968
0
    {
2969
0
      speedIntra = 3;
2970
0
    }
2971
0
  }
2972
27.1k
  else if (!cu.cs->slice->isIntra())
2973
0
  {
2974
0
    if (bestcost != MAX_DOUBLE)
2975
0
    {
2976
0
      speedIntra = 10;
2977
0
    }
2978
0
  }
2979
27.1k
  if (m_ispTestedModes[0].bestBefore[0] == -1)
2980
24.3k
  {
2981
24.3k
    speedIntra |= 7;
2982
24.3k
    if (m_pcEncCfg->m_FastIntraTools == 2)
2983
0
    {
2984
0
      EndMode = 1;
2985
0
    }
2986
24.3k
  }
2987
27.1k
  if (!cu.cs->slice->isIntra())
2988
0
  {
2989
0
    if ((m_ispTestedModes[0].bestBefore[1] == 1) || (m_ispTestedModes[0].bestBefore[2] == 1))
2990
0
    {
2991
0
      speedIntra |= 2;
2992
0
    }
2993
0
    if ((m_ispTestedModes[0].bestBefore[1] == 4) || (m_ispTestedModes[0].bestBefore[2] == 4))
2994
0
    {
2995
0
      speedIntra |= 3;
2996
0
    }
2997
0
    if ((m_ispTestedModes[0].bestBefore[1] == 2) || (m_ispTestedModes[0].bestBefore[2] == 2))
2998
0
    {
2999
0
      speedIntra |= 1;
3000
0
    }
3001
0
  }
3002
27.1k
}
3003
3004
} // namespace vvenc
3005
3006
//! \}
3007