Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/IntraSearch.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     IntraSearch.cpp
45
 *  \brief    encoder intra search class
46
 */
47
48
#include "IntraSearch.h"
49
#include "EncPicture.h"
50
#include "CommonLib/CommonDef.h"
51
#include "CommonLib/Rom.h"
52
#include "CommonLib/Picture.h"
53
#include "CommonLib/UnitTools.h"
54
#include "CommonLib/dtrace_next.h"
55
#include "CommonLib/dtrace_buffer.h"
56
#include "CommonLib/Reshape.h"
57
#include <math.h>
58
#include "vvenc/vvencCfg.h"
59
60
//! \ingroup EncoderLib
61
//! \{
62
63
namespace vvenc {
64
65
#define PLTCtx(c) SubCtx( Ctx::Palette, c )
66
67
/**
 * Default constructor.
 *
 * Puts every pointer member that this file later inspects into a known null state.
 * Nothing is allocated here; the working coding structures are created by init().
 */
IntraSearch::IntraSearch()
  : m_pSaveCS       (nullptr)
  , m_pcEncCfg      (nullptr)
  , m_pcTrQuant     (nullptr)
  , m_pcRdCost      (nullptr)
  , m_CABACEstimator(nullptr)
  , m_CtxCache      (nullptr)
{
  // destroy() is run by the destructor and tests m_pTempCS / m_pBestCS before deleting them.
  // Null them here so that destroying an object on which init() was never called does not
  // read (and delete through) an indeterminate pointer.
  // They are assigned in the body rather than in the initializer list so that this does not
  // depend on the member declaration order in the header (not visible from this file).
  m_pTempCS           = nullptr;
  m_pBestCS           = nullptr;
  m_SortedPelUnitBufs = nullptr;
}
76
77
void IntraSearch::init(const VVEncCfg &encCfg, TrQuant *pTrQuant, RdCost *pRdCost, SortedPelUnitBufs<SORTED_BUFS> *pSortedPelUnitBufs, XUCache &unitCache )
78
17.7k
{
79
17.7k
  IntraPrediction::init( encCfg.m_internChromaFormat, encCfg.m_internalBitDepth[ CH_L ] );
80
81
17.7k
  m_pcEncCfg          = &encCfg;
82
17.7k
  m_pcTrQuant         = pTrQuant;
83
17.7k
  m_pcRdCost          = pRdCost;
84
17.7k
  m_SortedPelUnitBufs = pSortedPelUnitBufs;
85
86
17.7k
  const ChromaFormat chrFormat = encCfg.m_internChromaFormat;
87
17.7k
  const int maxCUSize          = encCfg.m_CTUSize;
88
89
17.7k
  Area area = Area( 0, 0, maxCUSize, maxCUSize );
90
91
17.7k
  m_pTempCS = new CodingStructure( unitCache, nullptr );
92
17.7k
  m_pBestCS = new CodingStructure( unitCache, nullptr );
93
94
17.7k
  m_pTempCS->createForSearch( chrFormat, area );
95
17.7k
  m_pBestCS->createForSearch( chrFormat, area );
96
97
17.7k
  const int uiNumSaveLayersToAllocate = 3;
98
17.7k
  m_pSaveCS = new CodingStructure*[uiNumSaveLayersToAllocate];
99
71.1k
  for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
100
53.3k
  {
101
53.3k
    m_pSaveCS[ layer ] = new CodingStructure( unitCache, nullptr );
102
53.3k
    m_pSaveCS[ layer ]->createForSearch( chrFormat, Area( 0, 0, maxCUSize, maxCUSize ) );
103
53.3k
    m_pSaveCS[ layer ]->initStructData();
104
53.3k
  }
105
106
17.7k
  CompArea chromaArea( COMP_Cb, chrFormat, area, true );
107
106k
  for( int i = 0; i < 5; i++ )
108
88.8k
  {
109
88.8k
    m_orgResiCb[i].create( chromaArea );
110
88.8k
    m_orgResiCr[i].create( chromaArea );
111
88.8k
  }
112
17.7k
}
113
114
void IntraSearch::destroy()
115
17.7k
{
116
17.7k
  if ( m_pSaveCS )
117
17.7k
  {
118
17.7k
    const int uiNumSaveLayersToAllocate = 3;
119
71.1k
    for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
120
53.3k
    {
121
53.3k
      if ( m_pSaveCS[ layer ] ) { m_pSaveCS[ layer ]->destroy(); delete m_pSaveCS[ layer ]; }
122
53.3k
    }
123
17.7k
    delete[] m_pSaveCS;
124
17.7k
    m_pSaveCS = nullptr;
125
17.7k
  }
126
127
17.7k
  if( m_pTempCS )
128
17.7k
  {
129
17.7k
    m_pTempCS->destroy();
130
17.7k
    delete m_pTempCS; m_pTempCS = nullptr;
131
17.7k
  }
132
133
17.7k
  if( m_pBestCS )
134
17.7k
  {
135
17.7k
    m_pBestCS->destroy();
136
17.7k
    delete m_pBestCS; m_pBestCS = nullptr;
137
17.7k
  }
138
17.7k
}
139
140
/** Destructor: hands all clean-up to destroy(). */
IntraSearch::~IntraSearch()
{
  this->destroy();
}
144
145
/**
 * Stores the CABAC estimator and context cache used by the search functions.
 *
 * \param cabacEstimator  bit estimator; pointer is stored as-is
 * \param ctxCache        context cache; pointer is stored as-is
 */
void IntraSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache *ctxCache )
{
  m_CtxCache       = ctxCache;
  m_CABACEstimator = cabacEstimator;
}
150
151
//////////////////////////////////////////////////////////////////////////
152
// INTRA PREDICTION
153
//////////////////////////////////////////////////////////////////////////
154
static constexpr double COST_UNKNOWN = -65536.0;
155
156
double IntraSearch::xFindInterCUCost( CodingUnit &cu )
157
23.1k
{
158
23.1k
  if( CU::isConsIntra(cu) && !cu.slice->isIntra() )
159
0
  {
160
    //search corresponding inter CU cost
161
0
    for( int i = 0; i < m_numCuInSCIPU; i++ )
162
0
    {
163
0
      if( cu.lumaPos() == m_cuAreaInSCIPU[i].pos() && cu.lumaSize() == m_cuAreaInSCIPU[i].size() )
164
0
      {
165
0
        return m_cuCostInSCIPU[i];
166
0
      }
167
0
    }
168
0
  }
169
23.1k
  return COST_UNKNOWN;
170
23.1k
}
171
172
void IntraSearch::xEstimateLumaRdModeList(int& numModesForFullRD,
173
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList,
174
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& HadModeList,
175
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandCostList,
176
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandHadList, CodingUnit& cu, bool testMip )
177
23.1k
{
178
23.1k
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTRA_EST_RD_CAND, cu.cs, CH_L );
179
23.1k
  const uint16_t intra_ctx_size = Ctx::IntraLumaMpmFlag.size() + Ctx::IntraLumaPlanarFlag.size() + Ctx::MultiRefLineIdx.size() + Ctx::ISPMode.size() + Ctx::MipFlag.size();
180
23.1k
  const TempCtx  ctxStartIntraCtx(m_CtxCache, SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), m_CABACEstimator->getCtx()));
181
23.1k
  const double   sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
182
23.1k
  const int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
183
184
23.1k
  CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search");
185
186
23.1k
  const SPS& sps     = *cu.cs->sps;
187
23.1k
  const bool fastMip = sps.MIP && m_pcEncCfg->m_useFastMIP;
188
189
  // this should always be true
190
23.1k
  CHECK( !cu.Y().valid(), "CU is not valid" );
191
192
23.1k
  const CompArea& area = cu.Y();
193
194
23.1k
  const UnitArea localUnitArea(area.chromaFormat, Area(0, 0, area.width, area.height));
195
23.1k
  if( testMip)
196
17.7k
  {
197
17.7k
    numModesForFullRD += fastMip ? numModesForFullRD - std::min( m_pcEncCfg->m_useFastMIP, numModesForFullRD )
198
17.7k
                                 : numModesForFullRD;
199
17.7k
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD + 1 );
200
17.7k
  }
201
5.38k
  else
202
5.38k
  {
203
5.38k
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD );
204
5.38k
  }
205
206
23.1k
  CPelBuf piOrg   = cu.cs->getOrgBuf(COMP_Y);
207
23.1k
  PelBuf piPred  = m_SortedPelUnitBufs->getTestBuf(COMP_Y);
208
209
23.1k
  const ReshapeData& reshapeData = cu.cs->picture->reshapeData;
210
23.1k
  if (cu.cs->picHeader->lmcsEnabled && reshapeData.getCTUFlag())
211
0
  {
212
0
    piOrg = cu.cs->getRspOrgBuf();
213
0
  }
214
23.1k
  DistParam distParam    = m_pcRdCost->setDistParam( piOrg, piPred, sps.bitDepths[ CH_L ], DF_HAD_2SAD); // Use HAD (SATD) cost
215
216
23.1k
  const int numHadCand = (testMip ? 2 : 1) * 3;
217
218
  //*** Derive (regular) candidates using Hadamard
219
23.1k
  cu.mipFlag = false;
220
23.1k
  cu.multiRefIdx = 0;
221
222
  //===== init pattern for luma prediction =====
223
23.1k
  initIntraPatternChType(cu, cu.Y(), true);
224
225
23.1k
  bool satdChecked[NUM_INTRA_MODE] = { false };
226
227
23.1k
  unsigned mpmLst[NUM_MOST_PROBABLE_MODES];
228
23.1k
  CU::getIntraMPMs(cu, mpmLst);
229
230
23.1k
  const int decMsk = ( 1 << m_pcEncCfg->m_IntraEstDecBit ) - 1;
231
232
23.1k
  m_parentCandList.resize( 0 );
233
23.1k
  m_parentCandList.reserve( ( numModesAvailable >> m_pcEncCfg->m_IntraEstDecBit ) + 2 );
234
235
1.57M
  for( unsigned mode = 0; mode < numModesAvailable; mode++ )
236
1.54M
  {
237
    // Skip checking extended Angular modes in the first round of SATD
238
1.54M
    if( mode > DC_IDX && ( mode & decMsk ) )
239
1.13M
    {
240
1.13M
      continue;
241
1.13M
    }
242
243
415k
    m_parentCandList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ) );
244
415k
  }
245
   
246
92.4k
  for( int decDst = 1 << m_pcEncCfg->m_IntraEstDecBit; decDst > 0; decDst >>= 1 )
247
69.3k
  {
248
623k
    for( unsigned idx = 0; idx < m_parentCandList.size(); idx++ )
249
554k
    {
250
554k
      int modeParent = m_parentCandList[idx].modeId;
251
252
554k
      int off = decDst & decMsk;
253
554k
      int inc = decDst << 1;
254
255
554k
#if 1 // INTRA_AS_IN_VTM
256
554k
      if( off != 0 && ( modeParent <= ( DC_IDX + 1 ) || modeParent >= ( NUM_LUMA_MODE - 1 ) ) )
257
89.9k
      {
258
89.9k
        continue;
259
89.9k
      }
260
261
464k
#endif
262
977k
      for( int mode = modeParent - off; mode < modeParent + off + 1; mode += inc )
263
513k
      {
264
513k
        if( satdChecked[mode] || mode < 0 || mode >= NUM_LUMA_MODE )
265
2.38k
        {
266
2.38k
          continue;
267
2.38k
        }
268
269
510k
        cu.intraDir[0] = mode;
270
271
510k
        initPredIntraParams( cu, cu.Y(), sps );
272
510k
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
273
510k
        predIntraAng( COMP_Y, piPred, cu );
274
275
        // Use the min between SAD and HAD as the cost criterion
276
        // SAD is scaled by 2 to align with the scaling of HAD
277
510k
        Distortion minSadHad = distParam.distFunc( distParam );
278
279
510k
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
280
281
        //restore ctx
282
510k
        m_CABACEstimator->getCtx() = SubCtx( CtxSet( Ctx::IntraLumaMpmFlag(), intra_ctx_size ), ctxStartIntraCtx );
283
284
510k
        double cost = ( double ) minSadHad + ( double ) fracModeBits * sqrtLambdaForFirstPass;
285
510k
        DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, mode );
286
287
510k
        int insertPos = -1;
288
510k
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos );
289
510k
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), ( double ) minSadHad, HadModeList, CandHadList, numHadCand );
290
510k
        m_SortedPelUnitBufs->insert( insertPos, ( int ) RdModeList.size() );
291
292
510k
        satdChecked[mode] = true;
293
510k
      }
294
464k
    }
295
296
69.3k
    m_parentCandList.resize( RdModeList.size() );
297
69.3k
    std::copy( RdModeList.cbegin(), RdModeList.cend(), m_parentCandList.begin() );
298
69.3k
  }
299
300
23.1k
  const bool isFirstLineOfCtu = (((cu.block(COMP_Y).y)&((cu.cs->sps)->CTUSize - 1)) == 0);
301
23.1k
  if( m_pcEncCfg->m_MRL && ! isFirstLineOfCtu )
302
13.9k
  {
303
13.9k
    cu.multiRefIdx = 1;
304
13.9k
    unsigned  multiRefMPM [NUM_MOST_PROBABLE_MODES];
305
13.9k
    CU::getIntraMPMs(cu, multiRefMPM);
306
307
41.7k
    for (int mRefNum = 1; mRefNum < MRL_NUM_REF_LINES; mRefNum++)
308
27.8k
    {
309
27.8k
      int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
310
311
27.8k
      cu.multiRefIdx = multiRefIdx;
312
27.8k
      initIntraPatternChType(cu, cu.Y(), true);
313
314
166k
      for (int x = 1; x < NUM_MOST_PROBABLE_MODES; x++)
315
139k
      {
316
139k
        cu.intraDir[0] = multiRefMPM[x];
317
139k
        initPredIntraParams(cu, cu.Y(), sps);
318
139k
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
319
139k
        predIntraAng(COMP_Y, piPred, cu);
320
321
        // Use the min between SAD and SATD as the cost criterion
322
        // SAD is scaled by 2 to align with the scaling of HAD
323
139k
        Distortion minSadHad = distParam.distFunc(distParam);
324
325
        // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
326
139k
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
327
328
        //restore ctx
329
139k
        m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
330
331
139k
        double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
332
//        DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMRL: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, cu.intraDir[0]);
333
334
139k
        int insertPos = -1;
335
139k
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD, &insertPos );
336
139k
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), (double)minSadHad, HadModeList, CandHadList,  numHadCand );
337
139k
        m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
338
139k
      }
339
27.8k
    }
340
13.9k
    cu.multiRefIdx = 0;
341
13.9k
  }
342
343
23.1k
  if (testMip)
344
17.7k
  {
345
17.7k
    cu.mipFlag = true;
346
17.7k
    cu.multiRefIdx = 0;
347
348
17.7k
    double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE };
349
350
17.7k
    initIntraPatternChType(cu, cu.Y());
351
17.7k
    initIntraMip( cu );
352
353
17.7k
    const int transpOff    = getNumModesMip( cu.Y() );
354
17.7k
    const int numModesFull = (transpOff << 1);
355
231k
    for( uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++ )
356
214k
    {
357
214k
      const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
358
214k
      const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
359
360
214k
      cu.mipTransposedFlag = isTransposed;
361
214k
      cu.intraDir[CH_L] = uiMode;
362
214k
      distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
363
214k
      predIntraMip(piPred, cu);
364
365
      // Use the min between SAD and HAD as the cost criterion
366
      // SAD is scaled by 2 to align with the scaling of HAD
367
214k
      Distortion minSadHad = distParam.distFunc(distParam);
368
369
214k
      uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
370
371
      //restore ctx
372
214k
      m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
373
374
214k
      double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
375
214k
      mipHadCost[uiModeFull] = cost;
376
214k
      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiModeFull);
377
378
214k
      int insertPos = -1;
379
214k
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD+1, &insertPos );
380
214k
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), 0.8*(double)minSadHad, HadModeList, CandHadList,  numHadCand );
381
214k
      m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
382
214k
    }
383
384
17.7k
    const double thresholdHadCost = 1.0 + 1.4 / sqrt((double)(cu.lwidth()*cu.lheight()));
385
17.7k
    xReduceHadCandList(RdModeList, CandCostList, *m_SortedPelUnitBufs, numModesForFullRD, thresholdHadCost, mipHadCost, cu, fastMip);
386
17.7k
  }
387
388
23.1k
  if( m_pcEncCfg->m_bFastUDIUseMPMEnabled )
389
23.1k
  {
390
23.1k
    const int numMPMs = NUM_MOST_PROBABLE_MODES;
391
23.1k
    unsigned  intraMpms[numMPMs];
392
393
23.1k
    cu.multiRefIdx = 0;
394
395
23.1k
    const int numCand = CU::getIntraMPMs( cu, intraMpms );
396
23.1k
    ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0);
397
398
46.9k
    for( int j = 0; j < numCand; j++ )
399
23.8k
    {
400
23.8k
      bool mostProbableModeIncluded = false;
401
23.8k
      mostProbableMode.modeId = intraMpms[j];
402
403
121k
      for( int i = 0; i < numModesForFullRD; i++ )
404
97.6k
      {
405
97.6k
        mostProbableModeIncluded |= ( mostProbableMode == RdModeList[i] );
406
97.6k
      }
407
23.8k
      if( !mostProbableModeIncluded )
408
167
      {
409
167
        numModesForFullRD++;
410
167
        RdModeList.push_back( mostProbableMode );
411
167
        CandCostList.push_back(0);
412
167
      }
413
23.8k
    }
414
23.1k
  }
415
23.1k
}
416
417
/**
 * Luma intra mode decision for one CU.
 *
 * Builds the candidate list with xEstimateLumaRdModeList(), optionally prunes it against the
 * inter Hadamard cost (m_usePbIntraFast, non-intra slices only), then codes every remaining
 * candidate - plus up to two BDPCM candidates and, where enabled, the ISP variants - with
 * xIntraCodingLumaQT() and keeps the cheapest result.
 *
 * \param cu           coding unit to decide; on success its lfnstIdx, mipFlag,
 *                     mipTransposedFlag, intraDir[CH_L], bdpcmM[CH_L], multiRefIdx and
 *                     ispMode are set to the winning configuration
 * \param partitioner  supplies the current area and channel type
 * \param bestCost     best cost known so far; passed by value, lowered locally whenever a
 *                     candidate beats it and forwarded to the coding and speed-up helpers
 * \return             true if a candidate was selected; false only on the early exit of the
 *                     m_usePbIntraFast pruning (cs.dist is then set to MAX_DISTORTION).
 *                     If the RD loop finishes without a valid candidate, THROW is invoked.
 */
bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, double bestCost)
{
  CodingStructure       &cs           = *cu.cs;
  const int             width         = partitioner.currArea().lwidth();
  const int             height        = partitioner.currArea().lheight();

  //===== loop over partitions =====

  // context state to return to after every tested candidate
  const TempCtx ctxStart           ( m_CtxCache, m_CABACEstimator->getCtx() );

  // variables for saving fast intra modes scan results across multiple LFNST passes
  double costInterCU = xFindInterCUCost( cu );

  bool validReturn = false;

  //===== determine set of modes to be tested (using prediction signal only) =====
  int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> RdModeList;
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> HadModeList;
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;

  // size-dependent default, overridable through the configuration
  int numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[Log2(width) - MIN_CU_LOG2][Log2(height) - MIN_CU_LOG2];
  if (m_pcEncCfg->m_numIntraModesFullRD > 0)
  {
    numModesForFullRD=m_pcEncCfg->m_numIntraModesFullRD;
  }

#if INTRA_FULL_SEARCH
  numModesForFullRD = numModesAvailable;
#endif
  const SPS& sps = *cu.cs->sps;
  const bool mipAllowed = sps.MIP && cu.lwidth() <= sps.getMaxTbSize() && cu.lheight() <= sps.getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.lumaSize()));
  // aspect-ratio limit for MIP testing; shrinks as m_useFastMIP grows
  const int SizeThr     = 8 >> std::max( 0, m_pcEncCfg->m_useFastMIP - 1 );
  const bool testMip    = mipAllowed && ( cu.lwidth() <= ( SizeThr * cu.lheight() ) && cu.lheight() <= ( SizeThr * cu.lwidth() ) ) && ( cu.lwidth() <= MIP_MAX_WIDTH && cu.lheight() <= MIP_MAX_HEIGHT );
  bool testISP = sps.ISP && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
  if (testISP)
  {
    int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
    int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
    m_ispTestedModes[0].init(numTotalPartsHor, numTotalPartsVer, 0);
    // the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with
    // ISP due to size restrictions
    numTotalPartsHor = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0;
    numTotalPartsVer = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0;
    for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++)
    {
      m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer, 0);
    }
    testISP = m_ispTestedModes[0].numTotalParts[0];
  }
  else
  {
    m_ispTestedModes[0].init(0, 0, 0);
  }

  xEstimateLumaRdModeList(numModesForFullRD, RdModeList, HadModeList, CandCostList, CandHadList, cu, testMip);

  CHECK( (size_t)numModesForFullRD != RdModeList.size(), "Inconsistent state!" );

  // after this point, don't use numModesForFullRD
  if( m_pcEncCfg->m_usePbIntraFast && !cs.slice->isIntra() && RdModeList.size() < static_cast<size_t>( numModesAvailable ) )
  {
    double pbintraRatio = m_pcEncCfg->m_usePbIntraFast == 1 && ( cs.area.lwidth() >= 16 && cs.area.lheight() >= 16 ) ? 1.2 : PBINTRA_RATIO;

    int maxSize = -1;
    ModeInfo bestMipMode;
    int bestMipIdx = -1;
    // remember the first MIP candidate in the list so it can survive the truncation below
    for( int idx = 0; idx < static_cast<int>( RdModeList.size() ); idx++ )
    {
      if( RdModeList[idx].mipFlg )
      {
        bestMipMode = RdModeList[idx];
        bestMipIdx = idx;
        break;
      }
    }
    const int numHadCand = 3;
    // maxSize ends up as the smallest k whose Hadamard candidate is missing or too
    // expensive compared with the inter Hadamard cost (stays -1 if there is none)
    for (int k = numHadCand - 1; k >= 0; k--)
    {
      if (CandHadList.size() < static_cast<size_t>( k + 1 ) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; }
    }
    if (maxSize > 0)
    {
      RdModeList.resize(std::min<size_t>(RdModeList.size(), maxSize));
      if( bestMipIdx >= 0 )
      {
        if( RdModeList.size() <= static_cast<size_t>( bestMipIdx ) )
        {
          RdModeList.push_back(bestMipMode);
          m_SortedPelUnitBufs->swap( maxSize, bestMipIdx );
        }
      }
    }
    if (maxSize == 0)
    {
      // no intra candidate is worth testing
      cs.dist = MAX_DISTORTION;
      cs.interHad = 0;
      return false;
    }
  }

  //===== check modes (using r-d costs) =====
  ModeInfo bestPUMode;

  CodingStructure *csTemp = m_pTempCS;
  CodingStructure *csBest = m_pBestCS;

  csTemp->slice   = csBest->slice   = cs.slice;
  csTemp->picture = csBest->picture = cs.picture;
  csTemp->compactResize( cu );
  csBest->compactResize( cu );
  csTemp->initStructData();
  csBest->initStructData();

  int   bestLfnstIdx  = 0;
  const bool useBDPCM = cs.picture->useBDPCM;
  int   NumBDPCMCand  = (useBDPCM && sps.BDPCM && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType))) ? 2 : 0;
  int   bestbdpcmMode = 0;
  int   bestISP       = 0;
  int   bestMrl       = 0;
  bool  bestMip       = false;
  int   EndMode       = (int)RdModeList.size();
  bool  useISPlfnst   = testISP && sps.LFNST;
  bool  noLFNST_ts    = false;
  double bestCostIsp[2] = { MAX_DOUBLE, MAX_DOUBLE };   // [0]: best non-ISP cost, [1]: best ISP cost
  bool disableMTS = false;
  bool disableLFNST = false;
  bool disableDCT2test = false;
  if (m_pcEncCfg->m_FastIntraTools)
  {
    // speedIntra is decoded as a bit field: bit 0 disables ISP, bit 1 LFNST, bit 2 MTS;
    // any bit from bit 3 upwards sets disableDCT2test
    int speedIntra = 0;
    xSpeedUpIntra(bestCost, EndMode, speedIntra, cu);
    disableMTS = (speedIntra >> 2 ) & 0x1;
    disableLFNST = (speedIntra >> 1) & 0x1;
    disableDCT2test = speedIntra>>3;
    if (disableLFNST)
    {
      noLFNST_ts = true;
      useISPlfnst = false;
    }
    if (speedIntra & 0x1)
    {
      testISP = false;
    }
  }

  for (int mode_cur = 0; mode_cur < EndMode + NumBDPCMCand; mode_cur++)
  {
    int mode = mode_cur;
    if (mode_cur >= EndMode)
    {
      // BDPCM candidates follow the regular list: first -2, then -1 (bdpcmM = -mode)
      mode = mode_cur - EndMode ? -1 : -2;
      testISP = false;
    }
    // set CU/PU to luma prediction mode
    ModeInfo testMode;
    int noISP = 0;
    int endISP = testISP ? 2 : 0;
    bool noLFNST = noLFNST_ts;
    if (mode && useISPlfnst)
    {
      noLFNST |= (bestCostIsp[0] > (bestCostIsp[1] * 1.4));
      if (mode > 2)
      {
        endISP = 0;
        testISP = false;
      }
    }
    if (testISP)
    {
      xSpeedUpISP(1, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, bestLfnstIdx);
    }
    int startISP = 0;
    if (disableDCT2test && mode && bestISP)
    {
      startISP = endISP ? 1 : 0;
    }
    // ispM == 0: no ISP; ispM 1..endISP: the ISP split types
    for (int ispM = startISP; ispM <= endISP; ispM++)
    {
      if (ispM && (ispM == noISP))
      {
        continue;
      }

      if (mode < 0)
      {
        cu.bdpcmM[CH_L] = -mode;
        testMode = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmM[CH_L] == 2 ? VER_IDX : HOR_IDX);
      }
      else
      {
        testMode = RdModeList[mode];
        cu.bdpcmM[CH_L] = 0;
      }

      cu.ispMode = ispM;
      cu.mipFlag = testMode.mipFlg;
      cu.mipTransposedFlag = testMode.mipTrFlg;
      cu.multiRefIdx = testMode.mRefId;
      cu.intraDir[CH_L] = testMode.modeId;
      if (cu.ispMode && xSpeedUpISP(0, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, 0) )
      {
        continue;
      }
      if (m_pcEncCfg->m_FastIntraTools && (cu.ispMode || sps.LFNST || sps.MTS))
      {
        m_ispTestedModes[0].intraWasTested = true;
      }
      CHECK(cu.mipFlag && cu.multiRefIdx, "Error: combination of MIP and MRL not supported");
      CHECK(cu.multiRefIdx && (cu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
      CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
      CHECK(cu.ispMode && cu.multiRefIdx, "Error: combination of ISP and MRL not supported");

      // determine residual for partition
      cs.initSubStructure(*csTemp, partitioner.chType, cs.area, true);
      // the candidate index is handed over negated when any of the conditions below holds
      int doISP = (((cu.ispMode == 0) && noLFNST) || (useISPlfnst && mode && cu.ispMode && (bestLfnstIdx == 0)) || disableLFNST) ? -mode : mode;
      xIntraCodingLumaQT(*csTemp, partitioner, m_SortedPelUnitBufs->getBufFromSortedList(mode), bestCost, doISP, disableMTS);

      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
        cu.blocks[0].y, width, height, csTemp->cost, testMode.modeId, testMode.ispMod,
        cu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);

      // an ISP result whose first TU has no luma cbf is never selected
      if (cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMP_Y])
      {
        csTemp->cost = MAX_DOUBLE;
        csTemp->costDbOffset = 0;
      }
      if (useISPlfnst)
      {
        int n = (cu.ispMode == 0) ? 0 : 1;
        bestCostIsp[n] = csTemp->cost < bestCostIsp[n] ? csTemp->cost : bestCostIsp[n];
      }

      // check r-d cost
      if (csTemp->cost < csBest->cost)
      {
        validReturn   = true;
        std::swap(csTemp, csBest);
        bestPUMode    = testMode;
        bestLfnstIdx  = csBest->cus[0]->lfnstIdx;
        bestISP       = csBest->cus[0]->ispMode;
        bestMip       = csBest->cus[0]->mipFlag;
        bestMrl       = csBest->cus[0]->multiRefIdx;
        bestbdpcmMode = cu.bdpcmM[CH_L];
        m_ispTestedModes[bestLfnstIdx].bestSplitSoFar = ISPType(bestISP);
        if (csBest->cost < bestCost)
        {
          bestCost = csBest->cost;
        }
        // once transform skip wins on a block of at least 64 samples, LFNST is disabled
        // for the remaining non-ISP candidates
        if ((csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] == MTS_SKIP) && ( floorLog2(csBest->getTU(partitioner.chType)->blocks[COMP_Y].area()) >= 6 ))
        {
          noLFNST_ts = true;
        }
      }

      // reset context models
      m_CABACEstimator->getCtx() = ctxStart;

      csTemp->releaseIntermediateData();

      if (m_pcEncCfg->m_fastLocalDualTreeMode && CU::isConsIntra(cu) && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0)
      {
        if( (m_pcEncCfg->m_fastLocalDualTreeMode == 2) || (csBest->cost > costInterCU * 1.5))
        {
          //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
          // The break leaves the ISP loop only; EndMode = 0 shortens the outer mode loop.
          EndMode = 0;
          break;
        }
      }
    }
  } // Mode loop

  if (m_pcEncCfg->m_FastIntraTools && (sps.ISP|| sps.LFNST || sps.MTS))
  {
    // pack the winning tool combination: bit 2 = MTS index non-zero, bit 1 = LFNST, bit 0 = ISP
    int bestMode = csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] ? 4 : 0;
    bestMode |= bestLfnstIdx ? 2 : 0;
    bestMode |= bestISP ? 1 : 0;
    m_ispTestedModes[0].bestIntraMode = bestMode;
  }
  cu.ispMode = bestISP;
  if( validReturn )
  {
    cs.useSubStructure( *csBest, partitioner.chType, TREE_D, cu.singleChan( CH_L ), true );
    const ReshapeData& reshapeData = cs.picture->reshapeData;
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
    {
      cs.getRspRecoBuf().copyFrom(csBest->getRspRecoBuf());
    }

    //=== update PU data ====
    cu.lfnstIdx           = bestLfnstIdx;
    cu.mipTransposedFlag  = bestPUMode.mipTrFlg;
    cu.intraDir[CH_L]     = bestPUMode.modeId;
    cu.bdpcmM[CH_L]       = bestbdpcmMode;
    cu.mipFlag            = bestMip;
    cu.multiRefIdx        = bestMrl;
  }
  else
  {
    THROW("fix this");
  }

  csBest->releaseIntermediateData();

  return validReturn;
}
722
723
// Chroma intra mode decision for one CU.
//
// Steps, in order:
//   1. Prepare a scratch coding structure (m_pSaveCS[0]) that holds the best
//      reconstruction / TU data found so far.
//   2. Run a cheap distortion pre-check (min of 2*SAD and HAD, summed over Cb and Cr)
//      on the candidate modes and mark the highest-cost ones as disabled.
//   3. Run xIntraChromaCodingQT + xGetIntraFracBitsQT on the remaining candidates
//      (plus up to two BDPCM candidates) and keep the cheapest one.
//   4. Copy the winner back into cu / cs and restore the CABAC contexts.
//
// cu             : coding unit under test; intraDir[1], bdpcmM[CH_C], lfnstIdx and
//                  possibly ispMode are written.
// partitioner    : current partitioning state, forwarded to the coding routines.
// maxCostAllowed : cost bound; when luma uses ISP and no chroma candidate gets below
//                  it, cu.ispMode is reset to 0.
void IntraSearch::estIntraPredChromaQT( CodingUnit& cu, Partitioner& partitioner, const double maxCostAllowed )
{
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_CHROMA, cu.cs, CH_C );
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );

  CodingStructure& cs            = *cu.cs;
  const bool       lumaUsesISP   = !CU::isSepTree( cu ) && cu.ispMode;
  const PartSplit  ispType       = lumaUsesISP ? CU::getISPType( cu, COMP_Y ) : TU_NO_ISP;
  double           bestCostSoFar = maxCostAllowed;
  const uint32_t   numComp       = getNumberValidComponents( cu.chromaFormat );
  const bool       useBDPCM      = cs.picture->useBDPCM;

  uint32_t   bestMode = 0;
  Distortion bestDist = 0;
  double     bestCost = MAX_DOUBLE;

  const uint32_t firstMode = 0;
  const uint32_t numModes  = NUM_CHROMA_MODE;

  // number of candidates dropped after the pre-check: half or a quarter of the list
  const int numPruned = numModes >> ( m_pcEncCfg->m_reduceIntraChromaModesFullRD ? 1 : 2 );

  //----- candidate list -----
  uint32_t chromaCandModes[NUM_CHROMA_MODE];
  CU::getIntraChromaCandModes( cu, chromaCandModes );

  //----- scratch coding structure for the best result -----
  CodingStructure& saveCS = *m_pSaveCS[0];
  saveCS.pcv     = cs.pcv;
  saveCS.picture = cs.picture;
  saveCS.area.repositionTo( cs.area );
  saveCS.clearTUs();

  if( !CU::isSepTree( cu ) && cu.ispMode )
  {
    saveCS.clearCUs();
  }

  //----- separate tree: the chroma TUs have to be created here -----
  if( CU::isSepTree( cu ) )
  {
    if( !partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu );
    }
    else
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );

      do
      {
        cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ).depth = partitioner.currTrDepth;
      } while( partitioner.nextPart( cs ) );

      partitioner.exitCurrSplit();
    }
  }

  //----- mirror the relevant TUs into the scratch structure -----
  std::vector<TransformUnit*> orgTUs;
  for( const auto& tuPtr : cs.tus )
  {
    // with ISP luma every TU is kept, otherwise only TUs that carry chroma
    if( lumaUsesISP || cu.contains( *tuPtr, CH_C ) )
    {
      saveCS.addTU( *tuPtr, partitioner.chType, nullptr );
      orgTUs.push_back( tuPtr );
    }
  }

  //----- distortion based pre-selection -----
  int     satdModeList  [NUM_CHROMA_MODE] = { 0 };
  int64_t satdSortedCost[NUM_CHROMA_MODE] = { 0 };
  bool    modeDisable[NUM_INTRA_MODE + 1] = { false }; // indexed by intra mode id

  CompArea areaCb = cu.Cb();
  CompArea areaCr = cu.Cr();
  CPelBuf  orgCb  = cs.getOrgBuf ( COMP_Cb );
  PelBuf   predCb = cs.getPredBuf( COMP_Cb );
  CPelBuf  orgCr  = cs.getOrgBuf ( COMP_Cr );
  PelBuf   predCr = cs.getPredBuf( COMP_Cr );

  DistParam distParamSadCb  = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_SAD );
  DistParam distParamSatdCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_HAD );
  DistParam distParamSadCr  = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_SAD );
  DistParam distParamSatdCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_HAD );

  cu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.

  initIntraPatternChType( cu, cu.Cb() );
  initIntraPatternChType( cu, cu.Cr() );
  loadLMLumaRecPels( cu, cu.Cb() );

  // Predicts one chroma component with the given mode and returns min( 2 * SAD, HAD ).
  auto componentPreCost = [&]( const ComponentID comp, PelBuf& pred, const CompArea& area,
                               DistParam& sadParam, DistParam& satdParam,
                               const int mode, const bool isLM ) -> int64_t
  {
    if( isLM )
    {
      predIntraChromaLM( comp, pred, cu, area, mode );
    }
    else
    {
      initPredIntraParams( cu, area, *cs.sps );
      predIntraAng( comp, pred, cu );
    }
    const int64_t sadTimes2 = sadParam.distFunc( sadParam ) * 2;
    const int64_t satd      = satdParam.distFunc( satdParam );
    return std::min( sadTimes2, satd );
  };

  for( uint32_t idx = firstMode; idx < numModes; idx++ )
  {
    const int mode    = chromaCandModes[idx];
    satdModeList[idx] = mode;

    if( CU::isLMCMode( mode ) && ( !CU::isLMCModeEnabled( cu, mode ) || cu.slice->lmChromaCheckDisable ) )
    {
      continue;
    }
    // only pre-check regular modes and MDLM modes, not including DM, Planar, and LM
    if( mode == LM_CHROMA_IDX || mode == PLANAR_IDX || mode == DM_CHROMA_IDX )
    {
      continue;
    }

    cu.intraDir[1] = mode; // temporary assigned, for SATD checking.

    const bool    isLM   = CU::isLMCMode( mode );
    const int64_t costCb = componentPreCost( COMP_Cb, predCb, areaCb, distParamSadCb, distParamSatdCb, mode, isLM );
    const int64_t costCr = componentPreCost( COMP_Cr, predCr, areaCr, distParamSadCr, distParamSatdCr, mode, isLM );
    satdSortedCost[idx]  = costCb + costCr;
  }

  // order the candidates by ascending pre-check cost (modes skipped above keep cost 0)
  for( uint32_t i = firstMode; i < numModes; i++ )
  {
    for( uint32_t j = i + 1; j < numModes; j++ )
    {
      if( satdSortedCost[j] < satdSortedCost[i] )
      {
        std::swap( satdModeList[i],   satdModeList[j] );
        std::swap( satdSortedCost[i], satdSortedCost[j] );
      }
    }
  }

  // disable the last numPruned modes of the sorted list
  for( int i = 0; i < numPruned; i++ )
  {
    modeDisable[satdModeList[numModes - 1 - i]] = true;
  }

  //----- full rate-distortion check -----
  int              bestLfnstIdx  = 0;
  const Distortion baseDist      = cs.dist; // distortion accumulated before chroma coding
  int32_t          bestBdpcmMode = 0;

  const bool bdpcmCandidates = useBDPCM && CU::bdpcmAllowed( cu, COMP_Cb )
    && ( ( partitioner.chType == CH_C ) || ( cu.ispMode == 0 && cu.lfnstIdx == 0 && cu.firstTU->mtsIdx[COMP_Y] == MTS_SKIP ) );
  const uint32_t numBdpcmModes = bdpcmCandidates ? 2 : 0;

  const int regularEnd   = (int) numModes;
  const int candidateEnd = (int) ( numModes + numBdpcmModes );

  for( int cand = firstMode; cand < candidateEnd; cand++ )
  {
    int chromaIntraMode;

    if( cand >= regularEnd )
    {
      // BDPCM candidates follow the regular list: first bdpcmM = 2, then bdpcmM = 1
      const bool secondBdpcm = cand > regularEnd;
      if( secondBdpcm && ( saveCS.tus[0]->mtsIdx[COMP_Cb] != MTS_SKIP ) && ( saveCS.tus[0]->mtsIdx[COMP_Cr] != MTS_SKIP ) )
      {
        continue;
      }
      cu.bdpcmM[CH_C] = secondBdpcm ? 1 : 2;
      chromaIntraMode = cu.bdpcmM[CH_C] == 2 ? chromaCandModes[1] : chromaCandModes[2];
    }
    else
    {
      cu.bdpcmM[CH_C] = 0;
      chromaIntraMode = chromaCandModes[cand];

      if( CU::isLMCMode( chromaIntraMode ) && ( !CU::isLMCModeEnabled( cu, chromaIntraMode ) || cu.slice->lmChromaCheckDisable ) )
      {
        continue;
      }
      // when CCLM is disable, then MDLM is disable. not use satd checking
      if( modeDisable[chromaIntraMode] && CU::isLMCModeEnabled( cu, chromaIntraMode ) )
      {
        continue;
      }
    }

    cs.dist = baseDist;

    //----- restore context models -----
    m_CABACEstimator->getCtx() = ctxStart;

    //----- chroma coding -----
    cu.intraDir[1]                   = chromaIntraMode;
    m_ispTestedModes[0].IspType      = ispType;
    m_ispTestedModes[0].subTuCounter = -1;
    xIntraChromaCodingQT( cs, partitioner );

    if( lumaUsesISP && cs.dist == MAX_UINT )
    {
      continue;
    }

    if( cs.sps->transformSkip )
    {
      m_CABACEstimator->getCtx() = ctxStart;
    }

    m_ispTestedModes[0].IspType      = ispType;
    m_ispTestedModes[0].subTuCounter = -1;
    const uint64_t   fracBits = xGetIntraFracBitsQT( cs, partitioner, false );
    const Distortion candDist = cs.dist;
    const double     candCost = m_pcRdCost->calcRdCost( fracBits, candDist - baseDist );

    //----- compare -----
    if( !( candCost < bestCost ) )
    {
      continue;
    }

    if( lumaUsesISP && ( candCost < bestCostSoFar ) )
    {
      bestCostSoFar = candCost;
    }

    // remember reconstruction and TU data of the new best candidate
    for( uint32_t c = getFirstComponentOfChannel( CH_C ); c < numComp; c++ )
    {
      const CompArea& area = cu.blocks[c];

      saveCS.getRecoBuf     ( area ).copyFrom( cs.getRecoBuf( area ) );
      cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf( area ) );

      for( uint32_t t = 0; t < saveCS.tus.size(); t++ )
      {
        saveCS.tus[t]->copyComponentFrom( *orgTUs[t], area.compID );
      }
    }

    bestCost      = candCost;
    bestDist      = candDist;
    bestMode      = chromaIntraMode;
    bestLfnstIdx  = cu.lfnstIdx;
    bestBdpcmMode = cu.bdpcmM[CH_C];
  }

  cu.lfnstIdx     = bestLfnstIdx;
  cu.bdpcmM[CH_C] = bestBdpcmMode;

  //----- write the best result back -----
  for( uint32_t c = getFirstComponentOfChannel( CH_C ); c < numComp; c++ )
  {
    const CompArea& area = cu.blocks[c];

    cs.getRecoBuf         ( area ).copyFrom( saveCS.getRecoBuf( area ) );
    cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf    ( area ) );

    for( uint32_t t = 0; t < saveCS.tus.size(); t++ )
    {
      orgTUs[t]->copyComponentFrom( *saveCS.tus[t], area.compID );
    }
  }

  cu.intraDir[1] = bestMode;
  cs.dist        = bestDist;

  //----- restore context models -----
  m_CABACEstimator->getCtx() = ctxStart;

  // no chroma candidate improved on the allowed cost: give up ISP for this CU
  if( lumaUsesISP && bestCostSoFar >= maxCostAllowed )
  {
    cu.ispMode = 0;
  }
}
986
987
// Appends an (area, cost) pair to the m_cuAreaInSCIPU / m_cuCostInSCIPU tables.
// The call is a no-op once NUM_INTER_CU_INFO_SAVE entries are stored.
void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )
{
  if( m_numCuInSCIPU >= NUM_INTER_CU_INFO_SAVE )
  {
    return; // tables are full
  }

  m_cuAreaInSCIPU[m_numCuInSCIPU] = area;
  m_cuCostInSCIPU[m_numCuInSCIPU] = cost;
  ++m_numCuInSCIPU;
}
996
997
void IntraSearch::initCuAreaCostInSCIPU()
998
0
{
999
0
  for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ )
1000
0
  {
1001
0
    m_cuAreaInSCIPU[i] = Area();
1002
0
    m_cuCostInSCIPU[i] = 0;
1003
0
  }
1004
0
  m_numCuInSCIPU = 0;
1005
0
}
1006
// -------------------------------------------------------------------------------------------------------------------
1007
// Intra search
1008
// -------------------------------------------------------------------------------------------------------------------
1009
1010
// Feeds the CU-level intra header syntax of the current CU to the CABAC estimator.
//
// luma == true : pred_mode (conditionally), bdpcm_mode and intra_luma_pred_mode.
// luma == false: bdpcm_mode for chroma and intra_chroma_pred_mode.
// Nothing is written unless the current partitioner area is the first one of the CU
// (for ISP: unless the sub-TU counter is 0).
void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool luma )
{
  CodingUnit& cu = *cs.getCU( partitioner.chType, partitioner.treeType );

  if( !luma )
  {
    const bool firstChroma = partitioner.currArea().Cb().valid()
                          && partitioner.currArea().chromaPos() == cs.area.chromaPos();
    if( firstChroma )
    {
      m_CABACEstimator->bdpcm_mode( cu, ComponentID( CH_C ) );
      m_CABACEstimator->intra_chroma_pred_mode( cu );
    }
    return;
  }

  const bool firstLuma = cu.ispMode ? m_ispTestedModes[0].subTuCounter == 0
                                    : partitioner.currArea().lumaPos() == cs.area.lumaPos();
  if( !firstLuma )
  {
    return;
  }

  // CU header
  if( ( !cs.slice->isIntra() || cs.slice->sps->IBC || cs.slice->sps->PLT ) && cu.Y().valid() )
  {
    m_CABACEstimator->pred_mode( cu );
  }
  m_CABACEstimator->bdpcm_mode( cu, ComponentID( partitioner.chType ) );

  // luma prediction mode
  if( !cu.Y().valid() )
  {
    m_CABACEstimator->pred_mode( cu );
  }
  m_CABACEstimator->intra_luma_pred_mode( cu );
}
1049
1050
// Feeds the coded-block-flag syntax of the current transform tree node to the CABAC
// estimator and recurses into the sub-partitions when the TU lies deeper than the
// current partitioner depth.
//
// luma == false: chroma CBFs are written first (before a possible recursion).
// luma == true : the luma CBF is written at the leaf; for ISP the CBF of the last
//                sub-TU is not written when all previous sub-TUs have a zero CBF.
void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool luma )
{
  const UnitArea&   currArea     = partitioner.currArea();
  int               subTuCounter = m_ispTestedModes[0].subTuCounter;
  TransformUnit&    currTU       = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
  CodingUnit&       currCU       = *currTU.cu;
  const uint32_t    currDepth    = partitioner.currTrDepth;
  const bool        subdiv       = currTU.depth > currDepth;
  const ComponentID treeComp     = partitioner.chType == CH_L ? COMP_Y : COMP_Cb;

  //===== chroma Cbfs =====
  if( !luma )
  {
    const bool chromaCbfISP = currArea.blocks[COMP_Cb].valid() && currCU.ispMode && !subdiv;
    if( !currCU.ispMode || chromaCbfISP )
    {
      const uint32_t numComp  = getNumberValidComponents( currArea.chromaFormat );
      const uint32_t cbfDepth = chromaCbfISP ? currDepth - 1 : currDepth;

      for( uint32_t ch = COMP_Cb; ch < numComp; ch++ )
      {
        const ComponentID chromaComp = ComponentID( ch );
        const bool        signalCbf  = currDepth == 0
                                    || TU::getCbfAtDepth( currTU, chromaComp, currDepth - 1 )
                                    || chromaCbfISP;
        if( !signalCbf )
        {
          continue;
        }
        const bool prevCbf = ( chromaComp == COMP_Cr ? TU::getCbfAtDepth( currTU, COMP_Cb, currDepth ) : false );
        m_CABACEstimator->cbf_comp( currCU, TU::getCbfAtDepth( currTU, chromaComp, currDepth ), currArea.blocks[chromaComp], cbfDepth, prevCbf );
      }
    }
  }

  //===== recursion into sub-partitions =====
  if( subdiv )
  {
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    }
    else if( currCU.ispMode && isLuma( treeComp ) )
    {
      partitioner.splitCurrArea( m_ispTestedModes[0].IspType, cs );
    }
    else
    {
      THROW("Cannot perform an implicit split!");
    }

    do
    {
      xEncSubdivCbfQT( cs, partitioner, luma );   //?
      // NOTE(review): only the local copy is advanced here; the recursive call reads
      // m_ispTestedModes[0].subTuCounter again, so this update has no visible effect.
      subTuCounter += subTuCounter != -1 ? 1 : 0;
    } while( partitioner.nextPart( cs ) );

    partitioner.exitCurrSplit();
    return;
  }

  //===== luma Cbf at the leaf =====
  if( !luma )
  {
    return;
  }

  bool previousCbf       = false;
  bool lastCbfIsInferred = false;

  if( m_ispTestedModes[0].IspType != TU_NO_ISP )
  {
    const uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2( currTU.lheight() )
                                                                    : currCU.lwidth()  >> floorLog2( currTU.lwidth() );
    if( subTuCounter == nTus - 1 )
    {
      // last sub-TU: its Cbf is inferred when none of the previous sub-TUs has one
      bool           rootCbfSoFar = false;
      TransformUnit* tuPointer    = currCU.firstTU;
      for( uint32_t tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
      {
        rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMP_Y, currDepth );
        tuPointer     = tuPointer->next;
      }
      lastCbfIsInferred = !rootCbfSoFar;
    }
    if( !lastCbfIsInferred )
    {
      previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMP_Y, partitioner.currTrDepth );
    }
  }

  if( !lastCbfIsInferred )
  {
    m_CABACEstimator->cbf_comp( currCU, TU::getCbfAtDepth( currTU, COMP_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode );
  }
}
1138
// Feeds the coefficient syntax of one component of the current transform tree node
// to the CABAC estimator, recursing into sub-partitions when the TU lies deeper than
// the current partitioner depth.
//
// compID : component to code; for COMP_Cr the joint Cb-Cr syntax is written first.
// cuCtx  : forwarded to residual_coding / mts_idx for luma.
// subTuIdx, ispType : not read in this body (the ISP state is taken from
//                     m_ispTestedModes[0]).
void IntraSearch::xEncCoeffQT(CodingStructure& cs, Partitioner& partitioner, const ComponentID compID, CUCtx* cuCtx, const int subTuIdx, const PartSplit ispType)
{
  const UnitArea& currArea     = partitioner.currArea();
  int             subTuCounter = m_ispTestedModes[0].subTuCounter;
  TransformUnit&  currTU       = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
  const uint32_t  currDepth    = partitioner.currTrDepth;

  if( currTU.depth > currDepth )
  {
    // the TU lies deeper in the tree: split and visit every part
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    }
    else if( currTU.cu->ispMode )
    {
      partitioner.splitCurrArea( m_ispTestedModes[0].IspType, cs );
    }
    else
    {
      THROW("Implicit TU split not available!");
    }

    do
    {
      xEncCoeffQT( cs, partitioner, compID, cuCtx, subTuCounter, m_ispTestedModes[0].IspType );
      // NOTE(review): the callee re-reads m_ispTestedModes[0].subTuCounter and does not
      // use its subTuIdx argument, so advancing the local copy has no visible effect.
      subTuCounter += subTuCounter != -1 ? 1 : 0;
    } while( partitioner.nextPart( cs ) );

    partitioner.exitCurrSplit();
    return;
  }

  if( !currArea.blocks[compID].valid() )
  {
    return;
  }

  if( compID == COMP_Cr )
  {
    const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 );
    m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
  }

  if( !TU::getCbf( currTU, compID ) )
  {
    return; // no coefficients to code
  }

  if( isLuma( compID ) )
  {
    m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
    m_CABACEstimator->mts_idx( *currTU.cu, cuCtx );
  }
  else
  {
    m_CABACEstimator->residual_coding( currTU, compID );
  }
}
1189
1190
// Returns the estimated fractional bits for coding the current partitioner area:
// the bit counter of the CABAC estimator is reset, header, Cbf and coefficient syntax
// are fed to it, and the resulting estimate is read back.
//
// luma  : true codes COMP_Y (optionally followed by residual_lfnst_mode),
//         false codes COMP_Cb and COMP_Cr.
// cuCtx : forwarded to the luma coefficient coding; LFNST signalling is only
//         attempted when it is non-null.
uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool luma, CUCtx *cuCtx )
{
  m_CABACEstimator->resetBits();

  xEncIntraHeader( cs, partitioner, luma );
  xEncSubdivCbfQT( cs, partitioner, luma );

  if( !luma )
  {
    xEncCoeffQT( cs, partitioner, COMP_Cb );
    xEncCoeffQT( cs, partitioner, COMP_Cr );
    return m_CABACEstimator->getEstFracBits();
  }

  xEncCoeffQT( cs, partitioner, COMP_Y, cuCtx );

  CodingUnit& cu = *cs.cus[0];

  // The LFNST index is written once per CU: always without ISP, with the first sub-TU
  // when ISP is combined with LFNST, with the last sub-TU when ISP is used without it.
  bool codeLfnstIdx = false;
  if( cuCtx /*&& CU::isSepTree(cu)*/ )
  {
    if( !cu.ispMode )
    {
      codeLfnstIdx = true;
    }
    else if( cu.lfnstIdx )
    {
      codeLfnstIdx = m_ispTestedModes[0].subTuCounter == 0;
    }
    else
    {
      codeLfnstIdx = m_ispTestedModes[0].subTuCounter == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1;
    }
  }

  if( codeLfnstIdx )
  {
    m_CABACEstimator->residual_lfnst_mode( cu, *cuCtx );
  }

  return m_CABACEstimator->getEstFracBits();
}
1219
1220
uint64_t IntraSearch::xGetIntraFracBitsQTChroma(const TransformUnit& currTU, const ComponentID compID, CUCtx *cuCtx)
1221
1.61M
{
1222
1.61M
  m_CABACEstimator->resetBits();
1223
1224
1.61M
  if ( currTU.jointCbCr )
1225
240k
  {
1226
240k
    const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 );
1227
240k
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask>>1, currTU.blocks[ COMP_Cb ], currTU.depth, false );
1228
240k
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask &1, currTU.blocks[ COMP_Cr ], currTU.depth, cbfMask>>1 );
1229
240k
    if( cbfMask )
1230
240k
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1231
240k
    if (cbfMask >> 1)
1232
239k
      m_CABACEstimator->residual_coding( currTU, COMP_Cb, cuCtx );
1233
240k
    if (cbfMask & 1)
1234
240k
      m_CABACEstimator->residual_coding( currTU, COMP_Cr, cuCtx );
1235
240k
  }
1236
1.37M
  else
1237
1.37M
  {
1238
1.37M
    if ( compID == COMP_Cb )
1239
687k
      m_CABACEstimator->cbf_comp( *currTU.cu, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false );
1240
687k
    else
1241
687k
    {
1242
687k
      const bool cbCbf    = TU::getCbf( currTU, COMP_Cb );
1243
687k
      const bool crCbf    = TU::getCbf( currTU, compID );
1244
687k
      const int  cbfMask  = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 );
1245
687k
      m_CABACEstimator->cbf_comp( *currTU.cu, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf );
1246
687k
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1247
687k
    }
1248
1.37M
  }
1249
1250
1.61M
  if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) )
1251
483k
  {
1252
483k
    m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
1253
483k
  }
1254
1255
1.61M
  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
1256
1.61M
  return fracBits;
1257
1.61M
}
1258
1259
// Codes one component of a TU: (luma only) prediction and residual generation, then
// transform/quantization, inverse transform, reconstruction and distortion update.
//
// tu       : transform unit to code; coefficients and Cbf flags are updated.
// compID   : component to code. With tu.jointCbCr set only COMP_Cb is accepted and
//            the Cr block is reconstructed along with it.
// ruiDist  : the distortion of the reconstructed block(s) is added to it. It is set to
//            MAX_INT (ISP TU set without any non-zero luma Cbf) or MAX_DISTORTION
//            (joint Cb-Cr Cbf pattern not reached) when the configuration is rejected.
// predBuf  : optional precomputed luma prediction; when given its Y plane is copied
//            instead of running the predictor.
// loadTr   : forwarded to transformNxN.
// checkCrossCPrediction, numSig : not read in this body.
void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID compID, const bool checkCrossCPrediction, Distortion &ruiDist, uint32_t *numSig, PelUnitBuf *predBuf, const bool loadTr)
{
  const CompArea& area = tu.blocks[compID];
  if( !area.valid() )
  {
    return;
  }

  CodingStructure&   cs          = *tu.cs;
  const SPS&         sps         = *cs.sps;
  const ReshapeData& reshapeData = cs.picture->reshapeData;
  const CodingUnit&  cu          = *tu.cu;

  const ChannelType  chType      = toChannelType( compID );
  const int          bitDepth    = sps.bitDepths[chType];
  const bool         lumaComp    = isLuma( compID );
  const bool         chromaComp  = isChroma( compID );

  CPelBuf piOrg  = cs.getOrgBuf ( area );
  PelBuf  piPred = cs.getPredBuf( area );
  PelBuf  piResi = cs.getResiBuf( area );
  PelBuf  piReco = cs.getRecoBuf( area );

  CHECK( tu.jointCbCr && compID == COMP_Cr, "wrong combination of compID and jointCbCr" );
  const bool jointCbCr = tu.jointCbCr && compID == COMP_Cb;

  //===== luma: reference samples and prediction signal =====
  if( lumaComp )
  {
    const bool predRegDiffFromTB = CU::isPredRegDiffFromTB( *tu.cu );
    bool       firstTBInPredReg  = false;
    CompArea   areaPredReg( COMP_Y, tu.chromaFormat, area );

    if( tu.cu->ispMode )
    {
      firstTBInPredReg = CU::isFirstTBInPredReg( *tu.cu, area );
      if( !predRegDiffFromTB )
      {
        initIntraPatternChTypeISP( *tu.cu, area, piReco );
      }
      else if( firstTBInPredReg )
      {
        CU::adjustPredArea( areaPredReg );
        initIntraPatternChTypeISP( *tu.cu, areaPredReg, piReco );
      }
    }
    else if( !predBuf )
    {
      initIntraPatternChType( *tu.cu, area );
    }

    if( !predRegDiffFromTB )
    {
      if( predBuf )
      {
        piPred.copyFrom( predBuf->Y() );
      }
      else if( CU::isMIP( cu, CH_L ) )
      {
        initIntraMip( cu );
        predIntraMip( piPred, cu );
      }
      else
      {
        predIntraAng( compID, piPred, cu );
      }
    }
    else if( firstTBInPredReg )
    {
      // the prediction region differs from the transform block: predict the whole
      // region once, with its first transform block
      PelBuf piPredReg = cs.getPredBuf( areaPredReg );
      predIntraAng( compID, piPredReg, cu );
    }
  }
  DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), CU::getFinalIntraMode(cu, chType) );

  const Slice& slice = *cs.slice;

  // LMCS in effect for this block: intra slices always, otherwise only with the CTU flag
  const bool lmcsActive  = cs.picHeader->lmcsEnabled && ( slice.isIntra() || reshapeData.getCTUFlag() );
  // chroma residual scaling additionally requires more than four samples in the block
  const bool lmcsScaling = lmcsActive && ( area.width * area.height > 4 );

  //===== luma: residual signal =====
  if( lumaComp )
  {
    if( cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() )
    {
      piResi.subtract( cs.getRspOrgBuf( area ), piPred );
    }
    else
    {
      piResi.subtract( piOrg, piPred );
    }
  }

  //===== transform and quantization =====
  TCoeff        uiAbsSum = 0;
  const QpParam cQP( tu, compID );

  m_pcTrQuant->selectLambda( compID );

  if( lmcsScaling && chromaComp && cs.picHeader->lmcsChromaResidualScale )
  {
    const int    cResScaleInv = tu.chromaAdj;
    const double cRescale     = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv;
    m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) );
  }

  if( jointCbCr )
  {
    // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
    const int    absIct = abs( TU::getICTMode(tu) );
    const double lfact  = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 );
    m_pcTrQuant->scaleLambda( lfact );
  }
  if( sps.jointCbCr && chromaComp && ( tu.cu->cs->slice->sliceQp > 18 ) )
  {
    m_pcTrQuant->scaleLambda( 1.3 );
  }

  if( lumaComp )
  {
    m_pcTrQuant->transformNxN( tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr );

    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum );
    if( tu.cu->ispMode && CU::isISPLast( *tu.cu, area, area.compID ) && CU::allLumaCBFsAreZero( *tu.cu ) )
    {
      // ISP has to have at least one non-zero CBF
      ruiDist = MAX_INT;
      return;
    }

    //--- inverse transform ---
    if( uiAbsSum > 0 )
    {
      m_pcTrQuant->invTransformNxN( tu, compID, piResi, cQP );
    }
    else
    {
      piResi.fill( 0 );
    }
  }
  else // chroma
  {
    PelBuf crPred = cs.getPredBuf( COMP_Cr );
    PelBuf crResi = cs.getResiBuf( COMP_Cr );
    PelBuf crReco = cs.getRecoBuf( COMP_Cr );

    int               codedCbfMask = 0;
    const ComponentID codeCompId   = ( tu.jointCbCr ? ( tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr ) : compID );
    const QpParam     qpCbCr( tu, codeCompId );

    if( tu.jointCbCr )
    {
      // only one component carries coefficients in joint mode: clear the other one
      const ComponentID otherCompId = ( codeCompId == COMP_Cr ? COMP_Cb : COMP_Cr );
      tu.getCoeffs( otherCompId ).fill( 0 ); // do we need that?
      TU::setCbfAtDepth( tu, otherCompId, tu.depth, false );
    }

    PelBuf& codeResi = ( codeCompId == COMP_Cr ? crResi : piResi );
    uiAbsSum = 0;
    m_pcTrQuant->transformNxN( tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr );
    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum );

    if( uiAbsSum > 0 )
    {
      m_pcTrQuant->invTransformNxN( tu, codeCompId, codeResi, qpCbCr );
      codedCbfMask += ( codeCompId == COMP_Cb ? 2 : 1 );
    }
    else
    {
      codeResi.fill( 0 );
    }

    if( tu.jointCbCr )
    {
      if( tu.jointCbCr == 3 && codedCbfMask == 2 )
      {
        codedCbfMask = 3;
        TU::setCbfAtDepth( tu, COMP_Cr, tu.depth, true );
      }
      if( tu.jointCbCr != codedCbfMask )
      {
        // the requested joint Cbf pattern was not reached: reject this configuration
        ruiDist = MAX_DISTORTION;
        return;
      }
      m_pcTrQuant->invTransformICT( tu, piResi, crResi );
      uiAbsSum = codedCbfMask;
    }

    //===== chroma residual scaling =====
    if( lmcsScaling && uiAbsSum > 0 && cs.picHeader->lmcsChromaResidualScale )
    {
      piResi.scaleSignal( tu.chromaAdj, 0, slice.clpRngs[compID] );

      if( jointCbCr )
      {
        crResi.scaleSignal( tu.chromaAdj, 0, slice.clpRngs[COMP_Cr] );
      }
    }

    //===== reconstruction of the jointly coded Cr block =====
    if( jointCbCr )
    {
      crReco.reconstruct( crPred, crResi, cs.slice->clpRngs[ COMP_Cr ] );
    }
  }

  //===== reconstruction =====
  piReco.reconstruct( piPred, piResi, cs.slice->clpRngs[ compID ] );

  //===== update distortion =====
  const bool reshapeIntraCMD = m_pcEncCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ;
  const bool weightedDist    = ( cs.picHeader->lmcsEnabled && ( reshapeData.getCTUFlag() || ( chromaComp && reshapeIntraCMD ) ) )
                            || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled;

  if( !weightedDist )
  {
    ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE );
    if( jointCbCr )
    {
      CPelBuf crOrg  = cs.getOrgBuf ( COMP_Cr );
      PelBuf  crReco = cs.getRecoBuf( COMP_Cr );
      ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE );
    }
    return;
  }

  const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] );
  if( compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
  {
    // compare against the reconstruction passed through the inverse LUT
    PelBuf tmpRecLuma = cs.getRspRecoBuf( area );
    tmpRecLuma.rspSignal( piReco, reshapeData.getInvLUT() );
    ruiDist += m_pcRdCost->getDistPart( piOrg, tmpRecLuma, bitDepth, compID, DF_SSE_WTD, &orgLuma );
  }
  else
  {
    ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma );
    if( jointCbCr )
    {
      CPelBuf crOrg  = cs.getOrgBuf ( COMP_Cr );
      PelBuf  crReco = cs.getRecoBuf( COMP_Cr );
      ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE_WTD, &orgLuma );
    }
  }
}
1500
1501
/**
 * \brief Luma intra RD evaluation of the partitioner's current area for the CU cs.cus[0].
 *
 * Tests the permitted combinations of transform mode (an index into the local trModes list) and
 * LFNST index (startLfnstIdx..endLfnstIdx), keeps the cheapest one in cs, and finally adds its
 * distortion and fractional bits to cs.dist / cs.fracBits and stores its cost in cs.cost
 * (MAX_DOUBLE if no candidate produced a finite cost).
 *
 * \param cs             coding structure being coded; TUs are added to it and its reco/cost fields are updated
 * \param partitioner    provides current area, channel type, tree type and transform depth; split temporarily for ISP
 * \param predBuf        forwarded unchanged to xPreCheckMTS and xIntraCodingTUBlock
 * \param bestCostSoFar  initial value of the bound handed to xTestISP and reference for the DCT2 early-out threshold
 * \param numMode        a negative value disables MTS and LFNST testing; only its magnitude is used afterwards
 * \param disableMTS     when true, MTS is not tested
 */
void IntraSearch::xIntraCodingLumaQT(CodingStructure& cs, Partitioner& partitioner, PelUnitBuf* predBuf, const double bestCostSoFar, int numMode, bool disableMTS)
1502
106k
{
1503
106k
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_RD_SEARCH_LUMA, &cs, partitioner.chType );
1504
106k
  const UnitArea& currArea  = partitioner.currArea();
1505
106k
  uint32_t        currDepth = partitioner.currTrDepth;
1506
106k
  Distortion singleDistLuma = 0;
1507
106k
  uint32_t   numSig         = 0;
1508
106k
  const SPS &sps            = *cs.sps;
1509
106k
  CodingUnit &cu            = *cs.cus[0];
  // MTS is off when numMode is negative or the caller disabled it; otherwise CU::isMTSAllowed decides
1510
106k
  bool mtsAllowed = (numMode < 0) || disableMTS ? false : CU::isMTSAllowed(cu, COMP_Y);
1511
106k
  uint64_t singleFracBits   = 0;
1512
106k
  bool   splitCbfLumaSum    = false;
1513
106k
  double bestCostForISP     = bestCostSoFar;
1514
106k
  double dSingleCost        = MAX_DOUBLE;
  // highest LFNST index to test: 0 (none) for a separate-tree chroma block narrower/lower than 8, a block
  // larger than the maximum TB size, LFNST disabled in the SPS, or negative numMode; otherwise 2
1515
106k
  int endLfnstIdx           = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (currArea.lwidth() < 8 || currArea.lheight() < 8))
1516
106k
                           || (currArea.lwidth() > sps.getMaxTbSize() || currArea.lheight() > sps.getMaxTbSize()) || !sps.LFNST || (numMode < 0) ? 0 : 2;
1517
106k
  const bool useTS          = cs.picture->useTS;
  // the sign of numMode was only a flag (see above); continue with its magnitude
1518
106k
  numMode                   = (numMode < 0) ? -numMode : numMode;
1519
1520
106k
  if (cu.mipFlag && !allowLfnstWithMip(cu.lumaSize()))
1521
1.89k
  {
1522
1.89k
    endLfnstIdx = 0;
1523
1.89k
  }
1524
106k
  int bestMTS = 0;
  // EndMTS is the last index into trModes that the mode loop below will visit
1525
106k
  int EndMTS  = mtsAllowed ? m_pcEncCfg->m_MTSIntraMaxCand : 0;
  // ISP: no MTS candidates; LFNST is dropped as well when the numTotalParts entries of this ISP mode
  // are zero for both LFNST index 1 and 2
1526
106k
  if (cu.ispMode && (EndMTS || endLfnstIdx))
1527
4.96k
  {
1528
4.96k
    EndMTS = 0;
1529
4.96k
    if ((m_ispTestedModes[1].numTotalParts[cu.ispMode - 1] == 0)
1530
299
     && (m_ispTestedModes[2].numTotalParts[cu.ispMode - 1] == 0))
1531
299
    {
1532
299
      endLfnstIdx = 0;
1533
299
    }
1534
4.96k
  }
  // luma BDPCM: neither LFNST nor MTS is tested
1535
106k
  if (cu.bdpcmM[CH_L])
1536
7.00k
  {
1537
7.00k
    endLfnstIdx = 0;
1538
7.00k
    EndMTS = 0;
1539
7.00k
  }
1540
106k
  bool checkTransformSkip = sps.transformSkip;
1541
1542
106k
  SizeType transformSkipMaxSize = 1 << sps.log2MaxTransformSkipBlockSize;
  // transform skip is a candidate only without ISP, BDPCM and SBT, and only for blocks within the TS size limit
1543
106k
  bool tsAllowed = useTS  && cu.cs->sps->transformSkip && (!cu.ispMode) && (!cu.bdpcmM[CH_L]) && (!cu.sbtInfo);
1544
106k
  tsAllowed &= cu.blocks[COMP_Y].width <= transformSkipMaxSize && cu.blocks[COMP_Y].height <= transformSkipMaxSize;
1545
106k
  if (tsAllowed)
1546
14.0k
  {
    // one more entry (the TS mode) will be present in trModes
1547
14.0k
    EndMTS += 1;
1548
14.0k
  }
  // more than one transform/LFNST candidate: run the search loops with save/restore of the best state;
  // otherwise (else branch far below) code the block once
1549
106k
  if (endLfnstIdx || EndMTS)
1550
43.2k
  {
1551
43.2k
    bool       splitCbfLuma  = false;
1552
43.2k
    const PartSplit ispType  = CU::getISPType(cu, COMP_Y);
1553
43.2k
    CUCtx cuCtx;
1554
43.2k
    cuCtx.isDQPCoded         = true;
1555
43.2k
    cuCtx.isChromaQpAdjCoded = true;
1556
43.2k
    cs.cost                  = 0.0;
1557
43.2k
    Distortion       singleDistTmpLuma = 0;
1558
43.2k
    uint64_t         singleTmpFracBits = 0;
1559
43.2k
    double           singleCostTmp     = 0;
    // ctxStart: CABAC estimator contexts on entry, restored before each further candidate;
    // ctxBest: contexts after coding the best candidate seen so far
1560
43.2k
    const TempCtx    ctxStart          (m_CtxCache, m_CABACEstimator->getCtx());
1561
43.2k
          TempCtx    ctxBest           (m_CtxCache);
    // scratch coding structure holding the best candidate's TU data and reco: slot 0 with ISP, slot 1 without
1562
43.2k
    CodingStructure &saveCS            = *m_pSaveCS[cu.ispMode?0:1];
1563
43.2k
    TransformUnit *  tmpTU             = nullptr;
1564
43.2k
    int              bestLfnstIdx      = 0;
1565
43.2k
    int              startLfnstIdx     = 0;
1566
    // speedUps LFNST
1567
43.2k
    bool   rapidLFNST                  = false;
1568
43.2k
    bool   rapidDCT                    = false;
1569
43.2k
    double thresholdDCT                = 1;
1570
1571
    // with m_MTS == 2 the early-out factor applied to bestCostSoFar is relaxed, more so for small blocks
43.2k
    if (m_pcEncCfg->m_MTS == 2)
1572
0
    {
1573
0
      thresholdDCT += 1.4 / sqrt(cu.lwidth() * cu.lheight());
1574
0
    }
1575
1576
    // m_LFNST > 1 enables the rapidLFNST early terminations; m_LFNST > 2 additionally enables rapidDCT
    // and limits the LFNST search to index 1
43.2k
    if (m_pcEncCfg->m_LFNST > 1)
1577
0
    {
1578
0
      rapidLFNST = true;
1579
1580
0
      if (m_pcEncCfg->m_LFNST > 2)
1581
0
      {
1582
0
        rapidDCT    = true;
1583
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
1584
0
      }
1585
0
    }
1586
1587
43.2k
    saveCS.pcv              = cs.pcv;
1588
43.2k
    saveCS.picture          = cs.picture;
1589
43.2k
    saveCS.area.repositionTo( cs.area);
1590
1591
    // for ISP the partitioner is split first, so the TU added below covers the partitioner's
    // current area inside that split
43.2k
    if (cu.ispMode)
1592
4.66k
    {
1593
4.66k
      partitioner.splitCurrArea(ispType, cs);
1594
4.66k
    }
1595
1596
43.2k
    TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1597
1598
    // prepare the backup TUs in saveCS: one per ISP partition, or a single reusable TU without ISP
43.2k
    if (cu.ispMode)
1599
4.66k
    {
1600
4.66k
      saveCS.clearTUs();
1601
4.66k
      do
1602
18.6k
      {
1603
18.6k
        saveCS.addTU(
1604
18.6k
          CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1605
18.6k
          partitioner.chType, cs.cus[0]);
1606
18.6k
      } while (partitioner.nextPart(cs));
1607
1608
4.66k
      partitioner.exitCurrSplit();
1609
4.66k
    }
1610
38.5k
    else
1611
38.5k
    {
1612
38.5k
      tmpTU = saveCS.tus.empty() ? &saveCS.addTU( currArea, partitioner.chType, nullptr ) : saveCS.tus.front();
1613
38.5k
      tmpTU->initData();
1614
38.5k
      tmpTU->UnitArea::operator=( currArea );
1615
38.5k
    }
1616
1617
1618
    // candidate list indexed by modeId: entry 0 is always present, entry 1 is added when TS is allowed,
    // MTS candidates follow; .first is written to tu.mtsIdx, .second is an enable flag
43.2k
    std::vector<TrMode> trModes{ TrMode(0, true) };
1619
43.2k
    if (tsAllowed)
1620
14.0k
    {
1621
14.0k
      trModes.push_back(TrMode(1, true));
1622
14.0k
    }
1623
43.2k
    double dct2Cost           = MAX_DOUBLE;
1624
43.2k
    double trGrpStopThreshold = 1.001;
1625
43.2k
    double trGrpBestCost      = MAX_DOUBLE;
1626
1627
43.2k
    if (mtsAllowed)
1628
0
    {
1629
0
      if (m_pcEncCfg->m_LFNST)
1630
0
      {
        // with LFNST enabled the order of the two mixed DST7/DCT8 candidates depends on the intra mode (< 34 or not)
1631
0
        uint32_t uiIntraMode = cs.cus[0]->intraDir[partitioner.chType];
1632
0
        int MTScur           = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
1633
1634
0
        trModes.push_back(TrMode(     2, true));
1635
0
        trModes.push_back(TrMode(MTScur, true));
1636
1637
0
        MTScur = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
1638
1639
0
        trModes.push_back(TrMode(MTScur,            true));
1640
0
        trModes.push_back(TrMode(MTS_DST7_DST7 + 3, true));
1641
0
      }
1642
0
      else
1643
0
      {
1644
0
        for (int i = 2; i < 6; i++)
1645
0
        {
1646
0
          trModes.push_back(TrMode(i, true));
1647
0
        }
1648
0
      }
1649
0
    }
1650
1651
    // pre-check of the candidate list; NOTE(review): presumably xPreCheckMTS clears .second of the
    // candidates it rejects -- confirm against its definition
43.2k
    if ((EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed))
1652
14.0k
    {
1653
14.0k
      xPreCheckMTS(tu, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, predBuf);
      // TS-only case: if entry 1 is flagged off after the pre-check, only modeId 0 is tested
1654
14.0k
      if (!mtsAllowed && !trModes[1].second)
1655
2.81k
      {
1656
2.81k
        EndMTS = 0;
1657
2.81k
      }
1658
14.0k
    }
1659
1660
43.2k
    bool NStopMTS = true;
1661
1662
    // outer loop: transform candidates; inner loop: LFNST indices. Both upper bounds (EndMTS, endLfnstIdx)
    // may be lowered inside the loops to terminate the search early.
86.4k
    for (int modeId = 0; modeId <= EndMTS && NStopMTS; modeId++)
1663
43.2k
    {
1664
43.2k
      if (modeId > 1)
1665
0
      {
1666
0
        trGrpBestCost = MAX_DOUBLE;
1667
0
      }
1668
152k
      for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
1669
109k
      {
        // a non-zero LFNST index is only combined with modeId 0
1670
109k
        if (lfnstIdx && modeId)
1671
0
        {
1672
0
          continue;
1673
0
        }
1674
109k
        if (mtsAllowed || tsAllowed)
1675
22.0k
        {
          // stop the LFNST loop once the best candidate so far has index MTS_SKIP (only with m_TS set)
1676
22.0k
          if (m_pcEncCfg->m_TS && bestMTS == MTS_SKIP)
1677
0
          {
1678
0
            break;
1679
0
          }
          // without LFNST, skip MTS candidates whose enable flag is off
1680
22.0k
          if (!m_pcEncCfg->m_LFNST && !trModes[modeId].second && mtsAllowed)
1681
0
          {
1682
0
            continue;
1683
0
          }
1684
1685
22.0k
          tu.mtsIdx[COMP_Y] = trModes[modeId].first;
1686
22.0k
        }
1687
1688
109k
        if (cu.ispMode && lfnstIdx)
1689
9.33k
        {
          // skip this LFNST index for ISP when its numTotalParts entry is zero; if that happens for
          // index 2 the loop bound is lowered to 1
1690
9.33k
          if (m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] == 0)
1691
0
          {
1692
0
            if (lfnstIdx == 2)
1693
0
            {
1694
0
              endLfnstIdx = 1;
1695
0
            }
1696
0
            continue;
1697
0
          }
1698
9.33k
        }
1699
1700
109k
        cu.lfnstIdx                          = lfnstIdx;
1701
109k
        cuCtx.lfnstLastScanPos               = false;
1702
109k
        cuCtx.violatesLfnstConstrained[CH_L] = false;
1703
109k
        cuCtx.violatesLfnstConstrained[CH_C] = false;
1704
1705
        // every candidate except the very first starts from the entry CABAC contexts again
109k
        if ((lfnstIdx != startLfnstIdx) || (modeId))
1706
66.3k
        {
1707
66.3k
          m_CABACEstimator->getCtx() = ctxStart;
1708
66.3k
        }
1709
1710
109k
        singleDistTmpLuma = 0;
1711
1712
109k
        if (cu.ispMode)
1713
14.0k
        {
1714
14.0k
          splitCbfLuma = false;
1715
1716
          // ISP: xTestISP is run inside the ISP split and returns the cost; it also fills
          // splitCbfLuma, singleTmpFracBits and singleDistTmpLuma (passed by reference)
14.0k
          partitioner.splitCurrArea(ispType, cs);
1717
1718
14.0k
          singleCostTmp = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLuma, singleTmpFracBits, singleDistTmpLuma, cuCtx);
1719
1720
14.0k
          partitioner.exitCurrSplit();
1721
1722
14.0k
          if (modeId && (singleCostTmp == MAX_DOUBLE))
1723
0
          {
1724
0
            m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] = 0;
1725
0
          }
1726
1727
          // the ISP cost is recorded when numMode == 1, or when m_ISP >= 2 and numMode <= 1
14.0k
          bool storeCost = (numMode == 1) ? true : false;
1728
1729
14.0k
          if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1730
14.0k
          {
1731
14.0k
            storeCost = true;
1732
14.0k
          }
1733
1734
14.0k
          if (storeCost)
1735
14.0k
          {
1736
14.0k
            m_ispTestedModes[0].bestCost[cu.ispMode - 1] = singleCostTmp;
1737
14.0k
          }
1738
14.0k
        }
1739
95.5k
        else
1740
95.5k
        {
          // TrLoad is set under the same condition that triggered xPreCheckMTS above (restricted to
          // lfnstIdx 0 in the TS-only case); NOTE(review): presumably lets xIntraCodingTUBlock reuse
          // what the pre-check prepared -- confirm
1741
95.5k
          bool TrLoad = (EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed && (lfnstIdx == 0)) ? true : false;
1742
1743
95.5k
          xIntraCodingTUBlock(tu, COMP_Y, false, singleDistTmpLuma, &numSig, predBuf, TrLoad);
1744
1745
95.5k
          cuCtx.mtsLastScanPos = false;
1746
          //----- determine rate and r-d cost -----
          // a candidate without luma cbf is invalidated when, with SPS LFNST, it is the last non-zero
          // modeId and SPS transform skip is on, or, without SPS LFNST, its transform index is non-zero
1747
18.4E
        if ((sps.LFNST ? (modeId == EndMTS && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1748
0
        {
1749
0
          singleCostTmp = MAX_DOUBLE;
1750
0
        }
1751
95.5k
        else
1752
95.5k
        {
1753
95.5k
          m_ispTestedModes[0].IspType      = TU_NO_ISP;
1754
95.5k
          m_ispTestedModes[0].subTuCounter = -1;
1755
95.5k
          singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, &cuCtx);
1756
1757
          // transform indices above MTS_SKIP are only valid if cuCtx.mtsLastScanPos was set during bit estimation
95.5k
          if (tu.mtsIdx[COMP_Y] > MTS_SKIP)
1758
0
          {
1759
0
            if (!cuCtx.mtsLastScanPos)
1760
0
            {
1761
0
              singleCostTmp = MAX_DOUBLE;
1762
0
            }
1763
0
            else
1764
0
            {
1765
0
              singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1766
0
            }
1767
0
          }
1768
95.5k
          else
1769
95.5k
          {
1770
95.5k
            singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1771
95.5k
          }
1772
95.5k
        }
1773
1774
          // early-out after the first candidate (modeId 0, lfnstIdx 0): if it is already worse than
          // bestCostSoFar * thresholdDCT, no further transform modes (and with rapidDCT no LFNST) are tried
95.5k
          if (((EndMTS && (m_pcEncCfg->m_MTS == 2)) || rapidLFNST) && modeId == 0 && lfnstIdx == 0)
1775
0
          {
1776
0
            if (singleCostTmp > bestCostSoFar * thresholdDCT)
1777
0
            {
1778
0
              EndMTS = 0;
1779
1780
0
              if (rapidDCT)
1781
0
              {
1782
0
                endLfnstIdx = 0;   // break the loop but do not cpy best
1783
0
              }
1784
0
            }
1785
0
          }
1786
1787
          // non-zero LFNST index but cuCtx.lfnstLastScanPos was not set: the candidate is invalidated
          // below when the relevant cbf is set
95.5k
          if (lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode)
1788
47.7k
          {
1789
47.7k
            bool rootCbfL = false;
1790
1791
190k
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cu.cs->pcv); t++)
1792
143k
            {
1793
143k
              rootCbfL |= tu.cbf[t] != 0;
1794
143k
            }
1795
1796
47.7k
            if (rapidLFNST && !rootCbfL)
1797
0
            {
1798
0
              endLfnstIdx = lfnstIdx; // break the loop
1799
0
            }
            // separate tree: any cbf of the TU counts; otherwise only the luma cbf counts when the
            // format has chroma and the smaller side of blocks[1] of the CU's first TU is below 4
1800
47.7k
            bool cbfAtZeroDepth = CU::isSepTree(cu)
1801
47.7k
              ? rootCbfL
1802
47.7k
              : (cs.area.chromaFormat != CHROMA_400 && std::min(cu.firstTU->blocks[1].width, cu.firstTU->blocks[1].height) < 4)
1803
0
                ? TU::getCbfAtDepth(tu, COMP_Y, currDepth)
1804
0
                : rootCbfL;
1805
1806
47.7k
            if (cbfAtZeroDepth)
1807
362
            {
1808
362
              singleCostTmp = MAX_DOUBLE;
1809
362
            }
1810
47.7k
          }
1811
95.5k
        }
1812
1813
        // new best candidate: remember its cost, distortion, bits and indices
109k
        if (singleCostTmp < dSingleCost)
1814
39.8k
        {
1815
39.8k
          trGrpBestCost  = singleCostTmp;
1816
39.8k
          dSingleCost    = singleCostTmp;
1817
39.8k
          singleDistLuma = singleDistTmpLuma;
1818
39.8k
          singleFracBits = singleTmpFracBits;
1819
39.8k
          bestLfnstIdx   = lfnstIdx;
1820
39.8k
          bestMTS        = modeId;
1821
1822
39.8k
          if (dSingleCost < bestCostForISP)
1823
24.9k
          {
1824
24.9k
            bestCostForISP = dSingleCost;
1825
24.9k
          }
1826
1827
39.8k
          splitCbfLumaSum = splitCbfLuma;
1828
1829
39.8k
          if (lfnstIdx == 0 && modeId == 0 && cu.ispMode == 0)
1830
38.5k
          {
1831
38.5k
            dct2Cost = singleCostTmp;
1832
1833
            // first candidate left no luma coefficients: do not test further transform modes
            // (and with rapidLFNST no LFNST indices either)
38.5k
            if (!TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1834
32.6k
            {
1835
32.6k
              if (rapidLFNST)
1836
0
              {
1837
0
                 endLfnstIdx = 0;   // break the loop but do not cpy best
1838
0
              }
1839
1840
32.6k
              EndMTS = 0;
1841
32.6k
            }
1842
38.5k
          }
1843
1844
          // back up the best state into saveCS unless this candidate sits at the current loop end
          // (endLfnstIdx, EndMTS); the same test after the loops decides whether a restore is done
39.8k
          if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1845
29.6k
          {
1846
29.6k
            if (cu.ispMode)
1847
1.01k
            {
1848
1.01k
              saveCS.getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1849
1850
5.06k
              for (uint32_t j = 0; j < cs.tus.size(); j++)
1851
4.04k
              {
1852
4.04k
                saveCS.tus[j]->copyComponentFrom(*cs.tus[j], COMP_Y);
1853
4.04k
              }
1854
1.01k
            }
1855
28.6k
            else
1856
28.6k
            {
1857
28.6k
              saveCS.getPredBuf(tu.Y()).copyFrom(cs.getPredBuf(tu.Y()));
1858
28.6k
              saveCS.getRecoBuf(tu.Y()).copyFrom(cs.getRecoBuf(tu.Y()));
1859
1860
28.6k
              tmpTU->copyComponentFrom(tu, COMP_Y);
1861
28.6k
            }
1862
1863
29.6k
            ctxBest = m_CABACEstimator->getCtx();
1864
29.6k
          }
1865
      
1866
39.8k
        }
1867
69.6k
        else
1868
69.6k
        {
          // no improvement: with rapidLFNST do not try higher LFNST indices
1869
69.6k
          if( rapidLFNST )
1870
0
          {
1871
0
            endLfnstIdx = lfnstIdx; // break the loop
1872
0
          }
1873
69.6k
        }
1874
109k
      }
      // fast MTS with LFNST (m_MTS == 2): after a non-zero modeId that is not the last one, stop the
      // mode loop unless the best is still the first candidate, or this group qualifies (modeId 1,
      // or the best mode is this modeId) and dct2Cost / trGrpBestCost is below trGrpStopThreshold
1875
43.2k
      if (m_pcEncCfg->m_LFNST && m_pcEncCfg->m_MTS == 2 && modeId && modeId != EndMTS)
1876
0
      {
1877
0
        NStopMTS = false;
1878
1879
0
        if (bestMTS || bestLfnstIdx)
1880
0
        {
1881
0
          if ((modeId > 1 && bestMTS == modeId) || modeId == 1)
1882
0
          {
1883
0
            NStopMTS = (dct2Cost / trGrpBestCost) < trGrpStopThreshold;
1884
0
          }
1885
0
        }
1886
0
      }
1887
43.2k
    }
1888
1889
43.2k
    cu.lfnstIdx = bestLfnstIdx;
1890
43.2k
    if (dSingleCost != MAX_DOUBLE)
1891
39.4k
    {
      // the best candidate was not at the loop end: restore its reco, TU data and CABAC contexts from the backup
1892
39.4k
      if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1893
29.2k
      {
1894
29.2k
        if (cu.ispMode)
1895
722
        {
          // note: this local reference shadows the function-level currArea
1896
722
          const UnitArea& currArea = partitioner.currArea();
1897
722
          cs.getRecoBuf(currArea.Y()).copyFrom(saveCS.getRecoBuf(currArea.Y()));
1898
1899
          // cs holds a different number of TUs than the backup: add TUs for the following ISP
          // partitions until the counts match, so the per-TU copy below has a destination for every entry
722
          if (saveCS.tus.size() != cs.tus.size())
1900
0
          {
1901
0
            partitioner.splitCurrArea(ispType, cs);
1902
1903
0
            do
1904
0
            {
1905
0
              partitioner.nextPart(cs);
1906
0
              cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1907
0
                partitioner.chType, cs.cus[0]);
1908
0
            } while (saveCS.tus.size() != cs.tus.size());
1909
1910
0
            partitioner.exitCurrSplit();
1911
0
          }
1912
1913
3.61k
          for (uint32_t j = 0; j < saveCS.tus.size(); j++)
1914
2.88k
          {
1915
2.88k
            cs.tus[j]->copyComponentFrom(*saveCS.tus[j], COMP_Y);
1916
2.88k
          }
1917
722
        }
1918
28.5k
        else
1919
28.5k
        {
1920
28.5k
          cs.getRecoBuf(tu.Y()).copyFrom(saveCS.getRecoBuf(tu.Y()));
1921
1922
28.5k
          tu.copyComponentFrom(*tmpTU, COMP_Y);
1923
28.5k
        }
1924
1925
29.2k
        m_CABACEstimator->getCtx() = ctxBest;
1926
29.2k
      }
1927
1928
      // otherwise this would've happened in useSubStructure
1929
39.4k
      cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1930
39.4k
    }
1931
43.2k
  }
1932
63.3k
  else
1933
63.3k
  {
    // single candidate only (no LFNST, no extra transform modes): code once, no backup needed
1934
63.3k
    if (cu.ispMode)
1935
299
    {
1936
299
      const PartSplit ispType = CU::getISPType(cu, COMP_Y);
1937
299
      partitioner.splitCurrArea(ispType, cs);
1938
1939
299
      CUCtx      cuCtx;
      // results are written straight into the final cost/bits/distortion/cbf variables
1940
299
      dSingleCost = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLumaSum, singleFracBits, singleDistLuma, cuCtx);
1941
299
      partitioner.exitCurrSplit();
1942
299
      bool storeCost = (numMode == 1) ? true : false;
1943
299
      if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1944
299
      {
1945
299
        storeCost = true;
1946
299
      }
1947
299
      if (storeCost)
1948
299
      {
1949
299
        m_ispTestedModes[0].bestCost[cu.ispMode - 1] = dSingleCost;
1950
299
      }
1951
299
    }
1952
63.0k
    else
1953
63.0k
    {
1954
63.0k
      TransformUnit& tu =
1955
63.0k
        cs.addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1956
63.0k
      tu.depth = currDepth;
1957
1958
63.0k
      CHECK(!tu.Y().valid(), "Invalid TU");
1959
63.0k
      xIntraCodingTUBlock(tu, COMP_Y, false, singleDistLuma, &numSig, predBuf);
1960
      //----- determine rate and r-d cost -----
1961
63.0k
      m_ispTestedModes[0].IspType = TU_NO_ISP;
1962
63.0k
      m_ispTestedModes[0].subTuCounter = -1;
1963
63.0k
      singleFracBits = xGetIntraFracBitsQT(cs, partitioner, true);
1964
63.0k
      dSingleCost = m_pcRdCost->calcRdCost(singleFracBits, singleDistLuma);
1965
63.0k
    }
1966
63.3k
  }
1967
1968
  // ISP: write the luma cbf of the chosen candidate at currDepth into every TU inside the current luma area
106k
  if (cu.ispMode)
1969
4.96k
  { 
1970
4.96k
    for (auto& ptu : cs.tus)
1971
8.17k
    {
1972
8.17k
      if (currArea.Y().contains(ptu->Y()))
1973
8.17k
      {
1974
8.17k
        TU::setCbfAtDepth(*ptu, COMP_Y, currDepth, splitCbfLumaSum ? 1 : 0);
1975
8.17k
      }
1976
8.17k
    }
1977
4.96k
  }
  // distortion and bits are accumulated, the cost is overwritten
1978
106k
  cs.dist     += singleDistLuma;
1979
106k
  cs.fracBits += singleFracBits;
1980
106k
  cs.cost      = dSingleCost;
1981
1982
106k
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!cs.slice->isIntra() + cs.slice->depth] );
1983
106k
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !cs.slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( cs.area.lheight() )][Log2( cs.area.lwidth() )] );
1984
106k
}
1985
1986
ChromaCbfs IntraSearch::xIntraChromaCodingQT(CodingStructure& cs, Partitioner& partitioner)
1987
258k
{
1988
258k
  UnitArea    currArea      = partitioner.currArea();
1989
1990
258k
  if( !currArea.Cb().valid() ) 
1991
0
    return ChromaCbfs(false);
1992
1993
258k
  TransformUnit& currTU     = *cs.getTU( currArea.chromaPos(), CH_C );
1994
258k
  const CodingUnit& cu  = *cs.getCU( currArea.chromaPos(), CH_C, TREE_D );
1995
258k
  ChromaCbfs cbfs(false);
1996
258k
  uint32_t   currDepth = partitioner.currTrDepth;
1997
258k
  const bool useTS = cs.picture->useTS;
1998
258k
  if (currDepth == currTU.depth)
1999
258k
  {
2000
258k
    if (!currArea.Cb().valid() || !currArea.Cr().valid())
2001
0
    {
2002
0
      return cbfs;
2003
0
    }
2004
2005
258k
    CodingStructure& saveCS = *m_pSaveCS[1];
2006
258k
    saveCS.pcv = cs.pcv;
2007
258k
    saveCS.picture = cs.picture;
2008
258k
    saveCS.area.repositionTo(cs.area);
2009
2010
258k
    TransformUnit& tmpTU = saveCS.tus.empty() ? saveCS.addTU(currArea, partitioner.chType, nullptr) : *saveCS.tus.front();
2011
258k
    tmpTU.initData();
2012
258k
    tmpTU.UnitArea::operator=(currArea);
2013
258k
    const unsigned      numTBlocks = getNumberValidTBlocks(*cs.pcv);
2014
2015
258k
    CompArea& cbArea = currTU.blocks[COMP_Cb];
2016
258k
    CompArea& crArea = currTU.blocks[COMP_Cr];
2017
258k
    double     bestCostCb = MAX_DOUBLE;
2018
258k
    double     bestCostCr = MAX_DOUBLE;
2019
258k
    Distortion bestDistCb = 0;
2020
258k
    Distortion bestDistCr = 0;
2021
2022
258k
    TempCtx ctxStartTU(m_CtxCache);
2023
258k
    TempCtx ctxStart(m_CtxCache);
2024
258k
    TempCtx ctxBest(m_CtxCache);
2025
2026
258k
    ctxStartTU = m_CABACEstimator->getCtx();
2027
258k
    ctxStart = m_CABACEstimator->getCtx();
2028
258k
    currTU.jointCbCr = 0;
2029
2030
    // Do predictions here to avoid repeating the "default0Save1Load2" stuff
2031
258k
    int  predMode = cu.bdpcmM[CH_C] ? BDPCM_IDX : CU::getFinalIntraMode(cu, CH_C);
2032
2033
258k
    PelBuf piPredCb = cs.getPredBuf(COMP_Cb);
2034
258k
    PelBuf piPredCr = cs.getPredBuf(COMP_Cr);
2035
2036
258k
    initIntraPatternChType(*currTU.cu, cbArea);
2037
258k
    initIntraPatternChType(*currTU.cu, crArea);
2038
2039
258k
    if (CU::isLMCMode(predMode))
2040
18.9k
    {
2041
18.9k
      loadLMLumaRecPels(cu, cbArea);
2042
18.9k
      predIntraChromaLM(COMP_Cb, piPredCb, cu, cbArea, predMode);
2043
18.9k
      predIntraChromaLM(COMP_Cr, piPredCr, cu, crArea, predMode);
2044
18.9k
    }
2045
239k
    else
2046
239k
    {
2047
239k
      predIntraAng(COMP_Cb, piPredCb, cu);
2048
239k
      predIntraAng(COMP_Cr, piPredCr, cu);
2049
239k
    }
2050
2051
    // determination of chroma residuals including reshaping and cross-component prediction
2052
    //----- get chroma residuals -----
2053
258k
    PelBuf resiCb = cs.getResiBuf(COMP_Cb);
2054
258k
    PelBuf resiCr = cs.getResiBuf(COMP_Cr);
2055
258k
    resiCb.subtract(cs.getOrgBuf(COMP_Cb), piPredCb);
2056
258k
    resiCr.subtract(cs.getOrgBuf(COMP_Cr), piPredCr);
2057
2058
    //----- get reshape parameter ----
2059
258k
    ReshapeData& reshapeData = cs.picture->reshapeData;
2060
258k
    bool doReshaping = (cs.picHeader->lmcsEnabled && cs.picHeader->lmcsChromaResidualScale && (cs.slice->isIntra() || reshapeData.getCTUFlag()) && (cbArea.width * cbArea.height > 4));
2061
258k
    if (doReshaping)
2062
0
    {
2063
0
      const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].size()));
2064
0
      const CompArea& areaY = CompArea(COMP_Y, currTU.chromaFormat, area);
2065
0
      currTU.chromaAdj = reshapeData.calculateChromaAdjVpduNei(currTU, areaY, currTU.cu->treeType);
2066
0
    }
2067
2068
    //===== store original residual signals (std and crossCompPred) =====
2069
1.55M
    for( int k = 0; k < 5; k++ )
2070
1.29M
    {
2071
1.29M
      m_orgResiCb[k].compactResize( cbArea );
2072
1.29M
      m_orgResiCr[k].compactResize( crArea );
2073
1.29M
    }
2074
517k
    for (int k = 0; k < 1; k += 4)
2075
258k
    {
2076
258k
      m_orgResiCb[k].copyFrom(resiCb);
2077
258k
      m_orgResiCr[k].copyFrom(resiCr);
2078
2079
258k
      if (doReshaping)
2080
0
      {
2081
0
        int cResScaleInv = currTU.chromaAdj;
2082
0
        m_orgResiCb[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cb]);
2083
0
        m_orgResiCr[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cr]);
2084
0
      }
2085
258k
    }
2086
2087
258k
    CUCtx cuCtx;
2088
258k
    cuCtx.isDQPCoded = true;
2089
258k
    cuCtx.isChromaQpAdjCoded = true;
2090
258k
    cuCtx.lfnstLastScanPos = false;
2091
2092
258k
    CodingStructure& saveCScur = *m_pSaveCS[2];
2093
2094
258k
    saveCScur.pcv = cs.pcv;
2095
258k
    saveCScur.picture = cs.picture;
2096
258k
    saveCScur.area.repositionTo(cs.area);
2097
2098
258k
    TransformUnit& tmpTUcur = saveCScur.tus.empty() ? saveCScur.addTU(currArea, partitioner.chType, nullptr) : *saveCScur.tus.front();
2099
258k
    tmpTUcur.initData();
2100
258k
    tmpTUcur.UnitArea::operator=(currArea);
2101
2102
258k
    TempCtx ctxBestTUL(m_CtxCache);
2103
2104
258k
    const SPS& sps = *cs.sps;
2105
258k
    double     bestCostCbcur = MAX_DOUBLE;
2106
258k
    double     bestCostCrcur = MAX_DOUBLE;
2107
258k
    Distortion bestDistCbcur = 0;
2108
258k
    Distortion bestDistCrcur = 0;
2109
2110
258k
    int  endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8))
2111
247k
      || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) || !sps.LFNST ? 0 : 2;
2112
258k
    int  startLfnstIdx = 0;
2113
258k
    int  bestLfnstIdx = 0;
2114
258k
    bool testLFNST = sps.LFNST;
2115
2116
    // speedUps LFNST
2117
258k
    bool rapidLFNST = false;
2118
258k
    if (m_pcEncCfg->m_LFNST > 1)
2119
0
    {
2120
0
      rapidLFNST = true;
2121
0
      if (m_pcEncCfg->m_LFNST > 2)
2122
0
      {
2123
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
2124
0
      }
2125
0
    }
2126
258k
    int ts_used = 0;
2127
258k
    bool testTS = false;
2128
258k
    if (partitioner.chType != CH_C)
2129
0
    {
2130
0
      startLfnstIdx = currTU.cu->lfnstIdx;
2131
0
      endLfnstIdx = currTU.cu->lfnstIdx;
2132
0
      bestLfnstIdx = currTU.cu->lfnstIdx;
2133
0
      testLFNST  = false;
2134
0
      rapidLFNST = false;
2135
0
      ts_used = currTU.mtsIdx[COMP_Y];
2136
0
    }
2137
258k
    if (cu.bdpcmM[CH_C])
2138
34.9k
    {
2139
34.9k
      endLfnstIdx = 0;
2140
34.9k
      testLFNST = false;
2141
34.9k
    }
2142
2143
258k
    double dSingleCostAll = MAX_DOUBLE;
2144
258k
    double singleCostTmpAll = 0;
2145
2146
945k
    for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
2147
687k
    {
2148
687k
      if (rapidLFNST && lfnstIdx)
2149
0
      {
2150
0
        if ((lfnstIdx == 2) && (bestLfnstIdx == 0))
2151
0
        {
2152
0
          continue;
2153
0
        }
2154
0
      }
2155
2156
687k
      currTU.cu->lfnstIdx = lfnstIdx;
2157
687k
      if (lfnstIdx)
2158
428k
      {
2159
428k
        m_CABACEstimator->getCtx() = ctxStartTU;
2160
428k
      }
2161
2162
687k
      cuCtx.lfnstLastScanPos = false;
2163
687k
      cuCtx.violatesLfnstConstrained[CH_L] = false;
2164
687k
      cuCtx.violatesLfnstConstrained[CH_C] = false;
2165
2166
2.06M
      for (uint32_t c = COMP_Cb; c < numTBlocks; c++)
2167
1.37M
      {
2168
1.37M
        const ComponentID compID = ComponentID(c);
2169
1.37M
        const CompArea& area = currTU.blocks[compID];
2170
1.37M
        double     dSingleCost = MAX_DOUBLE;
2171
1.37M
        Distortion singleDistCTmp = 0;
2172
1.37M
        double     singleCostTmp = 0;
2173
1.37M
        bool tsAllowed = useTS && TU::isTSAllowed(currTU, compID) && m_pcEncCfg->m_useChromaTS && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2174
1.37M
        if ((partitioner.chType == CH_L) && (!ts_used))
2175
0
        {
2176
0
          tsAllowed = false;
2177
0
        }
2178
1.37M
        uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests       
2179
1.37M
        std::vector<TrMode> trModes;
2180
1.37M
        if (nNumTransformCands > 1)
2181
0
        {
2182
0
          trModes.push_back(TrMode(0, true));   // DCT2
2183
0
          trModes.push_back(TrMode(1, true));   // TS
2184
0
          testTS = true;
2185
0
        }
2186
1.37M
        bool cbfDCT2 = true;
2187
1.37M
        const bool isLastMode = testLFNST || cs.sps->jointCbCr ||  tsAllowed ? false : true;
2188
1.37M
        int bestModeId = 0;
2189
1.37M
        ctxStart = m_CABACEstimator->getCtx();
2190
2.74M
        for (int modeId = 0; modeId < nNumTransformCands; modeId++)
2191
1.37M
        {
2192
1.37M
          if (doReshaping || lfnstIdx || modeId)
2193
856k
          {
2194
856k
            resiCb.copyFrom(m_orgResiCb[0]);
2195
856k
            resiCr.copyFrom(m_orgResiCr[0]);
2196
856k
          }
2197
1.37M
          if (modeId == 0)
2198
1.37M
          {
2199
1.37M
            if ( tsAllowed)
2200
0
            {
2201
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, compID);
2202
0
            }
2203
1.37M
          }
2204
2205
1.37M
          currTU.mtsIdx[compID] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : modeId;
2206
2207
1.37M
          if (modeId)
2208
0
          {
2209
0
            if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP)
2210
0
            {
2211
0
              break;
2212
0
            }
2213
0
            m_CABACEstimator->getCtx() = ctxStart;
2214
0
          }
2215
1.37M
          singleDistCTmp = 0;
2216
1.37M
          if (tsAllowed)
2217
0
          {
2218
0
            xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp, 0, 0, true);
2219
0
            if ((modeId == 0) && (!trModes[modeId + 1].second))
2220
0
            {
2221
0
              nNumTransformCands = 1;
2222
0
            }
2223
0
          }
2224
1.37M
          else
2225
1.37M
        {
2226
1.37M
          xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp);
2227
1.37M
        }
2228
1.37M
        if (((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmM[CH_C])
2229
0
          && !TU::getCbf(currTU, compID)))   // In order not to code TS flag when cbf is zero, the case for TS with
2230
                                             // cbf being zero is forbidden.
2231
0
        {
2232
0
          singleCostTmp = MAX_DOUBLE;
2233
0
        }
2234
1.37M
        else
2235
1.37M
        {
2236
1.37M
          uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID, &cuCtx);
2237
1.37M
          singleCostTmp = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp);
2238
1.37M
        }
2239
2240
1.37M
        if (singleCostTmp < dSingleCost)
2241
1.37M
        {
2242
1.37M
          dSingleCost = singleCostTmp;
2243
2244
1.37M
          if (compID == COMP_Cb)
2245
687k
          {
2246
687k
            bestCostCb = singleCostTmp;
2247
687k
            bestDistCb = singleDistCTmp;
2248
687k
          }
2249
687k
          else
2250
687k
          {
2251
687k
            bestCostCr = singleCostTmp;
2252
687k
            bestDistCr = singleDistCTmp;
2253
687k
          }
2254
1.37M
          bestModeId = modeId;
2255
1.37M
          if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2)
2256
1.30M
          {
2257
1.30M
            cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth);
2258
1.30M
          }
2259
1.37M
          if (!isLastMode)
2260
1.37M
          {
2261
1.37M
            saveCS.getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
2262
1.37M
            tmpTU.copyComponentFrom(currTU, compID);
2263
1.37M
            ctxBest = m_CABACEstimator->getCtx();
2264
1.37M
          }
2265
1.37M
        }
2266
1.37M
        }
2267
1.37M
        if (testTS && ((c == COMP_Cb && bestModeId < (nNumTransformCands - 1)) ))
2268
0
        {
2269
0
          m_CABACEstimator->getCtx() = ctxBest;
2270
2271
0
          currTU.copyComponentFrom(tmpTU, COMP_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf
2272
0
        }
2273
1.37M
      }
2274
2275
687k
      singleCostTmpAll = bestCostCb + bestCostCr;
2276
2277
687k
      bool rootCbfL = false;
2278
687k
      if (testLFNST)
2279
652k
      {
2280
2.60M
        for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2281
1.95M
        {
2282
1.95M
          rootCbfL |= bool(tmpTU.cbf[t]);
2283
1.95M
        }
2284
652k
        if (rapidLFNST && !rootCbfL)
2285
0
        {
2286
0
          endLfnstIdx = lfnstIdx; // end this
2287
0
        }
2288
652k
      }
2289
2290
687k
      if (testLFNST && lfnstIdx && !cuCtx.lfnstLastScanPos)
2291
281k
      {
2292
281k
        bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu)
2293
281k
          ? rootCbfL : (cs.area.chromaFormat != CHROMA_400
2294
0
            && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2295
1
          ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2296
281k
        if (cbfAtZeroDepth)
2297
1.23k
        {
2298
1.23k
          singleCostTmpAll = MAX_DOUBLE;
2299
1.23k
        }
2300
281k
      }
2301
687k
      if ((testLFNST || testTS) && (singleCostTmpAll < dSingleCostAll))
2302
223k
      {
2303
223k
        bestLfnstIdx = lfnstIdx;
2304
223k
        if ((lfnstIdx != endLfnstIdx) || testTS)
2305
214k
        {
2306
214k
          dSingleCostAll = singleCostTmpAll;
2307
2308
214k
          bestCostCbcur = bestCostCb;
2309
214k
          bestCostCrcur = bestCostCr;
2310
214k
          bestDistCbcur = bestDistCb;
2311
214k
          bestDistCrcur = bestDistCr;
2312
2313
214k
          saveCScur.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2314
214k
          saveCScur.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2315
2316
214k
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cb);
2317
214k
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cr);
2318
214k
        }
2319
223k
        ctxBestTUL = m_CABACEstimator->getCtx();
2320
223k
      }
2321
687k
    }
2322
258k
    if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2323
214k
    {
2324
214k
      bestCostCb = bestCostCbcur;
2325
214k
      bestCostCr = bestCostCrcur;
2326
214k
      bestDistCb = bestDistCbcur;
2327
214k
      bestDistCr = bestDistCrcur;
2328
214k
      currTU.cu->lfnstIdx = bestLfnstIdx;
2329
214k
      if (!cs.sps->jointCbCr)
2330
0
      {
2331
0
        cs.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2332
0
        cs.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2333
2334
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2335
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2336
2337
0
        m_CABACEstimator->getCtx() = ctxBestTUL;
2338
0
      }
2339
214k
    }
2340
2341
258k
    Distortion bestDistCbCr = bestDistCb + bestDistCr;
2342
2343
258k
    if (cs.sps->jointCbCr)
2344
258k
    {
2345
258k
      if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2346
214k
      {
2347
214k
        saveCS.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2348
214k
        saveCS.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2349
2350
214k
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2351
214k
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2352
214k
        m_CABACEstimator->getCtx() = ctxBestTUL;
2353
214k
        ctxBest = m_CABACEstimator->getCtx();
2354
214k
      }
2355
      // Test using joint chroma residual coding
2356
258k
      double     bestCostCbCr = bestCostCb + bestCostCr;
2357
258k
      int        bestJointCbCr = 0;
2358
258k
      bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cr)) ||
2359
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cb)) ||
2360
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2));
2361
258k
      bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cr)) ||
2362
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cb)) ||
2363
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP));
2364
258k
      bool       lastIsBest = false;
2365
258k
      bool noLFNST1 = false;
2366
258k
      if (rapidLFNST && (startLfnstIdx != endLfnstIdx))
2367
0
      {
2368
0
        if (bestLfnstIdx == 2)
2369
0
        {
2370
0
          noLFNST1 = true;
2371
0
        }
2372
0
        else
2373
0
        {
2374
0
          endLfnstIdx = 1;
2375
0
        }
2376
0
      }
2377
2378
945k
      for (int lfnstIdxj = startLfnstIdx; lfnstIdxj <= endLfnstIdx; lfnstIdxj++)
2379
687k
      {
2380
687k
        if (rapidLFNST && noLFNST1 && (lfnstIdxj == 1))
2381
0
        {
2382
0
          continue;
2383
0
        }
2384
687k
        currTU.cu->lfnstIdx = lfnstIdxj;
2385
687k
        std::vector<int> jointCbfMasksToTest;
2386
687k
        if (TU::getCbf(tmpTU, COMP_Cb) || TU::getCbf(tmpTU, COMP_Cr))
2387
243k
        {
2388
243k
          jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, m_orgResiCb, m_orgResiCr);
2389
243k
        }
2390
687k
        for (int cbfMask : jointCbfMasksToTest)
2391
243k
        {
2392
243k
          currTU.jointCbCr = (uint8_t)cbfMask;
2393
243k
          ComponentID codeCompId = ((currTU.jointCbCr >> 1) ? COMP_Cb : COMP_Cr);
2394
243k
          ComponentID otherCompId = ((codeCompId == COMP_Cb) ? COMP_Cr : COMP_Cb);
2395
243k
          bool tsAllowed = useTS && TU::isTSAllowed(currTU, codeCompId) && (m_pcEncCfg->m_useChromaTS) && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2396
243k
          if ((partitioner.chType == CH_L)&& tsAllowed && (currTU.mtsIdx[COMP_Y] != MTS_SKIP))
2397
0
          {
2398
0
            tsAllowed = false;
2399
0
          }
2400
243k
          if (!tsAllowed)
2401
243k
          {
2402
243k
            checkTSOnly = false;
2403
243k
          }
2404
243k
          uint8_t     numTransformCands = 1 + (tsAllowed && !(checkDCTOnly || checkTSOnly)? 1 : 0); // DCT + TS = 2 tests
2405
243k
          std::vector<TrMode> trModes;
2406
243k
          if (numTransformCands > 1)
2407
0
          {
2408
0
            trModes.push_back(TrMode(0, true)); // DCT2
2409
0
            trModes.push_back(TrMode(1, true));//TS
2410
0
          }
2411
243k
          else
2412
243k
          {
2413
243k
            currTU.mtsIdx[codeCompId] = checkTSOnly || currTU.cu->bdpcmM[CH_C] ? 1 : 0;
2414
243k
          }
2415
2416
486k
          for (int modeId = 0; modeId < numTransformCands; modeId++)
2417
243k
          {
2418
243k
            Distortion distTmp = 0;
2419
243k
            currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : MTS_DCT2_DCT2;
2420
243k
            if (numTransformCands > 1)
2421
0
            {
2422
0
              currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : trModes[modeId].first;
2423
0
            }
2424
243k
            currTU.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
2425
2426
243k
            m_CABACEstimator->getCtx() = ctxStartTU;
2427
2428
243k
            resiCb.copyFrom(m_orgResiCb[cbfMask]);
2429
243k
            resiCr.copyFrom(m_orgResiCr[cbfMask]);
2430
243k
            if ((modeId == 0) && (numTransformCands > 1))
2431
0
            {
2432
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, COMP_Cb);
2433
0
              currTU.mtsIdx[codeCompId] = trModes[modeId].first;
2434
0
              currTU.mtsIdx[(codeCompId == COMP_Cr) ? COMP_Cb : COMP_Cr] = MTS_DCT2_DCT2;
2435
0
            }
2436
243k
            cuCtx.lfnstLastScanPos = false;
2437
243k
            cuCtx.violatesLfnstConstrained[CH_L] = false;
2438
243k
            cuCtx.violatesLfnstConstrained[CH_C] = false;
2439
243k
            if (numTransformCands > 1)
2440
0
            {
2441
0
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0, 0, true);
2442
0
              if ((modeId == 0) && !trModes[modeId + 1].second)
2443
0
              {
2444
0
                numTransformCands = 1;
2445
0
              }
2446
0
            }
2447
243k
            else
2448
243k
            {
2449
243k
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0);
2450
243k
            }
2451
2452
243k
            double costTmp = std::numeric_limits<double>::max();
2453
243k
            if (distTmp < MAX_DISTORTION)
2454
240k
            {
2455
240k
              uint64_t bits = xGetIntraFracBitsQTChroma(currTU, COMP_Cb, &cuCtx);
2456
240k
              costTmp = m_pcRdCost->calcRdCost(bits, distTmp);
2457
240k
            }
2458
2.67k
            else if (!currTU.mtsIdx[codeCompId])
2459
2.67k
            {
2460
2.67k
              numTransformCands = 1;
2461
2.67k
            }
2462
243k
            bool rootCbfL = false;
2463
972k
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2464
729k
            {
2465
729k
              rootCbfL |= bool(tmpTU.cbf[t]);
2466
729k
            }
2467
243k
            if (rapidLFNST && !rootCbfL)
2468
0
            {
2469
0
              endLfnstIdx = lfnstIdxj;
2470
0
            }
2471
243k
            if (testLFNST && currTU.cu->lfnstIdx && !cuCtx.lfnstLastScanPos)
2472
2.61k
            {
2473
2.61k
              bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) ? rootCbfL
2474
2.61k
                : (cs.area.chromaFormat != CHROMA_400 && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2475
0
                ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2476
2.61k
              if (cbfAtZeroDepth)
2477
2.61k
              {
2478
2.61k
                costTmp = MAX_DOUBLE;
2479
2.61k
              }
2480
2.61k
            }
2481
243k
            if (costTmp < bestCostCbCr)
2482
91.8k
            {
2483
91.8k
              bestCostCbCr = costTmp;
2484
91.8k
              bestDistCbCr = distTmp;
2485
91.8k
              bestJointCbCr = currTU.jointCbCr;
2486
2487
              // store data
2488
91.8k
              bestLfnstIdx = lfnstIdxj;
2489
91.8k
              if ((cbfMask != jointCbfMasksToTest.back() || (lfnstIdxj != endLfnstIdx)) || (modeId != (numTransformCands - 1)))
2490
74.5k
              {
2491
74.5k
                saveCS.getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
2492
74.5k
                saveCS.getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
2493
2494
74.5k
                tmpTU.copyComponentFrom(currTU, COMP_Cb);
2495
74.5k
                tmpTU.copyComponentFrom(currTU, COMP_Cr);
2496
2497
74.5k
                ctxBest = m_CABACEstimator->getCtx();
2498
74.5k
              }
2499
17.2k
              else
2500
17.2k
              {
2501
17.2k
                lastIsBest = true;
2502
17.2k
                cs.cus[0]->lfnstIdx = bestLfnstIdx;
2503
17.2k
              }
2504
91.8k
            }
2505
243k
          }
2506
243k
        }
2507
2508
        // Retrieve the best CU data (unless it was the very last one tested)
2509
687k
      }
2510
258k
      if (!lastIsBest)
2511
241k
      {
2512
241k
        cs.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2513
241k
        cs.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2514
2515
241k
        cs.cus[0]->lfnstIdx = bestLfnstIdx;
2516
241k
        currTU.copyComponentFrom(tmpTU, COMP_Cb);
2517
241k
        currTU.copyComponentFrom(tmpTU, COMP_Cr);
2518
241k
        m_CABACEstimator->getCtx() = ctxBest;
2519
241k
      }
2520
258k
      currTU.jointCbCr = (TU::getCbf(currTU, COMP_Cb) || TU::getCbf(currTU, COMP_Cr)) ? bestJointCbCr : 0;
2521
258k
    } // jointCbCr
2522
2523
258k
    cs.dist += bestDistCbCr;
2524
258k
    cuCtx.violatesLfnstConstrained[CH_L] = false;
2525
258k
    cuCtx.violatesLfnstConstrained[CH_C] = false;
2526
258k
    cuCtx.lfnstLastScanPos = false;
2527
258k
    cuCtx.violatesMtsCoeffConstraint = false;
2528
258k
    cuCtx.mtsLastScanPos = false;
2529
258k
    cbfs.cbf(COMP_Cb) = TU::getCbf(currTU, COMP_Cb);
2530
258k
    cbfs.cbf(COMP_Cr) = TU::getCbf(currTU, COMP_Cr);
2531
258k
  }
2532
1
  else
2533
1
  {
2534
1
    unsigned   numValidTBlocks = getNumberValidTBlocks(*cs.pcv);
2535
1
    ChromaCbfs SplitCbfs(false);
2536
2537
1
    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
2538
0
    {
2539
0
      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
2540
0
    }
2541
1
    else if (currTU.cu->ispMode)
2542
0
    {
2543
0
      partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs);
2544
0
    }
2545
1
    else
2546
1
      THROW("Implicit TU split not available");
2547
2548
0
    do
2549
0
    {
2550
0
      ChromaCbfs subCbfs = xIntraChromaCodingQT(cs, partitioner);
2551
2552
0
      for (uint32_t ch = COMP_Cb; ch < numValidTBlocks; ch++)
2553
0
      {
2554
0
        const ComponentID compID = ComponentID(ch);
2555
0
        SplitCbfs.cbf(compID) |= subCbfs.cbf(compID);
2556
0
      }
2557
0
    } while (partitioner.nextPart(cs));
2558
2559
0
    partitioner.exitCurrSplit();
2560
2561
    /*if (lumaUsesISP && cs.dist == MAX_UINT) //ahenkel
2562
    {
2563
      return cbfs;
2564
    }*/
2565
0
    {
2566
0
      cbfs.Cb |= SplitCbfs.Cb;
2567
0
      cbfs.Cr |= SplitCbfs.Cr;
2568
2569
0
      if (1)   //(!lumaUsesISP)
2570
0
      {
2571
0
        for (auto& ptu : cs.tus)
2572
0
        {
2573
0
          if (currArea.Cb().contains(ptu->Cb()) || (!ptu->Cb().valid() && currArea.Y().contains(ptu->Y())))
2574
0
          {
2575
0
            TU::setCbfAtDepth(*ptu, COMP_Cb, currDepth, SplitCbfs.Cb);
2576
0
            TU::setCbfAtDepth(*ptu, COMP_Cr, currDepth, SplitCbfs.Cr);
2577
0
          }
2578
0
        }
2579
0
      }
2580
0
    }
2581
0
  }
2582
258k
  return cbfs;
2583
258k
}
2584
2585
uint64_t IntraSearch::xFracModeBitsIntraLuma(const CodingUnit& cu, const unsigned* mpmLst)
2586
864k
{
2587
864k
  m_CABACEstimator->resetBits();
2588
2589
864k
  if (!cu.ciip)
2590
864k
  {
2591
864k
    m_CABACEstimator->intra_luma_pred_mode(cu, mpmLst);
2592
864k
  }
2593
2594
864k
  return m_CABACEstimator->getEstFracBits();
2595
864k
}
2596
2597
template<typename T, size_t N, int M>
2598
void IntraSearch::xReduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, SortedPelUnitBufs<M>& sortedPelBuffer, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const CodingUnit& cu, const bool fastMip)
2599
17.7k
{
2600
17.7k
  const int maxCandPerType = numModesForFullRD >> 1;
2601
17.7k
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList;
2602
17.7k
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList;
2603
17.7k
  const double minCost = candCostList[0];
2604
17.7k
  bool keepOneMip = candModeList.size() > numModesForFullRD;
2605
17.7k
  const int maxNumConv = 3; 
2606
2607
17.7k
  int numConv = 0;
2608
17.7k
  int numMip = 0;
2609
80.1k
  for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++)
2610
62.4k
  {
2611
62.4k
    bool addMode = false;
2612
62.4k
    const ModeInfo& orgMode = candModeList[idx];
2613
2614
62.4k
    if (!orgMode.mipFlg)
2615
44.7k
    {
2616
44.7k
      addMode = (numConv < maxNumConv);
2617
44.7k
      numConv += addMode ? 1:0;
2618
44.7k
    }
2619
17.7k
    else
2620
17.7k
    {
2621
17.7k
      addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip );
2622
17.7k
      keepOneMip = false;
2623
17.7k
      numMip += addMode ? 1:0;
2624
17.7k
    }
2625
62.4k
    if( addMode )
2626
62.4k
    {
2627
62.4k
      tempRdModeList.push_back(orgMode);
2628
62.4k
      tempCandCostList.push_back(candCostList[idx]);
2629
62.4k
    }
2630
62.4k
  }
2631
2632
  // sort Pel Buffer
2633
17.7k
  int i = -1;
2634
17.7k
  for( auto &m: tempRdModeList)
2635
62.4k
  {
2636
62.4k
    if( ! (m == candModeList.at( ++i )) )
2637
0
    {
2638
0
      for( int j = i; j < (int)candModeList.size()-1; )
2639
0
      {
2640
0
        if( m == candModeList.at( ++j ) )
2641
0
        {
2642
0
          sortedPelBuffer.swap( i, j);
2643
0
          break;
2644
0
        }
2645
0
      }
2646
0
    }
2647
62.4k
  }
2648
17.7k
  sortedPelBuffer.reduceTo( (int)tempRdModeList.size() );
2649
2650
17.7k
  if ((cu.lwidth() > 8 && cu.lheight() > 8))
2651
15.8k
  {
2652
    // Sort MIP candidates by Hadamard cost
2653
15.8k
    const int transpOff = getNumModesMip(cu.Y());
2654
15.8k
    static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0);
2655
15.8k
    static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0);
2656
15.8k
    for (uint8_t mode : { 0, 1, 2 })
2657
47.4k
    {
2658
47.4k
      uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0);
2659
47.4k
      updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3);
2660
47.4k
    }
2661
2662
    // Append MIP mode to RD mode list
2663
15.8k
    const int modeListSize = int(tempRdModeList.size());
2664
31.6k
    for (int idx = 0; idx < 3; idx++)
2665
31.6k
    {
2666
31.6k
      const bool     isTransposed = (sortedMipModes[idx] >= transpOff ? true : false);
2667
31.6k
      const uint32_t mipIdx       = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]);
2668
31.6k
      const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx );
2669
31.6k
      bool alreadyIncluded = false;
2670
126k
      for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
2671
110k
      {
2672
110k
        if (tempRdModeList[modeListIdx] == mipMode)
2673
15.8k
        {
2674
15.8k
          alreadyIncluded = true;
2675
15.8k
          break;
2676
15.8k
        }
2677
110k
      }
2678
2679
31.6k
      if (!alreadyIncluded)
2680
15.8k
      {
2681
15.8k
        tempRdModeList.push_back(mipMode);
2682
15.8k
        tempCandCostList.push_back(0);
2683
15.8k
        if( fastMip ) break;
2684
15.8k
      }
2685
31.6k
    }
2686
15.8k
  }
2687
2688
17.7k
  candModeList = tempRdModeList;
2689
17.7k
  candCostList = tempCandCostList;
2690
17.7k
  numModesForFullRD = int(candModeList.size());
2691
17.7k
}
2692
2693
// Runs the transform pre-check (TrQuant::checktransformsNxN) for the candidate list in trModes.
//
// Luma (compID == COMP_Y): the prediction for the TU's luma block is (re)built - copied from
// predBuf if given, otherwise generated via MIP or angular prediction - the residual against
// the original (or the reshaped original when LMCS is enabled for the picture and flagged for
// the CTU) is written to the CS residual buffer, and the pre-check is run on the luma component.
//
// Chroma: no prediction/residual is produced here; the pre-check is run on the component that
// carries the joint Cb/Cr residual when tu.jointCbCr is set, otherwise on compID.
//
// NOTE: maxCand is not read in this function; m_pcEncCfg->m_MTSIntraMaxCand is passed to
//       checktransformsNxN in both paths.
void IntraSearch::xPreCheckMTS(TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, PelUnitBuf *predBuf, const ComponentID& compID)
{
  if (compID != COMP_Y)
  {
    ComponentID codeCompId = compID;
    if (tu.jointCbCr)
    {
      codeCompId = (tu.jointCbCr >> 1) ? COMP_Cb : COMP_Cr;
    }
    m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, codeCompId);
    return;
  }

  CodingStructure&   cs          = *tu.cs;
  const CompArea&    lumaArea    = tu.blocks[compID];
  const ReshapeData& reshapeData = cs.picture->reshapeData;
  const CodingUnit&  cu          = *cs.getCU(lumaArea.pos(), CH_L,TREE_D);
  PelBuf             predBlk     = cs.getPredBuf(lumaArea);
  PelBuf             resiBlk     = cs.getResiBuf(lumaArea);

  initIntraPatternChType(*tu.cu, lumaArea);

  //===== get prediction signal =====
  if (predBuf)
  {
    predBlk.copyFrom(predBuf->Y());
  }
  else if (CU::isMIP(cu, CH_L))
  {
    initIntraMip(cu);
    predIntraMip(predBlk, cu);
  }
  else
  {
    predIntraAng(COMP_Y, predBlk, cu);
  }

  //===== get residual signal =====
  const bool useReshapedOrg = cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag();
  if (!useReshapedOrg)
  {
    CPelBuf orgBlk = cs.getOrgBuf(COMP_Y);
    resiBlk.subtract(orgBlk, predBlk);
  }
  else
  {
    resiBlk.subtract(cs.getRspOrgBuf(), predBlk);
  }

  m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, compID);
}
2737
2738
// Codes the luma sub-TUs of the current partitioner split one after another and accumulates
// their distortion, fractional bits and RD cost.
//
// The loop ends when the partitioner has no further part, or early when
//   - a sub-TU reports a distortion equal to MAX_INT (all zero CBF skip), or
//   - the accumulated cost exceeds bestCostForISP, or a fraction of it (0.95 for two
//     sub-partitions, otherwise 0.83 after the first and 0.91 after later sub-TUs) while
//     sub-partitions are still outstanding.
// These cost checks are only applied when m_ISP != 1 or when sub-partitions are outstanding.
//
// Outputs (written on every path):
//   singleDistLuma  accumulated luma distortion of the coded sub-TUs
//   singleFracBits  accumulated fractional bits of the coded sub-TUs
//   splitcbf        OR of the luma CBFs (at the current transform depth) of the coded sub-TUs
// Returns the accumulated RD cost, or MAX_DOUBLE if any early-skip condition was hit.
double IntraSearch::xTestISP(CodingStructure& cs, Partitioner& subTuPartitioner, double bestCostForISP, PartSplit ispType, bool& splitcbf, uint64_t& singleFracBits, Distortion& singleDistLuma, CUCtx& cuCtx)
{
  CodingUnit& cu = *cs.cus[0];

  cuCtx.isDQPCoded         = true;
  cuCtx.isChromaQpAdjCoded = true;

  Distortion distSum        = 0;
  uint64_t   bitsSum        = 0;
  double     costSum        = 0;
  bool       anyLumaCbf     = false;
  bool       skipISP        = false;
  int        numCodedSubTus = 0;

  for (;;)
  {
    const int tuIdx = numCodedSubTus;

    // reuse an already existing TU for this sub-partition, otherwise create it
    TransformUnit* subTu = nullptr;
    if (cs.tus.size() < (tuIdx + 1))
    {
      subTu = &cs.addTU(CS::getArea(cs, subTuPartitioner.currArea(), subTuPartitioner.chType,
                                    subTuPartitioner.treeType),
                        subTuPartitioner.chType, cs.cus[0]);
    }
    else
    {
      subTu = &(*cs.tus[tuIdx]);
    }
    subTu->depth = subTuPartitioner.currTrDepth;

    // Encode TU
    Distortion subDist = 0;
    xIntraCodingTUBlock(*subTu, COMP_Y, false, subDist, 0);
    cuCtx.mtsLastScanPos = false;

    if (subDist == MAX_INT)   // all zero CBF skip
    {
      skipISP = true;   // the function result becomes MAX_DOUBLE
      break;
    }

    // bits are only estimated while the distortion alone does not already exceed the budget
    uint64_t subBits = 0;
    if (m_pcRdCost->calcRdCost(bitsSum, distSum + subDist) > bestCostForISP)
    {
      skipISP = true;
    }
    else
    {
      m_ispTestedModes[0].IspType      = ispType;
      m_ispTestedModes[0].subTuCounter = tuIdx;
      subBits = xGetIntraFracBitsQT(cs, subTuPartitioner, true, &cuCtx);
    }
    const double subCost = m_pcRdCost->calcRdCost(subBits, subDist);

    costSum += subCost;
    distSum += subDist;
    bitsSum += subBits;
    ++numCodedSubTus;

    anyLumaCbf |= TU::getCbfAtDepth( *cs.getTU(subTuPartitioner.currArea().lumaPos(), subTuPartitioner.chType, tuIdx),
                                     COMP_Y, subTuPartitioner.currTrDepth);

    const int  nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1];
    const bool partsRemaining = numCodedSubTus < nSubPartitions;
    if ((m_pcEncCfg->m_ISP != 1) || partsRemaining)
    {
      if (costSum > bestCostForISP)
      {
        skipISP = true;
        break;
      }
      if (partsRemaining)
      {
        double threshold = 0.91;
        if (nSubPartitions == 2)
        {
          threshold = 0.95;
        }
        else if (numCodedSubTus == 1)
        {
          threshold = 0.83;
        }
        if (costSum > bestCostForISP * threshold)
        {
          skipISP = true;
          break;
        }
      }
    }

    if (!subTuPartitioner.nextPart(cs))
    {
      break;
    }
  }

  singleDistLuma = distSum;
  singleFracBits = bitsSum;
  splitcbf       = anyLumaCbf;

  if (skipISP)
  {
    return MAX_DOUBLE;
  }
  return costSum;
}
2820
2821
int IntraSearch::xSpeedUpISP(int speed, bool& testISP, int mode, int& noISP, int& endISP, CodingUnit& cu, static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, const ModeInfo& bestPUMode, int bestISP, int bestLfnstIdx)
2822
13.0k
{
2823
13.0k
  if (speed)
2824
5.27k
  {
2825
5.27k
    if (mode >= 1)
2826
2.78k
    {
2827
2.78k
      if (m_ispTestedModes[0].splitIsFinished[1] && m_ispTestedModes[0].splitIsFinished[0])
2828
0
      {
2829
0
        testISP = false;
2830
0
        endISP = 0;
2831
0
      }
2832
2.78k
      else
2833
2.78k
      {
2834
2.78k
        if (m_pcEncCfg->m_ISP >= 2)
2835
2.78k
        {
2836
2.78k
          if (mode == 1) //best Hor||Ver
2837
2.49k
          {
2838
2.49k
            int bestDir = 0;
2839
7.47k
            for (int d = 0; d < 2; d++)
2840
4.98k
            {
2841
4.98k
              int d2 = d ? 0 : 1;
2842
4.98k
              if ((m_ispTestedModes[0].bestCost[d] <= m_ispTestedModes[0].bestCost[d2])
2843
4.68k
                && (m_ispTestedModes[0].bestCost[d] != MAX_DOUBLE))
2844
294
              {
2845
294
                bestDir = d + 1;
2846
294
                m_ispTestedModes[0].splitIsFinished[d2] = true;
2847
294
              }
2848
4.98k
            }
2849
2.49k
            m_ispTestedModes[0].bestModeSoFar = bestDir;
2850
2.49k
            if (m_ispTestedModes[0].bestModeSoFar <= 0)
2851
2.19k
            {
2852
2.19k
              m_ispTestedModes[0].splitIsFinished[1] = true;
2853
2.19k
              m_ispTestedModes[0].splitIsFinished[0] = true;
2854
2.19k
              testISP = false;
2855
2.19k
              endISP = 0;
2856
2.19k
            }
2857
2.49k
          }
2858
2.78k
          if (m_ispTestedModes[0].bestModeSoFar == 2)
2859
72
          {
2860
72
            noISP = 1;
2861
72
          }
2862
2.71k
          else
2863
2.71k
          {
2864
2.71k
            endISP = 1;
2865
2.71k
          }
2866
2.78k
        }
2867
2.78k
      }
2868
2.78k
    }
2869
5.27k
    if (testISP)
2870
3.07k
    {
2871
3.07k
      if (mode == 2)
2872
294
      {
2873
882
        for (int d = 0; d < 2; d++)
2874
588
        {
2875
588
          int d2 = d ? 0 : 1;
2876
588
          if (m_ispTestedModes[0].bestCost[d] == MAX_DOUBLE)
2877
271
          {
2878
271
            m_ispTestedModes[0].splitIsFinished[d] = true;
2879
271
          }
2880
588
          if ((m_ispTestedModes[0].bestCost[d2] < 1.3 * m_ispTestedModes[0].bestCost[d])
2881
317
            && (int(m_ispTestedModes[0].bestSplitSoFar) != (d + 1)))
2882
234
          {
2883
234
            if (d)
2884
198
            {
2885
198
              endISP = 1;
2886
198
            }
2887
36
            else
2888
36
            {
2889
36
              noISP = 1;
2890
36
            }
2891
234
            m_ispTestedModes[0].splitIsFinished[d] = true;
2892
234
          }
2893
588
        }
2894
294
      }
2895
2.78k
      else
2896
2.78k
      {
2897
2.78k
        if (m_ispTestedModes[0].splitIsFinished[0])
2898
36
        {
2899
36
          noISP = 1;
2900
36
        }
2901
2.78k
        if (m_ispTestedModes[0].splitIsFinished[1])
2902
258
        {
2903
258
          endISP = 1;
2904
258
        }
2905
2.78k
      }
2906
3.07k
    }
2907
5.27k
    if ((noISP == 1) && (endISP == 1))
2908
23
    {
2909
23
      endISP = 0;
2910
23
    }
2911
5.27k
  }
2912
7.73k
  else
2913
7.73k
  {
2914
7.73k
    bool stopFound = false;
2915
7.73k
    if (m_pcEncCfg->m_ISP >= 3)
2916
7.73k
    {
2917
7.73k
      if (mode)
2918
2.76k
      {
2919
2.76k
        if ((bestISP == 0) || ((bestPUMode.modeId != RdModeList[mode - 1].modeId)
2920
98
          && (bestPUMode.modeId != RdModeList[mode].modeId)))
2921
1.87k
        {
2922
1.87k
          stopFound = true;
2923
1.87k
        }
2924
2.76k
      }
2925
7.73k
    }
2926
7.73k
    if (cu.mipFlag || cu.multiRefIdx)
2927
175
    {
2928
175
      cu.mipFlag = false;
2929
175
      cu.multiRefIdx = 0;
2930
175
      if (!stopFound)
2931
0
      {
2932
0
        for (int k = 0; k < mode; k++)
2933
0
        {
2934
0
          if (cu.intraDir[CH_L] == RdModeList[k].modeId)
2935
0
          {
2936
0
            stopFound = true;
2937
0
            break;
2938
0
          }
2939
0
        }
2940
0
      }
2941
175
    }
2942
7.73k
    if (stopFound)
2943
1.87k
    {
2944
1.87k
      testISP = false;
2945
1.87k
      endISP = 0;
2946
1.87k
      return 1;
2947
1.87k
    }
2948
5.85k
    if (!stopFound && (m_pcEncCfg->m_ISP >= 2) && (cu.intraDir[CH_L] == DC_IDX))
2949
892
    {
2950
892
      stopFound = true;
2951
892
      endISP = 0;
2952
892
      return 1;
2953
892
    }
2954
5.85k
  }
2955
10.2k
  return 0;
2956
13.0k
}
2957
2958
void IntraSearch::xSpeedUpIntra(double bestcost, int& EndMode, int& speedIntra, CodingUnit& cu)
2959
23.1k
{
2960
23.1k
  int bestIdxbefore = m_ispTestedModes[0].bestIntraMode;
2961
23.1k
  if (m_ispTestedModes[0].isIntra)
2962
0
  {
2963
0
    if (bestIdxbefore == 1)//ISP
2964
0
    {
2965
0
      speedIntra = 14;
2966
0
    }
2967
0
    if (bestIdxbefore == 4)//MTS
2968
0
    {
2969
0
      speedIntra = 3;
2970
0
    }
2971
0
  }
2972
23.1k
  else if (!cu.cs->slice->isIntra())
2973
0
  {
2974
0
    if (bestcost != MAX_DOUBLE)
2975
0
    {
2976
0
      speedIntra = 10;
2977
0
    }
2978
0
  }
2979
23.1k
  if (m_ispTestedModes[0].bestBefore[0] == -1)
2980
20.6k
  {
2981
20.6k
    speedIntra |= 7;
2982
20.6k
    if (m_pcEncCfg->m_FastIntraTools == 2)
2983
0
    {
2984
0
      EndMode = 1;
2985
0
    }
2986
20.6k
  }
2987
23.1k
  if (!cu.cs->slice->isIntra())
2988
0
  {
2989
0
    if ((m_ispTestedModes[0].bestBefore[1] == 1) || (m_ispTestedModes[0].bestBefore[2] == 1))
2990
0
    {
2991
0
      speedIntra |= 2;
2992
0
    }
2993
0
    if ((m_ispTestedModes[0].bestBefore[1] == 4) || (m_ispTestedModes[0].bestBefore[2] == 4))
2994
0
    {
2995
0
      speedIntra |= 3;
2996
0
    }
2997
0
    if ((m_ispTestedModes[0].bestBefore[1] == 2) || (m_ispTestedModes[0].bestBefore[2] == 2))
2998
0
    {
2999
0
      speedIntra |= 1;
3000
0
    }
3001
0
  }
3002
23.1k
}
3003
3004
} // namespace vvenc
3005
3006
//! \}
3007