Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/IntraSearch.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     IntraSearch.cpp
45
 *  \brief    encoder intra search class
46
 */
47
48
#include "IntraSearch.h"
49
#include "EncPicture.h"
50
#include "CommonLib/CommonDef.h"
51
#include "CommonLib/Rom.h"
52
#include "CommonLib/Picture.h"
53
#include "CommonLib/UnitTools.h"
54
#include "CommonLib/dtrace_next.h"
55
#include "CommonLib/dtrace_buffer.h"
56
#include "CommonLib/Reshape.h"
57
#include <math.h>
58
#include "vvenc/vvencCfg.h"
59
60
//! \ingroup EncoderLib
61
//! \{
62
63
namespace vvenc {
64
65
#define PLTCtx(c) SubCtx( Ctx::Palette, c )
66
67
// Default constructor: leaves the object without any attached resources.
// init() must be called before the search functions are used.
IntraSearch::IntraSearch()
  : m_pSaveCS       (nullptr)
  , m_pcEncCfg      (nullptr)
  , m_pcTrQuant     (nullptr)
  , m_pcRdCost      (nullptr)
  , m_CABACEstimator(nullptr)
  , m_CtxCache      (nullptr)
{
  // destroy() is invoked by the destructor and tests both of these pointers.
  // Give them a defined value so that destroying an object on which init()
  // was never called does not read an indeterminate pointer.
  // NOTE(review): assigned in the body rather than in the initializer list
  // because the member declaration order is not visible from this file.
  m_pTempCS = nullptr;
  m_pBestCS = nullptr;
}
76
77
void IntraSearch::init(const VVEncCfg &encCfg, TrQuant *pTrQuant, RdCost *pRdCost, SortedPelUnitBufs<SORTED_BUFS> *pSortedPelUnitBufs, XUCache &unitCache )
78
17.3k
{
79
17.3k
  IntraPrediction::init( encCfg.m_internChromaFormat, encCfg.m_internalBitDepth[ CH_L ] );
80
81
17.3k
  m_pcEncCfg          = &encCfg;
82
17.3k
  m_pcTrQuant         = pTrQuant;
83
17.3k
  m_pcRdCost          = pRdCost;
84
17.3k
  m_SortedPelUnitBufs = pSortedPelUnitBufs;
85
86
17.3k
  const ChromaFormat chrFormat = encCfg.m_internChromaFormat;
87
17.3k
  const int maxCUSize          = encCfg.m_CTUSize;
88
89
17.3k
  Area area = Area( 0, 0, maxCUSize, maxCUSize );
90
91
17.3k
  m_pTempCS = new CodingStructure( unitCache, nullptr );
92
17.3k
  m_pBestCS = new CodingStructure( unitCache, nullptr );
93
94
17.3k
  m_pTempCS->createForSearch( chrFormat, area );
95
17.3k
  m_pBestCS->createForSearch( chrFormat, area );
96
97
17.3k
  const int uiNumSaveLayersToAllocate = 3;
98
17.3k
  m_pSaveCS = new CodingStructure*[uiNumSaveLayersToAllocate];
99
69.5k
  for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
100
52.1k
  {
101
52.1k
    m_pSaveCS[ layer ] = new CodingStructure( unitCache, nullptr );
102
52.1k
    m_pSaveCS[ layer ]->createForSearch( chrFormat, Area( 0, 0, maxCUSize, maxCUSize ) );
103
52.1k
    m_pSaveCS[ layer ]->initStructData();
104
52.1k
  }
105
106
17.3k
  CompArea chromaArea( COMP_Cb, chrFormat, area, true );
107
104k
  for( int i = 0; i < 5; i++ )
108
86.8k
  {
109
86.8k
    m_orgResiCb[i].create( chromaArea );
110
86.8k
    m_orgResiCr[i].create( chromaArea );
111
86.8k
  }
112
17.3k
}
113
114
void IntraSearch::destroy()
115
17.3k
{
116
17.3k
  if ( m_pSaveCS )
117
17.3k
  {
118
17.3k
    const int uiNumSaveLayersToAllocate = 3;
119
69.5k
    for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
120
52.1k
    {
121
52.1k
      if ( m_pSaveCS[ layer ] ) { m_pSaveCS[ layer ]->destroy(); delete m_pSaveCS[ layer ]; }
122
52.1k
    }
123
17.3k
    delete[] m_pSaveCS;
124
17.3k
    m_pSaveCS = nullptr;
125
17.3k
  }
126
127
17.3k
  if( m_pTempCS )
128
17.3k
  {
129
17.3k
    m_pTempCS->destroy();
130
17.3k
    delete m_pTempCS; m_pTempCS = nullptr;
131
17.3k
  }
132
133
17.3k
  if( m_pBestCS )
134
17.3k
  {
135
17.3k
    m_pBestCS->destroy();
136
17.3k
    delete m_pBestCS; m_pBestCS = nullptr;
137
17.3k
  }
138
17.3k
}
139
140
// Destructor: hands all cleanup to destroy().
IntraSearch::~IntraSearch()
{
  this->destroy();
}
144
145
// Stores the CABAC estimator and context cache that the search functions
// use for bit estimation. Only the pointers are kept; nothing is copied.
void IntraSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache *ctxCache )
{
  this->m_CtxCache       = ctxCache;
  this->m_CABACEstimator = cabacEstimator;
}
150
151
//////////////////////////////////////////////////////////////////////////
152
// INTRA PREDICTION
153
//////////////////////////////////////////////////////////////////////////
154
static constexpr double COST_UNKNOWN = -65536.0;
155
156
// Looks up the cost recorded in m_cuCostInSCIPU for the entry of
// m_cuAreaInSCIPU whose position and size equal the CU's luma area.
// The lookup only happens for a constrained-intra CU in a non-intra slice.
// Returns COST_UNKNOWN when the lookup is skipped or nothing matches.
double IntraSearch::xFindInterCUCost( CodingUnit &cu )
{
  const bool lookupApplies = CU::isConsIntra(cu) && !cu.slice->isIntra();
  if( !lookupApplies )
  {
    return COST_UNKNOWN;
  }

  // Scan the recorded entries in order; the first match wins.
  int entry = 0;
  while( entry < m_numCuInSCIPU )
  {
    if( cu.lumaPos() == m_cuAreaInSCIPU[entry].pos() )
    {
      if( cu.lumaSize() == m_cuAreaInSCIPU[entry].size() )
      {
        return m_cuCostInSCIPU[entry];
      }
    }
    entry++;
  }

  return COST_UNKNOWN;
}
171
172
void IntraSearch::xEstimateLumaRdModeList(int& numModesForFullRD,
173
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList,
174
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& HadModeList,
175
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandCostList,
176
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandHadList, CodingUnit& cu, bool testMip )
177
22.7k
{
178
22.7k
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTRA_EST_RD_CAND, cu.cs, CH_L );
179
22.7k
  const uint16_t intra_ctx_size = Ctx::IntraLumaMpmFlag.size() + Ctx::IntraLumaPlanarFlag.size() + Ctx::MultiRefLineIdx.size() + Ctx::ISPMode.size() + Ctx::MipFlag.size();
180
22.7k
  const TempCtx  ctxStartIntraCtx(m_CtxCache, SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), m_CABACEstimator->getCtx()));
181
22.7k
  const double   sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
182
22.7k
  const int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
183
184
22.7k
  CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search");
185
186
22.7k
  const SPS& sps     = *cu.cs->sps;
187
22.7k
  const bool fastMip = sps.MIP && m_pcEncCfg->m_useFastMIP;
188
189
  // this should always be true
190
22.7k
  CHECK( !cu.Y().valid(), "CU is not valid" );
191
192
22.7k
  const CompArea& area = cu.Y();
193
194
22.7k
  const UnitArea localUnitArea(area.chromaFormat, Area(0, 0, area.width, area.height));
195
22.7k
  if( testMip)
196
17.4k
  {
197
17.4k
    numModesForFullRD += fastMip ? numModesForFullRD - std::min( m_pcEncCfg->m_useFastMIP, numModesForFullRD )
198
17.4k
                                 : numModesForFullRD;
199
17.4k
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD + 1 );
200
17.4k
  }
201
5.29k
  else
202
5.29k
  {
203
5.29k
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD );
204
5.29k
  }
205
206
22.7k
  CPelBuf piOrg   = cu.cs->getOrgBuf(COMP_Y);
207
22.7k
  PelBuf piPred  = m_SortedPelUnitBufs->getTestBuf(COMP_Y);
208
209
22.7k
  const ReshapeData& reshapeData = cu.cs->picture->reshapeData;
210
22.7k
  if (cu.cs->picHeader->lmcsEnabled && reshapeData.getCTUFlag())
211
0
  {
212
0
    piOrg = cu.cs->getRspOrgBuf();
213
0
  }
214
22.7k
  DistParam distParam    = m_pcRdCost->setDistParam( piOrg, piPred, sps.bitDepths[ CH_L ], DF_HAD_2SAD); // Use HAD (SATD) cost
215
216
22.7k
  const int numHadCand = (testMip ? 2 : 1) * 3;
217
218
  //*** Derive (regular) candidates using Hadamard
219
22.7k
  cu.mipFlag = false;
220
22.7k
  cu.multiRefIdx = 0;
221
222
  //===== init pattern for luma prediction =====
223
22.7k
  initIntraPatternChType(cu, cu.Y(), true);
224
225
22.7k
  bool satdChecked[NUM_INTRA_MODE] = { false };
226
227
22.7k
  unsigned mpmLst[NUM_MOST_PROBABLE_MODES];
228
22.7k
  CU::getIntraMPMs(cu, mpmLst);
229
230
22.7k
  const int decMsk = ( 1 << m_pcEncCfg->m_IntraEstDecBit ) - 1;
231
232
22.7k
  m_parentCandList.resize( 0 );
233
22.7k
  m_parentCandList.reserve( ( numModesAvailable >> m_pcEncCfg->m_IntraEstDecBit ) + 2 );
234
235
1.54M
  for( unsigned mode = 0; mode < numModesAvailable; mode++ )
236
1.52M
  {
237
    // Skip checking extended Angular modes in the first round of SATD
238
1.52M
    if( mode > DC_IDX && ( mode & decMsk ) )
239
1.11M
    {
240
1.11M
      continue;
241
1.11M
    }
242
243
408k
    m_parentCandList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ) );
244
408k
  }
245
   
246
90.8k
  for( int decDst = 1 << m_pcEncCfg->m_IntraEstDecBit; decDst > 0; decDst >>= 1 )
247
68.1k
  {
248
613k
    for( unsigned idx = 0; idx < m_parentCandList.size(); idx++ )
249
544k
    {
250
544k
      int modeParent = m_parentCandList[idx].modeId;
251
252
544k
      int off = decDst & decMsk;
253
544k
      int inc = decDst << 1;
254
255
544k
#if 1 // INTRA_AS_IN_VTM
256
544k
      if( off != 0 && ( modeParent <= ( DC_IDX + 1 ) || modeParent >= ( NUM_LUMA_MODE - 1 ) ) )
257
88.2k
      {
258
88.2k
        continue;
259
88.2k
      }
260
261
456k
#endif
262
961k
      for( int mode = modeParent - off; mode < modeParent + off + 1; mode += inc )
263
504k
      {
264
504k
        if( satdChecked[mode] || mode < 0 || mode >= NUM_LUMA_MODE )
265
2.45k
        {
266
2.45k
          continue;
267
2.45k
        }
268
269
502k
        cu.intraDir[0] = mode;
270
271
502k
        initPredIntraParams( cu, cu.Y(), sps );
272
502k
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
273
502k
        predIntraAng( COMP_Y, piPred, cu );
274
275
        // Use the min between SAD and HAD as the cost criterion
276
        // SAD is scaled by 2 to align with the scaling of HAD
277
502k
        Distortion minSadHad = distParam.distFunc( distParam );
278
279
502k
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
280
281
        //restore ctx
282
502k
        m_CABACEstimator->getCtx() = SubCtx( CtxSet( Ctx::IntraLumaMpmFlag(), intra_ctx_size ), ctxStartIntraCtx );
283
284
502k
        double cost = ( double ) minSadHad + ( double ) fracModeBits * sqrtLambdaForFirstPass;
285
502k
        DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, mode );
286
287
502k
        int insertPos = -1;
288
502k
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos );
289
502k
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), ( double ) minSadHad, HadModeList, CandHadList, numHadCand );
290
502k
        m_SortedPelUnitBufs->insert( insertPos, ( int ) RdModeList.size() );
291
292
502k
        satdChecked[mode] = true;
293
502k
      }
294
456k
    }
295
296
68.1k
    m_parentCandList.resize( RdModeList.size() );
297
68.1k
    std::copy( RdModeList.cbegin(), RdModeList.cend(), m_parentCandList.begin() );
298
68.1k
  }
299
300
22.7k
  const bool isFirstLineOfCtu = (((cu.block(COMP_Y).y)&((cu.cs->sps)->CTUSize - 1)) == 0);
301
22.7k
  if( m_pcEncCfg->m_MRL && ! isFirstLineOfCtu )
302
13.7k
  {
303
13.7k
    cu.multiRefIdx = 1;
304
13.7k
    unsigned  multiRefMPM [NUM_MOST_PROBABLE_MODES];
305
13.7k
    CU::getIntraMPMs(cu, multiRefMPM);
306
307
41.1k
    for (int mRefNum = 1; mRefNum < MRL_NUM_REF_LINES; mRefNum++)
308
27.4k
    {
309
27.4k
      int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
310
311
27.4k
      cu.multiRefIdx = multiRefIdx;
312
27.4k
      initIntraPatternChType(cu, cu.Y(), true);
313
314
164k
      for (int x = 1; x < NUM_MOST_PROBABLE_MODES; x++)
315
137k
      {
316
137k
        cu.intraDir[0] = multiRefMPM[x];
317
137k
        initPredIntraParams(cu, cu.Y(), sps);
318
137k
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
319
137k
        predIntraAng(COMP_Y, piPred, cu);
320
321
        // Use the min between SAD and SATD as the cost criterion
322
        // SAD is scaled by 2 to align with the scaling of HAD
323
137k
        Distortion minSadHad = distParam.distFunc(distParam);
324
325
        // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
326
137k
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
327
328
        //restore ctx
329
137k
        m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
330
331
137k
        double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
332
//        DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMRL: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, cu.intraDir[0]);
333
334
137k
        int insertPos = -1;
335
137k
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD, &insertPos );
336
137k
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), (double)minSadHad, HadModeList, CandHadList,  numHadCand );
337
137k
        m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
338
137k
      }
339
27.4k
    }
340
13.7k
    cu.multiRefIdx = 0;
341
13.7k
  }
342
343
22.7k
  if (testMip)
344
17.4k
  {
345
17.4k
    cu.mipFlag = true;
346
17.4k
    cu.multiRefIdx = 0;
347
348
17.4k
    double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE };
349
350
17.4k
    initIntraPatternChType(cu, cu.Y());
351
17.4k
    initIntraMip( cu );
352
353
17.4k
    const int transpOff    = getNumModesMip( cu.Y() );
354
17.4k
    const int numModesFull = (transpOff << 1);
355
227k
    for( uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++ )
356
210k
    {
357
210k
      const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
358
210k
      const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
359
360
210k
      cu.mipTransposedFlag = isTransposed;
361
210k
      cu.intraDir[CH_L] = uiMode;
362
210k
      distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
363
210k
      predIntraMip(piPred, cu);
364
365
      // Use the min between SAD and HAD as the cost criterion
366
      // SAD is scaled by 2 to align with the scaling of HAD
367
210k
      Distortion minSadHad = distParam.distFunc(distParam);
368
369
210k
      uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
370
371
      //restore ctx
372
210k
      m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
373
374
210k
      double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
375
210k
      mipHadCost[uiModeFull] = cost;
376
210k
      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiModeFull);
377
378
210k
      int insertPos = -1;
379
210k
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD+1, &insertPos );
380
210k
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), 0.8*(double)minSadHad, HadModeList, CandHadList,  numHadCand );
381
210k
      m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
382
210k
    }
383
384
17.4k
    const double thresholdHadCost = 1.0 + 1.4 / sqrt((double)(cu.lwidth()*cu.lheight()));
385
17.4k
    xReduceHadCandList(RdModeList, CandCostList, *m_SortedPelUnitBufs, numModesForFullRD, thresholdHadCost, mipHadCost, cu, fastMip);
386
17.4k
  }
387
388
22.7k
  if( m_pcEncCfg->m_bFastUDIUseMPMEnabled )
389
22.7k
  {
390
22.7k
    const int numMPMs = NUM_MOST_PROBABLE_MODES;
391
22.7k
    unsigned  intraMpms[numMPMs];
392
393
22.7k
    cu.multiRefIdx = 0;
394
395
22.7k
    const int numCand = CU::getIntraMPMs( cu, intraMpms );
396
22.7k
    ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0);
397
398
46.1k
    for( int j = 0; j < numCand; j++ )
399
23.4k
    {
400
23.4k
      bool mostProbableModeIncluded = false;
401
23.4k
      mostProbableMode.modeId = intraMpms[j];
402
403
119k
      for( int i = 0; i < numModesForFullRD; i++ )
404
96.1k
      {
405
96.1k
        mostProbableModeIncluded |= ( mostProbableMode == RdModeList[i] );
406
96.1k
      }
407
23.4k
      if( !mostProbableModeIncluded )
408
163
      {
409
163
        numModesForFullRD++;
410
163
        RdModeList.push_back( mostProbableMode );
411
163
        CandCostList.push_back(0);
412
163
      }
413
23.4k
    }
414
22.7k
  }
415
22.7k
}
416
417
// Luma intra mode decision for one CU.
//
// Builds a candidate list with xEstimateLumaRdModeList(), optionally prunes
// it against the inter Hadamard cost (m_usePbIntraFast, non-intra slices
// only), then codes every remaining candidate with xIntraCodingLumaQT()
// (plus up to two BDPCM candidates and, where enabled, ISP variants) and
// keeps the cheapest. The winner is copied into cu.cs and into cu.
//
//   cu          coding unit under test; its intra fields are overwritten
//               while testing and set to the best candidate on success
//   partitioner supplies the current area, channel type and tree type
//   bestCost    best cost known so far; passed on to the coding call and
//               lowered locally whenever a cheaper candidate is found
//
// Returns true when a candidate was selected. Returns false only on the
// early exit where the inter-Hadamard pruning rejects all candidates.
// THROWs if the RD loop finishes without any candidate beating the initial
// best-structure cost.
bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, double bestCost)
{
  CodingStructure       &cs           = *cu.cs;
  const int             width         = partitioner.currArea().lwidth();
  const int             height        = partitioner.currArea().lheight();

  //===== loop over partitions =====

  // Context state at entry; restored after every candidate is coded.
  const TempCtx ctxStart           ( m_CtxCache, m_CABACEstimator->getCtx() );

  // variables for saving fast intra modes scan results across multiple LFNST passes
  double costInterCU = xFindInterCUCost( cu );

  bool validReturn = false;

  //===== determine set of modes to be tested (using prediction signal only) =====
  int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> RdModeList;
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> HadModeList;
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;

  // Block-size dependent default, overridable through the configuration.
  int numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[Log2(width) - MIN_CU_LOG2][Log2(height) - MIN_CU_LOG2];
  if (m_pcEncCfg->m_numIntraModesFullRD > 0)
    numModesForFullRD=m_pcEncCfg->m_numIntraModesFullRD;

#if INTRA_FULL_SEARCH
  numModesForFullRD = numModesAvailable;
#endif
  const SPS& sps = *cu.cs->sps;
  const bool mipAllowed = sps.MIP && cu.lwidth() <= sps.getMaxTbSize() && cu.lheight() <= sps.getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.lumaSize()));
  // Aspect-ratio limit for MIP testing; shrinks as m_useFastMIP grows.
  const int SizeThr     = 8 >> std::max( 0, m_pcEncCfg->m_useFastMIP - 1 );
  const bool testMip    = mipAllowed && ( cu.lwidth() <= ( SizeThr * cu.lheight() ) && cu.lheight() <= ( SizeThr * cu.lwidth() ) ) && ( cu.lwidth() <= MIP_MAX_WIDTH && cu.lheight() <= MIP_MAX_HEIGHT );
  bool testISP = sps.ISP && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
  if (testISP)
  {
    int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
    int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
    m_ispTestedModes[0].init(numTotalPartsHor, numTotalPartsVer, 0);
    // the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with
    // ISP due to size restrictions
    numTotalPartsHor = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0;
    numTotalPartsVer = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0;
    for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++)
    {
      m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer, 0);
    }
    testISP = m_ispTestedModes[0].numTotalParts[0];
  }
  else
  {
    m_ispTestedModes[0].init(0, 0, 0);
  }

  xEstimateLumaRdModeList(numModesForFullRD, RdModeList, HadModeList, CandCostList, CandHadList, cu, testMip);

  CHECK( (size_t)numModesForFullRD != RdModeList.size(), "Inconsistent state!" );

  // after this point, don't use numModesForFullRD
  if( m_pcEncCfg->m_usePbIntraFast && !cs.slice->isIntra() && RdModeList.size() < numModesAvailable )
  {
    double pbintraRatio = m_pcEncCfg->m_usePbIntraFast == 1 && ( cs.area.lwidth() >= 16 && cs.area.lheight() >= 16 ) ? 1.2 : PBINTRA_RATIO;

    int maxSize = -1;
    // Remember the first MIP candidate so it can be kept if truncation drops it.
    ModeInfo bestMipMode;
    int bestMipIdx = -1;
    for( int idx = 0; idx < RdModeList.size(); idx++ )
    {
      if( RdModeList[idx].mipFlg )
      {
        bestMipMode = RdModeList[idx];
        bestMipIdx = idx;
        break;
      }
    }
    // maxSize ends up as the smallest index k in [0, 3) that is either missing
    // from CandHadList or whose Hadamard cost exceeds interHad * pbintraRatio;
    // it stays -1 when no such index exists.
    const int numHadCand = 3;
    for (int k = numHadCand - 1; k >= 0; k--)
    {
      if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; }
    }
    if (maxSize > 0)
    {
      RdModeList.resize(std::min<size_t>(RdModeList.size(), maxSize));
      if( bestMipIdx >= 0 )
      {
        if( RdModeList.size() <= bestMipIdx )
        {
          // The MIP candidate was cut off: re-append it and move its buffer to the new slot.
          RdModeList.push_back(bestMipMode);
          m_SortedPelUnitBufs->swap( maxSize, bestMipIdx );
        }
      }
    }
    if (maxSize == 0)
    {
      // Even the best Hadamard candidate is above the threshold: give up on intra here.
      cs.dist = MAX_DISTORTION;
      cs.interHad = 0;
      return false;
    }
  }

  //===== check modes (using r-d costs) =====
  ModeInfo bestPUMode;

  CodingStructure *csTemp = m_pTempCS;
  CodingStructure *csBest = m_pBestCS;

  csTemp->slice   = csBest->slice   = cs.slice;
  csTemp->picture = csBest->picture = cs.picture;
  csTemp->compactResize( cu );
  csBest->compactResize( cu );
  csTemp->initStructData();
  csBest->initStructData();

  int   bestLfnstIdx  = 0;
  const bool useBDPCM = cs.picture->useBDPCM;
  int   NumBDPCMCand  = (useBDPCM && sps.BDPCM && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType))) ? 2 : 0;
  int   bestbdpcmMode = 0;
  int   bestISP       = 0;
  int   bestMrl       = 0;
  bool  bestMip       = false;
  int   EndMode       = (int)RdModeList.size();
  bool  useISPlfnst   = testISP && sps.LFNST;
  bool  noLFNST_ts    = false;
  double bestCostIsp[2] = { MAX_DOUBLE, MAX_DOUBLE };
  bool disableMTS = false;
  bool disableLFNST = false;
  bool disableDCT2test = false;
  if (m_pcEncCfg->m_FastIntraTools)
  {
    // speedIntra is a bit field filled by xSpeedUpIntra():
    //   bit 0 -> skip ISP, bit 1 -> disable LFNST, bit 2 -> disable MTS,
    //   bit 3 and above -> disableDCT2test
    int speedIntra = 0;
    xSpeedUpIntra(bestCost, EndMode, speedIntra, cu);
    disableMTS = (speedIntra >> 2 ) & 0x1;
    disableLFNST = (speedIntra >> 1) & 0x1;
    disableDCT2test = speedIntra>>3;
    if (disableLFNST)
    {
      noLFNST_ts = true;
      useISPlfnst = false;
    }
    if (speedIntra & 0x1)
    {
      testISP = false;
    }
  }

  // mode_cur in [0, EndMode) indexes RdModeList; the NumBDPCMCand iterations
  // after that are the BDPCM candidates.
  for (int mode_cur = 0; mode_cur < EndMode + NumBDPCMCand; mode_cur++)
  {
    int mode = mode_cur;
    if (mode_cur >= EndMode)
    {
      // BDPCM candidates are encoded as negative modes: the first one is -2,
      // the second -1; cu.bdpcmM[CH_L] is set to -mode below.
      mode = ( mode_cur - EndMode ) ? -1 : -2;
      testISP = false;
    }
    // set CU/PU to luma prediction mode
    ModeInfo testMode;
    int noISP = 0;
    int endISP = testISP ? 2 : 0;
    bool noLFNST = noLFNST_ts;
    if (mode && useISPlfnst)
    {
      // Drop LFNST once the best non-ISP cost is more than 1.4x the best ISP cost.
      noLFNST |= (bestCostIsp[0] > (bestCostIsp[1] * 1.4));
      if (mode > 2)
      {
        // From the fourth list entry on, ISP is no longer tested.
        endISP = 0;
        testISP = false;
      }
    }
    if (testISP)
    {
      xSpeedUpISP(1, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, bestLfnstIdx);
    }
    int startISP = 0;
    if (disableDCT2test && mode && bestISP)
    {
      startISP = endISP ? 1 : 0;
    }
    // ispM == 0 is the non-ISP pass; 1 and 2 are the ISP modes.
    for (int ispM = startISP; ispM <= endISP; ispM++)
    {
      if (ispM && (ispM == noISP))
      {
        continue;
      }

      if (mode < 0)
      {
        cu.bdpcmM[CH_L] = -mode;
        testMode = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmM[CH_L] == 2 ? VER_IDX : HOR_IDX);
      }
      else
      {
        testMode = RdModeList[mode];
        cu.bdpcmM[CH_L] = 0;
      }

      cu.ispMode = ispM;
      cu.mipFlag = testMode.mipFlg;
      cu.mipTransposedFlag = testMode.mipTrFlg;
      cu.multiRefIdx = testMode.mRefId;
      cu.intraDir[CH_L] = testMode.modeId;
      if (cu.ispMode && xSpeedUpISP(0, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, 0) )
      {
        continue;
      }
      if (m_pcEncCfg->m_FastIntraTools && (cu.ispMode || sps.LFNST || sps.MTS))
      {
        m_ispTestedModes[0].intraWasTested = true;
      }
      CHECK(cu.mipFlag && cu.multiRefIdx, "Error: combination of MIP and MRL not supported");
      CHECK(cu.multiRefIdx && (cu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
      CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
      CHECK(cu.ispMode && cu.multiRefIdx, "Error: combination of ISP and MRL not supported");

      // determine residual for partition
      cs.initSubStructure(*csTemp, partitioner.chType, cs.area, true);
      // The mode index is passed negated when one of the three conditions below holds.
      // NOTE(review): how xIntraCodingLumaQT interprets the sign is not visible here.
      int doISP = (((cu.ispMode == 0) && noLFNST) || (useISPlfnst && mode && cu.ispMode && (bestLfnstIdx == 0)) || disableLFNST) ? -mode : mode;
      xIntraCodingLumaQT(*csTemp, partitioner, m_SortedPelUnitBufs->getBufFromSortedList(mode), bestCost, doISP, disableMTS);

      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
        cu.blocks[0].y, width, height, csTemp->cost, testMode.modeId, testMode.ispMod,
        cu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);

      // An ISP result whose first TU has no luma cbf is discarded.
      if (cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMP_Y])
      {
        csTemp->cost = MAX_DOUBLE;
        csTemp->costDbOffset = 0;
      }
      if (useISPlfnst)
      {
        // Track the best cost separately for non-ISP [0] and ISP [1].
        int n = (cu.ispMode == 0) ? 0 : 1;
        bestCostIsp[n] = csTemp->cost < bestCostIsp[n] ? csTemp->cost : bestCostIsp[n];
      }

      // check r-d cost
      if (csTemp->cost < csBest->cost)
      {
        validReturn   = true;
        std::swap(csTemp, csBest);
        bestPUMode    = testMode;
        bestLfnstIdx  = csBest->cus[0]->lfnstIdx;
        bestISP       = csBest->cus[0]->ispMode;
        bestMip       = csBest->cus[0]->mipFlag;
        bestMrl       = csBest->cus[0]->multiRefIdx;
        bestbdpcmMode = cu.bdpcmM[CH_L];
        m_ispTestedModes[bestLfnstIdx].bestSplitSoFar = ISPType(bestISP);
        if (csBest->cost < bestCost)
        {
          bestCost = csBest->cost;
        }
        // A transform-skip winner on a block of at least 64 luma samples turns
        // LFNST off for the remaining candidates.
        if ((csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] == MTS_SKIP) && ( floorLog2(csBest->getTU(partitioner.chType)->blocks[COMP_Y].area()) >= 6 ))
        {
          noLFNST_ts = true;
        }
      }

      // reset context models
      m_CABACEstimator->getCtx() = ctxStart;

      csTemp->releaseIntermediateData();

      if (m_pcEncCfg->m_fastLocalDualTreeMode && CU::isConsIntra(cu) && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0)
      {
        if( (m_pcEncCfg->m_fastLocalDualTreeMode == 2) || (csBest->cost > costInterCU * 1.5))
        {
          //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
          EndMode = 0;
          break;
        }
      }
    }
  } // Mode loop

  // Only touch csBest's TU when a candidate actually won. Without a winner the
  // swap above never ran, so csBest was never handed to the coding call, and
  // the THROW below must be reached instead of dereferencing getTU() here.
  if (validReturn && m_pcEncCfg->m_FastIntraTools && (sps.ISP|| sps.LFNST || sps.MTS))
  {
    // Bit field: 4 = non-zero mtsIdx, 2 = LFNST used, 1 = ISP used.
    int bestMode = csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] ? 4 : 0;
    bestMode |= bestLfnstIdx ? 2 : 0;
    bestMode |= bestISP ? 1 : 0;
    m_ispTestedModes[0].bestIntraMode = bestMode;
  }
  cu.ispMode = bestISP;
  if( validReturn )
  {
    cs.useSubStructure( *csBest, partitioner.chType, TREE_D, cu.singleChan( CH_L ), true );
    const ReshapeData& reshapeData = cs.picture->reshapeData;
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
    {
      cs.getRspRecoBuf().copyFrom(csBest->getRspRecoBuf());
    }

    //=== update PU data ====
    cu.lfnstIdx           = bestLfnstIdx;
    cu.mipTransposedFlag  = bestPUMode.mipTrFlg;
    cu.intraDir[CH_L]     = bestPUMode.modeId;
    cu.bdpcmM[CH_L]       = bestbdpcmMode;
    cu.mipFlag            = bestMip;
    cu.multiRefIdx        = bestMrl;
  }
  else
  {
    THROW("fix this");
  }

  csBest->releaseIntermediateData();

  return validReturn;
}
722
723
// Chroma intra mode decision for one CU.
//
// Steps, as implemented below:
//  1. Fetch the chroma candidate modes via CU::getIntraChromaCandModes().
//  2. Pre-screen the candidates with min(2*SAD, SATD) on the Cb and Cr predictions
//     and disable the `reducedModeNumber` candidates with the highest cost.
//  3. For every remaining candidate (plus up to two BDPCM pseudo-modes) run
//     xIntraChromaCodingQT() and xGetIntraFracBitsQT() and keep the lowest RD cost.
//  4. Copy the best reconstruction / TU data back into `cs` and the picture buffer.
//
// Outputs: cu.intraDir[1], cu.lfnstIdx, cu.bdpcmM[CH_C], cs.dist; the CABAC estimator
// contexts are reset to their state at entry. When luma uses ISP and no candidate
// cost dropped below `maxCostAllowed`, cu.ispMode is cleared.
//
// \param cu              coding unit under test (modified)
// \param partitioner     partitioner positioned at the CU
// \param maxCostAllowed  initial cost bound, only tracked when luma uses ISP
void IntraSearch::estIntraPredChromaQT( CodingUnit& cu, Partitioner& partitioner, const double maxCostAllowed )
724
51.3k
{
725
51.3k
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_CHROMA, cu.cs, CH_C );
726
51.3k
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );
727
51.3k
  CodingStructure &cs   = *cu.cs;
728
51.3k
  bool lumaUsesISP      = !CU::isSepTree(cu) && cu.ispMode;
729
51.3k
  PartSplit ispType     = lumaUsesISP ? CU::getISPType(cu, COMP_Y) : TU_NO_ISP;
730
51.3k
  double bestCostSoFar  = maxCostAllowed;
731
51.3k
  const uint32_t numberValidComponents = getNumberValidComponents( cu.chromaFormat );
732
51.3k
  const bool useBDPCM   = cs.picture->useBDPCM;
733
734
51.3k
  uint32_t   uiBestMode = 0;
735
51.3k
  Distortion uiBestDist = 0;
736
51.3k
  double     dBestCost  = MAX_DOUBLE;
737
738
  //----- init mode list ----
739
51.3k
  {
740
51.3k
    uint32_t  uiMinMode = 0;
741
51.3k
    uint32_t  uiMaxMode = NUM_CHROMA_MODE;
742
743
    // number of candidates to drop after the SATD pre-check: half of the list when
    // m_reduceIntraChromaModesFullRD is set, a quarter otherwise
51.3k
    const int reducedModeNumber = uiMaxMode >> (m_pcEncCfg->m_reduceIntraChromaModesFullRD ? 1 : 2);
744
    //----- check chroma modes -----
745
51.3k
    uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
746
51.3k
    CU::getIntraChromaCandModes( cu, chromaCandModes );
747
748
    // create a temporary CS
749
51.3k
    CodingStructure &saveCS = *m_pSaveCS[0];
750
51.3k
    saveCS.pcv      = cs.pcv;
751
51.3k
    saveCS.picture  = cs.picture;
752
51.3k
    saveCS.area.repositionTo( cs.area );
753
51.3k
    saveCS.clearTUs();
754
755
51.3k
    if( !CU::isSepTree(cu) && cu.ispMode )
756
0
    {
757
0
      saveCS.clearCUs();
758
0
    }
759
760
    // separate tree: the chroma TUs are created here, split when the partitioner
    // allows a TU_MAX_TR_SPLIT at the current area
51.3k
    if( CU::isSepTree(cu) )
761
51.3k
    {
762
51.3k
      if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
763
0
      {
764
0
        partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
765
766
0
        do
767
0
        {
768
0
          cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ).depth = partitioner.currTrDepth;
769
0
        } while( partitioner.nextPart( cs ) );
770
771
0
        partitioner.exitCurrSplit();
772
0
      }
773
51.3k
      else
774
51.3k
        cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu );
775
51.3k
    }
776
777
    // create a store for the TUs
778
51.3k
    std::vector<TransformUnit*> orgTUs;
779
51.3k
    for( const auto &ptu : cs.tus )
780
51.3k
    {
781
      // for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs
782
51.3k
      if (lumaUsesISP || cu.contains(*ptu, CH_C))
783
51.3k
      {
784
51.3k
        saveCS.addTU( *ptu, partitioner.chType, nullptr );
785
51.3k
        orgTUs.push_back( ptu );
786
51.3k
      }
787
51.3k
    }
788
789
    // SATD pre-selecting.
790
51.3k
    int     satdModeList  [NUM_CHROMA_MODE] = { 0 };
791
51.3k
    int64_t satdSortedCost[NUM_CHROMA_MODE] = { 0 };
792
51.3k
    bool    modeDisable[NUM_INTRA_MODE + 1] = { false }; // use intra mode idx to check whether enable
793
794
    // NOTE: this declaration shadows the function-scope `cs` above; both are bound to *cu.cs
51.3k
    CodingStructure& cs = *(cu.cs);
795
51.3k
    CompArea areaCb = cu.Cb();
796
51.3k
    CompArea areaCr = cu.Cr();
797
51.3k
    CPelBuf orgCb  = cs.getOrgBuf (COMP_Cb);
798
51.3k
    PelBuf predCb  = cs.getPredBuf(COMP_Cb);
799
51.3k
    CPelBuf orgCr  = cs.getOrgBuf (COMP_Cr);
800
51.3k
    PelBuf predCr  = cs.getPredBuf(COMP_Cr);
801
802
51.3k
    DistParam distParamSadCb  = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_SAD);
803
51.3k
    DistParam distParamSatdCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_HAD);
804
51.3k
    DistParam distParamSadCr  = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_SAD);
805
51.3k
    DistParam distParamSatdCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_HAD);
806
807
51.3k
    cu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.
808
809
51.3k
    initIntraPatternChType(cu, cu.Cb());
810
51.3k
    initIntraPatternChType(cu, cu.Cr());
811
51.3k
    loadLMLumaRecPels(cu, cu.Cb());
812
813
    // pre-check loop: candidates skipped by `continue` keep their zero-initialised cost
462k
    for (int idx = uiMinMode; idx < uiMaxMode; idx++)
814
411k
    {
815
411k
      int mode = chromaCandModes[idx];
816
411k
      satdModeList[idx] = mode;
817
411k
      if (CU::isLMCMode(mode) && ( !CU::isLMCModeEnabled(cu, mode) || cu.slice->lmChromaCheckDisable ) )
818
44.0k
      {
819
44.0k
        continue;
820
44.0k
      }
821
366k
      if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM
822
89.9k
      {
823
89.9k
        continue;
824
89.9k
      }
825
826
277k
      cu.intraDir[1]    = mode; // temporary assigned, for SATD checking.
827
828
277k
      const bool isLMCMode = CU::isLMCMode(mode);
829
277k
      if( isLMCMode )
830
73.3k
      {
831
73.3k
        predIntraChromaLM(COMP_Cb, predCb, cu, areaCb, mode);
832
73.3k
      }
833
203k
      else
834
203k
      {
835
203k
        initPredIntraParams(cu, cu.Cb(), *cs.sps);
836
203k
        predIntraAng(COMP_Cb, predCb, cu);
837
203k
      }
838
      // per component the pre-check cost is min( 2 * SAD, SATD )
277k
      int64_t sadCb = distParamSadCb.distFunc(distParamSadCb) * 2;
839
277k
      int64_t satdCb = distParamSatdCb.distFunc(distParamSatdCb);
840
277k
      int64_t sad = std::min(sadCb, satdCb);
841
842
277k
      if( isLMCMode )
843
73.3k
      {
844
73.3k
        predIntraChromaLM(COMP_Cr, predCr, cu, areaCr, mode);
845
73.3k
      }
846
203k
      else
847
203k
      {
848
203k
        initPredIntraParams(cu, cu.Cr(), *cs.sps);
849
203k
        predIntraAng(COMP_Cr, predCr, cu);
850
203k
      }
851
277k
      int64_t sadCr = distParamSadCr.distFunc(distParamSadCr) * 2;
852
277k
      int64_t satdCr = distParamSatdCr.distFunc(distParamSatdCr);
853
277k
      sad += std::min(sadCr, satdCr);
854
277k
      satdSortedCost[idx] = sad;
855
277k
    }
856
857
    // sort the mode based on the cost from small to large.
    // (in-place exchange sort on both arrays; entries with equal cost may change their relative order)
858
462k
    for (int i = uiMinMode; i <= uiMaxMode - 1; i++)
859
411k
    {
860
1.84M
      for (int j = i + 1; j <= uiMaxMode - 1; j++)
861
1.43M
      {
862
1.43M
        if (satdSortedCost[j] < satdSortedCost[i])
863
88.3k
        {
864
88.3k
          std::swap( satdModeList[i],   satdModeList[j]);
865
88.3k
          std::swap( satdSortedCost[i], satdSortedCost[j]);
866
88.3k
        }
867
1.43M
      }
868
411k
    }
869
870
256k
    for (int i = 0; i < reducedModeNumber; i++)
871
205k
    {
872
205k
      modeDisable[satdModeList[uiMaxMode - 1 - i]] = true; // disable the last reducedModeNumber modes
873
205k
    }
874
875
51.3k
    int bestLfnstIdx = 0;
876
    // save the dist
877
51.3k
    Distortion baseDist = cs.dist;
878
51.3k
    int32_t bestbdpcmMode = 0;
    // when BDPCM is usable, two extra iterations are appended after the regular candidates
879
51.3k
    uint32_t numbdpcmModes = ( useBDPCM && CU::bdpcmAllowed(cu, COMP_Cb)
880
34.6k
        && ((partitioner.chType == CH_C) || (cu.ispMode == 0 && cu.lfnstIdx == 0 && cu.firstTU->mtsIdx[COMP_Y] == MTS_SKIP))) ? 2 : 0;
881
531k
    for (int mode_cur = uiMinMode; mode_cur < (int)(uiMaxMode + numbdpcmModes); mode_cur++)
882
480k
    {
883
480k
      int mode = mode_cur;
884
480k
      if (mode_cur >= uiMaxMode)
885
69.3k
      {
        // first extra iteration -> mode -2, second -> mode -1; the negated value becomes cu.bdpcmM[CH_C] below
886
69.3k
        mode = mode_cur > uiMaxMode ? -1 : -2; //set bdpcm mode
887
69.3k
        if ((mode == -1) && (saveCS.tus[0]->mtsIdx[COMP_Cb] != MTS_SKIP) && (saveCS.tus[0]->mtsIdx[COMP_Cr] != MTS_SKIP))
888
34.6k
        {
889
34.6k
          continue;
890
34.6k
        }
891
69.3k
      }
892
445k
      int chromaIntraMode;
893
445k
      if (mode < 0)
894
34.6k
      {
895
34.6k
        cu.bdpcmM[CH_C] = -mode;
896
34.6k
        chromaIntraMode = cu.bdpcmM[CH_C] == 2 ? chromaCandModes[1] : chromaCandModes[2];
897
34.6k
      }
898
411k
      else
899
411k
      {
900
411k
        cu.bdpcmM[CH_C] = 0;
901
411k
        chromaIntraMode = chromaCandModes[mode];
902
411k
        if (CU::isLMCMode(chromaIntraMode) && ( !CU::isLMCModeEnabled(cu, chromaIntraMode) || cu.slice->lmChromaCheckDisable ) )
903
44.0k
        {
904
44.0k
          continue;
905
44.0k
        }
906
366k
        if (modeDisable[chromaIntraMode] && CU::isLMCModeEnabled(cu, chromaIntraMode)) // when CCLM is disable, then MDLM is disable. not use satd checking
907
146k
        {
908
146k
          continue;
909
146k
        }
910
366k
      }
911
254k
      cs.dist = baseDist;
912
      //----- restore context models -----
913
254k
      m_CABACEstimator->getCtx() = ctxStart;
914
915
      //----- chroma coding -----
916
254k
      cu.intraDir[1] = chromaIntraMode;
917
254k
      m_ispTestedModes[0].IspType = ispType;
918
254k
      m_ispTestedModes[0].subTuCounter = -1;
919
254k
      xIntraChromaCodingQT( cs, partitioner );
920
254k
      if (lumaUsesISP && cs.dist == MAX_UINT)
921
0
      {
922
0
        continue;
923
0
      }
924
925
254k
      if (cs.sps->transformSkip)
926
254k
      {
927
254k
        m_CABACEstimator->getCtx() = ctxStart;
928
254k
      }
929
254k
      m_ispTestedModes[0].IspType = ispType;
930
254k
      m_ispTestedModes[0].subTuCounter = -1;
931
254k
      uint64_t fracBits   = xGetIntraFracBitsQT( cs, partitioner, false );
932
254k
      Distortion uiDist = cs.dist;
      // the RD cost only counts the distortion added on top of baseDist
933
254k
      double    dCost   = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );
934
935
      //----- compare -----
936
254k
      if( dCost < dBestCost )
937
92.4k
      {
938
92.4k
        if (lumaUsesISP && (dCost < bestCostSoFar))
939
0
        {
940
0
          bestCostSoFar = dCost;
941
0
        }
        // new best: stash reconstruction and per-component TU data in saveCS
942
277k
        for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ )
943
184k
        {
944
184k
          const CompArea& area = cu.blocks[i];
945
184k
          saveCS.getRecoBuf     ( area ).copyFrom( cs.getRecoBuf   ( area ) );
946
184k
          cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf   ( area ) );
947
369k
          for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
948
184k
          {
949
184k
            saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID );
950
184k
          }
951
184k
        }
952
92.4k
        dBestCost    = dCost;
953
92.4k
        uiBestDist   = uiDist;
954
92.4k
        uiBestMode   = chromaIntraMode;
955
92.4k
        bestLfnstIdx = cu.lfnstIdx;
956
92.4k
        bestbdpcmMode = cu.bdpcmM[CH_C];
957
958
92.4k
      }
959
254k
    }
960
51.3k
    cu.lfnstIdx = bestLfnstIdx;
961
51.3k
    cu.bdpcmM[CH_C]= bestbdpcmMode;
962
963
    // restore the best candidate's reconstruction and TU data from saveCS
154k
    for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ )
964
102k
    {
965
102k
      const CompArea& area = cu.blocks[i];
966
967
102k
      cs.getRecoBuf         ( area ).copyFrom( saveCS.getRecoBuf( area ) );
968
102k
      cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf    ( area ) );
969
970
205k
      for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
971
102k
      {
972
102k
        orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID );
973
102k
      }
974
102k
    }
975
51.3k
  }
976
51.3k
  cu.intraDir[1] = uiBestMode;
977
51.3k
  cs.dist        = uiBestDist;
978
979
  //----- restore context models -----
980
51.3k
  m_CABACEstimator->getCtx() = ctxStart;
  // bestCostSoFar is only lowered inside the mode loop when luma uses ISP; if it never
  // dropped below the bound passed in, the ISP mode is cleared
981
51.3k
  if (lumaUsesISP && bestCostSoFar >= maxCostAllowed)
982
0
  {
983
0
    cu.ispMode = 0;
984
0
  }
985
51.3k
}
986
987
// Records one (area, cost) pair in the next free SCIPU slot.
// Once NUM_INTER_CU_INFO_SAVE entries are stored, further calls are ignored.
void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )
{
  if( !( m_numCuInSCIPU < NUM_INTER_CU_INFO_SAVE ) )
  {
    return; // table is full, drop the entry
  }

  m_cuAreaInSCIPU[m_numCuInSCIPU] = area;
  m_cuCostInSCIPU[m_numCuInSCIPU] = cost;
  ++m_numCuInSCIPU;
}
996
997
void IntraSearch::initCuAreaCostInSCIPU()
998
0
{
999
0
  for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ )
1000
0
  {
1001
0
    m_cuAreaInSCIPU[i] = Area();
1002
0
    m_cuCostInSCIPU[i] = 0;
1003
0
  }
1004
0
  m_numCuInSCIPU = 0;
1005
0
}
1006
// -------------------------------------------------------------------------------------------------------------------
1007
// Intra search
1008
// -------------------------------------------------------------------------------------------------------------------
1009
1010
// Feeds the CU-level intra header syntax of the current CU to the CABAC estimator.
//
// luma == true : pred_mode (when applicable), bdpcm_mode and intra_luma_pred_mode,
//                but only for the first luma TU of the CU.
// luma == false: bdpcm_mode and intra_chroma_pred_mode, but only when the current
//                area has a valid Cb block located at the chroma origin of `cs`.
void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool luma )
{
  CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType );

  if( !luma )
  {
    //  chroma
    const UnitArea& area     = partitioner.currArea();
    const bool firstChromaTu = area.Cb().valid() && area.chromaPos() == cs.area.chromaPos();

    if( firstChromaTu )
    {
      m_CABACEstimator->bdpcm_mode( cu, ComponentID( CH_C ) );
      m_CABACEstimator->intra_chroma_pred_mode( cu );
    }
    return;
  }

  // With ISP the first sub-TU is identified by the counter, otherwise by position.
  bool firstLumaTu;
  if( cu.ispMode )
  {
    firstLumaTu = ( m_ispTestedModes[0].subTuCounter == 0 );
  }
  else
  {
    firstLumaTu = ( partitioner.currArea().lumaPos() == cs.area.lumaPos() );
  }

  if( !firstLumaTu )
  {
    return;
  }

  // CU header
  const bool predModeCoded = !cs.slice->isIntra() || cs.slice->sps->IBC || cs.slice->sps->PLT;
  if( predModeCoded && cu.Y().valid() )
  {
    m_CABACEstimator->pred_mode( cu );
  }
  m_CABACEstimator->bdpcm_mode( cu, ComponentID( partitioner.chType ) );

  // luma prediction mode
  if( !cu.Y().valid() )
  {
    m_CABACEstimator->pred_mode( cu );
  }
  m_CABACEstimator->intra_luma_pred_mode( cu );
}
1049
1050
// Feeds the CBF syntax of the TU at the partitioner's current position to the
// CABAC estimator, recursing into the transform split when the TU lies deeper
// than the current transform depth.
//
// luma == false: chroma CBFs (Cb, then Cr) are coded before any recursion.
// luma == true : the luma CBF is coded at the leaf; for ISP the CBF of the last
//                sub-TU is skipped when all preceding sub-TUs have a zero CBF.
void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool luma )
{
  const UnitArea& currArea  = partitioner.currArea();
  int             subTuIdx  = m_ispTestedModes[0].subTuCounter;
  TransformUnit&  currTU    = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuIdx );
  CodingUnit&     currCU    = *currTU.cu;
  const uint32_t  currDepth = partitioner.currTrDepth;
  const bool      subdiv    = currTU.depth > currDepth;
  const ComponentID firstComp = ( partitioner.chType == CH_L ) ? COMP_Y : COMP_Cb;

  //===== chroma Cbfs =====
  if( !luma )
  {
    const bool chromaCbfISP = currArea.blocks[COMP_Cb].valid() && currCU.ispMode && !subdiv;

    if( !currCU.ispMode || chromaCbfISP )
    {
      const uint32_t numComps = getNumberValidComponents( currArea.chromaFormat );
      uint32_t cbfDepth = currDepth;
      if( chromaCbfISP )
      {
        cbfDepth = currDepth - 1;
      }

      for( uint32_t ch = COMP_Cb; ch < numComps; ch++ )
      {
        const ComponentID chromaComp = ComponentID( ch );
        const bool codeCbf = currDepth == 0 || TU::getCbfAtDepth( currTU, chromaComp, currDepth - 1 ) || chromaCbfISP;
        if( !codeCbf )
        {
          continue;
        }

        bool prevCbf = false;
        if( chromaComp == COMP_Cr )
        {
          prevCbf = TU::getCbfAtDepth( currTU, COMP_Cb, currDepth );
        }
        m_CABACEstimator->cbf_comp( currCU, TU::getCbfAtDepth( currTU, chromaComp, currDepth ), currArea.blocks[chromaComp], cbfDepth, prevCbf );
      }
    }
  }

  //===== recursion into the transform split =====
  if( subdiv )
  {
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    }
    else if( currCU.ispMode && isLuma( firstComp ) )
    {
      partitioner.splitCurrArea( m_ispTestedModes[0].IspType, cs );
    }
    else
      THROW("Cannot perform an implicit split!");

    do
    {
      xEncSubdivCbfQT( cs, partitioner, luma );   //?
      // local counter only; the recursive call reads m_ispTestedModes[0].subTuCounter itself
      if( subTuIdx != -1 )
      {
        ++subTuIdx;
      }
    } while( partitioner.nextPart( cs ) );

    partitioner.exitCurrSplit();
    return;
  }

  if( !luma )
  {
    return;
  }

  //===== luma Cbf =====
  bool previousCbf = false;

  if( m_ispTestedModes[0].IspType != TU_NO_ISP )
  {
    const uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2( currTU.lheight() )
      : currCU.lwidth() >> floorLog2( currTU.lwidth() );
    const uint32_t lastTuIdx = nTus - 1;

    if( static_cast<uint32_t>( subTuIdx ) == lastTuIdx )
    {
      // OR together the luma Cbfs of all sub-TUs preceding the last one
      bool rootCbfSoFar = false;
      TransformUnit* tuIt = currCU.firstTU;
      for( uint32_t tuIdx = 0; tuIdx < lastTuIdx; tuIdx++ )
      {
        rootCbfSoFar |= TU::getCbfAtDepth( *tuIt, COMP_Y, currDepth );
        tuIt = tuIt->next;
      }

      if( !rootCbfSoFar )
      {
        // last Cbf is inferred: nothing is coded for this TU
        return;
      }
    }

    previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMP_Y, partitioner.currTrDepth );
  }

  m_CABACEstimator->cbf_comp( currCU, TU::getCbfAtDepth( currTU, COMP_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode );
}
1138
// Feeds the residual syntax of one component to the CABAC estimator, recursing into
// the transform split when the TU lies deeper than the current transform depth.
//
// At a leaf with a valid block for `compID`: joint_cb_cr is coded when compID is Cr,
// and, if the component's CBF is set, residual_coding (followed by mts_idx for luma).
// `subTuIdx` and `ispType` are not read in this body.
void IntraSearch::xEncCoeffQT(CodingStructure& cs, Partitioner& partitioner, const ComponentID compID, CUCtx* cuCtx, const int subTuIdx, const PartSplit ispType)
{
  const UnitArea& currArea  = partitioner.currArea();

  int            tuCounter  = m_ispTestedModes[0].subTuCounter;
  TransformUnit& currTU     = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, tuCounter );
  const uint32_t currDepth  = partitioner.currTrDepth;

  if( currTU.depth > currDepth )
  {
    // TU lies deeper: descend via the regular TU split or the ISP split
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    }
    else if( currTU.cu->ispMode )
    {
      partitioner.splitCurrArea( m_ispTestedModes[0].IspType, cs );
    }
    else
      THROW("Implicit TU split not available!");

    do
    {
      xEncCoeffQT( cs, partitioner, compID, cuCtx, tuCounter, m_ispTestedModes[0].IspType );
      if( tuCounter != -1 )
      {
        ++tuCounter;
      }
    } while( partitioner.nextPart( cs ) );

    partitioner.exitCurrSplit();
    return;
  }

  if( !currArea.blocks[compID].valid() )
  {
    return;
  }

  if( compID == COMP_Cr )
  {
    const int cbBit   = TU::getCbf( currTU, COMP_Cb ) ? 2 : 0;
    const int crBit   = TU::getCbf( currTU, COMP_Cr ) ? 1 : 0;
    const int cbfMask = cbBit + crBit;
    m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
  }

  if( !TU::getCbf( currTU, compID ) )
  {
    return;
  }

  if( isLuma( compID ) )
  {
    m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
    m_CABACEstimator->mts_idx( *currTU.cu, cuCtx );
  }
  else
  {
    m_CABACEstimator->residual_coding( currTU, compID );
  }
}
1189
1190
// Returns the estimated fractional bits for the intra header, the CBFs and the
// coefficients of either the luma component (luma == true) or both chroma
// components (luma == false) at the partitioner's current position.
// The estimator's bit counter is reset on entry.
uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool luma, CUCtx *cuCtx )
{
  m_CABACEstimator->resetBits();

  xEncIntraHeader( cs, partitioner, luma );
  xEncSubdivCbfQT( cs, partitioner, luma );

  if( !luma )
  {
    xEncCoeffQT( cs, partitioner, COMP_Cb );
    xEncCoeffQT( cs, partitioner, COMP_Cr );
    return m_CABACEstimator->getEstFracBits();
  }

  xEncCoeffQT( cs, partitioner, COMP_Y, cuCtx );

  CodingUnit &cu = *cs.cus[0];
  if( cuCtx /*&& CU::isSepTree(cu)*/ )
  {
    // Without ISP the LFNST index is always coded here. With ISP it is coded on the
    // first sub-TU when an LFNST index is set, otherwise on the last sub-TU.
    bool codeLfnst = true;
    if( cu.ispMode )
    {
      if( cu.lfnstIdx )
      {
        codeLfnst = ( m_ispTestedModes[0].subTuCounter == 0 );
      }
      else
      {
        codeLfnst = ( m_ispTestedModes[0].subTuCounter == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1 );
      }
    }

    if( codeLfnst )
    {
      m_CABACEstimator->residual_lfnst_mode( cu, *cuCtx );
    }
  }

  return m_CABACEstimator->getEstFracBits();
}
1219
1220
uint64_t IntraSearch::xGetIntraFracBitsQTChroma(const TransformUnit& currTU, const ComponentID compID, CUCtx *cuCtx)
1221
1.58M
{
1222
1.58M
  m_CABACEstimator->resetBits();
1223
1224
1.58M
  if ( currTU.jointCbCr )
1225
236k
  {
1226
236k
    const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 );
1227
236k
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask>>1, currTU.blocks[ COMP_Cb ], currTU.depth, false );
1228
236k
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask &1, currTU.blocks[ COMP_Cr ], currTU.depth, cbfMask>>1 );
1229
236k
    if( cbfMask )
1230
236k
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1231
236k
    if (cbfMask >> 1)
1232
235k
      m_CABACEstimator->residual_coding( currTU, COMP_Cb, cuCtx );
1233
236k
    if (cbfMask & 1)
1234
236k
      m_CABACEstimator->residual_coding( currTU, COMP_Cr, cuCtx );
1235
236k
  }
1236
1.35M
  else
1237
1.35M
  {
1238
1.35M
    if ( compID == COMP_Cb )
1239
675k
      m_CABACEstimator->cbf_comp( *currTU.cu, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false );
1240
675k
    else
1241
675k
    {
1242
675k
      const bool cbCbf    = TU::getCbf( currTU, COMP_Cb );
1243
675k
      const bool crCbf    = TU::getCbf( currTU, compID );
1244
675k
      const int  cbfMask  = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 );
1245
675k
      m_CABACEstimator->cbf_comp( *currTU.cu, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf );
1246
675k
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1247
675k
    }
1248
1.35M
  }
1249
1250
1.58M
  if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) )
1251
476k
  {
1252
476k
    m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
1253
476k
  }
1254
1255
1.58M
  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
1256
1.58M
  return fracBits;
1257
1.58M
}
1258
1259
// Codes one component of a TU: transform/quantisation, inverse transform,
// reconstruction, and accumulation of the resulting distortion into `ruiDist`.
//
// Luma: the prediction is generated here (copied from `predBuf` when given, otherwise
// MIP or angular prediction) and the residual is computed from it.
// Chroma: this function reads the existing prediction/residual buffers of `cs`
// (presumably filled by the caller - confirm) and handles joint Cb-Cr coding,
// in which case the Cr block is reconstructed and measured here as well.
//
// Early exits:
//  - block of `compID` not valid                     -> returns, ruiDist untouched
//  - ISP, last sub-partition, all luma CBFs zero     -> ruiDist = MAX_INT
//  - jointCbCr set but coded CBF pattern differs     -> ruiDist = MAX_DISTORTION
//
// \param tu        transform unit to code (modified)
// \param compID    component to code; must not be Cr when tu.jointCbCr is set (CHECK below)
// \param ruiDist   distortion accumulator (added to, or overwritten on the early exits above)
// \param predBuf   optional precomputed luma prediction
// \param loadTr    forwarded to TrQuant::transformNxN
// `checkCrossCPrediction` and `numSig` are not read in this body.
void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID compID, const bool checkCrossCPrediction, Distortion &ruiDist, uint32_t *numSig, PelUnitBuf *predBuf, const bool loadTr)
1260
1.76M
{
1261
1.76M
  if (!tu.blocks[compID].valid())
1262
0
  {
1263
0
    return;
1264
0
  }
1265
1266
1.76M
  CodingStructure &cs             = *tu.cs;
1267
1.76M
  const CompArea      &area       = tu.blocks[compID];
1268
1.76M
  const SPS           &sps        = *cs.sps;
1269
1.76M
  const ReshapeData&  reshapeData = cs.picture->reshapeData;
1270
1271
1.76M
  const ChannelType    chType     = toChannelType(compID);
1272
1.76M
  const int            bitDepth   = sps.bitDepths[chType];
1273
1274
1.76M
  CPelBuf        piOrg            = cs.getOrgBuf    (area);
1275
1.76M
  PelBuf         piPred           = cs.getPredBuf   (area);
1276
1.76M
  PelBuf         piResi           = cs.getResiBuf   (area);
1277
1.76M
  PelBuf         piReco           = cs.getRecoBuf   (area);
1278
1279
1.76M
  const CodingUnit& cu            = *tu.cu;
1280
1281
  //===== init availability pattern =====
1282
1.76M
  CHECK( tu.jointCbCr && compID == COMP_Cr, "wrong combination of compID and jointCbCr" );
1283
1.76M
  bool jointCbCr = tu.jointCbCr && compID == COMP_Cb;
1284
1285
1.76M
  if ( isLuma(compID) )
1286
174k
  {
1287
174k
    bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu );
1288
174k
    bool firstTBInPredReg  = false;
1289
174k
    CompArea areaPredReg(COMP_Y, tu.chromaFormat, area);
1290
174k
    if (tu.cu->ispMode )
1291
18.1k
    {
1292
18.1k
      firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, area);
1293
18.1k
      if (predRegDiffFromTB)
1294
0
      {
1295
0
        if (firstTBInPredReg)
1296
0
        {
1297
0
          CU::adjustPredArea(areaPredReg);
1298
0
          initIntraPatternChTypeISP(*tu.cu, areaPredReg, piReco);
1299
0
        }
1300
0
      }
1301
18.1k
      else
1302
18.1k
        initIntraPatternChTypeISP(*tu.cu, area, piReco);
1303
18.1k
    }
1304
155k
    else if( !predBuf )
1305
26.9k
    {
1306
26.9k
      initIntraPatternChType(*tu.cu, area);
1307
26.9k
    }
1308
1309
    //===== get prediction signal =====
    // when the prediction region differs from the TB, prediction is only run for the
    // first TB of that region and covers the adjusted area
1310
174k
    if (predRegDiffFromTB)
1311
0
    {
1312
0
      if (firstTBInPredReg)
1313
0
      {
1314
0
        PelBuf piPredReg = cs.getPredBuf(areaPredReg);
1315
0
        predIntraAng(compID, piPredReg, cu);
1316
0
      }
1317
0
    }
1318
174k
    else
1319
174k
    {
1320
174k
      if( predBuf )
1321
128k
      {
1322
128k
        piPred.copyFrom( predBuf->Y() );
1323
128k
      }
1324
45.1k
      else if( CU::isMIP( cu, CH_L ) )
1325
19.7k
      {
1326
19.7k
        initIntraMip( cu );
1327
19.7k
        predIntraMip( piPred, cu );
1328
19.7k
      }
1329
25.3k
      else
1330
25.3k
      {
1331
25.3k
        predIntraAng(compID, piPred, cu);
1332
25.3k
      }
1333
174k
    }
1334
174k
  }
1335
1.76M
  DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), CU::getFinalIntraMode(cu, chType) );
1336
1.76M
  const Slice &slice = *cs.slice;
  // flag: LMCS enabled and (intra slice, or non-intra slice with the CTU flag set)
1337
1.76M
  bool flag = cs.picHeader->lmcsEnabled && (slice.isIntra() || (!slice.isIntra() && reshapeData.getCTUFlag()));
1338
1339
1.76M
  if (isLuma(compID))
1340
174k
  {
1341
    //===== get residual signal =====
1342
174k
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() )
1343
0
    {
1344
0
      piResi.subtract(cs.getRspOrgBuf(area), piPred);
1345
0
    }
1346
174k
    else
1347
174k
    {
1348
174k
      piResi.subtract( piOrg, piPred );
1349
174k
    }
1350
174k
  }
1351
1352
  //===== transform and quantization =====
1353
  //--- init rate estimation arrays for RDOQ ---
1354
  //--- transform and quantization           ---
1355
1.76M
  TCoeff uiAbsSum = 0;
1356
1.76M
  const QpParam cQP(tu, compID);
1357
1358
1.76M
  m_pcTrQuant->selectLambda(compID);
1359
1360
  // from here on `flag` additionally requires a block of more than 4 samples
1.76M
  flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4);
1361
1.76M
  if (flag && isChroma(compID) && cs.picHeader->lmcsChromaResidualScale )
1362
0
  {
1363
0
    int cResScaleInv = tu.chromaAdj;
1364
0
    double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv;
1365
0
    m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) );
1366
0
  }
1367
1368
1.76M
  if ( jointCbCr )
1369
239k
  {
1370
    // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
1371
239k
    const int    absIct = abs( TU::getICTMode(tu) );
1372
239k
    const double lfact  = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 );
1373
239k
    m_pcTrQuant->scaleLambda( lfact );
1374
239k
  }
1375
1.76M
  if ( sps.jointCbCr && isChroma(compID) && (tu.cu->cs->slice->sliceQp > 18) )
1376
1.04M
  {
1377
1.04M
    m_pcTrQuant->scaleLambda( 1.3 );
1378
1.04M
  }
1379
1380
1.76M
  if( isLuma(compID) )
1381
174k
  {
1382
174k
    m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
1383
1384
174k
    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum );
1385
174k
    if (tu.cu->ispMode && isLuma(compID) && CU::isISPLast(*tu.cu, area, area.compID) && CU::allLumaCBFsAreZero(*tu.cu))
1386
0
    {
1387
      // ISP has to have at least one non-zero CBF
1388
0
      ruiDist = MAX_INT;
1389
0
      return;
1390
0
    }
1391
    //--- inverse transform ---
1392
174k
    if (uiAbsSum > 0)
1393
27.9k
    {
1394
27.9k
      m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP);
1395
27.9k
    }
1396
146k
    else
1397
146k
    {
1398
146k
      piResi.fill(0);
1399
146k
    }
1400
174k
  }
1401
1.59M
  else // chroma
1402
1.59M
  {
1403
1.59M
    PelBuf          crPred = cs.getPredBuf ( COMP_Cr );
1404
1.59M
    PelBuf          crResi = cs.getResiBuf ( COMP_Cr );
1405
1.59M
    PelBuf          crReco = cs.getRecoBuf ( COMP_Cr );
1406
1407
1.59M
    int         codedCbfMask  = 0;
    // with jointCbCr set, the coded component is Cb when bit 1 of tu.jointCbCr is set, otherwise Cr
1408
1.59M
    ComponentID codeCompId    = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID);
1409
1.59M
    const QpParam qpCbCr(tu, codeCompId);
1410
1411
1.59M
    if( tu.jointCbCr )
1412
239k
    {
1413
239k
      ComponentID otherCompId = ( codeCompId==COMP_Cr ? COMP_Cb : COMP_Cr );
1414
239k
      tu.getCoeffs( otherCompId ).fill(0); // do we need that?
1415
239k
      TU::setCbfAtDepth (tu, otherCompId, tu.depth, false );
1416
239k
    }
1417
1.59M
    PelBuf& codeResi = ( codeCompId == COMP_Cr ? crResi : piResi );
1418
1.59M
    uiAbsSum = 0;
1419
1.59M
    m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr);
1420
1.59M
    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum );
1421
1.59M
    if( uiAbsSum > 0 )
1422
712k
    {
1423
712k
      m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr);
1424
712k
      codedCbfMask += ( codeCompId == COMP_Cb ? 2 : 1 );
1425
712k
    }
1426
878k
    else
1427
878k
    {
1428
878k
      codeResi.fill(0);
1429
878k
    }
1430
1431
1.59M
    if( tu.jointCbCr )
1432
239k
    {
      // joint mode 3 with a non-zero coded Cb residual: the Cr CBF is set as well
1433
239k
      if( tu.jointCbCr == 3 && codedCbfMask == 2 )
1434
235k
      {
1435
235k
        codedCbfMask = 3;
1436
235k
        TU::setCbfAtDepth (tu, COMP_Cr, tu.depth, true );
1437
235k
      }
      // the coded CBF pattern does not match the requested joint mode: reject this configuration
1438
239k
      if( tu.jointCbCr != codedCbfMask )
1439
2.87k
      {
1440
2.87k
        ruiDist = MAX_DISTORTION;
1441
2.87k
        return;
1442
2.87k
      }
1443
236k
      m_pcTrQuant->invTransformICT( tu, piResi, crResi );
1444
236k
      uiAbsSum = codedCbfMask;
1445
236k
    }
1446
1447
    //===== reconstruction =====
1448
1.58M
    if ( flag && uiAbsSum > 0 && cs.picHeader->lmcsChromaResidualScale )
1449
0
    {
1450
0
      piResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[compID]);
1451
1452
0
      if( jointCbCr )
1453
0
      {
1454
0
        crResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cr]);
1455
0
      }
1456
0
    }
1457
1458
1.58M
    if( jointCbCr )
1459
236k
    {
1460
236k
      crReco.reconstruct(crPred, crResi, cs.slice->clpRngs[ COMP_Cr ]);
1461
236k
    }
1462
1.58M
  }
1463
1.76M
  piReco.reconstruct(piPred, piResi, cs.slice->clpRngs[ compID ]);
1464
  
1465
1466
1467
  //===== update distortion =====
  // weighted SSE (DF_SSE_WTD, with the original luma as reference) is used when LMCS applies
  // to this block or m_lumaLevelToDeltaQPEnabled is set; plain SSE otherwise
1468
1.76M
  const bool reshapeIntraCMD = m_pcEncCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ;
1469
1.76M
  if(((cs.picHeader->lmcsEnabled && (reshapeData.getCTUFlag() || (isChroma(compID) && reshapeIntraCMD))) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled ) )
1470
0
  {
1471
0
    const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] );
1472
0
    if( compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
1473
0
    {
      // luma under LMCS: distortion is measured on the inverse-mapped reconstruction
1474
0
      PelBuf tmpRecLuma = cs.getRspRecoBuf(area);
1475
0
      tmpRecLuma.rspSignal( piReco, reshapeData.getInvLUT());
1476
0
      ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma);
1477
0
    }
1478
0
    else
1479
0
    {
1480
0
      ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma );
1481
0
      if( jointCbCr )
1482
0
      {
1483
0
        CPelBuf         crOrg  = cs.getOrgBuf  ( COMP_Cr );
1484
0
        PelBuf          crReco = cs.getRecoBuf ( COMP_Cr );
1485
0
        ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE_WTD, &orgLuma );
1486
0
      }
1487
0
    }
1488
0
  }
1489
1.76M
  else
1490
1.76M
  {
1491
1.76M
    ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE );
1492
1.76M
    if( jointCbCr )
1493
236k
    {
1494
236k
      CPelBuf         crOrg  = cs.getOrgBuf  ( COMP_Cr );
1495
236k
      PelBuf          crReco = cs.getRecoBuf ( COMP_Cr );
1496
236k
      ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE );
1497
236k
    }
1498
1.76M
  }
1499
1.76M
}
1500
1501
// Rate-distortion search over the luma transform configuration of the current intra CU.
//
// For the partitioner's current area this either
//   * loops over the transform candidates in trModes (entry 0 always, plus a transform-skip entry
//     and MTS entries when those are allowed) combined with LFNST indices
//     startLfnstIdx..endLfnstIdx, calling xTestISP instead of xIntraCodingTUBlock when
//     cu.ispMode is set, or
//   * codes a single configuration when neither LFNST nor any extra transform candidate is to
//     be tested.
// The cheapest candidate is kept; if it was not the last one tested it is copied back from a
// save CodingStructure together with the matching CABAC context state.
//
// cs             coding structure being coded; TUs are added to it, cs.dist and cs.fracBits are
//                incremented and cs.cost is overwritten with the best cost found
// partitioner    supplies current area, channel type and transform depth; it is temporarily
//                split (and un-split again) for ISP
// predBuf        forwarded unchanged to xPreCheckMTS and xIntraCodingTUBlock
// bestCostSoFar  seeds the cost bound handed to xTestISP and the early-termination threshold
// numMode        a negative value switches off MTS and LFNST testing; afterwards only its
//                magnitude is used (it decides whether the ISP cost is stored)
// disableMTS     when true no MTS candidates are tested
void IntraSearch::xIntraCodingLumaQT(CodingStructure& cs, Partitioner& partitioner, PelUnitBuf* predBuf, const double bestCostSoFar, int numMode, bool disableMTS)
1502
104k
{
1503
104k
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_RD_SEARCH_LUMA, &cs, partitioner.chType );
1504
104k
  const UnitArea& currArea  = partitioner.currArea();
1505
104k
  uint32_t        currDepth = partitioner.currTrDepth;
1506
104k
  Distortion singleDistLuma = 0;
1507
104k
  uint32_t   numSig         = 0;
1508
104k
  const SPS &sps            = *cs.sps;
1509
104k
  CodingUnit &cu            = *cs.cus[0];
1510
104k
  // `||` binds tighter than `?:`, so MTS is off whenever numMode < 0 or disableMTS is set.
  bool mtsAllowed = (numMode < 0) || disableMTS ? false : CU::isMTSAllowed(cu, COMP_Y);
1511
104k
  uint64_t singleFracBits   = 0;
1512
104k
  bool   splitCbfLumaSum    = false;
1513
104k
  double bestCostForISP     = bestCostSoFar;
1514
104k
  double dSingleCost        = MAX_DOUBLE;
1515
104k
  // The whole `||` chain (continued on the next source line) is the ternary's condition:
  // endLfnstIdx is 0 (no LFNST candidates) if any term holds, otherwise 2.
  int endLfnstIdx           = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (currArea.lwidth() < 8 || currArea.lheight() < 8))
1516
104k
                           || (currArea.lwidth() > sps.getMaxTbSize() || currArea.lheight() > sps.getMaxTbSize()) || !sps.LFNST || (numMode < 0) ? 0 : 2;
1517
104k
  const bool useTS          = cs.picture->useTS;
1518
104k
  // The sign of numMode has been consumed above; from here on only its magnitude matters.
  numMode                   = (numMode < 0) ? -numMode : numMode;
1519
1520
104k
  // No LFNST candidates for MIP blocks that allowLfnstWithMip() rejects.
  if (cu.mipFlag && !allowLfnstWithMip(cu.lumaSize()))
1521
1.89k
  {
1522
1.89k
    endLfnstIdx = 0;
1523
1.89k
  }
1524
104k
  int bestMTS = 0;
1525
104k
  int EndMTS  = mtsAllowed ? m_pcEncCfg->m_MTSIntraMaxCand : 0;
1526
104k
  // ISP: no MTS candidates are tested; LFNST is dropped as well when neither LFNST index has
  // any partitions recorded in m_ispTestedModes for this ISP mode.
  if (cu.ispMode && (EndMTS || endLfnstIdx))
1527
4.85k
  {
1528
4.85k
    EndMTS = 0;
1529
4.85k
    if ((m_ispTestedModes[1].numTotalParts[cu.ispMode - 1] == 0)
1530
293
     && (m_ispTestedModes[2].numTotalParts[cu.ispMode - 1] == 0))
1531
293
    {
1532
293
      endLfnstIdx = 0;
1533
293
    }
1534
4.85k
  }
1535
104k
  // BDPCM: neither LFNST nor MTS candidates are tested.
  if (cu.bdpcmM[CH_L])
1536
7.02k
  {
1537
7.02k
    endLfnstIdx = 0;
1538
7.02k
    EndMTS = 0;
1539
7.02k
  }
1540
104k
  bool checkTransformSkip = sps.transformSkip;
1541
1542
104k
  SizeType transformSkipMaxSize = 1 << sps.log2MaxTransformSkipBlockSize;
1543
104k
  // Transform skip is a candidate only if enabled for the picture and in the SPS, the CU uses
  // none of ISP/BDPCM/SBT, and both luma dimensions are within the SPS size limit.
  bool tsAllowed = useTS  && cu.cs->sps->transformSkip && (!cu.ispMode) && (!cu.bdpcmM[CH_L]) && (!cu.sbtInfo);
1544
104k
  tsAllowed &= cu.blocks[COMP_Y].width <= transformSkipMaxSize && cu.blocks[COMP_Y].height <= transformSkipMaxSize;
1545
104k
  if (tsAllowed)
1546
14.1k
  {
1547
14.1k
    // One more candidate index for the transform-skip entry appended to trModes below.
    EndMTS += 1;
1548
14.1k
  }
1549
104k
  // More than one transform configuration to evaluate: run the candidate loops.
  if (endLfnstIdx || EndMTS)
1550
42.6k
  {
1551
42.6k
    bool       splitCbfLuma  = false;
1552
42.6k
    const PartSplit ispType  = CU::getISPType(cu, COMP_Y);
1553
42.6k
    CUCtx cuCtx;
1554
42.6k
    cuCtx.isDQPCoded         = true;
1555
42.6k
    cuCtx.isChromaQpAdjCoded = true;
1556
42.6k
    cs.cost                  = 0.0;
1557
42.6k
    Distortion       singleDistTmpLuma = 0;
1558
42.6k
    uint64_t         singleTmpFracBits = 0;
1559
42.6k
    double           singleCostTmp     = 0;
1560
42.6k
    // ctxStart: CABAC state every candidate starts from; ctxBest: state after the best candidate.
    const TempCtx    ctxStart          (m_CtxCache, m_CABACEstimator->getCtx());
1561
42.6k
          TempCtx    ctxBest           (m_CtxCache);
1562
42.6k
    // Separate save structures are used for the ISP (index 0) and non-ISP (index 1) case.
    CodingStructure &saveCS            = *m_pSaveCS[cu.ispMode?0:1];
1563
42.6k
    TransformUnit *  tmpTU             = nullptr;
1564
42.6k
    int              bestLfnstIdx      = 0;
1565
42.6k
    int              startLfnstIdx     = 0;
1566
    // speedUps LFNST
1567
42.6k
    bool   rapidLFNST                  = false;
1568
42.6k
    bool   rapidDCT                    = false;
1569
42.6k
    double thresholdDCT                = 1;
1570
1571
42.6k
    // With m_MTS == 2 the early-termination threshold is relaxed by a term that shrinks with
    // the block area.
    if (m_pcEncCfg->m_MTS == 2)
1572
0
    {
1573
0
      thresholdDCT += 1.4 / sqrt(cu.lwidth() * cu.lheight());
1574
0
    }
1575
1576
42.6k
    // m_LFNST > 1 enables the LFNST early exits; m_LFNST > 2 additionally limits the search to
    // LFNST index 1 and enables the rapidDCT shortcut.
    if (m_pcEncCfg->m_LFNST > 1)
1577
0
    {
1578
0
      rapidLFNST = true;
1579
1580
0
      if (m_pcEncCfg->m_LFNST > 2)
1581
0
      {
1582
0
        rapidDCT    = true;
1583
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
1584
0
      }
1585
0
    }
1586
1587
42.6k
    saveCS.pcv              = cs.pcv;
1588
42.6k
    saveCS.picture          = cs.picture;
1589
42.6k
    saveCS.area.repositionTo( cs.area);
1590
1591
42.6k
    // For ISP the TU added below covers the first sub-partition rather than the whole area.
    if (cu.ispMode)
1592
4.56k
    {
1593
4.56k
      partitioner.splitCurrArea(ispType, cs);
1594
4.56k
    }
1595
1596
42.6k
    TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1597
1598
42.6k
    if (cu.ispMode)
1599
4.56k
    {
1600
4.56k
      // Give saveCS one TU per ISP sub-partition so the best result can be stored TU by TU.
      saveCS.clearTUs();
1601
4.56k
      do
1602
18.2k
      {
1603
18.2k
        saveCS.addTU(
1604
18.2k
          CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1605
18.2k
          partitioner.chType, cs.cus[0]);
1606
18.2k
      } while (partitioner.nextPart(cs));
1607
1608
4.56k
      partitioner.exitCurrSplit();
1609
4.56k
    }
1610
38.0k
    else
1611
38.0k
    {
1612
38.0k
      // Reuse the first TU of saveCS (creating it on first use) as storage for the best result.
      tmpTU = saveCS.tus.empty() ? &saveCS.addTU( currArea, partitioner.chType, nullptr ) : saveCS.tus.front();
1613
38.0k
      tmpTU->initData();
1614
38.0k
      tmpTU->UnitArea::operator=( currArea );
1615
38.0k
    }
1616
1617
1618
42.6k
    // Candidate list indexed by modeId: .first is the value written to tu.mtsIdx, .second is
    // checked below to decide whether a candidate is still tested (presumably cleared by
    // xPreCheckMTS for pruned candidates - confirm against xPreCheckMTS).
    std::vector<TrMode> trModes{ TrMode(0, true) };
1619
42.6k
    if (tsAllowed)
1620
14.1k
    {
1621
14.1k
      trModes.push_back(TrMode(1, true));
1622
14.1k
    }
1623
42.6k
    double dct2Cost           = MAX_DOUBLE;
1624
42.6k
    double trGrpStopThreshold = 1.001;
1625
42.6k
    double trGrpBestCost      = MAX_DOUBLE;
1626
1627
42.6k
    if (mtsAllowed)
1628
0
    {
1629
0
      if (m_pcEncCfg->m_LFNST)
1630
0
      {
1631
0
        // With LFNST configured the MTS candidates are ordered depending on whether the intra
        // mode index is below 34.
        uint32_t uiIntraMode = cs.cus[0]->intraDir[partitioner.chType];
1632
0
        int MTScur           = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
1633
1634
0
        trModes.push_back(TrMode(     2, true));
1635
0
        trModes.push_back(TrMode(MTScur, true));
1636
1637
0
        MTScur = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
1638
1639
0
        trModes.push_back(TrMode(MTScur,            true));
1640
0
        trModes.push_back(TrMode(MTS_DST7_DST7 + 3, true));
1641
0
      }
1642
0
      else
1643
0
      {
1644
0
        for (int i = 2; i < 6; i++)
1645
0
        {
1646
0
          trModes.push_back(TrMode(i, true));
1647
0
        }
1648
0
      }
1649
0
    }
1650
1651
42.6k
    if ((EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed))
1652
14.1k
    {
1653
14.1k
      xPreCheckMTS(tu, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, predBuf);
1654
14.1k
      // Without MTS, entry 1 is the transform-skip candidate; if it is no longer flagged for
      // testing, only candidate 0 remains.
      if (!mtsAllowed && !trModes[1].second)
1655
2.77k
      {
1656
2.77k
        EndMTS = 0;
1657
2.77k
      }
1658
14.1k
    }
1659
1660
42.6k
    // Cleared below to stop the modeId loop early.
    bool NStopMTS = true;
1661
1662
85.2k
    // Outer loop: transform candidates. Inner loop: LFNST indices. Both loop bounds (EndMTS,
    // endLfnstIdx) may be lowered inside the loops to terminate the search early.
    for (int modeId = 0; modeId <= EndMTS && NStopMTS; modeId++)
1663
42.6k
    {
1664
42.6k
      if (modeId > 1)
1665
0
      {
1666
0
        trGrpBestCost = MAX_DOUBLE;
1667
0
      }
1668
150k
      for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
1669
107k
      {
1670
107k
        // A non-zero LFNST index is only combined with candidate 0.
        if (lfnstIdx && modeId)
1671
0
        {
1672
0
          continue;
1673
0
        }
1674
107k
        if (mtsAllowed || tsAllowed)
1675
22.1k
        {
1676
22.1k
          // Stop testing LFNST indices for this candidate once transform skip is the best so far.
          if (m_pcEncCfg->m_TS && bestMTS == MTS_SKIP)
1677
0
          {
1678
0
            break;
1679
0
          }
1680
22.1k
          if (!m_pcEncCfg->m_LFNST && !trModes[modeId].second && mtsAllowed)
1681
0
          {
1682
0
            continue;
1683
0
          }
1684
1685
22.1k
          tu.mtsIdx[COMP_Y] = trModes[modeId].first;
1686
22.1k
        }
1687
1688
107k
        // ISP with LFNST: skip LFNST indices that have no partitions recorded for this ISP mode.
        if (cu.ispMode && lfnstIdx)
1689
9.12k
        {
1690
9.12k
          if (m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] == 0)
1691
0
          {
1692
0
            if (lfnstIdx == 2)
1693
0
            {
1694
0
              endLfnstIdx = 1;
1695
0
            }
1696
0
            continue;
1697
0
          }
1698
9.12k
        }
1699
1700
107k
        cu.lfnstIdx                          = lfnstIdx;
1701
107k
        cuCtx.lfnstLastScanPos               = false;
1702
107k
        cuCtx.violatesLfnstConstrained[CH_L] = false;
1703
107k
        cuCtx.violatesLfnstConstrained[CH_C] = false;
1704
1705
107k
        // Every candidate after the first starts from the same CABAC context state.
        if ((lfnstIdx != startLfnstIdx) || (modeId))
1706
64.9k
        {
1707
64.9k
          m_CABACEstimator->getCtx() = ctxStart;
1708
64.9k
        }
1709
1710
107k
        singleDistTmpLuma = 0;
1711
1712
107k
        if (cu.ispMode)
1713
13.6k
        {
1714
13.6k
          splitCbfLuma = false;
1715
1716
13.6k
          partitioner.splitCurrArea(ispType, cs);
1717
1718
13.6k
          singleCostTmp = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLuma, singleTmpFracBits, singleDistTmpLuma, cuCtx);
1719
1720
13.6k
          partitioner.exitCurrSplit();
1721
1722
13.6k
          if (modeId && (singleCostTmp == MAX_DOUBLE))
1723
0
          {
1724
0
            m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] = 0;
1725
0
          }
1726
1727
13.6k
          // The ISP cost is recorded when a single mode is tested, or when m_ISP >= 2 and
          // numMode <= 1.
          bool storeCost = (numMode == 1) ? true : false;
1728
1729
13.6k
          if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1730
13.6k
          {
1731
13.6k
            storeCost = true;
1732
13.6k
          }
1733
1734
13.6k
          if (storeCost)
1735
13.6k
          {
1736
13.6k
            m_ispTestedModes[0].bestCost[cu.ispMode - 1] = singleCostTmp;
1737
13.6k
          }
1738
13.6k
        }
1739
93.9k
        else
1740
93.9k
        {
1741
93.9k
          // TrLoad is passed as the last argument of xIntraCodingTUBlock; it is set under the
          // same conditions under which xPreCheckMTS was run above (restricted to lfnstIdx 0 in
          // the TS-only case). NOTE(review): presumably lets the TU coder reuse data prepared by
          // xPreCheckMTS - confirm in xIntraCodingTUBlock.
          bool TrLoad = (EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed && (lfnstIdx == 0)) ? true : false;
1742
1743
93.9k
          xIntraCodingTUBlock(tu, COMP_Y, false, singleDistTmpLuma, &numSig, predBuf, TrLoad);
1744
1745
93.9k
          cuCtx.mtsLastScanPos = false;
1746
          //----- determine rate and r-d cost -----
1747
18.4E
        // A non-default transform candidate without a coded luma CBF is rejected by giving it
        // maximum cost.
        if ((sps.LFNST ? (modeId == EndMTS && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1748
0
        {
1749
0
          singleCostTmp = MAX_DOUBLE;
1750
0
        }
1751
93.9k
        else
1752
93.9k
        {
1753
93.9k
          m_ispTestedModes[0].IspType      = TU_NO_ISP;
1754
93.9k
          m_ispTestedModes[0].subTuCounter = -1;
1755
93.9k
          singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, &cuCtx);
1756
1757
93.9k
          // Candidates beyond transform skip are only valid if the bit estimation set
          // cuCtx.mtsLastScanPos.
          if (tu.mtsIdx[COMP_Y] > MTS_SKIP)
1758
0
          {
1759
0
            if (!cuCtx.mtsLastScanPos)
1760
0
            {
1761
0
              singleCostTmp = MAX_DOUBLE;
1762
0
            }
1763
0
            else
1764
0
            {
1765
0
              singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1766
0
            }
1767
0
          }
1768
93.9k
          else
1769
93.9k
          {
1770
93.9k
            singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1771
93.9k
          }
1772
93.9k
        }
1773
1774
93.9k
          // Early termination after the very first candidate (modeId 0, lfnstIdx 0): if it is
          // already worse than bestCostSoFar scaled by thresholdDCT, drop the remaining
          // transform candidates (and, with rapidDCT, the LFNST candidates too).
          if (((EndMTS && (m_pcEncCfg->m_MTS == 2)) || rapidLFNST) && modeId == 0 && lfnstIdx == 0)
1775
0
          {
1776
0
            if (singleCostTmp > bestCostSoFar * thresholdDCT)
1777
0
            {
1778
0
              EndMTS = 0;
1779
1780
0
              if (rapidDCT)
1781
0
              {
1782
0
                endLfnstIdx = 0;   // break the loop but do not cpy best
1783
0
              }
1784
0
            }
1785
0
          }
1786
1787
93.9k
          // An LFNST index was tested but cuCtx.lfnstLastScanPos was not set by the bit
          // estimation: the candidate is rejected if the relevant CBF is set.
          if (lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode)
1788
46.7k
          {
1789
46.7k
            bool rootCbfL = false;
1790
1791
187k
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cu.cs->pcv); t++)
1792
140k
            {
1793
140k
              rootCbfL |= tu.cbf[t] != 0;
1794
140k
            }
1795
1796
46.7k
            if (rapidLFNST && !rootCbfL)
1797
0
            {
1798
0
              endLfnstIdx = lfnstIdx; // break the loop
1799
0
            }
1800
46.7k
            // Separate tree: any CBF counts. Otherwise only the luma CBF is looked at when the
            // smaller chroma dimension (blocks[1]) is below 4, else again any CBF.
            bool cbfAtZeroDepth = CU::isSepTree(cu)
1801
46.7k
              ? rootCbfL
1802
46.7k
              : (cs.area.chromaFormat != CHROMA_400 && std::min(cu.firstTU->blocks[1].width, cu.firstTU->blocks[1].height) < 4)
1803
3
                ? TU::getCbfAtDepth(tu, COMP_Y, currDepth)
1804
3
                : rootCbfL;
1805
1806
46.7k
            if (cbfAtZeroDepth)
1807
346
            {
1808
346
              singleCostTmp = MAX_DOUBLE;
1809
346
            }
1810
46.7k
          }
1811
93.9k
        }
1812
1813
107k
        // New best candidate: remember its cost, rate, distortion and indices.
        if (singleCostTmp < dSingleCost)
1814
39.4k
        {
1815
39.4k
          trGrpBestCost  = singleCostTmp;
1816
39.4k
          dSingleCost    = singleCostTmp;
1817
39.4k
          singleDistLuma = singleDistTmpLuma;
1818
39.4k
          singleFracBits = singleTmpFracBits;
1819
39.4k
          bestLfnstIdx   = lfnstIdx;
1820
39.4k
          bestMTS        = modeId;
1821
1822
39.4k
          if (dSingleCost < bestCostForISP)
1823
24.5k
          {
1824
24.5k
            bestCostForISP = dSingleCost;
1825
24.5k
          }
1826
1827
39.4k
          splitCbfLumaSum = splitCbfLuma;
1828
1829
39.4k
          if (lfnstIdx == 0 && modeId == 0 && cu.ispMode == 0)
1830
38.0k
          {
1831
38.0k
            dct2Cost = singleCostTmp;
1832
1833
38.0k
            // No coded luma coefficients for the first candidate: no further transform
            // candidates are tested (and no LFNST candidates either when rapidLFNST is on).
            if (!TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1834
32.2k
            {
1835
32.2k
              if (rapidLFNST)
1836
0
              {
1837
0
                 endLfnstIdx = 0;   // break the loop but do not cpy best
1838
0
              }
1839
1840
32.2k
              EndMTS = 0;
1841
32.2k
            }
1842
38.0k
          }
1843
1844
39.4k
          // Save the result unless this is the last candidate of both loops; the same
          // condition is used after the loops to decide whether to copy the saved data back.
          if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1845
29.1k
          {
1846
29.1k
            if (cu.ispMode)
1847
1.02k
            {
1848
1.02k
              saveCS.getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1849
1850
5.10k
              for (uint32_t j = 0; j < cs.tus.size(); j++)
1851
4.08k
              {
1852
4.08k
                saveCS.tus[j]->copyComponentFrom(*cs.tus[j], COMP_Y);
1853
4.08k
              }
1854
1.02k
            }
1855
28.1k
            else
1856
28.1k
            {
1857
28.1k
              saveCS.getPredBuf(tu.Y()).copyFrom(cs.getPredBuf(tu.Y()));
1858
28.1k
              saveCS.getRecoBuf(tu.Y()).copyFrom(cs.getRecoBuf(tu.Y()));
1859
1860
28.1k
              tmpTU->copyComponentFrom(tu, COMP_Y);
1861
28.1k
            }
1862
1863
29.1k
            ctxBest = m_CABACEstimator->getCtx();
1864
29.1k
          }
1865
      
1866
39.4k
        }
1867
68.2k
        else
1868
68.2k
        {
1869
68.2k
          // Candidate did not improve the cost: with rapidLFNST no further LFNST index is tried.
          if( rapidLFNST )
1870
0
          {
1871
0
            endLfnstIdx = lfnstIdx; // break the loop
1872
0
          }
1873
68.2k
        }
1874
107k
      }
1875
42.6k
      // With LFNST configured and m_MTS == 2, the candidate loop continues past a non-zero,
      // non-final modeId only if dct2Cost / trGrpBestCost stays below trGrpStopThreshold.
      if (m_pcEncCfg->m_LFNST && m_pcEncCfg->m_MTS == 2 && modeId && modeId != EndMTS)
1876
0
      {
1877
0
        NStopMTS = false;
1878
1879
0
        if (bestMTS || bestLfnstIdx)
1880
0
        {
1881
0
          if ((modeId > 1 && bestMTS == modeId) || modeId == 1)
1882
0
          {
1883
0
            NStopMTS = (dct2Cost / trGrpBestCost) < trGrpStopThreshold;
1884
0
          }
1885
0
        }
1886
0
      }
1887
42.6k
    }
1888
1889
42.6k
    cu.lfnstIdx = bestLfnstIdx;
1890
42.6k
    if (dSingleCost != MAX_DOUBLE)
1891
38.9k
    {
1892
38.9k
      // The best candidate was not the last one tested: restore its data from saveCS.
      if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1893
28.6k
      {
1894
28.6k
        if (cu.ispMode)
1895
721
        {
1896
721
          // Note: this local reference shadows the function-level currArea.
          const UnitArea& currArea = partitioner.currArea();
1897
721
          cs.getRecoBuf(currArea.Y()).copyFrom(saveCS.getRecoBuf(currArea.Y()));
1898
1899
721
          // cs holds a different number of TUs than saveCS: add TUs for further
          // sub-partitions until the counts match.
          if (saveCS.tus.size() != cs.tus.size())
1900
0
          {
1901
0
            partitioner.splitCurrArea(ispType, cs);
1902
1903
0
            do
1904
0
            {
1905
0
              partitioner.nextPart(cs);
1906
0
              cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1907
0
                partitioner.chType, cs.cus[0]);
1908
0
            } while (saveCS.tus.size() != cs.tus.size());
1909
1910
0
            partitioner.exitCurrSplit();
1911
0
          }
1912
1913
3.60k
          for (uint32_t j = 0; j < saveCS.tus.size(); j++)
1914
2.88k
          {
1915
2.88k
            cs.tus[j]->copyComponentFrom(*saveCS.tus[j], COMP_Y);
1916
2.88k
          }
1917
721
        }
1918
27.9k
        else
1919
27.9k
        {
1920
27.9k
          cs.getRecoBuf(tu.Y()).copyFrom(saveCS.getRecoBuf(tu.Y()));
1921
1922
27.9k
          tu.copyComponentFrom(*tmpTU, COMP_Y);
1923
27.9k
        }
1924
1925
28.6k
        m_CABACEstimator->getCtx() = ctxBest;
1926
28.6k
      }
1927
1928
      // otherwise this would've happened in useSubStructure
1929
38.9k
      cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1930
38.9k
    }
1931
42.6k
  }
1932
62.2k
  else
1933
62.2k
  {
1934
62.2k
    // Only one transform configuration to evaluate: code it directly, without save/restore.
    if (cu.ispMode)
1935
293
    {
1936
293
      const PartSplit ispType = CU::getISPType(cu, COMP_Y);
1937
293
      partitioner.splitCurrArea(ispType, cs);
1938
1939
293
      CUCtx      cuCtx;
1940
293
      dSingleCost = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLumaSum, singleFracBits, singleDistLuma, cuCtx);
1941
293
      partitioner.exitCurrSplit();
1942
293
      // Same ISP cost bookkeeping rule as in the candidate loop above.
      bool storeCost = (numMode == 1) ? true : false;
1943
293
      if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1944
293
      {
1945
293
        storeCost = true;
1946
293
      }
1947
293
      if (storeCost)
1948
293
      {
1949
293
        m_ispTestedModes[0].bestCost[cu.ispMode - 1] = dSingleCost;
1950
293
      }
1951
293
    }
1952
61.9k
    else
1953
61.9k
    {
1954
61.9k
      TransformUnit& tu =
1955
61.9k
        cs.addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1956
61.9k
      tu.depth = currDepth;
1957
1958
61.9k
      CHECK(!tu.Y().valid(), "Invalid TU");
1959
61.9k
      xIntraCodingTUBlock(tu, COMP_Y, false, singleDistLuma, &numSig, predBuf);
1960
      //----- determine rate and r-d cost -----
1961
61.9k
      m_ispTestedModes[0].IspType = TU_NO_ISP;
1962
61.9k
      m_ispTestedModes[0].subTuCounter = -1;
1963
61.9k
      singleFracBits = xGetIntraFracBitsQT(cs, partitioner, true);
1964
61.9k
      dSingleCost = m_pcRdCost->calcRdCost(singleFracBits, singleDistLuma);
1965
61.9k
    }
1966
62.2k
  }
1967
1968
104k
  // ISP: write the luma CBF of the chosen result at currDepth to every TU inside the area.
  if (cu.ispMode)
1969
4.85k
  { 
1970
4.85k
    for (auto& ptu : cs.tus)
1971
8.05k
    {
1972
8.05k
      if (currArea.Y().contains(ptu->Y()))
1973
8.05k
      {
1974
8.05k
        TU::setCbfAtDepth(*ptu, COMP_Y, currDepth, splitCbfLumaSum ? 1 : 0);
1975
8.05k
      }
1976
8.05k
    }
1977
4.85k
  }
1978
104k
  // Distortion and bits are accumulated into cs; the cost is overwritten, not accumulated.
  cs.dist     += singleDistLuma;
1979
104k
  cs.fracBits += singleFracBits;
1980
104k
  cs.cost      = dSingleCost;
1981
1982
104k
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!cs.slice->isIntra() + cs.slice->depth] );
1983
104k
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !cs.slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( cs.area.lheight() )][Log2( cs.area.lwidth() )] );
1984
104k
}
1985
1986
ChromaCbfs IntraSearch::xIntraChromaCodingQT(CodingStructure& cs, Partitioner& partitioner)
1987
254k
{
1988
254k
  UnitArea    currArea      = partitioner.currArea();
1989
1990
254k
  if( !currArea.Cb().valid() ) 
1991
0
    return ChromaCbfs(false);
1992
1993
254k
  TransformUnit& currTU     = *cs.getTU( currArea.chromaPos(), CH_C );
1994
254k
  const CodingUnit& cu  = *cs.getCU( currArea.chromaPos(), CH_C, TREE_D );
1995
254k
  ChromaCbfs cbfs(false);
1996
254k
  uint32_t   currDepth = partitioner.currTrDepth;
1997
254k
  const bool useTS = cs.picture->useTS;
1998
254k
  if (currDepth == currTU.depth)
1999
254k
  {
2000
254k
    if (!currArea.Cb().valid() || !currArea.Cr().valid())
2001
0
    {
2002
0
      return cbfs;
2003
0
    }
2004
2005
254k
    CodingStructure& saveCS = *m_pSaveCS[1];
2006
254k
    saveCS.pcv = cs.pcv;
2007
254k
    saveCS.picture = cs.picture;
2008
254k
    saveCS.area.repositionTo(cs.area);
2009
2010
254k
    TransformUnit& tmpTU = saveCS.tus.empty() ? saveCS.addTU(currArea, partitioner.chType, nullptr) : *saveCS.tus.front();
2011
254k
    tmpTU.initData();
2012
254k
    tmpTU.UnitArea::operator=(currArea);
2013
254k
    const unsigned      numTBlocks = getNumberValidTBlocks(*cs.pcv);
2014
2015
254k
    CompArea& cbArea = currTU.blocks[COMP_Cb];
2016
254k
    CompArea& crArea = currTU.blocks[COMP_Cr];
2017
254k
    double     bestCostCb = MAX_DOUBLE;
2018
254k
    double     bestCostCr = MAX_DOUBLE;
2019
254k
    Distortion bestDistCb = 0;
2020
254k
    Distortion bestDistCr = 0;
2021
2022
254k
    TempCtx ctxStartTU(m_CtxCache);
2023
254k
    TempCtx ctxStart(m_CtxCache);
2024
254k
    TempCtx ctxBest(m_CtxCache);
2025
2026
254k
    ctxStartTU = m_CABACEstimator->getCtx();
2027
254k
    ctxStart = m_CABACEstimator->getCtx();
2028
254k
    currTU.jointCbCr = 0;
2029
2030
    // Do predictions here to avoid repeating the "default0Save1Load2" stuff
2031
254k
    int  predMode = cu.bdpcmM[CH_C] ? BDPCM_IDX : CU::getFinalIntraMode(cu, CH_C);
2032
2033
254k
    PelBuf piPredCb = cs.getPredBuf(COMP_Cb);
2034
254k
    PelBuf piPredCr = cs.getPredBuf(COMP_Cr);
2035
2036
254k
    initIntraPatternChType(*currTU.cu, cbArea);
2037
254k
    initIntraPatternChType(*currTU.cu, crArea);
2038
2039
254k
    if (CU::isLMCMode(predMode))
2040
18.6k
    {
2041
18.6k
      loadLMLumaRecPels(cu, cbArea);
2042
18.6k
      predIntraChromaLM(COMP_Cb, piPredCb, cu, cbArea, predMode);
2043
18.6k
      predIntraChromaLM(COMP_Cr, piPredCr, cu, crArea, predMode);
2044
18.6k
    }
2045
236k
    else
2046
236k
    {
2047
236k
      predIntraAng(COMP_Cb, piPredCb, cu);
2048
236k
      predIntraAng(COMP_Cr, piPredCr, cu);
2049
236k
    }
2050
2051
    // determination of chroma residuals including reshaping and cross-component prediction
2052
    //----- get chroma residuals -----
2053
254k
    PelBuf resiCb = cs.getResiBuf(COMP_Cb);
2054
254k
    PelBuf resiCr = cs.getResiBuf(COMP_Cr);
2055
254k
    resiCb.subtract(cs.getOrgBuf(COMP_Cb), piPredCb);
2056
254k
    resiCr.subtract(cs.getOrgBuf(COMP_Cr), piPredCr);
2057
2058
    //----- get reshape parameter ----
2059
254k
    ReshapeData& reshapeData = cs.picture->reshapeData;
2060
254k
    bool doReshaping = (cs.picHeader->lmcsEnabled && cs.picHeader->lmcsChromaResidualScale && (cs.slice->isIntra() || reshapeData.getCTUFlag()) && (cbArea.width * cbArea.height > 4));
2061
254k
    if (doReshaping)
2062
0
    {
2063
0
      const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].size()));
2064
0
      const CompArea& areaY = CompArea(COMP_Y, currTU.chromaFormat, area);
2065
0
      currTU.chromaAdj = reshapeData.calculateChromaAdjVpduNei(currTU, areaY, currTU.cu->treeType);
2066
0
    }
2067
2068
    //===== store original residual signals (std and crossCompPred) =====
2069
1.52M
    for( int k = 0; k < 5; k++ )
2070
1.27M
    {
2071
1.27M
      m_orgResiCb[k].compactResize( cbArea );
2072
1.27M
      m_orgResiCr[k].compactResize( crArea );
2073
1.27M
    }
2074
509k
    for (int k = 0; k < 1; k += 4)
2075
254k
    {
2076
254k
      m_orgResiCb[k].copyFrom(resiCb);
2077
254k
      m_orgResiCr[k].copyFrom(resiCr);
2078
2079
254k
      if (doReshaping)
2080
0
      {
2081
0
        int cResScaleInv = currTU.chromaAdj;
2082
0
        m_orgResiCb[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cb]);
2083
0
        m_orgResiCr[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cr]);
2084
0
      }
2085
254k
    }
2086
2087
254k
    CUCtx cuCtx;
2088
254k
    cuCtx.isDQPCoded = true;
2089
254k
    cuCtx.isChromaQpAdjCoded = true;
2090
254k
    cuCtx.lfnstLastScanPos = false;
2091
2092
254k
    CodingStructure& saveCScur = *m_pSaveCS[2];
2093
2094
254k
    saveCScur.pcv = cs.pcv;
2095
254k
    saveCScur.picture = cs.picture;
2096
254k
    saveCScur.area.repositionTo(cs.area);
2097
2098
254k
    TransformUnit& tmpTUcur = saveCScur.tus.empty() ? saveCScur.addTU(currArea, partitioner.chType, nullptr) : *saveCScur.tus.front();
2099
254k
    tmpTUcur.initData();
2100
254k
    tmpTUcur.UnitArea::operator=(currArea);
2101
2102
254k
    TempCtx ctxBestTUL(m_CtxCache);
2103
2104
254k
    const SPS& sps = *cs.sps;
2105
254k
    double     bestCostCbcur = MAX_DOUBLE;
2106
254k
    double     bestCostCrcur = MAX_DOUBLE;
2107
254k
    Distortion bestDistCbcur = 0;
2108
254k
    Distortion bestDistCrcur = 0;
2109
2110
254k
    int  endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8))
2111
243k
      || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) || !sps.LFNST ? 0 : 2;
2112
254k
    int  startLfnstIdx = 0;
2113
254k
    int  bestLfnstIdx = 0;
2114
254k
    bool testLFNST = sps.LFNST;
2115
2116
    // speedUps LFNST
2117
254k
    bool rapidLFNST = false;
2118
254k
    if (m_pcEncCfg->m_LFNST > 1)
2119
0
    {
2120
0
      rapidLFNST = true;
2121
0
      if (m_pcEncCfg->m_LFNST > 2)
2122
0
      {
2123
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
2124
0
      }
2125
0
    }
2126
254k
    int ts_used = 0;
2127
254k
    bool testTS = false;
2128
254k
    if (partitioner.chType != CH_C)
2129
0
    {
2130
0
      startLfnstIdx = currTU.cu->lfnstIdx;
2131
0
      endLfnstIdx = currTU.cu->lfnstIdx;
2132
0
      bestLfnstIdx = currTU.cu->lfnstIdx;
2133
0
      testLFNST  = false;
2134
0
      rapidLFNST = false;
2135
0
      ts_used = currTU.mtsIdx[COMP_Y];
2136
0
    }
2137
254k
    if (cu.bdpcmM[CH_C])
2138
34.6k
    {
2139
34.6k
      endLfnstIdx = 0;
2140
34.6k
      testLFNST = false;
2141
34.6k
    }
2142
2143
254k
    double dSingleCostAll = MAX_DOUBLE;
2144
254k
    double singleCostTmpAll = 0;
2145
2146
930k
    for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
2147
675k
    {
2148
675k
      if (rapidLFNST && lfnstIdx)
2149
0
      {
2150
0
        if ((lfnstIdx == 2) && (bestLfnstIdx == 0))
2151
0
        {
2152
0
          continue;
2153
0
        }
2154
0
      }
2155
2156
675k
      currTU.cu->lfnstIdx = lfnstIdx;
2157
675k
      if (lfnstIdx)
2158
421k
      {
2159
421k
        m_CABACEstimator->getCtx() = ctxStartTU;
2160
421k
      }
2161
2162
675k
      cuCtx.lfnstLastScanPos = false;
2163
675k
      cuCtx.violatesLfnstConstrained[CH_L] = false;
2164
675k
      cuCtx.violatesLfnstConstrained[CH_C] = false;
2165
2166
2.02M
      for (uint32_t c = COMP_Cb; c < numTBlocks; c++)
2167
1.35M
      {
2168
1.35M
        const ComponentID compID = ComponentID(c);
2169
1.35M
        const CompArea& area = currTU.blocks[compID];
2170
1.35M
        double     dSingleCost = MAX_DOUBLE;
2171
1.35M
        Distortion singleDistCTmp = 0;
2172
1.35M
        double     singleCostTmp = 0;
2173
1.35M
        bool tsAllowed = useTS && TU::isTSAllowed(currTU, compID) && m_pcEncCfg->m_useChromaTS && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2174
1.35M
        if ((partitioner.chType == CH_L) && (!ts_used))
2175
0
        {
2176
0
          tsAllowed = false;
2177
0
        }
2178
1.35M
        uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests       
2179
1.35M
        std::vector<TrMode> trModes;
2180
1.35M
        if (nNumTransformCands > 1)
2181
0
        {
2182
0
          trModes.push_back(TrMode(0, true));   // DCT2
2183
0
          trModes.push_back(TrMode(1, true));   // TS
2184
0
          testTS = true;
2185
0
        }
2186
1.35M
        bool cbfDCT2 = true;
2187
1.35M
        const bool isLastMode = testLFNST || cs.sps->jointCbCr ||  tsAllowed ? false : true;
2188
1.35M
        int bestModeId = 0;
2189
1.35M
        ctxStart = m_CABACEstimator->getCtx();
2190
2.70M
        for (int modeId = 0; modeId < nNumTransformCands; modeId++)
2191
1.35M
        {
2192
1.35M
          if (doReshaping || lfnstIdx || modeId)
2193
842k
          {
2194
842k
            resiCb.copyFrom(m_orgResiCb[0]);
2195
842k
            resiCr.copyFrom(m_orgResiCr[0]);
2196
842k
          }
2197
1.35M
          if (modeId == 0)
2198
1.35M
          {
2199
1.35M
            if ( tsAllowed)
2200
0
            {
2201
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, compID);
2202
0
            }
2203
1.35M
          }
2204
2205
1.35M
          currTU.mtsIdx[compID] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : modeId;
2206
2207
1.35M
          if (modeId)
2208
0
          {
2209
0
            if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP)
2210
0
            {
2211
0
              break;
2212
0
            }
2213
0
            m_CABACEstimator->getCtx() = ctxStart;
2214
0
          }
2215
1.35M
          singleDistCTmp = 0;
2216
1.35M
          if (tsAllowed)
2217
0
          {
2218
0
            xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp, 0, 0, true);
2219
0
            if ((modeId == 0) && (!trModes[modeId + 1].second))
2220
0
            {
2221
0
              nNumTransformCands = 1;
2222
0
            }
2223
0
          }
2224
1.35M
          else
2225
1.35M
        {
2226
1.35M
          xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp);
2227
1.35M
        }
2228
1.35M
        if (((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmM[CH_C])
2229
0
          && !TU::getCbf(currTU, compID)))   // In order not to code TS flag when cbf is zero, the case for TS with
2230
                                             // cbf being zero is forbidden.
2231
0
        {
2232
0
          singleCostTmp = MAX_DOUBLE;
2233
0
        }
2234
1.35M
        else
2235
1.35M
        {
2236
1.35M
          uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID, &cuCtx);
2237
1.35M
          singleCostTmp = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp);
2238
1.35M
        }
2239
2240
1.35M
        if (singleCostTmp < dSingleCost)
2241
1.35M
        {
2242
1.35M
          dSingleCost = singleCostTmp;
2243
2244
1.35M
          if (compID == COMP_Cb)
2245
675k
          {
2246
675k
            bestCostCb = singleCostTmp;
2247
675k
            bestDistCb = singleDistCTmp;
2248
675k
          }
2249
675k
          else
2250
675k
          {
2251
675k
            bestCostCr = singleCostTmp;
2252
675k
            bestDistCr = singleDistCTmp;
2253
675k
          }
2254
1.35M
          bestModeId = modeId;
2255
1.35M
          if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2)
2256
1.28M
          {
2257
1.28M
            cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth);
2258
1.28M
          }
2259
1.35M
          if (!isLastMode)
2260
1.35M
          {
2261
1.35M
            saveCS.getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
2262
1.35M
            tmpTU.copyComponentFrom(currTU, compID);
2263
1.35M
            ctxBest = m_CABACEstimator->getCtx();
2264
1.35M
          }
2265
1.35M
        }
2266
1.35M
        }
2267
1.35M
        if (testTS && ((c == COMP_Cb && bestModeId < (nNumTransformCands - 1)) ))
2268
0
        {
2269
0
          m_CABACEstimator->getCtx() = ctxBest;
2270
2271
0
          currTU.copyComponentFrom(tmpTU, COMP_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf
2272
0
        }
2273
1.35M
      }
2274
2275
675k
      singleCostTmpAll = bestCostCb + bestCostCr;
2276
2277
675k
      bool rootCbfL = false;
2278
675k
      if (testLFNST)
2279
641k
      {
2280
2.56M
        for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2281
1.92M
        {
2282
1.92M
          rootCbfL |= bool(tmpTU.cbf[t]);
2283
1.92M
        }
2284
641k
        if (rapidLFNST && !rootCbfL)
2285
0
        {
2286
0
          endLfnstIdx = lfnstIdx; // end this
2287
0
        }
2288
641k
      }
2289
2290
675k
      if (testLFNST && lfnstIdx && !cuCtx.lfnstLastScanPos)
2291
277k
      {
2292
277k
        bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu)
2293
277k
          ? rootCbfL : (cs.area.chromaFormat != CHROMA_400
2294
0
            && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2295
0
          ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2296
277k
        if (cbfAtZeroDepth)
2297
1.32k
        {
2298
1.32k
          singleCostTmpAll = MAX_DOUBLE;
2299
1.32k
        }
2300
277k
      }
2301
675k
      if ((testLFNST || testTS) && (singleCostTmpAll < dSingleCostAll))
2302
220k
      {
2303
220k
        bestLfnstIdx = lfnstIdx;
2304
220k
        if ((lfnstIdx != endLfnstIdx) || testTS)
2305
210k
        {
2306
210k
          dSingleCostAll = singleCostTmpAll;
2307
2308
210k
          bestCostCbcur = bestCostCb;
2309
210k
          bestCostCrcur = bestCostCr;
2310
210k
          bestDistCbcur = bestDistCb;
2311
210k
          bestDistCrcur = bestDistCr;
2312
2313
210k
          saveCScur.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2314
210k
          saveCScur.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2315
2316
210k
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cb);
2317
210k
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cr);
2318
210k
        }
2319
220k
        ctxBestTUL = m_CABACEstimator->getCtx();
2320
220k
      }
2321
675k
    }
2322
254k
    if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2323
210k
    {
2324
210k
      bestCostCb = bestCostCbcur;
2325
210k
      bestCostCr = bestCostCrcur;
2326
210k
      bestDistCb = bestDistCbcur;
2327
210k
      bestDistCr = bestDistCrcur;
2328
210k
      currTU.cu->lfnstIdx = bestLfnstIdx;
2329
210k
      if (!cs.sps->jointCbCr)
2330
0
      {
2331
0
        cs.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2332
0
        cs.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2333
2334
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2335
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2336
2337
0
        m_CABACEstimator->getCtx() = ctxBestTUL;
2338
0
      }
2339
210k
    }
2340
2341
254k
    Distortion bestDistCbCr = bestDistCb + bestDistCr;
2342
2343
254k
    if (cs.sps->jointCbCr)
2344
254k
    {
2345
254k
      if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2346
210k
      {
2347
210k
        saveCS.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2348
210k
        saveCS.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2349
2350
210k
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2351
210k
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2352
210k
        m_CABACEstimator->getCtx() = ctxBestTUL;
2353
210k
        ctxBest = m_CABACEstimator->getCtx();
2354
210k
      }
2355
      // Test using joint chroma residual coding
2356
254k
      double     bestCostCbCr = bestCostCb + bestCostCr;
2357
254k
      int        bestJointCbCr = 0;
2358
254k
      bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cr)) ||
2359
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cb)) ||
2360
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2));
2361
254k
      bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cr)) ||
2362
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cb)) ||
2363
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP));
2364
254k
      bool       lastIsBest = false;
2365
254k
      bool noLFNST1 = false;
2366
254k
      if (rapidLFNST && (startLfnstIdx != endLfnstIdx))
2367
0
      {
2368
0
        if (bestLfnstIdx == 2)
2369
0
        {
2370
0
          noLFNST1 = true;
2371
0
        }
2372
0
        else
2373
0
        {
2374
0
          endLfnstIdx = 1;
2375
0
        }
2376
0
      }
2377
2378
930k
      for (int lfnstIdxj = startLfnstIdx; lfnstIdxj <= endLfnstIdx; lfnstIdxj++)
2379
675k
      {
2380
675k
        if (rapidLFNST && noLFNST1 && (lfnstIdxj == 1))
2381
0
        {
2382
0
          continue;
2383
0
        }
2384
675k
        currTU.cu->lfnstIdx = lfnstIdxj;
2385
675k
        std::vector<int> jointCbfMasksToTest;
2386
675k
        if (TU::getCbf(tmpTU, COMP_Cb) || TU::getCbf(tmpTU, COMP_Cr))
2387
239k
        {
2388
239k
          jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, m_orgResiCb, m_orgResiCr);
2389
239k
        }
2390
675k
        for (int cbfMask : jointCbfMasksToTest)
2391
239k
        {
2392
239k
          currTU.jointCbCr = (uint8_t)cbfMask;
2393
239k
          ComponentID codeCompId = ((currTU.jointCbCr >> 1) ? COMP_Cb : COMP_Cr);
2394
239k
          ComponentID otherCompId = ((codeCompId == COMP_Cb) ? COMP_Cr : COMP_Cb);
2395
239k
          bool tsAllowed = useTS && TU::isTSAllowed(currTU, codeCompId) && (m_pcEncCfg->m_useChromaTS) && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2396
239k
          if ((partitioner.chType == CH_L)&& tsAllowed && (currTU.mtsIdx[COMP_Y] != MTS_SKIP))
2397
0
          {
2398
0
            tsAllowed = false;
2399
0
          }
2400
239k
          if (!tsAllowed)
2401
239k
          {
2402
239k
            checkTSOnly = false;
2403
239k
          }
2404
239k
          uint8_t     numTransformCands = 1 + (tsAllowed && !(checkDCTOnly || checkTSOnly)? 1 : 0); // DCT + TS = 2 tests
2405
239k
          std::vector<TrMode> trModes;
2406
239k
          if (numTransformCands > 1)
2407
0
          {
2408
0
            trModes.push_back(TrMode(0, true)); // DCT2
2409
0
            trModes.push_back(TrMode(1, true));//TS
2410
0
          }
2411
239k
          else
2412
239k
          {
2413
239k
            currTU.mtsIdx[codeCompId] = checkTSOnly || currTU.cu->bdpcmM[CH_C] ? 1 : 0;
2414
239k
          }
2415
2416
478k
          for (int modeId = 0; modeId < numTransformCands; modeId++)
2417
239k
          {
2418
239k
            Distortion distTmp = 0;
2419
239k
            currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : MTS_DCT2_DCT2;
2420
239k
            if (numTransformCands > 1)
2421
0
            {
2422
0
              currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : trModes[modeId].first;
2423
0
            }
2424
239k
            currTU.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
2425
2426
239k
            m_CABACEstimator->getCtx() = ctxStartTU;
2427
2428
239k
            resiCb.copyFrom(m_orgResiCb[cbfMask]);
2429
239k
            resiCr.copyFrom(m_orgResiCr[cbfMask]);
2430
239k
            if ((modeId == 0) && (numTransformCands > 1))
2431
0
            {
2432
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, COMP_Cb);
2433
0
              currTU.mtsIdx[codeCompId] = trModes[modeId].first;
2434
0
              currTU.mtsIdx[(codeCompId == COMP_Cr) ? COMP_Cb : COMP_Cr] = MTS_DCT2_DCT2;
2435
0
            }
2436
239k
            cuCtx.lfnstLastScanPos = false;
2437
239k
            cuCtx.violatesLfnstConstrained[CH_L] = false;
2438
239k
            cuCtx.violatesLfnstConstrained[CH_C] = false;
2439
239k
            if (numTransformCands > 1)
2440
0
            {
2441
0
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0, 0, true);
2442
0
              if ((modeId == 0) && !trModes[modeId + 1].second)
2443
0
              {
2444
0
                numTransformCands = 1;
2445
0
              }
2446
0
            }
2447
239k
            else
2448
239k
            {
2449
239k
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0);
2450
239k
            }
2451
2452
239k
            double costTmp = std::numeric_limits<double>::max();
2453
239k
            if (distTmp < MAX_DISTORTION)
2454
236k
            {
2455
236k
              uint64_t bits = xGetIntraFracBitsQTChroma(currTU, COMP_Cb, &cuCtx);
2456
236k
              costTmp = m_pcRdCost->calcRdCost(bits, distTmp);
2457
236k
            }
2458
2.87k
            else if (!currTU.mtsIdx[codeCompId])
2459
2.87k
            {
2460
2.87k
              numTransformCands = 1;
2461
2.87k
            }
2462
239k
            bool rootCbfL = false;
2463
957k
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2464
718k
            {
2465
718k
              rootCbfL |= bool(tmpTU.cbf[t]);
2466
718k
            }
2467
239k
            if (rapidLFNST && !rootCbfL)
2468
0
            {
2469
0
              endLfnstIdx = lfnstIdxj;
2470
0
            }
2471
239k
            if (testLFNST && currTU.cu->lfnstIdx && !cuCtx.lfnstLastScanPos)
2472
2.80k
            {
2473
2.80k
              bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) ? rootCbfL
2474
2.80k
                : (cs.area.chromaFormat != CHROMA_400 && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2475
0
                ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2476
2.80k
              if (cbfAtZeroDepth)
2477
2.80k
              {
2478
2.80k
                costTmp = MAX_DOUBLE;
2479
2.80k
              }
2480
2.80k
            }
2481
239k
            if (costTmp < bestCostCbCr)
2482
90.5k
            {
2483
90.5k
              bestCostCbCr = costTmp;
2484
90.5k
              bestDistCbCr = distTmp;
2485
90.5k
              bestJointCbCr = currTU.jointCbCr;
2486
2487
              // store data
2488
90.5k
              bestLfnstIdx = lfnstIdxj;
2489
90.5k
              if ((cbfMask != jointCbfMasksToTest.back() || (lfnstIdxj != endLfnstIdx)) || (modeId != (numTransformCands - 1)))
2490
73.3k
              {
2491
73.3k
                saveCS.getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
2492
73.3k
                saveCS.getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
2493
2494
73.3k
                tmpTU.copyComponentFrom(currTU, COMP_Cb);
2495
73.3k
                tmpTU.copyComponentFrom(currTU, COMP_Cr);
2496
2497
73.3k
                ctxBest = m_CABACEstimator->getCtx();
2498
73.3k
              }
2499
17.1k
              else
2500
17.1k
              {
2501
17.1k
                lastIsBest = true;
2502
17.1k
                cs.cus[0]->lfnstIdx = bestLfnstIdx;
2503
17.1k
              }
2504
90.5k
            }
2505
239k
          }
2506
239k
        }
2507
2508
        // Retrieve the best CU data (unless it was the very last one tested)
2509
675k
      }
2510
254k
      if (!lastIsBest)
2511
237k
      {
2512
237k
        cs.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2513
237k
        cs.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2514
2515
237k
        cs.cus[0]->lfnstIdx = bestLfnstIdx;
2516
237k
        currTU.copyComponentFrom(tmpTU, COMP_Cb);
2517
237k
        currTU.copyComponentFrom(tmpTU, COMP_Cr);
2518
237k
        m_CABACEstimator->getCtx() = ctxBest;
2519
237k
      }
2520
254k
      currTU.jointCbCr = (TU::getCbf(currTU, COMP_Cb) || TU::getCbf(currTU, COMP_Cr)) ? bestJointCbCr : 0;
2521
254k
    } // jointCbCr
2522
2523
254k
    cs.dist += bestDistCbCr;
2524
254k
    cuCtx.violatesLfnstConstrained[CH_L] = false;
2525
254k
    cuCtx.violatesLfnstConstrained[CH_C] = false;
2526
254k
    cuCtx.lfnstLastScanPos = false;
2527
254k
    cuCtx.violatesMtsCoeffConstraint = false;
2528
254k
    cuCtx.mtsLastScanPos = false;
2529
254k
    cbfs.cbf(COMP_Cb) = TU::getCbf(currTU, COMP_Cb);
2530
254k
    cbfs.cbf(COMP_Cr) = TU::getCbf(currTU, COMP_Cr);
2531
254k
  }
2532
0
  else
2533
0
  {
2534
0
    unsigned   numValidTBlocks = getNumberValidTBlocks(*cs.pcv);
2535
0
    ChromaCbfs SplitCbfs(false);
2536
2537
0
    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
2538
0
    {
2539
0
      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
2540
0
    }
2541
0
    else if (currTU.cu->ispMode)
2542
0
    {
2543
0
      partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs);
2544
0
    }
2545
0
    else
2546
0
      THROW("Implicit TU split not available");
2547
2548
0
    do
2549
0
    {
2550
0
      ChromaCbfs subCbfs = xIntraChromaCodingQT(cs, partitioner);
2551
2552
0
      for (uint32_t ch = COMP_Cb; ch < numValidTBlocks; ch++)
2553
0
      {
2554
0
        const ComponentID compID = ComponentID(ch);
2555
0
        SplitCbfs.cbf(compID) |= subCbfs.cbf(compID);
2556
0
      }
2557
0
    } while (partitioner.nextPart(cs));
2558
2559
0
    partitioner.exitCurrSplit();
2560
2561
    /*if (lumaUsesISP && cs.dist == MAX_UINT) //ahenkel
2562
    {
2563
      return cbfs;
2564
    }*/
2565
0
    {
2566
0
      cbfs.Cb |= SplitCbfs.Cb;
2567
0
      cbfs.Cr |= SplitCbfs.Cr;
2568
2569
0
      if (1)   //(!lumaUsesISP)
2570
0
      {
2571
0
        for (auto& ptu : cs.tus)
2572
0
        {
2573
0
          if (currArea.Cb().contains(ptu->Cb()) || (!ptu->Cb().valid() && currArea.Y().contains(ptu->Y())))
2574
0
          {
2575
0
            TU::setCbfAtDepth(*ptu, COMP_Cb, currDepth, SplitCbfs.Cb);
2576
0
            TU::setCbfAtDepth(*ptu, COMP_Cr, currDepth, SplitCbfs.Cr);
2577
0
          }
2578
0
        }
2579
0
      }
2580
0
    }
2581
0
  }
2582
254k
  return cbfs;
2583
254k
}
2584
2585
uint64_t IntraSearch::xFracModeBitsIntraLuma(const CodingUnit& cu, const unsigned* mpmLst)
2586
849k
{
2587
849k
  m_CABACEstimator->resetBits();
2588
2589
849k
  if (!cu.ciip)
2590
849k
  {
2591
849k
    m_CABACEstimator->intra_luma_pred_mode(cu, mpmLst);
2592
849k
  }
2593
2594
849k
  return m_CABACEstimator->getEstFracBits();
2595
849k
}
2596
2597
template<typename T, size_t N, int M>
2598
void IntraSearch::xReduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, SortedPelUnitBufs<M>& sortedPelBuffer, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const CodingUnit& cu, const bool fastMip)
2599
17.4k
{
2600
17.4k
  const int maxCandPerType = numModesForFullRD >> 1;
2601
17.4k
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList;
2602
17.4k
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList;
2603
17.4k
  const double minCost = candCostList[0];
2604
17.4k
  bool keepOneMip = candModeList.size() > numModesForFullRD;
2605
17.4k
  const int maxNumConv = 3; 
2606
2607
17.4k
  int numConv = 0;
2608
17.4k
  int numMip = 0;
2609
78.8k
  for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++)
2610
61.4k
  {
2611
61.4k
    bool addMode = false;
2612
61.4k
    const ModeInfo& orgMode = candModeList[idx];
2613
2614
61.4k
    if (!orgMode.mipFlg)
2615
44.0k
    {
2616
44.0k
      addMode = (numConv < maxNumConv);
2617
44.0k
      numConv += addMode ? 1:0;
2618
44.0k
    }
2619
17.4k
    else
2620
17.4k
    {
2621
17.4k
      addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip );
2622
17.4k
      keepOneMip = false;
2623
17.4k
      numMip += addMode ? 1:0;
2624
17.4k
    }
2625
61.4k
    if( addMode )
2626
61.4k
    {
2627
61.4k
      tempRdModeList.push_back(orgMode);
2628
61.4k
      tempCandCostList.push_back(candCostList[idx]);
2629
61.4k
    }
2630
61.4k
  }
2631
2632
  // sort Pel Buffer
2633
17.4k
  int i = -1;
2634
17.4k
  for( auto &m: tempRdModeList)
2635
61.4k
  {
2636
61.4k
    if( ! (m == candModeList.at( ++i )) )
2637
0
    {
2638
0
      for( int j = i; j < (int)candModeList.size()-1; )
2639
0
      {
2640
0
        if( m == candModeList.at( ++j ) )
2641
0
        {
2642
0
          sortedPelBuffer.swap( i, j);
2643
0
          break;
2644
0
        }
2645
0
      }
2646
0
    }
2647
61.4k
  }
2648
17.4k
  sortedPelBuffer.reduceTo( (int)tempRdModeList.size() );
2649
2650
17.4k
  if ((cu.lwidth() > 8 && cu.lheight() > 8))
2651
15.5k
  {
2652
    // Sort MIP candidates by Hadamard cost
2653
15.5k
    const int transpOff = getNumModesMip(cu.Y());
2654
15.5k
    static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0);
2655
15.5k
    static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0);
2656
15.5k
    for (uint8_t mode : { 0, 1, 2 })
2657
46.5k
    {
2658
46.5k
      uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0);
2659
46.5k
      updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3);
2660
46.5k
    }
2661
2662
    // Append MIP mode to RD mode list
2663
15.5k
    const int modeListSize = int(tempRdModeList.size());
2664
31.0k
    for (int idx = 0; idx < 3; idx++)
2665
31.0k
    {
2666
31.0k
      const bool     isTransposed = (sortedMipModes[idx] >= transpOff ? true : false);
2667
31.0k
      const uint32_t mipIdx       = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]);
2668
31.0k
      const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx );
2669
31.0k
      bool alreadyIncluded = false;
2670
124k
      for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
2671
108k
      {
2672
108k
        if (tempRdModeList[modeListIdx] == mipMode)
2673
15.5k
        {
2674
15.5k
          alreadyIncluded = true;
2675
15.5k
          break;
2676
15.5k
        }
2677
108k
      }
2678
2679
31.0k
      if (!alreadyIncluded)
2680
15.5k
      {
2681
15.5k
        tempRdModeList.push_back(mipMode);
2682
15.5k
        tempCandCostList.push_back(0);
2683
15.5k
        if( fastMip ) break;
2684
15.5k
      }
2685
31.0k
    }
2686
15.5k
  }
2687
2688
17.4k
  candModeList = tempRdModeList;
2689
17.4k
  candCostList = tempCandCostList;
2690
17.4k
  numModesForFullRD = int(candModeList.size());
2691
17.4k
}
2692
2693
// Pre-selects the transform candidates in `trModes` for one TU by handing the
// residual to TrQuant::checktransformsNxN.
//
// \param tu       transform unit under test
// \param trModes  candidate list that checktransformsNxN operates on
// \param maxCand  candidate limit forwarded to checktransformsNxN. The previous
//                 implementation ignored this argument and always read
//                 m_pcEncCfg->m_MTSIntraMaxCand; it is now honoured.
// \param predBuf  optional precomputed luma prediction. When null the luma
//                 prediction is generated here (MIP or angular, depending on
//                 the CU). Only consulted for COMP_Y.
// \param compID   component to check. For COMP_Y the prediction and residual
//                 buffers of the coding structure are (re)written first. For
//                 any other component the residual is left as prepared by the
//                 caller, and the component actually checked is derived from
//                 tu.jointCbCr when that is non-zero.
void IntraSearch::xPreCheckMTS(TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, PelUnitBuf *predBuf, const ComponentID& compID)
{
  if (compID != COMP_Y)
  {
    // With joint Cb-Cr coding the coded component follows the joint mode
    // (Cb when bit 1 of jointCbCr is set, otherwise Cr).
    ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID);
    m_pcTrQuant->checktransformsNxN(tu, trModes, maxCand, codeCompId);
    return;
  }

  CodingStructure&   cs          = *tu.cs;
  const CompArea&    area        = tu.blocks[compID];
  const ReshapeData& reshapeData = cs.picture->reshapeData;
  const CodingUnit&  cu          = *cs.getCU(area.pos(), CH_L, TREE_D);
  PelBuf             piPred      = cs.getPredBuf(area);
  PelBuf             piResi      = cs.getResiBuf(area);

  initIntraPatternChType(*tu.cu, area);

  //===== get prediction signal =====
  if (predBuf)
  {
    piPred.copyFrom(predBuf->Y());
  }
  else if (CU::isMIP(cu, CH_L))
  {
    initIntraMip(cu);
    predIntraMip(piPred, cu);
  }
  else
  {
    predIntraAng(COMP_Y, piPred, cu);
  }

  //===== get residual signal =====
  if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
  {
    // LMCS active for this CTU: subtract from the reshaped original.
    piResi.subtract(cs.getRspOrgBuf(), piPred);
  }
  else
  {
    CPelBuf piOrg = cs.getOrgBuf(COMP_Y);
    piResi.subtract(piOrg, piPred);
  }

  m_pcTrQuant->checktransformsNxN(tu, trModes, maxCand, compID);
}
2737
2738
// Codes the luma sub-partitions of an ISP split one after another and
// accumulates their distortion, bits and RD cost, giving up early when the
// running cost can no longer beat `bestCostForISP`.
//
// \param cs                coding structure holding the CU (cs.cus[0]) and its TUs;
//                          TUs are appended on demand for sub-partitions that do
//                          not exist yet
// \param subTuPartitioner  partitioner positioned on the first sub-partition;
//                          advanced with nextPart() after each coded part
// \param bestCostForISP    cost to beat
// \param ispType           split type recorded in m_ispTestedModes[0].IspType
// \param splitcbf          out: OR of the luma CBFs of all coded sub-partitions
// \param singleFracBits    out: accumulated fractional bits
// \param singleDistLuma    out: accumulated luma distortion
// \param cuCtx             CU context; isDQPCoded and isChromaQpAdjCoded are set
//                          to true, mtsLastScanPos is cleared after every part
// \return accumulated RD cost, or MAX_DOUBLE when the test was terminated early
double IntraSearch::xTestISP(CodingStructure& cs, Partitioner& subTuPartitioner, double bestCostForISP, PartSplit ispType, bool& splitcbf, uint64_t& singleFracBits, Distortion& singleDistLuma, CUCtx& cuCtx)
{
  CodingUnit& cu = *cs.cus[0];

  int        partsDone  = 0;
  bool       aborted    = false;
  bool       anyLumaCbf = false;
  Distortion distTotal  = 0;
  uint64_t   bitsTotal  = 0;
  double     costTotal  = 0;

  cuCtx.isDQPCoded         = true;
  cuCtx.isChromaQpAdjCoded = true;

  do
  {
    // Reuse the TU of this sub-partition if the structure already has one,
    // otherwise create it for the partitioner's current area.
    const bool needsNewTU = cs.tus.size() < (partsDone + 1);
    TransformUnit& partTU = needsNewTU
      ? cs.addTU(CS::getArea(cs, subTuPartitioner.currArea(), subTuPartitioner.chType, subTuPartitioner.treeType),
                 subTuPartitioner.chType, cs.cus[0])
      : *cs.tus[partsDone];
    partTU.depth = subTuPartitioner.currTrDepth;

    // Encode TU
    Distortion partDist = 0;
    xIntraCodingTUBlock(partTU, COMP_Y, false, partDist, 0);
    cuCtx.mtsLastScanPos = false;

    if (partDist == MAX_INT)   // all zero CBF skip
    {
      aborted   = true;
      costTotal = MAX_DOUBLE;
      break;
    }

    // Bits are only estimated while the cost of what was coded so far (bits of
    // the previous parts, distortion including this part) is still competitive;
    // otherwise this part contributes zero bits and the run is marked aborted.
    uint64_t partBits = 0;
    if (m_pcRdCost->calcRdCost(bitsTotal, distTotal + partDist) > bestCostForISP)
    {
      aborted = true;
    }
    else
    {
      m_ispTestedModes[0].IspType      = ispType;
      m_ispTestedModes[0].subTuCounter = partsDone;
      partBits = xGetIntraFracBitsQT(cs, subTuPartitioner, true, &cuCtx);
    }

    costTotal += m_pcRdCost->calcRdCost(partBits, partDist);
    distTotal += partDist;
    bitsTotal += partBits;
    partsDone++;

    anyLumaCbf |= TU::getCbfAtDepth(*cs.getTU(subTuPartitioner.currArea().lumaPos(), subTuPartitioner.chType, partsDone - 1),
                                    COMP_Y, subTuPartitioner.currTrDepth);

    const int  totalParts = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1];
    const bool partsLeft  = partsDone < totalParts;

    // With m_ISP == 1 the cost checks are skipped once the last part is coded.
    if ((m_pcEncCfg->m_ISP != 1) || partsLeft)
    {
      if (costTotal > bestCostForISP)
      {
        aborted = true;
        break;
      }
      if (partsLeft)
      {
        // Tighter bound while parts remain: they can only add cost.
        double threshold;
        if (totalParts == 2)
        {
          threshold = 0.95;
        }
        else if (partsDone == 1)
        {
          threshold = 0.83;
        }
        else
        {
          threshold = 0.91;
        }
        if (costTotal > bestCostForISP * threshold)
        {
          aborted = true;
          break;
        }
      }
    }
  } while (subTuPartitioner.nextPart(cs));

  singleDistLuma = distTotal;
  singleFracBits = bitsTotal;
  splitcbf       = anyLumaCbf;

  if (aborted)
  {
    return MAX_DOUBLE;
  }
  return costTotal;
}
2820
2821
int IntraSearch::xSpeedUpISP(int speed, bool& testISP, int mode, int& noISP, int& endISP, CodingUnit& cu, static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, const ModeInfo& bestPUMode, int bestISP, int bestLfnstIdx)
2822
12.7k
{
2823
12.7k
  if (speed)
2824
5.15k
  {
2825
5.15k
    if (mode >= 1)
2826
2.72k
    {
2827
2.72k
      if (m_ispTestedModes[0].splitIsFinished[1] && m_ispTestedModes[0].splitIsFinished[0])
2828
0
      {
2829
0
        testISP = false;
2830
0
        endISP = 0;
2831
0
      }
2832
2.72k
      else
2833
2.72k
      {
2834
2.72k
        if (m_pcEncCfg->m_ISP >= 2)
2835
2.72k
        {
2836
2.72k
          if (mode == 1) //best Hor||Ver
2837
2.43k
          {
2838
2.43k
            int bestDir = 0;
2839
7.29k
            for (int d = 0; d < 2; d++)
2840
4.86k
            {
2841
4.86k
              int d2 = d ? 0 : 1;
2842
4.86k
              if ((m_ispTestedModes[0].bestCost[d] <= m_ispTestedModes[0].bestCost[d2])
2843
4.57k
                && (m_ispTestedModes[0].bestCost[d] != MAX_DOUBLE))
2844
290
              {
2845
290
                bestDir = d + 1;
2846
290
                m_ispTestedModes[0].splitIsFinished[d2] = true;
2847
290
              }
2848
4.86k
            }
2849
2.43k
            m_ispTestedModes[0].bestModeSoFar = bestDir;
2850
2.43k
            if (m_ispTestedModes[0].bestModeSoFar <= 0)
2851
2.14k
            {
2852
2.14k
              m_ispTestedModes[0].splitIsFinished[1] = true;
2853
2.14k
              m_ispTestedModes[0].splitIsFinished[0] = true;
2854
2.14k
              testISP = false;
2855
2.14k
              endISP = 0;
2856
2.14k
            }
2857
2.43k
          }
2858
2.72k
          if (m_ispTestedModes[0].bestModeSoFar == 2)
2859
72
          {
2860
72
            noISP = 1;
2861
72
          }
2862
2.65k
          else
2863
2.65k
          {
2864
2.65k
            endISP = 1;
2865
2.65k
          }
2866
2.72k
        }
2867
2.72k
      }
2868
2.72k
    }
2869
5.15k
    if (testISP)
2870
3.01k
    {
2871
3.01k
      if (mode == 2)
2872
290
      {
2873
870
        for (int d = 0; d < 2; d++)
2874
580
        {
2875
580
          int d2 = d ? 0 : 1;
2876
580
          if (m_ispTestedModes[0].bestCost[d] == MAX_DOUBLE)
2877
267
          {
2878
267
            m_ispTestedModes[0].splitIsFinished[d] = true;
2879
267
          }
2880
580
          if ((m_ispTestedModes[0].bestCost[d2] < 1.3 * m_ispTestedModes[0].bestCost[d])
2881
313
            && (int(m_ispTestedModes[0].bestSplitSoFar) != (d + 1)))
2882
234
          {
2883
234
            if (d)
2884
198
            {
2885
198
              endISP = 1;
2886
198
            }
2887
36
            else
2888
36
            {
2889
36
              noISP = 1;
2890
36
            }
2891
234
            m_ispTestedModes[0].splitIsFinished[d] = true;
2892
234
          }
2893
580
        }
2894
290
      }
2895
2.72k
      else
2896
2.72k
      {
2897
2.72k
        if (m_ispTestedModes[0].splitIsFinished[0])
2898
36
        {
2899
36
          noISP = 1;
2900
36
        }
2901
2.72k
        if (m_ispTestedModes[0].splitIsFinished[1])
2902
254
        {
2903
254
          endISP = 1;
2904
254
        }
2905
2.72k
      }
2906
3.01k
    }
2907
5.15k
    if ((noISP == 1) && (endISP == 1))
2908
23
    {
2909
23
      endISP = 0;
2910
23
    }
2911
5.15k
  }
2912
7.56k
  else
2913
7.56k
  {
2914
7.56k
    bool stopFound = false;
2915
7.56k
    if (m_pcEncCfg->m_ISP >= 3)
2916
7.56k
    {
2917
7.56k
      if (mode)
2918
2.70k
      {
2919
2.70k
        if ((bestISP == 0) || ((bestPUMode.modeId != RdModeList[mode - 1].modeId)
2920
97
          && (bestPUMode.modeId != RdModeList[mode].modeId)))
2921
1.81k
        {
2922
1.81k
          stopFound = true;
2923
1.81k
        }
2924
2.70k
      }
2925
7.56k
    }
2926
7.56k
    if (cu.mipFlag || cu.multiRefIdx)
2927
171
    {
2928
171
      cu.mipFlag = false;
2929
171
      cu.multiRefIdx = 0;
2930
171
      if (!stopFound)
2931
0
      {
2932
0
        for (int k = 0; k < mode; k++)
2933
0
        {
2934
0
          if (cu.intraDir[CH_L] == RdModeList[k].modeId)
2935
0
          {
2936
0
            stopFound = true;
2937
0
            break;
2938
0
          }
2939
0
        }
2940
0
      }
2941
171
    }
2942
7.56k
    if (stopFound)
2943
1.81k
    {
2944
1.81k
      testISP = false;
2945
1.81k
      endISP = 0;
2946
1.81k
      return 1;
2947
1.81k
    }
2948
5.74k
    if (!stopFound && (m_pcEncCfg->m_ISP >= 2) && (cu.intraDir[CH_L] == DC_IDX))
2949
888
    {
2950
888
      stopFound = true;
2951
888
      endISP = 0;
2952
888
      return 1;
2953
888
    }
2954
5.74k
  }
2955
10.0k
  return 0;
2956
12.7k
}
2957
2958
void IntraSearch::xSpeedUpIntra(double bestcost, int& EndMode, int& speedIntra, CodingUnit& cu)
2959
22.7k
{
2960
22.7k
  int bestIdxbefore = m_ispTestedModes[0].bestIntraMode;
2961
22.7k
  if (m_ispTestedModes[0].isIntra)
2962
0
  {
2963
0
    if (bestIdxbefore == 1)//ISP
2964
0
    {
2965
0
      speedIntra = 14;
2966
0
    }
2967
0
    if (bestIdxbefore == 4)//MTS
2968
0
    {
2969
0
      speedIntra = 3;
2970
0
    }
2971
0
  }
2972
22.7k
  else if (!cu.cs->slice->isIntra())
2973
0
  {
2974
0
    if (bestcost != MAX_DOUBLE)
2975
0
    {
2976
0
      speedIntra = 10;
2977
0
    }
2978
0
  }
2979
22.7k
  if (m_ispTestedModes[0].bestBefore[0] == -1)
2980
20.2k
  {
2981
20.2k
    speedIntra |= 7;
2982
20.2k
    if (m_pcEncCfg->m_FastIntraTools == 2)
2983
0
    {
2984
0
      EndMode = 1;
2985
0
    }
2986
20.2k
  }
2987
22.7k
  if (!cu.cs->slice->isIntra())
2988
0
  {
2989
0
    if ((m_ispTestedModes[0].bestBefore[1] == 1) || (m_ispTestedModes[0].bestBefore[2] == 1))
2990
0
    {
2991
0
      speedIntra |= 2;
2992
0
    }
2993
0
    if ((m_ispTestedModes[0].bestBefore[1] == 4) || (m_ispTestedModes[0].bestBefore[2] == 4))
2994
0
    {
2995
0
      speedIntra |= 3;
2996
0
    }
2997
0
    if ((m_ispTestedModes[0].bestBefore[1] == 2) || (m_ispTestedModes[0].bestBefore[2] == 2))
2998
0
    {
2999
0
      speedIntra |= 1;
3000
0
    }
3001
0
  }
3002
22.7k
}
3003
3004
} // namespace vvenc
3005
3006
//! \}
3007