Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/EncoderLib/IntraSearch.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     IntraSearch.cpp
45
 *  \brief    encoder intra search class
46
 */
47
48
#include "IntraSearch.h"
49
#include "EncPicture.h"
50
#include "CommonLib/CommonDef.h"
51
#include "CommonLib/Rom.h"
52
#include "CommonLib/Picture.h"
53
#include "CommonLib/UnitTools.h"
54
#include "CommonLib/dtrace_next.h"
55
#include "CommonLib/dtrace_buffer.h"
56
#include "CommonLib/Reshape.h"
57
#include <math.h>
58
#include "vvenc/vvencCfg.h"
59
60
//! \ingroup EncoderLib
61
//! \{
62
63
namespace vvenc {
64
65
// Shorthand for constructing a SubCtx from Ctx::Palette and the argument c.
#define PLTCtx(c) SubCtx( Ctx::Palette, c )
66
67
/**
 * Constructs an IntraSearch with every resource pointer cleared.
 *
 * The working buffers are only allocated by init(). The destructor calls
 * destroy(), which tests m_pTempCS and m_pBestCS before releasing them, so
 * these pointers need a defined value even when init() was never called.
 * They are cleared in the constructor body rather than in the initializer
 * list so that the result does not depend on the member declaration order.
 *
 * NOTE(review): the extra assignments are redundant (but harmless) if the
 * header already gives these members default initializers - confirm.
 */
IntraSearch::IntraSearch()
  : m_pSaveCS       (nullptr)
  , m_pcEncCfg      (nullptr)
  , m_pcTrQuant     (nullptr)
  , m_pcRdCost      (nullptr)
  , m_CABACEstimator(nullptr)
  , m_CtxCache      (nullptr)
{
  m_pTempCS           = nullptr;
  m_pBestCS           = nullptr;
  m_SortedPelUnitBufs = nullptr;
}
76
77
void IntraSearch::init(const VVEncCfg &encCfg, TrQuant *pTrQuant, RdCost *pRdCost, SortedPelUnitBufs<SORTED_BUFS> *pSortedPelUnitBufs, XUCache &unitCache )
78
0
{
79
0
  IntraPrediction::init( encCfg.m_internChromaFormat, encCfg.m_internalBitDepth[ CH_L ] );
80
81
0
  m_pcEncCfg          = &encCfg;
82
0
  m_pcTrQuant         = pTrQuant;
83
0
  m_pcRdCost          = pRdCost;
84
0
  m_SortedPelUnitBufs = pSortedPelUnitBufs;
85
86
0
  const ChromaFormat chrFormat = encCfg.m_internChromaFormat;
87
0
  const int maxCUSize          = encCfg.m_CTUSize;
88
89
0
  Area area = Area( 0, 0, maxCUSize, maxCUSize );
90
91
0
  m_pTempCS = new CodingStructure( unitCache, nullptr );
92
0
  m_pBestCS = new CodingStructure( unitCache, nullptr );
93
94
0
  m_pTempCS->createForSearch( chrFormat, area );
95
0
  m_pBestCS->createForSearch( chrFormat, area );
96
97
0
  const int uiNumSaveLayersToAllocate = 3;
98
0
  m_pSaveCS = new CodingStructure*[uiNumSaveLayersToAllocate];
99
0
  for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
100
0
  {
101
0
    m_pSaveCS[ layer ] = new CodingStructure( unitCache, nullptr );
102
0
    m_pSaveCS[ layer ]->createForSearch( chrFormat, Area( 0, 0, maxCUSize, maxCUSize ) );
103
0
    m_pSaveCS[ layer ]->initStructData();
104
0
  }
105
106
0
  CompArea chromaArea( COMP_Cb, chrFormat, area, true );
107
0
  for( int i = 0; i < 5; i++ )
108
0
  {
109
0
    m_orgResiCb[i].create( chromaArea );
110
0
    m_orgResiCr[i].create( chromaArea );
111
0
  }
112
0
}
113
114
void IntraSearch::destroy()
115
0
{
116
0
  if ( m_pSaveCS )
117
0
  {
118
0
    const int uiNumSaveLayersToAllocate = 3;
119
0
    for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ )
120
0
    {
121
0
      if ( m_pSaveCS[ layer ] ) { m_pSaveCS[ layer ]->destroy(); delete m_pSaveCS[ layer ]; }
122
0
    }
123
0
    delete[] m_pSaveCS;
124
0
    m_pSaveCS = nullptr;
125
0
  }
126
127
0
  if( m_pTempCS )
128
0
  {
129
0
    m_pTempCS->destroy();
130
0
    delete m_pTempCS; m_pTempCS = nullptr;
131
0
  }
132
133
0
  if( m_pBestCS )
134
0
  {
135
0
    m_pBestCS->destroy();
136
0
    delete m_pBestCS; m_pBestCS = nullptr;
137
0
  }
138
0
}
139
140
/** Destructor: hands all cleanup to destroy(). */
IntraSearch::~IntraSearch()
{
  this->destroy();
}
144
145
/**
 * Stores the CABAC estimator and context cache used by the search functions.
 *
 * \param cabacEstimator  CABAC writer used for bit estimation (pointer is stored)
 * \param ctxCache        context cache used for TempCtx snapshots (pointer is stored)
 */
void IntraSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache *ctxCache )
{
  m_CtxCache       = ctxCache;
  m_CABACEstimator = cabacEstimator;
}
150
151
//////////////////////////////////////////////////////////////////////////
152
// INTRA PREDICTION
153
//////////////////////////////////////////////////////////////////////////
154
static constexpr double COST_UNKNOWN = -65536.0;
155
156
double IntraSearch::xFindInterCUCost( CodingUnit &cu )
157
0
{
158
0
  if( CU::isConsIntra(cu) && !cu.slice->isIntra() )
159
0
  {
160
    //search corresponding inter CU cost
161
0
    for( int i = 0; i < m_numCuInSCIPU; i++ )
162
0
    {
163
0
      if( cu.lumaPos() == m_cuAreaInSCIPU[i].pos() && cu.lumaSize() == m_cuAreaInSCIPU[i].size() )
164
0
      {
165
0
        return m_cuCostInSCIPU[i];
166
0
      }
167
0
    }
168
0
  }
169
0
  return COST_UNKNOWN;
170
0
}
171
172
void IntraSearch::xEstimateLumaRdModeList(int& numModesForFullRD,
173
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList,
174
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& HadModeList,
175
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandCostList,
176
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandHadList, CodingUnit& cu, bool testMip )
177
0
{
178
0
  PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTRA_EST_RD_CAND, cu.cs, CH_L );
179
0
  const uint16_t intra_ctx_size = Ctx::IntraLumaMpmFlag.size() + Ctx::IntraLumaPlanarFlag.size() + Ctx::MultiRefLineIdx.size() + Ctx::ISPMode.size() + Ctx::MipFlag.size();
180
0
  const TempCtx  ctxStartIntraCtx(m_CtxCache, SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), m_CABACEstimator->getCtx()));
181
0
  const double   sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE;
182
0
  const int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
183
184
0
  CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search");
185
186
0
  const SPS& sps     = *cu.cs->sps;
187
0
  const bool fastMip = sps.MIP && m_pcEncCfg->m_useFastMIP;
188
189
  // this should always be true
190
0
  CHECK( !cu.Y().valid(), "CU is not valid" );
191
192
0
  const CompArea& area = cu.Y();
193
194
0
  const UnitArea localUnitArea(area.chromaFormat, Area(0, 0, area.width, area.height));
195
0
  if( testMip)
196
0
  {
197
0
    numModesForFullRD += fastMip ? numModesForFullRD - std::min( m_pcEncCfg->m_useFastMIP, numModesForFullRD )
198
0
                                 : numModesForFullRD;
199
0
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD + 1 );
200
0
  }
201
0
  else
202
0
  {
203
0
    m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD );
204
0
  }
205
206
0
  CPelBuf piOrg   = cu.cs->getOrgBuf(COMP_Y);
207
0
  PelBuf piPred  = m_SortedPelUnitBufs->getTestBuf(COMP_Y);
208
209
0
  const ReshapeData& reshapeData = cu.cs->picture->reshapeData;
210
0
  if (cu.cs->picHeader->lmcsEnabled && reshapeData.getCTUFlag())
211
0
  {
212
0
    piOrg = cu.cs->getRspOrgBuf();
213
0
  }
214
0
  DistParam distParam    = m_pcRdCost->setDistParam( piOrg, piPred, sps.bitDepths[ CH_L ], DF_HAD_2SAD); // Use HAD (SATD) cost
215
216
0
  const int numHadCand = (testMip ? 2 : 1) * 3;
217
218
  //*** Derive (regular) candidates using Hadamard
219
0
  cu.mipFlag = false;
220
0
  cu.multiRefIdx = 0;
221
222
  //===== init pattern for luma prediction =====
223
0
  initIntraPatternChType(cu, cu.Y(), true);
224
225
0
  bool satdChecked[NUM_INTRA_MODE] = { false };
226
227
0
  unsigned mpmLst[NUM_MOST_PROBABLE_MODES];
228
0
  CU::getIntraMPMs(cu, mpmLst);
229
230
0
  const int decMsk = ( 1 << m_pcEncCfg->m_IntraEstDecBit ) - 1;
231
232
0
  m_parentCandList.resize( 0 );
233
0
  m_parentCandList.reserve( ( numModesAvailable >> m_pcEncCfg->m_IntraEstDecBit ) + 2 );
234
235
0
  for( unsigned mode = 0; mode < numModesAvailable; mode++ )
236
0
  {
237
    // Skip checking extended Angular modes in the first round of SATD
238
0
    if( mode > DC_IDX && ( mode & decMsk ) )
239
0
    {
240
0
      continue;
241
0
    }
242
243
0
    m_parentCandList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ) );
244
0
  }
245
   
246
0
  for( int decDst = 1 << m_pcEncCfg->m_IntraEstDecBit; decDst > 0; decDst >>= 1 )
247
0
  {
248
0
    for( unsigned idx = 0; idx < m_parentCandList.size(); idx++ )
249
0
    {
250
0
      int modeParent = m_parentCandList[idx].modeId;
251
252
0
      int off = decDst & decMsk;
253
0
      int inc = decDst << 1;
254
255
0
#if 1 // INTRA_AS_IN_VTM
256
0
      if( off != 0 && ( modeParent <= ( DC_IDX + 1 ) || modeParent >= ( NUM_LUMA_MODE - 1 ) ) )
257
0
      {
258
0
        continue;
259
0
      }
260
261
0
#endif
262
0
      for( int mode = modeParent - off; mode < modeParent + off + 1; mode += inc )
263
0
      {
264
0
        if( satdChecked[mode] || mode < 0 || mode >= NUM_LUMA_MODE )
265
0
        {
266
0
          continue;
267
0
        }
268
269
0
        cu.intraDir[0] = mode;
270
271
0
        initPredIntraParams( cu, cu.Y(), sps );
272
0
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
273
0
        predIntraAng( COMP_Y, piPred, cu );
274
275
        // Use the min between SAD and HAD as the cost criterion
276
        // SAD is scaled by 2 to align with the scaling of HAD
277
0
        Distortion minSadHad = distParam.distFunc( distParam );
278
279
0
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
280
281
        //restore ctx
282
0
        m_CABACEstimator->getCtx() = SubCtx( CtxSet( Ctx::IntraLumaMpmFlag(), intra_ctx_size ), ctxStartIntraCtx );
283
284
0
        double cost = ( double ) minSadHad + ( double ) fracModeBits * sqrtLambdaForFirstPass;
285
0
        DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, mode );
286
287
0
        int insertPos = -1;
288
0
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos );
289
0
        updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), ( double ) minSadHad, HadModeList, CandHadList, numHadCand );
290
0
        m_SortedPelUnitBufs->insert( insertPos, ( int ) RdModeList.size() );
291
292
0
        satdChecked[mode] = true;
293
0
      }
294
0
    }
295
296
0
    m_parentCandList.resize( RdModeList.size() );
297
0
    std::copy( RdModeList.cbegin(), RdModeList.cend(), m_parentCandList.begin() );
298
0
  }
299
300
0
  const bool isFirstLineOfCtu = (((cu.block(COMP_Y).y)&((cu.cs->sps)->CTUSize - 1)) == 0);
301
0
  if( m_pcEncCfg->m_MRL && ! isFirstLineOfCtu )
302
0
  {
303
0
    cu.multiRefIdx = 1;
304
0
    unsigned  multiRefMPM [NUM_MOST_PROBABLE_MODES];
305
0
    CU::getIntraMPMs(cu, multiRefMPM);
306
307
0
    for (int mRefNum = 1; mRefNum < MRL_NUM_REF_LINES; mRefNum++)
308
0
    {
309
0
      int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum];
310
311
0
      cu.multiRefIdx = multiRefIdx;
312
0
      initIntraPatternChType(cu, cu.Y(), true);
313
314
0
      for (int x = 1; x < NUM_MOST_PROBABLE_MODES; x++)
315
0
      {
316
0
        cu.intraDir[0] = multiRefMPM[x];
317
0
        initPredIntraParams(cu, cu.Y(), sps);
318
0
        distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
319
0
        predIntraAng(COMP_Y, piPred, cu);
320
321
        // Use the min between SAD and SATD as the cost criterion
322
        // SAD is scaled by 2 to align with the scaling of HAD
323
0
        Distortion minSadHad = distParam.distFunc(distParam);
324
325
        // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated.
326
0
        uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
327
328
        //restore ctx
329
0
        m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
330
331
0
        double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass;
332
//        DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMRL: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, cu.intraDir[0]);
333
334
0
        int insertPos = -1;
335
0
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD, &insertPos );
336
0
        updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), (double)minSadHad, HadModeList, CandHadList,  numHadCand );
337
0
        m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
338
0
      }
339
0
    }
340
0
    cu.multiRefIdx = 0;
341
0
  }
342
343
0
  if (testMip)
344
0
  {
345
0
    cu.mipFlag = true;
346
0
    cu.multiRefIdx = 0;
347
348
0
    double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE };
349
350
0
    initIntraPatternChType(cu, cu.Y());
351
0
    initIntraMip( cu );
352
353
0
    const int transpOff    = getNumModesMip( cu.Y() );
354
0
    const int numModesFull = (transpOff << 1);
355
0
    for( uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++ )
356
0
    {
357
0
      const bool     isTransposed = (uiModeFull >= transpOff ? true : false);
358
0
      const uint32_t uiMode       = (isTransposed ? uiModeFull - transpOff : uiModeFull);
359
360
0
      cu.mipTransposedFlag = isTransposed;
361
0
      cu.intraDir[CH_L] = uiMode;
362
0
      distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf;
363
0
      predIntraMip(piPred, cu);
364
365
      // Use the min between SAD and HAD as the cost criterion
366
      // SAD is scaled by 2 to align with the scaling of HAD
367
0
      Distortion minSadHad = distParam.distFunc(distParam);
368
369
0
      uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst );
370
371
      //restore ctx
372
0
      m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx);
373
374
0
      double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass;
375
0
      mipHadCost[uiModeFull] = cost;
376
0
      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiModeFull);
377
378
0
      int insertPos = -1;
379
0
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList,  CandCostList, numModesForFullRD+1, &insertPos );
380
0
      updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), 0.8*(double)minSadHad, HadModeList, CandHadList,  numHadCand );
381
0
      m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size());
382
0
    }
383
384
0
    const double thresholdHadCost = 1.0 + 1.4 / sqrt((double)(cu.lwidth()*cu.lheight()));
385
0
    xReduceHadCandList(RdModeList, CandCostList, *m_SortedPelUnitBufs, numModesForFullRD, thresholdHadCost, mipHadCost, cu, fastMip);
386
0
  }
387
388
0
  if( m_pcEncCfg->m_bFastUDIUseMPMEnabled )
389
0
  {
390
0
    const int numMPMs = NUM_MOST_PROBABLE_MODES;
391
0
    unsigned  intraMpms[numMPMs];
392
393
0
    cu.multiRefIdx = 0;
394
395
0
    const int numCand = CU::getIntraMPMs( cu, intraMpms );
396
0
    ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0);
397
398
0
    for( int j = 0; j < numCand; j++ )
399
0
    {
400
0
      bool mostProbableModeIncluded = false;
401
0
      mostProbableMode.modeId = intraMpms[j];
402
403
0
      for( int i = 0; i < numModesForFullRD; i++ )
404
0
      {
405
0
        mostProbableModeIncluded |= ( mostProbableMode == RdModeList[i] );
406
0
      }
407
0
      if( !mostProbableModeIncluded )
408
0
      {
409
0
        numModesForFullRD++;
410
0
        RdModeList.push_back( mostProbableMode );
411
0
        CandCostList.push_back(0);
412
0
      }
413
0
    }
414
0
  }
415
0
}
416
417
/**
 * Selects the best luma intra mode of a CU by rate-distortion search.
 *
 * A candidate list is first derived from prediction-only costs
 * (xEstimateLumaRdModeList) and optionally shortened by the PB-intra fast check.
 * Every remaining candidate, plus up to two BDPCM candidates, is then coded with
 * xIntraCodingLumaQT, with and without ISP where allowed. The cheapest result is
 * copied back into the CU's coding structure.
 *
 * \param cu           coding unit under test; its intra fields are used as scratch
 *                     state and finally set to the winning mode
 * \param partitioner  current partitioning state (area and channel type)
 * \param bestCost     best cost known so far; passed by value and lowered locally
 *                     whenever a cheaper candidate is found
 * \return             true if a candidate was accepted. false is returned early
 *                     when the PB-intra fast check removes every candidate
 *                     (cs.dist = MAX_DISTORTION, cs.interHad = 0).
 * \throws             via THROW("fix this") if the RD loop accepted no candidate
 */
bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, double bestCost)
{
  CodingStructure       &cs           = *cu.cs;
  const int             width         = partitioner.currArea().lwidth();
  const int             height        = partitioner.currArea().lheight();

  //===== loop over partitions =====

  // context snapshot; restored after every coded candidate
  const TempCtx ctxStart           ( m_CtxCache, m_CABACEstimator->getCtx() );

  // variables for saving fast intra modes scan results across multiple LFNST passes
  double costInterCU = xFindInterCUCost( cu );

  bool validReturn = false;

  //===== determine set of modes to be tested (using prediction signal only) =====
  int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> RdModeList;
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> HadModeList;
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList;
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList;

  // size-dependent default, overridden by the configuration when that is positive
  int numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[Log2(width) - MIN_CU_LOG2][Log2(height) - MIN_CU_LOG2];
  if (m_pcEncCfg->m_numIntraModesFullRD > 0)
    numModesForFullRD=m_pcEncCfg->m_numIntraModesFullRD;

#if INTRA_FULL_SEARCH
  numModesForFullRD = numModesAvailable;
#endif
  const SPS& sps = *cu.cs->sps;
  const bool mipAllowed = sps.MIP && cu.lwidth() <= sps.getMaxTbSize() && cu.lheight() <= sps.getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.lumaSize()));
  const int SizeThr     = 8 >> std::max( 0, m_pcEncCfg->m_useFastMIP - 1 );
  const bool testMip    = mipAllowed && ( cu.lwidth() <= ( SizeThr * cu.lheight() ) && cu.lheight() <= ( SizeThr * cu.lwidth() ) ) && ( cu.lwidth() <= MIP_MAX_WIDTH && cu.lheight() <= MIP_MAX_HEIGHT );
  bool testISP = sps.ISP && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize());
  if (testISP)
  {
    int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT));
    int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT));
    m_ispTestedModes[0].init(numTotalPartsHor, numTotalPartsVer, 0);
    // the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with
    // ISP due to size restrictions
    numTotalPartsHor = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0;
    numTotalPartsVer = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0;
    for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++)
    {
      m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer, 0);
    }
    testISP = m_ispTestedModes[0].numTotalParts[0];
  }
  else
  {
    m_ispTestedModes[0].init(0, 0, 0);
  }

  xEstimateLumaRdModeList(numModesForFullRD, RdModeList, HadModeList, CandCostList, CandHadList, cu, testMip);

  CHECK( (size_t)numModesForFullRD != RdModeList.size(), "Inconsistent state!" );

  // after this point, don't use numModesForFullRD
  if( m_pcEncCfg->m_usePbIntraFast && !cs.slice->isIntra() && RdModeList.size() < numModesAvailable )
  {
    double pbintraRatio = m_pcEncCfg->m_usePbIntraFast == 1 && ( cs.area.lwidth() >= 16 && cs.area.lheight() >= 16 ) ? 1.2 : PBINTRA_RATIO;

    int maxSize = -1;
    // remember the best-ranked MIP candidate so that it can survive the truncation below
    ModeInfo bestMipMode;
    int bestMipIdx = -1;
    for( int idx = 0; idx < RdModeList.size(); idx++ )
    {
      if( RdModeList[idx].mipFlg )
      {
        bestMipMode = RdModeList[idx];
        bestMipIdx = idx;
        break;
      }
    }
    // maxSize becomes the smallest k whose distortion entry is missing or exceeds the
    // scaled inter HAD cost; it stays -1 when all three entries pass
    const int numHadCand = 3;
    for (int k = numHadCand - 1; k >= 0; k--)
    {
      if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; }
    }
    if (maxSize > 0)
    {
      RdModeList.resize(std::min<size_t>(RdModeList.size(), maxSize));
      if( bestMipIdx >= 0 )
      {
        if( RdModeList.size() <= bestMipIdx )
        {
          // the MIP candidate was cut off: append it again and move its prediction buffer along
          RdModeList.push_back(bestMipMode);
          m_SortedPelUnitBufs->swap( maxSize, bestMipIdx );
        }
      }
    }
    if (maxSize == 0)
    {
      // even the best candidate fails the check: give up on intra for this CU
      cs.dist = MAX_DISTORTION;
      cs.interHad = 0;
      return false;
    }
  }

  //===== check modes (using r-d costs) =====
  ModeInfo bestPUMode;

  CodingStructure *csTemp = m_pTempCS;
  CodingStructure *csBest = m_pBestCS;

  csTemp->slice   = csBest->slice   = cs.slice;
  csTemp->picture = csBest->picture = cs.picture;
  csTemp->compactResize( cu );
  csBest->compactResize( cu );
  csTemp->initStructData();
  csBest->initStructData();

  int   bestLfnstIdx  = 0;
  const bool useBDPCM = cs.picture->useBDPCM;
  int   NumBDPCMCand  = (useBDPCM && sps.BDPCM && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType))) ? 2 : 0;
  int   bestbdpcmMode = 0;
  int   bestISP       = 0;
  int   bestMrl       = 0;
  bool  bestMip       = false;
  int   EndMode       = (int)RdModeList.size();
  bool  useISPlfnst   = testISP && sps.LFNST;
  bool  noLFNST_ts    = false;
  double bestCostIsp[2] = { MAX_DOUBLE, MAX_DOUBLE };
  bool disableMTS = false;
  bool disableLFNST = false;
  bool disableDCT2test = false;
  if (m_pcEncCfg->m_FastIntraTools)
  {
    // speedIntra bit layout as decoded here: bit0 = no ISP, bit1 = no LFNST, bit2 = no MTS, higher bits = skip DCT2 test
    int speedIntra = 0;
    xSpeedUpIntra(bestCost, EndMode, speedIntra, cu);
    disableMTS = (speedIntra >> 2 ) & 0x1;
    disableLFNST = (speedIntra >> 1) & 0x1;
    disableDCT2test = speedIntra>>3;
    if (disableLFNST)
    {
      noLFNST_ts = true;
      useISPlfnst = false;
    }
    if (speedIntra & 0x1)
    {
      testISP = false;
    }
  }

  for (int mode_cur = 0; mode_cur < EndMode + NumBDPCMCand; mode_cur++)
  {
    int mode = mode_cur;
    if (mode_cur >= EndMode)
    {
      // BDPCM candidates are encoded as negative modes: -2 for the first, -1 for the second
      mode = (mode_cur - EndMode) ? -1 : -2;
      testISP = false;
    }
    // set CU/PU to luma prediction mode
    ModeInfo testMode;
    int noISP = 0;
    int endISP = testISP ? 2 : 0;
    bool noLFNST = noLFNST_ts;
    if (mode && useISPlfnst)
    {
      noLFNST |= (bestCostIsp[0] > (bestCostIsp[1] * 1.4));
      if (mode > 2)
      {
        endISP = 0;
        testISP = false;
      }
    }
    if (testISP)
    {
      xSpeedUpISP(1, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, bestLfnstIdx);
    }
    int startISP = 0;
    if (disableDCT2test && mode && bestISP)
    {
      startISP = endISP ? 1 : 0;
    }
    for (int ispM = startISP; ispM <= endISP; ispM++)
    {
      if (ispM && (ispM == noISP))
      {
        continue;
      }

      if (mode < 0)
      {
        cu.bdpcmM[CH_L] = -mode;
        testMode = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmM[CH_L] == 2 ? VER_IDX : HOR_IDX);
      }
      else
      {
        testMode = RdModeList[mode];
        cu.bdpcmM[CH_L] = 0;
      }

      cu.ispMode = ispM;
      cu.mipFlag = testMode.mipFlg;
      cu.mipTransposedFlag = testMode.mipTrFlg;
      cu.multiRefIdx = testMode.mRefId;
      cu.intraDir[CH_L] = testMode.modeId;
      if (cu.ispMode && xSpeedUpISP(0, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, 0) )
      {
        continue;
      }
      if (m_pcEncCfg->m_FastIntraTools && (cu.ispMode || sps.LFNST || sps.MTS))
      {
        m_ispTestedModes[0].intraWasTested = true;
      }
      CHECK(cu.mipFlag && cu.multiRefIdx, "Error: combination of MIP and MRL not supported");
      CHECK(cu.multiRefIdx && (cu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported");
      CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported");
      CHECK(cu.ispMode && cu.multiRefIdx, "Error: combination of ISP and MRL not supported");

      // determine residual for partition
      cs.initSubStructure(*csTemp, partitioner.chType, cs.area, true);
      // the sign of the mode index passed on carries the "LFNST disabled" decision made here
      int doISP = (((cu.ispMode == 0) && noLFNST) || (useISPlfnst && mode && cu.ispMode && (bestLfnstIdx == 0)) || disableLFNST) ? -mode : mode;
      xIntraCodingLumaQT(*csTemp, partitioner, m_SortedPelUnitBufs->getBufFromSortedList(mode), bestCost, doISP, disableMTS);

      DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x,
        cu.blocks[0].y, width, height, csTemp->cost, testMode.modeId, testMode.ispMod,
        cu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag);

      // an ISP result whose first TU has no luma coefficients is discarded
      if (cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMP_Y])
      {
        csTemp->cost = MAX_DOUBLE;
        csTemp->costDbOffset = 0;
      }
      if (useISPlfnst)
      {
        // track the best cost without ([0]) and with ([1]) ISP
        int n = (cu.ispMode == 0) ? 0 : 1;
        bestCostIsp[n] = csTemp->cost < bestCostIsp[n] ? csTemp->cost : bestCostIsp[n];
      }

      // check r-d cost
      if (csTemp->cost < csBest->cost)
      {
        validReturn   = true;
        std::swap(csTemp, csBest);
        bestPUMode    = testMode;
        bestLfnstIdx  = csBest->cus[0]->lfnstIdx;
        bestISP       = csBest->cus[0]->ispMode;
        bestMip       = csBest->cus[0]->mipFlag;
        bestMrl       = csBest->cus[0]->multiRefIdx;
        bestbdpcmMode = cu.bdpcmM[CH_L];
        m_ispTestedModes[bestLfnstIdx].bestSplitSoFar = ISPType(bestISP);
        if (csBest->cost < bestCost)
        {
          bestCost = csBest->cost;
        }
        if ((csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] == MTS_SKIP) && ( floorLog2(csBest->getTU(partitioner.chType)->blocks[COMP_Y].area()) >= 6 ))
        {
          noLFNST_ts = true;
        }
      }

      // reset context models
      m_CABACEstimator->getCtx() = ctxStart;

      csTemp->releaseIntermediateData();

      if (m_pcEncCfg->m_fastLocalDualTreeMode && CU::isConsIntra(cu) && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0)
      {
        if( (m_pcEncCfg->m_fastLocalDualTreeMode == 2) || (csBest->cost > costInterCU * 1.5))
        {
          //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%)
          // break only leaves the ISP loop; EndMode = 0 leaves the outer loop bounded by NumBDPCMCand alone
          EndMode = 0;
          break;
        }
      }
    }
  } // Mode loop

  // Record which tools won. csBest is only dereferenced once a candidate has been
  // accepted, so that the failure case reaches the THROW below instead.
  // NOTE(review): assumes getTU() has no TU to return for an untouched csBest - confirm.
  if (validReturn && m_pcEncCfg->m_FastIntraTools && (sps.ISP|| sps.LFNST || sps.MTS))
  {
    // bit2 = MTS used, bit1 = LFNST used, bit0 = ISP used
    int bestMode = csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] ? 4 : 0;
    bestMode |= bestLfnstIdx ? 2 : 0;
    bestMode |= bestISP ? 1 : 0;
    m_ispTestedModes[0].bestIntraMode = bestMode;
  }
  cu.ispMode = bestISP;
  if( validReturn )
  {
    cs.useSubStructure( *csBest, partitioner.chType, TREE_D, cu.singleChan( CH_L ), true );
    const ReshapeData& reshapeData = cs.picture->reshapeData;
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
    {
      cs.getRspRecoBuf().copyFrom(csBest->getRspRecoBuf());
    }

    //=== update PU data ====
    cu.lfnstIdx           = bestLfnstIdx;
    cu.mipTransposedFlag  = bestPUMode.mipTrFlg;
    cu.intraDir[CH_L]     = bestPUMode.modeId;
    cu.bdpcmM[CH_L]       = bestbdpcmMode;
    cu.mipFlag            = bestMip;
    cu.multiRefIdx        = bestMrl;
  }
  else
  {
    THROW("fix this");
  }

  csBest->releaseIntermediateData();

  return validReturn;
}
722
723
// Rate-distortion search over the chroma intra candidate modes (plus up to two chroma BDPCM
// candidates) of one CU.
//
//  cu              coding unit under test; on return cu.intraDir[1], cu.bdpcmM[CH_C] and cu.lfnstIdx
//                  hold the values recorded for the cheapest candidate
//  partitioner     current partitioning state; temporarily split here for separate-tree CUs
//  maxCostAllowed  cost bound used only when luma uses ISP: if no candidate costs less than this
//                  value, cu.ispMode is reset to 0
//
// Side effects visible in this function: cs.dist is set to the distortion of the best candidate,
// the chroma reconstruction of the best candidate is written to cs and cs.picture, the chroma
// components of the original TUs are restored from the saved copy, and the CABAC estimator
// context is restored to its state at entry.
void IntraSearch::estIntraPredChromaQT( CodingUnit& cu, Partitioner& partitioner, const double maxCostAllowed )
{
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_CHROMA, cu.cs, CH_C );
  const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() );
  CodingStructure& cs                    = *cu.cs;
  const bool       lumaUsesISP           = !CU::isSepTree( cu ) && cu.ispMode;
  const PartSplit  ispType               = lumaUsesISP ? CU::getISPType( cu, COMP_Y ) : TU_NO_ISP;
  double           bestCostSoFar         = maxCostAllowed;
  const uint32_t   numberValidComponents = getNumberValidComponents( cu.chromaFormat );
  const bool       useBDPCM              = cs.picture->useBDPCM;

  uint32_t   uiBestMode = 0;
  Distortion uiBestDist = 0;
  double     dBestCost  = MAX_DOUBLE;

  //----- init mode list ----
  {
    // All candidate indices are kept signed so that loop counters and bounds share one type.
    const int numModes          = NUM_CHROMA_MODE;
    const int reducedModeNumber = numModes >> ( m_pcEncCfg->m_reduceIntraChromaModesFullRD ? 1 : 2 );

    //----- check chroma modes -----
    uint32_t chromaCandModes[ NUM_CHROMA_MODE ];
    CU::getIntraChromaCandModes( cu, chromaCandModes );

    // create a temporary CS that keeps the best reconstruction / TU data found so far
    CodingStructure& saveCS = *m_pSaveCS[0];
    saveCS.pcv      = cs.pcv;
    saveCS.picture  = cs.picture;
    saveCS.area.repositionTo( cs.area );
    saveCS.clearTUs();

    if( lumaUsesISP )
    {
      saveCS.clearCUs();
    }

    if( CU::isSepTree( cu ) )
    {
      // separate tree: the chroma TUs do not exist yet, create them (split when the partitioner allows it)
      if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
      {
        partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );

        do
        {
          cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ).depth = partitioner.currTrDepth;
        } while( partitioner.nextPart( cs ) );

        partitioner.exitCurrSplit();
      }
      else
      {
        cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu );
      }
    }

    // create a store for the TUs; saveCS.tus[j] mirrors orgTUs[j]
    std::vector<TransformUnit*> orgTUs;
    for( const auto& ptu : cs.tus )
    {
      // with luma ISP every TU is stored (also those without chroma part) so that the Cbfs are set correctly
      if( lumaUsesISP || cu.contains( *ptu, CH_C ) )
      {
        saveCS.addTU( *ptu, partitioner.chType, nullptr );
        orgTUs.push_back( ptu );
      }
    }

    // SATD pre-selecting.
    int     satdModeList  [NUM_CHROMA_MODE] = { 0 };
    int64_t satdSortedCost[NUM_CHROMA_MODE] = { 0 };
    bool    modeDisable[NUM_INTRA_MODE + 1] = { false }; // indexed by intra mode: true = skipped in the full RD search

    CompArea areaCb = cu.Cb();
    CompArea areaCr = cu.Cr();
    CPelBuf  orgCb  = cs.getOrgBuf ( COMP_Cb );
    PelBuf   predCb = cs.getPredBuf( COMP_Cb );
    CPelBuf  orgCr  = cs.getOrgBuf ( COMP_Cr );
    PelBuf   predCr = cs.getPredBuf( COMP_Cr );

    DistParam distParamSadCb  = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_SAD );
    DistParam distParamSatdCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_HAD );
    DistParam distParamSadCr  = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_SAD );
    DistParam distParamSatdCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_HAD );

    cu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation.

    initIntraPatternChType( cu, cu.Cb() );
    initIntraPatternChType( cu, cu.Cr() );
    loadLMLumaRecPels( cu, cu.Cb() );

    // Predicts one chroma component with the given mode and returns min( 2 * SAD, SATD ) for it.
    const auto preSelectCost = [&]( const ComponentID comp, PelBuf& pred, CompArea& area,
                                    DistParam& sadParam, DistParam& satdParam,
                                    const int mode, const bool isLMCMode ) -> int64_t
    {
      if( isLMCMode )
      {
        predIntraChromaLM( comp, pred, cu, area, mode );
      }
      else
      {
        initPredIntraParams( cu, area, *cs.sps );
        predIntraAng( comp, pred, cu );
      }
      const int64_t sadTimes2 = sadParam.distFunc( sadParam ) * 2;
      const int64_t satd      = satdParam.distFunc( satdParam );
      return std::min( sadTimes2, satd );
    };

    for( int idx = 0; idx < numModes; idx++ )
    {
      const int mode = chromaCandModes[idx];
      satdModeList[idx] = mode;

      const bool isLMCMode = CU::isLMCMode( mode );
      if( isLMCMode && ( !CU::isLMCModeEnabled( cu, mode ) || cu.slice->lmChromaCheckDisable ) )
      {
        continue;
      }
      if( ( mode == LM_CHROMA_IDX ) || ( mode == PLANAR_IDX ) || ( mode == DM_CHROMA_IDX ) ) // only pre-check regular modes and MDLM modes, not including DM, Planar, and LM
      {
        continue; // cost stays 0, so these modes sort to the front and are never disabled
      }

      cu.intraDir[1] = mode; // temporary assigned, for SATD checking.

      int64_t sad = preSelectCost( COMP_Cb, predCb, areaCb, distParamSadCb, distParamSatdCb, mode, isLMCMode );
      sad        += preSelectCost( COMP_Cr, predCr, areaCr, distParamSadCr, distParamSatdCr, mode, isLMCMode );
      satdSortedCost[idx] = sad;
    }

    // sort the mode based on the cost from small to large.
    // (exchange sort kept as is: the resulting order of equal-cost entries decides which modes get disabled)
    for( int i = 0; i < numModes; i++ )
    {
      for( int j = i + 1; j < numModes; j++ )
      {
        if( satdSortedCost[j] < satdSortedCost[i] )
        {
          std::swap( satdModeList[i],   satdModeList[j] );
          std::swap( satdSortedCost[i], satdSortedCost[j] );
        }
      }
    }

    for( int i = 0; i < reducedModeNumber; i++ )
    {
      modeDisable[satdModeList[numModes - 1 - i]] = true; // disable the last reducedModeNumber modes
    }

    int bestLfnstIdx = 0;
    // save the dist
    const Distortion baseDist      = cs.dist;
    int32_t          bestbdpcmMode = 0;
    const int        numBdpcmModes = ( useBDPCM && CU::bdpcmAllowed( cu, COMP_Cb )
        && ( ( partitioner.chType == CH_C ) || ( cu.ispMode == 0 && cu.lfnstIdx == 0 && cu.firstTU->mtsIdx[COMP_Y] == MTS_SKIP ) ) ) ? 2 : 0;

    // candidates [0, numModes) are regular chroma modes, the (optional) two candidates after them are BDPCM
    for( int modeCur = 0; modeCur < numModes + numBdpcmModes; modeCur++ )
    {
      int mode = modeCur;
      if( modeCur >= numModes )
      {
        mode = modeCur > numModes ? -1 : -2; //set bdpcm mode
        if( ( mode == -1 ) && ( saveCS.tus[0]->mtsIdx[COMP_Cb] != MTS_SKIP ) && ( saveCS.tus[0]->mtsIdx[COMP_Cr] != MTS_SKIP ) )
        {
          continue;
        }
      }

      int chromaIntraMode;
      if( mode < 0 )
      {
        cu.bdpcmM[CH_C] = -mode;
        chromaIntraMode = cu.bdpcmM[CH_C] == 2 ? chromaCandModes[1] : chromaCandModes[2];
      }
      else
      {
        cu.bdpcmM[CH_C] = 0;
        chromaIntraMode = chromaCandModes[mode];
        if( CU::isLMCMode( chromaIntraMode ) && ( !CU::isLMCModeEnabled( cu, chromaIntraMode ) || cu.slice->lmChromaCheckDisable ) )
        {
          continue;
        }
        if( modeDisable[chromaIntraMode] && CU::isLMCModeEnabled( cu, chromaIntraMode ) ) // when CCLM is disable, then MDLM is disable. not use satd checking
        {
          continue;
        }
      }

      cs.dist = baseDist;
      //----- restore context models -----
      m_CABACEstimator->getCtx() = ctxStart;

      //----- chroma coding -----
      cu.intraDir[1] = chromaIntraMode;
      m_ispTestedModes[0].IspType      = ispType;
      m_ispTestedModes[0].subTuCounter = -1;
      xIntraChromaCodingQT( cs, partitioner );
      if( lumaUsesISP && cs.dist == MAX_UINT )
      {
        continue;
      }

      if( cs.sps->transformSkip )
      {
        m_CABACEstimator->getCtx() = ctxStart;
      }
      m_ispTestedModes[0].IspType      = ispType;
      m_ispTestedModes[0].subTuCounter = -1;
      const uint64_t   fracBits = xGetIntraFracBitsQT( cs, partitioner, false );
      const Distortion uiDist   = cs.dist;
      const double     dCost    = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist );

      //----- compare -----
      if( dCost < dBestCost )
      {
        if( lumaUsesISP && ( dCost < bestCostSoFar ) )
        {
          bestCostSoFar = dCost;
        }
        // remember reconstruction and TU data of the new best candidate
        for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ )
        {
          const CompArea& area = cu.blocks[i];
          saveCS.getRecoBuf     ( area ).copyFrom( cs.getRecoBuf( area ) );
          cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf( area ) );
          for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
          {
            saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID );
          }
        }
        dBestCost     = dCost;
        uiBestDist    = uiDist;
        uiBestMode    = chromaIntraMode;
        bestLfnstIdx  = cu.lfnstIdx;
        bestbdpcmMode = cu.bdpcmM[CH_C];
      }
    }
    cu.lfnstIdx     = bestLfnstIdx;
    cu.bdpcmM[CH_C] = bestbdpcmMode;

    // write the best candidate back into cs, the picture and the original TUs
    for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ )
    {
      const CompArea& area = cu.blocks[i];

      cs.getRecoBuf         ( area ).copyFrom( saveCS.getRecoBuf( area ) );
      cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf    ( area ) );

      for( uint32_t j = 0; j < saveCS.tus.size(); j++ )
      {
        orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID );
      }
    }
  }
  cu.intraDir[1] = uiBestMode;
  cs.dist        = uiBestDist;

  //----- restore context models -----
  m_CABACEstimator->getCtx() = ctxStart;
  if( lumaUsesISP && bestCostSoFar >= maxCostAllowed )
  {
    // no chroma candidate got below the allowed cost: drop ISP for this CU
    cu.ispMode = 0;
  }
}
986
987
// Appends one (area, cost) record to m_cuAreaInSCIPU / m_cuCostInSCIPU.
// The record is silently dropped once NUM_INTER_CU_INFO_SAVE entries are stored.
void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost )
{
  if( m_numCuInSCIPU >= NUM_INTER_CU_INFO_SAVE )
  {
    return; // store is full
  }

  m_cuCostInSCIPU[m_numCuInSCIPU] = cost;
  m_cuAreaInSCIPU[m_numCuInSCIPU] = area;
  ++m_numCuInSCIPU;
}
996
997
void IntraSearch::initCuAreaCostInSCIPU()
998
0
{
999
0
  for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ )
1000
0
  {
1001
0
    m_cuAreaInSCIPU[i] = Area();
1002
0
    m_cuCostInSCIPU[i] = 0;
1003
0
  }
1004
0
  m_numCuInSCIPU = 0;
1005
0
}
1006
// -------------------------------------------------------------------------------------------------------------------
1007
// Intra search
1008
// -------------------------------------------------------------------------------------------------------------------
1009
1010
// Feeds the intra CU header syntax elements of the current partition to the CABAC estimator.
//
//  luma == true  : for the first luma part only (sub-TU counter 0 when the CU uses ISP, otherwise
//                  the part located at the luma origin of cs) pred_mode (conditionally), bdpcm_mode
//                  and intra_luma_pred_mode are coded.
//  luma == false : for the part holding a valid Cb block at the chroma origin of cs, the chroma
//                  bdpcm_mode and intra_chroma_pred_mode are coded.
void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool luma )
{
  CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType );

  if( !luma )
  {
    const bool firstChromaPart = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos();

    if( firstChromaPart )
    {
      m_CABACEstimator->bdpcm_mode( cu, ComponentID( CH_C ) );
      m_CABACEstimator->intra_chroma_pred_mode( cu );
    }
    return;
  }

  bool firstLumaPart;
  if( cu.ispMode )
  {
    firstLumaPart = m_ispTestedModes[0].subTuCounter == 0;
  }
  else
  {
    firstLumaPart = partitioner.currArea().lumaPos() == cs.area.lumaPos();
  }

  if( !firstLumaPart )
  {
    return; // header was already accounted for with an earlier part
  }

  // CU header
  if( ( !cs.slice->isIntra() || cs.slice->sps->IBC || cs.slice->sps->PLT ) && cu.Y().valid() )
  {
    m_CABACEstimator->pred_mode( cu );
  }
  m_CABACEstimator->bdpcm_mode( cu, ComponentID( partitioner.chType ) );

  // luma prediction mode
  if( !cu.Y().valid() )
  {
    m_CABACEstimator->pred_mode( cu );
  }
  m_CABACEstimator->intra_luma_pred_mode( cu );
}
1049
1050
// Recursively walks the transform tree below the current partition and feeds the coded block
// flags (cbf) to the CABAC estimator.
//
//  luma == false : the chroma cbfs are coded at every visited depth (subject to the ISP / parent
//                  cbf conditions below).
//  luma == true  : the luma cbf is coded at the leaves only; for ISP the cbf of the last
//                  sub-partition is skipped (inferred) when all previous sub-partitions have a zero cbf.
//
// The TU is looked up with m_ispTestedModes[0].subTuCounter; the recursion re-reads that member,
// so no local counter needs to be advanced while iterating over the parts.
void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool luma )
{
  const UnitArea&   currArea     = partitioner.currArea();
  const int         subTuCounter = m_ispTestedModes[0].subTuCounter;
  TransformUnit&    currTU       = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
  CodingUnit&       currCU       = *currTU.cu;
  const uint32_t    currDepth    = partitioner.currTrDepth;
  const bool        subdiv       = currTU.depth > currDepth;
  const ComponentID chComp       = partitioner.chType == CH_L ? COMP_Y : COMP_Cb;

  if( !luma )
  {
    const bool chromaCbfISP = currArea.blocks[COMP_Cb].valid() && currCU.ispMode && !subdiv;
    if( !currCU.ispMode || chromaCbfISP )
    {
      const uint32_t numberValidComponents = getNumberValidComponents( currArea.chromaFormat );
      const uint32_t cbfDepth              = ( chromaCbfISP ? currDepth - 1 : currDepth );

      for( uint32_t ch = COMP_Cb; ch < numberValidComponents; ch++ )
      {
        const ComponentID chromaComp = ComponentID( ch );
        // a chroma cbf is only present at the root, below a parent with cbf set, or for the ISP chroma block
        if( currDepth == 0 || TU::getCbfAtDepth( currTU, chromaComp, currDepth - 1 ) || chromaCbfISP )
        {
          const bool prevCbf = ( chromaComp == COMP_Cr ? TU::getCbfAtDepth( currTU, COMP_Cb, currDepth ) : false );
          m_CABACEstimator->cbf_comp( currCU, TU::getCbfAtDepth( currTU, chromaComp, currDepth ), currArea.blocks[chromaComp], cbfDepth, prevCbf );
        }
      }
    }
  }

  if( subdiv )
  {
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    }
    else if( currCU.ispMode && isLuma( chComp ) )
    {
      partitioner.splitCurrArea( m_ispTestedModes[0].IspType, cs );
    }
    else
    {
      THROW("Cannot perform an implicit split!");
    }

    do
    {
      xEncSubdivCbfQT( cs, partitioner, luma );
    } while( partitioner.nextPart( cs ) );

    partitioner.exitCurrSplit();
    return;
  }

  //===== Cbfs =====
  if( !luma )
  {
    return;
  }

  bool previousCbf       = false;
  bool lastCbfIsInferred = false;
  if( m_ispTestedModes[0].IspType != TU_NO_ISP )
  {
    const uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2( currTU.lheight() )
                                                                    : currCU.lwidth()  >> floorLog2( currTU.lwidth() );
    // same conversion the implicit int/uint32_t comparison performed, now spelled out
    if( static_cast<uint32_t>( subTuCounter ) == nTus - 1 )
    {
      // last sub-partition: its cbf is inferred when none of the previous ones has a cbf set
      bool           rootCbfSoFar = false;
      TransformUnit* tuPointer    = currCU.firstTU;
      for( uint32_t tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
      {
        rootCbfSoFar |= TU::getCbfAtDepth( *tuPointer, COMP_Y, currDepth );
        tuPointer     = tuPointer->next;
      }
      if( !rootCbfSoFar )
      {
        lastCbfIsInferred = true;
      }
    }
    if( !lastCbfIsInferred )
    {
      previousCbf = TU::getPrevTuCbfAtDepth( currTU, COMP_Y, partitioner.currTrDepth );
    }
  }
  if( !lastCbfIsInferred )
  {
    m_CABACEstimator->cbf_comp( currCU, TU::getCbfAtDepth( currTU, COMP_Y, currDepth ), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode );
  }
}
1138
// Recursively walks the transform tree below the current partition and feeds the residual data
// of component compID to the CABAC estimator.
//
// At a leaf with a valid block for compID: for Cr the joint_cb_cr syntax is coded first; when the
// cbf of compID is set, the coefficients are coded (luma additionally codes mts_idx using cuCtx).
// The parameters subTuIdx and ispType are not read by this function body.
void IntraSearch::xEncCoeffQT(CodingStructure& cs, Partitioner& partitioner, const ComponentID compID, CUCtx* cuCtx, const int subTuIdx, const PartSplit ispType)
{
  const UnitArea& currArea = partitioner.currArea();

  int            subTuCounter = m_ispTestedModes[0].subTuCounter;
  TransformUnit& currTU       = *cs.getTU( currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter );
  const uint32_t currDepth    = partitioner.currTrDepth;

  if( currTU.depth > currDepth )
  {
    // the TU lies deeper in the tree: descend into the parts of the current area
    if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) )
    {
      partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs );
    }
    else if( currTU.cu->ispMode )
    {
      partitioner.splitCurrArea( m_ispTestedModes[0].IspType, cs );
    }
    else
    {
      THROW("Implicit TU split not available!");
    }

    do
    {
      xEncCoeffQT( cs, partitioner, compID, cuCtx, subTuCounter, m_ispTestedModes[0].IspType );
      if( subTuCounter != -1 )
      {
        subTuCounter++;
      }
    } while( partitioner.nextPart( cs ) );

    partitioner.exitCurrSplit();
    return;
  }

  if( !currArea.blocks[compID].valid() )
  {
    return;
  }

  if( compID == COMP_Cr )
  {
    const int cbMaskBit = TU::getCbf( currTU, COMP_Cb ) ? 2 : 0;
    const int crMaskBit = TU::getCbf( currTU, COMP_Cr ) ? 1 : 0;
    m_CABACEstimator->joint_cb_cr( currTU, cbMaskBit + crMaskBit );
  }

  if( !TU::getCbf( currTU, compID ) )
  {
    return; // nothing coded for this component
  }

  if( isLuma( compID ) )
  {
    m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
    m_CABACEstimator->mts_idx( *currTU.cu, cuCtx );
  }
  else
  {
    m_CABACEstimator->residual_coding( currTU, compID );
  }
}
1189
1190
// Estimates the fractional bits of the intra data of the current partition.
//
// The estimator's bit counter is reset, then header, cbf and coefficient syntax are coded for
// either luma (luma == true) or both chroma components (luma == false), and the accumulated
// estimate is returned. For luma with a non-null cuCtx the LFNST index is coded as well, subject
// to the ISP conditions below.
uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool luma, CUCtx *cuCtx )
{
  m_CABACEstimator->resetBits();

  xEncIntraHeader( cs, partitioner, luma );
  xEncSubdivCbfQT( cs, partitioner, luma );

  if( !luma )
  {
    xEncCoeffQT( cs, partitioner, COMP_Cb );
    xEncCoeffQT( cs, partitioner, COMP_Cr );
    return m_CABACEstimator->getEstFracBits();
  }

  xEncCoeffQT( cs, partitioner, COMP_Y, cuCtx );

  CodingUnit &cu = *cs.cus[0];

  // Without ISP the LFNST index is always coded; with ISP it is coded with sub-TU 0 when an LFNST
  // index is set, and with the last sub-TU otherwise.
  bool codeLfnstIdx = false;
  if( cuCtx )
  {
    if( !cu.ispMode )
    {
      codeLfnstIdx = true;
    }
    else if( cu.lfnstIdx )
    {
      codeLfnstIdx = m_ispTestedModes[0].subTuCounter == 0;
    }
    else
    {
      codeLfnstIdx = m_ispTestedModes[0].subTuCounter == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1;
    }
  }

  if( codeLfnstIdx )
  {
    m_CABACEstimator->residual_lfnst_mode( cu, *cuCtx );
  }

  return m_CABACEstimator->getEstFracBits();
}
1219
1220
uint64_t IntraSearch::xGetIntraFracBitsQTChroma(const TransformUnit& currTU, const ComponentID compID, CUCtx *cuCtx)
1221
0
{
1222
0
  m_CABACEstimator->resetBits();
1223
1224
0
  if ( currTU.jointCbCr )
1225
0
  {
1226
0
    const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 );
1227
0
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask>>1, currTU.blocks[ COMP_Cb ], currTU.depth, false );
1228
0
    m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask &1, currTU.blocks[ COMP_Cr ], currTU.depth, cbfMask>>1 );
1229
0
    if( cbfMask )
1230
0
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1231
0
    if (cbfMask >> 1)
1232
0
      m_CABACEstimator->residual_coding( currTU, COMP_Cb, cuCtx );
1233
0
    if (cbfMask & 1)
1234
0
      m_CABACEstimator->residual_coding( currTU, COMP_Cr, cuCtx );
1235
0
  }
1236
0
  else
1237
0
  {
1238
0
    if ( compID == COMP_Cb )
1239
0
      m_CABACEstimator->cbf_comp( *currTU.cu, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false );
1240
0
    else
1241
0
    {
1242
0
      const bool cbCbf    = TU::getCbf( currTU, COMP_Cb );
1243
0
      const bool crCbf    = TU::getCbf( currTU, compID );
1244
0
      const int  cbfMask  = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 );
1245
0
      m_CABACEstimator->cbf_comp( *currTU.cu, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf );
1246
0
      m_CABACEstimator->joint_cb_cr( currTU, cbfMask );
1247
0
    }
1248
0
  }
1249
1250
0
  if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) )
1251
0
  {
1252
0
    m_CABACEstimator->residual_coding( currTU, compID, cuCtx );
1253
0
  }
1254
1255
0
  uint64_t fracBits = m_CABACEstimator->getEstFracBits();
1256
0
  return fracBits;
1257
0
}
1258
1259
// Codes one component of a TU in intra mode: prediction (luma only), residual, transform and
// quantisation, inverse transform, reconstruction, and distortion accumulation.
//
//  tu        transform unit to code; coefficients and cbfs are updated through m_pcTrQuant / TU helpers
//  compID    component to code; with tu.jointCbCr set this must be COMP_Cb (checked), and Cr is
//            reconstructed and measured together with Cb
//  ruiDist   in/out: the distortion of the block is added to it; it is overwritten with MAX_INT when
//            the last ISP sub-partition leaves all luma cbfs zero, and with MAX_DISTORTION when the
//            coded cbf mask does not match tu.jointCbCr - in both cases the function returns early
//  predBuf   optional precomputed luma prediction; when given it is copied instead of predicting
//  loadTr    forwarded to transformNxN
//
// checkCrossCPrediction and numSig are not read by this function body. For chroma no prediction or
// residual is generated here; the prediction/residual buffers of cs are used as they are.
void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID compID, const bool checkCrossCPrediction, Distortion &ruiDist, uint32_t *numSig, PelUnitBuf *predBuf, const bool loadTr)
{
  const CompArea& area = tu.blocks[compID];
  if( !area.valid() )
  {
    return;
  }

  CodingStructure&   cs          = *tu.cs;
  const SPS&         sps         = *cs.sps;
  const ReshapeData& reshapeData = cs.picture->reshapeData;
  const CodingUnit&  cu          = *tu.cu;

  const ChannelType  chType      = toChannelType( compID );
  const int          bitDepth    = sps.bitDepths[chType];
  const bool         lumaComp    = isLuma( compID );
  const bool         chromaComp  = isChroma( compID );

  CPelBuf piOrg  = cs.getOrgBuf ( area );
  PelBuf  piPred = cs.getPredBuf( area );
  PelBuf  piResi = cs.getResiBuf( area );
  PelBuf  piReco = cs.getRecoBuf( area );

  CHECK( tu.jointCbCr && compID == COMP_Cr, "wrong combination of compID and jointCbCr" );
  const bool jointCbCr = tu.jointCbCr && compID == COMP_Cb;

  if( lumaComp )
  {
    //===== init availability pattern =====
    const bool predRegDiffFromTB = CU::isPredRegDiffFromTB( *tu.cu );
    bool       firstTBInPredReg  = false;
    CompArea   areaPredReg( COMP_Y, tu.chromaFormat, area );

    if( tu.cu->ispMode )
    {
      firstTBInPredReg = CU::isFirstTBInPredReg( *tu.cu, area );
      if( !predRegDiffFromTB )
      {
        initIntraPatternChTypeISP( *tu.cu, area, piReco );
      }
      else if( firstTBInPredReg )
      {
        // prediction region differs from the transform block: set it up once, with the first block
        CU::adjustPredArea( areaPredReg );
        initIntraPatternChTypeISP( *tu.cu, areaPredReg, piReco );
      }
    }
    else if( !predBuf )
    {
      initIntraPatternChType( *tu.cu, area );
    }

    //===== get prediction signal =====
    if( !predRegDiffFromTB )
    {
      if( predBuf )
      {
        piPred.copyFrom( predBuf->Y() );
      }
      else if( CU::isMIP( cu, CH_L ) )
      {
        initIntraMip( cu );
        predIntraMip( piPred, cu );
      }
      else
      {
        predIntraAng( compID, piPred, cu );
      }
    }
    else if( firstTBInPredReg )
    {
      PelBuf piPredReg = cs.getPredBuf( areaPredReg );
      predIntraAng( compID, piPredReg, cu );
    }
  }
  DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), CU::getFinalIntraMode(cu, chType) );

  const Slice& slice = *cs.slice;
  // LMCS applies for intra slices, and for other slices when the CTU flag of the reshaper is set
  bool lmcsActive = cs.picHeader->lmcsEnabled && ( slice.isIntra() || reshapeData.getCTUFlag() );

  if( lumaComp )
  {
    //===== get residual signal =====
    if( cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() )
    {
      piResi.subtract( cs.getRspOrgBuf( area ), piPred );
    }
    else
    {
      piResi.subtract( piOrg, piPred );
    }
  }

  //===== transform and quantization =====
  TCoeff        uiAbsSum = 0;
  const QpParam cQP( tu, compID );

  m_pcTrQuant->selectLambda( compID );

  lmcsActive = lmcsActive && ( tu.blocks[compID].width * tu.blocks[compID].height > 4 );
  if( lmcsActive && chromaComp && cs.picHeader->lmcsChromaResidualScale )
  {
    const int    cResScaleInv = tu.chromaAdj;
    const double cRescale     = (double)( 1 << CSCALE_FP_PREC ) / (double)cResScaleInv;
    m_pcTrQuant->scaleLambda( 1.0 / ( cRescale * cRescale ) );
  }

  if( jointCbCr )
  {
    // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks
    const int absIct = abs( TU::getICTMode( tu ) );
    double    lfact  = 0.5;
    if( absIct == 1 || absIct == 3 )
    {
      lfact = 0.8;
    }
    m_pcTrQuant->scaleLambda( lfact );
  }
  if( sps.jointCbCr && chromaComp && ( tu.cu->cs->slice->sliceQp > 18 ) )
  {
    m_pcTrQuant->scaleLambda( 1.3 );
  }

  if( lumaComp )
  {
    m_pcTrQuant->transformNxN( tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr );

    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum );
    if( tu.cu->ispMode && CU::isISPLast( *tu.cu, area, area.compID ) && CU::allLumaCBFsAreZero( *tu.cu ) )
    {
      // ISP has to have at least one non-zero CBF
      ruiDist = MAX_INT;
      return;
    }

    //--- inverse transform ---
    if( uiAbsSum > 0 )
    {
      m_pcTrQuant->invTransformNxN( tu, compID, piResi, cQP );
    }
    else
    {
      piResi.fill( 0 );
    }
  }
  else // chroma
  {
    PelBuf crPred = cs.getPredBuf( COMP_Cr );
    PelBuf crResi = cs.getResiBuf( COMP_Cr );
    PelBuf crReco = cs.getRecoBuf( COMP_Cr );

    // with joint coding only one component carries coefficients: Cb when bit 1 of tu.jointCbCr is set, else Cr
    ComponentID codeCompId = compID;
    if( tu.jointCbCr )
    {
      codeCompId = ( tu.jointCbCr >> 1 ) ? COMP_Cb : COMP_Cr;
    }
    const QpParam qpCbCr( tu, codeCompId );

    if( tu.jointCbCr )
    {
      const ComponentID otherCompId = ( codeCompId == COMP_Cr ? COMP_Cb : COMP_Cr );
      tu.getCoeffs( otherCompId ).fill( 0 ); // do we need that?
      TU::setCbfAtDepth( tu, otherCompId, tu.depth, false );
    }

    PelBuf& codeResi     = ( codeCompId == COMP_Cr ? crResi : piResi );
    int     codedCbfMask = 0;
    uiAbsSum             = 0;
    m_pcTrQuant->transformNxN( tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr );
    DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum );
    if( uiAbsSum > 0 )
    {
      m_pcTrQuant->invTransformNxN( tu, codeCompId, codeResi, qpCbCr );
      codedCbfMask += ( codeCompId == COMP_Cb ? 2 : 1 );
    }
    else
    {
      codeResi.fill( 0 );
    }

    if( tu.jointCbCr )
    {
      if( tu.jointCbCr == 3 && codedCbfMask == 2 )
      {
        codedCbfMask = 3;
        TU::setCbfAtDepth( tu, COMP_Cr, tu.depth, true );
      }
      if( tu.jointCbCr != codedCbfMask )
      {
        // the coded cbf pattern does not match the requested joint mode
        ruiDist = MAX_DISTORTION;
        return;
      }
      m_pcTrQuant->invTransformICT( tu, piResi, crResi );
      uiAbsSum = codedCbfMask;
    }

    //===== reconstruction =====
    if( lmcsActive && uiAbsSum > 0 && cs.picHeader->lmcsChromaResidualScale )
    {
      piResi.scaleSignal( tu.chromaAdj, 0, slice.clpRngs[compID] );

      if( jointCbCr )
      {
        crResi.scaleSignal( tu.chromaAdj, 0, slice.clpRngs[COMP_Cr] );
      }
    }

    if( jointCbCr )
    {
      crReco.reconstruct( crPred, crResi, cs.slice->clpRngs[ COMP_Cr ] );
    }
  }
  piReco.reconstruct( piPred, piResi, cs.slice->clpRngs[ compID ] );

  //===== update distortion =====
  const bool reshapeIntraCMD = m_pcEncCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ;
  const bool weightedSSE     = ( cs.picHeader->lmcsEnabled && ( reshapeData.getCTUFlag() || ( chromaComp && reshapeIntraCMD ) ) )
                            || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled;

  if( !weightedSSE )
  {
    ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE );
    if( jointCbCr )
    {
      CPelBuf crOrg  = cs.getOrgBuf ( COMP_Cr );
      PelBuf  crReco = cs.getRecoBuf( COMP_Cr );
      ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE );
    }
    return;
  }

  const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] );
  if( compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled )
  {
    // luma under LMCS: map the reconstruction through the inverse LUT before measuring against the original
    PelBuf tmpRecLuma = cs.getRspRecoBuf( area );
    tmpRecLuma.rspSignal( piReco, reshapeData.getInvLUT() );
    ruiDist += m_pcRdCost->getDistPart( piOrg, tmpRecLuma, bitDepth, compID, DF_SSE_WTD, &orgLuma );
    return;
  }

  ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma );
  if( jointCbCr )
  {
    CPelBuf crOrg  = cs.getOrgBuf ( COMP_Cr );
    PelBuf  crReco = cs.getRecoBuf( COMP_Cr );
    ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE_WTD, &orgLuma );
  }
}
1500
1501
// Luma intra residual coding and rate-distortion search for the partitioner's current area.
//
// Depending on the configuration and the CU (cs.cus[0]) this either codes one luma candidate
// directly, or loops over transform candidates (entries of trModes, indexed by modeId) and
// LFNST indices, keeping the candidate with the lowest RD cost. CUs with cu.ispMode set are
// evaluated through xTestISP() instead of a single xIntraCodingTUBlock() call.
//
// On return cs.dist and cs.fracBits have been incremented by the distortion / fractional bits
// of the selected candidate and cs.cost is set to its cost (MAX_DOUBLE if no tested candidate
// produced a finite cost).
//
// cs            : coding structure to code; TUs are added to it and its reco buffer is written
// partitioner   : supplies the current area, transform depth, channel type and tree type
// predBuf       : forwarded unchanged to xPreCheckMTS() and xIntraCodingTUBlock()
// bestCostSoFar : reference cost; initial bound handed to xTestISP() and base of the
//                 thresholdDCT early termination
// numMode       : a negative value switches MTS and LFNST testing off; only its magnitude is
//                 used afterwards (to decide whether the ISP cost is stored)
// disableMTS    : if true, MTS candidates are not tested
void IntraSearch::xIntraCodingLumaQT(CodingStructure& cs, Partitioner& partitioner, PelUnitBuf* predBuf, const double bestCostSoFar, int numMode, bool disableMTS)
1502
0
{
1503
0
  PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_RD_SEARCH_LUMA, &cs, partitioner.chType );
1504
0
  const UnitArea& currArea  = partitioner.currArea();
1505
0
  uint32_t        currDepth = partitioner.currTrDepth;
1506
0
  Distortion singleDistLuma = 0;
1507
0
  uint32_t   numSig         = 0;
1508
0
  const SPS &sps            = *cs.sps;
1509
0
  CodingUnit &cu            = *cs.cus[0];
1510
0
  // MTS is only considered for non-negative numMode, when not disabled by the caller, and when the CU allows it
  bool mtsAllowed = (numMode < 0) || disableMTS ? false : CU::isMTSAllowed(cu, COMP_Y);
1511
0
  uint64_t singleFracBits   = 0;
1512
0
  bool   splitCbfLumaSum    = false;
1513
0
  double bestCostForISP     = bestCostSoFar;
1514
0
  double dSingleCost        = MAX_DOUBLE;
1515
0
  // last LFNST index to test: 0 for small separate-tree chroma areas, areas larger than the
  // maximum TB size, LFNST disabled in the SPS, or negative numMode; otherwise 2
  int endLfnstIdx           = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (currArea.lwidth() < 8 || currArea.lheight() < 8))
1516
0
                           || (currArea.lwidth() > sps.getMaxTbSize() || currArea.lheight() > sps.getMaxTbSize()) || !sps.LFNST || (numMode < 0) ? 0 : 2;
1517
0
  const bool useTS          = cs.picture->useTS;
1518
0
  // the sign of numMode was only used as a flag above; continue with its magnitude
  numMode                   = (numMode < 0) ? -numMode : numMode;
1519
1520
0
  // MIP CU whose luma size is rejected by allowLfnstWithMip(): no LFNST
  if (cu.mipFlag && !allowLfnstWithMip(cu.lumaSize()))
1521
0
  {
1522
0
    endLfnstIdx = 0;
1523
0
  }
1524
0
  int bestMTS = 0;
1525
0
  // EndMTS is the last index into trModes that the candidate loop below will visit
  int EndMTS  = mtsAllowed ? m_pcEncCfg->m_MTSIntraMaxCand : 0;
1526
0
  // ISP: never test MTS; keep LFNST only if numTotalParts is non-zero for LFNST index 1 or 2 of this ISP mode
  if (cu.ispMode && (EndMTS || endLfnstIdx))
1527
0
  {
1528
0
    EndMTS = 0;
1529
0
    if ((m_ispTestedModes[1].numTotalParts[cu.ispMode - 1] == 0)
1530
0
     && (m_ispTestedModes[2].numTotalParts[cu.ispMode - 1] == 0))
1531
0
    {
1532
0
      endLfnstIdx = 0;
1533
0
    }
1534
0
  }
1535
0
  // luma BDPCM: neither LFNST nor MTS is tested
  if (cu.bdpcmM[CH_L])
1536
0
  {
1537
0
    endLfnstIdx = 0;
1538
0
    EndMTS = 0;
1539
0
  }
1540
0
  bool checkTransformSkip = sps.transformSkip;
1541
1542
0
  SizeType transformSkipMaxSize = 1 << sps.log2MaxTransformSkipBlockSize;
1543
0
  // transform skip is a candidate only if enabled for the picture and the SPS, for CUs without
  // ISP / BDPCM / SBT, and only up to the SPS maximum transform-skip block size
  bool tsAllowed = useTS  && cu.cs->sps->transformSkip && (!cu.ispMode) && (!cu.bdpcmM[CH_L]) && (!cu.sbtInfo);
1544
0
  tsAllowed &= cu.blocks[COMP_Y].width <= transformSkipMaxSize && cu.blocks[COMP_Y].height <= transformSkipMaxSize;
1545
0
  if (tsAllowed)
1546
0
  {
1547
0
    // one more trModes entry to visit (the transform-skip entry pushed below)
    EndMTS += 1;
1548
0
  }
1549
0
  // more than one candidate to evaluate: run the search over transform candidates and LFNST indices
  if (endLfnstIdx || EndMTS)
1550
0
  {
1551
0
    bool       splitCbfLuma  = false;
1552
0
    const PartSplit ispType  = CU::getISPType(cu, COMP_Y);
1553
0
    CUCtx cuCtx;
1554
0
    cuCtx.isDQPCoded         = true;
1555
0
    cuCtx.isChromaQpAdjCoded = true;
1556
0
    cs.cost                  = 0.0;
1557
0
    Distortion       singleDistTmpLuma = 0;
1558
0
    uint64_t         singleTmpFracBits = 0;
1559
0
    double           singleCostTmp     = 0;
1560
0
    // ctxStart: CABAC context state before any candidate was coded; ctxBest: state after the best candidate
    const TempCtx    ctxStart          (m_CtxCache, m_CABACEstimator->getCtx());
1561
0
          TempCtx    ctxBest           (m_CtxCache);
1562
0
    // scratch coding structure used to back up the best candidate (slot 0 for ISP, slot 1 otherwise)
    CodingStructure &saveCS            = *m_pSaveCS[cu.ispMode?0:1];
1563
0
    TransformUnit *  tmpTU             = nullptr;
1564
0
    int              bestLfnstIdx      = 0;
1565
0
    int              startLfnstIdx     = 0;
1566
    // speedUps LFNST
1567
0
    bool   rapidLFNST                  = false;
1568
0
    bool   rapidDCT                    = false;
1569
0
    double thresholdDCT                = 1;
1570
1571
0
    // with m_MTS == 2 the early-termination threshold grows as the block area shrinks
    if (m_pcEncCfg->m_MTS == 2)
1572
0
    {
1573
0
      thresholdDCT += 1.4 / sqrt(cu.lwidth() * cu.lheight());
1574
0
    }
1575
1576
0
    // m_LFNST > 1 enables the rapidLFNST early exits; m_LFNST > 2 additionally enables rapidDCT
    // and caps the tested LFNST index at 1
    if (m_pcEncCfg->m_LFNST > 1)
1577
0
    {
1578
0
      rapidLFNST = true;
1579
1580
0
      if (m_pcEncCfg->m_LFNST > 2)
1581
0
      {
1582
0
        rapidDCT    = true;
1583
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
1584
0
      }
1585
0
    }
1586
1587
0
    saveCS.pcv              = cs.pcv;
1588
0
    saveCS.picture          = cs.picture;
1589
0
    saveCS.area.repositionTo( cs.area);
1590
1591
0
    // for ISP the TU added below is created on the partitioner's current area after the split
    if (cu.ispMode)
1592
0
    {
1593
0
      partitioner.splitCurrArea(ispType, cs);
1594
0
    }
1595
1596
0
    TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1597
1598
0
    if (cu.ispMode)
1599
0
    {
1600
0
      // rebuild the backup TU list in saveCS: one TU per part visited by the partitioner
      saveCS.clearTUs();
1601
0
      do
1602
0
      {
1603
0
        saveCS.addTU(
1604
0
          CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1605
0
          partitioner.chType, cs.cus[0]);
1606
0
      } while (partitioner.nextPart(cs));
1607
1608
0
      partitioner.exitCurrSplit();
1609
0
    }
1610
0
    else
1611
0
    {
1612
0
      // reuse the first TU of saveCS as backup TU (create it if saveCS has none) and reset it to the current area
      tmpTU = saveCS.tus.empty() ? &saveCS.addTU( currArea, partitioner.chType, nullptr ) : saveCS.tus.front();
1613
0
      tmpTU->initData();
1614
0
      tmpTU->UnitArea::operator=( currArea );
1615
0
    }
1616
1617
1618
0
    // candidate list: .first is the value written to tu.mtsIdx, .second an enable flag read below.
    // Entry 0 carries index 0; the optional next entry carries index 1 (compared against MTS_SKIP further down)
    std::vector<TrMode> trModes{ TrMode(0, true) };
1619
0
    if (tsAllowed)
1620
0
    {
1621
0
      trModes.push_back(TrMode(1, true));
1622
0
    }
1623
0
    double dct2Cost           = MAX_DOUBLE;
1624
0
    double trGrpStopThreshold = 1.001;
1625
0
    double trGrpBestCost      = MAX_DOUBLE;
1626
1627
0
    if (mtsAllowed)
1628
0
    {
1629
0
      if (m_pcEncCfg->m_LFNST)
1630
0
      {
1631
0
        // with LFNST configured, the order of the two mixed DST7/DCT8 candidates depends on
        // whether the intra direction is below 34
        uint32_t uiIntraMode = cs.cus[0]->intraDir[partitioner.chType];
1632
0
        int MTScur           = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7;
1633
1634
0
        trModes.push_back(TrMode(     2, true));
1635
0
        trModes.push_back(TrMode(MTScur, true));
1636
1637
0
        MTScur = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8;
1638
1639
0
        trModes.push_back(TrMode(MTScur,            true));
1640
0
        trModes.push_back(TrMode(MTS_DST7_DST7 + 3, true));
1641
0
      }
1642
0
      else
1643
0
      {
1644
0
        // without LFNST the MTS candidates with indices 2..5 are appended in plain order
        for (int i = 2; i < 6; i++)
1645
0
        {
1646
0
          trModes.push_back(TrMode(i, true));
1647
0
        }
1648
0
      }
1649
0
    }
1650
1651
0
    // NOTE(review): xPreCheckMTS receives trModes by pointer and presumably clears the .second
    // flag of candidates it rules out - confirm against its implementation
    if ((EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed))
1652
0
    {
1653
0
      xPreCheckMTS(tu, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, predBuf);
1654
0
      // TS-only case: if entry 1 is flagged off, only entry 0 remains to be tested
      if (!mtsAllowed && !trModes[1].second)
1655
0
      {
1656
0
        EndMTS = 0;
1657
0
      }
1658
0
    }
1659
1660
0
    // cleared further down to terminate the transform-candidate loop early
    bool NStopMTS = true;
1661
1662
0
    // outer loop: transform candidates (index into trModes); inner loop: LFNST indices.
    // EndMTS and endLfnstIdx may be lowered inside the loops to cut the search short.
    for (int modeId = 0; modeId <= EndMTS && NStopMTS; modeId++)
1663
0
    {
1664
0
      if (modeId > 1)
1665
0
      {
1666
0
        trGrpBestCost = MAX_DOUBLE;
1667
0
      }
1668
0
      for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
1669
0
      {
1670
0
        // LFNST is only combined with the first transform candidate
        if (lfnstIdx && modeId)
1671
0
        {
1672
0
          continue;
1673
0
        }
1674
0
        if (mtsAllowed || tsAllowed)
1675
0
        {
1676
0
          // with m_TS set, leave the LFNST loop once the best candidate index so far equals MTS_SKIP
          if (m_pcEncCfg->m_TS && bestMTS == MTS_SKIP)
1677
0
          {
1678
0
            break;
1679
0
          }
1680
0
          // without LFNST configured, skip MTS candidates that are flagged off in trModes
          if (!m_pcEncCfg->m_LFNST && !trModes[modeId].second && mtsAllowed)
1681
0
          {
1682
0
            continue;
1683
0
          }
1684
1685
0
          tu.mtsIdx[COMP_Y] = trModes[modeId].first;
1686
0
        }
1687
1688
0
        // ISP: skip LFNST indices whose numTotalParts is zero for this ISP mode;
        // when index 2 is skipped the loop bound is lowered to 1
        if (cu.ispMode && lfnstIdx)
1689
0
        {
1690
0
          if (m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] == 0)
1691
0
          {
1692
0
            if (lfnstIdx == 2)
1693
0
            {
1694
0
              endLfnstIdx = 1;
1695
0
            }
1696
0
            continue;
1697
0
          }
1698
0
        }
1699
1700
0
        cu.lfnstIdx                          = lfnstIdx;
1701
0
        cuCtx.lfnstLastScanPos               = false;
1702
0
        cuCtx.violatesLfnstConstrained[CH_L] = false;
1703
0
        cuCtx.violatesLfnstConstrained[CH_C] = false;
1704
1705
0
        // every candidate except the very first starts from the saved initial CABAC context state
        if ((lfnstIdx != startLfnstIdx) || (modeId))
1706
0
        {
1707
0
          m_CABACEstimator->getCtx() = ctxStart;
1708
0
        }
1709
1710
0
        singleDistTmpLuma = 0;
1711
1712
0
        if (cu.ispMode)
1713
0
        {
1714
0
          // ISP candidate: cost, bits, distortion and the cbf flag are produced by xTestISP()
          splitCbfLuma = false;
1715
1716
0
          partitioner.splitCurrArea(ispType, cs);
1717
1718
0
          singleCostTmp = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLuma, singleTmpFracBits, singleDistTmpLuma, cuCtx);
1719
1720
0
          partitioner.exitCurrSplit();
1721
1722
0
          if (modeId && (singleCostTmp == MAX_DOUBLE))
1723
0
          {
1724
0
            m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] = 0;
1725
0
          }
1726
1727
0
          // store the ISP cost when numMode is 1, or when m_ISP >= 2 and numMode is at most 1
          bool storeCost = (numMode == 1) ? true : false;
1728
1729
0
          if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1730
0
          {
1731
0
            storeCost = true;
1732
0
          }
1733
1734
0
          if (storeCost)
1735
0
          {
1736
0
            m_ispTestedModes[0].bestCost[cu.ispMode - 1] = singleCostTmp;
1737
0
          }
1738
0
        }
1739
0
        else
1740
0
        {
1741
0
          // NOTE(review): TrLoad is set in exactly the cases where xPreCheckMTS() ran above (for
          // lfnstIdx 0 in the TS-only case); presumably it lets xIntraCodingTUBlock reuse data
          // prepared there - confirm against xIntraCodingTUBlock
          bool TrLoad = (EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed && (lfnstIdx == 0)) ? true : false;
1742
1743
0
          xIntraCodingTUBlock(tu, COMP_Y, false, singleDistTmpLuma, &numSig, predBuf, TrLoad);
1744
1745
0
          cuCtx.mtsLastScanPos = false;
1746
          //----- determine rate and r-d cost -----
          // A candidate with a zero luma cbf is rejected (cost MAX_DOUBLE) if, with sps.LFNST, it is
          // the last non-zero candidate index and the SPS enables transform skip, or, without
          // sps.LFNST, its trModes index (.first) is non-zero.
1747
0
        if ((sps.LFNST ? (modeId == EndMTS && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1748
0
        {
1749
0
          singleCostTmp = MAX_DOUBLE;
1750
0
        }
1751
0
        else
1752
0
        {
1753
0
          m_ispTestedModes[0].IspType      = TU_NO_ISP;
1754
0
          m_ispTestedModes[0].subTuCounter = -1;
1755
0
          singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, &cuCtx);
1756
1757
0
          // an MTS candidate (mtsIdx above MTS_SKIP) is only valid if cuCtx.mtsLastScanPos is set
          // after the rate estimation; it was reset to false right before
          if (tu.mtsIdx[COMP_Y] > MTS_SKIP)
1758
0
          {
1759
0
            if (!cuCtx.mtsLastScanPos)
1760
0
            {
1761
0
              singleCostTmp = MAX_DOUBLE;
1762
0
            }
1763
0
            else
1764
0
            {
1765
0
              singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1766
0
            }
1767
0
          }
1768
0
          else
1769
0
          {
1770
0
            singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma);
1771
0
          }
1772
0
        }
1773
1774
0
          // early termination after the very first candidate: if its cost exceeds
          // bestCostSoFar * thresholdDCT, no further transform candidates are tested
          // (and with rapidDCT no LFNST index either)
          if (((EndMTS && (m_pcEncCfg->m_MTS == 2)) || rapidLFNST) && modeId == 0 && lfnstIdx == 0)
1775
0
          {
1776
0
            if (singleCostTmp > bestCostSoFar * thresholdDCT)
1777
0
            {
1778
0
              EndMTS = 0;
1779
1780
0
              if (rapidDCT)
1781
0
              {
1782
0
                endLfnstIdx = 0;   // break the loop but do not cpy best
1783
0
              }
1784
0
            }
1785
0
          }
1786
1787
0
          // LFNST candidate for which cuCtx.lfnstLastScanPos was not set during rate estimation
          if (lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode)
1788
0
          {
1789
0
            // rootCbfL: true if any valid transform block of the TU has a non-zero cbf
            bool rootCbfL = false;
1790
1791
0
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cu.cs->pcv); t++)
1792
0
            {
1793
0
              rootCbfL |= tu.cbf[t] != 0;
1794
0
            }
1795
1796
0
            if (rapidLFNST && !rootCbfL)
1797
0
            {
1798
0
              endLfnstIdx = lfnstIdx; // break the loop
1799
0
            }
1800
0
            // separate tree: use rootCbfL; otherwise, for non-4:0:0 content whose first TU has a
            // blocks[1] dimension below 4, use the luma cbf at the current depth; else rootCbfL
            bool cbfAtZeroDepth = CU::isSepTree(cu)
1801
0
              ? rootCbfL
1802
0
              : (cs.area.chromaFormat != CHROMA_400 && std::min(cu.firstTU->blocks[1].width, cu.firstTU->blocks[1].height) < 4)
1803
0
                ? TU::getCbfAtDepth(tu, COMP_Y, currDepth)
1804
0
                : rootCbfL;
1805
1806
0
            // coded coefficients without lfnstLastScanPos: reject this LFNST candidate
            if (cbfAtZeroDepth)
1807
0
            {
1808
0
              singleCostTmp = MAX_DOUBLE;
1809
0
            }
1810
0
          }
1811
0
        }
1812
1813
0
        // new best candidate: record its cost, distortion, bits and indices
        if (singleCostTmp < dSingleCost)
1814
0
        {
1815
0
          trGrpBestCost  = singleCostTmp;
1816
0
          dSingleCost    = singleCostTmp;
1817
0
          singleDistLuma = singleDistTmpLuma;
1818
0
          singleFracBits = singleTmpFracBits;
1819
0
          bestLfnstIdx   = lfnstIdx;
1820
0
          bestMTS        = modeId;
1821
1822
0
          // tighten the bound handed to subsequent xTestISP() calls
          if (dSingleCost < bestCostForISP)
1823
0
          {
1824
0
            bestCostForISP = dSingleCost;
1825
0
          }
1826
1827
0
          splitCbfLumaSum = splitCbfLuma;
1828
1829
0
          if (lfnstIdx == 0 && modeId == 0 && cu.ispMode == 0)
1830
0
          {
1831
0
            // remember the cost of the first candidate for the NStopMTS ratio test below
            dct2Cost = singleCostTmp;
1832
1833
0
            // first candidate has no luma coefficients: do not test further transform candidates
            // (and with rapidLFNST no LFNST index either)
            if (!TU::getCbfAtDepth(tu, COMP_Y, currDepth))
1834
0
            {
1835
0
              if (rapidLFNST)
1836
0
              {
1837
0
                 endLfnstIdx = 0;   // break the loop but do not cpy best
1838
0
              }
1839
1840
0
              EndMTS = 0;
1841
0
            }
1842
0
          }
1843
1844
0
          // back up the best candidate unless it sits at the current loop bounds; the same
          // condition guards the restore after the loops
          if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1845
0
          {
1846
0
            if (cu.ispMode)
1847
0
            {
1848
0
              saveCS.getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1849
1850
0
              for (uint32_t j = 0; j < cs.tus.size(); j++)
1851
0
              {
1852
0
                saveCS.tus[j]->copyComponentFrom(*cs.tus[j], COMP_Y);
1853
0
              }
1854
0
            }
1855
0
            else
1856
0
            {
1857
0
              saveCS.getPredBuf(tu.Y()).copyFrom(cs.getPredBuf(tu.Y()));
1858
0
              saveCS.getRecoBuf(tu.Y()).copyFrom(cs.getRecoBuf(tu.Y()));
1859
1860
0
              tmpTU->copyComponentFrom(tu, COMP_Y);
1861
0
            }
1862
1863
0
            ctxBest = m_CABACEstimator->getCtx();
1864
0
          }
1865
      
1866
0
        }
1867
0
        else
1868
0
        {
1869
0
          // candidate did not improve on the best cost: with rapidLFNST stop testing LFNST indices
          if( rapidLFNST )
1870
0
          {
1871
0
            endLfnstIdx = lfnstIdx; // break the loop
1872
0
          }
1873
0
        }
1874
0
      }
1875
0
      // with LFNST configured and m_MTS == 2: after a non-first, non-last transform candidate,
      // continue only if that candidate is the current best (or modeId is 1) and
      // dct2Cost / trGrpBestCost stays below trGrpStopThreshold
      if (m_pcEncCfg->m_LFNST && m_pcEncCfg->m_MTS == 2 && modeId && modeId != EndMTS)
1876
0
      {
1877
0
        NStopMTS = false;
1878
1879
0
        if (bestMTS || bestLfnstIdx)
1880
0
        {
1881
0
          if ((modeId > 1 && bestMTS == modeId) || modeId == 1)
1882
0
          {
1883
0
            NStopMTS = (dct2Cost / trGrpBestCost) < trGrpStopThreshold;
1884
0
          }
1885
0
        }
1886
0
      }
1887
0
    }
1888
1889
0
    cu.lfnstIdx = bestLfnstIdx;
1890
0
    if (dSingleCost != MAX_DOUBLE)
1891
0
    {
1892
0
      // restore the backed-up best candidate (TU data, reco, CABAC contexts) into cs
      if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS)
1893
0
      {
1894
0
        if (cu.ispMode)
1895
0
        {
1896
0
          const UnitArea& currArea = partitioner.currArea();
1897
0
          cs.getRecoBuf(currArea.Y()).copyFrom(saveCS.getRecoBuf(currArea.Y()));
1898
1899
0
          // cs holds a different number of TUs than the backup: add TUs for further parts until the counts match
          if (saveCS.tus.size() != cs.tus.size())
1900
0
          {
1901
0
            partitioner.splitCurrArea(ispType, cs);
1902
1903
0
            do
1904
0
            {
1905
0
              partitioner.nextPart(cs);
1906
0
              cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType),
1907
0
                partitioner.chType, cs.cus[0]);
1908
0
            } while (saveCS.tus.size() != cs.tus.size());
1909
1910
0
            partitioner.exitCurrSplit();
1911
0
          }
1912
1913
0
          for (uint32_t j = 0; j < saveCS.tus.size(); j++)
1914
0
          {
1915
0
            cs.tus[j]->copyComponentFrom(*saveCS.tus[j], COMP_Y);
1916
0
          }
1917
0
        }
1918
0
        else
1919
0
        {
1920
0
          cs.getRecoBuf(tu.Y()).copyFrom(saveCS.getRecoBuf(tu.Y()));
1921
1922
0
          tu.copyComponentFrom(*tmpTU, COMP_Y);
1923
0
        }
1924
1925
0
        m_CABACEstimator->getCtx() = ctxBest;
1926
0
      }
1927
1928
      // otherwise this would've happened in useSubStructure
1929
0
      cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y()));
1930
0
    }
1931
0
  }
1932
0
  else
1933
0
  {
1934
0
    // single candidate only (no LFNST, MTS or TS alternatives): code it once, without backup
    if (cu.ispMode)
1935
0
    {
1936
0
      const PartSplit ispType = CU::getISPType(cu, COMP_Y);
1937
0
      partitioner.splitCurrArea(ispType, cs);
1938
1939
0
      CUCtx      cuCtx;
1940
0
      dSingleCost = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLumaSum, singleFracBits, singleDistLuma, cuCtx);
1941
0
      partitioner.exitCurrSplit();
1942
0
      // same ISP cost bookkeeping as in the search branch above
      bool storeCost = (numMode == 1) ? true : false;
1943
0
      if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1))
1944
0
      {
1945
0
        storeCost = true;
1946
0
      }
1947
0
      if (storeCost)
1948
0
      {
1949
0
        m_ispTestedModes[0].bestCost[cu.ispMode - 1] = dSingleCost;
1950
0
      }
1951
0
    }
1952
0
    else
1953
0
    {
1954
0
      TransformUnit& tu =
1955
0
        cs.addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]);
1956
0
      tu.depth = currDepth;
1957
1958
0
      CHECK(!tu.Y().valid(), "Invalid TU");
1959
0
      xIntraCodingTUBlock(tu, COMP_Y, false, singleDistLuma, &numSig, predBuf);
1960
      //----- determine rate and r-d cost -----
1961
0
      m_ispTestedModes[0].IspType = TU_NO_ISP;
1962
0
      m_ispTestedModes[0].subTuCounter = -1;
1963
0
      singleFracBits = xGetIntraFracBitsQT(cs, partitioner, true);
1964
0
      dSingleCost = m_pcRdCost->calcRdCost(singleFracBits, singleDistLuma);
1965
0
    }
1966
0
  }
1967
1968
0
  // ISP: write the cbf of the selected candidate to every TU whose luma block lies inside the current area
  if (cu.ispMode)
1969
0
  { 
1970
0
    for (auto& ptu : cs.tus)
1971
0
    {
1972
0
      if (currArea.Y().contains(ptu->Y()))
1973
0
      {
1974
0
        TU::setCbfAtDepth(*ptu, COMP_Y, currDepth, splitCbfLumaSum ? 1 : 0);
1975
0
      }
1976
0
    }
1977
0
  }
1978
0
  // publish the result: distortion and bits are accumulated, the cost is overwritten
  cs.dist     += singleDistLuma;
1979
0
  cs.fracBits += singleFracBits;
1980
0
  cs.cost      = dSingleCost;
1981
1982
0
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!cs.slice->isIntra() + cs.slice->depth] );
1983
0
  STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !cs.slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( cs.area.lheight() )][Log2( cs.area.lwidth() )] );
1984
0
}
1985
1986
ChromaCbfs IntraSearch::xIntraChromaCodingQT(CodingStructure& cs, Partitioner& partitioner)
1987
0
{
1988
0
  UnitArea    currArea      = partitioner.currArea();
1989
1990
0
  if( !currArea.Cb().valid() ) 
1991
0
    return ChromaCbfs(false);
1992
1993
0
  TransformUnit& currTU     = *cs.getTU( currArea.chromaPos(), CH_C );
1994
0
  const CodingUnit& cu  = *cs.getCU( currArea.chromaPos(), CH_C, TREE_D );
1995
0
  ChromaCbfs cbfs(false);
1996
0
  uint32_t   currDepth = partitioner.currTrDepth;
1997
0
  const bool useTS = cs.picture->useTS;
1998
0
  if (currDepth == currTU.depth)
1999
0
  {
2000
0
    if (!currArea.Cb().valid() || !currArea.Cr().valid())
2001
0
    {
2002
0
      return cbfs;
2003
0
    }
2004
2005
0
    CodingStructure& saveCS = *m_pSaveCS[1];
2006
0
    saveCS.pcv = cs.pcv;
2007
0
    saveCS.picture = cs.picture;
2008
0
    saveCS.area.repositionTo(cs.area);
2009
2010
0
    TransformUnit& tmpTU = saveCS.tus.empty() ? saveCS.addTU(currArea, partitioner.chType, nullptr) : *saveCS.tus.front();
2011
0
    tmpTU.initData();
2012
0
    tmpTU.UnitArea::operator=(currArea);
2013
0
    const unsigned      numTBlocks = getNumberValidTBlocks(*cs.pcv);
2014
2015
0
    CompArea& cbArea = currTU.blocks[COMP_Cb];
2016
0
    CompArea& crArea = currTU.blocks[COMP_Cr];
2017
0
    double     bestCostCb = MAX_DOUBLE;
2018
0
    double     bestCostCr = MAX_DOUBLE;
2019
0
    Distortion bestDistCb = 0;
2020
0
    Distortion bestDistCr = 0;
2021
2022
0
    TempCtx ctxStartTU(m_CtxCache);
2023
0
    TempCtx ctxStart(m_CtxCache);
2024
0
    TempCtx ctxBest(m_CtxCache);
2025
2026
0
    ctxStartTU = m_CABACEstimator->getCtx();
2027
0
    ctxStart = m_CABACEstimator->getCtx();
2028
0
    currTU.jointCbCr = 0;
2029
2030
    // Do predictions here to avoid repeating the "default0Save1Load2" stuff
2031
0
    int  predMode = cu.bdpcmM[CH_C] ? BDPCM_IDX : CU::getFinalIntraMode(cu, CH_C);
2032
2033
0
    PelBuf piPredCb = cs.getPredBuf(COMP_Cb);
2034
0
    PelBuf piPredCr = cs.getPredBuf(COMP_Cr);
2035
2036
0
    initIntraPatternChType(*currTU.cu, cbArea);
2037
0
    initIntraPatternChType(*currTU.cu, crArea);
2038
2039
0
    if (CU::isLMCMode(predMode))
2040
0
    {
2041
0
      loadLMLumaRecPels(cu, cbArea);
2042
0
      predIntraChromaLM(COMP_Cb, piPredCb, cu, cbArea, predMode);
2043
0
      predIntraChromaLM(COMP_Cr, piPredCr, cu, crArea, predMode);
2044
0
    }
2045
0
    else
2046
0
    {
2047
0
      predIntraAng(COMP_Cb, piPredCb, cu);
2048
0
      predIntraAng(COMP_Cr, piPredCr, cu);
2049
0
    }
2050
2051
    // determination of chroma residuals including reshaping and cross-component prediction
2052
    //----- get chroma residuals -----
2053
0
    PelBuf resiCb = cs.getResiBuf(COMP_Cb);
2054
0
    PelBuf resiCr = cs.getResiBuf(COMP_Cr);
2055
0
    resiCb.subtract(cs.getOrgBuf(COMP_Cb), piPredCb);
2056
0
    resiCr.subtract(cs.getOrgBuf(COMP_Cr), piPredCr);
2057
2058
    //----- get reshape parameter ----
2059
0
    ReshapeData& reshapeData = cs.picture->reshapeData;
2060
0
    bool doReshaping = (cs.picHeader->lmcsEnabled && cs.picHeader->lmcsChromaResidualScale && (cs.slice->isIntra() || reshapeData.getCTUFlag()) && (cbArea.width * cbArea.height > 4));
2061
0
    if (doReshaping)
2062
0
    {
2063
0
      const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].size()));
2064
0
      const CompArea& areaY = CompArea(COMP_Y, currTU.chromaFormat, area);
2065
0
      currTU.chromaAdj = reshapeData.calculateChromaAdjVpduNei(currTU, areaY, currTU.cu->treeType);
2066
0
    }
2067
2068
    //===== store original residual signals (std and crossCompPred) =====
2069
0
    for( int k = 0; k < 5; k++ )
2070
0
    {
2071
0
      m_orgResiCb[k].compactResize( cbArea );
2072
0
      m_orgResiCr[k].compactResize( crArea );
2073
0
    }
2074
0
    for (int k = 0; k < 1; k += 4)
2075
0
    {
2076
0
      m_orgResiCb[k].copyFrom(resiCb);
2077
0
      m_orgResiCr[k].copyFrom(resiCr);
2078
2079
0
      if (doReshaping)
2080
0
      {
2081
0
        int cResScaleInv = currTU.chromaAdj;
2082
0
        m_orgResiCb[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cb]);
2083
0
        m_orgResiCr[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cr]);
2084
0
      }
2085
0
    }
2086
2087
0
    CUCtx cuCtx;
2088
0
    cuCtx.isDQPCoded = true;
2089
0
    cuCtx.isChromaQpAdjCoded = true;
2090
0
    cuCtx.lfnstLastScanPos = false;
2091
2092
0
    CodingStructure& saveCScur = *m_pSaveCS[2];
2093
2094
0
    saveCScur.pcv = cs.pcv;
2095
0
    saveCScur.picture = cs.picture;
2096
0
    saveCScur.area.repositionTo(cs.area);
2097
2098
0
    TransformUnit& tmpTUcur = saveCScur.tus.empty() ? saveCScur.addTU(currArea, partitioner.chType, nullptr) : *saveCScur.tus.front();
2099
0
    tmpTUcur.initData();
2100
0
    tmpTUcur.UnitArea::operator=(currArea);
2101
2102
0
    TempCtx ctxBestTUL(m_CtxCache);
2103
2104
0
    const SPS& sps = *cs.sps;
2105
0
    double     bestCostCbcur = MAX_DOUBLE;
2106
0
    double     bestCostCrcur = MAX_DOUBLE;
2107
0
    Distortion bestDistCbcur = 0;
2108
0
    Distortion bestDistCrcur = 0;
2109
2110
0
    int  endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8))
2111
0
      || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) || !sps.LFNST ? 0 : 2;
2112
0
    int  startLfnstIdx = 0;
2113
0
    int  bestLfnstIdx = 0;
2114
0
    bool testLFNST = sps.LFNST;
2115
2116
    // speedUps LFNST
2117
0
    bool rapidLFNST = false;
2118
0
    if (m_pcEncCfg->m_LFNST > 1)
2119
0
    {
2120
0
      rapidLFNST = true;
2121
0
      if (m_pcEncCfg->m_LFNST > 2)
2122
0
      {
2123
0
        endLfnstIdx = endLfnstIdx ? 1 : 0;
2124
0
      }
2125
0
    }
2126
0
    int ts_used = 0;
2127
0
    bool testTS = false;
2128
0
    if (partitioner.chType != CH_C)
2129
0
    {
2130
0
      startLfnstIdx = currTU.cu->lfnstIdx;
2131
0
      endLfnstIdx = currTU.cu->lfnstIdx;
2132
0
      bestLfnstIdx = currTU.cu->lfnstIdx;
2133
0
      testLFNST  = false;
2134
0
      rapidLFNST = false;
2135
0
      ts_used = currTU.mtsIdx[COMP_Y];
2136
0
    }
2137
0
    if (cu.bdpcmM[CH_C])
2138
0
    {
2139
0
      endLfnstIdx = 0;
2140
0
      testLFNST = false;
2141
0
    }
2142
2143
0
    double dSingleCostAll = MAX_DOUBLE;
2144
0
    double singleCostTmpAll = 0;
2145
2146
0
    for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++)
2147
0
    {
2148
0
      if (rapidLFNST && lfnstIdx)
2149
0
      {
2150
0
        if ((lfnstIdx == 2) && (bestLfnstIdx == 0))
2151
0
        {
2152
0
          continue;
2153
0
        }
2154
0
      }
2155
2156
0
      currTU.cu->lfnstIdx = lfnstIdx;
2157
0
      if (lfnstIdx)
2158
0
      {
2159
0
        m_CABACEstimator->getCtx() = ctxStartTU;
2160
0
      }
2161
2162
0
      cuCtx.lfnstLastScanPos = false;
2163
0
      cuCtx.violatesLfnstConstrained[CH_L] = false;
2164
0
      cuCtx.violatesLfnstConstrained[CH_C] = false;
2165
2166
0
      for (uint32_t c = COMP_Cb; c < numTBlocks; c++)
2167
0
      {
2168
0
        const ComponentID compID = ComponentID(c);
2169
0
        const CompArea& area = currTU.blocks[compID];
2170
0
        double     dSingleCost = MAX_DOUBLE;
2171
0
        Distortion singleDistCTmp = 0;
2172
0
        double     singleCostTmp = 0;
2173
0
        bool tsAllowed = useTS && TU::isTSAllowed(currTU, compID) && m_pcEncCfg->m_useChromaTS && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2174
0
        if ((partitioner.chType == CH_L) && (!ts_used))
2175
0
        {
2176
0
          tsAllowed = false;
2177
0
        }
2178
0
        uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests       
2179
0
        std::vector<TrMode> trModes;
2180
0
        if (nNumTransformCands > 1)
2181
0
        {
2182
0
          trModes.push_back(TrMode(0, true));   // DCT2
2183
0
          trModes.push_back(TrMode(1, true));   // TS
2184
0
          testTS = true;
2185
0
        }
2186
0
        bool cbfDCT2 = true;
2187
0
        const bool isLastMode = testLFNST || cs.sps->jointCbCr ||  tsAllowed ? false : true;
2188
0
        int bestModeId = 0;
2189
0
        ctxStart = m_CABACEstimator->getCtx();
2190
0
        for (int modeId = 0; modeId < nNumTransformCands; modeId++)
2191
0
        {
2192
0
          if (doReshaping || lfnstIdx || modeId)
2193
0
          {
2194
0
            resiCb.copyFrom(m_orgResiCb[0]);
2195
0
            resiCr.copyFrom(m_orgResiCr[0]);
2196
0
          }
2197
0
          if (modeId == 0)
2198
0
          {
2199
0
            if ( tsAllowed)
2200
0
            {
2201
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, compID);
2202
0
            }
2203
0
          }
2204
2205
0
          currTU.mtsIdx[compID] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : modeId;
2206
2207
0
          if (modeId)
2208
0
          {
2209
0
            if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP)
2210
0
            {
2211
0
              break;
2212
0
            }
2213
0
            m_CABACEstimator->getCtx() = ctxStart;
2214
0
          }
2215
0
          singleDistCTmp = 0;
2216
0
          if (tsAllowed)
2217
0
          {
2218
0
            xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp, 0, 0, true);
2219
0
            if ((modeId == 0) && (!trModes[modeId + 1].second))
2220
0
            {
2221
0
              nNumTransformCands = 1;
2222
0
            }
2223
0
          }
2224
0
          else
2225
0
        {
2226
0
          xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp);
2227
0
        }
2228
0
        if (((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmM[CH_C])
2229
0
          && !TU::getCbf(currTU, compID)))   // In order not to code TS flag when cbf is zero, the case for TS with
2230
                                             // cbf being zero is forbidden.
2231
0
        {
2232
0
          singleCostTmp = MAX_DOUBLE;
2233
0
        }
2234
0
        else
2235
0
        {
2236
0
          uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID, &cuCtx);
2237
0
          singleCostTmp = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp);
2238
0
        }
2239
2240
0
        if (singleCostTmp < dSingleCost)
2241
0
        {
2242
0
          dSingleCost = singleCostTmp;
2243
2244
0
          if (compID == COMP_Cb)
2245
0
          {
2246
0
            bestCostCb = singleCostTmp;
2247
0
            bestDistCb = singleDistCTmp;
2248
0
          }
2249
0
          else
2250
0
          {
2251
0
            bestCostCr = singleCostTmp;
2252
0
            bestDistCr = singleDistCTmp;
2253
0
          }
2254
0
          bestModeId = modeId;
2255
0
          if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2)
2256
0
          {
2257
0
            cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth);
2258
0
          }
2259
0
          if (!isLastMode)
2260
0
          {
2261
0
            saveCS.getRecoBuf(area).copyFrom(cs.getRecoBuf(area));
2262
0
            tmpTU.copyComponentFrom(currTU, compID);
2263
0
            ctxBest = m_CABACEstimator->getCtx();
2264
0
          }
2265
0
        }
2266
0
        }
2267
0
        if (testTS && ((c == COMP_Cb && bestModeId < (nNumTransformCands - 1)) ))
2268
0
        {
2269
0
          m_CABACEstimator->getCtx() = ctxBest;
2270
2271
0
          currTU.copyComponentFrom(tmpTU, COMP_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf
2272
0
        }
2273
0
      }
2274
2275
0
      singleCostTmpAll = bestCostCb + bestCostCr;
2276
2277
0
      bool rootCbfL = false;
2278
0
      if (testLFNST)
2279
0
      {
2280
0
        for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2281
0
        {
2282
0
          rootCbfL |= bool(tmpTU.cbf[t]);
2283
0
        }
2284
0
        if (rapidLFNST && !rootCbfL)
2285
0
        {
2286
0
          endLfnstIdx = lfnstIdx; // end this
2287
0
        }
2288
0
      }
2289
2290
0
      if (testLFNST && lfnstIdx && !cuCtx.lfnstLastScanPos)
2291
0
      {
2292
0
        bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu)
2293
0
          ? rootCbfL : (cs.area.chromaFormat != CHROMA_400
2294
0
            && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2295
0
          ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2296
0
        if (cbfAtZeroDepth)
2297
0
        {
2298
0
          singleCostTmpAll = MAX_DOUBLE;
2299
0
        }
2300
0
      }
2301
0
      if ((testLFNST || testTS) && (singleCostTmpAll < dSingleCostAll))
2302
0
      {
2303
0
        bestLfnstIdx = lfnstIdx;
2304
0
        if ((lfnstIdx != endLfnstIdx) || testTS)
2305
0
        {
2306
0
          dSingleCostAll = singleCostTmpAll;
2307
2308
0
          bestCostCbcur = bestCostCb;
2309
0
          bestCostCrcur = bestCostCr;
2310
0
          bestDistCbcur = bestDistCb;
2311
0
          bestDistCrcur = bestDistCr;
2312
2313
0
          saveCScur.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2314
0
          saveCScur.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2315
2316
0
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cb);
2317
0
          tmpTUcur.copyComponentFrom(tmpTU, COMP_Cr);
2318
0
        }
2319
0
        ctxBestTUL = m_CABACEstimator->getCtx();
2320
0
      }
2321
0
    }
2322
0
    if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2323
0
    {
2324
0
      bestCostCb = bestCostCbcur;
2325
0
      bestCostCr = bestCostCrcur;
2326
0
      bestDistCb = bestDistCbcur;
2327
0
      bestDistCr = bestDistCrcur;
2328
0
      currTU.cu->lfnstIdx = bestLfnstIdx;
2329
0
      if (!cs.sps->jointCbCr)
2330
0
      {
2331
0
        cs.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2332
0
        cs.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2333
2334
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2335
0
        currTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2336
2337
0
        m_CABACEstimator->getCtx() = ctxBestTUL;
2338
0
      }
2339
0
    }
2340
2341
0
    Distortion bestDistCbCr = bestDistCb + bestDistCr;
2342
2343
0
    if (cs.sps->jointCbCr)
2344
0
    {
2345
0
      if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS)
2346
0
      {
2347
0
        saveCS.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea));
2348
0
        saveCS.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea));
2349
2350
0
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cb);
2351
0
        tmpTU.copyComponentFrom(tmpTUcur, COMP_Cr);
2352
0
        m_CABACEstimator->getCtx() = ctxBestTUL;
2353
0
        ctxBest = m_CABACEstimator->getCtx();
2354
0
      }
2355
      // Test using joint chroma residual coding
2356
0
      double     bestCostCbCr = bestCostCb + bestCostCr;
2357
0
      int        bestJointCbCr = 0;
2358
0
      bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cr)) ||
2359
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cb)) ||
2360
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2));
2361
0
      bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cr)) ||
2362
0
        (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cb)) ||
2363
0
        (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP));
2364
0
      bool       lastIsBest = false;
2365
0
      bool noLFNST1 = false;
2366
0
      if (rapidLFNST && (startLfnstIdx != endLfnstIdx))
2367
0
      {
2368
0
        if (bestLfnstIdx == 2)
2369
0
        {
2370
0
          noLFNST1 = true;
2371
0
        }
2372
0
        else
2373
0
        {
2374
0
          endLfnstIdx = 1;
2375
0
        }
2376
0
      }
2377
2378
0
      for (int lfnstIdxj = startLfnstIdx; lfnstIdxj <= endLfnstIdx; lfnstIdxj++)
2379
0
      {
2380
0
        if (rapidLFNST && noLFNST1 && (lfnstIdxj == 1))
2381
0
        {
2382
0
          continue;
2383
0
        }
2384
0
        currTU.cu->lfnstIdx = lfnstIdxj;
2385
0
        std::vector<int> jointCbfMasksToTest;
2386
0
        if (TU::getCbf(tmpTU, COMP_Cb) || TU::getCbf(tmpTU, COMP_Cr))
2387
0
        {
2388
0
          jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, m_orgResiCb, m_orgResiCr);
2389
0
        }
2390
0
        for (int cbfMask : jointCbfMasksToTest)
2391
0
        {
2392
0
          currTU.jointCbCr = (uint8_t)cbfMask;
2393
0
          ComponentID codeCompId = ((currTU.jointCbCr >> 1) ? COMP_Cb : COMP_Cr);
2394
0
          ComponentID otherCompId = ((codeCompId == COMP_Cb) ? COMP_Cr : COMP_Cb);
2395
0
          bool tsAllowed = useTS && TU::isTSAllowed(currTU, codeCompId) && (m_pcEncCfg->m_useChromaTS) && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C];
2396
0
          if ((partitioner.chType == CH_L)&& tsAllowed && (currTU.mtsIdx[COMP_Y] != MTS_SKIP))
2397
0
          {
2398
0
            tsAllowed = false;
2399
0
          }
2400
0
          if (!tsAllowed)
2401
0
          {
2402
0
            checkTSOnly = false;
2403
0
          }
2404
0
          uint8_t     numTransformCands = 1 + (tsAllowed && !(checkDCTOnly || checkTSOnly)? 1 : 0); // DCT + TS = 2 tests
2405
0
          std::vector<TrMode> trModes;
2406
0
          if (numTransformCands > 1)
2407
0
          {
2408
0
            trModes.push_back(TrMode(0, true)); // DCT2
2409
0
            trModes.push_back(TrMode(1, true));//TS
2410
0
          }
2411
0
          else
2412
0
          {
2413
0
            currTU.mtsIdx[codeCompId] = checkTSOnly || currTU.cu->bdpcmM[CH_C] ? 1 : 0;
2414
0
          }
2415
2416
0
          for (int modeId = 0; modeId < numTransformCands; modeId++)
2417
0
          {
2418
0
            Distortion distTmp = 0;
2419
0
            currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : MTS_DCT2_DCT2;
2420
0
            if (numTransformCands > 1)
2421
0
            {
2422
0
              currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : trModes[modeId].first;
2423
0
            }
2424
0
            currTU.mtsIdx[otherCompId] = MTS_DCT2_DCT2;
2425
2426
0
            m_CABACEstimator->getCtx() = ctxStartTU;
2427
2428
0
            resiCb.copyFrom(m_orgResiCb[cbfMask]);
2429
0
            resiCr.copyFrom(m_orgResiCr[cbfMask]);
2430
0
            if ((modeId == 0) && (numTransformCands > 1))
2431
0
            {
2432
0
              xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, COMP_Cb);
2433
0
              currTU.mtsIdx[codeCompId] = trModes[modeId].first;
2434
0
              currTU.mtsIdx[(codeCompId == COMP_Cr) ? COMP_Cb : COMP_Cr] = MTS_DCT2_DCT2;
2435
0
            }
2436
0
            cuCtx.lfnstLastScanPos = false;
2437
0
            cuCtx.violatesLfnstConstrained[CH_L] = false;
2438
0
            cuCtx.violatesLfnstConstrained[CH_C] = false;
2439
0
            if (numTransformCands > 1)
2440
0
            {
2441
0
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0, 0, true);
2442
0
              if ((modeId == 0) && !trModes[modeId + 1].second)
2443
0
              {
2444
0
                numTransformCands = 1;
2445
0
              }
2446
0
            }
2447
0
            else
2448
0
            {
2449
0
              xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0);
2450
0
            }
2451
2452
0
            double costTmp = std::numeric_limits<double>::max();
2453
0
            if (distTmp < MAX_DISTORTION)
2454
0
            {
2455
0
              uint64_t bits = xGetIntraFracBitsQTChroma(currTU, COMP_Cb, &cuCtx);
2456
0
              costTmp = m_pcRdCost->calcRdCost(bits, distTmp);
2457
0
            }
2458
0
            else if (!currTU.mtsIdx[codeCompId])
2459
0
            {
2460
0
              numTransformCands = 1;
2461
0
            }
2462
0
            bool rootCbfL = false;
2463
0
            for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++)
2464
0
            {
2465
0
              rootCbfL |= bool(tmpTU.cbf[t]);
2466
0
            }
2467
0
            if (rapidLFNST && !rootCbfL)
2468
0
            {
2469
0
              endLfnstIdx = lfnstIdxj;
2470
0
            }
2471
0
            if (testLFNST && currTU.cu->lfnstIdx && !cuCtx.lfnstLastScanPos)
2472
0
            {
2473
0
              bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) ? rootCbfL
2474
0
                : (cs.area.chromaFormat != CHROMA_400 && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4)
2475
0
                ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL;
2476
0
              if (cbfAtZeroDepth)
2477
0
              {
2478
0
                costTmp = MAX_DOUBLE;
2479
0
              }
2480
0
            }
2481
0
            if (costTmp < bestCostCbCr)
2482
0
            {
2483
0
              bestCostCbCr = costTmp;
2484
0
              bestDistCbCr = distTmp;
2485
0
              bestJointCbCr = currTU.jointCbCr;
2486
2487
              // store data
2488
0
              bestLfnstIdx = lfnstIdxj;
2489
0
              if ((cbfMask != jointCbfMasksToTest.back() || (lfnstIdxj != endLfnstIdx)) || (modeId != (numTransformCands - 1)))
2490
0
              {
2491
0
                saveCS.getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea));
2492
0
                saveCS.getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea));
2493
2494
0
                tmpTU.copyComponentFrom(currTU, COMP_Cb);
2495
0
                tmpTU.copyComponentFrom(currTU, COMP_Cr);
2496
2497
0
                ctxBest = m_CABACEstimator->getCtx();
2498
0
              }
2499
0
              else
2500
0
              {
2501
0
                lastIsBest = true;
2502
0
                cs.cus[0]->lfnstIdx = bestLfnstIdx;
2503
0
              }
2504
0
            }
2505
0
          }
2506
0
        }
2507
2508
        // Retrieve the best CU data (unless it was the very last one tested)
2509
0
      }
2510
0
      if (!lastIsBest)
2511
0
      {
2512
0
        cs.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea));
2513
0
        cs.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea));
2514
2515
0
        cs.cus[0]->lfnstIdx = bestLfnstIdx;
2516
0
        currTU.copyComponentFrom(tmpTU, COMP_Cb);
2517
0
        currTU.copyComponentFrom(tmpTU, COMP_Cr);
2518
0
        m_CABACEstimator->getCtx() = ctxBest;
2519
0
      }
2520
0
      currTU.jointCbCr = (TU::getCbf(currTU, COMP_Cb) || TU::getCbf(currTU, COMP_Cr)) ? bestJointCbCr : 0;
2521
0
    } // jointCbCr
2522
2523
0
    cs.dist += bestDistCbCr;
2524
0
    cuCtx.violatesLfnstConstrained[CH_L] = false;
2525
0
    cuCtx.violatesLfnstConstrained[CH_C] = false;
2526
0
    cuCtx.lfnstLastScanPos = false;
2527
0
    cuCtx.violatesMtsCoeffConstraint = false;
2528
0
    cuCtx.mtsLastScanPos = false;
2529
0
    cbfs.cbf(COMP_Cb) = TU::getCbf(currTU, COMP_Cb);
2530
0
    cbfs.cbf(COMP_Cr) = TU::getCbf(currTU, COMP_Cr);
2531
0
  }
2532
0
  else
2533
0
  {
2534
0
    unsigned   numValidTBlocks = getNumberValidTBlocks(*cs.pcv);
2535
0
    ChromaCbfs SplitCbfs(false);
2536
2537
0
    if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs))
2538
0
    {
2539
0
      partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs);
2540
0
    }
2541
0
    else if (currTU.cu->ispMode)
2542
0
    {
2543
0
      partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs);
2544
0
    }
2545
0
    else
2546
0
      THROW("Implicit TU split not available");
2547
2548
0
    do
2549
0
    {
2550
0
      ChromaCbfs subCbfs = xIntraChromaCodingQT(cs, partitioner);
2551
2552
0
      for (uint32_t ch = COMP_Cb; ch < numValidTBlocks; ch++)
2553
0
      {
2554
0
        const ComponentID compID = ComponentID(ch);
2555
0
        SplitCbfs.cbf(compID) |= subCbfs.cbf(compID);
2556
0
      }
2557
0
    } while (partitioner.nextPart(cs));
2558
2559
0
    partitioner.exitCurrSplit();
2560
2561
    /*if (lumaUsesISP && cs.dist == MAX_UINT) //ahenkel
2562
    {
2563
      return cbfs;
2564
    }*/
2565
0
    {
2566
0
      cbfs.Cb |= SplitCbfs.Cb;
2567
0
      cbfs.Cr |= SplitCbfs.Cr;
2568
2569
0
      if (1)   //(!lumaUsesISP)
2570
0
      {
2571
0
        for (auto& ptu : cs.tus)
2572
0
        {
2573
0
          if (currArea.Cb().contains(ptu->Cb()) || (!ptu->Cb().valid() && currArea.Y().contains(ptu->Y())))
2574
0
          {
2575
0
            TU::setCbfAtDepth(*ptu, COMP_Cb, currDepth, SplitCbfs.Cb);
2576
0
            TU::setCbfAtDepth(*ptu, COMP_Cr, currDepth, SplitCbfs.Cr);
2577
0
          }
2578
0
        }
2579
0
      }
2580
0
    }
2581
0
  }
2582
0
  return cbfs;
2583
0
}
2584
2585
uint64_t IntraSearch::xFracModeBitsIntraLuma(const CodingUnit& cu, const unsigned* mpmLst)
2586
0
{
2587
0
  m_CABACEstimator->resetBits();
2588
2589
0
  if (!cu.ciip)
2590
0
  {
2591
0
    m_CABACEstimator->intra_luma_pred_mode(cu, mpmLst);
2592
0
  }
2593
2594
0
  return m_CABACEstimator->getEstFracBits();
2595
0
}
2596
2597
template<typename T, size_t N, int M>
2598
void IntraSearch::xReduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, SortedPelUnitBufs<M>& sortedPelBuffer, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const CodingUnit& cu, const bool fastMip)
2599
0
{
2600
0
  const int maxCandPerType = numModesForFullRD >> 1;
2601
0
  static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList;
2602
0
  static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList;
2603
0
  const double minCost = candCostList[0];
2604
0
  bool keepOneMip = candModeList.size() > numModesForFullRD;
2605
0
  const int maxNumConv = 3; 
2606
2607
0
  int numConv = 0;
2608
0
  int numMip = 0;
2609
0
  for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++)
2610
0
  {
2611
0
    bool addMode = false;
2612
0
    const ModeInfo& orgMode = candModeList[idx];
2613
2614
0
    if (!orgMode.mipFlg)
2615
0
    {
2616
0
      addMode = (numConv < maxNumConv);
2617
0
      numConv += addMode ? 1:0;
2618
0
    }
2619
0
    else
2620
0
    {
2621
0
      addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip );
2622
0
      keepOneMip = false;
2623
0
      numMip += addMode ? 1:0;
2624
0
    }
2625
0
    if( addMode )
2626
0
    {
2627
0
      tempRdModeList.push_back(orgMode);
2628
0
      tempCandCostList.push_back(candCostList[idx]);
2629
0
    }
2630
0
  }
2631
2632
  // sort Pel Buffer
2633
0
  int i = -1;
2634
0
  for( auto &m: tempRdModeList)
2635
0
  {
2636
0
    if( ! (m == candModeList.at( ++i )) )
2637
0
    {
2638
0
      for( int j = i; j < (int)candModeList.size()-1; )
2639
0
      {
2640
0
        if( m == candModeList.at( ++j ) )
2641
0
        {
2642
0
          sortedPelBuffer.swap( i, j);
2643
0
          break;
2644
0
        }
2645
0
      }
2646
0
    }
2647
0
  }
2648
0
  sortedPelBuffer.reduceTo( (int)tempRdModeList.size() );
2649
2650
0
  if ((cu.lwidth() > 8 && cu.lheight() > 8))
2651
0
  {
2652
    // Sort MIP candidates by Hadamard cost
2653
0
    const int transpOff = getNumModesMip(cu.Y());
2654
0
    static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0);
2655
0
    static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0);
2656
0
    for (uint8_t mode : { 0, 1, 2 })
2657
0
    {
2658
0
      uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0);
2659
0
      updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3);
2660
0
    }
2661
2662
    // Append MIP mode to RD mode list
2663
0
    const int modeListSize = int(tempRdModeList.size());
2664
0
    for (int idx = 0; idx < 3; idx++)
2665
0
    {
2666
0
      const bool     isTransposed = (sortedMipModes[idx] >= transpOff ? true : false);
2667
0
      const uint32_t mipIdx       = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]);
2668
0
      const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx );
2669
0
      bool alreadyIncluded = false;
2670
0
      for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++)
2671
0
      {
2672
0
        if (tempRdModeList[modeListIdx] == mipMode)
2673
0
        {
2674
0
          alreadyIncluded = true;
2675
0
          break;
2676
0
        }
2677
0
      }
2678
2679
0
      if (!alreadyIncluded)
2680
0
      {
2681
0
        tempRdModeList.push_back(mipMode);
2682
0
        tempCandCostList.push_back(0);
2683
0
        if( fastMip ) break;
2684
0
      }
2685
0
    }
2686
0
  }
2687
2688
0
  candModeList = tempRdModeList;
2689
0
  candCostList = tempCandCostList;
2690
0
  numModesForFullRD = int(candModeList.size());
2691
0
}
2692
2693
// Runs the transform pre-check (TrQuant::checktransformsNxN) for one TU.
//
// tu      : transform unit to be examined.
// trModes : transform mode list handed on to checktransformsNxN.
// maxCand : candidate limit handed on to checktransformsNxN. It used to be ignored in favour
//           of m_pcEncCfg->m_MTSIntraMaxCand; the caller's value is now honoured.
// predBuf : optional prediction; when non-null its luma plane is used instead of
//           re-running the prediction (luma path only).
// compID  : component to check. For COMP_Y the prediction and residual of the TU's luma
//           block are rebuilt first; for any other component only the pre-check is run.
void IntraSearch::xPreCheckMTS(TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, PelUnitBuf *predBuf, const ComponentID& compID)
{
  if (compID == COMP_Y)
  {
    CodingStructure&  cs = *tu.cs;
    const CompArea& area = tu.blocks[compID];
    const ReshapeData& reshapeData = cs.picture->reshapeData;
    const CodingUnit& cu = *cs.getCU(area.pos(), CH_L,TREE_D);
    PelBuf piPred = cs.getPredBuf(area);
    PelBuf piResi = cs.getResiBuf(area);

    initIntraPatternChType(*tu.cu, area);

    //===== get prediction signal =====
    if (predBuf)
    {
      // a ready-made prediction was supplied by the caller
      piPred.copyFrom(predBuf->Y());
    }
    else if (CU::isMIP(cu, CH_L))
    {
      initIntraMip(cu);
      predIntraMip(piPred, cu);
    }
    else
    {
      predIntraAng(COMP_Y, piPred, cu);
    }

    //===== get residual signal =====
    if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag())
    {
      // LMCS active for this CTU: subtract from the buffer returned by getRspOrgBuf()
      piResi.subtract(cs.getRspOrgBuf(), piPred);
    }
    else
    {
      CPelBuf piOrg = cs.getOrgBuf(COMP_Y);
      piResi.subtract(piOrg, piPred);
    }
    m_pcTrQuant->checktransformsNxN(tu, trModes, maxCand, compID);
  }
  else
  {
    // With joint Cb-Cr coding the component to check follows from the jointCbCr value:
    // (jointCbCr >> 1) != 0 selects Cb, otherwise Cr. Without it compID is used as is.
    ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID);
    m_pcTrQuant->checktransformsNxN(tu, trModes, maxCand, codeCompId);
  }
}
2737
2738
// Codes the luma sub-partitions delivered by subTuPartitioner one after another and
// accumulates their distortion, fractional bits and RD cost.
//
// bestCostForISP : cost to beat; used for the early termination checks.
// ispType        : split type, recorded in m_ispTestedModes[0] before the bits are estimated.
// splitcbf       : out, OR of the luma CBFs (at the current transform depth) of the coded parts.
// singleFracBits : out, accumulated fractional bits.
// singleDistLuma : out, accumulated luma distortion.
// cuCtx          : isDQPCoded / isChromaQpAdjCoded are set, mtsLastScanPos is cleared per part.
// Returns the accumulated cost, or MAX_DOUBLE when the test was terminated early.
double IntraSearch::xTestISP(CodingStructure& cs, Partitioner& subTuPartitioner, double bestCostForISP, PartSplit ispType, bool& splitcbf, uint64_t& singleFracBits, Distortion& singleDistLuma, CUCtx& cuCtx)
{
  CodingUnit& cu = *cs.cus[0];

  cuCtx.isDQPCoded         = true;
  cuCtx.isChromaQpAdjCoded = true;

  Distortion distSum    = 0;
  uint64_t   bitsSum    = 0;
  double     costSum    = 0;
  int        partsCoded = 0;
  bool       aborted    = false;
  bool       anyLumaCbf = false;

  do
  {
    // Use the TU that already exists at this index, otherwise add one for the current area.
    TransformUnit& partTU = ((cs.tus.size() < (partsCoded + 1)))
      ? cs.addTU(CS::getArea(cs, subTuPartitioner.currArea(), subTuPartitioner.chType,
        subTuPartitioner.treeType),
        subTuPartitioner.chType, cs.cus[0])
      : *cs.tus[partsCoded];
    partTU.depth = subTuPartitioner.currTrDepth;

    // Encode TU
    Distortion partDist = 0;
    xIntraCodingTUBlock(partTU, COMP_Y, false, partDist, 0);
    cuCtx.mtsLastScanPos = false;

    if (partDist == MAX_INT)   // all zero CBF skip
    {
      aborted = true;
      costSum = MAX_DOUBLE;
      break;
    }

    // Bits are only estimated while the cost reached so far plus the new distortion does
    // not already exceed the target. Otherwise the part is accounted with zero bits and
    // the test is flagged as aborted (the loop itself is not left here).
    uint64_t partBits = 0;
    if (m_pcRdCost->calcRdCost(bitsSum, distSum + partDist) > bestCostForISP)
    {
      aborted = true;
    }
    else
    {
      m_ispTestedModes[0].IspType      = ispType;
      m_ispTestedModes[0].subTuCounter = partsCoded;
      partBits = xGetIntraFracBitsQT(cs, subTuPartitioner, true, &cuCtx);
    }
    const double partCost = m_pcRdCost->calcRdCost(partBits, partDist);

    costSum += partCost;
    distSum += partDist;
    bitsSum += partBits;

    partsCoded++;

    anyLumaCbf |= TU::getCbfAtDepth( *cs.getTU(subTuPartitioner.currArea().lumaPos(), subTuPartitioner.chType, partsCoded - 1),
                                     COMP_Y, subTuPartitioner.currTrDepth);

    const int  numParts       = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1];
    const bool partsRemaining = (partsCoded < numParts);
    if ((m_pcEncCfg->m_ISP != 1) || partsRemaining)
    {
      if (costSum > bestCostForISP)
      {
        aborted = true;
        break;
      }
      if (partsRemaining)
      {
        // A stricter limit applies while parts are still outstanding.
        double threshold = 0.91;
        if (numParts == 2)
        {
          threshold = 0.95;
        }
        else if (partsCoded == 1)
        {
          threshold = 0.83;
        }
        if (costSum > bestCostForISP * threshold)
        {
          aborted = true;
          break;
        }
      }
    }
  } while (subTuPartitioner.nextPart(cs));

  singleDistLuma = distSum;
  singleFracBits = bitsSum;
  splitcbf       = anyLumaCbf;

  if (aborted)
  {
    return MAX_DOUBLE;
  }
  return costSum;
}
2820
2821
int IntraSearch::xSpeedUpISP(int speed, bool& testISP, int mode, int& noISP, int& endISP, CodingUnit& cu, static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, const ModeInfo& bestPUMode, int bestISP, int bestLfnstIdx)
2822
0
{
2823
0
  if (speed)
2824
0
  {
2825
0
    if (mode >= 1)
2826
0
    {
2827
0
      if (m_ispTestedModes[0].splitIsFinished[1] && m_ispTestedModes[0].splitIsFinished[0])
2828
0
      {
2829
0
        testISP = false;
2830
0
        endISP = 0;
2831
0
      }
2832
0
      else
2833
0
      {
2834
0
        if (m_pcEncCfg->m_ISP >= 2)
2835
0
        {
2836
0
          if (mode == 1) //best Hor||Ver
2837
0
          {
2838
0
            int bestDir = 0;
2839
0
            for (int d = 0; d < 2; d++)
2840
0
            {
2841
0
              int d2 = d ? 0 : 1;
2842
0
              if ((m_ispTestedModes[0].bestCost[d] <= m_ispTestedModes[0].bestCost[d2])
2843
0
                && (m_ispTestedModes[0].bestCost[d] != MAX_DOUBLE))
2844
0
              {
2845
0
                bestDir = d + 1;
2846
0
                m_ispTestedModes[0].splitIsFinished[d2] = true;
2847
0
              }
2848
0
            }
2849
0
            m_ispTestedModes[0].bestModeSoFar = bestDir;
2850
0
            if (m_ispTestedModes[0].bestModeSoFar <= 0)
2851
0
            {
2852
0
              m_ispTestedModes[0].splitIsFinished[1] = true;
2853
0
              m_ispTestedModes[0].splitIsFinished[0] = true;
2854
0
              testISP = false;
2855
0
              endISP = 0;
2856
0
            }
2857
0
          }
2858
0
          if (m_ispTestedModes[0].bestModeSoFar == 2)
2859
0
          {
2860
0
            noISP = 1;
2861
0
          }
2862
0
          else
2863
0
          {
2864
0
            endISP = 1;
2865
0
          }
2866
0
        }
2867
0
      }
2868
0
    }
2869
0
    if (testISP)
2870
0
    {
2871
0
      if (mode == 2)
2872
0
      {
2873
0
        for (int d = 0; d < 2; d++)
2874
0
        {
2875
0
          int d2 = d ? 0 : 1;
2876
0
          if (m_ispTestedModes[0].bestCost[d] == MAX_DOUBLE)
2877
0
          {
2878
0
            m_ispTestedModes[0].splitIsFinished[d] = true;
2879
0
          }
2880
0
          if ((m_ispTestedModes[0].bestCost[d2] < 1.3 * m_ispTestedModes[0].bestCost[d])
2881
0
            && (int(m_ispTestedModes[0].bestSplitSoFar) != (d + 1)))
2882
0
          {
2883
0
            if (d)
2884
0
            {
2885
0
              endISP = 1;
2886
0
            }
2887
0
            else
2888
0
            {
2889
0
              noISP = 1;
2890
0
            }
2891
0
            m_ispTestedModes[0].splitIsFinished[d] = true;
2892
0
          }
2893
0
        }
2894
0
      }
2895
0
      else
2896
0
      {
2897
0
        if (m_ispTestedModes[0].splitIsFinished[0])
2898
0
        {
2899
0
          noISP = 1;
2900
0
        }
2901
0
        if (m_ispTestedModes[0].splitIsFinished[1])
2902
0
        {
2903
0
          endISP = 1;
2904
0
        }
2905
0
      }
2906
0
    }
2907
0
    if ((noISP == 1) && (endISP == 1))
2908
0
    {
2909
0
      endISP = 0;
2910
0
    }
2911
0
  }
2912
0
  else
2913
0
  {
2914
0
    bool stopFound = false;
2915
0
    if (m_pcEncCfg->m_ISP >= 3)
2916
0
    {
2917
0
      if (mode)
2918
0
      {
2919
0
        if ((bestISP == 0) || ((bestPUMode.modeId != RdModeList[mode - 1].modeId)
2920
0
          && (bestPUMode.modeId != RdModeList[mode].modeId)))
2921
0
        {
2922
0
          stopFound = true;
2923
0
        }
2924
0
      }
2925
0
    }
2926
0
    if (cu.mipFlag || cu.multiRefIdx)
2927
0
    {
2928
0
      cu.mipFlag = false;
2929
0
      cu.multiRefIdx = 0;
2930
0
      if (!stopFound)
2931
0
      {
2932
0
        for (int k = 0; k < mode; k++)
2933
0
        {
2934
0
          if (cu.intraDir[CH_L] == RdModeList[k].modeId)
2935
0
          {
2936
0
            stopFound = true;
2937
0
            break;
2938
0
          }
2939
0
        }
2940
0
      }
2941
0
    }
2942
0
    if (stopFound)
2943
0
    {
2944
0
      testISP = false;
2945
0
      endISP = 0;
2946
0
      return 1;
2947
0
    }
2948
0
    if (!stopFound && (m_pcEncCfg->m_ISP >= 2) && (cu.intraDir[CH_L] == DC_IDX))
2949
0
    {
2950
0
      stopFound = true;
2951
0
      endISP = 0;
2952
0
      return 1;
2953
0
    }
2954
0
  }
2955
0
  return 0;
2956
0
}
2957
2958
void IntraSearch::xSpeedUpIntra(double bestcost, int& EndMode, int& speedIntra, CodingUnit& cu)
2959
0
{
2960
0
  int bestIdxbefore = m_ispTestedModes[0].bestIntraMode;
2961
0
  if (m_ispTestedModes[0].isIntra)
2962
0
  {
2963
0
    if (bestIdxbefore == 1)//ISP
2964
0
    {
2965
0
      speedIntra = 14;
2966
0
    }
2967
0
    if (bestIdxbefore == 4)//MTS
2968
0
    {
2969
0
      speedIntra = 3;
2970
0
    }
2971
0
  }
2972
0
  else if (!cu.cs->slice->isIntra())
2973
0
  {
2974
0
    if (bestcost != MAX_DOUBLE)
2975
0
    {
2976
0
      speedIntra = 10;
2977
0
    }
2978
0
  }
2979
0
  if (m_ispTestedModes[0].bestBefore[0] == -1)
2980
0
  {
2981
0
    speedIntra |= 7;
2982
0
    if (m_pcEncCfg->m_FastIntraTools == 2)
2983
0
    {
2984
0
      EndMode = 1;
2985
0
    }
2986
0
  }
2987
0
  if (!cu.cs->slice->isIntra())
2988
0
  {
2989
0
    if ((m_ispTestedModes[0].bestBefore[1] == 1) || (m_ispTestedModes[0].bestBefore[2] == 1))
2990
0
    {
2991
0
      speedIntra |= 2;
2992
0
    }
2993
0
    if ((m_ispTestedModes[0].bestBefore[1] == 4) || (m_ispTestedModes[0].bestBefore[2] == 4))
2994
0
    {
2995
0
      speedIntra |= 3;
2996
0
    }
2997
0
    if ((m_ispTestedModes[0].bestBefore[1] == 2) || (m_ispTestedModes[0].bestBefore[2] == 2))
2998
0
    {
2999
0
      speedIntra |= 1;
3000
0
    }
3001
0
  }
3002
0
}
3003
3004
} // namespace vvenc
3005
3006
//! \}
3007