Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/QuantRDOQ.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
------------------------------------------------------------------------------------------- */
41
42
43
/** \file     QuantRDOQ.cpp
44
    \brief    transform and quantization class
45
*/
46
47
#include "QuantRDOQ.h"
48
#include "UnitTools.h"
49
#include "ContextModelling.h"
50
#include "CodingStructure.h"
51
#include "dtrace_next.h"
52
#include "dtrace_buffer.h"
53
54
#include <stdlib.h>
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
/** Per-coefficient-group accumulator used while running RDOQ.
 *
 *  Kept as a plain aggregate: the quantizer clears it with memset() before
 *  each coefficient group, so it must stay trivially copyable.
 */
struct coeffGroupRDStats
{
  int    iNNZbeforePos0;        // presumably: non-zero levels seen before scan position 0 of the group -- confirm against usage
  double d64CodedLevelandDist;  // distortion and level cost only
  double d64UncodedDist;        // all zero coded block distortion
  double d64SigCost;            // presumably: accumulated significance-flag cost -- confirm against usage
  double d64SigCost_0;          // presumably: significance cost attributed to position 0 -- confirm against usage
  int    iNumSbbCtxBins;        // presumably: context-coded bins spent in this sub-block -- confirm against usage
};
71
72
73
//! \ingroup CommonLib
74
//! \{
75
76
// ====================================================================================================================
77
// Constants
78
// ====================================================================================================================
79
80
81
// ====================================================================================================================
82
// Static functions
83
// ====================================================================================================================
84
85
// ====================================================================================================================
86
// QuantRDOQ class member functions
87
// ====================================================================================================================
88
89
90
0
/** Construct an RDOQ quantizer, optionally attached to an existing one.
 *
 *  \param other            quantizer to attach to, or nullptr for a stand-alone instance;
 *                          when non-null it must itself be a QuantRDOQ
 *  \param useScalingLists  forwarded unchanged to the Quant base class
 */
QuantRDOQ::QuantRDOQ( const Quant* other, bool useScalingLists )
  : Quant( other, useScalingLists )
{
  // A non-null source that is not a QuantRDOQ cannot provide error-scale tables.
  const auto* rdoq = dynamic_cast<const QuantRDOQ*>( other );
  CHECK( other && !rdoq, "The RDOQ cast must be successfull!" );

  // With rdoq == nullptr this instance allocates and owns its tables;
  // otherwise it borrows the pointers held by the source object.
  xInitScalingList( rdoq );
}
97
98
/** Release the error-scale tables (a no-op for instances that only borrow them). */
QuantRDOQ::~QuantRDOQ()
{
  xDestroyScalingList();
}
102
103
104
105
106
/** Get the best level in RD sense
107
 *
108
 * \returns best quantized transform level for given scan position
109
 *
110
 * This method calculates the best quantized transform level for a given scan position.
111
 */
112
inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
113
                                       double&            rd64CodedCost0,
114
                                       double&            rd64CodedCostSig,
115
                                       Intermediate_Int   lLevelDouble,
116
                                       uint32_t               uiMaxAbsLevel,
117
                                       const BinFracBits* fracBitsSig,
118
                                       const BinFracBits& fracBitsPar,
119
                                       const BinFracBits& fracBitsGt1,
120
                                       const BinFracBits& fracBitsGt2,
121
                                       const int          remRegBins,
122
                                       unsigned           goRiceZero,
123
                                       uint16_t             ui16AbsGoRice,
124
                                       int                iQBits,
125
                                       double             errorScale,
126
                                       bool               bLast,
127
                                       const int          maxLog2TrDynamicRange
128
                                     ) const
129
0
{
130
0
  double dCurrCostSig   = 0;
131
0
  uint32_t   uiBestAbsLevel = 0;
132
133
0
  if( !bLast && uiMaxAbsLevel < 3 )
134
0
  {
135
0
    rd64CodedCostSig    = xGetRateSigCoef( *fracBitsSig, 0 );
136
0
    rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
137
0
    if( uiMaxAbsLevel == 0 )
138
0
    {
139
0
      return uiBestAbsLevel;
140
0
    }
141
0
  }
142
0
  else
143
0
  {
144
0
    rd64CodedCost       = MAX_DOUBLE;
145
0
  }
146
147
0
  if( !bLast )
148
0
  {
149
0
    dCurrCostSig        = xGetRateSigCoef( *fracBitsSig, 1 );
150
0
  }
151
152
0
  uint32_t uiMinAbsLevel    = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
153
0
  for( int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
154
0
  {
155
0
    double dErr         = double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
156
157
0
    double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, maxLog2TrDynamicRange ) );
158
0
    dCurrCost          += dCurrCostSig;
159
160
0
    if( dCurrCost < rd64CodedCost )
161
0
    {
162
0
      uiBestAbsLevel    = uiAbsLevel;
163
0
      rd64CodedCost     = dCurrCost;
164
0
      rd64CodedCostSig  = dCurrCostSig;
165
0
    }
166
0
  }
167
168
0
  return uiBestAbsLevel;
169
0
}
170
171
/** Calculates the cost for specific absolute transform level
172
 * \param uiAbsLevel scaled quantized level
173
 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
174
 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
175
 * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
176
 * \param c1Idx
177
 * \param c2Idx
178
 * \param useLimitedPrefixLength
179
 * \param maxLog2TrDynamicRange
180
 * \returns cost of given absolute transform level
181
 */
182
inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
183
                                  const BinFracBits& fracBitsPar,
184
                                  const BinFracBits& fracBitsGt1,
185
                                  const BinFracBits& fracBitsGt2,
186
                                  const int          remRegBins,
187
                                  unsigned           goRiceZero,
188
                                  const uint16_t       ui16AbsGoRice,
189
                                  const int          maxLog2TrDynamicRange  ) const
190
0
{
191
0
  if( remRegBins < 4 )
192
0
  {
193
0
    int       iRate   = int( xGetIEPRate() ); // cost of sign bit
194
0
    uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
195
0
    uint32_t  length;
196
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
197
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
198
0
    {
199
0
      length = symbol >> ui16AbsGoRice;
200
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
201
0
    }
202
0
    else
203
0
    {
204
0
      length = ui16AbsGoRice;
205
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
206
0
      while( symbol >= ( 1 << length ) )
207
0
      {
208
0
        symbol -= ( 1 << ( length++ ) );
209
0
      }
210
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
211
0
    }
212
0
    return iRate;
213
0
  }
214
215
0
  int iRate = int( xGetIEPRate() ); // cost of sign bit
216
0
  const uint32_t cthres = 4;
217
0
  if( uiAbsLevel >= cthres )
218
0
  {
219
0
    uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;
220
0
    uint32_t length;
221
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
222
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
223
0
    {
224
0
      length = symbol >> ui16AbsGoRice;
225
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
226
0
    }
227
0
    else
228
0
    {
229
0
      length = ui16AbsGoRice;
230
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
231
0
      while( symbol >= ( 1 << length ) )
232
0
      {
233
0
        symbol -= ( 1 << ( length++ ) );
234
0
      }
235
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
236
0
    }
237
238
0
    iRate += fracBitsGt1.intBits[1];
239
0
    iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
240
0
    iRate += fracBitsGt2.intBits[1];
241
0
  }
242
0
  else if( uiAbsLevel == 1 )
243
0
  {
244
0
    iRate += fracBitsGt1.intBits[0];
245
0
  }
246
0
  else if( uiAbsLevel == 2 )
247
0
  {
248
0
    iRate += fracBitsGt1.intBits[1];
249
0
    iRate += fracBitsPar.intBits[0];
250
0
    iRate += fracBitsGt2.intBits[0];
251
0
  }
252
0
  else if( uiAbsLevel == 3 )
253
0
  {
254
0
    iRate += fracBitsGt1.intBits[1];
255
0
    iRate += fracBitsPar.intBits[1];
256
0
    iRate += fracBitsGt2.intBits[0];
257
0
  }
258
0
  else
259
0
  {
260
0
    iRate = 0;
261
0
  }
262
0
  return  iRate;
263
0
}
264
265
inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
266
0
{
267
0
  return xGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] );
268
0
}
269
270
/** Calculates the cost of signaling the last significant coefficient in the block
271
 * \param uiPosX X coordinate of the last significant coefficient
272
 * \param uiPosY Y coordinate of the last significant coefficient
273
 * \param component colour component ID
274
 * \returns cost of last significant coefficient
275
 */
276
/*
277
 * \param uiWidth width of the transform unit (TU)
278
*/
279
inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
280
0
{
281
0
  uint32_t    CtxX  = g_uiGroupIdx[PosX];
282
0
  uint32_t    CtxY  = g_uiGroupIdx[PosY];
283
0
  double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
284
0
  if( CtxX > 3 )
285
0
  {
286
0
    Cost += xGetIEPRate() * ((CtxX-2)>>1);
287
0
  }
288
0
  if( CtxY > 3 )
289
0
  {
290
0
    Cost += xGetIEPRate() * ((CtxY-2)>>1);
291
0
  }
292
0
  return xGetICost( Cost );
293
0
}
294
295
296
inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
297
0
{
298
0
  return xGetICost( fracBitsSig.intBits[uiSignificance] );
299
0
}
300
301
/** Get the cost for a specific rate
302
 * \param dRate rate of a bit
303
 * \returns cost at the specific rate
304
 */
305
inline double QuantRDOQ::xGetICost        ( double                          dRate         ) const
306
0
{
307
0
  return m_dLambda * dRate;
308
0
}
309
310
/** Get the cost of an equal probable bit
311
 * \returns cost of equal probable bit
312
 */
313
inline double QuantRDOQ::xGetIEPRate() const
314
0
{
315
0
  return 32768;
316
0
}
317
318
319
double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false)
320
0
{
321
0
  const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
322
0
  double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
323
0
  double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
324
0
  dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
325
0
  const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
326
0
  double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
327
0
  return    finalErrScale;
328
0
}
329
330
331
332
/** set error scale coefficients
333
 * \param list                   list ID
334
 * \param size
335
 * \param qp                     quantization parameter
336
 * \param maxLog2TrDynamicRange
337
 * \param bitDepths              reference to bit depth array for all channels
338
 */
339
void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths )
340
0
{
341
0
  const int width = g_scalingListSizeX[sizeX];
342
0
  const int height = g_scalingListSizeX[sizeY];
343
0
  const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C;
344
0
  const int channelBitDepth = bitDepths[channelType];
345
0
  const int iTransformShift = getTransformShift( channelBitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[channelType] );  // Represents scaling through forward transform
346
347
0
  double dErrScale = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function
348
349
0
  const bool needsSqrt2 = ((Log2(width*height)) & 1) == 1;
350
0
  double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 );
351
0
  dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform
352
353
0
  if( getScalingListEnabled() )
354
0
  {
355
0
    uint32_t i, uiMaxNumCoeff = width * height;
356
357
0
    int*  piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY );
358
0
    double* pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp );
359
360
0
    for( i = 0; i < uiMaxNumCoeff; i++ )
361
0
    {
362
0
      pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << (DISTORTION_PRECISION_ADJUSTMENT( bitDepths[channelType] ) << 1));
363
0
    }
364
0
  }
365
366
0
  int QStep = g_quantScales[needsSqrt2][qp];
367
368
0
  xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) =
369
0
    dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths[channelType]) << 1));
370
0
}
371
372
/** set flat matrix value to quantized coefficient
373
 */
374
void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths)
375
0
{
376
0
  Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );
377
378
0
  const int minimumQp = 0;
379
0
  const int maximumQp = SCALING_LIST_REM_NUM;
380
381
0
  for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++)
382
0
  {
383
0
    for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++)
384
0
    {
385
0
      for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
386
0
      {
387
0
        for(int qp = minimumQp; qp < maximumQp; qp++)
388
0
        {
389
0
          xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths );
390
0
        }
391
0
      }
392
0
    }
393
0
  }
394
0
}
395
396
/** initialization process of scaling list array
397
 */
398
void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
399
0
{
400
0
  m_isErrScaleListOwner = other == nullptr;
401
402
0
  bool useScalingLists = getScalingListEnabled();
403
404
0
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
405
0
  {
406
0
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
407
0
    {
408
0
      for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
409
0
      {
410
0
        for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
411
0
        {
412
0
          if( m_isErrScaleListOwner )
413
0
          {
414
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr;
415
0
          }
416
0
          else
417
0
          {
418
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp];
419
0
          }
420
0
        } // listID loop
421
0
      }
422
0
    }
423
0
  }
424
0
}
425
426
/** destroy quantization matrix array
427
 */
428
void QuantRDOQ::xDestroyScalingList()
429
0
{
430
0
  if( !m_isErrScaleListOwner ) return;
431
432
0
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
433
0
  {
434
0
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
435
0
    {
436
0
      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
437
0
      {
438
0
        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
439
0
        {
440
0
          if(m_errScale[sizeIdX][sizeIdY][listId][qp])
441
0
          {
442
0
            delete [] m_errScale[sizeIdX][sizeIdY][listId][qp];
443
0
          }
444
0
        }
445
0
      }
446
0
    }
447
0
  }
448
//   Quant::destroyScalingList();
449
0
}
450
451
452
/** Quantize one component of a transform unit, using RDOQ where enabled
 *
 * Falls back to Quant::quant when RDOQ is disabled for the block. With
 * selective RDOQ active and xNeedRDOQ() returning false, the block is zeroed
 * instead of being quantized.
 *
 * \param tu       transform unit being quantized
 * \param compID   component to process
 * \param pSrc     transform coefficients to quantize
 * \param uiAbsSum [out] set to 0 when the block is zeroed here, otherwise passed on to the selected quantizer
 * \param cQP      quantization parameters
 * \param ctx      entropy-coding context passed on to the selected quantizer
 */
void QuantRDOQ::quant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  const CompArea& area     = tu.blocks[compID];
  const uint32_t blkWidth  = area.width;
  const uint32_t blkHeight = area.height;

  CoeffSigBuf dstCoeffs = tu.getCoeffs(compID);

  const bool isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;

  bool rdoqEnabled = isTransformSkip ? m_useRDOQTS : m_RDOQ > 0;

  // Outside of luma ISP blocks, RDOQ is only used if both dimensions exceed 2.
  if( !tu.cu->ispMode || !isLuma(compID) )
  {
    rdoqEnabled &= blkWidth > 2;
    rdoqEnabled &= blkHeight > 2;
  }

  if( !rdoqEnabled )
  {
    Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  // Selective RDOQ: skip the block entirely when the pre-check says so.
  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ(tu, compID, pSrc, cQP) )
  {
    dstCoeffs.fill(0);
    uiAbsSum = 0;
    tu.lastPos[compID] = -1;
    return;
  }

  if( !isTransformSkip )
  {
    xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  // Transform-skip blocks: BDPCM has its own path.
  if(tu.cu->bdpcmM[toChannelType(compID)])
  {
    forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else
  {
    rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
}
503
504
505
506
void QuantRDOQ::xRateDistOptQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx &ctx)
507
0
{
508
0
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
509
510
0
  const SPS &sps            = *tu.cs->sps;
511
0
  const CompArea& rect      = tu.blocks[compID];
512
0
  const uint32_t uiWidth    = rect.width;
513
0
  const uint32_t uiHeight   = rect.height;
514
0
  const ChannelType chType  = toChannelType(compID);
515
0
  const int channelBitDepth = sps.bitDepths[ chType ];
516
517
0
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
518
519
0
  const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
520
  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
521
  * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
522
  * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
523
  * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
524
  */
525
526
  // Represents scaling through forward transform
527
0
  const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
528
529
0
  double     d64BlockUncodedCost               = 0;
530
0
  const uint32_t uiLog2BlockWidth                  = Log2(uiWidth);
531
0
  const uint32_t uiLog2BlockHeight                 = Log2(uiHeight);
532
0
  const uint32_t uiMaxNumCoeff                     = rect.area();
533
534
0
  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
535
536
0
  int scalingListType = getScalingListType(tu.cu->predMode, compID);
537
538
0
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
539
540
0
  const TCoeff    *plSrcCoeff = pSrc.buf;
541
0
        TCoeffSig *piDstCoeff = tu.getCoeffs(compID).buf;
542
543
0
  double *pdCostCoeff  = m_pdCostCoeff;
544
0
  double *pdCostSig    = m_pdCostSig;
545
0
  double *pdCostCoeff0 = m_pdCostCoeff0;
546
0
  int    *rateIncUp    = m_rateIncUp;
547
0
  int    *rateIncDown  = m_rateIncDown;
548
0
  int    *sigRateDelta = m_sigRateDelta;
549
0
  TCoeff *deltaU       = m_deltaU;
550
551
0
  memset( piDstCoeff,     0, sizeof( TCoeffSig ) * uiMaxNumCoeff );
552
0
  memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
553
0
  memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
554
0
  memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
555
0
  memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
556
0
  memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
557
0
  memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
558
559
560
0
  const bool   needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID );
561
0
  const bool   isTransformSkip    = tu.mtsIdx[compID]==MTS_SKIP;
562
0
  const double *const pdErrScale  = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
563
0
  const int    *const piQCoef     = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
564
0
  const bool isLfnstApplied       = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
565
0
  const bool enableScalingLists   = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied);
566
0
  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
567
0
  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
568
0
  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
569
570
571
0
  const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
572
0
  const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
573
574
0
  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
575
0
  const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
576
577
0
  int     iCGLastScanPos      = -1;
578
0
  double  d64BaseCost         = 0;
579
0
  int     iLastScanPos        = -1;
580
581
0
  int ctxBinSampleRatio   = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT;
582
0
  int remRegBins          = (tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio) >> 4;
583
0
  uint32_t  goRiceParam   = 0;
584
585
0
  double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
586
0
  memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
587
0
  int iScanPos;
588
0
  coeffGroupRDStats rdStats;
589
590
#if ENABLE_TRACING
591
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
592
#endif
593
594
0
  const uint32_t lfnstIdx = tu.cu->lfnstIdx;
595
596
0
  const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
597
598
0
  for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
599
0
  {
600
0
    cctx.initSubblock( subSetId );
601
602
0
    int remRegBinsStartCG = remRegBins;
603
604
0
    uint32_t maxNonZeroPosInCG = iCGSizeM1;
605
0
    if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
606
0
    {
607
0
      maxNonZeroPosInCG = 7;
608
0
    }
609
610
0
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));
611
612
0
    for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
613
0
    {
614
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
615
0
      uint32_t    blkPos = cctx.blockPos( iScanPos );
616
0
      piDstCoeff[ blkPos ] = 0;
617
0
    }
618
0
    for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
619
0
    {
620
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
621
      //===== quantization =====
622
0
      uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
623
624
      // set coeff
625
0
      const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
626
0
      const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
627
0
      const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
628
629
0
      const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
630
631
0
      uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
632
633
0
      const double dErr         = double( lLevelDouble );
634
0
      pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
635
0
      d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
636
0
      piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
637
638
0
      if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
639
0
      {
640
0
        iLastScanPos            = iScanPos;
641
0
        iCGLastScanPos          = cctx.subSetId();
642
0
      }
643
644
0
      if ( iLastScanPos >= 0 )
645
0
      {
646
647
#if ENABLE_TRACING
648
        uint32_t uiCGPosY = cctx.cgPosY();
649
        uint32_t uiCGPosX = cctx.cgPosX();
650
        uint32_t uiPosY = cctx.posY( iScanPos );
651
        uint32_t uiPosX = cctx.posX( iScanPos );
652
        DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
653
#endif
654
        //===== coefficient level estimation =====
655
0
        unsigned ctxIdSig = 0;
656
0
        if( iScanPos != iLastScanPos )
657
0
        {
658
0
          ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
659
0
        }
660
0
        uint32_t    uiLevel;
661
0
        uint8_t ctxOffset     = cctx.ctxOffsetAbs     ();
662
0
        uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
663
0
        uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
664
0
        uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
665
0
        uint32_t    goRiceZero    = 0;
666
0
        if( remRegBins < 4 )
667
0
        {
668
0
          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
669
0
          goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
670
0
          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
671
0
        }
672
673
0
        const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
674
0
        const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
675
0
        const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
676
677
0
        if( iScanPos == iLastScanPos )
678
0
        {
679
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
680
0
                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, maxLog2TrDynamicRange );
681
0
        }
682
0
        else
683
0
        {
684
0
          DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
685
686
0
          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
687
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
688
0
                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, maxLog2TrDynamicRange );
689
0
          sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
690
0
        }
691
692
0
        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
693
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
694
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
695
696
0
        deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
697
698
0
        if( uiLevel > 0 )
699
0
        {
700
0
          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
701
0
          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
702
0
          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
703
0
        }
704
0
        else // uiLevel == 0
705
0
        {
706
0
          if( remRegBins < 4 )
707
0
          {
708
0
            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
709
0
            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
710
0
          }
711
0
          else
712
0
          {
713
0
            rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
714
0
          }
715
0
        }
716
0
        piDstCoeff[ uiBlkPos ] = uiLevel;
717
0
        d64BaseCost           += pdCostCoeff [ iScanPos ];
718
719
0
        if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
720
0
        {
721
0
          goRiceParam   = 0;
722
0
        }
723
0
        else if( remRegBins >= 4 )
724
0
        {
725
0
          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
726
0
          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
727
0
          remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
728
0
        }
729
0
      }
730
0
      else
731
0
      {
732
0
        d64BaseCost    += pdCostCoeff0[ iScanPos ];
733
0
      }
734
0
      rdStats.d64SigCost += pdCostSig[ iScanPos ];
735
0
      if (iScanPosinCG == 0 )
736
0
      {
737
0
        rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
738
0
      }
739
0
      if (piDstCoeff[ uiBlkPos ] )
740
0
      {
741
0
        cctx.setSigGroup();
742
0
        rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
743
0
        rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
744
0
        if ( iScanPosinCG != 0 )
745
0
        {
746
0
          rdStats.iNNZbeforePos0++;
747
0
        }
748
0
      }
749
0
    } //end for (iScanPosinCG)
750
751
0
    if (iCGLastScanPos >= 0)
752
0
    {
753
0
      if( cctx.subSetId() )
754
0
      {
755
0
        if( !cctx.isSigGroup() )
756
0
        {
757
0
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
758
0
          d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
759
0
          pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
760
0
        }
761
0
        else
762
0
        {
763
0
          if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
764
0
          {
765
0
            if ( rdStats.iNNZbeforePos0 == 0 )
766
0
            {
767
0
              d64BaseCost -= rdStats.d64SigCost_0;
768
0
              rdStats.d64SigCost -= rdStats.d64SigCost_0;
769
0
            }
770
            // rd-cost if SigCoeffGroupFlag = 0, initialization
771
0
            double d64CostZeroCG = d64BaseCost;
772
773
0
            const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
774
775
0
            if (cctx.subSetId() < iCGLastScanPos)
776
0
            {
777
0
              d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);
778
0
              d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);
779
0
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
780
0
            }
781
782
            // try to convert the current coeff group from non-zero to all-zero
783
0
            d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
784
0
            d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
785
0
            d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels
786
787
                                                     // if we can save cost, change this block to all-zero block
788
0
            if ( d64CostZeroCG < d64BaseCost )
789
0
            {
790
0
              cctx.resetSigGroup();
791
0
              d64BaseCost = d64CostZeroCG;
792
0
              remRegBins = remRegBinsStartCG;
793
0
              if (cctx.subSetId() < iCGLastScanPos)
794
0
              {
795
0
                pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
796
0
              }
797
              // reset coeffs to 0 in this block
798
0
              for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
799
0
              {
800
0
                iScanPos      = cctx.minSubPos() + iScanPosinCG;
801
0
                uint32_t uiBlkPos = cctx.blockPos( iScanPos );
802
803
0
                if (piDstCoeff[ uiBlkPos ])
804
0
                {
805
0
                  piDstCoeff [ uiBlkPos ] = 0;
806
0
                  pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
807
0
                  pdCostSig  [ iScanPos ] = 0;
808
0
                }
809
0
              }
810
0
            } // end if ( d64CostAllZeros < d64BaseCost )
811
0
          }
812
0
        } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
813
0
      }
814
0
      else
815
0
      {
816
0
        cctx.setSigGroup();
817
0
      }
818
0
    }
819
0
  } //end for (cctx.subSetId)
820
821
822
  //===== estimate last position =====
823
0
  if ( iLastScanPos < 0 )
824
0
  {
825
0
    return;
826
0
  }
827
828
0
  double  d64BestCost         = 0;
829
0
  int     iBestLastIdxP1      = 0;
830
831
832
0
  if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
833
0
  {
834
0
    const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
835
0
    d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
836
0
    d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
837
0
  }
838
0
  else
839
0
  {
840
0
    bool previousCbf       = tu.cbf[COMP_Cb];
841
0
    bool lastCbfIsInferred = false;
842
0
    if( useIntraSubPartitions )
843
0
    {
844
0
      bool rootCbfSoFar       = false;
845
0
      bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
846
0
      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
847
0
      if( isLastSubPartition )
848
0
      {
849
0
        TransformUnit* tuPointer = tu.cu->firstTU;
850
0
        for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
851
0
        {
852
0
          rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
853
0
          tuPointer     = tuPointer->next;
854
0
        }
855
0
        if( !rootCbfSoFar )
856
0
        {
857
0
          lastCbfIsInferred = true;
858
0
        }
859
0
      }
860
0
      if( !lastCbfIsInferred )
861
0
      {
862
0
        previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
863
0
      }
864
0
    }
865
0
    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
866
867
0
    if( !lastCbfIsInferred )
868
0
    {
869
0
      d64BestCost  = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
870
0
      d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
871
0
    }
872
0
    else
873
0
    {
874
0
      d64BestCost  = d64BlockUncodedCost;
875
0
    }
876
0
  }
877
878
0
  int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
879
0
  int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
880
0
  {
881
0
    int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
882
0
    int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
883
0
    int bitsX = 0;
884
0
    int bitsY = 0;
885
0
    int ctxId;
886
    //X-coordinate
887
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
888
0
    {
889
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
890
0
      lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
891
0
      bitsX               +=         fB.intBits[ 1 ];
892
0
    }
893
0
    lastBitsX[ctxId] = bitsX;
894
    //Y-coordinate
895
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
896
0
    {
897
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
898
0
      lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
899
0
      bitsY               +=         fB.intBits[ 1 ];
900
0
    }
901
0
    lastBitsY[ctxId] = bitsY;
902
0
  }
903
904
905
0
  bool bFoundLast = false;
906
0
  for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
907
0
  {
908
0
    d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
909
0
    if (cctx.isSigGroup( iCGScanPos ) )
910
0
    {
911
0
      uint32_t maxNonZeroPosInCG = iCGSizeM1;
912
0
      if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
913
0
      {
914
0
        maxNonZeroPosInCG = 7;
915
0
      }
916
0
      for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
917
0
      {
918
0
        iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
919
920
0
        if (iScanPos > iLastScanPos)
921
0
        {
922
0
          continue;
923
0
        }
924
0
        uint32_t   uiBlkPos     = cctx.blockPos( iScanPos );
925
926
0
        if( piDstCoeff[ uiBlkPos ] )
927
0
        {
928
0
          uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
929
0
          uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
930
0
          double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
931
932
0
          double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
933
934
0
          if( totalCost < d64BestCost )
935
0
          {
936
0
            iBestLastIdxP1  = iScanPos + 1;
937
0
            d64BestCost     = totalCost;
938
0
          }
939
0
          if( piDstCoeff[ uiBlkPos ] > 1 )
940
0
          {
941
0
            bFoundLast = true;
942
0
            break;
943
0
          }
944
0
          d64BaseCost      -= pdCostCoeff[ iScanPos ];
945
0
          d64BaseCost      += pdCostCoeff0[ iScanPos ];
946
0
        }
947
0
        else
948
0
        {
949
0
          d64BaseCost      -= pdCostSig[ iScanPos ];
950
0
        }
951
0
      } //end for
952
0
      if (bFoundLast)
953
0
      {
954
0
        break;
955
0
      }
956
0
    } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
957
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
958
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
959
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
960
961
0
  } // end for
962
963
964
0
  for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
965
0
  {
966
0
    int blkPos = cctx.blockPos( scanPos );
967
0
    TCoeff level = piDstCoeff[ blkPos ];
968
0
    uiAbsSum += level;
969
0
    piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
970
0
  }
971
972
  //===== clean uncoded coefficients =====
973
0
  for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
974
0
  {
975
0
    piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
976
0
  }
977
0
  iLastScanPos = iBestLastIdxP1 - 1;
978
979
0
  if( cctx.signHiding() && uiAbsSum>=2)
980
0
  {
981
0
    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
982
0
    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
983
0
                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
984
0
                             + 0.5);
985
986
0
    int lastCG = -1;
987
0
    int absSum = 0 ;
988
0
    int n ;
989
0
    for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
990
0
    {
991
0
      int  subPos         = subSet << cctx.log2CGSize();
992
0
      int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
993
0
      absSum = 0 ;
994
995
0
      for( n = iCGSizeM1; n >= 0; --n )
996
0
      {
997
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
998
0
        {
999
0
          lastNZPosInCG = n;
1000
0
          break;
1001
0
        }
1002
0
      }
1003
1004
0
      for( n = 0; n <= iCGSizeM1; n++ )
1005
0
      {
1006
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
1007
0
        {
1008
0
          firstNZPosInCG = n;
1009
0
          break;
1010
0
        }
1011
0
      }
1012
1013
0
      for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
1014
0
      {
1015
0
        absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
1016
0
      }
1017
1018
0
      if(lastNZPosInCG>=0 && lastCG==-1)
1019
0
      {
1020
0
        lastCG = 1;
1021
0
      }
1022
1023
0
      if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1024
0
      {
1025
0
        uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
1026
0
        if( signbit!=(absSum&0x1) )  // hide but need tune
1027
0
        {
1028
          // calculate the cost
1029
0
          int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
1030
0
          int minPos = -1, finalChange = 0, curChange = 0;
1031
1032
0
          for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
1033
0
          {
1034
0
            uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
1035
0
            if(piDstCoeff[ uiBlkPos ] != 0 )
1036
0
            {
1037
0
              int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
1038
0
              int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1039
0
                -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
1040
1041
0
              if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1042
0
              {
1043
0
                costDown -= (4<<SCALE_BITS);
1044
0
              }
1045
1046
0
              if(costUp<costDown)
1047
0
              {
1048
0
                curCost = costUp;
1049
0
                curChange =  1;
1050
0
              }
1051
0
              else
1052
0
              {
1053
0
                curChange = -1;
1054
0
                if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1055
0
                {
1056
0
                  curCost = std::numeric_limits<int64_t>::max();
1057
0
                }
1058
0
                else
1059
0
                {
1060
0
                  curCost = costDown;
1061
0
                }
1062
0
              }
1063
0
            }
1064
0
            else
1065
0
            {
1066
0
              curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1067
0
              curChange = 1 ;
1068
1069
0
              if(n<firstNZPosInCG)
1070
0
              {
1071
0
                uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1072
0
                if(thissignbit != signbit )
1073
0
                {
1074
0
                  curCost = std::numeric_limits<int64_t>::max();
1075
0
                }
1076
0
              }
1077
0
            }
1078
1079
0
            if( curCost<minCostInc)
1080
0
            {
1081
0
              minCostInc = curCost;
1082
0
              finalChange = curChange;
1083
0
              minPos = uiBlkPos;
1084
0
            }
1085
0
          }
1086
1087
0
          if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
1088
0
          {
1089
0
            finalChange = -1;
1090
0
          }
1091
1092
0
          if(plSrcCoeff[minPos]>=0)
1093
0
          {
1094
0
            piDstCoeff[minPos] += finalChange ;
1095
0
          }
1096
0
          else
1097
0
          {
1098
0
            piDstCoeff[minPos] -= finalChange ;
1099
0
          }
1100
0
        }
1101
0
      }
1102
1103
0
      if(lastCG==1)
1104
0
      {
1105
0
        lastCG=0 ;
1106
0
      }
1107
0
    }
1108
1109
    // Check due to saving of last pos. Sign data hiding can change the position of last coef.
1110
0
    if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 )
1111
0
    {
1112
0
      int scanPos = iLastScanPos - 1;
1113
0
      for( ; scanPos >= 0; scanPos-- )
1114
0
      {
1115
0
        if( piDstCoeff[cctx.blockPos( scanPos )] )
1116
0
          break;
1117
0
      }
1118
0
      iLastScanPos = scanPos;
1119
0
    }
1120
0
  }
1121
0
  tu.lastPos[compID] = iLastScanPos;
1122
0
}
1123
1124
// Rate-distortion optimised quantisation working sub-block by sub-block with the
// transform-skip ("TS") context helpers of CoeffCodingContext.
//
// For every scan position the absolute source coefficient is scaled by the
// quantisation coefficient, up to three candidate levels are collected and
// xGetCodedLevelTSPred() selects the level that is written (with the sign of
// the source coefficient) to the TU's coefficient buffer. After each sub-block
// the cost of keeping it is compared with the cost of signalling it as all-zero;
// the cheaper alternative wins.
//
// tu      transform unit; its coefficient buffer for compID receives the levels
// compID  component to quantise
// coeffs  source coefficients
// absSum  in/out: the sum of the absolute output levels is ADDED to this value,
//         it is not reset here
// qp      quantisation parameters (per/rem)
// ctx     provides the fractional-bit tables used for the rate estimates
void QuantRDOQ::rateDistOptQuantTS( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS&        sps             = *tu.cs->sps;
  const CompArea&   rect            = tu.blocks[compID];
  const uint32_t    width           = rect.width;
  const uint32_t    height          = rect.height;
  const ChannelType chType          = toChannelType( compID );
  const int         channelBitDepth = sps.bitDepths[ chType ];

  const int      maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
  const int      transformShift        = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );
  const uint32_t maxNumCoeff           = rect.area();

  CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );

  // only evaluated for the sanity check below
  const int scalingListType = getScalingListType( tu.cu->predMode, compID );
  CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs( compID ).buf;

  double *costCoeff  = m_pdCostCoeff;
  double *costSig    = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;   // not cleared: every position is written in the main loop

  memset( m_pdCostCoeff, 0, sizeof( double ) * maxNumCoeff );
  memset( m_pdCostSig,   0, sizeof( double ) * maxNumCoeff );

  m_bdpcm = 0;

  const bool   needsSqrt2Scale         = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip         = tu.mtsIdx[compID] == MTS_SKIP;
  const int    qBits                   = QUANT_SHIFT + qp.per( isTransformSkip ) + ( isTransformSkip ? 0 : transformShift ) + ( needsSqrt2Scale ? -1 : 0 );  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[ needsSqrt2Scale ? 1 : 0 ][ qp.rem( isTransformSkip ) ];
  // reuse the flag computed above instead of querying TU::needsSqrt2Scale() a second time
  const double errorScale              = xGetErrScaleCoeff( needsSqrt2Scale, width, height, qp.rem( isTransformSkip ), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip );

  const TCoeff           entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;
  const Intermediate_Int roundingOffset       = Intermediate_Int( 1 ) << ( qBits - 1 );

  uint32_t coeffLevels[3];       // candidate levels handed to xGetCodedLevelTSPred()
  double   coeffLevelError[4];   // [0] holds the distortion of level 0, see below

  CoeffCodingContext cctx( tu, compID, tu.cs->slice->signDataHidingEnabled );
  const int sbSizeM1    = ( 1 << cctx.log2CGSize() ) - 1;
  double    baseCost    = 0;
  uint32_t  goRiceParam = 0;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );

  const int sbNum = width * height >> cctx.log2CGSize();
  coeffGroupRDStats rdStats;

  bool anySigCG = false;   // set once a sub-block survived the all-zero test

  // budget of bins tracked in cctx.remRegBins: 7/4 bins per coefficient
  const int maxCtxBins = ( cctx.maxNumCoeff() * 7 ) >> 2;
  cctx.remRegBins      = maxCtxBins;

  for( int sbId = 0; sbId < sbNum; sbId++ )
  {
    cctx.initSubblock( sbId );

    int noCoeffCoded = 0;   // number of non-zero levels chosen so far in this sub-block
    baseCost         = 0.0;
    memset( &rdStats, 0, sizeof( coeffGroupRDStats ) );   // also clears iNumSbbCtxBins

    for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
    {
      const int scanPos = cctx.minSubPos() + scanPosInSB;

      //===== quantization =====
      const uint32_t blkPos = cctx.blockPos( scanPos );

      // scaled magnitude, clamped so that adding the rounding offset cannot overflow
      const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - roundingOffset );

      const uint32_t roundAbsLevel = std::min<uint32_t>( uint32_t( entropyCodingMaximum ), uint32_t( ( levelDouble + roundingOffset ) >> qBits ) );
      const uint32_t minAbsLevel   = ( roundAbsLevel > 1 ? roundAbsLevel - 1 : 1 );

      const uint32_t downAbsLevel  = std::min<uint32_t>( uint32_t( entropyCodingMaximum ), uint32_t( levelDouble >> qBits ) );
      const uint32_t upAbsLevel    = std::min<uint32_t>( uint32_t( entropyCodingMaximum ), downAbsLevel + 1 );

      // candidate list: rounded level first, then the alternative below it
      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if( minAbsLevel != roundAbsLevel )
      {
        coeffLevels[m_testedLevels++] = minAbsLevel;
      }

      int rightPixel, belowPixel;
      cctx.neighTS( rightPixel, belowPixel, scanPos, dstCoeff );
      const int predPixel = cctx.deriveModCoeff( rightPixel, belowPixel, upAbsLevel, 0 );

      // the rounded-up level is only tested when deriveModCoeff() yields 1 for it
      if( upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1 )
      {
        coeffLevels[m_testedLevels++] = upAbsLevel;
      }

      const double dErr  = double( levelDouble );
      coeffLevelError[0] = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      dstCoeff[blkPos]    = coeffLevels[0];   // provisional value, visible to the context derivation below

      //===== coefficient level estimation =====
      const unsigned    ctxIdSig    = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );

      //goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
      goRiceParam = 1;
      const unsigned    ctxIdSign    = cctx.signCtxIdAbsTS( scanPos, dstCoeff, 0 );
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray( ctxIdSign );
      const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;

      DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );

      const unsigned    gt1CtxId    = cctx.lrg1CtxIdAbsTS( scanPos, dstCoeff, 0 );
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray( gt1CtxId );
      const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );

      // final position of the sub-block while no non-zero level has been chosen in it yet
      bool lastCoeff      = ( scanPosInSB == sbSizeM1 && noCoeffCoded == 0 );
      int  numUsedCtxBins = 0;

      const uint32_t cLevel = xGetCodedLevelTSPred( costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
                                                    &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins );

      cctx.remRegBins        -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if( cLevel > 0 )
      {
        noCoeffCoded++;
      }

      // store the chosen level with the sign of the source coefficient
      const TCoeff level = cLevel;
      dstCoeff[blkPos]   = ( level != 0 && srcCoeff[blkPos] < 0 ) ? -level : level;

      baseCost           += costCoeff[ scanPos ];
      rdStats.d64SigCost += costSig  [ scanPos ];

      if( dstCoeff[ blkPos ] )
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
        rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
      }
    } // end for (scanPosInSB)

    if( !cctx.isSigGroup() )
    {
      // no non-zero level in this sub-block: replace the per-coefficient sig cost by the group flag cost
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
      baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
      costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if( sbId != sbNum - 1 || anySigCG )
    {
      // the all-zero test is skipped for the final sub-block when no earlier sub-block was kept

      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );

      baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
      costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if( costZeroSB < baseCost )
      {
        // cheaper to drop the whole sub-block
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
        {
          const int      scanPos = cctx.minSubPos() + scanPosInSB;
          const uint32_t blkPos  = cctx.blockPos( scanPos );

          if( dstCoeff[ blkPos ] )
          {
            dstCoeff [ blkPos  ] = 0;
            costCoeff[ scanPos ] = costCoeff0[ scanPos ];
            costSig  [ scanPos ] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  }

  //===== accumulate the absolute sum of the selected levels =====
  const int numCoeff = int( maxNumCoeff );   // signed copy: avoids a signed/unsigned comparison in the loop
  for( int pos = 0; pos < numCoeff; pos++ )
  {
    const TCoeff level = dstCoeff[ cctx.blockPos( pos ) ];
    absSum += abs( level );
  }
}
1337
1338
/** \brief RDOQ-style quantisation for a block coded with BDPCM (residual DPCM inside the quantisation loop).
 *
 *  For every scan position the already reconstructed neighbour (left neighbour when the BDPCM
 *  direction read from tu.cu->bdpcmM is 1, otherwise the neighbour above) is used as predictor.
 *  The prediction difference is quantised, the best level is chosen by xGetCodedLevelTSPred(),
 *  and the sample is dequantised again so that m_fullCoeff holds the reconstruction used to
 *  predict later positions. After each sub-block the cost of signalling the sub-block as all-zero
 *  is compared against the cost of coding it; if zeroing is cheaper the levels are cleared.
 *
 *  \param tu      transform unit; its coefficient buffer for compID receives the signed levels
 *  \param compID  component to process
 *  \param coeffs  source coefficients (read through coeffs.buf, neighbour offset uses coeffs.stride)
 *  \param absSum  incremented by the sum of absolute levels of the block (not reset here)
 *  \param qp      quantisation parameters
 *  \param ctx     context state providing the fractional-bit tables
 */
void QuantRDOQ::forwardRDPCM( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps = *tu.cs->sps;
  const CompArea& rect = tu.blocks[compID];
  const uint32_t width = rect.width;
  const uint32_t height = rect.height;
  const ChannelType chType = toChannelType(compID);
  const int channelBitDepth = sps.bitDepths[chType];

  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
  const int  dirMode = tu.cu->bdpcmM[toChannelType(compID)];

  const int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

  const uint32_t maxNumCoeff = rect.area();

  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

  int scalingListType = getScalingListType(tu.cu->predMode, compID);
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs(compID).buf;

  double *costCoeff = m_pdCostCoeff;
  double *costSig = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;

  memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
  memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
  memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff);

  // xGetCodedLevelTSPred() reads the BDPCM direction from this member
  m_bdpcm = dirMode;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
  const double errorScale = xGetErrScaleCoeff(needsSqrt2Scale, width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);

  // parameters for the in-loop dequantisation of each chosen level
  TrQuantParams trQuantParams;
  trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
  trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

  const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;

  uint32_t coeffLevels[3];
  double   coeffLevelError[4];

  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
  const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
  double    baseCost = 0;
  // fixed Rice parameter; the template based derivation
  // (cctx.templateAbsSumTS(scanPos, dstCoeff)) is intentionally not used here
  const uint32_t goRiceParam = 1;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));

  const int sbNum = width * height >> cctx.log2CGSize();
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // budget of context coded bins for the whole block
  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
  cctx.remRegBins = maxCtxBins;

  for (int sbId = 0; sbId < sbNum; sbId++)
  {
    cctx.initSubblock(sbId);

    int noCoeffCoded = 0;
    baseCost = 0.0;
    memset(&rdStats, 0, sizeof(coeffGroupRDStats));   // also clears rdStats.iNumSbbCtxBins

    for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
    {
      const int scanPos = cctx.minSubPos() + scanPosInSB;
      //===== quantization =====
      uint32_t blkPos = cctx.blockPos(scanPos);

      // predictor: reconstructed left neighbour (dirMode == 1) or neighbour above (otherwise);
      // zero for the first column / row
      const int posX = cctx.posX(scanPos);
      const int posY = cctx.posY(scanPos);
      const int posS = (1 == dirMode) ? posX : posY;
      const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
      TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;

      // set coeff
      const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));
      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

      // candidate levels: the rounded level and, if different, the next smaller non-zero level
      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if (minAbsLevel != roundAbsLevel)
        coeffLevels[m_testedLevels++] = minAbsLevel;

      // distortion when the coefficient is quantised to zero
      double dErr = double(levelDouble);
      coeffLevelError[0]  = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
      unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
      uint32_t    cLevel;
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());

      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
      const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;
      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

      DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);

      const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);
      // last position of a sub-block in which nothing has been coded so far
      const bool lastCoeff = (scanPosInSB == sbSizeM1 && noCoeffCoded == 0);

      int rightPixel, belowPixel;
      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
      int numUsedCtxBins = 0;
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);
      cctx.remRegBins -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if (cLevel > 0)
      {
        noCoeffCoded++;
      }
      dstCoeff[blkPos] = cLevel;

      if (sign)
      {
        dstCoeff[blkPos] = -dstCoeff[blkPos];
      }

      // reconstruct the sample so that it can serve as predictor for later positions
      xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams );
      m_fullCoeff[blkPos] += predCoeff;

      baseCost += costCoeff[scanPos];
      rdStats.d64SigCost += costSig[scanPos];

      if (dstCoeff[blkPos])
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
        rdStats.d64UncodedDist += costCoeff0[scanPos];
      }
    } //end for (iScanPosinCG)

    if (!cctx.isSigGroup())
    {
      // nothing coded in this sub-block: only the "group not significant" flag is paid
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if (sbId != sbNum - 1 || anySigCG)
    {
      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));

      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
      costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if (costZeroSB < baseCost)
      {
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
        {
          const int scanPos = cctx.minSubPos() + scanPosInSB;
          uint32_t blkPos = cctx.blockPos(scanPos);

          const int posX = cctx.posX(scanPos);
          const int posY = cctx.posY(scanPos);
          const int posS = (1 == dirMode) ? posX : posY;
          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
          // With a zero level the reconstruction equals the predictor. m_fullCoeff is addressed
          // by block (raster) position everywhere else, so the update must use blkPos as well;
          // indexing by scanPos would overwrite an unrelated entry and leave this one stale.
          m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0;

          if (dstCoeff[blkPos])
          {
            dstCoeff[blkPos] = 0;
            costCoeff[scanPos] = costCoeff0[scanPos];
            costSig[scanPos] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  }

  //===== accumulate the sum of absolute levels =====
  const int numCoeff = static_cast<int>(maxNumCoeff);
  for (int pos = 0; pos < numCoeff; pos++)
  {
    int blkPos = cctx.blockPos(pos);
    TCoeff level = dstCoeff[blkPos];
    absSum += abs(level);
  }
}
1563
1564
/** \brief Dequantise a single level.
 *
 *  \param pRes           receives the dequantised value
 *  \param coeff          quantised level
 *  \param trQuantParams  provides the inverse scale (qScale) and the shift (rightShift);
 *                        a positive shift is applied as a rounded right shift, otherwise the
 *                        scaled value is multiplied by 2^(-rightShift)
 */
void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeffSig& coeff, const TrQuantParams& trQuantParams)
{
  const auto shift  = trQuantParams.rightShift;
  const auto scaled = Intermediate_Int(coeff) * trQuantParams.qScale;

  if (shift <= 0)
  {
    // no right shift required: scale up instead
    pRes = TCoeff(scaled * (1 << -shift));
    return;
  }

  // right shift with rounding offset of half the step
  const Intermediate_Int roundOffset = Intermediate_Int(1) << (shift - 1);
  pRes = TCoeff((scaled + roundOffset) >> shift);
}
1577
1578
inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost,
1579
  double&            rd64CodedCost0,
1580
  double&            rd64CodedCostSig,
1581
  Intermediate_Int    levelDouble,
1582
  int                 qBits,
1583
  double              errorScale,
1584
  uint32_t coeffLevels[],
1585
  double coeffLevelError[],
1586
  const BinFracBits* fracBitsSig,
1587
  const BinFracBits& fracBitsPar,
1588
  CoeffCodingContext& cctx,
1589
  const FracBitsAccess& fracBitsAccess,
1590
  const BinFracBits& fracBitsSign,
1591
  const BinFracBits& fracBitsGt1,
1592
  const uint8_t      sign,
1593
  int                rightPixel,
1594
  int                belowPixel,
1595
  uint16_t           ricePar,
1596
  bool               isLast,
1597
  const int          maxLog2TrDynamicRange,
1598
  int&               numUsedCtxBins
1599
) const
1600
0
{
1601
0
  double currCostSig = 0;
1602
0
  uint32_t   bestAbsLevel = 0;
1603
0
  numUsedCtxBins = 0;
1604
0
  int numBestCtxBin = 0;
1605
0
  if (!isLast && coeffLevels[0] < 3)
1606
0
  {
1607
0
    if (cctx.remRegBins >= 4)
1608
0
    rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
1609
0
    else
1610
0
      rd64CodedCostSig = xGetICost(1 << SCALE_BITS);
1611
0
    rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
1612
0
    if (cctx.remRegBins >= 4)
1613
0
      numUsedCtxBins++;
1614
0
    if (coeffLevels[0] == 0)
1615
0
    {
1616
0
      return bestAbsLevel;
1617
0
    }
1618
0
  }
1619
0
  else
1620
0
  {
1621
0
    rd64CodedCost = MAX_DOUBLE;
1622
0
  }
1623
1624
0
  if (!isLast)
1625
0
  {
1626
0
    if (cctx.remRegBins >= 4)
1627
0
      currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
1628
0
    else
1629
0
      currCostSig = xGetICost(1 << SCALE_BITS);
1630
0
    if (coeffLevels[0] >= 3 && cctx.remRegBins >= 4)
1631
0
      numUsedCtxBins++;
1632
0
  }
1633
1634
0
  for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++)
1635
0
  {
1636
0
    int absLevel = coeffLevels[errorInd - 1];
1637
0
    double dErr = 0.0;
1638
0
    dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits));
1639
0
    coeffLevelError[errorInd] = dErr * dErr * errorScale;
1640
0
    int modAbsLevel = absLevel;
1641
0
    if (cctx.remRegBins >= 4) 
1642
0
    {
1643
0
      modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
1644
0
    }
1645
0
    int numCtxBins = 0;
1646
0
    double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, maxLog2TrDynamicRange));
1647
1648
0
    if (cctx.remRegBins >= 4)
1649
0
      dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more
1650
1651
0
    if (dCurrCost < rd64CodedCost)
1652
0
    {
1653
0
      bestAbsLevel = absLevel;
1654
0
      rd64CodedCost = dCurrCost;
1655
0
      rd64CodedCostSig = currCostSig;
1656
0
      numBestCtxBin = numCtxBins;
1657
0
    }
1658
0
  }
1659
0
  numUsedCtxBins += numBestCtxBin;
1660
0
  return bestAbsLevel;
1661
0
}
1662
1663
inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
1664
                                    const BinFracBits&        fracBitsPar,
1665
                                    const CoeffCodingContext& cctx,
1666
                                    const FracBitsAccess&     fracBitsAccess,
1667
                                    const BinFracBits&        fracBitsSign,
1668
                                    const BinFracBits&        fracBitsGt1,
1669
                                    int&                      numCtxBins,
1670
                                    const uint8_t             sign,
1671
                                    const uint16_t            ricePar,
1672
                                    const int                 maxLog2TrDynamicRange  ) const
1673
0
{
1674
 
1675
0
  if (cctx.remRegBins < 4) // Full by-pass coding 
1676
0
  {
1677
0
    int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero 
1678
1679
0
    uint32_t symbol = absLevel;
1680
1681
0
    uint32_t length;
1682
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
1683
0
    if (symbol < (threshold << ricePar))
1684
0
    {
1685
0
      length = symbol >> ricePar;
1686
0
      rate += (length + 1 + ricePar) << SCALE_BITS;
1687
0
    }
1688
0
    else
1689
0
    {
1690
0
      length = ricePar;
1691
0
      symbol = symbol - (threshold << ricePar);
1692
0
      while (symbol >= (1 << length))
1693
0
      {
1694
0
        symbol -= (1 << (length++));
1695
0
      }
1696
0
      rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1697
0
    }
1698
1699
0
    return rate;
1700
0
  }
1701
1702
0
  else if (cctx.remRegBins >= 4 && cctx.remRegBins < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here)
1703
0
  {
1704
0
    int rate = fracBitsSign.intBits[sign]; // sign bits
1705
0
    if (absLevel)
1706
0
      numCtxBins++;
1707
1708
0
    if (absLevel > 1)
1709
0
    {
1710
0
      rate += fracBitsGt1.intBits[1];
1711
0
      rate += fracBitsPar.intBits[(absLevel - 2) & 1];
1712
1713
0
      numCtxBins += 2;
1714
1715
0
      int cutoffVal = 2;
1716
1717
0
      if (absLevel >= cutoffVal)
1718
0
      {
1719
0
        uint32_t symbol = (absLevel - cutoffVal) >> 1;
1720
0
        uint32_t length;
1721
0
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
1722
0
        if (symbol < (threshold << ricePar))
1723
0
        {
1724
0
          length = symbol >> ricePar;
1725
0
          rate += (length + 1 + ricePar) << SCALE_BITS;
1726
0
        }
1727
0
        else
1728
0
        {
1729
0
          length = ricePar;
1730
0
          symbol = symbol - (threshold << ricePar);
1731
0
          while (symbol >= (1 << length))
1732
0
          {
1733
0
            symbol -= (1 << (length++));
1734
0
          }
1735
0
          rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1736
0
        }
1737
0
      }
1738
0
    }
1739
0
    else if (absLevel == 1)
1740
0
    {
1741
0
      rate += fracBitsGt1.intBits[0];
1742
0
      numCtxBins++;
1743
0
    }
1744
0
    else
1745
0
    {
1746
0
      rate = 0;
1747
0
    }
1748
0
    return rate;
1749
0
  }
1750
    
1751
0
  int rate = fracBitsSign.intBits[sign];
1752
1753
0
  if (absLevel)
1754
0
    numCtxBins++;
1755
1756
0
  if( absLevel > 1 )
1757
0
  {
1758
0
    rate += fracBitsGt1.intBits[1];
1759
0
    rate += fracBitsPar.intBits[( absLevel - 2 ) & 1];
1760
0
    numCtxBins += 2;
1761
1762
0
          int cutoffVal = 2;
1763
0
    const int numGtBins = 4;
1764
0
    for( int i = 0; i < numGtBins; i++ )
1765
0
    {
1766
0
      if( absLevel >= cutoffVal )
1767
0
      {
1768
0
        const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 );
1769
0
        const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX );
1770
0
        unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) );
1771
0
        rate += fracBitsGtX.intBits[gtX];
1772
0
        numCtxBins++;
1773
0
      }
1774
0
      cutoffVal += 2;
1775
0
    }
1776
1777
0
    if( absLevel >= cutoffVal )
1778
0
    {
1779
0
      uint32_t symbol = ( absLevel - cutoffVal ) >> 1;
1780
0
      uint32_t length;
1781
0
      const int threshold = COEF_REMAIN_BIN_REDUCTION;
1782
0
      if( symbol < ( threshold << ricePar ) )
1783
0
      {
1784
0
        length = symbol >> ricePar;
1785
0
        rate  += ( length + 1 + ricePar ) << SCALE_BITS;
1786
0
      }
1787
0
      else
1788
0
      {
1789
0
        length = ricePar;
1790
0
        symbol = symbol - ( threshold << ricePar );
1791
0
        while( symbol >= ( 1 << length ) )
1792
0
        {
1793
0
          symbol -= ( 1 << ( length++ ) );
1794
0
        }
1795
0
        rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS;
1796
0
      }
1797
0
    }
1798
0
  }
1799
0
  else if( absLevel == 1 )
1800
0
  {
1801
0
    rate += fracBitsGt1.intBits[0];
1802
0
    numCtxBins++;
1803
0
  }
1804
0
  else
1805
0
  {
1806
0
    rate = 0;
1807
0
  }
1808
0
  return rate;
1809
0
}
1810
1811
} // namespace vvenc
1812
1813
//! \}
1814