Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/QuantRDOQ.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
------------------------------------------------------------------------------------------- */
41
42
43
/** \file     QuantRDOQ.cpp
44
    \brief    transform and quantization class
45
*/
46
47
#include "QuantRDOQ.h"
48
#include "UnitTools.h"
49
#include "ContextModelling.h"
50
#include "CodingStructure.h"
51
#include "dtrace_next.h"
52
#include "dtrace_buffer.h"
53
54
#include <stdlib.h>
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
/// Rate-distortion bookkeeping for a single coefficient group.
/// The type is kept free of constructors and member initializers because
/// xRateDistOptQuant() clears instances with memset().
struct coeffGroupRDStats
{
  int    iNNZbeforePos0;        // NOTE(review): presumably the number of non-zero levels ahead of position 0 in the group - confirm against xRateDistOptQuant()
  double d64CodedLevelandDist;  // distortion and level cost only
  double d64UncodedDist;        // all zero coded block distortion
  double d64SigCost;            // NOTE(review): accumulated significance cost - confirm exact scope
  double d64SigCost_0;          // NOTE(review): significance cost tied to position 0 - confirm
  int    iNumSbbCtxBins;        // NOTE(review): presumably the context-coded bin count of the sub-block - confirm
};
71
72
73
//! \ingroup CommonLib
74
//! \{
75
76
// ====================================================================================================================
77
// Constants
78
// ====================================================================================================================
79
80
81
// ====================================================================================================================
82
// Static functions
83
// ====================================================================================================================
84
85
// ====================================================================================================================
86
// QuantRDOQ class member functions
87
// ====================================================================================================================
88
89
90
17.7k
/** Construct an RDOQ quantizer, optionally sharing data with an existing one
 * \param other            quantizer to derive from, or nullptr for a stand-alone instance;
 *                         when given it must itself be a QuantRDOQ
 * \param useScalingLists  forwarded unchanged to the Quant base class
 */
QuantRDOQ::QuantRDOQ( const Quant* other, bool useScalingLists )
  : Quant( other, useScalingLists )
{
  // a non-null 'other' of any different dynamic type is a usage error
  const QuantRDOQ *rdoq = dynamic_cast<const QuantRDOQ*>( other );
  CHECK( other && !rdoq, "The RDOQ cast must be successfull!" );

  // with rdoq == nullptr this instance becomes the owner of the error scale tables
  xInitScalingList( rdoq );
}
97
98
/** Release the error scale tables (a no-op inside xDestroyScalingList() for non-owning instances) */
QuantRDOQ::~QuantRDOQ()
{
  xDestroyScalingList();
}
102
103
104
105
106
/** Get the best level in RD sense
107
 *
108
 * \returns best quantized transform level for given scan position
109
 *
110
 * This method calculates the best quantized transform level for a given scan position.
111
 */
112
inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
113
                                       double&            rd64CodedCost0,
114
                                       double&            rd64CodedCostSig,
115
                                       Intermediate_Int   lLevelDouble,
116
                                       uint32_t               uiMaxAbsLevel,
117
                                       const BinFracBits* fracBitsSig,
118
                                       const BinFracBits& fracBitsPar,
119
                                       const BinFracBits& fracBitsGt1,
120
                                       const BinFracBits& fracBitsGt2,
121
                                       const int          remRegBins,
122
                                       unsigned           goRiceZero,
123
                                       uint16_t             ui16AbsGoRice,
124
                                       int                iQBits,
125
                                       double             errorScale,
126
                                       bool               bLast,
127
                                       const int          maxLog2TrDynamicRange
128
                                     ) const
129
0
{
130
0
  double dCurrCostSig   = 0;
131
0
  uint32_t   uiBestAbsLevel = 0;
132
133
0
  if( !bLast && uiMaxAbsLevel < 3 )
134
0
  {
135
0
    rd64CodedCostSig    = xGetRateSigCoef( *fracBitsSig, 0 );
136
0
    rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
137
0
    if( uiMaxAbsLevel == 0 )
138
0
    {
139
0
      return uiBestAbsLevel;
140
0
    }
141
0
  }
142
0
  else
143
0
  {
144
0
    rd64CodedCost       = MAX_DOUBLE;
145
0
  }
146
147
0
  if( !bLast )
148
0
  {
149
0
    dCurrCostSig        = xGetRateSigCoef( *fracBitsSig, 1 );
150
0
  }
151
152
0
  uint32_t uiMinAbsLevel    = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
153
0
  for( int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
154
0
  {
155
0
    double dErr         = double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
156
157
0
    double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, maxLog2TrDynamicRange ) );
158
0
    dCurrCost          += dCurrCostSig;
159
160
0
    if( dCurrCost < rd64CodedCost )
161
0
    {
162
0
      uiBestAbsLevel    = uiAbsLevel;
163
0
      rd64CodedCost     = dCurrCost;
164
0
      rd64CodedCostSig  = dCurrCostSig;
165
0
    }
166
0
  }
167
168
0
  return uiBestAbsLevel;
169
0
}
170
171
/** Calculates the cost for specific absolute transform level
172
 * \param uiAbsLevel scaled quantized level
173
 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
174
 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
175
 * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
176
 * \param c1Idx
177
 * \param c2Idx
178
 * \param useLimitedPrefixLength
179
 * \param maxLog2TrDynamicRange
180
 * \returns cost of given absolute transform level
181
 */
182
inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
183
                                  const BinFracBits& fracBitsPar,
184
                                  const BinFracBits& fracBitsGt1,
185
                                  const BinFracBits& fracBitsGt2,
186
                                  const int          remRegBins,
187
                                  unsigned           goRiceZero,
188
                                  const uint16_t       ui16AbsGoRice,
189
                                  const int          maxLog2TrDynamicRange  ) const
190
0
{
191
0
  if( remRegBins < 4 )
192
0
  {
193
0
    int       iRate   = int( xGetIEPRate() ); // cost of sign bit
194
0
    uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
195
0
    uint32_t  length;
196
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
197
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
198
0
    {
199
0
      length = symbol >> ui16AbsGoRice;
200
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
201
0
    }
202
0
    else
203
0
    {
204
0
      length = ui16AbsGoRice;
205
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
206
0
      while( symbol >= ( 1 << length ) )
207
0
      {
208
0
        symbol -= ( 1 << ( length++ ) );
209
0
      }
210
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
211
0
    }
212
0
    return iRate;
213
0
  }
214
215
0
  int iRate = int( xGetIEPRate() ); // cost of sign bit
216
0
  const uint32_t cthres = 4;
217
0
  if( uiAbsLevel >= cthres )
218
0
  {
219
0
    uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;
220
0
    uint32_t length;
221
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
222
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
223
0
    {
224
0
      length = symbol >> ui16AbsGoRice;
225
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
226
0
    }
227
0
    else
228
0
    {
229
0
      length = ui16AbsGoRice;
230
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
231
0
      while( symbol >= ( 1 << length ) )
232
0
      {
233
0
        symbol -= ( 1 << ( length++ ) );
234
0
      }
235
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
236
0
    }
237
238
0
    iRate += fracBitsGt1.intBits[1];
239
0
    iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
240
0
    iRate += fracBitsGt2.intBits[1];
241
0
  }
242
0
  else if( uiAbsLevel == 1 )
243
0
  {
244
0
    iRate += fracBitsGt1.intBits[0];
245
0
  }
246
0
  else if( uiAbsLevel == 2 )
247
0
  {
248
0
    iRate += fracBitsGt1.intBits[1];
249
0
    iRate += fracBitsPar.intBits[0];
250
0
    iRate += fracBitsGt2.intBits[0];
251
0
  }
252
0
  else if( uiAbsLevel == 3 )
253
0
  {
254
0
    iRate += fracBitsGt1.intBits[1];
255
0
    iRate += fracBitsPar.intBits[1];
256
0
    iRate += fracBitsGt2.intBits[0];
257
0
  }
258
0
  else
259
0
  {
260
0
    iRate = 0;
261
0
  }
262
0
  return  iRate;
263
0
}
264
265
inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
266
1.24M
{
267
1.24M
  return xGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] );
268
1.24M
}
269
270
/** Calculates the cost of signaling the last significant coefficient in the block
271
 * \param uiPosX X coordinate of the last significant coefficient
272
 * \param uiPosY Y coordinate of the last significant coefficient
273
 * \param component colour component ID
274
 * \returns cost of last significant coefficient
275
 */
276
/*
277
 * \param uiWidth width of the transform unit (TU)
278
*/
279
inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
280
0
{
281
0
  uint32_t    CtxX  = g_uiGroupIdx[PosX];
282
0
  uint32_t    CtxY  = g_uiGroupIdx[PosY];
283
0
  double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
284
0
  if( CtxX > 3 )
285
0
  {
286
0
    Cost += xGetIEPRate() * ((CtxX-2)>>1);
287
0
  }
288
0
  if( CtxY > 3 )
289
0
  {
290
0
    Cost += xGetIEPRate() * ((CtxY-2)>>1);
291
0
  }
292
0
  return xGetICost( Cost );
293
0
}
294
295
296
inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
297
8.91M
{
298
8.91M
  return xGetICost( fracBitsSig.intBits[uiSignificance] );
299
8.91M
}
300
301
/** Get the cost for a specific rate
302
 * \param dRate rate of a bit
303
 * \returns cost at the specific rate
304
 */
305
inline double QuantRDOQ::xGetICost        ( double                          dRate         ) const
306
12.4M
{
307
12.4M
  return m_dLambda * dRate;
308
12.4M
}
309
310
/** Get the cost of an equal probable bit
311
 * \returns cost of equal probable bit
312
 */
313
inline double QuantRDOQ::xGetIEPRate() const
314
0
{
315
0
  return 32768;
316
0
}
317
318
319
double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false)
320
88.8k
{
321
88.8k
  const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
322
88.8k
  double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
323
88.8k
  double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
324
88.8k
  dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
325
88.8k
  const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
326
88.8k
  double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
327
88.8k
  return    finalErrScale;
328
88.8k
}
329
330
331
332
/** set error scale coefficients
333
 * \param list                   list ID
334
 * \param size
335
 * \param qp                     quantization parameter
336
 * \param maxLog2TrDynamicRange
337
 * \param bitDepths              reference to bit depth array for all channels
338
 */
339
void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths )
340
31.3M
{
341
31.3M
  const int width = g_scalingListSizeX[sizeX];
342
31.3M
  const int height = g_scalingListSizeX[sizeY];
343
31.3M
  const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C;
344
31.3M
  const int channelBitDepth = bitDepths[channelType];
345
31.3M
  const int iTransformShift = getTransformShift( channelBitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[channelType] );  // Represents scaling through forward transform
346
347
31.3M
  double dErrScale = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function
348
349
31.3M
  const bool needsSqrt2 = ((Log2(width*height)) & 1) == 1;
350
31.3M
  double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 );
351
31.3M
  dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform
352
353
31.3M
  if( getScalingListEnabled() )
354
0
  {
355
0
    uint32_t i, uiMaxNumCoeff = width * height;
356
357
0
    int*  piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY );
358
0
    double* pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp );
359
360
0
    for( i = 0; i < uiMaxNumCoeff; i++ )
361
0
    {
362
0
      pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << (DISTORTION_PRECISION_ADJUSTMENT( bitDepths[channelType] ) << 1));
363
0
    }
364
0
  }
365
366
31.3M
  int QStep = g_quantScales[needsSqrt2][qp];
367
368
31.3M
  xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) =
369
31.3M
    dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths[channelType]) << 1));
370
31.3M
}
371
372
/** set flat matrix value to quantized coefficient
373
 */
374
void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths)
375
17.7k
{
376
17.7k
  Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );
377
378
17.7k
  const int minimumQp = 0;
379
17.7k
  const int maximumQp = SCALING_LIST_REM_NUM;
380
381
142k
  for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++)
382
124k
  {
383
995k
    for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++)
384
871k
    {
385
6.09M
      for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
386
5.22M
      {
387
36.5M
        for(int qp = minimumQp; qp < maximumQp; qp++)
388
31.3M
        {
389
31.3M
          xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths );
390
31.3M
        }
391
5.22M
      }
392
871k
    }
393
124k
  }
394
17.7k
}
395
396
/** initialization process of scaling list array
397
 */
398
void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
399
17.7k
{
400
17.7k
  m_isErrScaleListOwner = other == nullptr;
401
402
17.7k
  bool useScalingLists = getScalingListEnabled();
403
404
142k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
405
124k
  {
406
995k
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
407
871k
    {
408
6.09M
      for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
409
5.22M
      {
410
36.5M
        for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
411
31.3M
        {
412
31.3M
          if( m_isErrScaleListOwner )
413
31.3M
          {
414
31.3M
            m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr;
415
31.3M
          }
416
0
          else
417
0
          {
418
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp];
419
0
          }
420
31.3M
        } // listID loop
421
5.22M
      }
422
871k
    }
423
124k
  }
424
17.7k
}
425
426
/** destroy quantization matrix array
427
 */
428
void QuantRDOQ::xDestroyScalingList()
429
17.7k
{
430
17.7k
  if( !m_isErrScaleListOwner ) return;
431
432
142k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
433
124k
  {
434
995k
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
435
871k
    {
436
6.09M
      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
437
5.22M
      {
438
36.5M
        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
439
31.3M
        {
440
31.3M
          if(m_errScale[sizeIdX][sizeIdY][listId][qp])
441
0
          {
442
0
            delete [] m_errScale[sizeIdX][sizeIdY][listId][qp];
443
0
          }
444
31.3M
        }
445
5.22M
      }
446
871k
    }
447
124k
  }
448
//   Quant::destroyScalingList();
449
17.7k
}
450
451
452
/** Quantize one component of a transform unit, using RDOQ where it is enabled
 *
 * RDOQ is selected by m_useRDOQTS for transform-skip blocks and by m_RDOQ > 0 otherwise.
 * Unless the block is a luma block of a CU with ispMode set, RDOQ additionally requires
 * width and height above 2. Without RDOQ the call is forwarded to Quant::quant().
 * When the picture uses selective RDOQ and xNeedRDOQ() returns false, the block is
 * zeroed (coefficients, uiAbsSum, and lastPos set to -1) instead of being quantized.
 *
 * \param tu        transform unit to be quantized
 * \param compID    component inside the transform unit
 * \param pSrc      coefficients to quantize
 * \param uiAbsSum  [out] set to 0 when the block is zeroed, otherwise filled by the called quantizer
 * \param cQP       quantization parameters
 * \param ctx       context state, forwarded to the called quantizer
 */
void QuantRDOQ::quant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  const CompArea& area      = tu.blocks[compID];
  const uint32_t  blkWidth  = area.width;
  const uint32_t  blkHeight = area.height;

  CoeffSigBuf dstCoeffs = tu.getCoeffs( compID );

  const bool isTransformSkip = ( tu.mtsIdx[compID] == MTS_SKIP );

  bool rdoqEnabled = isTransformSkip ? m_useRDOQTS : m_RDOQ > 0;

  if( !tu.cu->ispMode || !isLuma( compID ) )
  {
    rdoqEnabled &= blkWidth > 2;
    rdoqEnabled &= blkHeight > 2;
  }

  if( !rdoqEnabled )
  {
    Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ( tu, compID, pSrc, cQP ) )
  {
    // nothing worth coding: clear the block
    dstCoeffs.fill( 0 );
    uiAbsSum           = 0;
    tu.lastPos[compID] = -1;
    return;
  }

  if( !isTransformSkip )
  {
    xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else if( tu.cu->bdpcmM[toChannelType( compID )] )
  {
    forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else
  {
    rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
}
503
504
505
506
void QuantRDOQ::xRateDistOptQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx &ctx)
507
0
{
508
0
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
509
510
0
  const SPS &sps            = *tu.cs->sps;
511
0
  const CompArea& rect      = tu.blocks[compID];
512
0
  const uint32_t uiWidth    = rect.width;
513
0
  const uint32_t uiHeight   = rect.height;
514
0
  const ChannelType chType  = toChannelType(compID);
515
0
  const int channelBitDepth = sps.bitDepths[ chType ];
516
517
0
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
518
519
0
  const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
520
  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
521
  * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
522
  * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
523
  * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
524
  */
525
526
  // Represents scaling through forward transform
527
0
  const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
528
529
0
  double     d64BlockUncodedCost               = 0;
530
0
  const uint32_t uiLog2BlockWidth                  = Log2(uiWidth);
531
0
  const uint32_t uiLog2BlockHeight                 = Log2(uiHeight);
532
0
  const uint32_t uiMaxNumCoeff                     = rect.area();
533
534
0
  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
535
536
0
  int scalingListType = getScalingListType(tu.cu->predMode, compID);
537
538
0
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
539
540
0
  const TCoeff    *plSrcCoeff = pSrc.buf;
541
0
        TCoeffSig *piDstCoeff = tu.getCoeffs(compID).buf;
542
543
0
  double *pdCostCoeff  = m_pdCostCoeff;
544
0
  double *pdCostSig    = m_pdCostSig;
545
0
  double *pdCostCoeff0 = m_pdCostCoeff0;
546
0
  int    *rateIncUp    = m_rateIncUp;
547
0
  int    *rateIncDown  = m_rateIncDown;
548
0
  int    *sigRateDelta = m_sigRateDelta;
549
0
  TCoeff *deltaU       = m_deltaU;
550
551
0
  memset( piDstCoeff,     0, sizeof( TCoeffSig ) * uiMaxNumCoeff );
552
0
  memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
553
0
  memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
554
0
  memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
555
0
  memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
556
0
  memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
557
0
  memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
558
559
560
0
  const bool   needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID );
561
0
  const bool   isTransformSkip    = tu.mtsIdx[compID]==MTS_SKIP;
562
0
  const double *const pdErrScale  = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
563
0
  const int    *const piQCoef     = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
564
0
  const bool isLfnstApplied       = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
565
0
  const bool enableScalingLists   = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied);
566
0
  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
567
0
  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
568
0
  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
569
570
571
0
  const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
572
0
  const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
573
574
0
  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
575
0
  const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
576
577
0
  int     iCGLastScanPos      = -1;
578
0
  double  d64BaseCost         = 0;
579
0
  int     iLastScanPos        = -1;
580
581
0
  int ctxBinSampleRatio   = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT;
582
0
  int remRegBins          = (tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio) >> 4;
583
0
  uint32_t  goRiceParam   = 0;
584
585
0
  double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
586
0
  memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
587
0
  int iScanPos;
588
0
  coeffGroupRDStats rdStats;
589
590
#if ENABLE_TRACING
591
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
592
#endif
593
594
0
  const uint32_t lfnstIdx = tu.cu->lfnstIdx;
595
596
0
  const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
597
598
0
  for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
599
0
  {
600
0
    cctx.initSubblock( subSetId );
601
602
0
    int remRegBinsStartCG = remRegBins;
603
604
0
    uint32_t maxNonZeroPosInCG = iCGSizeM1;
605
0
    if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
606
0
    {
607
0
      maxNonZeroPosInCG = 7;
608
0
    }
609
610
0
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));
611
612
0
    for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
613
0
    {
614
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
615
0
      uint32_t    blkPos = cctx.blockPos( iScanPos );
616
0
      piDstCoeff[ blkPos ] = 0;
617
0
    }
618
0
    for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
619
0
    {
620
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
621
      //===== quantization =====
622
0
      uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
623
624
      // set coeff
625
0
      const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
626
0
      const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
627
0
      const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
628
629
0
      const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
630
631
0
      uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
632
633
0
      const double dErr         = double( lLevelDouble );
634
0
      pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
635
0
      d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
636
0
      piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
637
638
0
      if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
639
0
      {
640
0
        iLastScanPos            = iScanPos;
641
0
        iCGLastScanPos          = cctx.subSetId();
642
0
      }
643
644
0
      if ( iLastScanPos >= 0 )
645
0
      {
646
647
#if ENABLE_TRACING
648
        uint32_t uiCGPosY = cctx.cgPosY();
649
        uint32_t uiCGPosX = cctx.cgPosX();
650
        uint32_t uiPosY = cctx.posY( iScanPos );
651
        uint32_t uiPosX = cctx.posX( iScanPos );
652
        DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
653
#endif
654
        //===== coefficient level estimation =====
655
0
        unsigned ctxIdSig = 0;
656
0
        if( iScanPos != iLastScanPos )
657
0
        {
658
0
          ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
659
0
        }
660
0
        uint32_t    uiLevel;
661
0
        uint8_t ctxOffset     = cctx.ctxOffsetAbs     ();
662
0
        uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
663
0
        uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
664
0
        uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
665
0
        uint32_t    goRiceZero    = 0;
666
0
        if( remRegBins < 4 )
667
0
        {
668
0
          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
669
0
          goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
670
0
          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
671
0
        }
672
673
0
        const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
674
0
        const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
675
0
        const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
676
677
0
        if( iScanPos == iLastScanPos )
678
0
        {
679
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
680
0
                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, maxLog2TrDynamicRange );
681
0
        }
682
0
        else
683
0
        {
684
0
          DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
685
686
0
          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
687
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
688
0
                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, maxLog2TrDynamicRange );
689
0
          sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
690
0
        }
691
692
0
        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
693
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
694
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
695
696
0
        deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
697
698
0
        if( uiLevel > 0 )
699
0
        {
700
0
          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
701
0
          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
702
0
          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
703
0
        }
704
0
        else // uiLevel == 0
705
0
        {
706
0
          if( remRegBins < 4 )
707
0
          {
708
0
            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
709
0
            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
710
0
          }
711
0
          else
712
0
          {
713
0
            rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
714
0
          }
715
0
        }
716
0
        piDstCoeff[ uiBlkPos ] = uiLevel;
717
0
        d64BaseCost           += pdCostCoeff [ iScanPos ];
718
719
0
        if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
720
0
        {
721
0
          goRiceParam   = 0;
722
0
        }
723
0
        else if( remRegBins >= 4 )
724
0
        {
725
0
          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
726
0
          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
727
0
          remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
728
0
        }
729
0
      }
730
0
      else
731
0
      {
732
0
        d64BaseCost    += pdCostCoeff0[ iScanPos ];
733
0
      }
734
0
      rdStats.d64SigCost += pdCostSig[ iScanPos ];
735
0
      if (iScanPosinCG == 0 )
736
0
      {
737
0
        rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
738
0
      }
739
0
      if (piDstCoeff[ uiBlkPos ] )
740
0
      {
741
0
        cctx.setSigGroup();
742
0
        rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
743
0
        rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
744
0
        if ( iScanPosinCG != 0 )
745
0
        {
746
0
          rdStats.iNNZbeforePos0++;
747
0
        }
748
0
      }
749
0
    } //end for (iScanPosinCG)
750
751
0
    if (iCGLastScanPos >= 0)
752
0
    {
753
0
      if( cctx.subSetId() )
754
0
      {
755
0
        if( !cctx.isSigGroup() )
756
0
        {
757
0
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
758
0
          d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
759
0
          pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
760
0
        }
761
0
        else
762
0
        {
763
0
          if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
764
0
          {
765
0
            if ( rdStats.iNNZbeforePos0 == 0 )
766
0
            {
767
0
              d64BaseCost -= rdStats.d64SigCost_0;
768
0
              rdStats.d64SigCost -= rdStats.d64SigCost_0;
769
0
            }
770
            // rd-cost if SigCoeffGroupFlag = 0, initialization
771
0
            double d64CostZeroCG = d64BaseCost;
772
773
0
            const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
774
775
0
            if (cctx.subSetId() < iCGLastScanPos)
776
0
            {
777
0
              d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);
778
0
              d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);
779
0
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
780
0
            }
781
782
            // try to convert the current coeff group from non-zero to all-zero
783
0
            d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
784
0
            d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
785
0
            d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels
786
787
                                                     // if we can save cost, change this block to all-zero block
788
0
            if ( d64CostZeroCG < d64BaseCost )
789
0
            {
790
0
              cctx.resetSigGroup();
791
0
              d64BaseCost = d64CostZeroCG;
792
0
              remRegBins = remRegBinsStartCG;
793
0
              if (cctx.subSetId() < iCGLastScanPos)
794
0
              {
795
0
                pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
796
0
              }
797
              // reset coeffs to 0 in this block
798
0
              for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
799
0
              {
800
0
                iScanPos      = cctx.minSubPos() + iScanPosinCG;
801
0
                uint32_t uiBlkPos = cctx.blockPos( iScanPos );
802
803
0
                if (piDstCoeff[ uiBlkPos ])
804
0
                {
805
0
                  piDstCoeff [ uiBlkPos ] = 0;
806
0
                  pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
807
0
                  pdCostSig  [ iScanPos ] = 0;
808
0
                }
809
0
              }
810
0
            } // end if ( d64CostAllZeros < d64BaseCost )
811
0
          }
812
0
        } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
813
0
      }
814
0
      else
815
0
      {
816
0
        cctx.setSigGroup();
817
0
      }
818
0
    }
819
0
  } //end for (cctx.subSetId)
820
821
822
  //===== estimate last position =====
823
0
  if ( iLastScanPos < 0 )
824
0
  {
825
0
    return;
826
0
  }
827
828
0
  double  d64BestCost         = 0;
829
0
  int     iBestLastIdxP1      = 0;
830
831
832
0
  if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
833
0
  {
834
0
    const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
835
0
    d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
836
0
    d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
837
0
  }
838
0
  else
839
0
  {
840
0
    bool previousCbf       = tu.cbf[COMP_Cb];
841
0
    bool lastCbfIsInferred = false;
842
0
    if( useIntraSubPartitions )
843
0
    {
844
0
      bool rootCbfSoFar       = false;
845
0
      bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
846
0
      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
847
0
      if( isLastSubPartition )
848
0
      {
849
0
        TransformUnit* tuPointer = tu.cu->firstTU;
850
0
        for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
851
0
        {
852
0
          rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
853
0
          tuPointer     = tuPointer->next;
854
0
        }
855
0
        if( !rootCbfSoFar )
856
0
        {
857
0
          lastCbfIsInferred = true;
858
0
        }
859
0
      }
860
0
      if( !lastCbfIsInferred )
861
0
      {
862
0
        previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
863
0
      }
864
0
    }
865
0
    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
866
867
0
    if( !lastCbfIsInferred )
868
0
    {
869
0
      d64BestCost  = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
870
0
      d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
871
0
    }
872
0
    else
873
0
    {
874
0
      d64BestCost  = d64BlockUncodedCost;
875
0
    }
876
0
  }
877
878
0
  int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
879
0
  int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
880
0
  {
881
0
    int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
882
0
    int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
883
0
    int bitsX = 0;
884
0
    int bitsY = 0;
885
0
    int ctxId;
886
    //X-coordinate
887
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
888
0
    {
889
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
890
0
      lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
891
0
      bitsX               +=         fB.intBits[ 1 ];
892
0
    }
893
0
    lastBitsX[ctxId] = bitsX;
894
    //Y-coordinate
895
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
896
0
    {
897
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
898
0
      lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
899
0
      bitsY               +=         fB.intBits[ 1 ];
900
0
    }
901
0
    lastBitsY[ctxId] = bitsY;
902
0
  }
903
904
905
0
  bool bFoundLast = false;
906
0
  for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
907
0
  {
908
0
    d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
909
0
    if (cctx.isSigGroup( iCGScanPos ) )
910
0
    {
911
0
      uint32_t maxNonZeroPosInCG = iCGSizeM1;
912
0
      if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
913
0
      {
914
0
        maxNonZeroPosInCG = 7;
915
0
      }
916
0
      for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
917
0
      {
918
0
        iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
919
920
0
        if (iScanPos > iLastScanPos)
921
0
        {
922
0
          continue;
923
0
        }
924
0
        uint32_t   uiBlkPos     = cctx.blockPos( iScanPos );
925
926
0
        if( piDstCoeff[ uiBlkPos ] )
927
0
        {
928
0
          uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
929
0
          uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
930
0
          double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
931
932
0
          double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
933
934
0
          if( totalCost < d64BestCost )
935
0
          {
936
0
            iBestLastIdxP1  = iScanPos + 1;
937
0
            d64BestCost     = totalCost;
938
0
          }
939
0
          if( piDstCoeff[ uiBlkPos ] > 1 )
940
0
          {
941
0
            bFoundLast = true;
942
0
            break;
943
0
          }
944
0
          d64BaseCost      -= pdCostCoeff[ iScanPos ];
945
0
          d64BaseCost      += pdCostCoeff0[ iScanPos ];
946
0
        }
947
0
        else
948
0
        {
949
0
          d64BaseCost      -= pdCostSig[ iScanPos ];
950
0
        }
951
0
      } //end for
952
0
      if (bFoundLast)
953
0
      {
954
0
        break;
955
0
      }
956
0
    } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
957
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
958
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
959
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
960
961
0
  } // end for
962
963
964
0
  for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
965
0
  {
966
0
    int blkPos = cctx.blockPos( scanPos );
967
0
    TCoeff level = piDstCoeff[ blkPos ];
968
0
    uiAbsSum += level;
969
0
    piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
970
0
  }
971
972
  //===== clean uncoded coefficients =====
973
0
  for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
974
0
  {
975
0
    piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
976
0
  }
977
0
  iLastScanPos = iBestLastIdxP1 - 1;
978
979
0
  if( cctx.signHiding() && uiAbsSum>=2)
980
0
  {
981
0
    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
982
0
    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
983
0
                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
984
0
                             + 0.5);
985
986
0
    int lastCG = -1;
987
0
    int absSum = 0 ;
988
0
    int n ;
989
0
    for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
990
0
    {
991
0
      int  subPos         = subSet << cctx.log2CGSize();
992
0
      int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
993
0
      absSum = 0 ;
994
995
0
      for( n = iCGSizeM1; n >= 0; --n )
996
0
      {
997
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
998
0
        {
999
0
          lastNZPosInCG = n;
1000
0
          break;
1001
0
        }
1002
0
      }
1003
1004
0
      for( n = 0; n <= iCGSizeM1; n++ )
1005
0
      {
1006
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
1007
0
        {
1008
0
          firstNZPosInCG = n;
1009
0
          break;
1010
0
        }
1011
0
      }
1012
1013
0
      for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
1014
0
      {
1015
0
        absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
1016
0
      }
1017
1018
0
      if(lastNZPosInCG>=0 && lastCG==-1)
1019
0
      {
1020
0
        lastCG = 1;
1021
0
      }
1022
1023
0
      if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1024
0
      {
1025
0
        uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
1026
0
        if( signbit!=(absSum&0x1) )  // hide but need tune
1027
0
        {
1028
          // calculate the cost
1029
0
          int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
1030
0
          int minPos = -1, finalChange = 0, curChange = 0;
1031
1032
0
          for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
1033
0
          {
1034
0
            uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
1035
0
            if(piDstCoeff[ uiBlkPos ] != 0 )
1036
0
            {
1037
0
              int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
1038
0
              int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1039
0
                -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
1040
1041
0
              if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1042
0
              {
1043
0
                costDown -= (4<<SCALE_BITS);
1044
0
              }
1045
1046
0
              if(costUp<costDown)
1047
0
              {
1048
0
                curCost = costUp;
1049
0
                curChange =  1;
1050
0
              }
1051
0
              else
1052
0
              {
1053
0
                curChange = -1;
1054
0
                if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1055
0
                {
1056
0
                  curCost = std::numeric_limits<int64_t>::max();
1057
0
                }
1058
0
                else
1059
0
                {
1060
0
                  curCost = costDown;
1061
0
                }
1062
0
              }
1063
0
            }
1064
0
            else
1065
0
            {
1066
0
              curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1067
0
              curChange = 1 ;
1068
1069
0
              if(n<firstNZPosInCG)
1070
0
              {
1071
0
                uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1072
0
                if(thissignbit != signbit )
1073
0
                {
1074
0
                  curCost = std::numeric_limits<int64_t>::max();
1075
0
                }
1076
0
              }
1077
0
            }
1078
1079
0
            if( curCost<minCostInc)
1080
0
            {
1081
0
              minCostInc = curCost;
1082
0
              finalChange = curChange;
1083
0
              minPos = uiBlkPos;
1084
0
            }
1085
0
          }
1086
1087
0
          if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
1088
0
          {
1089
0
            finalChange = -1;
1090
0
          }
1091
1092
0
          if(plSrcCoeff[minPos]>=0)
1093
0
          {
1094
0
            piDstCoeff[minPos] += finalChange ;
1095
0
          }
1096
0
          else
1097
0
          {
1098
0
            piDstCoeff[minPos] -= finalChange ;
1099
0
          }
1100
0
        }
1101
0
      }
1102
1103
0
      if(lastCG==1)
1104
0
      {
1105
0
        lastCG=0 ;
1106
0
      }
1107
0
    }
1108
1109
    // Check due to saving of last pos. Sign data hiding can change the position of last coef.
1110
0
    if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 )
1111
0
    {
1112
0
      int scanPos = iLastScanPos - 1;
1113
0
      for( ; scanPos >= 0; scanPos-- )
1114
0
      {
1115
0
        if( piDstCoeff[cctx.blockPos( scanPos )] )
1116
0
          break;
1117
0
      }
1118
0
      iLastScanPos = scanPos;
1119
0
    }
1120
0
  }
1121
0
  tu.lastPos[compID] = iLastScanPos;
1122
0
}
1123
1124
/** Rate-distortion optimised quantisation using the "TS" coefficient-coding contexts.
 *
 *  The block is processed sub-block by sub-block in increasing scan order. For every
 *  coefficient up to three candidate levels are collected (rounded level, one below it,
 *  and - depending on the neighbour-based prediction - one above the truncated level) and
 *  xGetCodedLevelTSPred() picks the level to code. After each sub-block the cost of keeping
 *  the sub-block is compared with the cost of signalling it as all-zero; if zeroing is
 *  cheaper, its levels are cleared and the context bins it consumed are returned to
 *  cctx.remRegBins.
 *
 *  \param tu      transform unit; its coefficient buffer for \p compID receives the signed levels
 *  \param compID  component to quantise (must be < MAX_NUM_TBLOCKS)
 *  \param coeffs  source coefficients, read only
 *  \param absSum  incremented by the sum of absolute values of all final levels
 *                 (it is not reset here, so the caller has to initialise it)
 *  \param qp      quantisation parameter providing per()/rem()
 *  \param ctx     context set providing the fractional bit estimates
 */
void QuantRDOQ::rateDistOptQuantTS( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps            = *tu.cs->sps;
  const CompArea& rect      = tu.blocks[compID];
  const uint32_t width      = rect.width;
  const uint32_t height     = rect.height;
  const ChannelType chType  = toChannelType(compID);
  const int channelBitDepth = sps.bitDepths[ chType ];

  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();

  const int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );

  const uint32_t maxNumCoeff = rect.area();

  CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );

  // only evaluated for the sanity check below
  const int scalingListType = getScalingListType( tu.cu->predMode, compID );
  CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs( compID ).buf;

  // per-scan-position cost buffers (members, reused between calls)
  double *costCoeff  = m_pdCostCoeff;
  double *costSig    = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;

  // costCoeff0 needs no clearing: every entry is written before it is read
  memset( m_pdCostCoeff,  0, sizeof( double ) *  maxNumCoeff );
  memset( m_pdCostSig,    0, sizeof( double ) *  maxNumCoeff );

  m_bdpcm = 0;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];
  // reuse the flag computed above instead of querying TU::needsSqrt2Scale() a second time
  const double errorScale              = xGetErrScaleCoeff( needsSqrt2Scale, width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);

  const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;

  // candidate levels of the current coefficient; m_testedLevels holds how many are valid
  uint32_t coeffLevels[3];
  double   coeffLevelError[4];

  CoeffCodingContext cctx( tu, compID, tu.cs->slice->signDataHidingEnabled );
  const int sbSizeM1    = ( 1 << cctx.log2CGSize() ) - 1;
  double    baseCost    = 0;
  uint32_t  goRiceParam = 0;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );

  const int sbNum = width * height >> cctx.log2CGSize();
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // remRegBins starts at 7/4 of the number of coefficients reported by the coding context
  const int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
  cctx.remRegBins = maxCtxBins;

  for( int sbId = 0; sbId < sbNum; sbId++ )
  {
    cctx.initSubblock( sbId );

    int noCoeffCoded = 0;
    baseCost = 0.0;
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));

    rdStats.iNumSbbCtxBins = 0;

    for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
    {
      const int scanPos = cctx.minSubPos() + scanPosInSB;
      //===== quantization =====
      const uint32_t blkPos = cctx.blockPos( scanPos );

      // scaled magnitude, clamped so that adding the rounding offset below cannot overflow
      const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) );

      // rounded level and its lower neighbour (never below 1)
      const uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
      const uint32_t minAbsLevel   = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

      // truncated level and its upper neighbour
      const uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits));
      const uint32_t upAbsLevel   = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1);

      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if( minAbsLevel != roundAbsLevel )
      {
        coeffLevels[m_testedLevels++] = minAbsLevel;
      }

      int rightPixel, belowPixel, predPixel;

      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
      predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0);

      // the upper neighbour is only tested when it is a new candidate and maps to 1
      if( upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1 )
      {
        coeffLevels[m_testedLevels++] = upAbsLevel;
      }

      // cost of coding the coefficient as zero
      const double dErr = double(levelDouble);
      coeffLevelError[0] = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      // provisional level; the context derivations below read dstCoeff
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
            unsigned    ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
            uint32_t    cLevel;
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );

      //goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
      goRiceParam = 1;
      const unsigned    ctxIdSign    = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0);
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
      const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;

      DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );

      const unsigned    gt1CtxId    = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0);
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

      const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );

      // set for the final position of a sub-block in which no non-zero level was chosen so far
      bool lastCoeff = ( scanPosInSB == sbSizeM1 ) && ( noCoeffCoded == 0 );

      int numUsedCtxBins = 0;
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
                                    &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);

      cctx.remRegBins -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if (cLevel > 0)
      {
        noCoeffCoded++;
      }

      // store the chosen level with the sign of the source coefficient
      const TCoeff level = cLevel;
      dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level;
      baseCost           += costCoeff[ scanPos ];
      rdStats.d64SigCost += costSig[ scanPos ];

      if( dstCoeff[ blkPos ] )
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
        rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
      }
    } //end for (scanPosInSB)

    if( !cctx.isSigGroup() )
    {
      // all-zero sub-block: replace the per-coefficient sig costs by the cost of the group flag
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
      baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if( sbId != sbNum - 1 || anySigCG )
    {
      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );

      baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
      costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if( costZeroSB < baseCost )
      {
        // zeroing the whole sub-block is cheaper
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
        {
          const int      scanPos = cctx.minSubPos() + scanPosInSB;
          const uint32_t blkPos  = cctx.blockPos( scanPos );

          if( dstCoeff[ blkPos ] )
          {
            dstCoeff[ blkPos ] = 0;
            costCoeff[ scanPos ] = costCoeff0[ scanPos ];
            costSig[ scanPos] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  } //end for (sbId)

  //===== accumulate the absolute sum of all final levels =====
  for( int scanPos = 0; scanPos < static_cast<int>( maxNumCoeff ); scanPos++ )
  {
    const int    blkPos = cctx.blockPos( scanPos );
    const TCoeff level  = dstCoeff[ blkPos ];
    absSum += abs(level);
  }
}
1337
1338
/** Rate-distortion optimised quantisation of a block whose residual is predicted sample-by-sample
 *  from an already reconstructed neighbour (direction taken from tu.cu->bdpcmM).
 *
 *  For every scan position the difference between the source sample and the reconstructed
 *  neighbour (left neighbour when the direction is 1, upper neighbour otherwise) is quantised,
 *  the level is chosen by xGetCodedLevelTSPred(), and the sample is dequantised again into
 *  m_fullCoeff so that it can serve as predictor for the following positions. After each
 *  sub-block the cost of signalling the whole sub-block as zero is compared against the cost of
 *  keeping its levels.
 *
 *  \param tu      transform unit; its coefficient buffer for compID receives the signed levels
 *  \param compID  component to quantise
 *  \param coeffs  source samples (read through coeffs.buf, neighbour offsets use coeffs.stride)
 *  \param absSum  the sum of absolute levels of the block is ADDED to this value (not reset here)
 *  \param qp      quantisation parameter
 *  \param ctx     context set providing the fractional bit estimates
 *
 *  NOTE(review): srcCoeff/dstCoeff/m_fullCoeff are addressed with cctx.blockPos() while the
 *  neighbour offset uses coeffs.stride - this presumably relies on stride == width; confirm.
 */
void QuantRDOQ::forwardRDPCM( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps = *tu.cs->sps;
  const CompArea& rect = tu.blocks[compID];
  const uint32_t width = rect.width;
  const uint32_t height = rect.height;
  const ChannelType chType = toChannelType(compID);
  const int channelBitDepth = sps.bitDepths[chType];

  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
  const int  dirMode = tu.cu->bdpcmM[chType];   // 1: predict from the left neighbour, otherwise from the upper neighbour

  const int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

  const uint32_t maxNumCoeff = rect.area();

  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

  const int scalingListType = getScalingListType(tu.cu->predMode, compID);
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs(compID).buf;

  double *costCoeff = m_pdCostCoeff;
  double *costSig = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;   // not cleared: every entry is written before it is read

  memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
  memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
  memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff);

  m_bdpcm = dirMode;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
  const double errorScale = xGetErrScaleCoeff(needsSqrt2Scale, width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);

  // parameters of the inverse quantiser used to rebuild the predictor samples
  TrQuantParams trQuantParams;
  trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
  trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

  const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;

  uint32_t coeffLevels[3];
  double   coeffLevelError[4];

  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
  const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
  double    baseCost = 0;
  uint32_t  goRiceParam = 0;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));

  const int sbNum = width * height >> cctx.log2CGSize();
  int scanPos;
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // budget of context coded bins for the whole block: 1.75 bins per coefficient
  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
  cctx.remRegBins = maxCtxBins;

  for (int sbId = 0; sbId < sbNum; sbId++)
  {
    cctx.initSubblock(sbId);

    int noCoeffCoded = 0;
    baseCost = 0.0;
    memset(&rdStats, 0, sizeof(coeffGroupRDStats));
    rdStats.iNumSbbCtxBins = 0;

    for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
    {
      const int lastPosCoded = sbSizeM1;
      scanPos = cctx.minSubPos() + scanPosInSB;
      //===== quantization =====
      uint32_t blkPos = cctx.blockPos(scanPos);

      const int posX = cctx.posX(scanPos);
      const int posY = cctx.posY(scanPos);
      const int posS = (1 == dirMode) ? posX : posY;
      const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
      // first column / first row has no neighbour in prediction direction
      TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;

      // set coeff
      const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
      // clip so that adding the rounding offset below cannot overflow Intermediate_Int
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));
      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

      // candidate levels: the rounded level and, if different, the alternative level
      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if (minAbsLevel != roundAbsLevel)
        coeffLevels[m_testedLevels++] = minAbsLevel;

      double dErr = double(levelDouble);
      coeffLevelError[0]  = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
      unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
      uint32_t    cLevel;
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());

      //goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);
      goRiceParam = 1;
      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
      const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;
      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

      DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);

      const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);
      // last position of a sub-block in which no level has been coded so far
      const bool lastCoeff = (scanPosInSB == lastPosCoded && noCoeffCoded == 0);
      int rightPixel, belowPixel;
      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
      int numUsedCtxBins = 0;
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);
      cctx.remRegBins -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if (cLevel > 0)
      {
        noCoeffCoded++;
      }
      dstCoeff[blkPos] = cLevel;

      if (sign)
      {
        dstCoeff[blkPos] = -dstCoeff[blkPos];
      }

      // rebuild the sample so that it can be used as predictor for the next positions
      xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams );
      m_fullCoeff[blkPos] += predCoeff;

      baseCost += costCoeff[scanPos];
      rdStats.d64SigCost += costSig[scanPos];

      if (dstCoeff[blkPos])
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
        rdStats.d64UncodedDist += costCoeff0[scanPos];
      }
    } //end for (iScanPosinCG)

    if (!cctx.isSigGroup())
    {
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if (sbId != sbNum - 1 || anySigCG)
    {
      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));

      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
      costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if (costZeroSB < baseCost)
      {
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
        {
          scanPos = cctx.minSubPos() + scanPosInSB;
          uint32_t blkPos = cctx.blockPos(scanPos);

          const int posX = cctx.posX(scanPos);
          const int posY = cctx.posY(scanPos);
          const int posS = (1 == dirMode) ? posX : posY;
          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
          // With a zero residual the reconstruction equals its predictor. The buffer is addressed by
          // block position everywhere else (see the main loop above), so it has to be written at
          // blkPos here as well; indexing with scanPos modified an unrelated sample and left the
          // zeroed one with its stale value.
          m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0;

          if (dstCoeff[blkPos])
          {
            dstCoeff[blkPos] = 0;
            costCoeff[scanPos] = costCoeff0[scanPos];
            costSig[scanPos] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  }

  //===== accumulate the absolute levels of the block =====
  for (int sumScanPos = 0; sumScanPos < int(maxNumCoeff); sumScanPos++)
  {
    int blkPos = cctx.blockPos(sumScanPos);
    TCoeff level = dstCoeff[blkPos];
    absSum += abs(level);
  }
}
1563
1564
/** Dequantise a single level.
 *
 *  \param pRes           receives the dequantised value
 *  \param coeff          quantised level (only read)
 *  \param trQuantParams  inverse quantiser scale (qScale) and shift (rightShift)
 *
 *  A positive rightShift is applied as a rounded right shift, a zero or negative one as a
 *  multiplication by 2^(-rightShift).
 */
void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeffSig& coeff, const TrQuantParams& trQuantParams)
{
  const auto shift  = trQuantParams.rightShift;
  const auto scaled = Intermediate_Int(coeff) * trQuantParams.qScale;

  if (shift <= 0)
  {
    // no right shift: scale up instead
    pRes = TCoeff(scaled * (1 << -shift));
    return;
  }

  // add half of the divisor so that the shift rounds instead of truncating
  const Intermediate_Int rounding = Intermediate_Int(1) << (shift - 1);
  pRes = TCoeff((scaled + rounding) >> shift);
}
1577
1578
inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost,
1579
  double&            rd64CodedCost0,
1580
  double&            rd64CodedCostSig,
1581
  Intermediate_Int    levelDouble,
1582
  int                 qBits,
1583
  double              errorScale,
1584
  uint32_t coeffLevels[],
1585
  double coeffLevelError[],
1586
  const BinFracBits* fracBitsSig,
1587
  const BinFracBits& fracBitsPar,
1588
  CoeffCodingContext& cctx,
1589
  const FracBitsAccess& fracBitsAccess,
1590
  const BinFracBits& fracBitsSign,
1591
  const BinFracBits& fracBitsGt1,
1592
  const uint8_t      sign,
1593
  int                rightPixel,
1594
  int                belowPixel,
1595
  uint16_t           ricePar,
1596
  bool               isLast,
1597
  const int          maxLog2TrDynamicRange,
1598
  int&               numUsedCtxBins
1599
) const
1600
9.11M
{
1601
9.11M
  double currCostSig = 0;
1602
9.11M
  uint32_t   bestAbsLevel = 0;
1603
9.11M
  numUsedCtxBins = 0;
1604
9.11M
  int numBestCtxBin = 0;
1605
9.11M
  if (!isLast && coeffLevels[0] < 3)
1606
8.29M
  {
1607
8.29M
    if (cctx.remRegBins >= 4)
1608
8.15M
    rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
1609
147k
    else
1610
147k
      rd64CodedCostSig = xGetICost(1 << SCALE_BITS);
1611
8.29M
    rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
1612
8.29M
    if (cctx.remRegBins >= 4)
1613
8.15M
      numUsedCtxBins++;
1614
8.29M
    if (coeffLevels[0] == 0)
1615
7.84M
    {
1616
7.84M
      return bestAbsLevel;
1617
7.84M
    }
1618
8.29M
  }
1619
821k
  else
1620
821k
  {
1621
821k
    rd64CodedCost = MAX_DOUBLE;
1622
821k
  }
1623
1624
1.27M
  if (!isLast)
1625
800k
  {
1626
800k
    if (cctx.remRegBins >= 4)
1627
765k
      currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
1628
35.8k
    else
1629
35.8k
      currCostSig = xGetICost(1 << SCALE_BITS);
1630
800k
    if (coeffLevels[0] >= 3 && cctx.remRegBins >= 4)
1631
325k
      numUsedCtxBins++;
1632
800k
  }
1633
1634
3.39M
  for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++)
1635
2.12M
  {
1636
2.12M
    int absLevel = coeffLevels[errorInd - 1];
1637
2.12M
    double dErr = 0.0;
1638
2.12M
    dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits));
1639
2.12M
    coeffLevelError[errorInd] = dErr * dErr * errorScale;
1640
2.12M
    int modAbsLevel = absLevel;
1641
2.12M
    if (cctx.remRegBins >= 4) 
1642
2.06M
    {
1643
2.06M
      modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
1644
2.06M
    }
1645
2.12M
    int numCtxBins = 0;
1646
2.12M
    double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, maxLog2TrDynamicRange));
1647
1648
2.12M
    if (cctx.remRegBins >= 4)
1649
2.06M
      dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more
1650
1651
2.12M
    if (dCurrCost < rd64CodedCost)
1652
919k
    {
1653
919k
      bestAbsLevel = absLevel;
1654
919k
      rd64CodedCost = dCurrCost;
1655
919k
      rd64CodedCostSig = currCostSig;
1656
919k
      numBestCtxBin = numCtxBins;
1657
919k
    }
1658
2.12M
  }
1659
1.27M
  numUsedCtxBins += numBestCtxBin;
1660
1.27M
  return bestAbsLevel;
1661
9.11M
}
1662
1663
inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
1664
                                    const BinFracBits&        fracBitsPar,
1665
                                    const CoeffCodingContext& cctx,
1666
                                    const FracBitsAccess&     fracBitsAccess,
1667
                                    const BinFracBits&        fracBitsSign,
1668
                                    const BinFracBits&        fracBitsGt1,
1669
                                    int&                      numCtxBins,
1670
                                    const uint8_t             sign,
1671
                                    const uint16_t            ricePar,
1672
                                    const int                 maxLog2TrDynamicRange  ) const
1673
2.12M
{
1674
 
1675
2.12M
  if (cctx.remRegBins < 4) // Full by-pass coding 
1676
66.4k
  {
1677
66.4k
    int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero 
1678
1679
66.4k
    uint32_t symbol = absLevel;
1680
1681
66.4k
    uint32_t length;
1682
66.4k
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
1683
66.4k
    if (symbol < (threshold << ricePar))
1684
32.4k
    {
1685
32.4k
      length = symbol >> ricePar;
1686
32.4k
      rate += (length + 1 + ricePar) << SCALE_BITS;
1687
32.4k
    }
1688
34.0k
    else
1689
34.0k
    {
1690
34.0k
      length = ricePar;
1691
34.0k
      symbol = symbol - (threshold << ricePar);
1692
155k
      while (symbol >= (1 << length))
1693
121k
      {
1694
121k
        symbol -= (1 << (length++));
1695
121k
      }
1696
34.0k
      rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1697
34.0k
    }
1698
1699
66.4k
    return rate;
1700
66.4k
  }
1701
1702
2.06M
  else if (cctx.remRegBins >= 4 && cctx.remRegBins < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here)
1703
12.0k
  {
1704
12.0k
    int rate = fracBitsSign.intBits[sign]; // sign bits
1705
12.0k
    if (absLevel)
1706
11.4k
      numCtxBins++;
1707
1708
12.0k
    if (absLevel > 1)
1709
8.22k
    {
1710
8.22k
      rate += fracBitsGt1.intBits[1];
1711
8.22k
      rate += fracBitsPar.intBits[(absLevel - 2) & 1];
1712
1713
8.22k
      numCtxBins += 2;
1714
1715
8.22k
      int cutoffVal = 2;
1716
1717
8.22k
      if (absLevel >= cutoffVal)
1718
8.22k
      {
1719
8.22k
        uint32_t symbol = (absLevel - cutoffVal) >> 1;
1720
8.22k
        uint32_t length;
1721
8.22k
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
1722
8.22k
        if (symbol < (threshold << ricePar))
1723
5.61k
        {
1724
5.61k
          length = symbol >> ricePar;
1725
5.61k
          rate += (length + 1 + ricePar) << SCALE_BITS;
1726
5.61k
        }
1727
2.61k
        else
1728
2.61k
        {
1729
2.61k
          length = ricePar;
1730
2.61k
          symbol = symbol - (threshold << ricePar);
1731
10.7k
          while (symbol >= (1 << length))
1732
8.16k
          {
1733
8.16k
            symbol -= (1 << (length++));
1734
8.16k
          }
1735
2.61k
          rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1736
2.61k
        }
1737
8.22k
      }
1738
8.22k
    }
1739
3.80k
    else if (absLevel == 1)
1740
3.26k
    {
1741
3.26k
      rate += fracBitsGt1.intBits[0];
1742
3.26k
      numCtxBins++;
1743
3.26k
    }
1744
538
    else
1745
538
    {
1746
538
      rate = 0;
1747
538
    }
1748
12.0k
    return rate;
1749
12.0k
  }
1750
    
1751
2.04M
  int rate = fracBitsSign.intBits[sign];
1752
1753
2.04M
  if (absLevel)
1754
1.59M
    numCtxBins++;
1755
1756
2.04M
  if( absLevel > 1 )
1757
686k
  {
1758
686k
    rate += fracBitsGt1.intBits[1];
1759
686k
    rate += fracBitsPar.intBits[( absLevel - 2 ) & 1];
1760
686k
    numCtxBins += 2;
1761
1762
686k
          int cutoffVal = 2;
1763
686k
    const int numGtBins = 4;
1764
3.43M
    for( int i = 0; i < numGtBins; i++ )
1765
2.74M
    {
1766
2.74M
      if( absLevel >= cutoffVal )
1767
1.95M
      {
1768
1.95M
        const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 );
1769
1.95M
        const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX );
1770
1.95M
        unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) );
1771
1.95M
        rate += fracBitsGtX.intBits[gtX];
1772
1.95M
        numCtxBins++;
1773
1.95M
      }
1774
2.74M
      cutoffVal += 2;
1775
2.74M
    }
1776
1777
686k
    if( absLevel >= cutoffVal )
1778
338k
    {
1779
338k
      uint32_t symbol = ( absLevel - cutoffVal ) >> 1;
1780
338k
      uint32_t length;
1781
338k
      const int threshold = COEF_REMAIN_BIN_REDUCTION;
1782
338k
      if( symbol < ( threshold << ricePar ) )
1783
121k
      {
1784
121k
        length = symbol >> ricePar;
1785
121k
        rate  += ( length + 1 + ricePar ) << SCALE_BITS;
1786
121k
      }
1787
216k
      else
1788
216k
      {
1789
216k
        length = ricePar;
1790
216k
        symbol = symbol - ( threshold << ricePar );
1791
923k
        while( symbol >= ( 1 << length ) )
1792
707k
        {
1793
707k
          symbol -= ( 1 << ( length++ ) );
1794
707k
        }
1795
216k
        rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS;
1796
216k
      }
1797
338k
    }
1798
686k
  }
1799
1.36M
  else if( absLevel == 1 )
1800
904k
  {
1801
904k
    rate += fracBitsGt1.intBits[0];
1802
904k
    numCtxBins++;
1803
904k
  }
1804
457k
  else
1805
457k
  {
1806
457k
    rate = 0;
1807
457k
  }
1808
2.04M
  return rate;
1809
2.12M
}
1810
1811
} // namespace vvenc
1812
1813
//! \}
1814