Coverage Report

Created: 2026-05-16 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/QuantRDOQ.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
------------------------------------------------------------------------------------------- */
41
42
43
/** \file     QuantRDOQ.cpp
44
    \brief    transform and quantization class
45
*/
46
47
#include "QuantRDOQ.h"
48
#include "UnitTools.h"
49
#include "ContextModelling.h"
50
#include "CodingStructure.h"
51
#include "dtrace_next.h"
52
#include "dtrace_buffer.h"
53
54
#include <stdlib.h>
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
// RD bookkeeping for a single coefficient group. xRateDistOptQuant() keeps one instance and
// clears it with memset() before processing each subblock.
// NOTE(review): the hedged field descriptions below are inferred from the member names only —
// confirm against the code that fills them.
struct coeffGroupRDStats
63
{
64
  int    iNNZbeforePos0;       // presumably a count of non-zero levels in the group excluding scan position 0 — TODO confirm
65
  double d64CodedLevelandDist; // distortion and level cost only
66
  double d64UncodedDist;    // all zero coded block distortion
67
  double d64SigCost;           // presumably the accumulated significance-flag cost of the group — TODO confirm
68
  double d64SigCost_0;         // presumably the significance-flag cost of scan position 0 — TODO confirm
69
 int   iNumSbbCtxBins;         // presumably the number of context-coded bins spent in this subblock — TODO confirm
70
};
71
72
73
//! \ingroup CommonLib
74
//! \{
75
76
// ====================================================================================================================
77
// Constants
78
// ====================================================================================================================
79
80
81
// ====================================================================================================================
82
// Static functions
83
// ====================================================================================================================
84
85
// ====================================================================================================================
86
// QuantRDOQ class member functions
87
// ====================================================================================================================
88
89
90
19.6k
/** Construct an RDOQ quantizer, optionally on top of an existing one
 * \param other           quantizer to derive from, or nullptr; when non-null it must be a QuantRDOQ
 * \param useScalingLists forwarded unchanged to the Quant base class
 */
QuantRDOQ::QuantRDOQ( const Quant* other, bool useScalingLists )
  : Quant( other, useScalingLists )
{
  // The downcast result is handed to xInitScalingList(): with a non-null source the error-scale
  // table pointers are copied from it, otherwise this object becomes the owner of its own tables.
  const QuantRDOQ* const rdoq = dynamic_cast<const QuantRDOQ*>( other );
  CHECK( other && !rdoq, "The RDOQ cast must be successfull!" );

  xInitScalingList( rdoq );
}
97
98
/** Release the error-scale tables (a no-op inside xDestroyScalingList() for non-owning instances) */
QuantRDOQ::~QuantRDOQ()
{
  xDestroyScalingList();
}
102
103
104
105
106
/** Get the best level in RD sense
107
 *
108
 * \returns best quantized transform level for given scan position
109
 *
110
 * This method calculates the best quantized transform level for a given scan position.
111
 */
112
inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
113
                                       double&            rd64CodedCost0,
114
                                       double&            rd64CodedCostSig,
115
                                       Intermediate_Int   lLevelDouble,
116
                                       uint32_t               uiMaxAbsLevel,
117
                                       const BinFracBits* fracBitsSig,
118
                                       const BinFracBits& fracBitsPar,
119
                                       const BinFracBits& fracBitsGt1,
120
                                       const BinFracBits& fracBitsGt2,
121
                                       const int          remRegBins,
122
                                       unsigned           goRiceZero,
123
                                       uint16_t             ui16AbsGoRice,
124
                                       int                iQBits,
125
                                       double             errorScale,
126
                                       bool               bLast,
127
                                       const int          maxLog2TrDynamicRange
128
                                     ) const
129
0
{
130
0
  double dCurrCostSig   = 0;
131
0
  uint32_t   uiBestAbsLevel = 0;
132
133
0
  if( !bLast && uiMaxAbsLevel < 3 )
134
0
  {
135
0
    rd64CodedCostSig    = xGetRateSigCoef( *fracBitsSig, 0 );
136
0
    rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
137
0
    if( uiMaxAbsLevel == 0 )
138
0
    {
139
0
      return uiBestAbsLevel;
140
0
    }
141
0
  }
142
0
  else
143
0
  {
144
0
    rd64CodedCost       = MAX_DOUBLE;
145
0
  }
146
147
0
  if( !bLast )
148
0
  {
149
0
    dCurrCostSig        = xGetRateSigCoef( *fracBitsSig, 1 );
150
0
  }
151
152
0
  uint32_t uiMinAbsLevel    = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
153
0
  for( int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
154
0
  {
155
0
    double dErr         = double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
156
157
0
    double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, maxLog2TrDynamicRange ) );
158
0
    dCurrCost          += dCurrCostSig;
159
160
0
    if( dCurrCost < rd64CodedCost )
161
0
    {
162
0
      uiBestAbsLevel    = uiAbsLevel;
163
0
      rd64CodedCost     = dCurrCost;
164
0
      rd64CodedCostSig  = dCurrCostSig;
165
0
    }
166
0
  }
167
168
0
  return uiBestAbsLevel;
169
0
}
170
171
/** Calculates the cost for specific absolute transform level
172
 * \param uiAbsLevel scaled quantized level
173
 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
174
 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
175
 * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
176
 * \param c1Idx
177
 * \param c2Idx
178
 * \param useLimitedPrefixLength
179
 * \param maxLog2TrDynamicRange
180
 * \returns cost of given absolute transform level
181
 */
182
inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
183
                                  const BinFracBits& fracBitsPar,
184
                                  const BinFracBits& fracBitsGt1,
185
                                  const BinFracBits& fracBitsGt2,
186
                                  const int          remRegBins,
187
                                  unsigned           goRiceZero,
188
                                  const uint16_t       ui16AbsGoRice,
189
                                  const int          maxLog2TrDynamicRange  ) const
190
0
{
191
0
  if( remRegBins < 4 )
192
0
  {
193
0
    int       iRate   = int( xGetIEPRate() ); // cost of sign bit
194
0
    uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
195
0
    uint32_t  length;
196
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
197
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
198
0
    {
199
0
      length = symbol >> ui16AbsGoRice;
200
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
201
0
    }
202
0
    else
203
0
    {
204
0
      length = ui16AbsGoRice;
205
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
206
0
      while( symbol >= ( 1 << length ) )
207
0
      {
208
0
        symbol -= ( 1 << ( length++ ) );
209
0
      }
210
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
211
0
    }
212
0
    return iRate;
213
0
  }
214
215
0
  int iRate = int( xGetIEPRate() ); // cost of sign bit
216
0
  const uint32_t cthres = 4;
217
0
  if( uiAbsLevel >= cthres )
218
0
  {
219
0
    uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;
220
0
    uint32_t length;
221
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
222
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
223
0
    {
224
0
      length = symbol >> ui16AbsGoRice;
225
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
226
0
    }
227
0
    else
228
0
    {
229
0
      length = ui16AbsGoRice;
230
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
231
0
      while( symbol >= ( 1 << length ) )
232
0
      {
233
0
        symbol -= ( 1 << ( length++ ) );
234
0
      }
235
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
236
0
    }
237
238
0
    iRate += fracBitsGt1.intBits[1];
239
0
    iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
240
0
    iRate += fracBitsGt2.intBits[1];
241
0
  }
242
0
  else if( uiAbsLevel == 1 )
243
0
  {
244
0
    iRate += fracBitsGt1.intBits[0];
245
0
  }
246
0
  else if( uiAbsLevel == 2 )
247
0
  {
248
0
    iRate += fracBitsGt1.intBits[1];
249
0
    iRate += fracBitsPar.intBits[0];
250
0
    iRate += fracBitsGt2.intBits[0];
251
0
  }
252
0
  else if( uiAbsLevel == 3 )
253
0
  {
254
0
    iRate += fracBitsGt1.intBits[1];
255
0
    iRate += fracBitsPar.intBits[1];
256
0
    iRate += fracBitsGt2.intBits[0];
257
0
  }
258
0
  else
259
0
  {
260
0
    iRate = 0;
261
0
  }
262
0
  return  iRate;
263
0
}
264
265
inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
266
1.38M
{
267
1.38M
  return xGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] );
268
1.38M
}
269
270
/** Calculates the cost of signaling the last significant coefficient in the block
271
 * \param uiPosX X coordinate of the last significant coefficient
272
 * \param uiPosY Y coordinate of the last significant coefficient
273
 * \param component colour component ID
274
 * \returns cost of last significant coefficient
275
 */
276
/*
277
 * \param uiWidth width of the transform unit (TU)
278
*/
279
inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
280
0
{
281
0
  uint32_t    CtxX  = g_uiGroupIdx[PosX];
282
0
  uint32_t    CtxY  = g_uiGroupIdx[PosY];
283
0
  double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
284
0
  if( CtxX > 3 )
285
0
  {
286
0
    Cost += xGetIEPRate() * ((CtxX-2)>>1);
287
0
  }
288
0
  if( CtxY > 3 )
289
0
  {
290
0
    Cost += xGetIEPRate() * ((CtxY-2)>>1);
291
0
  }
292
0
  return xGetICost( Cost );
293
0
}
294
295
296
inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
297
9.91M
{
298
9.91M
  return xGetICost( fracBitsSig.intBits[uiSignificance] );
299
9.91M
}
300
301
/** Get the cost for a specific rate
302
 * \param dRate rate of a bit
303
 * \returns cost at the specific rate
304
 */
305
inline double QuantRDOQ::xGetICost        ( double                          dRate         ) const
306
13.8M
{
307
13.8M
  return m_dLambda * dRate;
308
13.8M
}
309
310
/** Get the cost of an equal probable bit
311
 * \returns cost of equal probable bit
312
 */
313
inline double QuantRDOQ::xGetIEPRate() const
314
0
{
315
0
  return 32768;
316
0
}
317
318
319
double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false)
320
98.7k
{
321
98.7k
  const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
322
98.7k
  double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
323
98.7k
  double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
324
98.7k
  dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
325
98.7k
  const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
326
98.7k
  double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
327
98.7k
  return    finalErrScale;
328
98.7k
}
329
330
331
332
/** set error scale coefficients
333
 * \param list                   list ID
334
 * \param size
335
 * \param qp                     quantization parameter
336
 * \param maxLog2TrDynamicRange
337
 * \param bitDepths              reference to bit depth array for all channels
338
 */
339
void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths )
340
34.6M
{
341
34.6M
  const int width = g_scalingListSizeX[sizeX];
342
34.6M
  const int height = g_scalingListSizeX[sizeY];
343
34.6M
  const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C;
344
34.6M
  const int channelBitDepth = bitDepths[channelType];
345
34.6M
  const int iTransformShift = getTransformShift( channelBitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[channelType] );  // Represents scaling through forward transform
346
347
34.6M
  double dErrScale = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function
348
349
34.6M
  const bool needsSqrt2 = ((Log2(width*height)) & 1) == 1;
350
34.6M
  double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 );
351
34.6M
  dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform
352
353
34.6M
  if( getScalingListEnabled() )
354
0
  {
355
0
    uint32_t i, uiMaxNumCoeff = width * height;
356
357
0
    int*  piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY );
358
0
    double* pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp );
359
360
0
    for( i = 0; i < uiMaxNumCoeff; i++ )
361
0
    {
362
0
      pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << (DISTORTION_PRECISION_ADJUSTMENT( bitDepths[channelType] ) << 1));
363
0
    }
364
0
  }
365
366
34.6M
  int QStep = g_quantScales[needsSqrt2][qp];
367
368
34.6M
  xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) =
369
34.6M
    dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths[channelType]) << 1));
370
34.6M
}
371
372
/** set flat matrix value to quantized coefficient
373
 */
374
void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths)
375
19.6k
{
376
19.6k
  Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );
377
378
19.6k
  const int minimumQp = 0;
379
19.6k
  const int maximumQp = SCALING_LIST_REM_NUM;
380
381
156k
  for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++)
382
137k
  {
383
1.09M
    for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++)
384
961k
    {
385
6.72M
      for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
386
5.76M
      {
387
40.3M
        for(int qp = minimumQp; qp < maximumQp; qp++)
388
34.6M
        {
389
34.6M
          xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths );
390
34.6M
        }
391
5.76M
      }
392
961k
    }
393
137k
  }
394
19.6k
}
395
396
/** initialization process of scaling list array
397
 */
398
void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
399
19.6k
{
400
19.6k
  m_isErrScaleListOwner = other == nullptr;
401
402
19.6k
  bool useScalingLists = getScalingListEnabled();
403
404
156k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
405
137k
  {
406
1.09M
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
407
961k
    {
408
6.72M
      for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
409
5.76M
      {
410
40.3M
        for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
411
34.6M
        {
412
34.6M
          if( m_isErrScaleListOwner )
413
34.6M
          {
414
34.6M
            m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr;
415
34.6M
          }
416
0
          else
417
0
          {
418
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp];
419
0
          }
420
34.6M
        } // listID loop
421
5.76M
      }
422
961k
    }
423
137k
  }
424
19.6k
}
425
426
/** destroy quantization matrix array
427
 */
428
void QuantRDOQ::xDestroyScalingList()
429
19.6k
{
430
19.6k
  if( !m_isErrScaleListOwner ) return;
431
432
156k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
433
137k
  {
434
1.09M
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
435
961k
    {
436
6.72M
      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
437
5.76M
      {
438
40.3M
        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
439
34.6M
        {
440
34.6M
          if(m_errScale[sizeIdX][sizeIdY][listId][qp])
441
0
          {
442
0
            delete [] m_errScale[sizeIdX][sizeIdY][listId][qp];
443
0
          }
444
34.6M
        }
445
5.76M
      }
446
961k
    }
447
137k
  }
448
//   Quant::destroyScalingList();
449
19.6k
}
450
451
452
/** Quantize one component of a transform unit, using RDOQ where it is enabled
 *
 * RDOQ is selected by m_useRDOQTS for transform-skip blocks and by m_RDOQ > 0 otherwise.
 * Unless the block is an ISP luma block, RDOQ additionally requires width and height above 2.
 * Without RDOQ the call is forwarded to Quant::quant(). With selective RDOQ active and
 * xNeedRDOQ() returning false, the block is zeroed out instead of being quantized.
 *
 * \param tu       transform unit to quantize
 * \param compID   component to process
 * \param pSrc     transform coefficients to quantize
 * \param uiAbsSum [out] set to 0 when the block is zeroed, otherwise filled by the invoked quantizer
 * \param cQP      quantization parameters
 * \param ctx      entropy-coding context state passed on to the quantizers
 */
void QuantRDOQ::quant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  const CompArea& blk       = tu.blocks[compID];
  CoeffSigBuf     dstLevels = tu.getCoeffs(compID);

  const bool useTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;

  bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_RDOQ > 0;
  if( !tu.cu->ispMode || !isLuma(compID) )
  {
    useRDOQ = useRDOQ && blk.width > 2 && blk.height > 2;
  }

  if( !useRDOQ )
  {
    // plain quantization of the base class
    Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ(tu, compID, pSrc, cQP) )
  {
    // selective RDOQ decided the block is not worth it: emit an all-zero block
    dstLevels.fill(0);
    uiAbsSum = 0;
    tu.lastPos[compID] = -1;
    return;
  }

  if( !useTransformSkip )
  {
    xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else if( tu.cu->bdpcmM[toChannelType(compID)] )
  {
    forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else
  {
    rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
}
503
504
505
506
void QuantRDOQ::xRateDistOptQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx &ctx)
507
0
{
508
0
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
509
510
0
  const SPS &sps            = *tu.cs->sps;
511
0
  const CompArea& rect      = tu.blocks[compID];
512
0
  const uint32_t uiWidth    = rect.width;
513
0
  const uint32_t uiHeight   = rect.height;
514
0
  const ChannelType chType  = toChannelType(compID);
515
0
  const int channelBitDepth = sps.bitDepths[ chType ];
516
517
0
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
518
519
0
  const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
520
  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
521
  * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
522
  * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
523
  * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
524
  */
525
526
  // Represents scaling through forward transform
527
0
  const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
528
529
0
  double     d64BlockUncodedCost               = 0;
530
0
  const uint32_t uiLog2BlockWidth                  = Log2(uiWidth);
531
0
  const uint32_t uiLog2BlockHeight                 = Log2(uiHeight);
532
0
  const uint32_t uiMaxNumCoeff                     = rect.area();
533
534
0
  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
535
536
0
  int scalingListType = getScalingListType(tu.cu->predMode, compID);
537
538
0
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
539
540
0
  const TCoeff    *plSrcCoeff = pSrc.buf;
541
0
        TCoeffSig *piDstCoeff = tu.getCoeffs(compID).buf;
542
543
0
  double *pdCostCoeff  = m_pdCostCoeff;
544
0
  double *pdCostSig    = m_pdCostSig;
545
0
  double *pdCostCoeff0 = m_pdCostCoeff0;
546
0
  int    *rateIncUp    = m_rateIncUp;
547
0
  int    *rateIncDown  = m_rateIncDown;
548
0
  int    *sigRateDelta = m_sigRateDelta;
549
0
  TCoeff *deltaU       = m_deltaU;
550
551
0
  memset( piDstCoeff,     0, sizeof( TCoeffSig ) * uiMaxNumCoeff );
552
0
  memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
553
0
  memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
554
0
  memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
555
0
  memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
556
0
  memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
557
0
  memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
558
559
560
0
  const bool   needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID );
561
0
  const bool   isTransformSkip    = tu.mtsIdx[compID]==MTS_SKIP;
562
0
  const double *const pdErrScale  = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
563
0
  const int    *const piQCoef     = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
564
0
  const bool isLfnstApplied       = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
565
0
  const bool enableScalingLists   = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied);
566
0
  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
567
0
  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
568
0
  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
569
570
571
0
  const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
572
0
  const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
573
574
0
  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
575
0
  const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
576
577
0
  int     iCGLastScanPos      = -1;
578
0
  double  d64BaseCost         = 0;
579
0
  int     iLastScanPos        = -1;
580
581
0
  int ctxBinSampleRatio   = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT;
582
0
  int remRegBins          = (tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio) >> 4;
583
0
  uint32_t  goRiceParam   = 0;
584
585
0
  double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
586
0
  memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
587
0
  int iScanPos;
588
0
  coeffGroupRDStats rdStats;
589
590
#if ENABLE_TRACING
591
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
592
#endif
593
594
0
  const uint32_t lfnstIdx = tu.cu->lfnstIdx;
595
596
0
  const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
597
598
0
  for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
599
0
  {
600
0
    cctx.initSubblock( subSetId );
601
602
0
    int remRegBinsStartCG = remRegBins;
603
604
0
    uint32_t maxNonZeroPosInCG = iCGSizeM1;
605
0
    if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
606
0
    {
607
0
      maxNonZeroPosInCG = 7;
608
0
    }
609
610
0
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));
611
612
0
    for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
613
0
    {
614
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
615
0
      uint32_t    blkPos = cctx.blockPos( iScanPos );
616
0
      piDstCoeff[ blkPos ] = 0;
617
0
    }
618
0
    for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
619
0
    {
620
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
621
      //===== quantization =====
622
0
      uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
623
624
      // set coeff
625
0
      const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
626
0
      const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
627
0
      const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
628
629
0
      const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
630
631
0
      uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
632
633
0
      const double dErr         = double( lLevelDouble );
634
0
      pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
635
0
      d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
636
0
      piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
637
638
0
      if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
639
0
      {
640
0
        iLastScanPos            = iScanPos;
641
0
        iCGLastScanPos          = cctx.subSetId();
642
0
      }
643
644
0
      if ( iLastScanPos >= 0 )
645
0
      {
646
647
#if ENABLE_TRACING
648
        uint32_t uiCGPosY = cctx.cgPosY();
649
        uint32_t uiCGPosX = cctx.cgPosX();
650
        uint32_t uiPosY = cctx.posY( iScanPos );
651
        uint32_t uiPosX = cctx.posX( iScanPos );
652
        DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
653
#endif
654
        //===== coefficient level estimation =====
655
0
        unsigned ctxIdSig = 0;
656
0
        if( iScanPos != iLastScanPos )
657
0
        {
658
0
          ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
659
0
        }
660
0
        uint32_t    uiLevel;
661
0
        uint8_t ctxOffset     = cctx.ctxOffsetAbs     ();
662
0
        uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
663
0
        uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
664
0
        uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
665
0
        uint32_t    goRiceZero    = 0;
666
0
        if( remRegBins < 4 )
667
0
        {
668
0
          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
669
0
          goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
670
0
          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
671
0
        }
672
673
0
        const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
674
0
        const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
675
0
        const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
676
677
0
        if( iScanPos == iLastScanPos )
678
0
        {
679
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
680
0
                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, maxLog2TrDynamicRange );
681
0
        }
682
0
        else
683
0
        {
684
0
          DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
685
686
0
          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
687
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
688
0
                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, maxLog2TrDynamicRange );
689
0
          sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
690
0
        }
691
692
0
        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
693
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
694
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
695
696
0
        deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
697
698
0
        if( uiLevel > 0 )
699
0
        {
700
0
          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
701
0
          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
702
0
          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
703
0
        }
704
0
        else // uiLevel == 0
705
0
        {
706
0
          if( remRegBins < 4 )
707
0
          {
708
0
            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
709
0
            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
710
0
          }
711
0
          else
712
0
          {
713
0
            rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
714
0
          }
715
0
        }
716
0
        piDstCoeff[ uiBlkPos ] = uiLevel;
717
0
        d64BaseCost           += pdCostCoeff [ iScanPos ];
718
719
0
        if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
720
0
        {
721
0
          goRiceParam   = 0;
722
0
        }
723
0
        else if( remRegBins >= 4 )
724
0
        {
725
0
          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
726
0
          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
727
0
          remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
728
0
        }
729
0
      }
730
0
      else
731
0
      {
732
0
        d64BaseCost    += pdCostCoeff0[ iScanPos ];
733
0
      }
734
0
      rdStats.d64SigCost += pdCostSig[ iScanPos ];
735
0
      if (iScanPosinCG == 0 )
736
0
      {
737
0
        rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
738
0
      }
739
0
      if (piDstCoeff[ uiBlkPos ] )
740
0
      {
741
0
        cctx.setSigGroup();
742
0
        rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
743
0
        rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
744
0
        if ( iScanPosinCG != 0 )
745
0
        {
746
0
          rdStats.iNNZbeforePos0++;
747
0
        }
748
0
      }
749
0
    } //end for (iScanPosinCG)
750
751
0
    if (iCGLastScanPos >= 0)
752
0
    {
753
0
      if( cctx.subSetId() )
754
0
      {
755
0
        if( !cctx.isSigGroup() )
756
0
        {
757
0
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
758
0
          d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
759
0
          pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
760
0
        }
761
0
        else
762
0
        {
763
0
          if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
764
0
          {
765
0
            if ( rdStats.iNNZbeforePos0 == 0 )
766
0
            {
767
0
              d64BaseCost -= rdStats.d64SigCost_0;
768
0
              rdStats.d64SigCost -= rdStats.d64SigCost_0;
769
0
            }
770
            // rd-cost if SigCoeffGroupFlag = 0, initialization
771
0
            double d64CostZeroCG = d64BaseCost;
772
773
0
            const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
774
775
0
            if (cctx.subSetId() < iCGLastScanPos)
776
0
            {
777
0
              d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);
778
0
              d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);
779
0
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
780
0
            }
781
782
            // try to convert the current coeff group from non-zero to all-zero
783
0
            d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
784
0
            d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
785
0
            d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels
786
787
                                                     // if we can save cost, change this block to all-zero block
788
0
            if ( d64CostZeroCG < d64BaseCost )
789
0
            {
790
0
              cctx.resetSigGroup();
791
0
              d64BaseCost = d64CostZeroCG;
792
0
              remRegBins = remRegBinsStartCG;
793
0
              if (cctx.subSetId() < iCGLastScanPos)
794
0
              {
795
0
                pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
796
0
              }
797
              // reset coeffs to 0 in this block
798
0
              for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
799
0
              {
800
0
                iScanPos      = cctx.minSubPos() + iScanPosinCG;
801
0
                uint32_t uiBlkPos = cctx.blockPos( iScanPos );
802
803
0
                if (piDstCoeff[ uiBlkPos ])
804
0
                {
805
0
                  piDstCoeff [ uiBlkPos ] = 0;
806
0
                  pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
807
0
                  pdCostSig  [ iScanPos ] = 0;
808
0
                }
809
0
              }
810
0
            } // end if ( d64CostAllZeros < d64BaseCost )
811
0
          }
812
0
        } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
813
0
      }
814
0
      else
815
0
      {
816
0
        cctx.setSigGroup();
817
0
      }
818
0
    }
819
0
  } //end for (cctx.subSetId)
820
821
822
  //===== estimate last position =====
823
0
  if ( iLastScanPos < 0 )
824
0
  {
825
0
    return;
826
0
  }
827
828
0
  double  d64BestCost         = 0;
829
0
  int     iBestLastIdxP1      = 0;
830
831
832
0
  if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
833
0
  {
834
0
    const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
835
0
    d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
836
0
    d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
837
0
  }
838
0
  else
839
0
  {
840
0
    bool previousCbf       = tu.cbf[COMP_Cb];
841
0
    bool lastCbfIsInferred = false;
842
0
    if( useIntraSubPartitions )
843
0
    {
844
0
      bool rootCbfSoFar       = false;
845
0
      bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
846
0
      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
847
0
      if( isLastSubPartition )
848
0
      {
849
0
        TransformUnit* tuPointer = tu.cu->firstTU;
850
0
        for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
851
0
        {
852
0
          rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
853
0
          tuPointer     = tuPointer->next;
854
0
        }
855
0
        if( !rootCbfSoFar )
856
0
        {
857
0
          lastCbfIsInferred = true;
858
0
        }
859
0
      }
860
0
      if( !lastCbfIsInferred )
861
0
      {
862
0
        previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
863
0
      }
864
0
    }
865
0
    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
866
867
0
    if( !lastCbfIsInferred )
868
0
    {
869
0
      d64BestCost  = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
870
0
      d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
871
0
    }
872
0
    else
873
0
    {
874
0
      d64BestCost  = d64BlockUncodedCost;
875
0
    }
876
0
  }
877
878
0
  int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
879
0
  int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
880
0
  {
881
0
    int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
882
0
    int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
883
0
    int bitsX = 0;
884
0
    int bitsY = 0;
885
0
    int ctxId;
886
    //X-coordinate
887
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
888
0
    {
889
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
890
0
      lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
891
0
      bitsX               +=         fB.intBits[ 1 ];
892
0
    }
893
0
    lastBitsX[ctxId] = bitsX;
894
    //Y-coordinate
895
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
896
0
    {
897
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
898
0
      lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
899
0
      bitsY               +=         fB.intBits[ 1 ];
900
0
    }
901
0
    lastBitsY[ctxId] = bitsY;
902
0
  }
903
904
905
0
  bool bFoundLast = false;
906
0
  for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
907
0
  {
908
0
    d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
909
0
    if (cctx.isSigGroup( iCGScanPos ) )
910
0
    {
911
0
      uint32_t maxNonZeroPosInCG = iCGSizeM1;
912
0
      if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
913
0
      {
914
0
        maxNonZeroPosInCG = 7;
915
0
      }
916
0
      for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
917
0
      {
918
0
        iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
919
920
0
        if (iScanPos > iLastScanPos)
921
0
        {
922
0
          continue;
923
0
        }
924
0
        uint32_t   uiBlkPos     = cctx.blockPos( iScanPos );
925
926
0
        if( piDstCoeff[ uiBlkPos ] )
927
0
        {
928
0
          uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
929
0
          uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
930
0
          double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
931
932
0
          double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
933
934
0
          if( totalCost < d64BestCost )
935
0
          {
936
0
            iBestLastIdxP1  = iScanPos + 1;
937
0
            d64BestCost     = totalCost;
938
0
          }
939
0
          if( piDstCoeff[ uiBlkPos ] > 1 )
940
0
          {
941
0
            bFoundLast = true;
942
0
            break;
943
0
          }
944
0
          d64BaseCost      -= pdCostCoeff[ iScanPos ];
945
0
          d64BaseCost      += pdCostCoeff0[ iScanPos ];
946
0
        }
947
0
        else
948
0
        {
949
0
          d64BaseCost      -= pdCostSig[ iScanPos ];
950
0
        }
951
0
      } //end for
952
0
      if (bFoundLast)
953
0
      {
954
0
        break;
955
0
      }
956
0
    } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
957
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
958
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
959
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
960
961
0
  } // end for
962
963
964
0
  for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
965
0
  {
966
0
    int blkPos = cctx.blockPos( scanPos );
967
0
    TCoeff level = piDstCoeff[ blkPos ];
968
0
    uiAbsSum += level;
969
0
    piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
970
0
  }
971
972
  //===== clean uncoded coefficients =====
973
0
  for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
974
0
  {
975
0
    piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
976
0
  }
977
0
  iLastScanPos = iBestLastIdxP1 - 1;
978
979
0
  if( cctx.signHiding() && uiAbsSum>=2)
980
0
  {
981
0
    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
982
0
    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
983
0
                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
984
0
                             + 0.5);
985
986
0
    int lastCG = -1;
987
0
    int absSum = 0 ;
988
0
    int n ;
989
0
    for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
990
0
    {
991
0
      int  subPos         = subSet << cctx.log2CGSize();
992
0
      int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
993
0
      absSum = 0 ;
994
995
0
      for( n = iCGSizeM1; n >= 0; --n )
996
0
      {
997
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
998
0
        {
999
0
          lastNZPosInCG = n;
1000
0
          break;
1001
0
        }
1002
0
      }
1003
1004
0
      for( n = 0; n <= iCGSizeM1; n++ )
1005
0
      {
1006
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
1007
0
        {
1008
0
          firstNZPosInCG = n;
1009
0
          break;
1010
0
        }
1011
0
      }
1012
1013
0
      for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
1014
0
      {
1015
0
        absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
1016
0
      }
1017
1018
0
      if(lastNZPosInCG>=0 && lastCG==-1)
1019
0
      {
1020
0
        lastCG = 1;
1021
0
      }
1022
1023
0
      if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1024
0
      {
1025
0
        uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
1026
0
        if( signbit!=(absSum&0x1) )  // hide but need tune
1027
0
        {
1028
          // calculate the cost
1029
0
          int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
1030
0
          int minPos = -1, finalChange = 0, curChange = 0;
1031
1032
0
          for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
1033
0
          {
1034
0
            uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
1035
0
            if(piDstCoeff[ uiBlkPos ] != 0 )
1036
0
            {
1037
0
              int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
1038
0
              int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1039
0
                -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
1040
1041
0
              if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1042
0
              {
1043
0
                costDown -= (4<<SCALE_BITS);
1044
0
              }
1045
1046
0
              if(costUp<costDown)
1047
0
              {
1048
0
                curCost = costUp;
1049
0
                curChange =  1;
1050
0
              }
1051
0
              else
1052
0
              {
1053
0
                curChange = -1;
1054
0
                if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1055
0
                {
1056
0
                  curCost = std::numeric_limits<int64_t>::max();
1057
0
                }
1058
0
                else
1059
0
                {
1060
0
                  curCost = costDown;
1061
0
                }
1062
0
              }
1063
0
            }
1064
0
            else
1065
0
            {
1066
0
              curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1067
0
              curChange = 1 ;
1068
1069
0
              if(n<firstNZPosInCG)
1070
0
              {
1071
0
                uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1072
0
                if(thissignbit != signbit )
1073
0
                {
1074
0
                  curCost = std::numeric_limits<int64_t>::max();
1075
0
                }
1076
0
              }
1077
0
            }
1078
1079
0
            if( curCost<minCostInc)
1080
0
            {
1081
0
              minCostInc = curCost;
1082
0
              finalChange = curChange;
1083
0
              minPos = uiBlkPos;
1084
0
            }
1085
0
          }
1086
1087
0
          if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
1088
0
          {
1089
0
            finalChange = -1;
1090
0
          }
1091
1092
0
          if(plSrcCoeff[minPos]>=0)
1093
0
          {
1094
0
            piDstCoeff[minPos] += finalChange ;
1095
0
          }
1096
0
          else
1097
0
          {
1098
0
            piDstCoeff[minPos] -= finalChange ;
1099
0
          }
1100
0
        }
1101
0
      }
1102
1103
0
      if(lastCG==1)
1104
0
      {
1105
0
        lastCG=0 ;
1106
0
      }
1107
0
    }
1108
1109
    // Check due to saving of last pos. Sign data hiding can change the position of last coef.
1110
0
    if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 )
1111
0
    {
1112
0
      int scanPos = iLastScanPos - 1;
1113
0
      for( ; scanPos >= 0; scanPos-- )
1114
0
      {
1115
0
        if( piDstCoeff[cctx.blockPos( scanPos )] )
1116
0
          break;
1117
0
      }
1118
0
      iLastScanPos = scanPos;
1119
0
    }
1120
0
  }
1121
0
  tu.lastPos[compID] = iLastScanPos;
1122
0
}
1123
1124
/** Rate-distortion optimised quantisation using the "TS" context derivations
 *  (sigCtxIdAbsTS, parityCtxIdAbsTS, signCtxIdAbsTS, lrg1CtxIdAbsTS).
 *
 *  The block is processed sub-block by sub-block in forward scan order. For
 *  every coefficient up to three candidate levels are collected and handed to
 *  xGetCodedLevelTSPred(), which returns the level that is kept. Afterwards a
 *  whole sub-block may be forced to zero when that yields a lower cost.
 *
 *  \param tu      transform unit; the chosen levels are written to tu.getCoeffs( compID ).buf
 *  \param compID  component to quantise
 *  \param coeffs  source coefficients (only coeffs.buf is read)
 *  \param absSum  incremented by the sum of absolute output levels (it is not reset here)
 *  \param qp      quantisation parameter, queried through per() / rem()
 *  \param ctx     supplies the fractional-bit tables used for rate estimation
 *
 *  Side effects visible in this function: m_pdCostCoeff, m_pdCostSig and the used part of
 *  m_pdCostCoeffGroupSig are cleared and refilled, m_pdCostCoeff0 is overwritten per
 *  coefficient, m_bdpcm is set to 0 and m_testedLevels is updated for every coefficient.
 */
void QuantRDOQ::rateDistOptQuantTS( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS&        sps    = *tu.cs->sps;
  const CompArea&   rect   = tu.blocks[compID];
  const uint32_t    width  = rect.width;
  const uint32_t    height = rect.height;
  const ChannelType chType = toChannelType( compID );

  const int      channelBitDepth       = sps.bitDepths[chType];
  const int      maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
  const int      transformShift        = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );
  const uint32_t maxNumCoeff           = rect.area();

  CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );

  int scalingListType = getScalingListType( tu.cu->predMode, compID );
  CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );

  const TCoeff* srcCoeff = coeffs.buf;
  TCoeffSig*    dstCoeff = tu.getCoeffs( compID ).buf;

  // per-coefficient cost tables (indexed by scan position)
  double* costCoeff  = m_pdCostCoeff;
  double* costSig    = m_pdCostSig;
  double* costCoeff0 = m_pdCostCoeff0;

  memset( m_pdCostCoeff, 0, sizeof( double ) * maxNumCoeff );
  memset( m_pdCostSig,   0, sizeof( double ) * maxNumCoeff );

  m_bdpcm = 0;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID] == MTS_SKIP;
  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    qBits = QUANT_SHIFT + qp.per( isTransformSkip ) + ( isTransformSkip ? 0 : transformShift ) + ( needsSqrt2Scale ? -1 : 0 );
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem( isTransformSkip )];
  const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale( tu, compID ), width, height, qp.rem( isTransformSkip ), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip );

  const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;

  // loop-invariant helpers for the scalar quantiser
  const Intermediate_Int roundingOffset = Intermediate_Int( 1 ) << ( qBits - 1 );
  const int64_t          levelClip      = std::numeric_limits<Intermediate_Int>::max() - roundingOffset;
  const uint32_t         maxAbsLevel    = uint32_t( entropyCodingMaximum );

  // Candidate buffers are kept outside the loops, exactly as before, so their
  // storage persists from one coefficient to the next.
  uint32_t coeffLevels[3];
  double   coeffLevelError[4];

  CoeffCodingContext cctx( tu, compID, tu.cs->slice->signDataHidingEnabled );
  const int sbSizeM1 = ( 1 << cctx.log2CGSize() ) - 1;

  double* costSigSubBlock = m_pdCostCoeffGroupSig;
  memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );

  const int sbNum = width * height >> cctx.log2CGSize();
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // budget of context-coded bins for the whole block
  int maxCtxBins  = ( cctx.maxNumCoeff() * 7 ) >> 2;
  cctx.remRegBins = maxCtxBins;

  for( int sbIdx = 0; sbIdx < sbNum; sbIdx++ )
  {
    cctx.initSubblock( sbIdx );

    int    numCodedInSb = 0;    // non-zero levels chosen so far in this sub-block
    double baseCost     = 0.0;  // accumulated cost of this sub-block
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));

    rdStats.iNumSbbCtxBins = 0;

    for( int posInSb = 0; posInSb <= sbSizeM1; posInSb++ )
    {
      const int scanPos = cctx.minSubPos() + posInSb;

      //===== quantization =====
      uint32_t blkPos = cctx.blockPos( scanPos );

      // scaled magnitude, clipped so that adding the rounding offset cannot overflow
      const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, levelClip );

      // candidate levels: rounded, one below (but at least 1), and truncated + 1
      const uint32_t roundAbsLevel = std::min<uint32_t>( maxAbsLevel, uint32_t( ( levelDouble + roundingOffset ) >> qBits ) );
      const uint32_t minAbsLevel   = ( roundAbsLevel > 1 ) ? roundAbsLevel - 1 : 1;
      const uint32_t downAbsLevel  = std::min<uint32_t>( maxAbsLevel, uint32_t( levelDouble >> qBits ) );
      uint32_t       upAbsLevel    = std::min<uint32_t>( maxAbsLevel, downAbsLevel + 1 );

      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if( minAbsLevel != roundAbsLevel )
      {
        coeffLevels[m_testedLevels++] = minAbsLevel;
      }

      int rightPixel, belowPixel;
      cctx.neighTS( rightPixel, belowPixel, scanPos, dstCoeff );
      const int predPixel = cctx.deriveModCoeff( rightPixel, belowPixel, upAbsLevel, 0 );

      // the "up" level is only tried when it is a new candidate and deriveModCoeff() yields 1
      if( upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1 )
      {
        coeffLevels[m_testedLevels++] = upAbsLevel;
      }

      // cost of coding the coefficient as zero
      const double dErr  = double( levelDouble );
      coeffLevelError[0] = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
      unsigned          ctxIdSig    = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );

      //goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
      uint32_t          goRiceParam  = 1;
      unsigned          ctxIdSign    = cctx.signCtxIdAbsTS( scanPos, dstCoeff, 0 );
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray( ctxIdSign );
      const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;

      DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );

      unsigned          gt1CtxId    = cctx.lrg1CtxIdAbsTS( scanPos, dstCoeff, 0 );
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray( gt1CtxId );
      const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );

      // set for the final position of a sub-block in which no level has been kept so far
      bool lastCoeff = ( posInSb == sbSizeM1 ) && ( numCodedInSb == 0 );

      int numUsedCtxBins = 0;
      uint32_t cLevel = xGetCodedLevelTSPred( costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
                                              &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins );

      cctx.remRegBins        -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if( cLevel > 0 )
      {
        numCodedInSb++;
      }

      // store the signed level
      TCoeff level = cLevel;
      dstCoeff[blkPos] = ( level != 0 && srcCoeff[blkPos] < 0 ) ? -level : level;

      baseCost           += costCoeff[ scanPos ];
      rdStats.d64SigCost += costSig[ scanPos ];

      if( dstCoeff[ blkPos ] )
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
        rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
      }
    } // end for (posInSb)

    if( !cctx.isSigGroup() )
    {
      // nothing survived: replace the individual sig costs by the group flag = 0 cost
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
      baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if( sbIdx != sbNum - 1 || anySigCG )
    {
      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );

      baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
      costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if( costZeroSB >= baseCost )
      {
        // keeping the sub-block is not more expensive than dropping it
        anySigCG = true;
      }
      else
      {
        // cheaper to drop the whole sub-block: clear it and give back its context bins
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for( int posInSb = 0; posInSb <= sbSizeM1; posInSb++ )
        {
          const int      scanPos = cctx.minSubPos() + posInSb;
          const uint32_t blkPos  = cctx.blockPos( scanPos );

          if( dstCoeff[ blkPos ] )
          {
            dstCoeff [ blkPos  ] = 0;
            costCoeff[ scanPos ] = costCoeff0[ scanPos ];
            costSig  [ scanPos ] = 0;
          }
        }
      }
    }
  }

  //===== accumulate the absolute sum of all output levels =====
  for( int pos = 0; pos < maxNumCoeff; pos++ )
  {
    const int    blkPos = cctx.blockPos( pos );
    const TCoeff level  = dstCoeff[ blkPos ];
    absSum += abs( level );
  }
}
1337
1338
/** Rate-distortion optimised quantisation for a block coded with sample-wise residual
 *  prediction (the BDPCM direction is read from tu.cu->bdpcmM).
 *
 *  For every scan position the difference between the source coefficient and the
 *  reconstructed neighbouring coefficient (left neighbour when the direction is 1,
 *  upper neighbour otherwise) is quantised. xGetCodedLevelTSPred() picks the level,
 *  and the dequantised result plus prediction is stored in m_fullCoeff so that later
 *  positions can predict from it. After each sub-block, the cost of keeping its levels
 *  is compared against the cost of zeroing the whole sub-block.
 *
 *  \param tu      transform unit; its coefficient buffer for compID receives the signed levels
 *  \param compID  component to process
 *  \param coeffs  source coefficients; read at block positions, its stride addresses the neighbour
 *  \param absSum  incremented by the sum of absolute levels of the block (it is not reset here)
 *  \param qp      quantisation parameter; per()/rem() are queried with the transform-skip state
 *  \param ctx     provides the fractional-bit tables used for rate estimation
 */
void QuantRDOQ::forwardRDPCM( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps = *tu.cs->sps;
  const CompArea& rect = tu.blocks[compID];
  const uint32_t width = rect.width;
  const uint32_t height = rect.height;
  const ChannelType chType = toChannelType(compID);
  const int channelBitDepth = sps.bitDepths[chType];

  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
  const int  dirMode = tu.cu->bdpcmM[toChannelType(compID)];

  const int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

  const uint32_t maxNumCoeff = rect.area();

  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

  int scalingListType = getScalingListType(tu.cu->predMode, compID);
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs(compID).buf;

  double *costCoeff = m_pdCostCoeff;
  double *costSig = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;

  // costCoeff0 needs no clearing: every entry is written below before it is read
  memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
  memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
  memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff);

  // xGetCodedLevelTSPred() reads the direction from this member
  m_bdpcm = dirMode;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
  const double errorScale = xGetErrScaleCoeff(needsSqrt2Scale, width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);

  // parameters handed to xDequantSample() for the in-loop reconstruction
  TrQuantParams trQuantParams;
  trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
  trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

  const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;

  uint32_t coeffLevels[3];
  double   coeffLevelError[4];

  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
  const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
  double    baseCost = 0;
  uint32_t  goRiceParam = 0;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));

  const int sbNum = width * height >> cctx.log2CGSize();
  int scanPos;
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // budget of regular bins for the block: 1.75 per coefficient
  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
  cctx.remRegBins = maxCtxBins;

  for (int sbId = 0; sbId < sbNum; sbId++)
  {
    cctx.initSubblock(sbId);

    int noCoeffCoded = 0;
    baseCost = 0.0;
    memset(&rdStats, 0, sizeof(coeffGroupRDStats));
    rdStats.iNumSbbCtxBins = 0;

    for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
    {
      int lastPosCoded = sbSizeM1;
      scanPos = cctx.minSubPos() + scanPosInSB;
      //===== quantization =====
      uint32_t blkPos = cctx.blockPos(scanPos);

      // direction 1 predicts from the left neighbour, any other direction from the one above;
      // the first column / row (posS == 0) has no neighbour and is predicted from 0
      const int posX = cctx.posX(scanPos);
      const int posY = cctx.posY(scanPos);
      const int posS = (1 == dirMode) ? posX : posY;
      const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
      TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;

      // set coeff
      const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));
      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

      // candidates: the rounded level and, if different, the next smaller one (at least 1)
      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if (minAbsLevel != roundAbsLevel)
        coeffLevels[m_testedLevels++] = minAbsLevel;

      double dErr = double(levelDouble);
      coeffLevelError[0]  = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
      unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
      uint32_t    cLevel;
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());

      //goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);
      goRiceParam = 1;
      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
      const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;
      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

      DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);

      const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);
      // the final position of a sub-block that has no non-zero level so far is flagged as "last"
      bool lastCoeff = false;
      if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
      {
        lastCoeff = true;
      }
      int rightPixel, belowPixel;
      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
      int numUsedCtxBins = 0;
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);
      cctx.remRegBins -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if (cLevel > 0)
      {
        noCoeffCoded++;
      }
      dstCoeff[blkPos] = cLevel;

      if (sign)
      {
        dstCoeff[blkPos] = -dstCoeff[blkPos];
      }

      // reconstruct this position so that following positions can predict from it
      xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams );
      m_fullCoeff[blkPos] += predCoeff;

      baseCost += costCoeff[scanPos];
      rdStats.d64SigCost += costSig[scanPos];

      if (dstCoeff[blkPos])
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
        rdStats.d64UncodedDist += costCoeff0[scanPos];
      }
    } //end for (iScanPosinCG)

    if (!cctx.isSigGroup())
    {
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if (sbId != sbNum - 1 || anySigCG)
    {
      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));

      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
      costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if (costZeroSB < baseCost)
      {
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
        {
          scanPos = cctx.minSubPos() + scanPosInSB;
          uint32_t blkPos = cctx.blockPos(scanPos);

          const int posX = cctx.posX(scanPos);
          const int posY = cctx.posY(scanPos);
          const int posS = (1 == dirMode) ? posX : posY;
          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
          // With all levels of the sub-block zeroed, the reconstruction of a sample is just its
          // prediction. m_fullCoeff is addressed by block position everywhere else (see the
          // quantisation loop above), so the update has to use blkPos; indexing with scanPos
          // would overwrite an unrelated sample and leave this one stale.
          m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0;

          if (dstCoeff[blkPos])
          {
            dstCoeff[blkPos] = 0;
            costCoeff[scanPos] = costCoeff0[scanPos];
            costSig[scanPos] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  }

  //===== accumulate the absolute levels of the whole block =====
  const int numCoeffInBlock = static_cast<int>(maxNumCoeff);
  for (int pos = 0; pos < numCoeffInBlock; pos++)
  {
    int blkPos = cctx.blockPos(pos);
    TCoeff level = dstCoeff[blkPos];
    absSum += abs(level);
  }
}
1563
1564
/** Dequantise a single level.
 *
 *  \param pRes           receives the dequantised value
 *  \param coeff          quantised level to scale
 *  \param trQuantParams  supplies the scale factor (qScale) and the shift (rightShift);
 *                        a positive shift is applied as a rounded right shift, otherwise
 *                        the scaled value is multiplied by 2^(-rightShift)
 */
void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeffSig& coeff, const TrQuantParams& trQuantParams)
{
  const auto shift = trQuantParams.rightShift;

  if (!(shift > 0))
  {
    // multiplication instead of a left shift, the scaled value may be negative
    pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale) * (1 << -shift));
    return;
  }

  const Intermediate_Int roundingOffset = Intermediate_Int(1) << (shift - 1);
  pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale + roundingOffset) >> shift);
}
1577
1578
inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost,
1579
  double&            rd64CodedCost0,
1580
  double&            rd64CodedCostSig,
1581
  Intermediate_Int    levelDouble,
1582
  int                 qBits,
1583
  double              errorScale,
1584
  uint32_t coeffLevels[],
1585
  double coeffLevelError[],
1586
  const BinFracBits* fracBitsSig,
1587
  const BinFracBits& fracBitsPar,
1588
  CoeffCodingContext& cctx,
1589
  const FracBitsAccess& fracBitsAccess,
1590
  const BinFracBits& fracBitsSign,
1591
  const BinFracBits& fracBitsGt1,
1592
  const uint8_t      sign,
1593
  int                rightPixel,
1594
  int                belowPixel,
1595
  uint16_t           ricePar,
1596
  bool               isLast,
1597
  const int          maxLog2TrDynamicRange,
1598
  int&               numUsedCtxBins
1599
) const
1600
10.1M
{
1601
10.1M
  double currCostSig = 0;
1602
10.1M
  uint32_t   bestAbsLevel = 0;
1603
10.1M
  numUsedCtxBins = 0;
1604
10.1M
  int numBestCtxBin = 0;
1605
10.1M
  if (!isLast && coeffLevels[0] < 3)
1606
9.22M
  {
1607
9.22M
    if (cctx.remRegBins >= 4)
1608
9.05M
    rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
1609
163k
    else
1610
163k
      rd64CodedCostSig = xGetICost(1 << SCALE_BITS);
1611
9.22M
    rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
1612
9.22M
    if (cctx.remRegBins >= 4)
1613
9.05M
      numUsedCtxBins++;
1614
9.22M
    if (coeffLevels[0] == 0)
1615
8.71M
    {
1616
8.71M
      return bestAbsLevel;
1617
8.71M
    }
1618
9.22M
  }
1619
913k
  else
1620
913k
  {
1621
913k
    rd64CodedCost = MAX_DOUBLE;
1622
913k
  }
1623
1624
1.41M
  if (!isLast)
1625
892k
  {
1626
892k
    if (cctx.remRegBins >= 4)
1627
853k
      currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
1628
39.6k
    else
1629
39.6k
      currCostSig = xGetICost(1 << SCALE_BITS);
1630
892k
    if (coeffLevels[0] >= 3 && cctx.remRegBins >= 4)
1631
362k
      numUsedCtxBins++;
1632
892k
  }
1633
1634
3.78M
  for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++)
1635
2.36M
  {
1636
2.36M
    int absLevel = coeffLevels[errorInd - 1];
1637
2.36M
    double dErr = 0.0;
1638
2.36M
    dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits));
1639
2.36M
    coeffLevelError[errorInd] = dErr * dErr * errorScale;
1640
2.36M
    int modAbsLevel = absLevel;
1641
2.36M
    if (cctx.remRegBins >= 4) 
1642
2.29M
    {
1643
2.29M
      modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
1644
2.29M
    }
1645
2.36M
    int numCtxBins = 0;
1646
2.36M
    double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, maxLog2TrDynamicRange));
1647
1648
2.36M
    if (cctx.remRegBins >= 4)
1649
2.29M
      dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more
1650
1651
2.36M
    if (dCurrCost < rd64CodedCost)
1652
1.02M
    {
1653
1.02M
      bestAbsLevel = absLevel;
1654
1.02M
      rd64CodedCost = dCurrCost;
1655
1.02M
      rd64CodedCostSig = currCostSig;
1656
1.02M
      numBestCtxBin = numCtxBins;
1657
1.02M
    }
1658
2.36M
  }
1659
1.41M
  numUsedCtxBins += numBestCtxBin;
1660
1.41M
  return bestAbsLevel;
1661
10.1M
}
1662
1663
inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
1664
                                    const BinFracBits&        fracBitsPar,
1665
                                    const CoeffCodingContext& cctx,
1666
                                    const FracBitsAccess&     fracBitsAccess,
1667
                                    const BinFracBits&        fracBitsSign,
1668
                                    const BinFracBits&        fracBitsGt1,
1669
                                    int&                      numCtxBins,
1670
                                    const uint8_t             sign,
1671
                                    const uint16_t            ricePar,
1672
                                    const int                 maxLog2TrDynamicRange  ) const
1673
2.36M
{
1674
 
1675
2.36M
  if (cctx.remRegBins < 4) // Full by-pass coding 
1676
73.4k
  {
1677
73.4k
    int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero 
1678
1679
73.4k
    uint32_t symbol = absLevel;
1680
1681
73.4k
    uint32_t length;
1682
73.4k
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
1683
73.4k
    if (symbol < (threshold << ricePar))
1684
37.5k
    {
1685
37.5k
      length = symbol >> ricePar;
1686
37.5k
      rate += (length + 1 + ricePar) << SCALE_BITS;
1687
37.5k
    }
1688
35.8k
    else
1689
35.8k
    {
1690
35.8k
      length = ricePar;
1691
35.8k
      symbol = symbol - (threshold << ricePar);
1692
159k
      while (symbol >= (1 << length))
1693
123k
      {
1694
123k
        symbol -= (1 << (length++));
1695
123k
      }
1696
35.8k
      rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1697
35.8k
    }
1698
1699
73.4k
    return rate;
1700
73.4k
  }
1701
1702
2.29M
  else if (cctx.remRegBins >= 4 && cctx.remRegBins < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here)
1703
13.3k
  {
1704
13.3k
    int rate = fracBitsSign.intBits[sign]; // sign bits
1705
13.3k
    if (absLevel)
1706
12.7k
      numCtxBins++;
1707
1708
13.3k
    if (absLevel > 1)
1709
9.05k
    {
1710
9.05k
      rate += fracBitsGt1.intBits[1];
1711
9.05k
      rate += fracBitsPar.intBits[(absLevel - 2) & 1];
1712
1713
9.05k
      numCtxBins += 2;
1714
1715
9.05k
      int cutoffVal = 2;
1716
1717
9.05k
      if (absLevel >= cutoffVal)
1718
9.05k
      {
1719
9.05k
        uint32_t symbol = (absLevel - cutoffVal) >> 1;
1720
9.05k
        uint32_t length;
1721
9.05k
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
1722
9.05k
        if (symbol < (threshold << ricePar))
1723
6.40k
        {
1724
6.40k
          length = symbol >> ricePar;
1725
6.40k
          rate += (length + 1 + ricePar) << SCALE_BITS;
1726
6.40k
        }
1727
2.64k
        else
1728
2.64k
        {
1729
2.64k
          length = ricePar;
1730
2.64k
          symbol = symbol - (threshold << ricePar);
1731
10.8k
          while (symbol >= (1 << length))
1732
8.21k
          {
1733
8.21k
            symbol -= (1 << (length++));
1734
8.21k
          }
1735
2.64k
          rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1736
2.64k
        }
1737
9.05k
      }
1738
9.05k
    }
1739
4.24k
    else if (absLevel == 1)
1740
3.68k
    {
1741
3.68k
      rate += fracBitsGt1.intBits[0];
1742
3.68k
      numCtxBins++;
1743
3.68k
    }
1744
559
    else
1745
559
    {
1746
559
      rate = 0;
1747
559
    }
1748
13.3k
    return rate;
1749
13.3k
  }
1750
    
1751
2.28M
  int rate = fracBitsSign.intBits[sign];
1752
1753
2.28M
  if (absLevel)
1754
1.77M
    numCtxBins++;
1755
1756
2.28M
  if( absLevel > 1 )
1757
765k
  {
1758
765k
    rate += fracBitsGt1.intBits[1];
1759
765k
    rate += fracBitsPar.intBits[( absLevel - 2 ) & 1];
1760
765k
    numCtxBins += 2;
1761
1762
765k
          int cutoffVal = 2;
1763
765k
    const int numGtBins = 4;
1764
3.82M
    for( int i = 0; i < numGtBins; i++ )
1765
3.06M
    {
1766
3.06M
      if( absLevel >= cutoffVal )
1767
2.17M
      {
1768
2.17M
        const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 );
1769
2.17M
        const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX );
1770
2.17M
        unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) );
1771
2.17M
        rate += fracBitsGtX.intBits[gtX];
1772
2.17M
        numCtxBins++;
1773
2.17M
      }
1774
3.06M
      cutoffVal += 2;
1775
3.06M
    }
1776
1777
765k
    if( absLevel >= cutoffVal )
1778
362k
    {
1779
362k
      uint32_t symbol = ( absLevel - cutoffVal ) >> 1;
1780
362k
      uint32_t length;
1781
362k
      const int threshold = COEF_REMAIN_BIN_REDUCTION;
1782
362k
      if( symbol < ( threshold << ricePar ) )
1783
141k
      {
1784
141k
        length = symbol >> ricePar;
1785
141k
        rate  += ( length + 1 + ricePar ) << SCALE_BITS;
1786
141k
      }
1787
221k
      else
1788
221k
      {
1789
221k
        length = ricePar;
1790
221k
        symbol = symbol - ( threshold << ricePar );
1791
950k
        while( symbol >= ( 1 << length ) )
1792
729k
        {
1793
729k
          symbol -= ( 1 << ( length++ ) );
1794
729k
        }
1795
221k
        rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS;
1796
221k
      }
1797
362k
    }
1798
765k
  }
1799
1.51M
  else if( absLevel == 1 )
1800
1.00M
  {
1801
1.00M
    rate += fracBitsGt1.intBits[0];
1802
1.00M
    numCtxBins++;
1803
1.00M
  }
1804
507k
  else
1805
507k
  {
1806
507k
    rate = 0;
1807
507k
  }
1808
2.28M
  return rate;
1809
2.36M
}
1810
1811
} // namespace vvenc
1812
1813
//! \}
1814