Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/QuantRDOQ.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
------------------------------------------------------------------------------------------- */
41
42
43
/** \file     QuantRDOQ.cpp
44
    \brief    transform and quantization class
45
*/
46
47
#include "QuantRDOQ.h"
48
#include "UnitTools.h"
49
#include "ContextModelling.h"
50
#include "CodingStructure.h"
51
#include "dtrace_next.h"
52
#include "dtrace_buffer.h"
53
54
#include <stdlib.h>
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
// Rate-distortion accumulators for a single coefficient group.
// xRateDistOptQuant clears an instance with memset for every coefficient group, so this must stay a
// plain aggregate without constructors or default member initializers.
struct coeffGroupRDStats
{
  int    iNNZbeforePos0;        // NOTE(review): presumably the number of non-zero levels ahead of position 0 of the group - confirm
  double d64CodedLevelandDist;  // distortion and level cost only
  double d64UncodedDist;        // all zero coded block distortion
  double d64SigCost;            // NOTE(review): presumably the accumulated significance-flag cost - confirm
  double d64SigCost_0;          // NOTE(review): presumably the significance cost of position 0 - confirm
  int    iNumSbbCtxBins;        // NOTE(review): presumably context-coded bins spent in this group - confirm
};
71
72
73
//! \ingroup CommonLib
74
//! \{
75
76
// ====================================================================================================================
77
// Constants
78
// ====================================================================================================================
79
80
81
// ====================================================================================================================
82
// Static functions
83
// ====================================================================================================================
84
85
// ====================================================================================================================
86
// QuantRDOQ class member functions
87
// ====================================================================================================================
88
89
90
17.3k
/** Construct an RDOQ quantizer, optionally on top of an existing one.
 * \param other           quantizer to derive from; may be null, otherwise it has to be a QuantRDOQ
 * \param useScalingLists forwarded to the Quant base class
 *
 * The error-scale tables are set up by xInitScalingList(): they are owned by this object when
 * \p other is null and taken over from \p other otherwise.
 */
QuantRDOQ::QuantRDOQ( const Quant* other, bool useScalingLists ) : Quant( other, useScalingLists )
{
  // a non-null 'other' of a different dynamic type makes the cast yield nullptr, which the CHECK rejects
  const auto* const rdoq = dynamic_cast<const QuantRDOQ*>( other );
  CHECK( other && !rdoq, "The RDOQ cast must be successfull!" );

  xInitScalingList( rdoq );
}
97
98
/** Release the error-scale tables; xDestroyScalingList() does nothing for an instance that does not own them.
 */
QuantRDOQ::~QuantRDOQ()
{
  this->xDestroyScalingList();
}
102
103
104
105
106
/** Get the best level in RD sense
107
 *
108
 * \returns best quantized transform level for given scan position
109
 *
110
 * This method calculates the best quantized transform level for a given scan position.
111
 */
112
inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
113
                                       double&            rd64CodedCost0,
114
                                       double&            rd64CodedCostSig,
115
                                       Intermediate_Int   lLevelDouble,
116
                                       uint32_t               uiMaxAbsLevel,
117
                                       const BinFracBits* fracBitsSig,
118
                                       const BinFracBits& fracBitsPar,
119
                                       const BinFracBits& fracBitsGt1,
120
                                       const BinFracBits& fracBitsGt2,
121
                                       const int          remRegBins,
122
                                       unsigned           goRiceZero,
123
                                       uint16_t             ui16AbsGoRice,
124
                                       int                iQBits,
125
                                       double             errorScale,
126
                                       bool               bLast,
127
                                       const int          maxLog2TrDynamicRange
128
                                     ) const
129
0
{
130
0
  double dCurrCostSig   = 0;
131
0
  uint32_t   uiBestAbsLevel = 0;
132
133
0
  if( !bLast && uiMaxAbsLevel < 3 )
134
0
  {
135
0
    rd64CodedCostSig    = xGetRateSigCoef( *fracBitsSig, 0 );
136
0
    rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
137
0
    if( uiMaxAbsLevel == 0 )
138
0
    {
139
0
      return uiBestAbsLevel;
140
0
    }
141
0
  }
142
0
  else
143
0
  {
144
0
    rd64CodedCost       = MAX_DOUBLE;
145
0
  }
146
147
0
  if( !bLast )
148
0
  {
149
0
    dCurrCostSig        = xGetRateSigCoef( *fracBitsSig, 1 );
150
0
  }
151
152
0
  uint32_t uiMinAbsLevel    = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
153
0
  for( int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
154
0
  {
155
0
    double dErr         = double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
156
157
0
    double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, maxLog2TrDynamicRange ) );
158
0
    dCurrCost          += dCurrCostSig;
159
160
0
    if( dCurrCost < rd64CodedCost )
161
0
    {
162
0
      uiBestAbsLevel    = uiAbsLevel;
163
0
      rd64CodedCost     = dCurrCost;
164
0
      rd64CodedCostSig  = dCurrCostSig;
165
0
    }
166
0
  }
167
168
0
  return uiBestAbsLevel;
169
0
}
170
171
/** Calculates the cost for specific absolute transform level
172
 * \param uiAbsLevel scaled quantized level
173
 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
174
 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
175
 * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
176
 * \param c1Idx
177
 * \param c2Idx
178
 * \param useLimitedPrefixLength
179
 * \param maxLog2TrDynamicRange
180
 * \returns cost of given absolute transform level
181
 */
182
inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
183
                                  const BinFracBits& fracBitsPar,
184
                                  const BinFracBits& fracBitsGt1,
185
                                  const BinFracBits& fracBitsGt2,
186
                                  const int          remRegBins,
187
                                  unsigned           goRiceZero,
188
                                  const uint16_t       ui16AbsGoRice,
189
                                  const int          maxLog2TrDynamicRange  ) const
190
0
{
191
0
  if( remRegBins < 4 )
192
0
  {
193
0
    int       iRate   = int( xGetIEPRate() ); // cost of sign bit
194
0
    uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
195
0
    uint32_t  length;
196
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
197
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
198
0
    {
199
0
      length = symbol >> ui16AbsGoRice;
200
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
201
0
    }
202
0
    else
203
0
    {
204
0
      length = ui16AbsGoRice;
205
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
206
0
      while( symbol >= ( 1 << length ) )
207
0
      {
208
0
        symbol -= ( 1 << ( length++ ) );
209
0
      }
210
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
211
0
    }
212
0
    return iRate;
213
0
  }
214
215
0
  int iRate = int( xGetIEPRate() ); // cost of sign bit
216
0
  const uint32_t cthres = 4;
217
0
  if( uiAbsLevel >= cthres )
218
0
  {
219
0
    uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;
220
0
    uint32_t length;
221
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
222
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
223
0
    {
224
0
      length = symbol >> ui16AbsGoRice;
225
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
226
0
    }
227
0
    else
228
0
    {
229
0
      length = ui16AbsGoRice;
230
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
231
0
      while( symbol >= ( 1 << length ) )
232
0
      {
233
0
        symbol -= ( 1 << ( length++ ) );
234
0
      }
235
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
236
0
    }
237
238
0
    iRate += fracBitsGt1.intBits[1];
239
0
    iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
240
0
    iRate += fracBitsGt2.intBits[1];
241
0
  }
242
0
  else if( uiAbsLevel == 1 )
243
0
  {
244
0
    iRate += fracBitsGt1.intBits[0];
245
0
  }
246
0
  else if( uiAbsLevel == 2 )
247
0
  {
248
0
    iRate += fracBitsGt1.intBits[1];
249
0
    iRate += fracBitsPar.intBits[0];
250
0
    iRate += fracBitsGt2.intBits[0];
251
0
  }
252
0
  else if( uiAbsLevel == 3 )
253
0
  {
254
0
    iRate += fracBitsGt1.intBits[1];
255
0
    iRate += fracBitsPar.intBits[1];
256
0
    iRate += fracBitsGt2.intBits[0];
257
0
  }
258
0
  else
259
0
  {
260
0
    iRate = 0;
261
0
  }
262
0
  return  iRate;
263
0
}
264
265
inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
266
1.22M
{
267
1.22M
  return xGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] );
268
1.22M
}
269
270
/** Calculates the cost of signaling the last significant coefficient in the block
271
 * \param uiPosX X coordinate of the last significant coefficient
272
 * \param uiPosY Y coordinate of the last significant coefficient
273
 * \param component colour component ID
274
 * \returns cost of last significant coefficient
275
 */
276
/*
277
 * \param uiWidth width of the transform unit (TU)
278
*/
279
inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
280
0
{
281
0
  uint32_t    CtxX  = g_uiGroupIdx[PosX];
282
0
  uint32_t    CtxY  = g_uiGroupIdx[PosY];
283
0
  double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
284
0
  if( CtxX > 3 )
285
0
  {
286
0
    Cost += xGetIEPRate() * ((CtxX-2)>>1);
287
0
  }
288
0
  if( CtxY > 3 )
289
0
  {
290
0
    Cost += xGetIEPRate() * ((CtxY-2)>>1);
291
0
  }
292
0
  return xGetICost( Cost );
293
0
}
294
295
296
inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
297
8.77M
{
298
8.77M
  return xGetICost( fracBitsSig.intBits[uiSignificance] );
299
8.77M
}
300
301
/** Get the cost for a specific rate
302
 * \param dRate rate of a bit
303
 * \returns cost at the specific rate
304
 */
305
inline double QuantRDOQ::xGetICost        ( double                          dRate         ) const
306
12.2M
{
307
12.2M
  return m_dLambda * dRate;
308
12.2M
}
309
310
/** Get the cost of an equal probable bit
311
 * \returns cost of equal probable bit
312
 */
313
inline double QuantRDOQ::xGetIEPRate() const
314
0
{
315
0
  return 32768;
316
0
}
317
318
319
double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false)
320
88.0k
{
321
88.0k
  const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
322
88.0k
  double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
323
88.0k
  double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
324
88.0k
  dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
325
88.0k
  const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
326
88.0k
  double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
327
88.0k
  return    finalErrScale;
328
88.0k
}
329
330
331
332
/** set error scale coefficients
333
 * \param list                   list ID
334
 * \param size
335
 * \param qp                     quantization parameter
336
 * \param maxLog2TrDynamicRange
337
 * \param bitDepths              reference to bit depth array for all channels
338
 */
339
void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths )
340
30.6M
{
341
30.6M
  const int width = g_scalingListSizeX[sizeX];
342
30.6M
  const int height = g_scalingListSizeX[sizeY];
343
30.6M
  const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C;
344
30.6M
  const int channelBitDepth = bitDepths[channelType];
345
30.6M
  const int iTransformShift = getTransformShift( channelBitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[channelType] );  // Represents scaling through forward transform
346
347
30.6M
  double dErrScale = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function
348
349
30.6M
  const bool needsSqrt2 = ((Log2(width*height)) & 1) == 1;
350
30.6M
  double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 );
351
30.6M
  dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform
352
353
30.6M
  if( getScalingListEnabled() )
354
0
  {
355
0
    uint32_t i, uiMaxNumCoeff = width * height;
356
357
0
    int*  piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY );
358
0
    double* pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp );
359
360
0
    for( i = 0; i < uiMaxNumCoeff; i++ )
361
0
    {
362
0
      pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << (DISTORTION_PRECISION_ADJUSTMENT( bitDepths[channelType] ) << 1));
363
0
    }
364
0
  }
365
366
30.6M
  int QStep = g_quantScales[needsSqrt2][qp];
367
368
30.6M
  xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) =
369
30.6M
    dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths[channelType]) << 1));
370
30.6M
}
371
372
/** set flat matrix value to quantized coefficient
373
 */
374
void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths)
375
17.3k
{
376
17.3k
  Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );
377
378
17.3k
  const int minimumQp = 0;
379
17.3k
  const int maximumQp = SCALING_LIST_REM_NUM;
380
381
139k
  for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++)
382
121k
  {
383
973k
    for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++)
384
851k
    {
385
5.95M
      for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
386
5.10M
      {
387
35.7M
        for(int qp = minimumQp; qp < maximumQp; qp++)
388
30.6M
        {
389
30.6M
          xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths );
390
30.6M
        }
391
5.10M
      }
392
851k
    }
393
121k
  }
394
17.3k
}
395
396
/** initialization process of scaling list array
397
 */
398
void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
399
17.3k
{
400
17.3k
  m_isErrScaleListOwner = other == nullptr;
401
402
17.3k
  bool useScalingLists = getScalingListEnabled();
403
404
139k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
405
121k
  {
406
973k
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
407
851k
    {
408
5.95M
      for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
409
5.10M
      {
410
35.7M
        for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
411
30.6M
        {
412
30.6M
          if( m_isErrScaleListOwner )
413
30.6M
          {
414
30.6M
            m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr;
415
30.6M
          }
416
0
          else
417
0
          {
418
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp];
419
0
          }
420
30.6M
        } // listID loop
421
5.10M
      }
422
851k
    }
423
121k
  }
424
17.3k
}
425
426
/** destroy quantization matrix array
427
 */
428
void QuantRDOQ::xDestroyScalingList()
429
17.3k
{
430
17.3k
  if( !m_isErrScaleListOwner ) return;
431
432
139k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
433
121k
  {
434
973k
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
435
851k
    {
436
5.95M
      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
437
5.10M
      {
438
35.7M
        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
439
30.6M
        {
440
30.6M
          if(m_errScale[sizeIdX][sizeIdY][listId][qp])
441
0
          {
442
0
            delete [] m_errScale[sizeIdX][sizeIdY][listId][qp];
443
0
          }
444
30.6M
        }
445
5.10M
      }
446
851k
    }
447
121k
  }
448
//   Quant::destroyScalingList();
449
17.3k
}
450
451
452
/** Quantize one transform block, using RDOQ where it is enabled
 *
 * RDOQ is taken from m_useRDOQTS for transform-skip blocks and from m_RDOQ > 0 otherwise. Unless the
 * block is an ISP luma block, it additionally requires width and height above 2. If RDOQ is off the
 * call is handed to Quant::quant(). If selective RDOQ is active for the picture and xNeedRDOQ() says
 * no, all levels are cleared and lastPos is set to -1.
 *
 * \param tu       transform unit holding the destination coefficients
 * \param compID   component to quantize
 * \param pSrc     transform coefficients to quantize
 * \param uiAbsSum [out] written by the selected quantizer; 0 when the block is cleared
 * \param cQP      quantization parameters
 * \param ctx      entropy coding contexts handed to the RDOQ routines
 */
void QuantRDOQ::quant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  const CompArea& area            = tu.blocks[compID];
  CoeffSigBuf     dstCoeffs       = tu.getCoeffs(compID);
  const bool      useTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;

  bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_RDOQ > 0;

  const uint32_t blkWidth  = area.width;
  const uint32_t blkHeight = area.height;
  if( !( tu.cu->ispMode && isLuma(compID) ) )
  {
    useRDOQ &= blkWidth > 2;
    useRDOQ &= blkHeight > 2;
  }

  if( !useRDOQ )
  {
    Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ(tu, compID, pSrc, cQP) )
  {
    // selective RDOQ decided against this block: code it as all zero
    dstCoeffs.fill(0);
    uiAbsSum = 0;
    tu.lastPos[compID] = -1;
    return;
  }

  if( !useTransformSkip )
  {
    xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  // transform skip: BDPCM blocks use the RDPCM variant
  if(tu.cu->bdpcmM[toChannelType(compID)])
  {
    forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else
  {
    rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
}
503
504
505
506
void QuantRDOQ::xRateDistOptQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx &ctx)
507
0
{
508
0
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
509
510
0
  const SPS &sps            = *tu.cs->sps;
511
0
  const CompArea& rect      = tu.blocks[compID];
512
0
  const uint32_t uiWidth    = rect.width;
513
0
  const uint32_t uiHeight   = rect.height;
514
0
  const ChannelType chType  = toChannelType(compID);
515
0
  const int channelBitDepth = sps.bitDepths[ chType ];
516
517
0
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
518
519
0
  const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
520
  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
521
  * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
522
  * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
523
  * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
524
  */
525
526
  // Represents scaling through forward transform
527
0
  const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
528
529
0
  double     d64BlockUncodedCost               = 0;
530
0
  const uint32_t uiLog2BlockWidth                  = Log2(uiWidth);
531
0
  const uint32_t uiLog2BlockHeight                 = Log2(uiHeight);
532
0
  const uint32_t uiMaxNumCoeff                     = rect.area();
533
534
0
  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
535
536
0
  int scalingListType = getScalingListType(tu.cu->predMode, compID);
537
538
0
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
539
540
0
  const TCoeff    *plSrcCoeff = pSrc.buf;
541
0
        TCoeffSig *piDstCoeff = tu.getCoeffs(compID).buf;
542
543
0
  double *pdCostCoeff  = m_pdCostCoeff;
544
0
  double *pdCostSig    = m_pdCostSig;
545
0
  double *pdCostCoeff0 = m_pdCostCoeff0;
546
0
  int    *rateIncUp    = m_rateIncUp;
547
0
  int    *rateIncDown  = m_rateIncDown;
548
0
  int    *sigRateDelta = m_sigRateDelta;
549
0
  TCoeff *deltaU       = m_deltaU;
550
551
0
  memset( piDstCoeff,     0, sizeof( TCoeffSig ) * uiMaxNumCoeff );
552
0
  memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
553
0
  memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
554
0
  memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
555
0
  memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
556
0
  memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
557
0
  memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
558
559
560
0
  const bool   needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID );
561
0
  const bool   isTransformSkip    = tu.mtsIdx[compID]==MTS_SKIP;
562
0
  const double *const pdErrScale  = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
563
0
  const int    *const piQCoef     = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
564
0
  const bool isLfnstApplied       = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
565
0
  const bool enableScalingLists   = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied);
566
0
  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
567
0
  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
568
0
  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
569
570
571
0
  const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
572
0
  const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
573
574
0
  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
575
0
  const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
576
577
0
  int     iCGLastScanPos      = -1;
578
0
  double  d64BaseCost         = 0;
579
0
  int     iLastScanPos        = -1;
580
581
0
  int ctxBinSampleRatio   = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT;
582
0
  int remRegBins          = (tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio) >> 4;
583
0
  uint32_t  goRiceParam   = 0;
584
585
0
  double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
586
0
  memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
587
0
  int iScanPos;
588
0
  coeffGroupRDStats rdStats;
589
590
#if ENABLE_TRACING
591
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
592
#endif
593
594
0
  const uint32_t lfnstIdx = tu.cu->lfnstIdx;
595
596
0
  const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
597
598
0
  for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
599
0
  {
600
0
    cctx.initSubblock( subSetId );
601
602
0
    int remRegBinsStartCG = remRegBins;
603
604
0
    uint32_t maxNonZeroPosInCG = iCGSizeM1;
605
0
    if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
606
0
    {
607
0
      maxNonZeroPosInCG = 7;
608
0
    }
609
610
0
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));
611
612
0
    for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
613
0
    {
614
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
615
0
      uint32_t    blkPos = cctx.blockPos( iScanPos );
616
0
      piDstCoeff[ blkPos ] = 0;
617
0
    }
618
0
    for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
619
0
    {
620
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
621
      //===== quantization =====
622
0
      uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
623
624
      // set coeff
625
0
      const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
626
0
      const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
627
0
      const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
628
629
0
      const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
630
631
0
      uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
632
633
0
      const double dErr         = double( lLevelDouble );
634
0
      pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
635
0
      d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
636
0
      piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
637
638
0
      if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
639
0
      {
640
0
        iLastScanPos            = iScanPos;
641
0
        iCGLastScanPos          = cctx.subSetId();
642
0
      }
643
644
0
      if ( iLastScanPos >= 0 )
645
0
      {
646
647
#if ENABLE_TRACING
648
        uint32_t uiCGPosY = cctx.cgPosY();
649
        uint32_t uiCGPosX = cctx.cgPosX();
650
        uint32_t uiPosY = cctx.posY( iScanPos );
651
        uint32_t uiPosX = cctx.posX( iScanPos );
652
        DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
653
#endif
654
        //===== coefficient level estimation =====
655
0
        unsigned ctxIdSig = 0;
656
0
        if( iScanPos != iLastScanPos )
657
0
        {
658
0
          ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
659
0
        }
660
0
        uint32_t    uiLevel;
661
0
        uint8_t ctxOffset     = cctx.ctxOffsetAbs     ();
662
0
        uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
663
0
        uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
664
0
        uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
665
0
        uint32_t    goRiceZero    = 0;
666
0
        if( remRegBins < 4 )
667
0
        {
668
0
          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
669
0
          goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
670
0
          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
671
0
        }
672
673
0
        const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
674
0
        const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
675
0
        const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
676
677
0
        if( iScanPos == iLastScanPos )
678
0
        {
679
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
680
0
                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, maxLog2TrDynamicRange );
681
0
        }
682
0
        else
683
0
        {
684
0
          DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
685
686
0
          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
687
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
688
0
                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, maxLog2TrDynamicRange );
689
0
          sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
690
0
        }
691
692
0
        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
693
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
694
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
695
696
0
        deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
697
698
0
        if( uiLevel > 0 )
699
0
        {
700
0
          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
701
0
          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
702
0
          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
703
0
        }
704
0
        else // uiLevel == 0
705
0
        {
706
0
          if( remRegBins < 4 )
707
0
          {
708
0
            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
709
0
            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
710
0
          }
711
0
          else
712
0
          {
713
0
            rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
714
0
          }
715
0
        }
716
0
        piDstCoeff[ uiBlkPos ] = uiLevel;
717
0
        d64BaseCost           += pdCostCoeff [ iScanPos ];
718
719
0
        if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
720
0
        {
721
0
          goRiceParam   = 0;
722
0
        }
723
0
        else if( remRegBins >= 4 )
724
0
        {
725
0
          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
726
0
          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
727
0
          remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
728
0
        }
729
0
      }
730
0
      else
731
0
      {
732
0
        d64BaseCost    += pdCostCoeff0[ iScanPos ];
733
0
      }
734
0
      rdStats.d64SigCost += pdCostSig[ iScanPos ];
735
0
      if (iScanPosinCG == 0 )
736
0
      {
737
0
        rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
738
0
      }
739
0
      if (piDstCoeff[ uiBlkPos ] )
740
0
      {
741
0
        cctx.setSigGroup();
742
0
        rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
743
0
        rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
744
0
        if ( iScanPosinCG != 0 )
745
0
        {
746
0
          rdStats.iNNZbeforePos0++;
747
0
        }
748
0
      }
749
0
    } //end for (iScanPosinCG)
750
751
0
    if (iCGLastScanPos >= 0)
752
0
    {
753
0
      if( cctx.subSetId() )
754
0
      {
755
0
        if( !cctx.isSigGroup() )
756
0
        {
757
0
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
758
0
          d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
759
0
          pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
760
0
        }
761
0
        else
762
0
        {
763
0
          if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
764
0
          {
765
0
            if ( rdStats.iNNZbeforePos0 == 0 )
766
0
            {
767
0
              d64BaseCost -= rdStats.d64SigCost_0;
768
0
              rdStats.d64SigCost -= rdStats.d64SigCost_0;
769
0
            }
770
            // rd-cost if SigCoeffGroupFlag = 0, initialization
771
0
            double d64CostZeroCG = d64BaseCost;
772
773
0
            const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
774
775
0
            if (cctx.subSetId() < iCGLastScanPos)
776
0
            {
777
0
              d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);
778
0
              d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);
779
0
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
780
0
            }
781
782
            // try to convert the current coeff group from non-zero to all-zero
783
0
            d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
784
0
            d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
785
0
            d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels
786
787
                                                     // if we can save cost, change this block to all-zero block
788
0
            if ( d64CostZeroCG < d64BaseCost )
789
0
            {
790
0
              cctx.resetSigGroup();
791
0
              d64BaseCost = d64CostZeroCG;
792
0
              remRegBins = remRegBinsStartCG;
793
0
              if (cctx.subSetId() < iCGLastScanPos)
794
0
              {
795
0
                pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
796
0
              }
797
              // reset coeffs to 0 in this block
798
0
              for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
799
0
              {
800
0
                iScanPos      = cctx.minSubPos() + iScanPosinCG;
801
0
                uint32_t uiBlkPos = cctx.blockPos( iScanPos );
802
803
0
                if (piDstCoeff[ uiBlkPos ])
804
0
                {
805
0
                  piDstCoeff [ uiBlkPos ] = 0;
806
0
                  pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
807
0
                  pdCostSig  [ iScanPos ] = 0;
808
0
                }
809
0
              }
810
0
            } // end if ( d64CostAllZeros < d64BaseCost )
811
0
          }
812
0
        } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
813
0
      }
814
0
      else
815
0
      {
816
0
        cctx.setSigGroup();
817
0
      }
818
0
    }
819
0
  } //end for (cctx.subSetId)
820
821
822
  //===== estimate last position =====
823
0
  if ( iLastScanPos < 0 )
824
0
  {
825
0
    return;
826
0
  }
827
828
0
  double  d64BestCost         = 0;
829
0
  int     iBestLastIdxP1      = 0;
830
831
832
0
  if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
833
0
  {
834
0
    const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
835
0
    d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
836
0
    d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
837
0
  }
838
0
  else
839
0
  {
840
0
    bool previousCbf       = tu.cbf[COMP_Cb];
841
0
    bool lastCbfIsInferred = false;
842
0
    if( useIntraSubPartitions )
843
0
    {
844
0
      bool rootCbfSoFar       = false;
845
0
      bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
846
0
      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
847
0
      if( isLastSubPartition )
848
0
      {
849
0
        TransformUnit* tuPointer = tu.cu->firstTU;
850
0
        for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
851
0
        {
852
0
          rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
853
0
          tuPointer     = tuPointer->next;
854
0
        }
855
0
        if( !rootCbfSoFar )
856
0
        {
857
0
          lastCbfIsInferred = true;
858
0
        }
859
0
      }
860
0
      if( !lastCbfIsInferred )
861
0
      {
862
0
        previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
863
0
      }
864
0
    }
865
0
    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
866
867
0
    if( !lastCbfIsInferred )
868
0
    {
869
0
      d64BestCost  = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
870
0
      d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
871
0
    }
872
0
    else
873
0
    {
874
0
      d64BestCost  = d64BlockUncodedCost;
875
0
    }
876
0
  }
877
878
0
  int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
879
0
  int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
880
0
  {
881
0
    int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
882
0
    int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
883
0
    int bitsX = 0;
884
0
    int bitsY = 0;
885
0
    int ctxId;
886
    //X-coordinate
887
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
888
0
    {
889
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
890
0
      lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
891
0
      bitsX               +=         fB.intBits[ 1 ];
892
0
    }
893
0
    lastBitsX[ctxId] = bitsX;
894
    //Y-coordinate
895
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
896
0
    {
897
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
898
0
      lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
899
0
      bitsY               +=         fB.intBits[ 1 ];
900
0
    }
901
0
    lastBitsY[ctxId] = bitsY;
902
0
  }
903
904
905
0
  bool bFoundLast = false;
906
0
  for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
907
0
  {
908
0
    d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
909
0
    if (cctx.isSigGroup( iCGScanPos ) )
910
0
    {
911
0
      uint32_t maxNonZeroPosInCG = iCGSizeM1;
912
0
      if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
913
0
      {
914
0
        maxNonZeroPosInCG = 7;
915
0
      }
916
0
      for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
917
0
      {
918
0
        iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
919
920
0
        if (iScanPos > iLastScanPos)
921
0
        {
922
0
          continue;
923
0
        }
924
0
        uint32_t   uiBlkPos     = cctx.blockPos( iScanPos );
925
926
0
        if( piDstCoeff[ uiBlkPos ] )
927
0
        {
928
0
          uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
929
0
          uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
930
0
          double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
931
932
0
          double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
933
934
0
          if( totalCost < d64BestCost )
935
0
          {
936
0
            iBestLastIdxP1  = iScanPos + 1;
937
0
            d64BestCost     = totalCost;
938
0
          }
939
0
          if( piDstCoeff[ uiBlkPos ] > 1 )
940
0
          {
941
0
            bFoundLast = true;
942
0
            break;
943
0
          }
944
0
          d64BaseCost      -= pdCostCoeff[ iScanPos ];
945
0
          d64BaseCost      += pdCostCoeff0[ iScanPos ];
946
0
        }
947
0
        else
948
0
        {
949
0
          d64BaseCost      -= pdCostSig[ iScanPos ];
950
0
        }
951
0
      } //end for
952
0
      if (bFoundLast)
953
0
      {
954
0
        break;
955
0
      }
956
0
    } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
957
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
958
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
959
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
960
961
0
  } // end for
962
963
964
0
  for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
965
0
  {
966
0
    int blkPos = cctx.blockPos( scanPos );
967
0
    TCoeff level = piDstCoeff[ blkPos ];
968
0
    uiAbsSum += level;
969
0
    piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
970
0
  }
971
972
  //===== clean uncoded coefficients =====
973
0
  for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
974
0
  {
975
0
    piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
976
0
  }
977
0
  iLastScanPos = iBestLastIdxP1 - 1;
978
979
0
  if( cctx.signHiding() && uiAbsSum>=2)
980
0
  {
981
0
    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
982
0
    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
983
0
                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
984
0
                             + 0.5);
985
986
0
    int lastCG = -1;
987
0
    int absSum = 0 ;
988
0
    int n ;
989
0
    for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
990
0
    {
991
0
      int  subPos         = subSet << cctx.log2CGSize();
992
0
      int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
993
0
      absSum = 0 ;
994
995
0
      for( n = iCGSizeM1; n >= 0; --n )
996
0
      {
997
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
998
0
        {
999
0
          lastNZPosInCG = n;
1000
0
          break;
1001
0
        }
1002
0
      }
1003
1004
0
      for( n = 0; n <= iCGSizeM1; n++ )
1005
0
      {
1006
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
1007
0
        {
1008
0
          firstNZPosInCG = n;
1009
0
          break;
1010
0
        }
1011
0
      }
1012
1013
0
      for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
1014
0
      {
1015
0
        absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
1016
0
      }
1017
1018
0
      if(lastNZPosInCG>=0 && lastCG==-1)
1019
0
      {
1020
0
        lastCG = 1;
1021
0
      }
1022
1023
0
      if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1024
0
      {
1025
0
        uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
1026
0
        if( signbit!=(absSum&0x1) )  // hide but need tune
1027
0
        {
1028
          // calculate the cost
1029
0
          int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
1030
0
          int minPos = -1, finalChange = 0, curChange = 0;
1031
1032
0
          for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
1033
0
          {
1034
0
            uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
1035
0
            if(piDstCoeff[ uiBlkPos ] != 0 )
1036
0
            {
1037
0
              int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
1038
0
              int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1039
0
                -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
1040
1041
0
              if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1042
0
              {
1043
0
                costDown -= (4<<SCALE_BITS);
1044
0
              }
1045
1046
0
              if(costUp<costDown)
1047
0
              {
1048
0
                curCost = costUp;
1049
0
                curChange =  1;
1050
0
              }
1051
0
              else
1052
0
              {
1053
0
                curChange = -1;
1054
0
                if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1055
0
                {
1056
0
                  curCost = std::numeric_limits<int64_t>::max();
1057
0
                }
1058
0
                else
1059
0
                {
1060
0
                  curCost = costDown;
1061
0
                }
1062
0
              }
1063
0
            }
1064
0
            else
1065
0
            {
1066
0
              curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1067
0
              curChange = 1 ;
1068
1069
0
              if(n<firstNZPosInCG)
1070
0
              {
1071
0
                uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1072
0
                if(thissignbit != signbit )
1073
0
                {
1074
0
                  curCost = std::numeric_limits<int64_t>::max();
1075
0
                }
1076
0
              }
1077
0
            }
1078
1079
0
            if( curCost<minCostInc)
1080
0
            {
1081
0
              minCostInc = curCost;
1082
0
              finalChange = curChange;
1083
0
              minPos = uiBlkPos;
1084
0
            }
1085
0
          }
1086
1087
0
          if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
1088
0
          {
1089
0
            finalChange = -1;
1090
0
          }
1091
1092
0
          if(plSrcCoeff[minPos]>=0)
1093
0
          {
1094
0
            piDstCoeff[minPos] += finalChange ;
1095
0
          }
1096
0
          else
1097
0
          {
1098
0
            piDstCoeff[minPos] -= finalChange ;
1099
0
          }
1100
0
        }
1101
0
      }
1102
1103
0
      if(lastCG==1)
1104
0
      {
1105
0
        lastCG=0 ;
1106
0
      }
1107
0
    }
1108
1109
    // Check due to saving of last pos. Sign data hiding can change the position of last coef.
1110
0
    if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 )
1111
0
    {
1112
0
      int scanPos = iLastScanPos - 1;
1113
0
      for( ; scanPos >= 0; scanPos-- )
1114
0
      {
1115
0
        if( piDstCoeff[cctx.blockPos( scanPos )] )
1116
0
          break;
1117
0
      }
1118
0
      iLastScanPos = scanPos;
1119
0
    }
1120
0
  }
1121
0
  tu.lastPos[compID] = iLastScanPos;
1122
0
}
1123
1124
// Rate-distortion optimised quantisation that walks the block sub-block by sub-block in
// increasing scan order and uses the "*TS" context helpers of CoeffCodingContext
// (sigCtxIdAbsTS, parityCtxIdAbsTS, signCtxIdAbsTS, lrg1CtxIdAbsTS, neighTS).
// NOTE(review): the "TS" suffix presumably stands for transform skip; isTransformSkip is
// nevertheless derived from tu.mtsIdx[compID] rather than assumed - confirm against callers.
//
// \param tu      transform unit; supplies block geometry, SPS and the destination level buffer
//                (tu.getCoeffs( compID ).buf), which is overwritten with signed levels
// \param compID  component to quantise; CHECKed to be below MAX_NUM_TBLOCKS
// \param coeffs  source coefficients (only coeffs.buf is read)
// \param absSum  in/out: the absolute value of every final level is ADDED to it; it is not reset here
// \param qp      quantisation parameter; its per()/rem() select the shift and the quantiser scale
// \param ctx     entropy-coding context; only its fractional-bit tables are used
//
// Side effects on members visible in this function: m_bdpcm is set to 0, m_testedLevels is
// rewritten per coefficient, and the scratch arrays m_pdCostCoeff, m_pdCostSig, m_pdCostCoeff0
// and m_pdCostCoeffGroupSig are overwritten.
void QuantRDOQ::rateDistOptQuantTS( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
1125
2.76k
{
1126
2.76k
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
1127
1128
2.76k
  const SPS &sps            = *tu.cs->sps;
1129
2.76k
  const CompArea& rect      = tu.blocks[compID];
1130
2.76k
  const uint32_t width      = rect.width;
1131
2.76k
  const uint32_t height     = rect.height;
1132
2.76k
  const ChannelType chType  = toChannelType(compID);
1133
2.76k
  const int channelBitDepth = sps.bitDepths[ chType ];
1134
1135
2.76k
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
1136
1137
2.76k
  const int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );
1138
1139
2.76k
  const uint32_t maxNumCoeff                        = rect.area();
1140
1141
2.76k
  // NOTE(review): this check runs after tu.blocks[compID] has already been indexed above.
  CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );
1142
1143
2.76k
  // scalingListType is only validated here; it is not used further in this function.
  int scalingListType = getScalingListType( tu.cu->predMode, compID );
1144
2.76k
  CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );
1145
1146
2.76k
  const TCoeff    *srcCoeff = coeffs.buf;
1147
2.76k
        TCoeffSig *dstCoeff = tu.getCoeffs( compID ).buf;
1148
1149
2.76k
  // Per-scan-position cost scratch buffers (member arrays, indexed by scanPos).
  double *costCoeff  = m_pdCostCoeff;
1150
2.76k
  double *costSig    = m_pdCostSig;
1151
2.76k
  double *costCoeff0 = m_pdCostCoeff0;
1152
1153
2.76k
  // costCoeff0 is not cleared: every entry in [0, maxNumCoeff) is assigned in the main loop below.
  memset( m_pdCostCoeff,  0, sizeof( double ) *  maxNumCoeff );
1154
2.76k
  memset( m_pdCostSig,    0, sizeof( double ) *  maxNumCoeff );
1155
1156
2.76k
  m_bdpcm = 0;
1157
1158
2.76k
  const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
1159
2.76k
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
1160
2.76k
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
1161
2.76k
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];
1162
2.76k
  // Same predicate as needsSqrt2Scale above, re-evaluated for the error-scale lookup.
  const double errorScale              = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);
1163
1164
2.76k
  // Largest absolute level allowed; all candidate levels below are clipped to it.
  const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;
1165
1166
2.76k
  // Candidate absolute levels for the current coefficient; only the first m_testedLevels entries are valid.
  uint32_t coeffLevels[3];
1167
2.76k
  // Only element [0] (cost of the zero level) is written in this function; the array is handed to
  // xGetCodedLevelTSPred - presumably that helper fills the remaining entries, confirm there.
  double   coeffLevelError[4];
1168
1169
2.76k
  CoeffCodingContext cctx( tu, compID, tu.cs->slice->signDataHidingEnabled );
1170
2.76k
  // Index of the last scan position inside a sub-block (sub-block size minus one).
  const int sbSizeM1    = ( 1 << cctx.log2CGSize() ) - 1;
1171
2.76k
  double    baseCost    = 0;
1172
2.76k
  uint32_t  goRiceParam = 0;
1173
1174
2.76k
  // One entry per sub-block: rate of the sub-block significance flag as finally chosen.
  double *costSigSubBlock = m_pdCostCoeffGroupSig;
1175
2.76k
  memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
1176
1177
2.76k
  const int sbNum = width * height >> cctx.log2CGSize();
1178
2.76k
  int scanPos;
1179
2.76k
  coeffGroupRDStats rdStats;
1180
1181
2.76k
  // Becomes true once a sub-block has been tested against zeroing-out and kept as significant.
  bool anySigCG = false;
1182
1183
2.76k
  // Budget of context-coded bins for the whole block: floor( 1.75 * number of coefficients ).
  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
1184
2.76k
  cctx.remRegBins = maxCtxBins;
1185
1186
31.5k
  // Sub-blocks are visited in increasing sbId order; baseCost and rdStats are restarted for each one.
  for( int sbId = 0; sbId < sbNum; sbId++ )
1187
28.7k
  {
1188
28.7k
    cctx.initSubblock( sbId );
1189
1190
28.7k
    // Number of non-zero levels chosen so far in this sub-block.
    int noCoeffCoded = 0;
1191
28.7k
    baseCost = 0.0;
1192
28.7k
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));
1193
1194
28.7k
    rdStats.iNumSbbCtxBins = 0;
1195
1196
489k
    for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
1197
460k
    {
1198
460k
      // Loop-invariant: always the final scan index of the sub-block.
      int lastPosCoded = sbSizeM1;
1199
460k
      scanPos = cctx.minSubPos() + scanPosInSB;
1200
      //===== quantization =====
1201
460k
      uint32_t blkPos = cctx.blockPos( scanPos );
1202
1203
      // set coeff
1204
460k
      // |coefficient| times the quantiser scale, computed in 64 bit.
      const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
1205
460k
      // Clamp so that adding the rounding offset 1 << ( qBits - 1 ) below cannot overflow Intermediate_Int.
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) );
1206
1207
460k
      // Round-to-nearest level, clipped to entropyCodingMaximum.
      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
1208
460k
      // One below the rounded level, but never less than 1 (so it is 1 when roundAbsLevel is 0 or 1).
      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);
1209
1210
460k
      // Truncated (round-down) level and its successor, both clipped to entropyCodingMaximum.
      uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits));
1211
460k
      uint32_t upAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1);
1212
1213
460k
      // Build the candidate list: the rounded level always comes first.
      m_testedLevels = 0;
1214
460k
      coeffLevels[m_testedLevels++] = roundAbsLevel;
1215
1216
460k
      if (minAbsLevel != roundAbsLevel)
1217
460k
        coeffLevels[m_testedLevels++] = minAbsLevel;
1218
1219
460k
      int rightPixel, belowPixel, predPixel;
1220
1221
460k
      // rightPixel / belowPixel are uninitialised here, so neighTS is expected to set them
      // from dstCoeff - NOTE(review): confirm in CoeffCodingContext.
      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
1222
460k
      predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0);
1223
1224
460k
      // upAbsLevel is added as a third candidate only if it is new and deriveModCoeff returned 1 for it.
      if (upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1)
1225
0
        coeffLevels[m_testedLevels++] = upAbsLevel;
1226
1227
460k
      // Cost of quantising this coefficient to zero: squared scaled magnitude times errorScale.
      double dErr = double(levelDouble);
1228
460k
      coeffLevelError[0] = dErr * dErr * errorScale;
1229
1230
460k
      costCoeff0[scanPos] = coeffLevelError[0];
1231
460k
      // Provisional (unsigned, rounded) level; it is in place while the context ids below are
      // derived from dstCoeff and is replaced by the final signed level further down.
      dstCoeff[blkPos]    = coeffLevels[0];
1232
1233
      //===== coefficient level estimation =====
1234
460k
            unsigned    ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
1235
460k
            uint32_t    cLevel;
1236
460k
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );
1237
1238
      //goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
1239
460k
      // Rice parameter is fixed to 1; the template-based derivation above is disabled.
      goRiceParam = 1;
1240
460k
      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0);
1241
460k
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
1242
460k
      // 1 for a negative source coefficient, 0 otherwise.
      const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;
1243
1244
460k
      DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
1245
1246
460k
      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0);
1247
460k
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);
1248
1249
460k
      const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
1250
460k
      // True only for the last scan position of a sub-block in which no non-zero level was chosen so far.
      bool lastCoeff = false; //
1251
460k
      if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
1252
28.7k
      {
1253
28.7k
        lastCoeff = true;
1254
28.7k
      }
1255
460k
      int numUsedCtxBins = 0;
1256
460k
      // Selects the level among the candidates. NOTE(review): the helper is defined elsewhere;
      // costCoeff[scanPos], costSig[scanPos] and numUsedCtxBins are read right after the call,
      // so they are presumably filled through reference parameters - confirm.
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
1257
460k
                                    &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);
1258
1259
460k
      // Charge the bins against the block budget and remember them per sub-block, so they can be
      // handed back if the sub-block ends up not being coded.
      cctx.remRegBins -= numUsedCtxBins;
1260
460k
      rdStats.iNumSbbCtxBins += numUsedCtxBins;
1261
1262
460k
      if (cLevel > 0)
1263
0
      {
1264
0
        noCoeffCoded++;
1265
0
      }
1266
1267
460k
      // Store the final level with the sign of the source coefficient.
      TCoeff level = cLevel;
1268
460k
      dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level;
1269
460k
      baseCost           += costCoeff[ scanPos ];
1270
460k
      rdStats.d64SigCost += costSig[ scanPos ];
1271
1272
460k
      // A non-zero level marks the sub-block significant and contributes to the statistics
      // used by the zero-out decision after this loop.
      if( dstCoeff[ blkPos ] )
1273
0
      {
1274
0
        cctx.setSigGroup();
1275
0
        rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
1276
0
        rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
1277
0
      }
1278
460k
    } //end for (scanPosInSB)
1279
1280
28.7k
    // Case 1: every level in the sub-block is zero. The per-coefficient significance cost is
    // replaced by the rate of a sub-block flag equal to 0 and the bins are returned to the budget.
    if( !cctx.isSigGroup() )
1281
28.7k
    {
1282
28.7k
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
1283
28.7k
      baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
1284
28.7k
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
1285
28.7k
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
1286
28.7k
    }
1287
0
    // Case 2: the sub-block has non-zero levels. Zeroing it out is evaluated unless this is the
    // last sub-block and no earlier sub-block was kept significant.
    // NOTE(review): presumably the flag of that sub-block is not signalled in that situation - confirm.
    else if( sbId != sbNum - 1 || anySigCG )
1288
0
    {
1289
      // rd-cost if SigCoeffGroupFlag = 0, initialization
1290
0
      double costZeroSB = baseCost;
1291
1292
0
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
1293
1294
0
      baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
1295
0
      costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
1296
0
      costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
1297
1298
0
      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
1299
0
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
1300
0
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels
1301
1302
0
      // Zeroing the whole sub-block is cheaper: clear its levels, restore the zero-level costs
      // and give the context-coded bins back to the budget.
      if( costZeroSB < baseCost )
1303
0
      {
1304
0
        cctx.resetSigGroup();
1305
0
        baseCost = costZeroSB;
1306
0
        costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
1307
0
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
1308
1309
0
        for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
1310
0
        {
1311
0
          scanPos = cctx.minSubPos() + scanPosInSB;
1312
0
          uint32_t blkPos = cctx.blockPos( scanPos );
1313
1314
0
          if( dstCoeff[ blkPos ] )
1315
0
          {
1316
0
            dstCoeff[ blkPos ] = 0;
1317
0
            costCoeff[ scanPos ] = costCoeff0[ scanPos ];
1318
0
            costSig[ scanPos] = 0;
1319
0
          }
1320
0
        }
1321
0
      }
1322
0
      else
1323
0
      {
1324
0
        anySigCG = true;
1325
0
      }
1326
0
    }
1327
28.7k
  }
1328
1329
  //===== accumulate the absolute sum of the final levels (no last-position search is done here) =====
1330
463k
  // This scanPos shadows the function-scope scanPos declared above.
  for( int scanPos = 0; scanPos < maxNumCoeff; scanPos++ )
1331
460k
  {
1332
460k
    int blkPos = cctx.blockPos( scanPos );
1333
460k
    TCoeff level = dstCoeff[ blkPos ];
1334
460k
    absSum += abs(level);
1335
460k
  }
1336
2.76k
}
1337
1338
// Rate-distortion optimised quantisation for a block whose samples are predicted from the
// previously reconstructed neighbour inside the same block (tu.cu->bdpcmM selects the direction).
//
// For every sub-block, in scan order:
//   1. the source sample is predicted from the reconstructed neighbour kept in m_fullCoeff
//      (neighbour at posX-1 when dirMode == 1, otherwise neighbour at posY-1; the first
//      column/row has no predictor),
//   2. the prediction difference is quantised and the cheapest level is selected by
//      xGetCodedLevelTSPred(),
//   3. the chosen level is de-quantised and added to the predictor so that m_fullCoeff holds the
//      reconstruction the following positions predict from.
// After a sub-block is processed, coding it is compared against zeroing it completely.
//
// \param tu      transform unit; the selected levels are written to tu.getCoeffs(compID)
// \param compID  component to process
// \param coeffs  source samples (buf/stride are read)
// \param absSum  the absolute values of all final levels are ADDED to this value (it is not reset here)
// \param qp      quantisation parameter (per/rem are read)
// \param ctx     context set providing the fractional bit estimates
//
// Side effects: overwrites m_pdCostCoeff, m_pdCostSig, m_pdCostCoeff0, m_pdCostCoeffGroupSig,
// m_fullCoeff, m_bdpcm and m_testedLevels.
void QuantRDOQ::forwardRDPCM( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps = *tu.cs->sps;
  const CompArea& rect = tu.blocks[compID];
  const uint32_t width = rect.width;
  const uint32_t height = rect.height;
  const ChannelType chType = toChannelType(compID);
  const int channelBitDepth = sps.bitDepths[chType];

  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
  const int  dirMode = tu.cu->bdpcmM[toChannelType(compID)];

  const int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

  const uint32_t maxNumCoeff = rect.area();

  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

  int scalingListType = getScalingListType(tu.cu->predMode, compID);
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs(compID).buf;

  double *costCoeff = m_pdCostCoeff;
  double *costSig = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;

  // costCoeff0 needs no clearing: every entry is written in the main loop before it is read
  memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
  memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
  memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff);

  // xGetCodedLevelTSPred() reads the direction from this member
  m_bdpcm = dirMode;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
  const double errorScale = xGetErrScaleCoeff(needsSqrt2Scale, width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);

  // parameters of the inverse quantiser used to rebuild the predictor for the next sample
  TrQuantParams trQuantParams;
  trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
  trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

  const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;

  uint32_t coeffLevels[3];
  double   coeffLevelError[4];

  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
  const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
  double    baseCost = 0;
  uint32_t  goRiceParam = 0;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));

  const int sbNum = width * height >> cctx.log2CGSize();
  int scanPos;
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // budget of context coded bins for the whole block
  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
  cctx.remRegBins = maxCtxBins;

  for (int sbId = 0; sbId < sbNum; sbId++)
  {
    cctx.initSubblock(sbId);

    int noCoeffCoded = 0;
    baseCost = 0.0;
    memset(&rdStats, 0, sizeof(coeffGroupRDStats));
    rdStats.iNumSbbCtxBins = 0;

    for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
    {
      scanPos = cctx.minSubPos() + scanPosInSB;
      //===== quantization =====
      uint32_t blkPos = cctx.blockPos(scanPos);

      // predictor: reconstruction of the neighbour in prediction direction, 0 at the block border
      const int posX = cctx.posX(scanPos);
      const int posY = cctx.posY(scanPos);
      const int posS = (1 == dirMode) ? posX : posY;
      const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
      TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;

      // set coeff
      const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
      // clip so that adding the rounding offset below cannot overflow
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));
      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

      // candidate levels: the rounded level and, if different, minAbsLevel
      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if (minAbsLevel != roundAbsLevel)
        coeffLevels[m_testedLevels++] = minAbsLevel;

      // error of quantising to zero
      double dErr = double(levelDouble);
      coeffLevelError[0]  = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
      unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
      uint32_t    cLevel;
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());

      //goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);
      goRiceParam = 1;
      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
      const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;
      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

      DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);

      const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);
      // last position of the sub-block reached without any non-zero level so far
      const bool lastCoeff = (scanPosInSB == sbSizeM1) && (noCoeffCoded == 0);
      int rightPixel, belowPixel;
      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
      int numUsedCtxBins = 0;
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);
      cctx.remRegBins -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if (cLevel > 0)
      {
        noCoeffCoded++;
      }
      dstCoeff[blkPos] = cLevel;

      if (sign)
      {
        dstCoeff[blkPos] = -dstCoeff[blkPos];
      }

      // reconstruct this position; it is the predictor of the following one
      xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams );
      m_fullCoeff[blkPos] += predCoeff;

      baseCost += costCoeff[scanPos];
      rdStats.d64SigCost += costSig[scanPos];

      if (dstCoeff[blkPos])
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
        rdStats.d64UncodedDist += costCoeff0[scanPos];
      }
    } //end for (iScanPosinCG)

    if (!cctx.isSigGroup())
    {
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if (sbId != sbNum - 1 || anySigCG)
    {
      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));

      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
      costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if (costZeroSB < baseCost)
      {
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
        {
          scanPos = cctx.minSubPos() + scanPosInSB;
          uint32_t blkPos = cctx.blockPos(scanPos);

          const int posX = cctx.posX(scanPos);
          const int posY = cctx.posY(scanPos);
          const int posS = (1 == dirMode) ? posX : posY;
          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
          // With a zero level the reconstruction equals the predictor. m_fullCoeff is addressed by
          // block position (like posNb and the main loop above), not by scan position.
          m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0;

          if (dstCoeff[blkPos])
          {
            dstCoeff[blkPos] = 0;
            costCoeff[scanPos] = costCoeff0[scanPos];
            costSig[scanPos] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  }

  //===== accumulate the absolute sum of all selected levels =====
  for (int pos = 0; pos < int(maxNumCoeff); pos++)
  {
    int blkPos = cctx.blockPos(pos);
    TCoeff level = dstCoeff[blkPos];
    absSum += abs(level);
  }
}
1563
1564
// De-quantise a single level.
// \param pRes           receives the de-quantised value
// \param coeff          quantised level (only read)
// \param trQuantParams  qScale is the multiplier; rightShift > 0 requests a rounded right shift,
//                       otherwise the product is scaled up by 2^(-rightShift)
void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeffSig& coeff, const TrQuantParams& trQuantParams)
{
  const auto shift  = trQuantParams.rightShift;
  const auto scaled = Intermediate_Int(coeff) * trQuantParams.qScale;

  if (!(shift > 0))
  {
    // no right shift: multiply instead of shifting left
    pRes = TCoeff(scaled * (1 << -shift));
    return;
  }

  // right shift with rounding offset of half the divisor
  const Intermediate_Int roundOffset = Intermediate_Int(1) << (shift - 1);
  pRes = TCoeff((scaled + roundOffset) >> shift);
}
1577
1578
inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost,
1579
  double&            rd64CodedCost0,
1580
  double&            rd64CodedCostSig,
1581
  Intermediate_Int    levelDouble,
1582
  int                 qBits,
1583
  double              errorScale,
1584
  uint32_t coeffLevels[],
1585
  double coeffLevelError[],
1586
  const BinFracBits* fracBitsSig,
1587
  const BinFracBits& fracBitsPar,
1588
  CoeffCodingContext& cctx,
1589
  const FracBitsAccess& fracBitsAccess,
1590
  const BinFracBits& fracBitsSign,
1591
  const BinFracBits& fracBitsGt1,
1592
  const uint8_t      sign,
1593
  int                rightPixel,
1594
  int                belowPixel,
1595
  uint16_t           ricePar,
1596
  bool               isLast,
1597
  const int          maxLog2TrDynamicRange,
1598
  int&               numUsedCtxBins
1599
) const
1600
8.98M
{
1601
8.98M
  double currCostSig = 0;
1602
8.98M
  uint32_t   bestAbsLevel = 0;
1603
8.98M
  numUsedCtxBins = 0;
1604
8.98M
  int numBestCtxBin = 0;
1605
8.98M
  if (!isLast && coeffLevels[0] < 3)
1606
8.17M
  {
1607
8.17M
    if (cctx.remRegBins >= 4)
1608
8.02M
    rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
1609
149k
    else
1610
149k
      rd64CodedCostSig = xGetICost(1 << SCALE_BITS);
1611
8.17M
    rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
1612
8.17M
    if (cctx.remRegBins >= 4)
1613
8.02M
      numUsedCtxBins++;
1614
8.17M
    if (coeffLevels[0] == 0)
1615
7.73M
    {
1616
7.73M
      return bestAbsLevel;
1617
7.73M
    }
1618
8.17M
  }
1619
811k
  else
1620
811k
  {
1621
811k
    rd64CodedCost = MAX_DOUBLE;
1622
811k
  }
1623
1624
1.25M
  if (!isLast)
1625
786k
  {
1626
786k
    if (cctx.remRegBins >= 4)
1627
750k
      currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
1628
36.1k
    else
1629
36.1k
      currCostSig = xGetICost(1 << SCALE_BITS);
1630
786k
    if (coeffLevels[0] >= 3 && cctx.remRegBins >= 4)
1631
322k
      numUsedCtxBins++;
1632
786k
  }
1633
1634
3.34M
  for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++)
1635
2.09M
  {
1636
2.09M
    int absLevel = coeffLevels[errorInd - 1];
1637
2.09M
    double dErr = 0.0;
1638
2.09M
    dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits));
1639
2.09M
    coeffLevelError[errorInd] = dErr * dErr * errorScale;
1640
2.09M
    int modAbsLevel = absLevel;
1641
2.09M
    if (cctx.remRegBins >= 4) 
1642
2.02M
    {
1643
2.02M
      modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
1644
2.02M
    }
1645
2.09M
    int numCtxBins = 0;
1646
2.09M
    double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, maxLog2TrDynamicRange));
1647
1648
2.09M
    if (cctx.remRegBins >= 4)
1649
2.02M
      dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more
1650
1651
2.09M
    if (dCurrCost < rd64CodedCost)
1652
905k
    {
1653
905k
      bestAbsLevel = absLevel;
1654
905k
      rd64CodedCost = dCurrCost;
1655
905k
      rd64CodedCostSig = currCostSig;
1656
905k
      numBestCtxBin = numCtxBins;
1657
905k
    }
1658
2.09M
  }
1659
1.25M
  numUsedCtxBins += numBestCtxBin;
1660
1.25M
  return bestAbsLevel;
1661
8.98M
}
1662
1663
inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
1664
                                    const BinFracBits&        fracBitsPar,
1665
                                    const CoeffCodingContext& cctx,
1666
                                    const FracBitsAccess&     fracBitsAccess,
1667
                                    const BinFracBits&        fracBitsSign,
1668
                                    const BinFracBits&        fracBitsGt1,
1669
                                    int&                      numCtxBins,
1670
                                    const uint8_t             sign,
1671
                                    const uint16_t            ricePar,
1672
                                    const int                 maxLog2TrDynamicRange  ) const
1673
2.09M
{
1674
 
1675
2.09M
  if (cctx.remRegBins < 4) // Full by-pass coding 
1676
67.0k
  {
1677
67.0k
    int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero 
1678
1679
67.0k
    uint32_t symbol = absLevel;
1680
1681
67.0k
    uint32_t length;
1682
67.0k
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
1683
67.0k
    if (symbol < (threshold << ricePar))
1684
32.8k
    {
1685
32.8k
      length = symbol >> ricePar;
1686
32.8k
      rate += (length + 1 + ricePar) << SCALE_BITS;
1687
32.8k
    }
1688
34.2k
    else
1689
34.2k
    {
1690
34.2k
      length = ricePar;
1691
34.2k
      symbol = symbol - (threshold << ricePar);
1692
154k
      while (symbol >= (1 << length))
1693
120k
      {
1694
120k
        symbol -= (1 << (length++));
1695
120k
      }
1696
34.2k
      rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1697
34.2k
    }
1698
1699
67.0k
    return rate;
1700
67.0k
  }
1701
1702
2.02M
  else if (cctx.remRegBins >= 4 && cctx.remRegBins < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here)
1703
11.9k
  {
1704
11.9k
    int rate = fracBitsSign.intBits[sign]; // sign bits
1705
11.9k
    if (absLevel)
1706
11.4k
      numCtxBins++;
1707
1708
11.9k
    if (absLevel > 1)
1709
8.24k
    {
1710
8.24k
      rate += fracBitsGt1.intBits[1];
1711
8.24k
      rate += fracBitsPar.intBits[(absLevel - 2) & 1];
1712
1713
8.24k
      numCtxBins += 2;
1714
1715
8.24k
      int cutoffVal = 2;
1716
1717
8.24k
      if (absLevel >= cutoffVal)
1718
8.24k
      {
1719
8.24k
        uint32_t symbol = (absLevel - cutoffVal) >> 1;
1720
8.24k
        uint32_t length;
1721
8.24k
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
1722
8.24k
        if (symbol < (threshold << ricePar))
1723
5.63k
        {
1724
5.63k
          length = symbol >> ricePar;
1725
5.63k
          rate += (length + 1 + ricePar) << SCALE_BITS;
1726
5.63k
        }
1727
2.60k
        else
1728
2.60k
        {
1729
2.60k
          length = ricePar;
1730
2.60k
          symbol = symbol - (threshold << ricePar);
1731
10.6k
          while (symbol >= (1 << length))
1732
8.07k
          {
1733
8.07k
            symbol -= (1 << (length++));
1734
8.07k
          }
1735
2.60k
          rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1736
2.60k
        }
1737
8.24k
      }
1738
8.24k
    }
1739
3.72k
    else if (absLevel == 1)
1740
3.18k
    {
1741
3.18k
      rate += fracBitsGt1.intBits[0];
1742
3.18k
      numCtxBins++;
1743
3.18k
    }
1744
539
    else
1745
539
    {
1746
539
      rate = 0;
1747
539
    }
1748
11.9k
    return rate;
1749
11.9k
  }
1750
    
1751
2.01M
  int rate = fracBitsSign.intBits[sign];
1752
1753
2.01M
  if (absLevel)
1754
1.56M
    numCtxBins++;
1755
1756
2.01M
  if( absLevel > 1 )
1757
677k
  {
1758
677k
    rate += fracBitsGt1.intBits[1];
1759
677k
    rate += fracBitsPar.intBits[( absLevel - 2 ) & 1];
1760
677k
    numCtxBins += 2;
1761
1762
677k
          int cutoffVal = 2;
1763
677k
    const int numGtBins = 4;
1764
3.38M
    for( int i = 0; i < numGtBins; i++ )
1765
2.71M
    {
1766
2.71M
      if( absLevel >= cutoffVal )
1767
1.94M
      {
1768
1.94M
        const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 );
1769
1.94M
        const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX );
1770
1.94M
        unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) );
1771
1.94M
        rate += fracBitsGtX.intBits[gtX];
1772
1.94M
        numCtxBins++;
1773
1.94M
      }
1774
2.71M
      cutoffVal += 2;
1775
2.71M
    }
1776
1777
677k
    if( absLevel >= cutoffVal )
1778
338k
    {
1779
338k
      uint32_t symbol = ( absLevel - cutoffVal ) >> 1;
1780
338k
      uint32_t length;
1781
338k
      const int threshold = COEF_REMAIN_BIN_REDUCTION;
1782
338k
      if( symbol < ( threshold << ricePar ) )
1783
130k
      {
1784
130k
        length = symbol >> ricePar;
1785
130k
        rate  += ( length + 1 + ricePar ) << SCALE_BITS;
1786
130k
      }
1787
208k
      else
1788
208k
      {
1789
208k
        length = ricePar;
1790
208k
        symbol = symbol - ( threshold << ricePar );
1791
887k
        while( symbol >= ( 1 << length ) )
1792
679k
        {
1793
679k
          symbol -= ( 1 << ( length++ ) );
1794
679k
        }
1795
208k
        rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS;
1796
208k
      }
1797
338k
    }
1798
677k
  }
1799
1.33M
  else if( absLevel == 1 )
1800
885k
  {
1801
885k
    rate += fracBitsGt1.intBits[0];
1802
885k
    numCtxBins++;
1803
885k
  }
1804
449k
  else
1805
449k
  {
1806
449k
    rate = 0;
1807
449k
  }
1808
2.01M
  return rate;
1809
2.09M
}
1810
1811
} // namespace vvenc
1812
1813
//! \}
1814