Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/QuantRDOQ.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
------------------------------------------------------------------------------------------- */
41
42
43
/** \file     QuantRDOQ.cpp
44
    \brief    transform and quantization class
45
*/
46
47
#include "QuantRDOQ.h"
48
#include "UnitTools.h"
49
#include "ContextModelling.h"
50
#include "CodingStructure.h"
51
#include "dtrace_next.h"
52
#include "dtrace_buffer.h"
53
54
#include <stdlib.h>
55
#include <memory.h>
56
57
//! \ingroup CommonLib
58
//! \{
59
60
namespace vvenc {
61
62
// Per-coefficient-group statistics gathered by the RDOQ search.
// The RDOQ code clears an instance with memset() before each coefficient group,
// so the struct must stay a plain aggregate without constructors or initializers.
struct coeffGroupRDStats
{
  int    iNNZbeforePos0;        // NOTE(review): presumably the number of non-zero levels found before scan position 0 of the group - confirm at the use site
  double d64CodedLevelandDist;  // distortion and level cost only
  double d64UncodedDist;        // all zero coded block distortion
  double d64SigCost;            // NOTE(review): presumably the accumulated significance-flag cost of the group - confirm
  double d64SigCost_0;          // NOTE(review): presumably the significance-flag cost at position 0 of the group - confirm
  int    iNumSbbCtxBins;        // NOTE(review): presumably the number of context-coded bins spent in the group - confirm
};
71
72
73
//! \ingroup CommonLib
74
//! \{
75
76
// ====================================================================================================================
77
// Constants
78
// ====================================================================================================================
79
80
81
// ====================================================================================================================
82
// Static functions
83
// ====================================================================================================================
84
85
// ====================================================================================================================
86
// QuantRDOQ class member functions
87
// ====================================================================================================================
88
89
90
20.7k
/** Construct an RDOQ quantizer, optionally on top of an existing one.
 * \param other           quantizer passed on to the Quant base class; when non-null it must
 *                        itself be a QuantRDOQ, otherwise the CHECK below fires
 * \param useScalingLists forwarded unchanged to the Quant base class
 */
QuantRDOQ::QuantRDOQ( const Quant* other, bool useScalingLists )
  : Quant( other, useScalingLists )
{
  // A null 'other' gives a null 'rdoq'; xInitScalingList() then makes this instance
  // the owner of its error-scale tables instead of borrowing them.
  const QuantRDOQ* rdoq = dynamic_cast<const QuantRDOQ*>( other );

  CHECK( other && !rdoq, "The RDOQ cast must be successfull!" );

  xInitScalingList( rdoq );
}
97
98
/** Release the error-scale tables.
 *  xDestroyScalingList() returns immediately for instances that do not own the tables.
 */
QuantRDOQ::~QuantRDOQ()
{
  xDestroyScalingList();
}
102
103
104
105
106
/** Get the best level in RD sense
107
 *
108
 * \returns best quantized transform level for given scan position
109
 *
110
 * This method calculates the best quantized transform level for a given scan position.
111
 */
112
inline uint32_t QuantRDOQ::xGetCodedLevel( double&            rd64CodedCost,
113
                                       double&            rd64CodedCost0,
114
                                       double&            rd64CodedCostSig,
115
                                       Intermediate_Int   lLevelDouble,
116
                                       uint32_t               uiMaxAbsLevel,
117
                                       const BinFracBits* fracBitsSig,
118
                                       const BinFracBits& fracBitsPar,
119
                                       const BinFracBits& fracBitsGt1,
120
                                       const BinFracBits& fracBitsGt2,
121
                                       const int          remRegBins,
122
                                       unsigned           goRiceZero,
123
                                       uint16_t             ui16AbsGoRice,
124
                                       int                iQBits,
125
                                       double             errorScale,
126
                                       bool               bLast,
127
                                       const int          maxLog2TrDynamicRange
128
                                     ) const
129
0
{
130
0
  double dCurrCostSig   = 0;
131
0
  uint32_t   uiBestAbsLevel = 0;
132
133
0
  if( !bLast && uiMaxAbsLevel < 3 )
134
0
  {
135
0
    rd64CodedCostSig    = xGetRateSigCoef( *fracBitsSig, 0 );
136
0
    rd64CodedCost       = rd64CodedCost0 + rd64CodedCostSig;
137
0
    if( uiMaxAbsLevel == 0 )
138
0
    {
139
0
      return uiBestAbsLevel;
140
0
    }
141
0
  }
142
0
  else
143
0
  {
144
0
    rd64CodedCost       = MAX_DOUBLE;
145
0
  }
146
147
0
  if( !bLast )
148
0
  {
149
0
    dCurrCostSig        = xGetRateSigCoef( *fracBitsSig, 1 );
150
0
  }
151
152
0
  uint32_t uiMinAbsLevel    = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 );
153
0
  for( int uiAbsLevel  = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- )
154
0
  {
155
0
    double dErr         = double( lLevelDouble  - ( Intermediate_Int(uiAbsLevel) << iQBits ) );
156
157
0
    double dCurrCost    = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, maxLog2TrDynamicRange ) );
158
0
    dCurrCost          += dCurrCostSig;
159
160
0
    if( dCurrCost < rd64CodedCost )
161
0
    {
162
0
      uiBestAbsLevel    = uiAbsLevel;
163
0
      rd64CodedCost     = dCurrCost;
164
0
      rd64CodedCostSig  = dCurrCostSig;
165
0
    }
166
0
  }
167
168
0
  return uiBestAbsLevel;
169
0
}
170
171
/** Calculates the cost for specific absolute transform level
172
 * \param uiAbsLevel scaled quantized level
173
 * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
174
 * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
175
 * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
176
 * \param c1Idx
177
 * \param c2Idx
178
 * \param useLimitedPrefixLength
179
 * \param maxLog2TrDynamicRange
180
 * \returns cost of given absolute transform level
181
 */
182
inline int QuantRDOQ::xGetICRate( const uint32_t         uiAbsLevel,
183
                                  const BinFracBits& fracBitsPar,
184
                                  const BinFracBits& fracBitsGt1,
185
                                  const BinFracBits& fracBitsGt2,
186
                                  const int          remRegBins,
187
                                  unsigned           goRiceZero,
188
                                  const uint16_t       ui16AbsGoRice,
189
                                  const int          maxLog2TrDynamicRange  ) const
190
0
{
191
0
  if( remRegBins < 4 )
192
0
  {
193
0
    int       iRate   = int( xGetIEPRate() ); // cost of sign bit
194
0
    uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
195
0
    uint32_t  length;
196
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
197
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
198
0
    {
199
0
      length = symbol >> ui16AbsGoRice;
200
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
201
0
    }
202
0
    else
203
0
    {
204
0
      length = ui16AbsGoRice;
205
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
206
0
      while( symbol >= ( 1 << length ) )
207
0
      {
208
0
        symbol -= ( 1 << ( length++ ) );
209
0
      }
210
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
211
0
    }
212
0
    return iRate;
213
0
  }
214
215
0
  int iRate = int( xGetIEPRate() ); // cost of sign bit
216
0
  const uint32_t cthres = 4;
217
0
  if( uiAbsLevel >= cthres )
218
0
  {
219
0
    uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;
220
0
    uint32_t length;
221
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
222
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
223
0
    {
224
0
      length = symbol >> ui16AbsGoRice;
225
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
226
0
    }
227
0
    else
228
0
    {
229
0
      length = ui16AbsGoRice;
230
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
231
0
      while( symbol >= ( 1 << length ) )
232
0
      {
233
0
        symbol -= ( 1 << ( length++ ) );
234
0
      }
235
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
236
0
    }
237
238
0
    iRate += fracBitsGt1.intBits[1];
239
0
    iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
240
0
    iRate += fracBitsGt2.intBits[1];
241
0
  }
242
0
  else if( uiAbsLevel == 1 )
243
0
  {
244
0
    iRate += fracBitsGt1.intBits[0];
245
0
  }
246
0
  else if( uiAbsLevel == 2 )
247
0
  {
248
0
    iRate += fracBitsGt1.intBits[1];
249
0
    iRate += fracBitsPar.intBits[0];
250
0
    iRate += fracBitsGt2.intBits[0];
251
0
  }
252
0
  else if( uiAbsLevel == 3 )
253
0
  {
254
0
    iRate += fracBitsGt1.intBits[1];
255
0
    iRate += fracBitsPar.intBits[1];
256
0
    iRate += fracBitsGt2.intBits[0];
257
0
  }
258
0
  else
259
0
  {
260
0
    iRate = 0;
261
0
  }
262
0
  return  iRate;
263
0
}
264
265
inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
266
1.45M
{
267
1.45M
  return xGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] );
268
1.45M
}
269
270
/** Calculates the cost of signaling the last significant coefficient in the block
271
 * \param uiPosX X coordinate of the last significant coefficient
272
 * \param uiPosY Y coordinate of the last significant coefficient
273
 * \param component colour component ID
274
 * \returns cost of last significant coefficient
275
 */
276
/*
277
 * \param uiWidth width of the transform unit (TU)
278
*/
279
inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const
280
0
{
281
0
  uint32_t    CtxX  = g_uiGroupIdx[PosX];
282
0
  uint32_t    CtxY  = g_uiGroupIdx[PosY];
283
0
  double  Cost  = lastBitsX[ CtxX ] + lastBitsY[ CtxY ];
284
0
  if( CtxX > 3 )
285
0
  {
286
0
    Cost += xGetIEPRate() * ((CtxX-2)>>1);
287
0
  }
288
0
  if( CtxY > 3 )
289
0
  {
290
0
    Cost += xGetIEPRate() * ((CtxY-2)>>1);
291
0
  }
292
0
  return xGetICost( Cost );
293
0
}
294
295
296
inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
297
10.3M
{
298
10.3M
  return xGetICost( fracBitsSig.intBits[uiSignificance] );
299
10.3M
}
300
301
/** Get the cost for a specific rate
302
 * \param dRate rate of a bit
303
 * \returns cost at the specific rate
304
 */
305
inline double QuantRDOQ::xGetICost        ( double                          dRate         ) const
306
14.4M
{
307
14.4M
  return m_dLambda * dRate;
308
14.4M
}
309
310
/** Get the cost of an equal probable bit
311
 * \returns cost of equal probable bit
312
 */
313
inline double QuantRDOQ::xGetIEPRate() const
314
0
{
315
0
  return 32768;
316
0
}
317
318
319
double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false)
320
102k
{
321
102k
  const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
322
102k
  double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
323
102k
  double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
324
102k
  dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
325
102k
  const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
326
102k
  double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
327
102k
  return    finalErrScale;
328
102k
}
329
330
331
332
/** set error scale coefficients
333
 * \param list                   list ID
334
 * \param size
335
 * \param qp                     quantization parameter
336
 * \param maxLog2TrDynamicRange
337
 * \param bitDepths              reference to bit depth array for all channels
338
 */
339
void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths )
340
36.6M
{
341
36.6M
  const int width = g_scalingListSizeX[sizeX];
342
36.6M
  const int height = g_scalingListSizeX[sizeY];
343
36.6M
  const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C;
344
36.6M
  const int channelBitDepth = bitDepths[channelType];
345
36.6M
  const int iTransformShift = getTransformShift( channelBitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[channelType] );  // Represents scaling through forward transform
346
347
36.6M
  double dErrScale = (double)( 1 << SCALE_BITS );                                // Compensate for scaling of bitcount in Lagrange cost function
348
349
36.6M
  const bool needsSqrt2 = ((Log2(width*height)) & 1) == 1;
350
36.6M
  double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 );
351
36.6M
  dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) );                     // Compensate for scaling through forward transform
352
353
36.6M
  if( getScalingListEnabled() )
354
0
  {
355
0
    uint32_t i, uiMaxNumCoeff = width * height;
356
357
0
    int*  piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY );
358
0
    double* pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp );
359
360
0
    for( i = 0; i < uiMaxNumCoeff; i++ )
361
0
    {
362
0
      pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << (DISTORTION_PRECISION_ADJUSTMENT( bitDepths[channelType] ) << 1));
363
0
    }
364
0
  }
365
366
36.6M
  int QStep = g_quantScales[needsSqrt2][qp];
367
368
36.6M
  xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) =
369
36.6M
    dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths[channelType]) << 1));
370
36.6M
}
371
372
/** set flat matrix value to quantized coefficient
373
 */
374
void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths)
375
20.7k
{
376
20.7k
  Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );
377
378
20.7k
  const int minimumQp = 0;
379
20.7k
  const int maximumQp = SCALING_LIST_REM_NUM;
380
381
166k
  for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++)
382
145k
  {
383
1.16M
    for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++)
384
1.01M
    {
385
7.12M
      for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
386
6.10M
      {
387
42.7M
        for(int qp = minimumQp; qp < maximumQp; qp++)
388
36.6M
        {
389
36.6M
          xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths );
390
36.6M
        }
391
6.10M
      }
392
1.01M
    }
393
145k
  }
394
20.7k
}
395
396
/** initialization process of scaling list array
397
 */
398
void QuantRDOQ::xInitScalingList( const QuantRDOQ* other )
399
20.7k
{
400
20.7k
  m_isErrScaleListOwner = other == nullptr;
401
402
20.7k
  bool useScalingLists = getScalingListEnabled();
403
404
166k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
405
145k
  {
406
1.16M
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
407
1.01M
    {
408
7.12M
      for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
409
6.10M
      {
410
42.7M
        for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
411
36.6M
        {
412
36.6M
          if( m_isErrScaleListOwner )
413
36.6M
          {
414
36.6M
            m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr;
415
36.6M
          }
416
0
          else
417
0
          {
418
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp];
419
0
          }
420
36.6M
        } // listID loop
421
6.10M
      }
422
1.01M
    }
423
145k
  }
424
20.7k
}
425
426
/** destroy quantization matrix array
427
 */
428
void QuantRDOQ::xDestroyScalingList()
429
20.7k
{
430
20.7k
  if( !m_isErrScaleListOwner ) return;
431
432
166k
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
433
145k
  {
434
1.16M
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
435
1.01M
    {
436
7.12M
      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
437
6.10M
      {
438
42.7M
        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
439
36.6M
        {
440
36.6M
          if(m_errScale[sizeIdX][sizeIdY][listId][qp])
441
0
          {
442
0
            delete [] m_errScale[sizeIdX][sizeIdY][listId][qp];
443
0
          }
444
36.6M
        }
445
6.10M
      }
446
1.01M
    }
447
145k
  }
448
//   Quant::destroyScalingList();
449
20.7k
}
450
451
452
/** Quantize one component of a transform unit, with RDOQ where it is enabled
 *
 * Falls back to Quant::quant() when RDOQ is off for the block. With RDOQ on, the block is either
 * zeroed (selective RDOQ decided against it) or passed to the matching RDOQ routine.
 *
 * \param tu       transform unit holding the destination coefficients
 * \param compID   component to quantize
 * \param pSrc     transform coefficients to quantize
 * \param uiAbsSum [out] set to 0 when the block is zeroed, otherwise filled by the called routine
 * \param cQP      quantization parameters
 * \param ctx      context state forwarded to the called routine
 */
void QuantRDOQ::quant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx)
{
  const CompArea& area      = tu.blocks[compID];
  const uint32_t  blkWidth  = area.width;
  const uint32_t  blkHeight = area.height;

  CoeffSigBuf dstCoeffs = tu.getCoeffs( compID );

  const bool isTrSkip = tu.mtsIdx[compID] == MTS_SKIP;

  bool rdoqActive = isTrSkip ? m_useRDOQTS : m_RDOQ > 0;

  // Outside of ISP luma, blocks need more than two samples in each direction for RDOQ.
  if( !( tu.cu->ispMode && isLuma( compID ) ) )
  {
    rdoqActive = rdoqActive && blkWidth > 2 && blkHeight > 2;
  }

  if( !rdoqActive )
  {
    Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ( tu, compID, pSrc, cQP ) )
  {
    // Selective RDOQ found nothing worth coding: emit an all-zero block.
    dstCoeffs.fill(0);
    uiAbsSum = 0;
    tu.lastPos[compID] = -1;
    return;
  }

  if( !isTrSkip )
  {
    xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  // Transform skip: BDPCM blocks take the RDPCM path, all others the transform-skip RDOQ.
  if( tu.cu->bdpcmM[toChannelType(compID)] )
  {
    forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else
  {
    rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
}
503
504
505
506
void QuantRDOQ::xRateDistOptQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx &ctx)
507
0
{
508
0
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
509
510
0
  const SPS &sps            = *tu.cs->sps;
511
0
  const CompArea& rect      = tu.blocks[compID];
512
0
  const uint32_t uiWidth    = rect.width;
513
0
  const uint32_t uiHeight   = rect.height;
514
0
  const ChannelType chType  = toChannelType(compID);
515
0
  const int channelBitDepth = sps.bitDepths[ chType ];
516
517
0
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
518
519
0
  const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
520
  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
521
  * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
522
  * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
523
  * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
524
  */
525
526
  // Represents scaling through forward transform
527
0
  const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
528
529
0
  double     d64BlockUncodedCost               = 0;
530
0
  const uint32_t uiLog2BlockWidth                  = Log2(uiWidth);
531
0
  const uint32_t uiLog2BlockHeight                 = Log2(uiHeight);
532
0
  const uint32_t uiMaxNumCoeff                     = rect.area();
533
534
0
  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");
535
536
0
  int scalingListType = getScalingListType(tu.cu->predMode, compID);
537
538
0
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
539
540
0
  const TCoeff    *plSrcCoeff = pSrc.buf;
541
0
        TCoeffSig *piDstCoeff = tu.getCoeffs(compID).buf;
542
543
0
  double *pdCostCoeff  = m_pdCostCoeff;
544
0
  double *pdCostSig    = m_pdCostSig;
545
0
  double *pdCostCoeff0 = m_pdCostCoeff0;
546
0
  int    *rateIncUp    = m_rateIncUp;
547
0
  int    *rateIncDown  = m_rateIncDown;
548
0
  int    *sigRateDelta = m_sigRateDelta;
549
0
  TCoeff *deltaU       = m_deltaU;
550
551
0
  memset( piDstCoeff,     0, sizeof( TCoeffSig ) * uiMaxNumCoeff );
552
0
  memset( m_pdCostCoeff,  0, sizeof( double ) *  uiMaxNumCoeff );
553
0
  memset( m_pdCostSig,    0, sizeof( double ) *  uiMaxNumCoeff );
554
0
  memset( m_rateIncUp,    0, sizeof( int    ) *  uiMaxNumCoeff );
555
0
  memset( m_rateIncDown,  0, sizeof( int    ) *  uiMaxNumCoeff );
556
0
  memset( m_sigRateDelta, 0, sizeof( int    ) *  uiMaxNumCoeff );
557
0
  memset( m_deltaU,       0, sizeof( TCoeff ) *  uiMaxNumCoeff );
558
559
560
0
  const bool   needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID );
561
0
  const bool   isTransformSkip    = tu.mtsIdx[compID]==MTS_SKIP;
562
0
  const double *const pdErrScale  = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
563
0
  const int    *const piQCoef     = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight);
564
0
  const bool isLfnstApplied       = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
565
0
  const bool enableScalingLists   = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied);
566
0
  const int    defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)];
567
0
  const double defaultErrorScale              = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip));
568
0
  const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0);                   // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
569
570
571
0
  const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange);
572
0
  const TCoeff entropyCodingMaximum =  (1 << maxLog2TrDynamicRange) - 1;
573
574
0
  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
575
0
  const int    iCGSizeM1      = (1 << cctx.log2CGSize()) - 1;
576
577
0
  int     iCGLastScanPos      = -1;
578
0
  double  d64BaseCost         = 0;
579
0
  int     iLastScanPos        = -1;
580
581
0
  int ctxBinSampleRatio   = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT;
582
0
  int remRegBins          = (tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio) >> 4;
583
0
  uint32_t  goRiceParam   = 0;
584
585
0
  double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig;
586
0
  memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );
587
0
  int iScanPos;
588
0
  coeffGroupRDStats rdStats;
589
590
#if ENABLE_TRACING
591
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
592
#endif
593
594
0
  const uint32_t lfnstIdx = tu.cu->lfnstIdx;
595
596
0
  const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
597
598
0
  for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--)
599
0
  {
600
0
    cctx.initSubblock( subSetId );
601
602
0
    int remRegBinsStartCG = remRegBins;
603
604
0
    uint32_t maxNonZeroPosInCG = iCGSizeM1;
605
0
    if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
606
0
    {
607
0
      maxNonZeroPosInCG = 7;
608
0
    }
609
610
0
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));
611
612
0
    for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- )
613
0
    {
614
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
615
0
      uint32_t    blkPos = cctx.blockPos( iScanPos );
616
0
      piDstCoeff[ blkPos ] = 0;
617
0
    }
618
0
    for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
619
0
    {
620
0
      iScanPos = cctx.minSubPos() + iScanPosinCG;
621
      //===== quantization =====
622
0
      uint32_t    uiBlkPos          = cctx.blockPos(iScanPos);
623
624
      // set coeff
625
0
      const int    quantisationCoefficient = (enableScalingLists) ? piQCoef   [uiBlkPos]               : defaultQuantisationCoefficient;
626
0
      const double errorScale              = (enableScalingLists) ? pdErrScale[uiBlkPos]               : defaultErrorScale;
627
0
      const int64_t  tmpLevel                = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient;
628
629
0
      const Intermediate_Int lLevelDouble  = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1)));
630
631
0
      uint32_t uiMaxAbsLevel        = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits));
632
633
0
      const double dErr         = double( lLevelDouble );
634
0
      pdCostCoeff0[ iScanPos ]  = dErr * dErr * errorScale;
635
0
      d64BlockUncodedCost      += pdCostCoeff0[ iScanPos ];
636
0
      piDstCoeff[ uiBlkPos ]    = uiMaxAbsLevel;
637
638
0
      if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 )
639
0
      {
640
0
        iLastScanPos            = iScanPos;
641
0
        iCGLastScanPos          = cctx.subSetId();
642
0
      }
643
644
0
      if ( iLastScanPos >= 0 )
645
0
      {
646
647
#if ENABLE_TRACING
648
        uint32_t uiCGPosY = cctx.cgPosY();
649
        uint32_t uiCGPosX = cctx.cgPosX();
650
        uint32_t uiPosY = cctx.posY( iScanPos );
651
        uint32_t uiPosX = cctx.posX( iScanPos );
652
        DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY );
653
#endif
654
        //===== coefficient level estimation =====
655
0
        unsigned ctxIdSig = 0;
656
0
        if( iScanPos != iLastScanPos )
657
0
        {
658
0
          ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 );
659
0
        }
660
0
        uint32_t    uiLevel;
661
0
        uint8_t ctxOffset     = cctx.ctxOffsetAbs     ();
662
0
        uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
663
0
        uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
664
0
        uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
665
0
        uint32_t    goRiceZero    = 0;
666
0
        if( remRegBins < 4 )
667
0
        {
668
0
          unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
669
0
          goRiceParam             = g_auiGoRiceParsCoeff   [ sumAbs ];
670
0
          goRiceZero              = g_auiGoRicePosCoeff0(0, goRiceParam);
671
0
        }
672
673
0
        const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
674
0
        const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
675
0
        const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
676
677
0
        if( iScanPos == iLastScanPos )
678
0
        {
679
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
680
0
                                    lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, maxLog2TrDynamicRange );
681
0
        }
682
0
        else
683
0
        {
684
0
          DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );
685
686
0
          const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
687
0
          uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ],
688
0
                                    lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, maxLog2TrDynamicRange );
689
0
          sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] );
690
0
        }
691
692
0
        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
693
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) );
694
0
        DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) );
695
696
0
        deltaU[ uiBlkPos ]        = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8));
697
698
0
        if( uiLevel > 0 )
699
0
        {
700
0
          int rateNow              = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
701
0
          rateIncUp   [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
702
0
          rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
703
0
        }
704
0
        else // uiLevel == 0
705
0
        {
706
0
          if( remRegBins < 4 )
707
0
          {
708
0
            int rateNow            = xGetICRate( uiLevel,   fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
709
0
            rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow;
710
0
          }
711
0
          else
712
0
          {
713
0
            rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ];
714
0
          }
715
0
        }
716
0
        piDstCoeff[ uiBlkPos ] = uiLevel;
717
0
        d64BaseCost           += pdCostCoeff [ iScanPos ];
718
719
0
        if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
720
0
        {
721
0
          goRiceParam   = 0;
722
0
        }
723
0
        else if( remRegBins >= 4 )
724
0
        {
725
0
          int  sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4);
726
0
          goRiceParam = g_auiGoRiceParsCoeff[sumAll];
727
0
          remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
728
0
        }
729
0
      }
730
0
      else
731
0
      {
732
0
        d64BaseCost    += pdCostCoeff0[ iScanPos ];
733
0
      }
734
0
      rdStats.d64SigCost += pdCostSig[ iScanPos ];
735
0
      if (iScanPosinCG == 0 )
736
0
      {
737
0
        rdStats.d64SigCost_0 = pdCostSig[ iScanPos ];
738
0
      }
739
0
      if (piDstCoeff[ uiBlkPos ] )
740
0
      {
741
0
        cctx.setSigGroup();
742
0
        rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ];
743
0
        rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ];
744
0
        if ( iScanPosinCG != 0 )
745
0
        {
746
0
          rdStats.iNNZbeforePos0++;
747
0
        }
748
0
      }
749
0
    } //end for (iScanPosinCG)
750
751
0
    if (iCGLastScanPos >= 0)
752
0
    {
753
0
      if( cctx.subSetId() )
754
0
      {
755
0
        if( !cctx.isSigGroup() )
756
0
        {
757
0
          const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
758
0
          d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
759
0
          pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
760
0
        }
761
0
        else
762
0
        {
763
0
          if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below.
764
0
          {
765
0
            if ( rdStats.iNNZbeforePos0 == 0 )
766
0
            {
767
0
              d64BaseCost -= rdStats.d64SigCost_0;
768
0
              rdStats.d64SigCost -= rdStats.d64SigCost_0;
769
0
            }
770
            // rd-cost if SigCoeffGroupFlag = 0, initialization
771
0
            double d64CostZeroCG = d64BaseCost;
772
773
0
            const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
774
775
0
            if (cctx.subSetId() < iCGLastScanPos)
776
0
            {
777
0
              d64BaseCost  += xGetRateSigCoeffGroup(fracBitsSigGroup,1);
778
0
              d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0);
779
0
              pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1);
780
0
            }
781
782
            // try to convert the current coeff group from non-zero to all-zero
783
0
            d64CostZeroCG += rdStats.d64UncodedDist;  // distortion for resetting non-zero levels to zero levels
784
0
            d64CostZeroCG -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
785
0
            d64CostZeroCG -= rdStats.d64SigCost;     // sig cost for all coeffs, including zero levels and non-zerl levels
786
787
                                                     // if we can save cost, change this block to all-zero block
788
0
            if ( d64CostZeroCG < d64BaseCost )
789
0
            {
790
0
              cctx.resetSigGroup();
791
0
              d64BaseCost = d64CostZeroCG;
792
0
              remRegBins = remRegBinsStartCG;
793
0
              if (cctx.subSetId() < iCGLastScanPos)
794
0
              {
795
0
                pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0);
796
0
              }
797
              // reset coeffs to 0 in this block
798
0
              for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
799
0
              {
800
0
                iScanPos      = cctx.minSubPos() + iScanPosinCG;
801
0
                uint32_t uiBlkPos = cctx.blockPos( iScanPos );
802
803
0
                if (piDstCoeff[ uiBlkPos ])
804
0
                {
805
0
                  piDstCoeff [ uiBlkPos ] = 0;
806
0
                  pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ];
807
0
                  pdCostSig  [ iScanPos ] = 0;
808
0
                }
809
0
              }
810
0
            } // end if ( d64CostAllZeros < d64BaseCost )
811
0
          }
812
0
        } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0)
813
0
      }
814
0
      else
815
0
      {
816
0
        cctx.setSigGroup();
817
0
      }
818
0
    }
819
0
  } //end for (cctx.subSetId)
820
821
822
  //===== estimate last position =====
823
0
  if ( iLastScanPos < 0 )
824
0
  {
825
0
    return;
826
0
  }
827
828
0
  double  d64BestCost         = 0;
829
0
  int     iBestLastIdxP1      = 0;
830
831
832
0
  if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 )
833
0
  {
834
0
    const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
835
0
    d64BestCost  = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] );
836
0
    d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] );
837
0
  }
838
0
  else
839
0
  {
840
0
    bool previousCbf       = tu.cbf[COMP_Cb];
841
0
    bool lastCbfIsInferred = false;
842
0
    if( useIntraSubPartitions )
843
0
    {
844
0
      bool rootCbfSoFar       = false;
845
0
      bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
846
0
      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
847
0
      if( isLastSubPartition )
848
0
      {
849
0
        TransformUnit* tuPointer = tu.cu->firstTU;
850
0
        for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
851
0
        {
852
0
          rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
853
0
          tuPointer     = tuPointer->next;
854
0
        }
855
0
        if( !rootCbfSoFar )
856
0
        {
857
0
          lastCbfIsInferred = true;
858
0
        }
859
0
      }
860
0
      if( !lastCbfIsInferred )
861
0
      {
862
0
        previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
863
0
      }
864
0
    }
865
0
    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
866
867
0
    if( !lastCbfIsInferred )
868
0
    {
869
0
      d64BestCost  = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]);
870
0
      d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]);
871
0
    }
872
0
    else
873
0
    {
874
0
      d64BestCost  = d64BlockUncodedCost;
875
0
    }
876
0
  }
877
878
0
  int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 };
879
0
  int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 };
880
0
  {
881
0
    int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
882
0
    int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
883
0
    int bitsX = 0;
884
0
    int bitsY = 0;
885
0
    int ctxId;
886
    //X-coordinate
887
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++)
888
0
    {
889
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) );
890
0
      lastBitsX[ ctxId ]   = bitsX + fB.intBits[ 0 ];
891
0
      bitsX               +=         fB.intBits[ 1 ];
892
0
    }
893
0
    lastBitsX[ctxId] = bitsX;
894
    //Y-coordinate
895
0
    for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++)
896
0
    {
897
0
      const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) );
898
0
      lastBitsY[ ctxId ]   = bitsY + fB.intBits[ 0 ];
899
0
      bitsY               +=         fB.intBits[ 1 ];
900
0
    }
901
0
    lastBitsY[ctxId] = bitsY;
902
0
  }
903
904
905
0
  bool bFoundLast = false;
906
0
  for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--)
907
0
  {
908
0
    d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ];
909
0
    if (cctx.isSigGroup( iCGScanPos ) )
910
0
    {
911
0
      uint32_t maxNonZeroPosInCG = iCGSizeM1;
912
0
      if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) )
913
0
      {
914
0
        maxNonZeroPosInCG = 7;
915
0
      }
916
0
      for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- )
917
0
      {
918
0
        iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG;
919
920
0
        if (iScanPos > iLastScanPos)
921
0
        {
922
0
          continue;
923
0
        }
924
0
        uint32_t   uiBlkPos     = cctx.blockPos( iScanPos );
925
926
0
        if( piDstCoeff[ uiBlkPos ] )
927
0
        {
928
0
          uint32_t   uiPosY = uiBlkPos >> uiLog2BlockWidth;
929
0
          uint32_t   uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth );
930
0
          double d64CostLast  = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY );
931
932
0
          double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ];
933
934
0
          if( totalCost < d64BestCost )
935
0
          {
936
0
            iBestLastIdxP1  = iScanPos + 1;
937
0
            d64BestCost     = totalCost;
938
0
          }
939
0
          if( piDstCoeff[ uiBlkPos ] > 1 )
940
0
          {
941
0
            bFoundLast = true;
942
0
            break;
943
0
          }
944
0
          d64BaseCost      -= pdCostCoeff[ iScanPos ];
945
0
          d64BaseCost      += pdCostCoeff0[ iScanPos ];
946
0
        }
947
0
        else
948
0
        {
949
0
          d64BaseCost      -= pdCostSig[ iScanPos ];
950
0
        }
951
0
      } //end for
952
0
      if (bFoundLast)
953
0
      {
954
0
        break;
955
0
      }
956
0
    } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ])
957
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID );
958
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) );
959
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded  =%d\n", (int64_t)( d64BaseCost ) );
960
961
0
  } // end for
962
963
964
0
  for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ )
965
0
  {
966
0
    int blkPos = cctx.blockPos( scanPos );
967
0
    TCoeff level = piDstCoeff[ blkPos ];
968
0
    uiAbsSum += level;
969
0
    piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level;
970
0
  }
971
972
  //===== clean uncoded coefficients =====
973
0
  for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ )
974
0
  {
975
0
    piDstCoeff[ cctx.blockPos( scanPos ) ] = 0;
976
0
  }
977
0
  iLastScanPos = iBestLastIdxP1 - 1;
978
979
0
  if( cctx.signHiding() && uiAbsSum>=2)
980
0
  {
981
0
    const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]);
982
0
    int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16
983
0
                                  / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)))
984
0
                             + 0.5);
985
986
0
    int lastCG = -1;
987
0
    int absSum = 0 ;
988
0
    int n ;
989
0
    for (int subSet = iCGNum - 1; subSet >= 0; subSet--)
990
0
    {
991
0
      int  subPos         = subSet << cctx.log2CGSize();
992
0
      int  firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1;
993
0
      absSum = 0 ;
994
995
0
      for( n = iCGSizeM1; n >= 0; --n )
996
0
      {
997
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
998
0
        {
999
0
          lastNZPosInCG = n;
1000
0
          break;
1001
0
        }
1002
0
      }
1003
1004
0
      for( n = 0; n <= iCGSizeM1; n++ )
1005
0
      {
1006
0
        if( piDstCoeff[ cctx.blockPos( n + subPos )] )
1007
0
        {
1008
0
          firstNZPosInCG = n;
1009
0
          break;
1010
0
        }
1011
0
      }
1012
1013
0
      for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ )
1014
0
      {
1015
0
        absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]);
1016
0
      }
1017
1018
0
      if(lastNZPosInCG>=0 && lastCG==-1)
1019
0
      {
1020
0
        lastCG = 1;
1021
0
      }
1022
1023
0
      if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD )
1024
0
      {
1025
0
        uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1);
1026
0
        if( signbit!=(absSum&0x1) )  // hide but need tune
1027
0
        {
1028
          // calculate the cost
1029
0
          int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max();
1030
0
          int minPos = -1, finalChange = 0, curChange = 0;
1031
1032
0
          for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n )
1033
0
          {
1034
0
            uint32_t uiBlkPos   = cctx.blockPos( n + subPos );
1035
0
            if(piDstCoeff[ uiBlkPos ] != 0 )
1036
0
            {
1037
0
              int64_t costUp   = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos];
1038
0
              int64_t costDown = rdFactor * (   deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos]
1039
0
                -   ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0);
1040
1041
0
              if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1)
1042
0
              {
1043
0
                costDown -= (4<<SCALE_BITS);
1044
0
              }
1045
1046
0
              if(costUp<costDown)
1047
0
              {
1048
0
                curCost = costUp;
1049
0
                curChange =  1;
1050
0
              }
1051
0
              else
1052
0
              {
1053
0
                curChange = -1;
1054
0
                if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1)
1055
0
                {
1056
0
                  curCost = std::numeric_limits<int64_t>::max();
1057
0
                }
1058
0
                else
1059
0
                {
1060
0
                  curCost = costDown;
1061
0
                }
1062
0
              }
1063
0
            }
1064
0
            else
1065
0
            {
1066
0
              curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ;
1067
0
              curChange = 1 ;
1068
1069
0
              if(n<firstNZPosInCG)
1070
0
              {
1071
0
                uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1);
1072
0
                if(thissignbit != signbit )
1073
0
                {
1074
0
                  curCost = std::numeric_limits<int64_t>::max();
1075
0
                }
1076
0
              }
1077
0
            }
1078
1079
0
            if( curCost<minCostInc)
1080
0
            {
1081
0
              minCostInc = curCost;
1082
0
              finalChange = curChange;
1083
0
              minPos = uiBlkPos;
1084
0
            }
1085
0
          }
1086
1087
0
          if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum)
1088
0
          {
1089
0
            finalChange = -1;
1090
0
          }
1091
1092
0
          if(plSrcCoeff[minPos]>=0)
1093
0
          {
1094
0
            piDstCoeff[minPos] += finalChange ;
1095
0
          }
1096
0
          else
1097
0
          {
1098
0
            piDstCoeff[minPos] -= finalChange ;
1099
0
          }
1100
0
        }
1101
0
      }
1102
1103
0
      if(lastCG==1)
1104
0
      {
1105
0
        lastCG=0 ;
1106
0
      }
1107
0
    }
1108
1109
    // Check due to saving of last pos. Sign data hiding can change the position of last coef.
1110
0
    if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 )
1111
0
    {
1112
0
      int scanPos = iLastScanPos - 1;
1113
0
      for( ; scanPos >= 0; scanPos-- )
1114
0
      {
1115
0
        if( piDstCoeff[cctx.blockPos( scanPos )] )
1116
0
          break;
1117
0
      }
1118
0
      iLastScanPos = scanPos;
1119
0
    }
1120
0
  }
1121
0
  tu.lastPos[compID] = iLastScanPos;
1122
0
}
1123
1124
/** Rate-distortion optimised quantisation using the TS ("...AbsTS") coefficient-coding contexts.
 *
 *  For every coefficient up to three candidate levels are handed to xGetCodedLevelTSPred(), which
 *  returns the chosen level; afterwards each sub-block is tested for being cheaper when zeroed
 *  out completely.
 *
 *  \param tu      transform unit; the signed levels are written to tu.getCoeffs( compID )
 *  \param compID  component to quantise
 *  \param coeffs  source coefficients (only coeffs.buf is read, indexed by block position)
 *  \param absSum  in/out: the absolute values of all resulting levels are ADDED to it; it is not
 *                 reset here, so the caller has to initialise it
 *  \param qp      quantisation parameter (per/rem are queried with the transform-skip flag)
 *  \param ctx     context set supplying the fractional bit estimates
 *
 *  Side effects: sets m_bdpcm to 0, overwrites m_testedLevels and uses m_pdCostCoeff, m_pdCostSig,
 *  m_pdCostCoeff0 and m_pdCostCoeffGroupSig as scratch buffers.
 */
void QuantRDOQ::rateDistOptQuantTS( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps            = *tu.cs->sps;
  const CompArea& rect      = tu.blocks[compID];
  const uint32_t width      = rect.width;
  const uint32_t height     = rect.height;
  const ChannelType chType  = toChannelType(compID);
  const int channelBitDepth = sps.bitDepths[ chType ];

  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();

  const int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange );

  const uint32_t maxNumCoeff                        = rect.area();

  CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" );

  int scalingListType = getScalingListType( tu.cu->predMode, compID );
  CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" );

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs( compID ).buf;

  double *costCoeff  = m_pdCostCoeff;
  double *costSig    = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;

  // costCoeff0 needs no clearing: every entry is written below before it is read
  memset( m_pdCostCoeff,  0, sizeof( double ) *  maxNumCoeff );
  memset( m_pdCostSig,    0, sizeof( double ) *  maxNumCoeff );

  m_bdpcm = 0;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID );  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)];
  // reuse the flag computed above instead of evaluating TU::needsSqrt2Scale() a second time
  const double errorScale              = xGetErrScaleCoeff( needsSqrt2Scale, width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);

  const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1;

  uint32_t coeffLevels[3];       // candidate levels of the current coefficient, m_testedLevels entries are valid
  double   coeffLevelError[4];   // [0] is the distortion of level 0; passed on to xGetCodedLevelTSPred()

  CoeffCodingContext cctx( tu, compID, tu.cs->slice->signDataHidingEnabled );
  const int sbSizeM1    = ( 1 << cctx.log2CGSize() ) - 1;
  double    baseCost    = 0;
  uint32_t  goRiceParam = 0;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) );

  const int sbNum = width * height >> cctx.log2CGSize();
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // initial value of cctx.remRegBins: 7/4 bins per coefficient
  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
  cctx.remRegBins = maxCtxBins;

  for( int sbId = 0; sbId < sbNum; sbId++ )
  {
    cctx.initSubblock( sbId );

    int noCoeffCoded = 0;
    baseCost = 0.0;
    memset( &rdStats, 0, sizeof (coeffGroupRDStats));

    rdStats.iNumSbbCtxBins = 0;

    for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
    {
      int scanPos = cctx.minSubPos() + scanPosInSB;
      //===== quantization =====
      const uint32_t blkPos = cctx.blockPos( scanPos );

      // set coeff
      const int64_t          tmpLevel    = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient;
      // clamp so that adding the rounding offset below cannot overflow Intermediate_Int
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) );

      // candidate levels: rounded level, one below it (but at least 1) and truncated level + 1
      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

      uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits));
      uint32_t upAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1);

      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if( minAbsLevel != roundAbsLevel )
      {
        coeffLevels[m_testedLevels++] = minAbsLevel;
      }

      int rightPixel, belowPixel, predPixel;

      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
      predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0);

      // the third candidate is only tested when deriveModCoeff() maps it to 1
      if( upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1 )
      {
        coeffLevels[m_testedLevels++] = upAbsLevel;
      }

      double dErr = double(levelDouble);
      coeffLevelError[0] = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      // provisional (unsigned) level; the context derivations below read dstCoeff
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
      unsigned          ctxIdSig    = cctx.sigCtxIdAbsTS( scanPos, dstCoeff );
      uint32_t          cLevel;
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() );

      //goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff );
      goRiceParam = 1;
      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0);
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
      const uint8_t     sign         = srcCoeff[ blkPos ] < 0 ? 1 : 0;

      DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig );

      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0);
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

      const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );

      // set for the final position of a sub-block in which no level has been coded so far
      bool lastCoeff = ( scanPosInSB == sbSizeM1 && noCoeffCoded == 0 );

      int numUsedCtxBins = 0;
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
                                    &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);

      cctx.remRegBins -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if( cLevel > 0 )
      {
        noCoeffCoded++;
      }

      TCoeff level = cLevel;
      dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level;
      baseCost           += costCoeff[ scanPos ];
      rdStats.d64SigCost += costSig[ scanPos ];

      if( dstCoeff[ blkPos ] )
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ];
        rdStats.d64UncodedDist       += costCoeff0[ scanPos ];
      }
    } //end for (scanPosInSB)

    if( !cctx.isSigGroup() )
    {
      // all levels are zero: replace the per-coefficient sig costs by the cost of a zero group flag
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );
      baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if( sbId != sbNum - 1 || anySigCG )
    {
      // NOTE(review): the last sub-block is exempt while no earlier sub-block stayed significant -
      // presumably because its group flag is not coded in that case; confirm against the syntax.

      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) );

      baseCost   += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );
      costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
      costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 );

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if( costZeroSB < baseCost )
      {
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 );
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        // reset coeffs to 0 in this sub-block
        for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ )
        {
          const int      scanPos = cctx.minSubPos() + scanPosInSB;
          const uint32_t blkPos  = cctx.blockPos( scanPos );

          if( dstCoeff[ blkPos ] )
          {
            dstCoeff[ blkPos ] = 0;
            costCoeff[ scanPos ] = costCoeff0[ scanPos ];
            costSig[ scanPos] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  }

  //===== accumulate the absolute sum of all levels =====
  const int numCoeff = int( maxNumCoeff );   // signed bound, avoids a signed/unsigned comparison
  for( int scanPos = 0; scanPos < numCoeff; scanPos++ )
  {
    int blkPos = cctx.blockPos( scanPos );
    TCoeff level = dstCoeff[ blkPos ];
    absSum += abs(level);
  }
}
1337
1338
/** \brief Rate-distortion optimised quantisation with forward residual DPCM.
 *
 *  For every scan position the already reconstructed neighbour (left neighbour when the
 *  BDPCM direction read from the CU equals 1, otherwise the neighbour above) is subtracted
 *  from the source coefficient, the difference is quantised, and xGetCodedLevelTSPred()
 *  picks the level. The chosen level is dequantised again and stored, together with the
 *  predictor, in m_fullCoeff so that later positions predict from the reconstruction.
 *  After each sub-block the cost of signalling the sub-block as all-zero is compared with
 *  the cost of keeping its levels.
 *
 *  \param tu      transform unit; its coefficient buffer for compID receives the levels
 *  \param compID  component to process
 *  \param coeffs  source coefficients (buffer and stride are read)
 *  \param absSum  the absolute values of all resulting levels are ADDED to this value;
 *                 it is not reset here
 *  \param qp      quantisation parameters (per / rem)
 *  \param ctx     context set from which the fractional-bit tables are taken
 */
void QuantRDOQ::forwardRDPCM( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx )
{
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();

  const SPS &sps = *tu.cs->sps;
  const CompArea& rect = tu.blocks[compID];
  const uint32_t width = rect.width;
  const uint32_t height = rect.height;
  const ChannelType chType = toChannelType(compID);
  const int channelBitDepth = sps.bitDepths[chType];

  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
  const int  dirMode = tu.cu->bdpcmM[toChannelType(compID)];

  const int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);

  const uint32_t maxNumCoeff = rect.area();

  CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID");

  int scalingListType = getScalingListType(tu.cu->predMode, compID);
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const TCoeff    *srcCoeff = coeffs.buf;
        TCoeffSig *dstCoeff = tu.getCoeffs(compID).buf;

  double *costCoeff = m_pdCostCoeff;
  double *costSig = m_pdCostSig;
  double *costCoeff0 = m_pdCostCoeff0;

  // costCoeff0 is not cleared: every entry is written in the main loop before it is read.
  memset(m_pdCostCoeff, 0, sizeof(double) *  maxNumCoeff);
  memset(m_pdCostSig, 0, sizeof(double) *  maxNumCoeff);
  memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff);

  // remembered in a member because xGetCodedLevelTSPred() reads it
  m_bdpcm = dirMode;

  const bool   needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID);  // should always be false - transform-skipped blocks don't require sqrt(2) compensation.
  const bool   isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP;
  const int    qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0);  // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
  const int    quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];
  const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip);

  // parameters for the inverse quantisation used to build the reconstruction in m_fullCoeff
  TrQuantParams trQuantParams;
  trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip)));
  trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)];

  const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1;

  uint32_t coeffLevels[3];
  double   coeffLevelError[4];

  CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled);
  const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1;
  double    baseCost = 0;
  uint32_t  goRiceParam = 0;

  double *costSigSubBlock = m_pdCostCoeffGroupSig;
  memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double));

  const int sbNum = width * height >> cctx.log2CGSize();
  int scanPos;
  coeffGroupRDStats rdStats;

  bool anySigCG = false;

  // budget of regular bins for the block: 1.75 bins per coefficient
  int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2;
  cctx.remRegBins = maxCtxBins;

  for (int sbId = 0; sbId < sbNum; sbId++)
  {
    cctx.initSubblock(sbId);

    int noCoeffCoded = 0;
    baseCost = 0.0;
    memset(&rdStats, 0, sizeof(coeffGroupRDStats));
    rdStats.iNumSbbCtxBins = 0;

    for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
    {
      int lastPosCoded = sbSizeM1;
      scanPos = cctx.minSubPos() + scanPosInSB;
      //===== quantization =====
      uint32_t blkPos = cctx.blockPos(scanPos);

      // posS is the coordinate along the prediction direction; 0 means there is no neighbour
      // inside the block and the predictor is 0.
      // NOTE(review): posNb is built from coeffs.stride while m_fullCoeff is written at
      // blkPos from cctx.blockPos(); this presumably relies on both layouts matching - confirm.
      const int posX = cctx.posX(scanPos);
      const int posY = cctx.posY(scanPos);
      const int posS = (1 == dirMode) ? posX : posY;
      const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
      TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0;

      // set coeff
      const int64_t          tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient;
      // clamp so that adding the rounding offset below cannot overflow Intermediate_Int
      const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1)));
      uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits));
      uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1);

      // candidate levels handed to xGetCodedLevelTSPred(): the rounded level and, if different, minAbsLevel
      m_testedLevels = 0;
      coeffLevels[m_testedLevels++] = roundAbsLevel;

      if (minAbsLevel != roundAbsLevel)
        coeffLevels[m_testedLevels++] = minAbsLevel;

      double dErr = double(levelDouble);
      coeffLevelError[0]  = dErr * dErr * errorScale;

      costCoeff0[scanPos] = coeffLevelError[0];
      dstCoeff[blkPos]    = coeffLevels[0];

      //===== coefficient level estimation =====
      unsigned    ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff);
      uint32_t    cLevel;
      const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS());

      //goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff);
      goRiceParam = 1;
      unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign);
      const uint8_t     sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0;
      unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode);
      const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId);

      DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig);

      const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig);
      // last position of a sub-block in which no non-zero level has been chosen so far
      bool lastCoeff = false; //
      if (scanPosInSB == lastPosCoded && noCoeffCoded == 0)
      {
        lastCoeff = true;
      }
      int rightPixel, belowPixel;
      cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff);
      int numUsedCtxBins = 0;
      cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError,
        &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins);
      cctx.remRegBins -= numUsedCtxBins;
      rdStats.iNumSbbCtxBins += numUsedCtxBins;

      if (cLevel > 0)
      {
        noCoeffCoded++;
      }
      dstCoeff[blkPos] = cLevel;

      if (sign)
      {
        dstCoeff[blkPos] = -dstCoeff[blkPos];
      }

      // reconstruction = dequantised level + predictor; later positions predict from it
      xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams );
      m_fullCoeff[blkPos] += predCoeff;

      baseCost += costCoeff[scanPos];
      rdStats.d64SigCost += costSig[scanPos];

      if (dstCoeff[blkPos])
      {
        cctx.setSigGroup();
        rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos];
        rdStats.d64UncodedDist += costCoeff0[scanPos];
      }
    } //end for (iScanPosinCG)

    if (!cctx.isSigGroup())
    {
      // nothing coded in this sub-block: replace the per-coefficient sig costs by the group flag cost
      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));
      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost;
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block
    }
    else if (sbId != sbNum - 1 || anySigCG)
    {
      // rd-cost if SigCoeffGroupFlag = 0, initialization
      double costZeroSB = baseCost;

      const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true));

      baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1);
      costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
      costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1);

      costZeroSB += rdStats.d64UncodedDist;         // distortion for resetting non-zero levels to zero levels
      costZeroSB -= rdStats.d64CodedLevelandDist;   // distortion and level cost for keeping all non-zero levels
      costZeroSB -= rdStats.d64SigCost;             // sig cost for all coeffs, including zero levels and non-zero levels

      if (costZeroSB < baseCost)
      {
        cctx.resetSigGroup();
        baseCost = costZeroSB;
        costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0);
        cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block

        for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++)
        {
          scanPos = cctx.minSubPos() + scanPosInSB;
          uint32_t blkPos = cctx.blockPos(scanPos);

          const int posX = cctx.posX(scanPos);
          const int posY = cctx.posY(scanPos);
          const int posS = (1 == dirMode) ? posX : posY;
          const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride;
          // With a zero level the reconstruction equals the predictor. The buffer is addressed
          // by block position everywhere else (blkPos for writes, posNb for reads), so the
          // update has to go to blkPos as well; indexing by scanPos would overwrite an
          // unrelated sample and keep the stale reconstruction of this one.
          // NOTE(review): assumes the neighbour at posNb precedes this position in scan order,
          // so it has already been refreshed by this loop - confirm for all scan types.
          m_fullCoeff[blkPos] = (0 != posS) ? m_fullCoeff[posNb] : 0;

          if (dstCoeff[blkPos])
          {
            dstCoeff[blkPos] = 0;
            costCoeff[scanPos] = costCoeff0[scanPos];
            costSig[scanPos] = 0;
          }
        }
      }
      else
      {
        anySigCG = true;
      }
    }
  }

  //===== accumulate the absolute levels of the whole block =====
  for (int scanPos = 0; scanPos < maxNumCoeff; scanPos++)
  {
    int blkPos = cctx.blockPos(scanPos);
    TCoeff level = dstCoeff[blkPos];
    absSum += abs(level);
  }
}
1563
1564
/** \brief Inverse-quantise a single level.
 *
 *  \param pRes           receives the dequantised value
 *  \param coeff          quantised level (only read)
 *  \param trQuantParams  supplies qScale and rightShift; a positive rightShift is applied as a
 *                        rounded right shift, otherwise the product is multiplied by 2^(-rightShift)
 */
void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeffSig& coeff, const TrQuantParams& trQuantParams)
{
  const auto shift  = trQuantParams.rightShift;
  const auto scaled = Intermediate_Int(coeff) * trQuantParams.qScale;

  if (!(shift > 0))
  {
    // no right shift to apply: scale up instead
    pRes = TCoeff(scaled * (1 << -shift));
    return;
  }

  // rounding offset: half of the divisor implied by the shift
  const Intermediate_Int roundOffset = Intermediate_Int(1) << (shift - 1);
  pRes = TCoeff((scaled + roundOffset) >> shift);
}
1577
1578
inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double&            rd64CodedCost,
1579
  double&            rd64CodedCost0,
1580
  double&            rd64CodedCostSig,
1581
  Intermediate_Int    levelDouble,
1582
  int                 qBits,
1583
  double              errorScale,
1584
  uint32_t coeffLevels[],
1585
  double coeffLevelError[],
1586
  const BinFracBits* fracBitsSig,
1587
  const BinFracBits& fracBitsPar,
1588
  CoeffCodingContext& cctx,
1589
  const FracBitsAccess& fracBitsAccess,
1590
  const BinFracBits& fracBitsSign,
1591
  const BinFracBits& fracBitsGt1,
1592
  const uint8_t      sign,
1593
  int                rightPixel,
1594
  int                belowPixel,
1595
  uint16_t           ricePar,
1596
  bool               isLast,
1597
  const int          maxLog2TrDynamicRange,
1598
  int&               numUsedCtxBins
1599
) const
1600
10.6M
{
1601
10.6M
  double currCostSig = 0;
1602
10.6M
  uint32_t   bestAbsLevel = 0;
1603
10.6M
  numUsedCtxBins = 0;
1604
10.6M
  int numBestCtxBin = 0;
1605
10.6M
  if (!isLast && coeffLevels[0] < 3)
1606
9.65M
  {
1607
9.65M
    if (cctx.remRegBins >= 4)
1608
9.48M
    rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0);
1609
168k
    else
1610
168k
      rd64CodedCostSig = xGetICost(1 << SCALE_BITS);
1611
9.65M
    rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig;
1612
9.65M
    if (cctx.remRegBins >= 4)
1613
9.48M
      numUsedCtxBins++;
1614
9.65M
    if (coeffLevels[0] == 0)
1615
9.15M
    {
1616
9.15M
      return bestAbsLevel;
1617
9.15M
    }
1618
9.65M
  }
1619
955k
  else
1620
955k
  {
1621
955k
    rd64CodedCost = MAX_DOUBLE;
1622
955k
  }
1623
1624
1.46M
  if (!isLast)
1625
912k
  {
1626
912k
    if (cctx.remRegBins >= 4)
1627
871k
      currCostSig = xGetRateSigCoef(*fracBitsSig, 1);
1628
40.7k
    else
1629
40.7k
      currCostSig = xGetICost(1 << SCALE_BITS);
1630
912k
    if (coeffLevels[0] >= 3 && cctx.remRegBins >= 4)
1631
378k
      numUsedCtxBins++;
1632
912k
  }
1633
1634
3.91M
  for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++)
1635
2.45M
  {
1636
2.45M
    int absLevel = coeffLevels[errorInd - 1];
1637
2.45M
    double dErr = 0.0;
1638
2.45M
    dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits));
1639
2.45M
    coeffLevelError[errorInd] = dErr * dErr * errorScale;
1640
2.45M
    int modAbsLevel = absLevel;
1641
2.45M
    if (cctx.remRegBins >= 4) 
1642
2.37M
    {
1643
2.37M
      modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm);
1644
2.37M
    }
1645
2.45M
    int numCtxBins = 0;
1646
2.45M
    double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, maxLog2TrDynamicRange));
1647
1648
2.45M
    if (cctx.remRegBins >= 4)
1649
2.37M
      dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more
1650
1651
2.45M
    if (dCurrCost < rd64CodedCost)
1652
1.06M
    {
1653
1.06M
      bestAbsLevel = absLevel;
1654
1.06M
      rd64CodedCost = dCurrCost;
1655
1.06M
      rd64CodedCostSig = currCostSig;
1656
1.06M
      numBestCtxBin = numCtxBins;
1657
1.06M
    }
1658
2.45M
  }
1659
1.46M
  numUsedCtxBins += numBestCtxBin;
1660
1.46M
  return bestAbsLevel;
1661
10.6M
}
1662
1663
inline int QuantRDOQ::xGetICRateTS( const uint32_t            absLevel,
1664
                                    const BinFracBits&        fracBitsPar,
1665
                                    const CoeffCodingContext& cctx,
1666
                                    const FracBitsAccess&     fracBitsAccess,
1667
                                    const BinFracBits&        fracBitsSign,
1668
                                    const BinFracBits&        fracBitsGt1,
1669
                                    int&                      numCtxBins,
1670
                                    const uint8_t             sign,
1671
                                    const uint16_t            ricePar,
1672
                                    const int                 maxLog2TrDynamicRange  ) const
1673
2.45M
{
1674
 
1675
2.45M
  if (cctx.remRegBins < 4) // Full by-pass coding 
1676
75.4k
  {
1677
75.4k
    int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero 
1678
1679
75.4k
    uint32_t symbol = absLevel;
1680
1681
75.4k
    uint32_t length;
1682
75.4k
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
1683
75.4k
    if (symbol < (threshold << ricePar))
1684
38.0k
    {
1685
38.0k
      length = symbol >> ricePar;
1686
38.0k
      rate += (length + 1 + ricePar) << SCALE_BITS;
1687
38.0k
    }
1688
37.4k
    else
1689
37.4k
    {
1690
37.4k
      length = ricePar;
1691
37.4k
      symbol = symbol - (threshold << ricePar);
1692
170k
      while (symbol >= (1 << length))
1693
132k
      {
1694
132k
        symbol -= (1 << (length++));
1695
132k
      }
1696
37.4k
      rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1697
37.4k
    }
1698
1699
75.4k
    return rate;
1700
75.4k
  }
1701
1702
2.37M
  else if (cctx.remRegBins >= 4 && cctx.remRegBins < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here)
1703
13.6k
  {
1704
13.6k
    int rate = fracBitsSign.intBits[sign]; // sign bits
1705
13.6k
    if (absLevel)
1706
13.0k
      numCtxBins++;
1707
1708
13.6k
    if (absLevel > 1)
1709
9.35k
    {
1710
9.35k
      rate += fracBitsGt1.intBits[1];
1711
9.35k
      rate += fracBitsPar.intBits[(absLevel - 2) & 1];
1712
1713
9.35k
      numCtxBins += 2;
1714
1715
9.35k
      int cutoffVal = 2;
1716
1717
9.35k
      if (absLevel >= cutoffVal)
1718
9.35k
      {
1719
9.35k
        uint32_t symbol = (absLevel - cutoffVal) >> 1;
1720
9.35k
        uint32_t length;
1721
9.35k
        const int threshold = COEF_REMAIN_BIN_REDUCTION;
1722
9.35k
        if (symbol < (threshold << ricePar))
1723
6.55k
        {
1724
6.55k
          length = symbol >> ricePar;
1725
6.55k
          rate += (length + 1 + ricePar) << SCALE_BITS;
1726
6.55k
        }
1727
2.80k
        else
1728
2.80k
        {
1729
2.80k
          length = ricePar;
1730
2.80k
          symbol = symbol - (threshold << ricePar);
1731
11.5k
          while (symbol >= (1 << length))
1732
8.77k
          {
1733
8.77k
            symbol -= (1 << (length++));
1734
8.77k
          }
1735
2.80k
          rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS;
1736
2.80k
        }
1737
9.35k
      }
1738
9.35k
    }
1739
4.29k
    else if (absLevel == 1)
1740
3.67k
    {
1741
3.67k
      rate += fracBitsGt1.intBits[0];
1742
3.67k
      numCtxBins++;
1743
3.67k
    }
1744
628
    else
1745
628
    {
1746
628
      rate = 0;
1747
628
    }
1748
13.6k
    return rate;
1749
13.6k
  }
1750
    
1751
2.36M
  int rate = fracBitsSign.intBits[sign];
1752
1753
2.36M
  if (absLevel)
1754
1.83M
    numCtxBins++;
1755
1756
2.36M
  if( absLevel > 1 )
1757
798k
  {
1758
798k
    rate += fracBitsGt1.intBits[1];
1759
798k
    rate += fracBitsPar.intBits[( absLevel - 2 ) & 1];
1760
798k
    numCtxBins += 2;
1761
1762
798k
          int cutoffVal = 2;
1763
798k
    const int numGtBins = 4;
1764
3.99M
    for( int i = 0; i < numGtBins; i++ )
1765
3.19M
    {
1766
3.19M
      if( absLevel >= cutoffVal )
1767
2.27M
      {
1768
2.27M
        const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 );
1769
2.27M
        const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX );
1770
2.27M
        unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) );
1771
2.27M
        rate += fracBitsGtX.intBits[gtX];
1772
2.27M
        numCtxBins++;
1773
2.27M
      }
1774
3.19M
      cutoffVal += 2;
1775
3.19M
    }
1776
1777
798k
    if( absLevel >= cutoffVal )
1778
376k
    {
1779
376k
      uint32_t symbol = ( absLevel - cutoffVal ) >> 1;
1780
376k
      uint32_t length;
1781
376k
      const int threshold = COEF_REMAIN_BIN_REDUCTION;
1782
376k
      if( symbol < ( threshold << ricePar ) )
1783
133k
      {
1784
133k
        length = symbol >> ricePar;
1785
133k
        rate  += ( length + 1 + ricePar ) << SCALE_BITS;
1786
133k
      }
1787
243k
      else
1788
243k
      {
1789
243k
        length = ricePar;
1790
243k
        symbol = symbol - ( threshold << ricePar );
1791
1.04M
        while( symbol >= ( 1 << length ) )
1792
801k
        {
1793
801k
          symbol -= ( 1 << ( length++ ) );
1794
801k
        }
1795
243k
        rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS;
1796
243k
      }
1797
376k
    }
1798
798k
  }
1799
1.56M
  else if( absLevel == 1 )
1800
1.03M
  {
1801
1.03M
    rate += fracBitsGt1.intBits[0];
1802
1.03M
    numCtxBins++;
1803
1.03M
  }
1804
532k
  else
1805
532k
  {
1806
532k
    rate = 0;
1807
532k
  }
1808
2.36M
  return rate;
1809
2.45M
}
1810
1811
} // namespace vvenc
1812
1813
//! \}
1814