Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/QuantRDOQ2.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     QuantRDOQ2.cpp
45
    \brief    transform and quantization class
46
*/
47
48
#include "QuantRDOQ2.h"
49
#include "UnitTools.h"
50
#include "ContextModelling.h"
51
#include "CodingStructure.h"
52
#include "dtrace_next.h"
53
#include "dtrace_buffer.h"
54
55
#include <stdlib.h>
56
#include <memory.h>
57
58
#if defined( TARGET_SIMD_X86 )
59
#  include "CommonDefX86.h"
60
#  include <simde/x86/sse4.1.h>
61
#endif
62
63
//! \ingroup CommonLib
64
//! \{
65
66
namespace vvenc {
67
68
/** Plain aggregate collecting rate-distortion figures for one coefficient group.
 *  NOTE(review): the struct is not referenced in the visible part of this file; the
 *  member descriptions below are taken from the original comments and the member names.
 */
struct coeffGroupRDStats
{
  //! presumably the count of non-zero levels in front of position 0 of the group - TODO confirm against the user of this struct
  int iNNZbeforePos0;

  //! distortion and level cost only
  double d64CodedLevelandDist;

  //! all zero coded block distortion
  double d64UncodedDist;

  //! presumably the accumulated significance-flag cost - TODO confirm
  double d64SigCost;

  //! presumably the significance-flag cost at position 0 - TODO confirm
  double d64SigCost_0;
};
76
77
78
//! \ingroup CommonLib
79
//! \{
80
81
// ====================================================================================================================
82
// Constants
83
// ====================================================================================================================
84
0
// Number of fractional bits of the fixed-point error-scale values: the xSetErrScaleCoeff*() functions multiply the
// floating-point scale by (1 << COEFF_ERR_SCALE_PRECISION_BITS) before storing it as int, and the RDOQ distortion
// computation uses the same value as the right-shift applied after multiplying by the stored scale.
#define COEFF_ERR_SCALE_PRECISION_BITS 20
85
86
//! \}
87
88
0
/** Constructs the quantizer; both arguments are forwarded unchanged to the QuantRDOQ base.
 *  \param other            optional instance to initialise from; when non-null its dynamic type must be QuantRDOQ2
 *  \param useScalingLists  forwarded to the base class
 */
QuantRDOQ2::QuantRDOQ2( const Quant* other, bool useScalingLists )
  : QuantRDOQ            ( other, useScalingLists )
  , m_isErrScaleListOwner( false )
  , m_iLambda            ( 0 )
{
  // A null 'other' remains null after the cast; a non-null 'other' of another dynamic type trips the check.
  const auto* const rdoq2 = dynamic_cast<const QuantRDOQ2*>( other );
  CHECK( other && !rdoq2, "The RDOQ cast must be successfull!" );

  // With a null source this instance becomes the owner of the error-scale tables,
  // otherwise it takes over the source's table pointers.
  xInitScalingList( rdoq2 );
}
94
95
/** Destructor: hands over to xDestroyScalingList(), which frees the error-scale tables only if this instance owns them. */
QuantRDOQ2::~QuantRDOQ2()
{
  xDestroyScalingList();
}
99
100
101
/** initialization process of scaling list array
102
*/
103
void QuantRDOQ2::xInitScalingList( const QuantRDOQ2* other )
104
0
{
105
0
  m_isErrScaleListOwner = other == nullptr;
106
107
0
  const bool useScalingLists = getScalingListEnabled();
108
109
0
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
110
0
  {
111
0
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
112
0
    {
113
0
      for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
114
0
      {
115
0
        for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
116
0
        {
117
0
          if( m_isErrScaleListOwner )
118
0
          {
119
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new int[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr;
120
0
          }
121
0
          else
122
0
          {
123
0
            m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp];
124
0
          }
125
0
        } // listID loop
126
0
      }
127
0
    }
128
0
  }
129
0
}
130
131
/** destroy quantization matrix array
132
*/
133
void QuantRDOQ2::xDestroyScalingList()
134
0
{
135
0
  if( !m_isErrScaleListOwner ) return;
136
137
0
  for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++)
138
0
  {
139
0
    for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++)
140
0
    {
141
0
      for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++)
142
0
      {
143
0
        for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++)
144
0
        {
145
0
          if(m_errScale[sizeIdX][sizeIdY][listId][qp])
146
0
          {
147
0
            delete [] m_errScale[sizeIdX][sizeIdY][listId][qp];
148
0
          }
149
0
        }
150
0
      }
151
0
    }
152
0
  }
153
  //   Quant::destroyScalingList();
154
0
}
155
156
int QuantRDOQ2::xGetErrScaleCoeff( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth )
157
0
{
158
0
  const int iTransformShift = getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange);
159
0
  double    dErrScale = (double)(1 << SCALE_BITS);                                // Compensate for scaling of bitcount in Lagrange cost function
160
0
  double    dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0);
161
0
  dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift));                     // Compensate for scaling through forward transform
162
0
  const int  QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp];
163
0
  double    finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1));
164
0
  return    finalErrScale;
165
0
}
166
167
/** set error scale coefficients
168
* \param list                   list ID
169
* \param size
170
* \param qp                     quantization parameter
171
* \param maxLog2TrDynamicRange
172
* \param bitDepths              reference to bit depth array for all channels
173
*/
174
void QuantRDOQ2::xSetErrScaleCoeff( unsigned list, unsigned sizeX, unsigned sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths )
175
0
{
176
0
  const int width               = g_scalingListSizeX[sizeX];
177
0
  const int height              = g_scalingListSizeX[sizeY];
178
0
  const ChannelType channelType = ((list == 0) || (list == MAX_NUM_COMP)) ? CH_L : CH_C;
179
0
  const int channelBitDepth     = bitDepths.recon[channelType];
180
0
  const int iTransformShift     = getTransformShift( channelBitDepth, Size( width, height ), maxLog2TrDynamicRange[channelType] );
181
0
  const double dTransShift      = (double)iTransformShift;
182
183
0
  double dErrScale = pow( 2.0, ( (double)SCALE_BITS / 2.0 ) );    // Compensate for scaling of bitcount in Lagrange cost function
184
0
  dErrScale = dErrScale*pow( 2.0, ( -/*2.0**/( dTransShift ) ) );   // Compensate for scaling through forward transform
185
186
0
  if( getScalingListEnabled() )
187
0
  {
188
0
    const unsigned uiMaxNumCoeff  = g_scalingListSizeX[sizeX] * g_scalingListSizeX[sizeY];
189
0
    const int *piQuantCoeff       = getQuantCoeff( list, qp, sizeX, sizeY );
190
0
    int *piErrScale               = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp );
191
192
0
    for( unsigned i = 0; i < uiMaxNumCoeff; i++ )
193
0
    {
194
0
      int QStep = piQuantCoeff[i];
195
0
      double errScale = dErrScale / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT( channelBitDepth ) /*<< 1*/)); // (1 << ( /*2 **/ (bitDepths.recon[channelType] - 8)));
196
0
      piErrScale[i] = ( int ) (errScale * ( double ) (1 << COEFF_ERR_SCALE_PRECISION_BITS));
197
0
    }
198
0
  }
199
200
0
  xSetErrScaleCoeffNoScalingList( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths );
201
0
}
202
203
/** Computes the flat (scaling-list independent) error scale for one (list, wIdx, hIdx, qp) combination
 *  and stores it, as fixed point with COEFF_ERR_SCALE_PRECISION_BITS fractional bits, in the slot
 *  returned by xGetErrScaleCoeffNoScalingList().
 *
 *  Blocks whose log2 area is odd use the second row of g_quantScales and a transform shift reduced by 0.5.
 *
 *  \param list                   list ID (0 and MAX_NUM_COMP select the luma channel, everything else chroma)
 *  \param wIdx                   index into g_scalingListSizeX giving the block width
 *  \param hIdx                   index into g_scalingListSizeX giving the block height
 *  \param qp                     index into g_quantScales / the destination table
 *  \param maxLog2TrDynamicRange  per-channel-type dynamic range, forwarded to getTransformShift()
 *  \param bitDepths              bit depths; the 'recon' entry of the selected channel type is used
 */
void QuantRDOQ2::xSetErrScaleCoeffNoScalingList( unsigned list, unsigned wIdx, unsigned hIdx, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths )
{
  const ChannelType channelType     = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C;
  const int         blkWidth        = g_scalingListSizeX[wIdx];
  const int         blkHeight       = g_scalingListSizeX[hIdx];
  const int         channelBitDepth = bitDepths.recon[channelType];
  const int         transformShift  = getTransformShift( channelBitDepth, Size( blkWidth, blkHeight ), maxLog2TrDynamicRange[channelType] );

  // odd log2(area) => the transform scaling is off by sqrt(2)
  const bool   needsSqrt2     = ( ( Log2( blkWidth * blkHeight ) ) & 1 ) == 1;
  const double effectiveShift = (double)transformShift + ( needsSqrt2 ? -0.5 : 0.0 );
  const int    quantStep      = g_quantScales[needsSqrt2][qp];

  // Compensate for scaling of bitcount in Lagrange cost function ...
  double baseScale = pow( 2.0, ( (double)SCALE_BITS / 2.0 ) );
  // ... and for scaling through forward transform
  baseScale = baseScale * pow( 2.0, ( -( effectiveShift ) ) );

  const double errScale = baseScale / quantStep / ( 1 << ( DISTORTION_PRECISION_ADJUSTMENT( channelBitDepth ) ) );

  // convert to fixed point (truncating) and store
  xGetErrScaleCoeffNoScalingList( list, wIdx, hIdx, qp ) = (int)( errScale * (double)( 1 << COEFF_ERR_SCALE_PRECISION_BITS ) );
}
220
221
222
/** set flat matrix value to quantized coefficient
223
*/
224
void QuantRDOQ2::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths)
225
0
{
226
0
  QuantRDOQ::setFlatScalingList( maxLog2TrDynamicRange, bitDepths );
227
228
0
  const int minimumQp = 0;
229
0
  const int maximumQp = SCALING_LIST_REM_NUM;
230
231
0
  for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++)
232
0
  {
233
0
    for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++)
234
0
    {
235
0
      for(uint32_t list = 0; list < SCALING_LIST_NUM; list++)
236
0
      {
237
0
        for(int qp = minimumQp; qp < maximumQp; qp++)
238
0
        {
239
0
          xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths );
240
0
        }
241
0
      }
242
0
    }
243
0
  }
244
0
}
245
246
247
/** Quantizes the coefficients of one component of a transform unit, dispatching to the appropriate quantizer.
 *
 *  - m_RDOQ == 1: everything is delegated to QuantRDOQ::quant().
 *  - RDOQ not applicable (disabled for this block type, or width/height <= 2 for blocks that are
 *    not ISP-coded luma): plain Quant::quant().
 *  - selective RDOQ is active for the picture and xNeedRDOQ() says no: the block is reported as empty
 *    (\p uiAbsSum = 0, tu.lastPos[compID] = -1) without running a quantizer.
 *  - otherwise: forwardRDPCM() / rateDistOptQuantTS() for transform-skip blocks (depending on BDPCM),
 *    xRateDistOptQuant() for all other blocks.
 *
 *  \param tu        transform unit being quantized
 *  \param compID    component to process
 *  \param pSrc      input transform coefficients
 *  \param uiAbsSum  output, set by the selected quantizer (or to 0 on the selective-RDOQ skip path)
 *  \param cQP       quantization parameters
 *  \param ctx       entropy coding contexts used for rate estimation
 */
void QuantRDOQ2::quant( TransformUnit &tu, const ComponentID compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx )
{
  if( m_RDOQ == 1 )
  {
    QuantRDOQ::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  const CompArea& area      = tu.blocks[compID];
  const uint32_t  blkWidth  = area.width;
  const uint32_t  blkHeight = area.height;
  const bool      isTrSkip  = ( tu.mtsIdx[compID] == MTS_SKIP );

  bool rdoqActive = isTrSkip ? m_useRDOQTS : m_RDOQ > 0;

  // the minimum-size restriction is lifted only for ISP-coded luma blocks
  if( !tu.cu->ispMode || !isLuma( compID ) )
  {
    rdoqActive &= blkWidth > 2;
    rdoqActive &= blkHeight > 2;
  }

  if( !rdoqActive )
  {
    Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ( tu, compID, pSrc, cQP ) )
  {
    // selective RDOQ decided that this block does not need to be coded
    uiAbsSum           = 0;
    tu.lastPos[compID] = -1;
    return;
  }

  if( !isTrSkip )
  {
    xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx, getScalingListEnabled() );
  }
  else if( tu.cu->bdpcmM[toChannelType( compID )] )
  {
    forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
  else
  {
    rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }
}
302
303
/** Converts a rate into a cost by multiplying it with m_dLambda.
 *  \param iRate  rate value (the callers in this file pass values scaled by 1 << SCALE_BITS)
 *  \returns the product, converted (truncated) to cost_t
 */
inline cost_t QuantRDOQ2::xiGetICost(int iRate ) const
{
  const auto weightedRate = m_dLambda * iRate;
  return static_cast<cost_t>( weightedRate );
}
307
308
/** Returns the rate of one bin in the fixed-point rate domain, i.e. 1 << SCALE_BITS.
 *  NOTE(review): "EP" presumably stands for an equiprobable (bypass) bin - confirm.
 */
inline cost_t QuantRDOQ2::xGetIEPRate() const
{
  const cost_t oneBin = 1 << SCALE_BITS;
  return oneBin;
}
312
313
/** Calculates the cost for specific absolute transform level
314
* \param uiAbsLevel scaled quantized level
315
* \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC)
316
* \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC)
317
* \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3
318
* \returns cost of given absolute transform level
319
*/
320
inline cost_t QuantRDOQ2::xiGetICRateCost( const uint32_t     uiAbsLevel,
321
                                            const BinFracBits& fracBitsPar,
322
                                            const BinFracBits& fracBitsGt1,
323
                                            const BinFracBits& fracBitsGt2,
324
                                            const int          remRegBins,
325
                                            unsigned           goRiceZero,
326
                                            const uint16_t     ui16AbsGoRice,
327
                                            const int          maxLog2TrDynamicRange ) const
328
0
{
329
0
  cost_t iRate = xGetIEPRate();
330
0
  if( remRegBins < 4 )
331
0
  {
332
0
    uint32_t  symbol  = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel );
333
0
    uint32_t  length;
334
0
    const int threshold = COEF_REMAIN_BIN_REDUCTION;
335
0
    if( symbol < ( threshold << ui16AbsGoRice ) )
336
0
    {
337
0
      length = symbol >> ui16AbsGoRice;
338
0
      iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
339
0
    }
340
0
    else
341
0
    {
342
0
      length = ui16AbsGoRice;
343
0
      symbol = symbol - ( threshold << ui16AbsGoRice );
344
0
      while( symbol >= ( 1 << length ) )
345
0
      {
346
0
        symbol -= ( 1 << ( length++ ) );
347
0
      }
348
0
      iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
349
0
    }
350
0
  }
351
0
  else
352
0
  {
353
0
    const uint32_t cthres = 4;
354
0
    if( uiAbsLevel >= cthres )
355
0
    {
356
0
      uint32_t symbol = ( uiAbsLevel - cthres ) >> 1;
357
0
      uint32_t length;
358
0
      const int threshold = COEF_REMAIN_BIN_REDUCTION;
359
0
      if( symbol < ( threshold << ui16AbsGoRice ) )
360
0
      {
361
0
        length = symbol >> ui16AbsGoRice;
362
0
        iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS;
363
0
      }
364
0
      else
365
0
      {
366
0
        length = ui16AbsGoRice;
367
0
        symbol = symbol - ( threshold << ui16AbsGoRice );
368
0
        while( symbol >= ( 1 << length ) )
369
0
        {
370
0
          symbol -= ( 1 << ( length++ ) );
371
0
        }
372
0
        iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS;
373
0
      }
374
375
0
      iRate += fracBitsGt1.intBits[1];
376
0
      iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1];
377
0
      iRate += fracBitsGt2.intBits[1];
378
0
    }
379
0
    else if( uiAbsLevel == 1 )
380
0
    {
381
0
      iRate += fracBitsGt1.intBits[0];
382
0
    }
383
0
    else if( uiAbsLevel == 2 )
384
0
    {
385
0
      iRate += fracBitsGt1.intBits[1];
386
0
      iRate += fracBitsPar.intBits[0];
387
0
      iRate += fracBitsGt2.intBits[0];
388
0
    }
389
0
    else if( uiAbsLevel == 3 )
390
0
    {
391
0
      iRate += fracBitsGt1.intBits[1];
392
0
      iRate += fracBitsPar.intBits[1];
393
0
      iRate += fracBitsGt2.intBits[0];
394
0
    }
395
0
    else
396
0
    {
397
0
      iRate = 0;
398
0
    }
399
0
  }
400
0
  return xiGetICost( (int)iRate );
401
0
}
402
403
/** Cost of signalling the coefficient-group significance flag.
 *  \param fracBitsSigCG             estimated bits of the flag for both bin values
 *  \param uiSignificanceCoeffGroup  bin value (index into fracBitsSigCG.intBits)
 *  \returns the selected bit estimate passed through xiGetICost()
 */
inline cost_t QuantRDOQ2::xiGetCostSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const
{
  const auto flagBits = fracBitsSigCG.intBits[uiSignificanceCoeffGroup];
  return xiGetICost( flagBits );
}
407
408
void QuantRDOQ2::xInitLastPosBitsTab( const CoeffCodingContext& cctx, const uint32_t uiWidth, const uint32_t uiHeight, const ChannelType chType, const FracBitsAccess& fracBits )
409
0
{
410
0
  int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth);
411
0
  int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight);
412
413
0
  int bitsX = 0;
414
0
  int bitsY = 0;
415
0
  int ctxId;
416
417
  //X-coordinate
418
0
  for( ctxId = 0; ctxId < g_uiGroupIdx[dim1 - 1]; ctxId++ )
419
0
  {
420
0
    const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId( ctxId ) );
421
0
    m_lastBitsX[chType][ctxId] = bitsX + fB.intBits[0];
422
0
    bitsX += fB.intBits[1];
423
0
  }
424
0
  m_lastBitsX[chType][ctxId] = bitsX;
425
426
  //Y-coordinate
427
0
  for( ctxId = 0; ctxId < g_uiGroupIdx[dim2 - 1]; ctxId++ )
428
0
  {
429
0
    const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId( ctxId ) );
430
0
    m_lastBitsY[chType][ctxId] = bitsY + fB.intBits[0];
431
0
    bitsY += fB.intBits[1];
432
0
  }
433
0
  m_lastBitsY[chType][ctxId] = bitsY;
434
0
}
435
436
437
/** Calculates the cost of signaling the last significant coefficient in the block
438
* \param uiPosX X coordinate of the last significant coefficient
439
* \param uiPosY Y coordinate of the last significant coefficient
440
* \returns cost of last significant coefficient
441
*/
442
/*
443
* \param uiWidth width of the transform unit (TU)
444
*/
445
inline cost_t QuantRDOQ2::xiGetCostLast( const uint32_t uiPosX, const uint32_t uiPosY, const ChannelType chType ) const
446
0
{
447
0
  uint32_t uiCtxX = g_uiGroupIdx[uiPosX];
448
0
  uint32_t uiCtxY = g_uiGroupIdx[uiPosY];
449
450
0
  uint32_t uiCost = m_lastBitsX[chType][uiCtxX] + m_lastBitsY[chType][uiCtxY];
451
452
0
  if( uiCtxX > 3 )
453
0
  {
454
0
    uiCost += xGetIEPRate() * ( ( uiCtxX - 2 ) >> 1 );
455
0
  }
456
0
  if( uiCtxY > 3 )
457
0
  {
458
0
    uiCost += xGetIEPRate() * ( ( uiCtxY - 2 ) >> 1 );
459
0
  }
460
0
  return xiGetICost( (int)uiCost );
461
0
}
462
463
/** Cost of signalling the significance flag of a single coefficient.
 *  \param fracBitsSig     estimated bits of the flag for both bin values
 *  \param uiSignificance  bin value (index into fracBitsSig.intBits)
 *  \returns the selected bit estimate passed through xiGetICost()
 */
inline cost_t QuantRDOQ2::xiGetCostSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const
{
  const auto flagBits = fracBitsSig.intBits[uiSignificance];
  return xiGetICost( flagBits );
}
467
468
/** Distortion of a quantization error: the error is multiplied by the fixed-point error scale,
 *  shifted back to integer precision and then squared.
 *  \param iErr            error in the scaled coefficient domain
 *  \param iErrScale       fixed-point error scale
 *  \param iErrScaleShift  number of fractional bits of \p iErrScale
 *  \returns squared scaled error
 */
static inline cost_t _dist( cost_t iErr, cost_t iErrScale, int64_t iErrScaleShift )
{
  const int64_t scaledErr = ( iErr*iErrScale ) >> iErrScaleShift;
  return scaledErr * scaledErr;
}
474
475
template< bool bSBH, bool bUseScalingList >
476
int QuantRDOQ2::xRateDistOptQuantFast( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx )
477
0
{
478
0
  CoeffCodingContext cctx( tu, compID, bSBH, false, m_tplBuf );
479
0
  const FracBitsAccess& fracBits = ctx.getFracBitsAcess();
480
481
0
  const SPS &sps            = *tu.cs->sps;
482
0
  const CompArea &rect      = tu.blocks[compID];
483
0
  const uint32_t uiWidth    = rect.width;
484
0
  const uint32_t uiHeight   = rect.height;
485
0
  const ChannelType chType  = toChannelType( compID );
486
0
  const int channelBitDepth = sps.bitDepths[ chType ];
487
488
0
  const int  maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
489
490
0
  if( compID != COMP_Cr || !tu.cbf[COMP_Cb] )
491
0
    xInitLastPosBitsTab( cctx, uiWidth, uiHeight, chType, fracBits );
492
493
  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
494
  * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
495
  * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
496
  * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
497
  */
498
499
  // Represents scaling through forward transform
500
0
  const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange);
501
502
0
  const uint32_t uiLog2BlockWidth   = Log2(uiWidth);
503
0
  const uint32_t uiLog2BlockHeight  = Log2(uiHeight);
504
0
  const uint32_t uiMaxNumCoeff      = uiWidth * uiHeight;
505
0
  const uint32_t log2CGSize         = cctx.log2CGSize();
506
507
0
  int scalingListType = getScalingListType( tu.cu->predMode, compID );
508
0
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
509
510
0
  const TCoeff    *plSrcCoeff   = pSrc.buf;
511
0
        TCoeffSig *piDstCoeff   = tu.getCoeffs( compID ).buf;
512
513
0
  memset( piDstCoeff, 0, sizeof( *piDstCoeff ) * uiMaxNumCoeff );
514
515
0
  const bool needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID );
516
0
  const bool isTransformSkip    = tu.mtsIdx[compID] == MTS_SKIP;
517
0
  const int  *quantScaleList    = getQuantCoeff( scalingListType, cQP.rem( isTransformSkip ), uiLog2BlockWidth, uiLog2BlockHeight );
518
0
  const int  defaultQuantScale  = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem( isTransformSkip )];
519
0
  const int  defaultErrScale    = xGetErrScaleCoeffNoScalingList( scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem( isTransformSkip ) );
520
0
  const int  *piErrScale        = xGetErrScaleCoeffSL           ( scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem( isTransformSkip ) );
521
0
  const int  iErrScaleShift     = COEFF_ERR_SCALE_PRECISION_BITS;
522
0
  int iQBits                    = QUANT_SHIFT + cQP.per( isTransformSkip ) + iTransformShift + (needSqrtAdjustment?-1:0);    // Right shift of non-RDOQ quantizer;  level = (coeff*uiQ + offset)>>q_bits
523
0
  int iQOffset                  = 1 << ( iQBits - 1 );
524
525
0
  cost_t piCostCoeff   [16];
526
0
  cost_t piCostSig     [16];
527
0
  cost_t piCostCoeff0  [16];
528
0
  cost_t piCostDeltaSBH[16];
529
0
  int    piAddSBH      [16];
530
531
0
  cost_t iCodedCostBlock   = 0;
532
0
  cost_t iUncodedCostBlock = 0;
533
0
  int    iLastScanPos      = -1;
534
0
  int    lastSubSetId      = -1;
535
0
  bool   lastOptFinished   = false;
536
0
  cost_t bestTotalCost  = std::numeric_limits<cost_t>::max() / 2;
537
538
0
  int ctxBinSampleRatio = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT;
539
0
  int remRegBins = ( tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio ) >> 4;
540
0
  uint32_t  goRiceParam   = 0;
541
542
#if ENABLE_TRACING
543
  bool  bFirstNZSeen = false;
544
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID );
545
#endif
546
547
0
  uiAbsSum = 0;
548
549
0
  const int iCGSize   = 1 << log2CGSize;
550
0
  const int iCGSizeM1 = iCGSize - 1;
551
552
0
  const uint32_t lfnstIdx = tu.cu->lfnstIdx;
553
0
  const int iCGNum   = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize();
554
0
  int       iScanPos = ( iCGNum << log2CGSize ) - 1;
555
556
0
  if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 ) ) )
557
0
  {
558
0
    iScanPos = 7;
559
0
  }
560
561
  // Find first non-zero coeff
562
0
  for( ; iScanPos > 0; iScanPos-- )
563
0
  {
564
0
    uint32_t uiBlkPos = cctx.blockPos( iScanPos );
565
0
    if( plSrcCoeff[uiBlkPos] )
566
0
      break;
567
0
  }
568
569
  //////////////////////////////////////////////////////////////////////////
570
  //  Loop over sub-sets (coefficient groups)
571
  //////////////////////////////////////////////////////////////////////////
572
  
573
0
  TCoeff thres = 0, useThres = 0;
574
  
575
0
  if( iQBits )
576
0
    thres = TCoeff( ( int64_t( m_thrVal ) << ( iQBits - 1 ) ) );
577
0
  else
578
0
    thres = TCoeff( ( int64_t( m_thrVal >> 1 ) << iQBits ) );
579
580
0
  if( !bUseScalingList )
581
0
  {
582
0
    useThres = thres / ( defaultQuantScale << 2 );
583
0
  }
584
585
0
  const bool scanFirstBlk = !bUseScalingList && log2CGSize == 4 && cctx.log2CGWidth() == 2;
586
0
#if ENABLE_SIMD_OPT_QUANT && defined( TARGET_SIMD_X86 )
587
0
  const bool isSimd       = read_x86_extension_flags() > x86_simd::SCALAR;
588
0
#endif
589
590
0
  int subSetId = iScanPos >> log2CGSize;
591
0
  for( ; subSetId >= 0; subSetId-- )
592
0
  {
593
0
    int    iNZbeforePos0  = 0;
594
0
    int    uiAbsSumCG     = 0;
595
0
    cost_t iCodedCostCG   = 0;
596
0
    cost_t iUncodedCostCG = 0;
597
598
0
    int iScanPosinCG = iScanPos & ( iCGSize - 1 );
599
0
    if( iLastScanPos < 0 )
600
0
    {
601
0
#if ENABLE_SIMD_OPT_QUANT && defined( TARGET_SIMD_X86 )
602
      // if more than one 4x4 coding subblock is available, use SIMD to find first subblock with coefficient larger than threshold
603
0
      if( scanFirstBlk && iScanPos >= 16 && isSimd )
604
0
      {
605
        // move the pointer to the beginning of the current subblock
606
0
        const int firstTestPos  = iScanPos - iScanPosinCG;
607
0
        uint32_t  uiBlkPos      = cctx.blockPos( firstTestPos );
608
609
0
        const __m128i xdfTh = _mm_set1_epi32( useThres );
610
611
        // read first line of the subblock and check for coefficients larger than the threshold
612
        // assuming the subblocks are dense 4x4 blocks in raster scan order with a stride of uiWidth
613
0
        __m128i xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) );
614
0
        __m128i xdf = _mm_cmpgt_epi32( xl0, xdfTh );
615
616
        // same for the next line in the subblock
617
0
        uiBlkPos += uiWidth;
618
0
        xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) );
619
0
        xdf = _mm_or_si128( xdf, _mm_cmpgt_epi32( xl0, xdfTh ) );
620
621
        // and the third line
622
0
        uiBlkPos += uiWidth;
623
0
        xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) );
624
0
        xdf = _mm_or_si128( xdf, _mm_cmpgt_epi32( xl0, xdfTh ) );
625
626
        // and the last line
627
0
        uiBlkPos += uiWidth;
628
0
        xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) );
629
0
        xdf = _mm_or_si128( xdf, _mm_cmpgt_epi32( xl0, xdfTh ) );
630
631
0
        if( _mm_testz_si128( xdf, xdf ) )
632
0
        {
633
0
          iScanPos    -= iScanPosinCG + 1;
634
0
          iScanPosinCG = -1;
635
0
          continue;
636
0
        }
637
0
      }
638
0
      else
639
0
#endif
640
0
      if( scanFirstBlk && iScanPos >= 16 )
641
0
      {
642
0
        bool allSmaller = true;
643
644
0
        for( int xScanPosinCG = iScanPosinCG, xScanPos = iScanPos; allSmaller && xScanPosinCG >= 0; xScanPosinCG--, xScanPos-- )
645
0
        {
646
0
          const uint32_t uiBlkPos = cctx.blockPos( xScanPos );
647
0
          allSmaller &= std::abs( plSrcCoeff[uiBlkPos] ) <= useThres;
648
0
        }
649
650
0
        if( allSmaller )
651
0
        {
652
0
          iScanPos    -= iScanPosinCG + 1;
653
0
          iScanPosinCG = -1;
654
0
          continue;
655
0
        }
656
0
      }
657
658
0
    findlast2:
659
      // Fast loop to find last-pos.
660
      // No need to add distortion to cost as it would be added to both the coded and uncoded cost
661
0
      for( ; iScanPosinCG >= 0; iScanPosinCG--, iScanPos-- )
662
0
      {
663
0
        const uint32_t uiBlkPos = cctx.blockPos( iScanPos );
664
665
        //===== quantization =====
666
0
        int quantScale;
667
0
        if( bUseScalingList ){ quantScale = quantScaleList[uiBlkPos]; }
668
0
        else{                  quantScale = defaultQuantScale; }
669
        
670
0
        const uint32_t uiMaxAbsLevel = ( std::abs( plSrcCoeff[uiBlkPos] ) * quantScale + iQOffset ) >> iQBits;
671
672
0
        if( uiMaxAbsLevel )
673
0
        {
674
0
          iLastScanPos = iScanPos;
675
0
          lastSubSetId = subSetId;
676
0
          break;
677
0
        }
678
#if ENABLE_TRACING
679
        if( bFirstNZSeen )
680
        {
681
          DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, cctx.blockPos( iScanPos ), cctx.cgPosX(), cctx.cgPosY(), cctx.posX( iScanPos ), cctx.posY( iScanPos ) );
682
          DTRACE( g_trace_ctx, D_RDOQ, " remRegBins=%d \n", remRegBins );
683
          DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", 0 );
684
        }
685
#endif
686
0
      }
687
0
    }
688
689
    //////////////////////////////////////////////////////////////////////////
690
    //  Loop over coefficients
691
    //////////////////////////////////////////////////////////////////////////
692
693
0
    cctx.initSubblock( subSetId );
694
695
0
    const int remRegBinsStartCG = remRegBins;
696
697
0
    for( ; iScanPosinCG >= 0; iScanPosinCG--, iScanPos-- )
698
0
    {
699
0
      const uint32_t uiBlkPos = cctx.blockPos( iScanPos );
700
0
      int quantScale;
701
0
      int iErrScale;
702
      //===== quantization =====
703
0
      if( bUseScalingList ){
704
0
        quantScale = quantScaleList[uiBlkPos];
705
0
        iErrScale  = piErrScale[uiBlkPos];
706
0
      }
707
0
      else{
708
0
        quantScale = defaultQuantScale;
709
0
        iErrScale  = defaultErrScale;
710
0
      }
711
0
      const int iScaledLevel = std::abs( plSrcCoeff[uiBlkPos] ) * quantScale;
712
0
      const int iAbsLevel    = ( iScaledLevel + iQOffset ) >> iQBits;
713
714
      //============ Set context models ===============
715
0
      unsigned ctxIdSig = 0;
716
717
0
      if( iScanPos != iLastScanPos )
718
0
      {
719
0
        ctxIdSig = cctx.sigCtxIdAbsWithAcc( iScanPos, 0 );
720
0
      }
721
0
      uint8_t     ctxOffset     = cctx.ctxOffsetAbs();
722
0
      uint32_t    uiParCtx      = cctx.parityCtxIdAbs   ( ctxOffset );
723
0
      uint32_t    uiGt1Ctx      = cctx.greater1CtxIdAbs ( ctxOffset );
724
0
      uint32_t    uiGt2Ctx      = cctx.greater2CtxIdAbs ( ctxOffset );
725
0
      uint32_t    goRiceZero    = 0;
726
727
0
      const BinFracBits& fracBitsPar = fracBits.getFracBitsArray( uiParCtx );
728
0
      const BinFracBits& fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx );
729
0
      const BinFracBits& fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx );
730
731
0
      if( remRegBins < 4 )
732
0
      {
733
0
        unsigned  sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 );
734
0
        goRiceParam      = g_auiGoRiceParsCoeff   [ sumAbs ];
735
0
        goRiceZero       = g_auiGoRicePosCoeff0(0, goRiceParam);
736
0
      }
737
738
#if ENABLE_TRACING
739
      DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, cctx.blockPos( iScanPos ), cctx.cgPosX(), cctx.cgPosY(), cctx.posX( iScanPos ), cctx.posY( iScanPos ) );
740
      DTRACE( g_trace_ctx, D_RDOQ, " remRegBins=%d \n", remRegBins );
741
      bFirstNZSeen = true;
742
#endif
743
744
      // Cost for zero coeff
745
0
      piCostCoeff0[iScanPosinCG] = _dist( iScaledLevel, iErrScale, iErrScaleShift );
746
747
0
      uint32_t uiLevel = 0;
748
0
      if( iAbsLevel == 0 )
749
0
      {
750
        // ----------------- ABS LEVEL 0 ----------------
751
0
        const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
752
0
        piCostSig  [iScanPosinCG] = xiGetCostSigCoef( fracBitsSig, 0 );
753
0
        piCostCoeff[iScanPosinCG] = piCostCoeff0[iScanPosinCG] + piCostSig[iScanPosinCG];
754
755
0
        if( bSBH )
756
0
        {
757
0
          cost_t iErr1        = iScaledLevel - ( (int64_t)1 << iQBits );
758
0
          cost_t iDist1       = _dist( iErr1, iErrScale, iErrScaleShift );
759
0
          cost_t iRate1       = remRegBins < 4 ? 
760
0
                                 xiGetICRateCost( 1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) -
761
0
                                 xiGetICRateCost( 0, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ):
762
0
                                 fracBitsGt1.intBits[ 0 ];
763
764
0
          cost_t iCost1       = iDist1 + iRate1 + xiGetCostSigCoef( fracBitsSig, 1 );
765
766
0
          piCostDeltaSBH[iScanPosinCG] = iCost1 - piCostCoeff[iScanPosinCG];
767
0
          piAddSBH      [iScanPosinCG] = 1;
768
0
        }
769
0
        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", 0 );
770
0
      }
771
0
      else
772
0
      {
773
        //===== coefficient level estimation =====
774
0
        const int iFloor = (int)( iScaledLevel >> iQBits );
775
0
        const int iCeil  = iFloor + 1;
776
777
0
        if( remRegBins >= 4 && iScanPos != iLastScanPos && iCeil >= 4 )
778
0
        {
779
0
          int  sumAll = cctx.templateAbsSum( iScanPos, piDstCoeff, 4 );
780
0
          goRiceParam = g_auiGoRiceParsCoeff[ sumAll ];
781
0
        }
782
783
0
        if( iScanPos == iLastScanPos )
784
0
        {
785
          // =======================             =======================
786
          // ======================= LAST LEVEL  =======================
787
          // =======================             =======================
788
0
          piCostSig[ iScanPosinCG ] = 0;
789
          // Floor = 0, Uncoded
790
0
          cost_t iCurrCostF = piCostCoeff0[ iScanPosinCG ];
791
792
0
          if( iFloor )
793
0
          {
794
            // ----------------- LEVEL > 0  ----------------
795
0
            cost_t iErrF       = iScaledLevel - (iFloor << iQBits);
796
0
            cost_t iDistF      = _dist( iErrF, iErrScale, iErrScaleShift ); //(iErrF*iErrScale) >> iErrScaleShift;
797
0
            iCurrCostF         = iDistF + xiGetICRateCost( iFloor, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
798
0
          }
799
800
          // ----------------- LEVEL + 1 ----------------
801
0
          cost_t iErrC         = iScaledLevel - (iCeil << iQBits);
802
0
          cost_t iDistC        = _dist( iErrC, iErrScale, iErrScaleShift ); //(iErrC*iErrScale) >> iErrScaleShift;
803
0
          cost_t iCurrCostC    = iDistC + xiGetICRateCost( iCeil, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
804
805
0
          if( iCurrCostC < iCurrCostF )
806
0
          {
807
0
            uiLevel                   = iCeil;
808
0
            piCostCoeff[iScanPosinCG] = iCurrCostC;
809
0
            if( bSBH ){
810
0
              piCostDeltaSBH[iScanPosinCG] = iCurrCostF - iCurrCostC;
811
0
              piAddSBH      [iScanPosinCG] = -1;
812
0
            }
813
0
          }
814
0
          else
815
0
          {
816
0
            if( iFloor == 0 )
817
0
            {
818
0
              DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", 0 );
819
0
              DTRACE( g_trace_ctx, D_RDOQ, " CostC0=%lld\n", (int64_t)piCostCoeff0[iScanPosinCG] );
820
0
              DTRACE( g_trace_ctx, D_RDOQ, " CostC =%lld\n", (int64_t)iCurrCostC        );
821
822
0
              iLastScanPos = -1;
823
0
              lastSubSetId = -1;
824
0
              iScanPos--;
825
0
              iScanPosinCG--;
826
0
              goto findlast2;
827
0
            }
828
0
            uiLevel = iFloor;
829
0
            piCostCoeff[iScanPosinCG] = iCurrCostF;
830
0
            if( bSBH ){
831
0
              piCostDeltaSBH[iScanPosinCG] = iCurrCostC - iCurrCostF;
832
0
              piAddSBH      [iScanPosinCG] = 1;
833
0
            }
834
0
          }
835
0
        }
836
0
        else
837
0
        {
838
0
          const BinFracBits& fracBitsSig = fracBits.getFracBitsArray( ctxIdSig );
839
0
          cost_t iCostSig1 = xiGetCostSigCoef( fracBitsSig, 1 );
840
0
          if( iCeil < 3 )
841
0
          {
842
            // =======================                 =======================
843
            // ======================= LEVELS 0, 1, 2  =======================
844
            // =======================                 =======================
845
            
846
            // ----------------- BEST LEVEL = 0 ----------------
847
0
            cost_t iCostSig0    = xiGetCostSigCoef( fracBitsSig, 0 );
848
0
            cost_t iBestCost    = piCostCoeff0[iScanPosinCG] + iCostSig0;
849
0
            cost_t iBestCostSig = iCostSig0;
850
0
            cost_t iCostF       = iBestCost;
851
0
            uiLevel = 0;
852
853
0
            if( iFloor == 1 )
854
0
            {
855
              // ----------------- LEVEL = 1 ----------------
856
0
              cost_t iErrF      = iScaledLevel - ( iFloor << iQBits );
857
0
              cost_t iDistF     = _dist( iErrF, iErrScale, iErrScaleShift ); //( iErrF*iErrScale ) >> iErrScaleShift;
858
0
              iCostF            = iDistF + iCostSig1 + xiGetICRateCost( iFloor, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
859
860
0
              if( iCostF < iBestCost )
861
0
              {
862
0
                uiLevel      = iFloor;
863
0
                iBestCost    = iCostF;
864
0
                iBestCostSig = iCostSig1;
865
0
                if( bSBH )
866
0
                {
867
0
                  piCostDeltaSBH[iScanPosinCG] = iBestCost - iCostF;
868
0
                  piAddSBH      [iScanPosinCG] = -1;
869
0
                }
870
0
              }
871
0
              else
872
0
              {
873
0
                if( bSBH )
874
0
                {
875
0
                  piCostDeltaSBH[iScanPosinCG] = iCostF - iBestCost;
876
0
                  piAddSBH      [iScanPosinCG] = 1;
877
0
                }
878
0
              }
879
0
            }
880
881
            // ----------------- LEVELS = 1, 2 ----------------
882
0
            cost_t iErrC         = iScaledLevel - ( iCeil << iQBits );
883
0
            cost_t iDistC        = _dist( iErrC, iErrScale, iErrScaleShift ); //( iErrC*iErrScale ) >> iErrScaleShift;
884
0
            cost_t iCostC        = iDistC + iCostSig1 + xiGetICRateCost( iCeil, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
885
886
0
            if( iCostC < iBestCost )
887
0
            {
888
0
              uiLevel                   = iCeil;
889
0
              piCostCoeff[iScanPosinCG] = iCostC;
890
0
              piCostSig[iScanPosinCG]   = iCostSig1;
891
0
              if( bSBH )
892
0
              {
893
0
                piCostDeltaSBH[iScanPosinCG] = iCostF - iCostC;
894
0
                piAddSBH[iScanPosinCG]       = -1;
895
0
              }
896
0
            }
897
0
            else
898
0
            {
899
0
              piCostCoeff[iScanPosinCG] = iBestCost;
900
0
              piCostSig[iScanPosinCG] = iBestCostSig;
901
0
              if( bSBH )
902
0
              {
903
0
                piCostDeltaSBH[iScanPosinCG] = iCostC - iCostF;
904
0
                piAddSBH      [iScanPosinCG] = 1;
905
0
              }
906
0
            }
907
0
          }
908
0
          else
909
0
          {
910
            // ----------------- LEVEL X, X+1 ----------------
911
0
            cost_t iErrF        = iScaledLevel - (iFloor << iQBits);
912
0
            cost_t iDistF       = _dist( iErrF, iErrScale, iErrScaleShift ); //(iErrF*iErrScale) >> iErrScaleShift;
913
0
            cost_t iCostF       = iDistF + iCostSig1 + xiGetICRateCost( iFloor, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
914
915
0
            cost_t iErrC        = iScaledLevel - ( iCeil << iQBits );
916
0
            cost_t iDistC       = _dist( iErrC, iErrScale, iErrScaleShift ); //( iErrC*iErrScale ) >> iErrScaleShift;
917
0
            cost_t iCostC       = iDistC + iCostSig1 + xiGetICRateCost( iCeil, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange );
918
919
0
            piCostSig[iScanPosinCG] = iCostSig1;
920
0
            if( iCostC < iCostF )
921
0
            {
922
0
              uiLevel = iCeil;
923
0
              piCostCoeff[iScanPosinCG] = iCostC;
924
0
              if( bSBH )
925
0
              {
926
0
                piCostDeltaSBH[iScanPosinCG] = iCostF - iCostC;
927
0
                piAddSBH[iScanPosinCG]       = -1;
928
0
              }
929
0
            }
930
0
            else
931
0
            {
932
0
              uiLevel = iFloor;
933
0
              piCostCoeff[iScanPosinCG] = iCostF;
934
0
              if( bSBH )
935
0
              {
936
0
                piCostDeltaSBH[iScanPosinCG] = iCostC - iCostF;
937
0
                piAddSBH[iScanPosinCG] = 1;
938
0
              }
939
0
            }
940
0
          }
941
0
        }
942
0
        piDstCoeff[uiBlkPos] = uiLevel;
943
0
        DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel );
944
0
        DTRACE( g_trace_ctx, D_RDOQ, " CostC0=%lld\n", (int64_t)piCostCoeff0[iScanPosinCG] );
945
0
        DTRACE( g_trace_ctx, D_RDOQ, " CostC =%lld\n", (int64_t)piCostCoeff [iScanPosinCG] );
946
0
        if( uiLevel )
947
0
        {
948
0
          uiAbsSumCG    += uiLevel;
949
0
          iNZbeforePos0 += iScanPosinCG; // hack-> just add instead of checking iScanPosinCG >0 and increment
950
0
          cctx.absVal1stPass( iScanPos, std::min<TCoeff>( 4 + ( uiLevel & 1 ), uiLevel ) );
951
0
          cctx.setSigGroup();
952
0
        }
953
0
      }
954
955
956
0
      if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) )
957
0
      {
958
0
        goRiceParam   = 0;
959
0
      }
960
0
      else if( remRegBins >= 4 )
961
0
      {
962
0
        remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos);
963
0
      }
964
965
0
      iUncodedCostCG += piCostCoeff0[iScanPosinCG];
966
0
      iCodedCostCG   += piCostCoeff[iScanPosinCG];
967
0
      DTRACE( g_trace_ctx, D_RDOQ_MORE, "Uncoded=%lld\n", (long long)( iUncodedCostBlock + iUncodedCostCG ) );
968
0
      DTRACE( g_trace_ctx, D_RDOQ_MORE, "Coded  =%lld\n", (long long)( iCodedCostBlock   + iCodedCostCG   ) );
969
0
    } // for (iScanPosinCG)
970
971
    //================== Group sig. flag ===================
972
0
    cost_t iCostCoeffGroupSig = 0;
973
0
    if( lastSubSetId >= 0 )
974
0
    {
975
0
      if( subSetId )
976
0
      {
977
0
        const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() );
978
0
        cost_t iCostCoeffGroupSig0 = xiGetCostSigCoeffGroup( fracBitsSigGroup, 0 );
979
980
        // if no coeff in CG
981
0
        if( !cctx.isSigGroup() )
982
0
        {
983
0
          iCodedCostCG = iUncodedCostCG + iCostCoeffGroupSig0;
984
0
          iCostCoeffGroupSig  = iCostCoeffGroupSig0;
985
0
        }
986
0
        else
987
0
        {
988
          // if not topleft CG
989
0
          if( subSetId < lastSubSetId )
990
0
          {
991
0
            cost_t iCostCoeffGroupSig1 = xiGetCostSigCoeffGroup( fracBitsSigGroup, 1 );
992
0
            iCostCoeffGroupSig = iCostCoeffGroupSig1;
993
994
            // if only one coeff in CG
995
0
            if( !iNZbeforePos0 ) {
996
0
              iCodedCostCG -= piCostSig[0];
997
0
            }
998
0
            cost_t iUncodedCostCGTmp = iUncodedCostCG + iCostCoeffGroupSig0;
999
0
            iCodedCostCG += iCostCoeffGroupSig1;
1000
1001
            // if we can save cost, change this block to all-zero block
1002
0
            if( iUncodedCostCGTmp < iCodedCostCG )
1003
0
            {
1004
0
              cctx.resetSigGroup();
1005
0
              iCodedCostCG = iUncodedCostCGTmp;
1006
0
              iCostCoeffGroupSig = iCostCoeffGroupSig0;
1007
0
              remRegBins = remRegBinsStartCG;
1008
1009
              // reset coeffs to 0 in this block
1010
0
              for( iScanPosinCG = iCGSize - 1; iScanPosinCG >= 0; iScanPosinCG-- )
1011
0
              {
1012
0
                int iScanPosTmp = subSetId * iCGSize + iScanPosinCG;
1013
0
                uint32_t uiBlkPos = cctx.blockPos( iScanPosTmp );
1014
0
                if( piDstCoeff[uiBlkPos] )
1015
0
                {
1016
0
                  int absLevel = std::abs( piDstCoeff[uiBlkPos] );
1017
0
                  cctx.remAbsVal1stPass( iScanPosTmp, std::min( absLevel, 4 + ( absLevel & 1 ) ) );
1018
0
                  piDstCoeff[uiBlkPos] = 0;
1019
0
                }
1020
0
              }
1021
0
              uiAbsSumCG = 0;
1022
0
              if( lastSubSetId == subSetId ) {
1023
0
                iCodedCostCG   = 0;
1024
0
                iUncodedCostCG = 0;
1025
0
                iLastScanPos   = -1;
1026
0
                lastSubSetId   = -1;
1027
0
              }
1028
0
            }
1029
0
          }
1030
0
          else
1031
0
          {
1032
0
            cctx.setSigGroup();
1033
0
          }
1034
0
        }
1035
0
      }
1036
0
    }
1037
1038
    //===== estimate last position cost =====
1039
0
    bestTotalCost += iCodedCostCG;
1040
0
    if( !lastOptFinished )
1041
0
    {
1042
0
      if( cctx.isSigGroup( subSetId ) )
1043
0
      {
1044
0
        cost_t codedCostBlockTmp = iUncodedCostBlock + iCodedCostCG - iCostCoeffGroupSig;
1045
0
        int startPosInCG  = subSetId == lastSubSetId ? iLastScanPos % iCGSize: iCGSizeM1;
1046
0
        int newAbsSumCG   = uiAbsSumCG;
1047
0
        int bestLastIdxP1 = iLastScanPos + 1;
1048
0
        for( int iScanPosinCGTmp = startPosInCG; iScanPosinCGTmp >= 0; iScanPosinCGTmp-- )
1049
0
        {
1050
0
          uint32_t iScanPosTmp = ( subSetId << log2CGSize ) + iScanPosinCGTmp;
1051
0
          uint32_t uiBlkPos    = cctx.blockPos( iScanPosTmp );
1052
1053
0
          if( piDstCoeff[uiBlkPos] )
1054
0
          {
1055
0
            uint32_t  uiPosY = uiBlkPos >> uiLog2BlockWidth;
1056
0
            uint32_t  uiPosX = uiBlkPos - (uiPosY << uiLog2BlockWidth);
1057
0
            const cost_t iCostLast = xiGetCostLast( uiPosX, uiPosY, chType );
1058
0
            const cost_t totalCost = codedCostBlockTmp + iCostLast - piCostSig[iScanPosinCGTmp];
1059
1060
0
            if( totalCost < bestTotalCost )
1061
0
            {
1062
0
              bestLastIdxP1 = iScanPosTmp + 1;
1063
0
              bestTotalCost = totalCost;
1064
0
              lastSubSetId  = subSetId;
1065
0
              uiAbsSumCG    = newAbsSumCG;
1066
0
              uiAbsSum      = 0;
1067
0
            }
1068
1069
0
            if( piDstCoeff[uiBlkPos] > 1 )
1070
0
            {
1071
0
              lastOptFinished = true;
1072
0
              break;
1073
0
            }
1074
0
            newAbsSumCG -= 1;
1075
0
            codedCostBlockTmp -= piCostCoeff [ iScanPosinCGTmp ];
1076
0
            codedCostBlockTmp += piCostCoeff0[ iScanPosinCGTmp ];
1077
0
          }
1078
0
          else
1079
0
          {
1080
0
            codedCostBlockTmp -= piCostSig[ iScanPosinCGTmp ];
1081
0
          }
1082
0
        } //end for
1083
0
        for( int iScanPosTmp = bestLastIdxP1; iScanPosTmp <= iLastScanPos; iScanPosTmp++ )
1084
0
        {
1085
0
          const int uiBlkPos = cctx.blockPos( iScanPosTmp );
1086
0
          if( piDstCoeff[uiBlkPos] )
1087
0
          {
1088
0
            int absLevel = std::abs( piDstCoeff[uiBlkPos] );
1089
0
            cctx.remAbsVal1stPass( iScanPosTmp, std::min( absLevel, 4 + ( absLevel & 1 ) ) );
1090
0
            piDstCoeff[uiBlkPos] = 0;
1091
0
          }
1092
0
        }
1093
0
        iLastScanPos = bestLastIdxP1 - 1;
1094
0
      }
1095
0
    }
1096
1097
    //=============== estimate Sign Bit Hiding ================
1098
0
    if( bSBH )
1099
0
    {
1100
0
      if( uiAbsSumCG >= 2 /*&& cctx.isSigGroup()*/ )
1101
0
      {
1102
0
        int iSubPos         = subSetId*iCGSize;
1103
0
        int iLastNZPosInCG  = -1;
1104
0
        int iFirstNZPosInCG = iCGSize;
1105
1106
0
        for( int n = 0; n <iCGSize; n++ ) {
1107
0
          if( piDstCoeff[ cctx.blockPos( n + iSubPos ) ] ) {
1108
0
            iFirstNZPosInCG = n;
1109
0
            break;
1110
0
          }
1111
0
        }
1112
0
        if( lastSubSetId == subSetId ){
1113
0
          iLastNZPosInCG = ( iLastScanPos )%iCGSize;
1114
0
          if( piDstCoeff[ cctx.blockPos( iLastScanPos ) ] == 1 && ( piAddSBH[iLastNZPosInCG] == -1 ) )
1115
0
          {
1116
0
            piCostDeltaSBH[iLastNZPosInCG] -= (4<<SCALE_BITS);
1117
0
          }
1118
0
        }
1119
0
        else{
1120
0
          for( int n = iCGSize - 1; n >= 0; n-- ) {
1121
0
            if( piDstCoeff[ cctx.blockPos( n + iSubPos ) ] ) {
1122
0
              iLastNZPosInCG = n;
1123
0
              break;
1124
0
            }
1125
0
          }
1126
0
        }
1127
0
        if( iLastNZPosInCG - iFirstNZPosInCG >= SBH_THRESHOLD )
1128
0
        {
1129
0
          iCodedCostCG -= xiGetICost( (int)xGetIEPRate() ); //subtract cost for one sign bin
1130
0
          bool bSign    = plSrcCoeff[ cctx.blockPos( iSubPos + iFirstNZPosInCG) ] < 0;
1131
1132
0
          if( bSign != ( uiAbsSumCG & 0x1 ) ) {
1133
0
            int iLastPosInCG    = ( lastSubSetId == subSetId ) ? iLastNZPosInCG : iCGSize - 1;
1134
0
            int64_t iMinCostDelta = std::numeric_limits<int64_t>::max();
1135
0
            int iMinCostPos     = -1;
1136
1137
0
            if( piDstCoeff[ cctx.blockPos( iFirstNZPosInCG + iSubPos ) ] >1 ){
1138
0
              iMinCostDelta = piCostDeltaSBH[iFirstNZPosInCG];
1139
0
              iMinCostPos   = iFirstNZPosInCG;
1140
0
            }
1141
1142
0
            for( int n = 0; n<iFirstNZPosInCG; n++ ){
1143
0
              if( ( plSrcCoeff[ cctx.blockPos( iSubPos + n ) ] < 0 ) == bSign ){
1144
0
                if( piCostDeltaSBH[n] < iMinCostDelta ){
1145
0
                  iMinCostDelta = piCostDeltaSBH[n];
1146
0
                  iMinCostPos   = n;
1147
0
                }
1148
0
              }
1149
0
            }
1150
1151
0
            for( int n = iFirstNZPosInCG + 1; n <= iLastPosInCG; n++ ){
1152
0
              if( piCostDeltaSBH[n] < iMinCostDelta ){
1153
0
                iMinCostDelta = piCostDeltaSBH[n];
1154
0
                iMinCostPos   = n;
1155
0
              }
1156
0
            }
1157
0
            const int oldAbsVal = std::abs( piDstCoeff[cctx.blockPos( iMinCostPos + iSubPos )] );
1158
0
            if( oldAbsVal ) cctx.remAbsVal1stPass( iMinCostPos + iSubPos, std::min( oldAbsVal, 4 + ( oldAbsVal & 1 ) ) );
1159
0
            piDstCoeff[ cctx.blockPos( iMinCostPos + iSubPos ) ] += piAddSBH[iMinCostPos];
1160
0
            const int absVal = std::abs( piDstCoeff[cctx.blockPos( iMinCostPos + iSubPos )] );
1161
0
            if( absVal ) cctx.absVal1stPass( iMinCostPos + iSubPos, std::min( absVal, 4 + ( absVal & 1 ) ) );
1162
0
            uiAbsSumCG   += piAddSBH[iMinCostPos];
1163
0
            iCodedCostCG += iMinCostDelta;
1164
0
          }
1165
0
        }
1166
0
      }
1167
0
    }
1168
1169
0
    iCodedCostBlock   += iCodedCostCG;
1170
0
    iUncodedCostBlock += iUncodedCostCG;
1171
0
    uiAbsSum += uiAbsSumCG;
1172
0
    DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: [%2d:%2d]\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), cctx.cgPosX(), cctx.cgPosY() );
1173
0
    DTRACE( g_trace_ctx, D_RDOQ_MORE, "Uncoded=%lld\n", (long long)( iUncodedCostBlock ) );
1174
0
    DTRACE( g_trace_ctx, D_RDOQ_MORE, "Coded  =%lld\n", (long long)( iCodedCostBlock ) );
1175
0
  } //end for (iCGScanPos)
1176
1177
0
  iCodedCostBlock = bestTotalCost;
1178
1179
0
  if( iLastScanPos < 0 )
1180
0
  {
1181
0
    CHECK( uiAbsSum != 0, "Illegal" );
1182
0
    return 0;
1183
0
  }
1184
1185
0
  if( !CU::isIntra( *tu.cu ) && isLuma( compID ) )
1186
0
  {
1187
0
    const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() );
1188
0
    iUncodedCostBlock += xiGetICost( fracBitsQtRootCbf.intBits[0] );
1189
0
    iCodedCostBlock   += xiGetICost( fracBitsQtRootCbf.intBits[1] );
1190
0
  }
1191
0
  else
1192
0
  {
1193
0
    bool previousCbf       = tu.cbf[COMP_Cb];
1194
0
    bool lastCbfIsInferred = false;
1195
0
    const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID);
1196
0
    if( useIntraSubPartitions )
1197
0
    {
1198
0
      bool rootCbfSoFar       = false;
1199
0
      bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
1200
0
      uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
1201
0
      if( isLastSubPartition )
1202
0
      {
1203
0
        TransformUnit* tuPointer = tu.cu->firstTU;
1204
0
        for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
1205
0
        {
1206
0
          rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
1207
0
          tuPointer     = tuPointer->next;
1208
0
        }
1209
0
        if( !rootCbfSoFar )
1210
0
        {
1211
0
          lastCbfIsInferred = true;
1212
0
        }
1213
0
      }
1214
0
      if( !lastCbfIsInferred )
1215
0
      {
1216
0
        previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
1217
0
      }
1218
0
    }
1219
0
    BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) );
1220
1221
0
    if( !lastCbfIsInferred )
1222
0
    {
1223
0
      iUncodedCostBlock += xiGetICost(fracBitsQtCbf.intBits[0]);
1224
0
      iCodedCostBlock   += xiGetICost(fracBitsQtCbf.intBits[1]);
1225
0
    }
1226
0
  }
1227
1228
0
  if( iUncodedCostBlock <= iCodedCostBlock )
1229
0
  {
1230
0
    iCodedCostBlock = iUncodedCostBlock;
1231
0
    uiAbsSum = 0;
1232
0
    ::memset( piDstCoeff, 0, uiMaxNumCoeff*sizeof( TCoeffSig ) );
1233
0
  }
1234
0
  else
1235
0
  {
1236
    // Check due to saving of last pos. Sign data hiding can change the position of last coef.
1237
0
    if( bSBH )
1238
0
    {
1239
0
      if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 )
1240
0
      {
1241
0
        int scanPos = iLastScanPos - 1;
1242
0
        for( ; scanPos >= 0; scanPos-- )
1243
0
        {
1244
0
          if( piDstCoeff[cctx.blockPos( scanPos )] )
1245
0
            break;
1246
0
        }
1247
0
        iLastScanPos = scanPos;
1248
0
      }
1249
0
    }
1250
1251
0
    for ( int scanPos = 0; scanPos <= iLastScanPos; scanPos++ )
1252
0
    {
1253
0
      int blkPos = cctx.blockPos( scanPos );
1254
0
      TCoeff level = piDstCoeff[ blkPos ];
1255
0
      int iSign = plSrcCoeff[blkPos] >> ( sizeof(TCoeff)*8 - 1 );
1256
0
      piDstCoeff[blkPos] = ( iSign^level ) - iSign;
1257
0
    }
1258
0
    tu.lastPos[compID] = iLastScanPos;
1259
0
  }
1260
1261
#if ENABLE_TRACING
1262
  for ( int scanPos = iCGNum * iCGSize-1; scanPos >= 0; scanPos-- )
1263
  {
1264
    if(( scanPos & iCGSizeM1) == iCGSizeM1 )
1265
    {
1266
      DTRACE(g_trace_ctx, D_RDOQ, "%d:", scanPos >> cctx.log2CGSize() );
1267
    }
1268
    int blkPos = cctx.blockPos( scanPos );
1269
    DTRACE( g_trace_ctx, D_RDOQ, "%3d ", piDstCoeff[blkPos] );
1270
    if( scanPos % iCGSize == 0 )
1271
    {
1272
      DTRACE(g_trace_ctx, D_RDOQ, "\n");
1273
    }
1274
  }
1275
#endif
1276
1277
0
  DTRACE( g_trace_ctx, D_RDOQ_MORE, "Uncoded=%lld\n", (long long)( iUncodedCostBlock ) );
1278
0
  DTRACE( g_trace_ctx, D_RDOQ_MORE, "Coded  =%lld\n", (long long)( iCodedCostBlock ) );
1279
0
  DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d, lastScanPos=%d, absSum=%d, cost=%lld \n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID, iLastScanPos, uiAbsSum,  (long long)iCodedCostBlock );
1280
0
  return 0;
1281
0
}
Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<true, true>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&)
Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<true, false>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&)
Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<false, true>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&)
Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<false, false>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&)
1282
1283
// Runtime-to-compile-time dispatcher for the fast RDOQ routine.
//
// xRateDistOptQuantFast is templated on two booleans; this wrapper reads the
// two corresponding runtime switches and forwards all other arguments,
// unchanged, to the matching instantiation:
//   - first template argument : the slice's signDataHidingEnabled flag
//   - second template argument: bUseScalingList
//
// The return value is whatever the selected instantiation returns.
int QuantRDOQ2::xRateDistOptQuant( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx, bool bUseScalingList )
{
  // Slice-level switch for sign data hiding.
  const bool signHidingOn = tu.cs->slice->signDataHidingEnabled;

  if( signHidingOn && bUseScalingList )
  {
    return xRateDistOptQuantFast<true, true>( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }

  if( signHidingOn )
  {
    return xRateDistOptQuantFast<true, false>( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }

  if( bUseScalingList )
  {
    return xRateDistOptQuantFast<false, true>( tu, compID, pSrc, uiAbsSum, cQP, ctx );
  }

  return xRateDistOptQuantFast<false, false>( tu, compID, pSrc, uiAbsSum, cQP, ctx );
}
1297
1298
1299
} // namespace vvenc
1300
1301
//! \}