Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/DepQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
#include "DepQuant.h"
44
#include "TrQuant.h"
45
#include "CodingStructure.h"
46
#include "UnitTools.h"
47
48
#include <bitset>
49
50
//! \ingroup CommonLib
51
//! \{
52
53
namespace vvenc {
54
55
56
namespace DQIntern
57
{
58
  static void findFirstPos( int& firstTestPos, const TCoeff* tCoeff, const DQIntern::TUParameters& tuPars, int defaultTh,
59
                            bool zeroOutForThres, int zeroOutWidth, int zeroOutHeight )
60
1.72M
  {
61
193M
    for( ; firstTestPos >= 0; firstTestPos-- )
62
192M
    {
63
192M
      if( zeroOutForThres && ( tuPars.m_scanId2BlkPos[firstTestPos].x >= zeroOutWidth ||
64
24.4M
                              tuPars.m_scanId2BlkPos[firstTestPos].y >= zeroOutHeight ) )
65
0
      {
66
0
        continue;
67
0
      }
68
192M
      if( abs( tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx] ) > defaultTh )
69
718k
      {
70
718k
        break;
71
718k
      }
72
192M
    }
73
1.72M
  }
74
75
  void Rom::xInitScanArrays()
76
17.7k
  {
77
17.7k
    if( m_scansInitialized )
78
0
    {
79
0
      return;
80
0
    }
81
17.7k
    ::memset( m_scanId2NbInfoSbbArray, 0, sizeof(m_scanId2NbInfoSbbArray) );
82
17.7k
    ::memset( m_scanId2NbInfoOutArray, 0, sizeof(m_scanId2NbInfoOutArray) );
83
17.7k
    ::memset( m_tuParameters,          0, sizeof(m_tuParameters) );
84
85
17.7k
    uint32_t raster2id[ MAX_CU_SIZE * MAX_CU_SIZE ];
86
17.7k
    ::memset(raster2id, 0, sizeof(raster2id));
87
88
142k
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
89
124k
    {
90
995k
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
91
871k
      {
92
871k
        if( (hd == 0 && vd <= 1) || (hd <= 1 && vd == 0) )
93
53.3k
        {
94
53.3k
          continue;
95
53.3k
        }
96
817k
        const uint32_t      blockWidth    = (1 << hd);
97
817k
        const uint32_t      blockHeight   = (1 << vd);
98
817k
        const uint32_t      log2CGWidth   = g_log2SbbSize[hd][vd][0];
99
817k
        const uint32_t      log2CGHeight  = g_log2SbbSize[hd][vd][1];
100
817k
        const uint32_t      groupWidth    = 1 << log2CGWidth;
101
817k
        const uint32_t      groupHeight   = 1 << log2CGHeight;
102
817k
        const uint32_t      groupSize     = groupWidth * groupHeight;
103
817k
        const SizeType      blkWidthIdx   = Log2( blockWidth );
104
817k
        const SizeType      blkHeightIdx  = Log2( blockHeight );
105
817k
        const ScanElement * scanId2RP     = getScanOrder( SCAN_GROUPED_4x4, blkWidthIdx, blkHeightIdx );
106
817k
        NbInfoSbb*&         sId2NbSbb     = m_scanId2NbInfoSbbArray[hd][vd];
107
817k
        NbInfoOut*&         sId2NbOut     = m_scanId2NbInfoOutArray[hd][vd];
108
        // consider only non-zero-out region
109
817k
        const uint32_t      blkWidthNZOut = std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockWidth  );
110
817k
        const uint32_t      blkHeightNZOut= std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockHeight );
111
817k
        const uint32_t      totalValues   = blkWidthNZOut * blkHeightNZOut;
112
113
817k
        sId2NbSbb = new NbInfoSbb[ totalValues ];
114
817k
        sId2NbOut = new NbInfoOut[ totalValues ];
115
116
161M
        for( uint32_t scanId = 0; scanId < totalValues; scanId++ )
117
160M
        {
118
160M
          raster2id[scanId2RP[scanId].idx] = scanId;
119
160M
          sId2NbSbb[scanId].numInv = 0;
120
160M
        }
121
122
161M
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
123
160M
        {
124
160M
          const int posX = scanId2RP[scanId].x;
125
160M
          const int posY = scanId2RP[scanId].y;
126
160M
          const int rpos = scanId2RP[scanId].idx;
127
160M
          {
128
            //===== inside subband neighbours =====
129
160M
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
130
160M
            int            cpos[5];
131
132
160M
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] < groupSize + begSbb ? raster2id[rpos+1           ] - begSbb : 0 ) : 0 );
133
160M
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] < groupSize + begSbb ? raster2id[rpos+2           ] - begSbb : 0 ) : 0 );
134
160M
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] < groupSize + begSbb ? raster2id[rpos+1+blockWidth] - begSbb : 0 ) : 0 );
135
160M
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] < groupSize + begSbb ? raster2id[rpos+  blockWidth] - begSbb : 0 ) : 0 );
136
160M
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] < groupSize + begSbb ? raster2id[rpos+2*blockWidth] - begSbb : 0 ) : 0 );
137
138
160M
            int num = 0;
139
160M
            int inPos[5] = { 0, };
140
141
643M
            while( true )
142
643M
            {
143
643M
              int nk = -1;
144
3.86G
              for( int k = 0; k < 5; k++ )
145
3.21G
              {
146
3.21G
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
147
723M
                {
148
723M
                  nk = k;
149
723M
                }
150
3.21G
              }
151
643M
              if( nk < 0 )
152
160M
              {
153
160M
                break;
154
160M
              }
155
483M
              inPos[ num++ ] = uint8_t( cpos[nk] );
156
483M
              cpos[nk] = 0;
157
483M
            }
158
478M
            for( int k = num; k < 5; k++ )
159
318M
            {
160
318M
              inPos[k] = 0;
161
318M
            }
162
643M
            for( int k = 0; k < num; k++ )
163
483M
            {
164
483M
              CHECK( sId2NbSbb[begSbb + inPos[k]].numInv >= 5, "" );
165
483M
              sId2NbSbb[begSbb + inPos[k]].invInPos[sId2NbSbb[begSbb + inPos[k]].numInv++] = scanId & ( groupSize - 1 );
166
483M
            }
167
160M
          }
168
160M
          {
169
            //===== outside subband neighbours =====
170
160M
            NbInfoOut&     nbOut  = sId2NbOut[ scanId ];
171
160M
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
172
160M
            int            cpos[5];
173
174
160M
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] >= groupSize + begSbb ? raster2id[rpos+1           ] : 0 ) : 0 );
175
160M
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] >= groupSize + begSbb ? raster2id[rpos+2           ] : 0 ) : 0 );
176
160M
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] >= groupSize + begSbb ? raster2id[rpos+1+blockWidth] : 0 ) : 0 );
177
160M
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] >= groupSize + begSbb ? raster2id[rpos+  blockWidth] : 0 ) : 0 );
178
160M
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] >= groupSize + begSbb ? raster2id[rpos+2*blockWidth] : 0 ) : 0 );
179
180
389M
            for( nbOut.num = 0; true; )
181
389M
            {
182
389M
              int nk = -1;
183
2.33G
              for( int k = 0; k < 5; k++ )
184
1.94G
              {
185
1.94G
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
186
335M
                {
187
335M
                  nk = k;
188
335M
                }
189
1.94G
              }
190
389M
              if( nk < 0 )
191
160M
              {
192
160M
                break;
193
160M
              }
194
228M
              nbOut.outPos[ nbOut.num++ ] = uint16_t( cpos[nk] );
195
228M
              cpos[nk] = 0;
196
228M
            }
197
733M
            for( int k = nbOut.num; k < 5; k++ )
198
572M
            {
199
572M
              nbOut.outPos[k] = 0;
200
572M
            }
201
160M
            nbOut.maxDist = ( scanId == 0 ? 0 : sId2NbOut[scanId-1].maxDist );
202
389M
            for( int k = 0; k < nbOut.num; k++ )
203
228M
            {
204
228M
              if( nbOut.outPos[k] > nbOut.maxDist )
205
24.7M
              {
206
24.7M
                nbOut.maxDist = nbOut.outPos[k];
207
24.7M
              }
208
228M
            }
209
160M
          }
210
160M
        }
211
212
        // make it relative
213
161M
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
214
160M
        {
215
160M
          NbInfoOut& nbOut  = sId2NbOut[scanId];
216
160M
          const int  begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
217
389M
          for( int k = 0; k < nbOut.num; k++ )
218
228M
          {
219
228M
            CHECK(begSbb > nbOut.outPos[k], "Position must be past sub block begin");
220
228M
            nbOut.outPos[k] -= begSbb;
221
228M
          }
222
160M
          nbOut.maxDist -= scanId;
223
160M
        }
224
225
2.45M
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
226
1.63M
        {
227
1.63M
          m_tuParameters[hd][vd][chId] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(chId) );
228
1.63M
        }
229
817k
      }
230
124k
    }
231
17.7k
    m_scansInitialized = true;
232
17.7k
  }
233
234
  void Rom::xUninitScanArrays()
235
17.7k
  {
236
17.7k
    if( !m_scansInitialized )
237
0
    {
238
0
      return;
239
0
    }
240
142k
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
241
124k
    {
242
995k
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
243
871k
      {
244
871k
        NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd];
245
871k
        NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd];
246
871k
        if( sId2NbSbb )
247
817k
        {
248
817k
          delete [] sId2NbSbb;
249
817k
        }
250
871k
        if( sId2NbOut )
251
817k
        {
252
817k
          delete [] sId2NbOut;
253
817k
        }
254
2.61M
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
255
1.74M
        {
256
1.74M
          TUParameters*& tuPars = m_tuParameters[hd][vd][chId];
257
1.74M
          if( tuPars )
258
1.63M
          {
259
1.63M
            delete tuPars;
260
1.63M
          }
261
1.74M
        }
262
871k
      }
263
124k
    }
264
17.7k
    m_scansInitialized = false;
265
17.7k
  }
266
267
268
  TUParameters::TUParameters( const Rom& rom, const unsigned width, const unsigned height, const ChannelType chType )
269
1.63M
  {
270
1.63M
    m_chType              = chType;
271
1.63M
    m_width               = width;
272
1.63M
    m_height              = height;
273
1.63M
    const uint32_t nonzeroWidth  = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_width);
274
1.63M
    const uint32_t nonzeroHeight = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_height);
275
1.63M
    m_numCoeff                   = nonzeroWidth * nonzeroHeight;
276
1.63M
    m_log2SbbWidth        = g_log2SbbSize[ Log2(m_width) ][ Log2(m_height) ][0];
277
1.63M
    m_log2SbbHeight       = g_log2SbbSize[ Log2(m_width) ][ Log2(m_height) ][1];
278
1.63M
    m_log2SbbSize         = m_log2SbbWidth + m_log2SbbHeight;
279
1.63M
    m_sbbSize             = ( 1 << m_log2SbbSize );
280
1.63M
    m_sbbMask             = m_sbbSize - 1;
281
1.63M
    m_widthInSbb  = nonzeroWidth >> m_log2SbbWidth;
282
1.63M
    m_heightInSbb = nonzeroHeight >> m_log2SbbHeight;
283
1.63M
    m_numSbb              = m_widthInSbb * m_heightInSbb;
284
1.63M
    SizeType        hsbb  = Log2( m_widthInSbb  );
285
1.63M
    SizeType        vsbb  = Log2( m_heightInSbb );
286
1.63M
    SizeType        hsId  = Log2( m_width  );
287
1.63M
    SizeType        vsId  = Log2( m_height );
288
1.63M
    m_scanSbbId2SbbPos    = getScanOrder( SCAN_UNGROUPED   , hsbb , vsbb );
289
1.63M
    m_scanId2BlkPos       = getScanOrder( SCAN_GROUPED_4x4 , hsId , vsId );
290
1.63M
    int log2W             = Log2( m_width  );
291
1.63M
    int log2H             = Log2( m_height );
292
1.63M
    m_scanId2NbInfoSbb    = rom.getNbInfoSbb( log2W, log2H );
293
1.63M
    m_scanId2NbInfoOut    = rom.getNbInfoOut( log2W, log2H );
294
1.63M
    m_scanInfo            = new ScanInfo[ m_numCoeff ];
295
322M
    for( int scanIdx = 0; scanIdx < m_numCoeff; scanIdx++ )
296
320M
    {
297
320M
      xSetScanInfo( m_scanInfo[scanIdx], scanIdx );
298
320M
    }
299
1.63M
  }
300
301
302
  void TUParameters::xSetScanInfo( ScanInfo& scanInfo, int scanIdx )
303
320M
  {
304
320M
    scanInfo.sbbSize    = m_sbbSize;
305
320M
    scanInfo.numSbb     = m_numSbb;
306
320M
    scanInfo.scanIdx    = scanIdx;
307
320M
    scanInfo.rasterPos  = m_scanId2BlkPos[scanIdx].idx;
308
320M
    scanInfo.sbbPos     = m_scanSbbId2SbbPos[scanIdx >> m_log2SbbSize].idx;
309
320M
    scanInfo.insidePos  = scanIdx & m_sbbMask;
310
320M
    scanInfo.spt        = SCAN_ISCSBB;
311
320M
    if(  scanInfo.insidePos == m_sbbMask && scanIdx > scanInfo.sbbSize && scanIdx < m_numCoeff - 1 )
312
17.3M
      scanInfo.spt      = SCAN_SOCSBB;
313
303M
    else if( scanInfo.insidePos == 0 && scanIdx > 0 && scanIdx < m_numCoeff - m_sbbSize )
314
17.3M
      scanInfo.spt      = SCAN_EOCSBB;
315
320M
    scanInfo.posX = m_scanId2BlkPos[scanIdx].x;
316
320M
    scanInfo.posY = m_scanId2BlkPos[scanIdx].y;
317
320M
    if( scanIdx )
318
319M
    {
319
319M
      const int nextScanIdx = scanIdx - 1;
320
319M
      const int diag        = m_scanId2BlkPos[nextScanIdx].x + m_scanId2BlkPos[nextScanIdx].y;
321
319M
      if( m_chType == CH_L )
322
159M
      {
323
159M
        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 8 : diag < 5 ?  4 : 0 );
324
159M
        scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 16 : diag < 3 ? 11 : diag < 10 ? 6 : 1 );
325
159M
      }
326
159M
      else
327
159M
      {
328
159M
        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 4 : 0 );
329
159M
        scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 6 : 1 );
330
159M
      }
331
319M
      scanInfo.nextInsidePos      = nextScanIdx & m_sbbMask;
332
319M
      scanInfo.currNbInfoSbb      = m_scanId2NbInfoSbb[ scanIdx ];
333
319M
      if( scanInfo.insidePos == 0 )
334
18.6M
      {
335
18.6M
        const int nextSbbPos  = m_scanSbbId2SbbPos[nextScanIdx >> m_log2SbbSize].idx;
336
18.6M
        const int nextSbbPosY = nextSbbPos               / m_widthInSbb;
337
18.6M
        const int nextSbbPosX = nextSbbPos - nextSbbPosY * m_widthInSbb;
338
18.6M
        scanInfo.nextSbbRight = ( nextSbbPosX < m_widthInSbb  - 1 ? nextSbbPos + 1            : 0 );
339
18.6M
        scanInfo.nextSbbBelow = ( nextSbbPosY < m_heightInSbb - 1 ? nextSbbPos + m_widthInSbb : 0 );
340
18.6M
      }
341
319M
    }
342
320M
  }
343
344
  void RateEstimator::initCtx( const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID, const FracBitsAccess& fracBitsAccess )
345
718k
  {
346
718k
    m_scanId2Pos = tuPars.m_scanId2BlkPos;
347
718k
    xSetSigSbbFracBits  ( fracBitsAccess, tuPars.m_chType );
348
718k
    xSetSigFlagBits     ( fracBitsAccess, tuPars.m_chType );
349
718k
    xSetGtxFlagBits     ( fracBitsAccess, tuPars.m_chType );
350
718k
    xSetLastCoeffOffset ( fracBitsAccess, tuPars, tu, compID );
351
718k
  }
352
353
  void RateEstimator::xSetLastCoeffOffset( const FracBitsAccess& fracBitsAccess, const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID )
354
718k
  {
355
718k
    const ChannelType chType = ( compID == COMP_Y ? CH_L : CH_C );
356
718k
    int32_t cbfDeltaBits = 0;
357
718k
    if( compID == COMP_Y && !CU::isIntra(*tu.cu) && !tu.depth )
358
257
    {
359
257
      const BinFracBits bits  = fracBitsAccess.getFracBitsArray( Ctx::QtRootCbf() );
360
257
      cbfDeltaBits            = int32_t( bits.intBits[1] ) - int32_t( bits.intBits[0] );
361
257
    }
362
718k
    else
363
718k
    {
364
718k
      BinFracBits bits;
365
718k
      bool prevLumaCbf           = false;
366
718k
      bool lastCbfIsInferred     = false;
367
718k
      bool useIntraSubPartitions = tu.cu->ispMode && isLuma(chType);
368
718k
      if( useIntraSubPartitions )
369
10.5k
      {
370
10.5k
        bool rootCbfSoFar = false;
371
10.5k
        bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
372
10.5k
        uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
373
10.5k
        if( isLastSubPartition )
374
186
        {
375
186
          TransformUnit* tuPointer = tu.cu->firstTU;
376
744
          for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
377
558
          {
378
558
            rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
379
558
            tuPointer     = tuPointer->next;
380
558
          }
381
186
          if( !rootCbfSoFar )
382
0
          {
383
0
            lastCbfIsInferred = true;
384
0
          }
385
186
        }
386
10.5k
        if( !lastCbfIsInferred )
387
10.5k
        {
388
10.5k
          prevLumaCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
389
10.5k
        }
390
10.5k
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, prevLumaCbf, true)));
391
10.5k
      }
392
707k
      else
393
707k
      {
394
707k
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.cbf[COMP_Cb])));
395
707k
      }
396
718k
      cbfDeltaBits = lastCbfIsInferred ? 0 : int32_t(bits.intBits[1]) - int32_t(bits.intBits[0]);
397
718k
    }
398
399
718k
    static const unsigned prefixCtx[] = { 0, 0, 0, 3, 6, 10, 15, 21 };
400
718k
    uint32_t              ctxBits  [ LAST_SIGNIFICANT_GROUPS ];
401
2.15M
    for( unsigned xy = 0; xy < 2; xy++ )
402
1.43M
    {
403
1.43M
      int32_t             bitOffset   = ( xy ? cbfDeltaBits : 0 );
404
1.43M
      int32_t*            lastBits    = ( xy ? m_lastBitsY : m_lastBitsX );
405
1.43M
      const unsigned      size        = ( xy ? tuPars.m_height : tuPars.m_width );
406
1.43M
      const unsigned      log2Size    = Log2( size );
407
1.43M
      const bool          useYCtx     = ( xy != 0 );
408
1.43M
      const CtxSet&       ctxSetLast  = ( useYCtx ? Ctx::LastY : Ctx::LastX )[ chType ];
409
1.43M
      const unsigned      lastShift   = ( compID == COMP_Y ? (log2Size+1)>>2 : Clip3<unsigned>(0,2,size>>3) );
410
1.43M
      const unsigned      lastOffset  = ( compID == COMP_Y ? ( prefixCtx[log2Size] ) : 0 );
411
1.43M
      uint32_t            sumFBits    = 0;
412
1.43M
      unsigned            maxCtxId    = g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size) - 1];
413
9.62M
      for( unsigned ctxId = 0; ctxId < maxCtxId; ctxId++ )
414
8.19M
      {
415
8.19M
        const BinFracBits bits  = fracBitsAccess.getFracBitsArray( ctxSetLast( lastOffset + ( ctxId >> lastShift ) ) );
416
8.19M
        ctxBits[ ctxId ]        = sumFBits + bits.intBits[0] + ( ctxId>3 ? ((ctxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
417
8.19M
        sumFBits               +=            bits.intBits[1];
418
8.19M
      }
419
1.43M
      ctxBits  [ maxCtxId ]     = sumFBits + ( maxCtxId>3 ? ((maxCtxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
420
20.0M
      for (unsigned pos = 0; pos < std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size); pos++)
421
18.6M
      {
422
18.6M
        lastBits[ pos ]         = ctxBits[ g_uiGroupIdx[ pos ] ];
423
18.6M
      }
424
1.43M
    }
425
718k
  }
426
427
  void RateEstimator::xSetSigSbbFracBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
428
718k
  {
429
718k
    const CtxSet& ctxSet = Ctx::SigCoeffGroup[ chType ];
430
2.15M
    for( unsigned ctxId = 0; ctxId < sm_maxNumSigSbbCtx; ctxId++ )
431
1.43M
    {
432
1.43M
      m_sigSbbFracBits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
433
1.43M
    }
434
718k
  }
435
436
  void RateEstimator::xSetSigFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
437
718k
  {
438
2.87M
    for( unsigned ctxSetId = 0; ctxSetId < sm_numCtxSetsSig; ctxSetId++ )
439
2.15M
    {
440
2.15M
      BinFracBits*    bits    = m_sigFracBits [ ctxSetId ];
441
2.15M
      const CtxSet&   ctxSet  = Ctx::SigFlag  [ chType + 2*ctxSetId ];
442
2.15M
      const unsigned  numCtx  = ( chType == CH_L ? 12 : 8 );
443
19.7M
      for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
444
17.5M
      {
445
17.5M
        bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
446
17.5M
      }
447
2.15M
    }
448
718k
  }
449
450
  void RateEstimator::xSetGtxFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
451
718k
  {
452
718k
    const CtxSet&   ctxSetPar   = Ctx::ParFlag [     chType ];
453
718k
    const CtxSet&   ctxSetGt1   = Ctx::GtxFlag [ 2 + chType ];
454
718k
    const CtxSet&   ctxSetGt2   = Ctx::GtxFlag [     chType ];
455
718k
    const unsigned  numCtx      = ( chType == CH_L ? 21 : 11 );
456
8.90M
    for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
457
8.18M
    {
458
8.18M
      BinFracBits     fbPar = fracBitsAccess.getFracBitsArray( ctxSetPar( ctxId ) );
459
8.18M
      BinFracBits     fbGt1 = fracBitsAccess.getFracBitsArray( ctxSetGt1( ctxId ) );
460
8.18M
      BinFracBits     fbGt2 = fracBitsAccess.getFracBitsArray( ctxSetGt2( ctxId ) );
461
8.18M
      CoeffFracBits&  cb    = m_gtxFracBits[ ctxId ];
462
8.18M
      int32_t         par0  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[0]);
463
8.18M
      int32_t         par1  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[1]);
464
8.18M
      cb.bits[0] = 0;
465
8.18M
      cb.bits[1] = fbGt1.intBits[0] + (1 << SCALE_BITS);
466
8.18M
      cb.bits[2] = fbGt1.intBits[1] + par0 + fbGt2.intBits[0];
467
8.18M
      cb.bits[3] = fbGt1.intBits[1] + par1 + fbGt2.intBits[0];
468
8.18M
      cb.bits[4] = fbGt1.intBits[1] + par0 + fbGt2.intBits[1];
469
8.18M
      cb.bits[5] = fbGt1.intBits[1] + par1 + fbGt2.intBits[1];
470
8.18M
    }
471
718k
  }
472
473
  void CommonCtx::update( const ScanInfo& scanInfo, const int prevId, int stateId, StateMem& curr )
474
113k
  {
475
113k
    uint8_t*    sbbFlags  = m_currSbbCtx[stateId].sbbFlags;
476
113k
    uint8_t*    levels    = m_currSbbCtx[stateId].levels;
477
113k
    uint16_t    maxDist   = m_nbInfo[scanInfo.scanIdx - 1].maxDist;
478
113k
    uint16_t    sbbSize   = scanInfo.sbbSize;
479
113k
    std::size_t setCpSize = ( maxDist > sbbSize ? maxDist - sbbSize : 0 ) * sizeof( uint8_t );
480
113k
    if( prevId >= 0 )
481
93.6k
    {
482
93.6k
      ::memcpy( sbbFlags, m_prevSbbCtx[prevId].sbbFlags, scanInfo.numSbb * sizeof( uint8_t ) );
483
93.6k
      ::memcpy( levels + scanInfo.scanIdx + sbbSize, m_prevSbbCtx[prevId].levels + scanInfo.scanIdx + sbbSize, setCpSize );
484
93.6k
    }
485
19.5k
    else
486
19.5k
    {
487
19.5k
      ::memset( sbbFlags, 0, scanInfo.numSbb * sizeof( uint8_t ) );
488
19.5k
      ::memset( levels + scanInfo.scanIdx + sbbSize, 0, setCpSize );
489
19.5k
    }
490
113k
    sbbFlags[scanInfo.sbbPos] = !!curr.numSig[stateId];
491
492
113k
    const int       sigNSbb = ( ( scanInfo.nextSbbRight ? sbbFlags[scanInfo.nextSbbRight] : false ) || ( scanInfo.nextSbbBelow ? sbbFlags[scanInfo.nextSbbBelow] : false ) ? 1 : 0 );
493
113k
    curr.refSbbCtxId[stateId] = stateId;
494
113k
    const BinFracBits sbbBits = m_sbbFlagBits[sigNSbb];
495
496
113k
    curr.sbbBits0[stateId] = sbbBits.intBits[0];
497
113k
    curr.sbbBits1[stateId] = sbbBits.intBits[1];
498
499
113k
    if( sigNSbb || ( ( scanInfo.nextSbbRight && scanInfo.nextSbbBelow ) ? sbbFlags[scanInfo.nextSbbBelow + 1] : false ) )
500
70.6k
    {
501
70.6k
      const int         scanBeg = scanInfo.scanIdx - scanInfo.sbbSize;
502
70.6k
      const NbInfoOut* nbOut = m_nbInfo + scanBeg;
503
70.6k
      const uint8_t* absLevels = levels + scanBeg;
504
505
1.20M
      for( int id = 0; id < scanInfo.sbbSize; id++, nbOut++ )
506
1.13M
      {
507
1.13M
        if( nbOut->num )
508
759k
        {
509
759k
          TCoeff sumAbs = 0, sumAbs1 = 0, sumNum = 0;
510
1.83M
#define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(4+(t&1),t); sumNum+=!!t; }
511
759k
          switch( nbOut->num )
512
759k
          {
513
0
          default:
514
48.8k
          case 5:
515
48.8k
            UPDATE( 4 );
516
146k
          case 4:
517
146k
            UPDATE( 3 );
518
406k
          case 3:
519
406k
            UPDATE( 2 );
520
477k
          case 2:
521
477k
            UPDATE( 1 );
522
759k
          case 1:
523
759k
            UPDATE( 0 );
524
759k
          }
525
759k
#undef UPDATE
526
759k
          curr.tplAcc[id][stateId] = ( sumNum << 5 ) | sumAbs1;
527
759k
          curr.sum1st[id][stateId] = ( uint8_t ) std::min( 255, sumAbs );
528
759k
        }
529
1.13M
      }
530
70.6k
    }
531
113k
  }
532
533
  void Quantizer::initQuantBlock(const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue)
534
1.72M
  {
535
1.72M
    CHECKD( lambda <= 0.0, "Lambda must be greater than 0" );
536
537
1.72M
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
538
1.72M
    const int         qpPer                 = qpDQ / 6;
539
1.72M
    const int         qpRem                 = qpDQ - 6 * qpPer;
540
1.72M
    const SPS&        sps                   = *tu.cs->sps;
541
1.72M
    const CompArea&   area                  = tu.blocks[ compID ];
542
1.72M
    const ChannelType chType                = toChannelType( compID );
543
1.72M
    const int         channelBitDepth       = sps.bitDepths[ chType ];
544
1.72M
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
545
1.72M
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
546
1.72M
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
547
1.72M
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
548
    // quant parameters
549
1.72M
    m_QShift                    = QUANT_SHIFT  - 1 + qpPer + transformShift;
550
1.72M
    m_QAdd                      = -( ( 3 << m_QShift ) >> 1 );
551
1.72M
    Intermediate_Int  invShift  = IQUANT_SHIFT + 1 - qpPer - transformShift;
552
1.72M
    m_QScale                    = g_quantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
553
1.72M
    const unsigned    qIdxBD    = std::min<unsigned>( maxLog2TrDynamicRange + 1, 8*sizeof(Intermediate_Int) + invShift - IQUANT_SHIFT - 1 );
554
1.72M
    m_maxQIdx                   = ( 1 << (qIdxBD-1) ) - 4;
555
1.72M
    if( m_QShift )
556
1.72M
      m_thresLast               = TCoeff((int64_t(m_DqThrVal) << (m_QShift-1)));
557
33
    else
558
33
      m_thresLast               = TCoeff((int64_t(m_DqThrVal>>1) << m_QShift));
559
1.72M
    m_thresSSbb                 = TCoeff((int64_t(3) << m_QShift));
560
    // distortion calculation parameters
561
1.72M
    const int64_t qScale        = (gValue==-1) ? m_QScale : gValue;
562
1.72M
    const int nomDShift =
563
1.72M
      SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift + (needsSqrt2ScaleAdjustment ? 1 : 0);
564
1.72M
    const double  qScale2       = double( qScale * qScale );
565
1.72M
    const double  nomDistFactor = ( nomDShift < 0 ? 1.0/(double(int64_t(1)<<(-nomDShift))*qScale2*lambda) : double(int64_t(1)<<nomDShift)/(qScale2*lambda) );
566
1.72M
    const uint32_t pow2dfShift   = (uint32_t)( nomDistFactor * qScale2 ) + 1;
567
1.72M
    const int     dfShift       = ceilLog2( pow2dfShift );
568
1.72M
    m_DistShift                 = 62 + m_QShift - 2*maxLog2TrDynamicRange - dfShift;
569
1.72M
    m_DistAdd                   = (int64_t(1) << m_DistShift) >> 1;
570
1.72M
    m_DistStepAdd               = ((m_DistShift+m_QShift)>=64 ? (int64_t)( nomDistFactor * pow(2,m_DistShift+m_QShift) + .5 ) : (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+m_QShift)) + .5 ));
571
1.72M
    m_DistOrgFact               = (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+1       )) + .5 );
572
1.72M
  }
573
574
  void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef) const
575
708k
  {
576
577
    //----- set basic parameters -----
578
708k
    const CompArea&     area      = tu.blocks[ compID ];
579
708k
    const int           numCoeff  = area.area();
580
708k
    const SizeType      hsId      = Log2( area.width );
581
708k
    const SizeType      vsId      = Log2( area.height );
582
708k
    const ScanElement  *scan      = getScanOrder( SCAN_GROUPED_4x4, hsId, vsId );
583
708k
    const TCoeffSig*    qCoeff    = tu.getCoeffs( compID ).buf;
584
708k
          TCoeff*       tCoeff    = recCoeff.buf;
585
586
    //----- reset coefficients and get last scan index -----
587
708k
    ::memset( tCoeff, 0, numCoeff * sizeof( TCoeff ) );
588
708k
    int lastScanIdx = tu.lastPos[compID];
589
708k
    if( lastScanIdx < 0 )
590
0
    {
591
0
      return;
592
0
    }
593
594
    //----- set dequant parameters -----
595
708k
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
596
708k
    const int         qpPer                 = qpDQ / 6;
597
708k
    const int         qpRem                 = qpDQ - 6 * qpPer;
598
708k
    const SPS&        sps                   = *tu.cs->sps;
599
708k
    const ChannelType chType                = toChannelType( compID );
600
708k
    const int         channelBitDepth       = sps.bitDepths[ chType ];
601
708k
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
602
708k
    const TCoeff      minTCoeff             = -( 1 << maxLog2TrDynamicRange );
603
708k
    const TCoeff      maxTCoeff             =  ( 1 << maxLog2TrDynamicRange ) - 1;
604
708k
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
605
708k
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
606
708k
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
607
708k
    Intermediate_Int  shift                 = IQUANT_SHIFT + 1 - qpPer - transformShift + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
608
708k
    Intermediate_Int  invQScale             = g_invQuantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
609
708k
    Intermediate_Int  add                   = (shift < 0) ? 0 : ((1 << shift) >> 1);
610
    //----- dequant coefficients -----
611
7.52M
    for( int state = 0, scanIdx = lastScanIdx; scanIdx >= 0; scanIdx-- )
612
6.81M
    {
613
6.81M
      const unsigned   rasterPos = scan[scanIdx].idx;
614
6.81M
      const TCoeffSig& level     = qCoeff[ rasterPos ];
615
6.81M
      if( level )
616
6.13M
      {
617
6.13M
        if (enableScalingLists)
618
0
          invQScale = piDequantCoef[rasterPos];//scalingfactor*levelScale
619
6.13M
        if (shift < 0 && (enableScalingLists || scanIdx == lastScanIdx))
620
379k
        {
621
379k
          invQScale <<= -shift;
622
379k
        }
623
6.13M
        Intermediate_Int qIdx = 2 * level + (level > 0 ? -(state>>1) : (state>>1));
624
6.13M
        int64_t  nomTCoeff          = ((int64_t)qIdx * (int64_t)invQScale + add) >> ((shift < 0) ? 0 : shift);
625
6.13M
        tCoeff[rasterPos]           = (TCoeff)Clip3<int64_t>(minTCoeff, maxTCoeff, nomTCoeff);
626
6.13M
      }
627
6.81M
      state = ( 32040 >> ((state<<2)+((level&1)<<1)) ) & 3;   // the 16-bit value "32040" represent the state transition table
628
6.81M
    }
629
708k
  }
630
631
  bool Quantizer::preQuantCoeff( const TCoeff absCoeff, PQData* pqData, int quanCoeff ) const
632
0
  {
633
0
    int64_t scaledOrg = int64_t( absCoeff ) * quanCoeff;
634
0
    TCoeff  qIdx      = TCoeff( ( scaledOrg + m_QAdd ) >> m_QShift );
635
636
0
    if( qIdx < 0 )
637
0
    {
638
0
      int64_t scaledAdd = m_DistStepAdd - scaledOrg * m_DistOrgFact;
639
0
      PQData& pq_a      = pqData[1];
640
0
      PQData& pq_b      = pqData[2];
641
642
0
      pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * 1 + m_DistAdd ) >> m_DistShift;
643
0
      pq_a.absLevel     = 1;
644
645
0
      pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * 2 + m_DistAdd ) >> m_DistShift;
646
0
      pq_b.absLevel     = 1;
647
      
648
0
      return true;
649
0
    }
650
     
651
0
    qIdx              = std::max<TCoeff>( 1, std::min<TCoeff>( m_maxQIdx, qIdx ) );
652
0
    int64_t scaledAdd = qIdx * m_DistStepAdd - scaledOrg * m_DistOrgFact;
653
654
0
    PQData& pq_a      = pqData[( qIdx + 0 ) & 3];
655
0
    PQData& pq_b      = pqData[( qIdx + 1 ) & 3];
656
0
    PQData& pq_c      = pqData[( qIdx + 2 ) & 3];
657
0
    PQData& pq_d      = pqData[( qIdx + 3 ) & 3];
658
659
0
    pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * ( qIdx + 0 ) + m_DistAdd ) >> m_DistShift;
660
0
    pq_a.absLevel     = ( qIdx + 1 ) >> 1;
661
662
0
    pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * ( qIdx + 1 ) + m_DistAdd ) >> m_DistShift;
663
0
    pq_b.absLevel     = ( qIdx + 2 ) >> 1;
664
665
0
    pq_c.deltaDist    = ( ( scaledAdd + 2 * m_DistStepAdd ) * ( qIdx + 2 ) + m_DistAdd ) >> m_DistShift;
666
0
    pq_c.absLevel     = ( qIdx + 3 ) >> 1;
667
668
0
    pq_d.deltaDist    = ( ( scaledAdd + 3 * m_DistStepAdd ) * ( qIdx + 3 ) + m_DistAdd ) >> m_DistShift;
669
0
    pq_d.absLevel     = ( qIdx + 4 ) >> 1;
670
671
0
    return false;
672
0
  }
673
674
  const int32_t g_goRiceBits[4][RICEMAX] =
675
  {
676
    { 32768,  65536,  98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752},
677
    { 65536,  65536,  98304,  98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984},
678
    { 98304,  98304,  98304,  98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680},
679
    {131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376}
680
  };
681
682
  static inline void initStates( const int stateId, DQIntern::StateMem& state )
683
5.74M
  {
684
5.74M
    state.rdCost[stateId]         = DQIntern::rdCostInit;
685
5.74M
    state.ctx.cff[stateId]        =  0;
686
5.74M
    state.ctx.sig[stateId]        =  0;
687
5.74M
    state.numSig[stateId]         =  0;
688
5.74M
    state.refSbbCtxId[stateId]    = -1;
689
5.74M
    state.remRegBins[stateId]     =  4;
690
5.74M
    state.cffBitsCtxOffset        =  0;
691
5.74M
    state.m_goRicePar[stateId]    =  0;
692
5.74M
    state.m_goRiceZero[stateId]   =  0;
693
5.74M
    state.sbbBits0[stateId]       =  0;
694
5.74M
    state.sbbBits1[stateId]       =  0;
695
5.74M
  }
696
697
  template<bool rrgEnsured = false>
698
  static inline void checkRdCosts( const int stateId, const DQIntern::ScanPosType spt, const DQIntern::PQData& pqDataA, const DQIntern::PQData& pqDataB, DQIntern::Decisions& decisions, int idxAZ, int idxB, const DQIntern::StateMem& state )
699
23.0M
  {
700
23.0M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
23.0M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
23.0M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
23.0M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
23.0M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
22.5M
    {
707
22.5M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
22.5M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
22.5M
      if( pqDataA.absLevel < 4 )
711
5.45M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
17.1M
      else
713
17.1M
      {
714
17.1M
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
17.1M
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
17.1M
      }
717
718
22.5M
      if( pqDataB.absLevel < 4 )
719
6.80M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
15.7M
      else
721
15.7M
      {
722
15.7M
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
15.7M
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
15.7M
      }
725
726
22.5M
      if( spt == SCAN_ISCSBB )
727
22.5M
      {
728
22.5M
        rdCostA += sigBits.intBits[1];
729
22.5M
        rdCostB += sigBits.intBits[1];
730
22.5M
        rdCostZ += sigBits.intBits[0];
731
22.5M
      }
732
67.6k
      else if( spt == SCAN_SOCSBB )
733
11.9k
      {
734
11.9k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
11.9k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
11.9k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
11.9k
      }
738
55.6k
      else if( state.numSig[stateId] )
739
54.5k
      {
740
54.5k
        rdCostA += sigBits.intBits[1];
741
54.5k
        rdCostB += sigBits.intBits[1];
742
54.5k
        rdCostZ += sigBits.intBits[0];
743
54.5k
      }
744
1.13k
      else
745
1.13k
      {
746
1.13k
        rdCostZ = rdCostInit;
747
1.13k
      }
748
22.5M
    }
749
411k
    else
750
411k
    {
751
411k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
411k
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
411k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
411k
    }
755
756
23.0M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
14.3M
    {
758
14.3M
      decisions.rdCost[idxAZ] = rdCostA;
759
14.3M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
14.3M
      decisions.prevId[idxAZ] = stateId;
761
14.3M
    }
762
8.62M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
294k
    {
764
294k
      decisions.rdCost[idxAZ] = rdCostZ;
765
294k
      decisions.absLevel[idxAZ] = 0;
766
294k
      decisions.prevId[idxAZ] = stateId;
767
294k
    }
768
769
23.0M
    if( rdCostB < decisions.rdCost[idxB] )
770
14.7M
    {
771
14.7M
      decisions.rdCost[idxB] = rdCostB;
772
14.7M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
14.7M
      decisions.prevId[idxB] = stateId;
774
14.7M
    }
775
23.0M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<true>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
699
4.10M
  {
700
4.10M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
4.10M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
4.10M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
4.10M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
4.10M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
4.10M
    {
707
4.10M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
4.10M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
4.10M
      if( pqDataA.absLevel < 4 )
711
4.10M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
0
      else
713
0
      {
714
0
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
0
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
0
      }
717
718
4.10M
      if( pqDataB.absLevel < 4 )
719
4.10M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
0
      else
721
0
      {
722
0
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
0
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
0
      }
725
726
4.10M
      if( spt == SCAN_ISCSBB )
727
4.08M
      {
728
4.08M
        rdCostA += sigBits.intBits[1];
729
4.08M
        rdCostB += sigBits.intBits[1];
730
4.08M
        rdCostZ += sigBits.intBits[0];
731
4.08M
      }
732
26.6k
      else if( spt == SCAN_SOCSBB )
733
9.90k
      {
734
9.90k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
9.90k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
9.90k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
9.90k
      }
738
16.7k
      else if( state.numSig[stateId] )
739
15.9k
      {
740
15.9k
        rdCostA += sigBits.intBits[1];
741
15.9k
        rdCostB += sigBits.intBits[1];
742
15.9k
        rdCostZ += sigBits.intBits[0];
743
15.9k
      }
744
797
      else
745
797
      {
746
797
        rdCostZ = rdCostInit;
747
797
      }
748
4.10M
    }
749
0
    else
750
0
    {
751
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
0
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
0
    }
755
756
4.10M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
2.69M
    {
758
2.69M
      decisions.rdCost[idxAZ] = rdCostA;
759
2.69M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
2.69M
      decisions.prevId[idxAZ] = stateId;
761
2.69M
    }
762
1.40M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
294k
    {
764
294k
      decisions.rdCost[idxAZ] = rdCostZ;
765
294k
      decisions.absLevel[idxAZ] = 0;
766
294k
      decisions.prevId[idxAZ] = stateId;
767
294k
    }
768
769
4.10M
    if( rdCostB < decisions.rdCost[idxB] )
770
2.99M
    {
771
2.99M
      decisions.rdCost[idxB] = rdCostB;
772
2.99M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
2.99M
      decisions.prevId[idxB] = stateId;
774
2.99M
    }
775
4.10M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<false>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
699
18.8M
  {
700
18.8M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
18.8M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
18.8M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
18.8M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
18.8M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
18.4M
    {
707
18.4M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
18.4M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
18.4M
      if( pqDataA.absLevel < 4 )
711
1.34M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
17.1M
      else
713
17.1M
      {
714
17.1M
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
17.1M
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
17.1M
      }
717
718
18.4M
      if( pqDataB.absLevel < 4 )
719
2.69M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
15.7M
      else
721
15.7M
      {
722
15.7M
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
15.7M
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
15.7M
      }
725
726
18.4M
      if( spt == SCAN_ISCSBB )
727
18.4M
      {
728
18.4M
        rdCostA += sigBits.intBits[1];
729
18.4M
        rdCostB += sigBits.intBits[1];
730
18.4M
        rdCostZ += sigBits.intBits[0];
731
18.4M
      }
732
41.0k
      else if( spt == SCAN_SOCSBB )
733
2.09k
      {
734
2.09k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
2.09k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
2.09k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
2.09k
      }
738
38.9k
      else if( state.numSig[stateId] )
739
38.6k
      {
740
38.6k
        rdCostA += sigBits.intBits[1];
741
38.6k
        rdCostB += sigBits.intBits[1];
742
38.6k
        rdCostZ += sigBits.intBits[0];
743
38.6k
      }
744
336
      else
745
336
      {
746
336
        rdCostZ = rdCostInit;
747
336
      }
748
18.4M
    }
749
411k
    else
750
411k
    {
751
411k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
411k
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
411k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
411k
    }
755
756
18.8M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
11.6M
    {
758
11.6M
      decisions.rdCost[idxAZ] = rdCostA;
759
11.6M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
11.6M
      decisions.prevId[idxAZ] = stateId;
761
11.6M
    }
762
7.22M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
436
    {
764
436
      decisions.rdCost[idxAZ] = rdCostZ;
765
436
      decisions.absLevel[idxAZ] = 0;
766
436
      decisions.prevId[idxAZ] = stateId;
767
436
    }
768
769
18.8M
    if( rdCostB < decisions.rdCost[idxB] )
770
11.7M
    {
771
11.7M
      decisions.rdCost[idxB] = rdCostB;
772
11.7M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
11.7M
      decisions.prevId[idxB] = stateId;
774
11.7M
    }
775
18.8M
  }
776
777
  void checkAllRdCosts( const DQIntern::ScanPosType spt, const DQIntern::PQData* pqData, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
778
1.02M
  {
779
1.02M
    checkRdCosts<true>( 0, spt, pqData[0], pqData[2], decisions, 0, 2, state );
780
1.02M
    checkRdCosts<true>( 1, spt, pqData[0], pqData[2], decisions, 2, 0, state );
781
1.02M
    checkRdCosts<true>( 2, spt, pqData[3], pqData[1], decisions, 1, 3, state );
782
1.02M
    checkRdCosts<true>( 3, spt, pqData[3], pqData[1], decisions, 3, 1, state );
783
1.02M
  }
784
785
  template<bool rrgEnsured = false>
786
  static void checkRdCostsOdd1( const int stateId, const ScanPosType spt, const int64_t deltaDist, Decisions& decisions, int idxA, int idxZ, const StateMem& state )
787
4.59M
  {
788
4.59M
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
4.59M
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
4.59M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
4.59M
    {
793
4.59M
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
4.59M
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
4.59M
      if( spt == SCAN_ISCSBB )
798
4.48M
      {
799
4.48M
        rdCostA += sigBits.intBits[1];
800
4.48M
        rdCostZ += sigBits.intBits[0];
801
4.48M
      }
802
115k
      else if( spt == SCAN_SOCSBB )
803
75.2k
      {
804
75.2k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
75.2k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
75.2k
      }
807
39.9k
      else if( state.numSig[stateId] )
808
7.99k
      {
809
7.99k
        rdCostA += sigBits.intBits[1];
810
7.99k
        rdCostZ += sigBits.intBits[0];
811
7.99k
      }
812
32.0k
      else
813
32.0k
      {
814
32.0k
        rdCostZ = rdCostInit;
815
32.0k
      }
816
4.59M
    }
817
1.68k
    else
818
1.68k
    {
819
1.68k
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
1.68k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
1.68k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
1.68k
    }
824
825
4.59M
    if( rdCostA < decisions.rdCost[idxA] )
826
2.74M
    {
827
2.74M
      decisions.rdCost[idxA] = rdCostA;
828
2.74M
      decisions.absLevel[idxA] = 1;
829
2.74M
      decisions.prevId[idxA] = stateId;
830
2.74M
    }
831
832
4.59M
    if( rdCostZ < decisions.rdCost[idxZ] )
833
3.25M
    {
834
3.25M
      decisions.rdCost[idxZ] = rdCostZ;
835
3.25M
      decisions.absLevel[idxZ] = 0;
836
3.25M
      decisions.prevId[idxZ] = stateId;
837
3.25M
    }
838
4.59M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<true>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
787
4.29M
  {
788
4.29M
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
4.29M
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
4.29M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
4.29M
    {
793
4.29M
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
4.29M
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
4.29M
      if( spt == SCAN_ISCSBB )
798
4.17M
      {
799
4.17M
        rdCostA += sigBits.intBits[1];
800
4.17M
        rdCostZ += sigBits.intBits[0];
801
4.17M
      }
802
114k
      else if( spt == SCAN_SOCSBB )
803
75.1k
      {
804
75.1k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
75.1k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
75.1k
      }
807
39.5k
      else if( state.numSig[stateId] )
808
7.99k
      {
809
7.99k
        rdCostA += sigBits.intBits[1];
810
7.99k
        rdCostZ += sigBits.intBits[0];
811
7.99k
      }
812
31.5k
      else
813
31.5k
      {
814
31.5k
        rdCostZ = rdCostInit;
815
31.5k
      }
816
4.29M
    }
817
0
    else
818
0
    {
819
0
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
0
    }
824
825
4.29M
    if( rdCostA < decisions.rdCost[idxA] )
826
2.74M
    {
827
2.74M
      decisions.rdCost[idxA] = rdCostA;
828
2.74M
      decisions.absLevel[idxA] = 1;
829
2.74M
      decisions.prevId[idxA] = stateId;
830
2.74M
    }
831
832
4.29M
    if( rdCostZ < decisions.rdCost[idxZ] )
833
3.25M
    {
834
3.25M
      decisions.rdCost[idxZ] = rdCostZ;
835
3.25M
      decisions.absLevel[idxZ] = 0;
836
3.25M
      decisions.prevId[idxZ] = stateId;
837
3.25M
    }
838
4.29M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<false>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
787
306k
  {
788
306k
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
306k
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
306k
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
305k
    {
793
305k
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
305k
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
305k
      if( spt == SCAN_ISCSBB )
798
304k
      {
799
304k
        rdCostA += sigBits.intBits[1];
800
304k
        rdCostZ += sigBits.intBits[0];
801
304k
      }
802
606
      else if( spt == SCAN_SOCSBB )
803
174
      {
804
174
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
174
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
174
      }
807
432
      else if( state.numSig[stateId] )
808
0
      {
809
0
        rdCostA += sigBits.intBits[1];
810
0
        rdCostZ += sigBits.intBits[0];
811
0
      }
812
432
      else
813
432
      {
814
432
        rdCostZ = rdCostInit;
815
432
      }
816
305k
    }
817
1.68k
    else
818
1.68k
    {
819
1.68k
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
1.68k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
1.68k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
1.68k
    }
824
825
306k
    if( rdCostA < decisions.rdCost[idxA] )
826
1.25k
    {
827
1.25k
      decisions.rdCost[idxA] = rdCostA;
828
1.25k
      decisions.absLevel[idxA] = 1;
829
1.25k
      decisions.prevId[idxA] = stateId;
830
1.25k
    }
831
832
306k
    if( rdCostZ < decisions.rdCost[idxZ] )
833
1.92k
    {
834
1.92k
      decisions.rdCost[idxZ] = rdCostZ;
835
1.92k
      decisions.absLevel[idxZ] = 0;
836
1.92k
      decisions.prevId[idxZ] = stateId;
837
1.92k
    }
838
306k
  }
839
840
  static void checkAllRdCostsOdd1( const DQIntern::ScanPosType spt, const int64_t pq_a_dist, const int64_t pq_b_dist, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
841
1.07M
  {
842
1.07M
    checkRdCostsOdd1<true>( 0, spt, pq_b_dist, decisions, 2, 0, state );
843
1.07M
    checkRdCostsOdd1<true>( 1, spt, pq_b_dist, decisions, 0, 2, state );
844
1.07M
    checkRdCostsOdd1<true>( 2, spt, pq_a_dist, decisions, 3, 1, state );
845
1.07M
    checkRdCostsOdd1<true>( 3, spt, pq_a_dist, decisions, 1, 3, state );
846
1.07M
  }
847
848
  static inline void checkRdCostStart( int32_t lastOffset, const PQData& pqData, Decisions& decisions, int idx, const StateMem& state )
849
12.6M
  {
850
12.6M
    const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[0];
851
852
12.6M
    int64_t rdCost = pqData.deltaDist + lastOffset;
853
12.6M
    if( pqData.absLevel < 4 )
854
4.41M
    {
855
4.41M
      rdCost += cffBits.bits[pqData.absLevel];
856
4.41M
    }
857
8.23M
    else
858
8.23M
    {
859
8.23M
      const unsigned value = ( pqData.absLevel - 4 ) >> 1;
860
8.23M
      rdCost += cffBits.bits[pqData.absLevel - ( value << 1 )] + g_goRiceBits[0][value < RICEMAX ? value : RICEMAX - 1];
861
8.23M
    }
862
863
12.6M
    if( rdCost < decisions.rdCost[idx] )
864
1.66M
    {
865
1.66M
      decisions.rdCost[idx]   = rdCost;
866
1.66M
      decisions.absLevel[idx] = pqData.absLevel;
867
1.66M
      decisions.prevId[idx]   = -1;
868
1.66M
    }
869
12.6M
  }
870
871
  static inline void checkRdCostSkipSbb( const int stateId, Decisions& decisions, int idx, const StateMem& state )
872
108k
  {
873
108k
    int64_t rdCost = state.rdCost[stateId] + state.sbbBits0[stateId];
874
108k
    if( rdCost < decisions.rdCost[idx] )
875
37.1k
    {
876
37.1k
      decisions.rdCost[idx]   = rdCost;
877
37.1k
      decisions.absLevel[idx] = 0;
878
37.1k
      decisions.prevId[idx]   = 4 | stateId;
879
37.1k
    }
880
108k
  }
881
882
  static inline void checkRdCostSkipSbbZeroOut( const int stateId, Decisions& decisions, int idx, const StateMem& state )
883
0
  {
884
0
    int64_t rdCost          = state.rdCost[stateId] + state.sbbBits0[stateId];
885
0
    decisions.rdCost[idx]   = rdCost;
886
0
    decisions.absLevel[idx] = 0;
887
0
    decisions.prevId[idx]   = 4 | stateId;
888
0
  }
889
890
  static inline void setRiceParam( const int stateId, const ScanInfo& scanInfo, StateMem& state, bool ge4 )
891
18.7M
  {
892
18.7M
    if( state.remRegBins[stateId] < 4 || ge4 )
893
17.5M
    {
894
17.5M
      TCoeff  sumAbs = state.sum1st[scanInfo.insidePos][stateId];
895
17.5M
      int sumSub     = state.remRegBins[stateId] < 4 ? 0 : 4 * 5;
896
17.5M
      int sumAll     = std::max( std::min( 31, ( int ) sumAbs - sumSub ), 0 );
897
17.5M
      state.m_goRicePar[stateId]
898
17.5M
                     = g_auiGoRiceParsCoeff[sumAll];
899
900
17.5M
      if( state.remRegBins[stateId] < 4 )
901
413k
      {
902
413k
        state.m_goRiceZero[stateId] = g_auiGoRicePosCoeff0( stateId, state.m_goRicePar[stateId] );
903
413k
      }
904
17.5M
    }
905
18.7M
  }
906
907
  static void update1State( int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr, DQIntern::StateMem& prev )
908
24.6M
  {
909
24.6M
    curr.rdCost[stateId] = decisions.rdCost[stateId];
910
24.6M
    if( decisions.prevId[stateId] > -2 )
911
23.6M
    {
912
23.6M
      if( decisions.prevId[stateId] >= 0 )
913
22.4M
      {
914
22.4M
        const int prevId          = decisions.prevId[stateId];
915
22.4M
        curr.numSig[stateId]      = prev.numSig[prevId] + !!decisions.absLevel[stateId];
916
22.4M
        curr.refSbbCtxId[stateId] = prev.refSbbCtxId[prevId];
917
22.4M
        curr.sbbBits0[stateId]    = prev.sbbBits0[prevId];
918
22.4M
        curr.sbbBits1[stateId]    = prev.sbbBits1[prevId];
919
22.4M
        curr.remRegBins[stateId]  = prev.remRegBins[prevId] - 1;
920
921
22.4M
        if( curr.remRegBins[stateId] >= 4 )
922
22.1M
        {
923
22.1M
          curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
924
22.1M
        }
925
926
381M
        for( int i = 0; i < 16; i++ )
927
359M
        {
928
359M
          curr.tplAcc[i][stateId] = prev.tplAcc[i][prevId];
929
359M
          curr.sum1st[i][stateId] = prev.sum1st[i][prevId];
930
359M
          curr.absVal[i][stateId] = prev.absVal[i][prevId];
931
359M
        }
932
22.4M
      }
933
1.16M
      else
934
1.16M
      {
935
1.16M
        curr.numSig[stateId]      =  1;
936
1.16M
        curr.refSbbCtxId[stateId] = -1;
937
1.16M
        curr.remRegBins[stateId]  = prev.initRemRegBins;
938
1.16M
        curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
939
940
19.7M
        for( int i = 0; i < 16; i++ )
941
18.6M
        {
942
18.6M
          curr.tplAcc[i][stateId] = 0;
943
18.6M
          curr.sum1st[i][stateId] = 0;
944
18.6M
          curr.absVal[i][stateId] = 0;
945
18.6M
        }
946
1.16M
      }
947
948
23.6M
      if( decisions.absLevel[stateId] )
949
20.9M
      {
950
20.9M
        curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );
951
952
20.9M
        if( scanInfo.currNbInfoSbb.numInv )
953
20.9M
        {
954
20.9M
          int min4_or_5 = std::min<TCoeff>( 4 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );
955
956
20.9M
          auto adds8 = []( uint8_t a, uint8_t b )
957
60.9M
          {
958
60.9M
            uint8_t c = a + b;
959
60.9M
            if( c < a ) c = -1;
960
60.9M
            return c;
961
60.9M
          };
962
963
20.9M
          auto update_deps = [&]( int k )
964
60.9M
          {
965
60.9M
            curr.tplAcc[scanInfo.currNbInfoSbb.invInPos[k]][stateId] += 32 + min4_or_5;
966
60.9M
            curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId] = adds8( curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId], decisions.absLevel[stateId] );
967
60.9M
          };
968
969
20.9M
          switch( scanInfo.currNbInfoSbb.numInv )
970
20.9M
          {
971
0
          default:
972
3.43M
          case 5:
973
3.43M
            update_deps( 4 );
974
8.68M
          case 4:
975
8.68M
            update_deps( 3 );
976
10.5M
          case 3:
977
10.5M
            update_deps( 2 );
978
17.4M
          case 2:
979
17.4M
            update_deps( 1 );
980
20.9M
          case 1:
981
20.9M
            update_deps( 0 );
982
20.9M
          }
983
20.9M
        }
984
20.9M
      }
985
986
23.6M
      if( curr.remRegBins[stateId] >= 4 )
987
23.2M
      {
988
23.2M
        TCoeff  sumAbs1 = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
989
23.2M
        TCoeff  sumNum  = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
990
23.2M
        int sumGt1 = sumAbs1 - sumNum;
991
992
23.2M
        curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( sumAbs1 + 1 ) >> 1, 3 );
993
23.2M
        curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
994
23.2M
      }
995
400k
      else
996
400k
      {
997
400k
        curr.anyRemRegBinsLt4 = true;
998
400k
      }
999
23.6M
    }
1000
24.6M
  }
1001
1002
  static void update1StateEOS( const int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::StateMem& prev, DQIntern::CommonCtx& commonCtx )
1003
114k
  {
1004
114k
    curr.rdCost[stateId] = decisions.rdCost[stateId];
1005
1006
114k
    if( decisions.prevId[stateId] > -2 )
1007
113k
    {
1008
113k
      if( decisions.prevId[stateId] >= 4 )
1009
37.1k
      {
1010
37.1k
        CHECK( decisions.absLevel[stateId] != 0, "cannot happen" );
1011
1012
37.1k
        const int prevId          = decisions.prevId[stateId] - 4;
1013
37.1k
        curr.numSig    [stateId]  = 0;
1014
37.1k
        curr.remRegBins[stateId]  = skip.remRegBins[prevId];
1015
37.1k
        curr.refSbbCtxId[stateId] = prevId;
1016
1017
631k
        for( int i = 0; i < 16; i++ )
1018
593k
        {
1019
593k
          curr.absVal[i][stateId] = 0;
1020
593k
        }
1021
37.1k
      }
1022
76.0k
      else if( decisions.prevId[stateId] >= 0 )
1023
74.3k
      {
1024
74.3k
        const int prevId          = decisions.prevId[stateId];
1025
74.3k
        curr.numSig[stateId]      = prev.numSig[prevId] + !!decisions.absLevel[stateId];
1026
74.3k
        curr.refSbbCtxId[stateId] = prev.refSbbCtxId[prevId];
1027
74.3k
        curr.remRegBins[stateId]  = prev.remRegBins[prevId] - 1;
1028
1029
74.3k
        if( curr.remRegBins[stateId] >= 4 )
1030
61.3k
        {
1031
61.3k
          curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
1032
61.3k
        }
1033
1034
1.26M
        for( int i = 0; i < 16; i++ )
1035
1.18M
        {
1036
1.18M
          curr.absVal[i][stateId] = prev.absVal[i][prevId];
1037
1.18M
        }
1038
74.3k
      }
1039
1.72k
      else
1040
1.72k
      {
1041
1.72k
        curr.numSig[stateId]      =  1;
1042
1.72k
        curr.refSbbCtxId[stateId] = -1;
1043
1.72k
        curr.remRegBins[stateId]  = prev.initRemRegBins;
1044
1.72k
        curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
1045
1046
29.3k
        for( int i = 0; i < 16; i++ )
1047
27.5k
        {
1048
27.5k
          curr.absVal[i][stateId] = 0;
1049
27.5k
        }
1050
1.72k
      }
1051
1052
113k
      curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );
1053
1054
113k
      uint8_t* levels[4];
1055
113k
      commonCtx.getLevelPtrs( scanInfo, levels[0], levels[1], levels[2], levels[3] );
1056
1.92M
      for( int i = 0; i < 16; i++ )
1057
1.81M
      {
1058
        // save abs levels to commonCtx
1059
1.81M
        levels[stateId][i] = curr.absVal[i][stateId];
1060
        // clean the SBB ctx
1061
1.81M
        curr.tplAcc[i][stateId] = 0;
1062
1.81M
        curr.sum1st[i][stateId] = 0;
1063
1.81M
        curr.absVal[i][stateId] = 0;
1064
1.81M
      }
1065
1066
113k
      commonCtx.update( scanInfo, curr.refSbbCtxId[stateId], stateId, curr );
1067
1068
113k
      curr.numSig[stateId] = 0;
1069
1070
113k
      if( curr.remRegBins[stateId] >= 4 )
1071
99.9k
      {
1072
99.9k
        TCoeff  sumAbs1 = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
1073
99.9k
        TCoeff  sumNum  = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
1074
99.9k
        int sumGt1 = sumAbs1 - sumNum;
1075
1076
99.9k
        curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( sumAbs1 + 1 ) >> 1, 3 );
1077
99.9k
        curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
1078
99.9k
      }
1079
13.1k
      else
1080
13.1k
      {
1081
13.1k
        curr.anyRemRegBinsLt4 = true;
1082
13.1k
      }
1083
113k
    }
1084
114k
  }
1085
1086
  static void updateStates( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr )
1087
6.15M
  {
1088
6.15M
    DQIntern::StateMem prev = curr;
1089
6.15M
    curr.anyRemRegBinsLt4   = false;
1090
1091
6.15M
    update1State( 0, scanInfo, decisions, curr, prev );
1092
6.15M
    update1State( 1, scanInfo, decisions, curr, prev );
1093
6.15M
    update1State( 2, scanInfo, decisions, curr, prev );
1094
6.15M
    update1State( 3, scanInfo, decisions, curr, prev );
1095
1096
6.15M
    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
1097
6.15M
  }
1098
1099
  static void updateStatesEOS( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::CommonCtx& commonCtx )
1100
28.6k
  {
1101
28.6k
    DQIntern::StateMem prev = curr;
1102
28.6k
    curr.anyRemRegBinsLt4   = false;
1103
1104
28.6k
    update1StateEOS( 0, scanInfo, decisions, skip, curr, prev, commonCtx );
1105
28.6k
    update1StateEOS( 1, scanInfo, decisions, skip, curr, prev, commonCtx );
1106
28.6k
    update1StateEOS( 2, scanInfo, decisions, skip, curr, prev, commonCtx );
1107
28.6k
    update1StateEOS( 3, scanInfo, decisions, skip, curr, prev, commonCtx );
1108
1109
28.6k
    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
1110
28.6k
  }
1111
}; // namespace DQIntern
1112
1113
static const DQIntern::Decisions startDec[2] =
1114
{
1115
  DQIntern::Decisions
1116
  {
1117
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
1118
    { -1, -1, -1, -1 },
1119
    { -2, -2, -2, -2 },
1120
  },
1121
  DQIntern::Decisions
1122
  {
1123
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
1124
    { 0, 0, 0, 0 },
1125
    { 4, 5, 6, 7 },
1126
  }
1127
};
1128
1129
void DepQuant::xQuantDQ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff )
1130
1.72M
{
1131
1.72M
  using namespace DQIntern;
1132
  
1133
  //===== reset / pre-init =====
1134
1.72M
  const TUParameters& tuPars  = *m_scansRom->getTUPars( tu.blocks[compID], compID );
1135
1.72M
  m_quant.initQuantBlock    ( tu, compID, cQP, lambda );
1136
1.72M
  TCoeffSig*    qCoeff      = tu.getCoeffs( compID ).buf;
1137
1.72M
  const TCoeff* tCoeff      = srcCoeff.buf;
1138
1.72M
  const int     numCoeff    = tu.blocks[compID].area();
1139
1.72M
  ::memset( qCoeff, 0x00, numCoeff * sizeof( TCoeffSig ) );
1140
1.72M
  absSum                    = 0;
1141
1142
1.72M
  const CompArea& area      = tu.blocks[ compID ];
1143
1.72M
  const uint32_t  width     = area.width;
1144
1.72M
  const uint32_t  height    = area.height;
1145
1.72M
  const uint32_t  lfnstIdx  = tu.cu->lfnstIdx;
1146
  //===== scaling matrix ====
1147
  //const int         qpDQ = cQP.Qp + 1;
1148
  //const int         qpPer = qpDQ / 6;
1149
  //const int         qpRem = qpDQ - 6 * qpPer;
1150
1151
  //TCoeff thresTmp = thres;
1152
1.72M
  bool zeroOut = false;
1153
1.72M
  bool zeroOutforThres = false;
1154
1.72M
  int effWidth = tuPars.m_width, effHeight = tuPars.m_height;
1155
1.72M
  if( ( tu.mtsIdx[compID] > MTS_SKIP || ( tu.cs->sps->MTS && tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32 ) ) && compID == COMP_Y )
1156
0
  {
1157
0
    effHeight = ( tuPars.m_height == 32 ) ? 16 : tuPars.m_height;
1158
0
    effWidth  = ( tuPars.m_width  == 32 ) ? 16 : tuPars.m_width;
1159
0
    zeroOut   = ( effHeight < tuPars.m_height || effWidth < tuPars.m_width );
1160
0
  }
1161
1.72M
  zeroOutforThres = zeroOut || ( 32 < tuPars.m_height || 32 < tuPars.m_width );
1162
  //===== find first test position =====
1163
1.72M
  int firstTestPos = std::min<int>( tuPars.m_width, JVET_C0024_ZERO_OUT_TH ) * std::min<int>( tuPars.m_height, JVET_C0024_ZERO_OUT_TH ) - 1;
1164
1.72M
  if( lfnstIdx > 0 && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4 )
1165
1.07M
  {
1166
1.07M
    firstTestPos = ( ( width == 4 && height == 4 ) || ( width == 8 && height == 8 ) )  ? 7 : 15 ;
1167
1.07M
  }
1168
1169
1.72M
  const TCoeff defaultQuantisationCoefficient = (TCoeff)m_quant.getQScale();
1170
1.72M
  const TCoeff thres = m_quant.getLastThreshold();
1171
1.72M
  const int zeroOutWidth  = ( tuPars.m_width  == 32 && zeroOut ) ? 16 : 32;
1172
1.72M
  const int zeroOutHeight = ( tuPars.m_height == 32 && zeroOut ) ? 16 : 32;
1173
1174
1.72M
  if( enableScalingLists )
1175
0
  {
1176
0
    for( ; firstTestPos >= 0; firstTestPos-- )
1177
0
    {
1178
0
      if( zeroOutforThres && ( tuPars.m_scanId2BlkPos[firstTestPos].x >= zeroOutWidth || tuPars.m_scanId2BlkPos[firstTestPos].y >= zeroOutHeight ) ) continue;
1179
1180
0
      const TCoeff thresTmp = TCoeff( thres / ( 4 * quantCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx] ) );
1181
1182
0
      if( abs( tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx] ) > thresTmp ) break;
1183
0
    }
1184
0
  }
1185
1.72M
  else
1186
1.72M
  {
1187
1.72M
    const TCoeff defaultTh = TCoeff( thres / ( defaultQuantisationCoefficient << 2 ) );
1188
1189
1.72M
    m_findFirstPos( firstTestPos, tCoeff, tuPars, defaultTh, zeroOutforThres, zeroOutWidth, zeroOutHeight );
1190
1.72M
  }
1191
1192
1.72M
  if( firstTestPos < 0 )
1193
1.00M
  {
1194
1.00M
    tu.lastPos[compID] = -1;
1195
1.00M
    return;
1196
1.00M
  }
1197
1198
  //===== real init =====
1199
718k
  RateEstimator::initCtx( tuPars, tu, compID, ctx.getFracBitsAcess() );
1200
718k
  m_commonCtx.reset( tuPars, *this );
1201
3.59M
  for( int k = 0; k < 4; k++ )
1202
2.87M
  {
1203
2.87M
    DQIntern::initStates( k, m_state_curr );
1204
2.87M
    DQIntern::initStates( k, m_state_skip );
1205
2.87M
    m_state_curr.m_sigFracBitsArray[k] = RateEstimator::sigFlagBits(k);
1206
2.87M
  }
1207
1208
718k
  m_state_curr.m_gtxFracBitsArray = RateEstimator::gtxFracBits();
1209
  //memset( m_state_curr.tplAcc, 0, sizeof( m_state_curr.tplAcc ) ); // will be set in updateStates{,EOS} before first access
1210
718k
  memset( m_state_curr.sum1st, 0, sizeof( m_state_curr.sum1st ) );   // will be accessed in setRiceParam before updateState{,EOS}
1211
  //memset( m_state_curr.absVal, 0, sizeof( m_state_curr.absVal ) ); // will be set in updateStates{,EOS} before first access
1212
1213
718k
  const int numCtx = isLuma( compID ) ? 21 : 11;
1214
718k
  const CoeffFracBits* const cffBits = gtxFracBits();
1215
8.90M
  for( int i = 0; i < numCtx; i++ )
1216
8.18M
  {
1217
8.18M
    m_state_curr.cffBits1[i] = cffBits[i].bits[1];
1218
8.18M
  }
1219
1220
718k
  int effectWidth  = std::min( 32, effWidth );
1221
718k
  int effectHeight = std::min( 32, effHeight );
1222
718k
  m_state_curr.initRemRegBins   = ( effectWidth * effectHeight * MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT ) / 16;
1223
718k
  m_state_curr.anyRemRegBinsLt4 = true; // for the first coeff use scalar impl., because it check against the init state, which
1224
                                        // prohibits some paths
1225
1226
  //===== populate trellis =====
1227
7.61M
  for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- )
1228
6.90M
  {
1229
6.90M
    const ScanInfo& scanInfo = tuPars.m_scanInfo[ scanIdx ];
1230
6.90M
    if( enableScalingLists )
1231
0
    {
1232
0
      m_quant.initQuantBlock( tu, compID, cQP, lambda, quantCoeff[scanInfo.rasterPos] );
1233
0
      xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos] ), scanInfo, zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ), quantCoeff[scanInfo.rasterPos] );
1234
0
    }
1235
6.90M
    else
1236
6.90M
      xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos] ), scanInfo, zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ), defaultQuantisationCoefficient );
1237
6.90M
  }
1238
1239
  //===== find best path =====
1240
718k
  int       prevId      = -1;
1241
718k
  int64_t   minPathCost =  0;
1242
3.59M
  for( int8_t stateId = 0; stateId < 4; stateId++ )
1243
2.87M
  {
1244
2.87M
    int64_t pathCost = m_trellis[0][0].rdCost[stateId];
1245
2.87M
    if( pathCost < minPathCost )
1246
1.22M
    {
1247
1.22M
      prevId      = stateId;
1248
1.22M
      minPathCost = pathCost;
1249
1.22M
    }
1250
2.87M
  }
1251
1252
  //===== backward scanning =====
1253
718k
  int scanIdx = 0;
1254
7.53M
  for( ; prevId >= 0; scanIdx++ )
1255
6.81M
  {
1256
6.81M
    TCoeffSig absLevel = m_trellis[scanIdx][prevId >> 2].absLevel[prevId & 3];
1257
6.81M
    int32_t blkpos     = tuPars.m_scanId2BlkPos[scanIdx].idx;
1258
6.81M
    qCoeff[ blkpos ]   = TCoeffSig( tCoeff[blkpos] < 0 ? -absLevel : absLevel );
1259
6.81M
    absSum            += absLevel;
1260
6.81M
    prevId             = m_trellis[scanIdx][prevId >> 2].prevId[prevId & 3];
1261
6.81M
  }
1262
1263
718k
  tu.lastPos[compID] = scanIdx - 1;
1264
718k
}
1265
1266
void DepQuant::xDecide( const DQIntern::ScanInfo& scanInfo, const TCoeff absCoeff, const int lastOffset, DQIntern::Decisions& decisions, bool zeroOut, int quantCoeff )
1267
6.90M
{
1268
6.90M
  using namespace DQIntern;
1269
1270
6.90M
  ::memcpy( &decisions, startDec, sizeof( Decisions ) );
1271
1272
6.90M
  StateMem& skip = m_state_skip;
1273
1274
6.90M
  if( zeroOut )
1275
0
  {
1276
0
    if( scanInfo.spt == SCAN_EOCSBB )
1277
0
    {
1278
0
      checkRdCostSkipSbbZeroOut( 0, decisions, 0, skip );
1279
0
      checkRdCostSkipSbbZeroOut( 1, decisions, 1, skip );
1280
0
      checkRdCostSkipSbbZeroOut( 2, decisions, 2, skip );
1281
0
      checkRdCostSkipSbbZeroOut( 3, decisions, 3, skip );
1282
0
    }
1283
0
    return;
1284
0
  }
1285
1286
6.90M
  StateMem& prev = m_state_curr;
1287
1288
  /// start inline prequant
1289
6.90M
  int64_t scaledOrg = int64_t( absCoeff ) * quantCoeff;
1290
6.90M
  TCoeff  qIdx      = TCoeff( ( scaledOrg + m_quant.m_QAdd ) >> m_quant.m_QShift );
1291
1292
6.90M
  if( qIdx < 0 )
1293
1.14M
  {
1294
1.14M
    int64_t scaledAdd = m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;
1295
1.14M
    int64_t pq_a_dist = ( ( scaledAdd + 0 * m_quant.m_DistStepAdd ) * 1 + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1296
1.14M
    int64_t pq_b_dist = ( ( scaledAdd + 1 * m_quant.m_DistStepAdd ) * 2 + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1297
    /// stop inline prequant
1298
1299
1.14M
    if( prev.anyRemRegBinsLt4 )
1300
76.7k
    {
1301
76.7k
      setRiceParam( 0, scanInfo, prev, false );
1302
76.7k
      checkRdCostsOdd1( 0, scanInfo.spt, pq_b_dist, decisions, 2, 0, prev );
1303
1304
76.7k
      setRiceParam( 1, scanInfo, prev, false );
1305
76.7k
      checkRdCostsOdd1( 1, scanInfo.spt, pq_b_dist, decisions, 0, 2, prev );
1306
1307
76.7k
      setRiceParam( 2, scanInfo, prev, false );
1308
76.7k
      checkRdCostsOdd1( 2, scanInfo.spt, pq_a_dist, decisions, 3, 1, prev );
1309
1310
76.7k
      setRiceParam( 3, scanInfo, prev, false );
1311
76.7k
      checkRdCostsOdd1( 3, scanInfo.spt, pq_a_dist, decisions, 1, 3, prev );
1312
76.7k
    }
1313
1.07M
    else
1314
1.07M
    {
1315
      // has to be called as a first check, assumes no decision has been made yet
1316
1.07M
      m_checkAllRdCostsOdd1( scanInfo.spt, pq_a_dist, pq_b_dist, decisions, prev );
1317
1.07M
    }
1318
1319
1.14M
    checkRdCostStart( lastOffset, PQData{ 1, pq_b_dist }, decisions, 2, prev );
1320
1.14M
  }
1321
5.75M
  else
1322
5.75M
  {
1323
    /// start inline prequant
1324
5.75M
    qIdx = std::max<TCoeff>( 1, std::min<TCoeff>( m_quant.m_maxQIdx, qIdx ) );
1325
5.75M
    int64_t scaledAdd = qIdx * m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;
1326
1327
5.75M
    PQData  pqData[4];
1328
1329
5.75M
    PQData& pq_a = pqData[( qIdx + 0 ) & 3];
1330
5.75M
    PQData& pq_b = pqData[( qIdx + 1 ) & 3];
1331
5.75M
    PQData& pq_c = pqData[( qIdx + 2 ) & 3];
1332
5.75M
    PQData& pq_d = pqData[( qIdx + 3 ) & 3];
1333
1334
5.75M
    pq_a.deltaDist = ( ( scaledAdd + 0 * m_quant.m_DistStepAdd ) * ( qIdx + 0 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1335
5.75M
    pq_a.absLevel = ( qIdx + 1 ) >> 1;
1336
1337
5.75M
    pq_b.deltaDist = ( ( scaledAdd + 1 * m_quant.m_DistStepAdd ) * ( qIdx + 1 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1338
5.75M
    pq_b.absLevel = ( qIdx + 2 ) >> 1;
1339
1340
5.75M
    pq_c.deltaDist = ( ( scaledAdd + 2 * m_quant.m_DistStepAdd ) * ( qIdx + 2 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1341
5.75M
    pq_c.absLevel = ( qIdx + 3 ) >> 1;
1342
1343
5.75M
    pq_d.deltaDist = ( ( scaledAdd + 3 * m_quant.m_DistStepAdd ) * ( qIdx + 3 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1344
5.75M
    pq_d.absLevel = ( qIdx + 4 ) >> 1;
1345
    /// stop inline prequant
1346
1347
5.75M
    bool cff02ge4 = pqData[0].absLevel >= 4/* || pqData[2].absLevel >= 4 */;
1348
5.75M
    bool cff13ge4 = /* pqData[1].absLevel >= 4 || */ pqData[3].absLevel >= 4;
1349
1350
5.75M
    if( cff02ge4 || cff13ge4 || prev.anyRemRegBinsLt4 )
1351
4.72M
    {
1352
4.72M
      if( prev.anyRemRegBinsLt4 || cff02ge4 )
1353
4.50M
      {
1354
4.50M
        setRiceParam( 0, scanInfo, prev, cff02ge4 );
1355
4.50M
        setRiceParam( 1, scanInfo, prev, cff02ge4 );
1356
4.50M
      }
1357
1358
4.72M
      if( prev.anyRemRegBinsLt4 || cff13ge4 )
1359
4.72M
      {
1360
4.72M
        setRiceParam( 2, scanInfo, prev, cff13ge4 );
1361
4.72M
        setRiceParam( 3, scanInfo, prev, cff13ge4 );
1362
4.72M
      }
1363
1364
4.72M
      checkRdCosts( 0, scanInfo.spt, pqData[0], pqData[2], decisions, 0, 2, prev );
1365
4.72M
      checkRdCosts( 1, scanInfo.spt, pqData[0], pqData[2], decisions, 2, 0, prev );
1366
4.72M
      checkRdCosts( 2, scanInfo.spt, pqData[3], pqData[1], decisions, 1, 3, prev );
1367
4.72M
      checkRdCosts( 3, scanInfo.spt, pqData[3], pqData[1], decisions, 3, 1, prev );
1368
4.72M
    }
1369
1.02M
    else
1370
1.02M
    {
1371
      // has to be called as a first check, assumes no decision has been made yet
1372
1.02M
      m_checkAllRdCosts( scanInfo.spt, pqData, decisions, prev );
1373
1.02M
    }
1374
1375
5.75M
    checkRdCostStart( lastOffset, pqData[0], decisions, 0, prev );
1376
5.75M
    checkRdCostStart( lastOffset, pqData[2], decisions, 2, prev );
1377
5.75M
  }
1378
1379
6.90M
  if( scanInfo.spt == SCAN_EOCSBB )
1380
27.0k
  {
1381
27.0k
    checkRdCostSkipSbb( 0, decisions, 0, skip );
1382
27.0k
    checkRdCostSkipSbb( 1, decisions, 1, skip );
1383
27.0k
    checkRdCostSkipSbb( 2, decisions, 2, skip );
1384
27.0k
    checkRdCostSkipSbb( 3, decisions, 3, skip );
1385
27.0k
  }
1386
6.90M
}
1387
1388
void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const DQIntern::ScanInfo& scanInfo, bool zeroOut, int quantCoeff )
1389
6.90M
{
1390
6.90M
  using namespace DQIntern;
1391
1392
6.90M
  Decisions* decisions = &m_trellis[scanInfo.scanIdx][0];
1393
1394
6.90M
  xDecide( scanInfo, absCoeff, lastOffset( scanInfo.scanIdx ), *decisions, zeroOut, quantCoeff );
1395
1396
6.90M
  if( scanInfo.scanIdx )
1397
6.18M
  {
1398
6.18M
    if( scanInfo.spt == SCAN_SOCSBB )
1399
23.8k
    {
1400
23.8k
      memcpy( &m_state_skip, &m_state_curr, DQIntern::StateMemSkipCpySize );
1401
23.8k
    }
1402
1403
6.18M
    if( scanInfo.insidePos == 0 )
1404
28.6k
    {
1405
28.6k
      m_commonCtx.swap();
1406
28.6k
      m_updateStatesEOS( scanInfo, *decisions, m_state_skip, m_state_curr, m_commonCtx );
1407
28.6k
      ::memcpy( decisions + 1, decisions, sizeof( Decisions ) );
1408
28.6k
    }
1409
6.15M
    else if( !zeroOut )
1410
6.15M
    {
1411
6.15M
      m_updateStates( scanInfo, *decisions, m_state_curr );
1412
6.15M
    }
1413
6.18M
  }
1414
6.90M
}
1415
1416
void DepQuant::xDequantDQ( const TransformUnit& tu,  CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* piDequantCoef )
1417
708k
{
1418
708k
  m_quant.dequantBlock( tu, compID, cQP, recCoeff, enableScalingLists, piDequantCoef );
1419
708k
}
1420
1421
17.7k
DepQuant::DepQuant( const Quant* other, bool enc, bool useScalingLists, bool enableOpt ) : QuantRDOQ2( other, useScalingLists ), RateEstimator(), m_commonCtx()
1422
17.7k
{
1423
17.7k
  const DepQuant* dq = dynamic_cast<const DepQuant*>( other );
1424
17.7k
  CHECK( other && !dq, "The DepQuant cast must be successfull!" );
1425
1426
17.7k
  if( !dq )
1427
17.7k
  {
1428
17.7k
    m_scansRom = std::make_shared<DQIntern::Rom>();
1429
17.7k
    m_scansRom->init();
1430
17.7k
  }
1431
0
  else
1432
0
  {
1433
0
    m_scansRom = dq->m_scansRom;
1434
0
  }
1435
1436
72.8M
  for( int t = 0; t < ( MAX_TB_SIZEY * MAX_TB_SIZEY ); t++ )
1437
72.8M
  {
1438
72.8M
    memcpy( m_trellis[t], startDec, sizeof( startDec ) );
1439
72.8M
  }
1440
1441
17.7k
  m_checkAllRdCosts     = DQIntern::checkAllRdCosts;
1442
17.7k
  m_checkAllRdCostsOdd1 = DQIntern::checkAllRdCostsOdd1;
1443
17.7k
  m_updateStatesEOS     = DQIntern::updateStatesEOS;
1444
17.7k
  m_updateStates        = DQIntern::updateStates;
1445
17.7k
  m_findFirstPos        = DQIntern::findFirstPos;
1446
1447
17.7k
  if( enableOpt )
1448
17.7k
  {
1449
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_OPT_QUANT
1450
    initDepQuantX86();
1451
#endif
1452
#if defined( TARGET_SIMD_ARM ) && ENABLE_SIMD_OPT_QUANT
1453
    initDepQuantARM();
1454
#endif
1455
17.7k
  }
1456
17.7k
}
1457
1458
DepQuant::~DepQuant()
1459
17.7k
{
1460
17.7k
}
1461
1462
void DepQuant::quant( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff& uiAbsSum, const QpParam& cQP, const Ctx& ctx )
1463
1.81M
{
1464
1.81M
  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ( tu, compID, pSrc, cQP ) )
1465
0
  {
1466
0
    tu.lastPos[compID] = -1;
1467
0
    uiAbsSum           =  0;
1468
0
  }
1469
1.81M
  else if( tu.cs->slice->depQuantEnabled && tu.mtsIdx[compID] != MTS_SKIP )
1470
1.72M
  {
1471
    //===== scaling matrix ====
1472
1.72M
    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
1473
1.72M
    const int         qpPer           = qpDQ / 6;
1474
1.72M
    const int         qpRem           = qpDQ - 6 * qpPer;
1475
1.72M
    const CompArea    &rect           = tu.blocks[compID];
1476
1.72M
    const int         width           = rect.width;
1477
1.72M
    const int         height          = rect.height;
1478
1.72M
    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
1479
1.72M
    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
1480
1.72M
    const uint32_t    log2TrWidth     = Log2(width);
1481
1.72M
    const uint32_t    log2TrHeight    = Log2(height);
1482
18.4E
    const bool isLfnstApplied         = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
1483
1.72M
    const bool enableScalingLists     = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), isLfnstApplied);
1484
1.72M
    xQuantDQ( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum, enableScalingLists, Quant::getQuantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
1485
1.72M
  }
1486
94.0k
  else
1487
94.0k
  {
1488
94.0k
    QuantRDOQ2::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
1489
94.0k
  }
1490
1.81M
}
1491
1492
void DepQuant::dequant( const TransformUnit& tu, CoeffBuf& dstCoeff, const ComponentID compID, const QpParam& cQP )
1493
752k
{
1494
752k
  if( tu.cs->slice->depQuantEnabled && (tu.mtsIdx[compID] != MTS_SKIP) )
1495
708k
  {
1496
708k
    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
1497
708k
    const int         qpPer           = qpDQ / 6;
1498
708k
    const int         qpRem           = qpDQ - 6 * qpPer;
1499
708k
    const CompArea    &rect           = tu.blocks[compID];
1500
708k
    const int         width           = rect.width;
1501
708k
    const int         height          = rect.height;
1502
708k
    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
1503
708k
    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
1504
708k
    const uint32_t    log2TrWidth    = Log2(width);
1505
708k
    const uint32_t    log2TrHeight   = Log2(height);
1506
708k
    const bool isLfnstApplied        = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
1507
708k
    const bool enableScalingLists    = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), isLfnstApplied);
1508
708k
    xDequantDQ( tu, dstCoeff, compID, cQP, enableScalingLists, Quant::getDequantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
1509
708k
  }
1510
44.3k
  else
1511
44.3k
  {
1512
44.3k
    QuantRDOQ::dequant( tu, dstCoeff, compID, cQP );
1513
44.3k
  }
1514
752k
}
1515
1516
void DepQuant::init( int rdoq, bool useRDOQTS, int thrVal )
1517
17.7k
{
1518
17.7k
  QuantRDOQ2::init( rdoq, useRDOQTS, thrVal );
1519
17.7k
  m_quant.init( thrVal );
1520
17.7k
}
1521
1522
} // namespace vvenc
1523
1524
//! \}
1525