Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/DepQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
#include "DepQuant.h"
44
#include "TrQuant.h"
45
#include "CodingStructure.h"
46
#include "UnitTools.h"
47
48
#include <bitset>
49
50
//! \ingroup CommonLib
51
//! \{
52
53
namespace vvenc {
54
55
56
namespace DQIntern
57
{
58
  // Moves firstTestPos backwards along the scan until it points at a coefficient whose
  // absolute value is larger than defaultTh, or until it drops below zero.
  //
  // firstTestPos     in/out: scan index to start from; on return the found index, or -1 if none
  // tCoeff           coefficient buffer, addressed via the .idx field of the scan table
  // tuPars           provides m_scanId2BlkPos (x / y / idx for every scan index)
  // defaultTh        magnitude a coefficient has to exceed to stop the search
  // zeroOutForThres  when set, positions with x >= zeroOutWidth or y >= zeroOutHeight are skipped
  static void findFirstPos( int& firstTestPos, const TCoeff* tCoeff, const DQIntern::TUParameters& tuPars, int defaultTh,
                            bool zeroOutForThres, int zeroOutWidth, int zeroOutHeight )
  {
    while( firstTestPos >= 0 )
    {
      const auto& blkPos    = tuPars.m_scanId2BlkPos[firstTestPos];
      const bool  isSkipped = zeroOutForThres && ( blkPos.x >= zeroOutWidth || blkPos.y >= zeroOutHeight );

      // skipped positions are never inspected, regardless of the coefficient stored there
      if( !isSkipped && abs( tCoeff[blkPos.idx] ) > defaultTh )
      {
        return;
      }
      firstTestPos--;
    }
  }
74
75
  void Rom::xInitScanArrays()
76
17.3k
  {
77
17.3k
    if( m_scansInitialized )
78
0
    {
79
0
      return;
80
0
    }
81
17.3k
    ::memset( m_scanId2NbInfoSbbArray, 0, sizeof(m_scanId2NbInfoSbbArray) );
82
17.3k
    ::memset( m_scanId2NbInfoOutArray, 0, sizeof(m_scanId2NbInfoOutArray) );
83
17.3k
    ::memset( m_tuParameters,          0, sizeof(m_tuParameters) );
84
85
17.3k
    uint32_t raster2id[ MAX_CU_SIZE * MAX_CU_SIZE ];
86
17.3k
    ::memset(raster2id, 0, sizeof(raster2id));
87
88
139k
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
89
121k
    {
90
973k
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
91
851k
      {
92
851k
        if( (hd == 0 && vd <= 1) || (hd <= 1 && vd == 0) )
93
52.1k
        {
94
52.1k
          continue;
95
52.1k
        }
96
799k
        const uint32_t      blockWidth    = (1 << hd);
97
799k
        const uint32_t      blockHeight   = (1 << vd);
98
799k
        const uint32_t      log2CGWidth   = g_log2SbbSize[hd][vd][0];
99
799k
        const uint32_t      log2CGHeight  = g_log2SbbSize[hd][vd][1];
100
799k
        const uint32_t      groupWidth    = 1 << log2CGWidth;
101
799k
        const uint32_t      groupHeight   = 1 << log2CGHeight;
102
799k
        const uint32_t      groupSize     = groupWidth * groupHeight;
103
799k
        const SizeType      blkWidthIdx   = Log2( blockWidth );
104
799k
        const SizeType      blkHeightIdx  = Log2( blockHeight );
105
799k
        const ScanElement * scanId2RP     = getScanOrder( SCAN_GROUPED_4x4, blkWidthIdx, blkHeightIdx );
106
799k
        NbInfoSbb*&         sId2NbSbb     = m_scanId2NbInfoSbbArray[hd][vd];
107
799k
        NbInfoOut*&         sId2NbOut     = m_scanId2NbInfoOutArray[hd][vd];
108
        // consider only non-zero-out region
109
799k
        const uint32_t      blkWidthNZOut = std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockWidth  );
110
799k
        const uint32_t      blkHeightNZOut= std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockHeight );
111
799k
        const uint32_t      totalValues   = blkWidthNZOut * blkHeightNZOut;
112
113
799k
        sId2NbSbb = new NbInfoSbb[ totalValues ];
114
799k
        sId2NbOut = new NbInfoOut[ totalValues ];
115
116
157M
        for( uint32_t scanId = 0; scanId < totalValues; scanId++ )
117
156M
        {
118
156M
          raster2id[scanId2RP[scanId].idx] = scanId;
119
156M
          sId2NbSbb[scanId].numInv = 0;
120
156M
        }
121
122
157M
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
123
156M
        {
124
156M
          const int posX = scanId2RP[scanId].x;
125
156M
          const int posY = scanId2RP[scanId].y;
126
156M
          const int rpos = scanId2RP[scanId].idx;
127
156M
          {
128
            //===== inside subband neighbours =====
129
156M
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
130
156M
            int            cpos[5];
131
132
156M
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] < groupSize + begSbb ? raster2id[rpos+1           ] - begSbb : 0 ) : 0 );
133
156M
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] < groupSize + begSbb ? raster2id[rpos+2           ] - begSbb : 0 ) : 0 );
134
156M
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] < groupSize + begSbb ? raster2id[rpos+1+blockWidth] - begSbb : 0 ) : 0 );
135
156M
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] < groupSize + begSbb ? raster2id[rpos+  blockWidth] - begSbb : 0 ) : 0 );
136
156M
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] < groupSize + begSbb ? raster2id[rpos+2*blockWidth] - begSbb : 0 ) : 0 );
137
138
156M
            int num = 0;
139
156M
            int inPos[5] = { 0, };
140
141
628M
            while( true )
142
628M
            {
143
628M
              int nk = -1;
144
3.77G
              for( int k = 0; k < 5; k++ )
145
3.14G
              {
146
3.14G
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
147
707M
                {
148
707M
                  nk = k;
149
707M
                }
150
3.14G
              }
151
628M
              if( nk < 0 )
152
156M
              {
153
156M
                break;
154
156M
              }
155
472M
              inPos[ num++ ] = uint8_t( cpos[nk] );
156
472M
              cpos[nk] = 0;
157
472M
            }
158
468M
            for( int k = num; k < 5; k++ )
159
311M
            {
160
311M
              inPos[k] = 0;
161
311M
            }
162
628M
            for( int k = 0; k < num; k++ )
163
472M
            {
164
472M
              CHECK( sId2NbSbb[begSbb + inPos[k]].numInv >= 5, "" );
165
472M
              sId2NbSbb[begSbb + inPos[k]].invInPos[sId2NbSbb[begSbb + inPos[k]].numInv++] = scanId & ( groupSize - 1 );
166
472M
            }
167
156M
          }
168
156M
          {
169
            //===== outside subband neighbours =====
170
156M
            NbInfoOut&     nbOut  = sId2NbOut[ scanId ];
171
156M
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
172
156M
            int            cpos[5];
173
174
156M
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] >= groupSize + begSbb ? raster2id[rpos+1           ] : 0 ) : 0 );
175
156M
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] >= groupSize + begSbb ? raster2id[rpos+2           ] : 0 ) : 0 );
176
156M
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] >= groupSize + begSbb ? raster2id[rpos+1+blockWidth] : 0 ) : 0 );
177
156M
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] >= groupSize + begSbb ? raster2id[rpos+  blockWidth] : 0 ) : 0 );
178
156M
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] >= groupSize + begSbb ? raster2id[rpos+2*blockWidth] : 0 ) : 0 );
179
180
380M
            for( nbOut.num = 0; true; )
181
380M
            {
182
380M
              int nk = -1;
183
2.28G
              for( int k = 0; k < 5; k++ )
184
1.90G
              {
185
1.90G
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
186
328M
                {
187
328M
                  nk = k;
188
328M
                }
189
1.90G
              }
190
380M
              if( nk < 0 )
191
156M
              {
192
156M
                break;
193
156M
              }
194
223M
              nbOut.outPos[ nbOut.num++ ] = uint16_t( cpos[nk] );
195
223M
              cpos[nk] = 0;
196
223M
            }
197
716M
            for( int k = nbOut.num; k < 5; k++ )
198
560M
            {
199
560M
              nbOut.outPos[k] = 0;
200
560M
            }
201
156M
            nbOut.maxDist = ( scanId == 0 ? 0 : sId2NbOut[scanId-1].maxDist );
202
380M
            for( int k = 0; k < nbOut.num; k++ )
203
223M
            {
204
223M
              if( nbOut.outPos[k] > nbOut.maxDist )
205
24.1M
              {
206
24.1M
                nbOut.maxDist = nbOut.outPos[k];
207
24.1M
              }
208
223M
            }
209
156M
          }
210
156M
        }
211
212
        // make it relative
213
157M
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
214
156M
        {
215
156M
          NbInfoOut& nbOut  = sId2NbOut[scanId];
216
156M
          const int  begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
217
380M
          for( int k = 0; k < nbOut.num; k++ )
218
223M
          {
219
223M
            CHECK(begSbb > nbOut.outPos[k], "Position must be past sub block begin");
220
223M
            nbOut.outPos[k] -= begSbb;
221
223M
          }
222
156M
          nbOut.maxDist -= scanId;
223
156M
        }
224
225
2.39M
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
226
1.59M
        {
227
1.59M
          m_tuParameters[hd][vd][chId] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(chId) );
228
1.59M
        }
229
799k
      }
230
121k
    }
231
17.3k
    m_scansInitialized = true;
232
17.3k
  }
233
234
  void Rom::xUninitScanArrays()
235
17.3k
  {
236
17.3k
    if( !m_scansInitialized )
237
0
    {
238
0
      return;
239
0
    }
240
139k
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
241
121k
    {
242
973k
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
243
851k
      {
244
851k
        NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd];
245
851k
        NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd];
246
851k
        if( sId2NbSbb )
247
799k
        {
248
799k
          delete [] sId2NbSbb;
249
799k
        }
250
851k
        if( sId2NbOut )
251
799k
        {
252
799k
          delete [] sId2NbOut;
253
799k
        }
254
2.55M
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
255
1.70M
        {
256
1.70M
          TUParameters*& tuPars = m_tuParameters[hd][vd][chId];
257
1.70M
          if( tuPars )
258
1.59M
          {
259
1.59M
            delete tuPars;
260
1.59M
          }
261
1.70M
        }
262
851k
      }
263
121k
    }
264
17.3k
    m_scansInitialized = false;
265
17.3k
  }
266
267
268
  // Collects all size dependent parameters of a width x height transform block of the
  // given channel type: sub-block geometry, scan tables, the neighbour tables owned by
  // rom, and one freshly allocated ScanInfo entry per coefficient of the non-zero-out region.
  TUParameters::TUParameters( const Rom& rom, const unsigned width, const unsigned height, const ChannelType chType )
  {
    m_chType = chType;
    m_width  = width;
    m_height = height;

    // coefficients are only counted inside the non-zero-out region
    const uint32_t nonzeroWidth  = std::min<uint32_t>( JVET_C0024_ZERO_OUT_TH, m_width );
    const uint32_t nonzeroHeight = std::min<uint32_t>( JVET_C0024_ZERO_OUT_TH, m_height );
    m_numCoeff = nonzeroWidth * nonzeroHeight;

    const int log2W = Log2( m_width  );
    const int log2H = Log2( m_height );

    // sub-block geometry
    m_log2SbbWidth  = g_log2SbbSize[ log2W ][ log2H ][0];
    m_log2SbbHeight = g_log2SbbSize[ log2W ][ log2H ][1];
    m_log2SbbSize   = m_log2SbbWidth + m_log2SbbHeight;
    m_sbbSize       = ( 1 << m_log2SbbSize );
    m_sbbMask       = m_sbbSize - 1;
    m_widthInSbb    = nonzeroWidth  >> m_log2SbbWidth;
    m_heightInSbb   = nonzeroHeight >> m_log2SbbHeight;
    m_numSbb        = m_widthInSbb * m_heightInSbb;

    // scan order of the sub-blocks and of the coefficients
    const SizeType hsbb = Log2( m_widthInSbb  );
    const SizeType vsbb = Log2( m_heightInSbb );
    const SizeType hsId = log2W;
    const SizeType vsId = log2H;
    m_scanSbbId2SbbPos  = getScanOrder( SCAN_UNGROUPED   , hsbb , vsbb );
    m_scanId2BlkPos     = getScanOrder( SCAN_GROUPED_4x4 , hsId , vsId );

    // neighbour tables are borrowed from rom
    m_scanId2NbInfoSbb  = rom.getNbInfoSbb( log2W, log2H );
    m_scanId2NbInfoOut  = rom.getNbInfoOut( log2W, log2H );

    // per coefficient scan information
    m_scanInfo = new ScanInfo[ m_numCoeff ];
    for( int scanIdx = 0; scanIdx < m_numCoeff; scanIdx++ )
    {
      xSetScanInfo( m_scanInfo[scanIdx], scanIdx );
    }
  }
300
301
302
  // Fills scanInfo for the coefficient at scan index scanIdx.
  //
  // Always written: sbbSize, numSbb, scanIdx, rasterPos, sbbPos, insidePos, spt, posX, posY.
  // Written only for scanIdx > 0: the context offsets and inside position of the next
  // position (scanIdx - 1) and currNbInfoSbb.
  // Written only for scanIdx > 0 at the first position of a sub-block: nextSbbRight / nextSbbBelow.
  void TUParameters::xSetScanInfo( ScanInfo& scanInfo, int scanIdx )
  {
    const auto& blkPos = m_scanId2BlkPos[scanIdx];

    scanInfo.sbbSize    = m_sbbSize;
    scanInfo.numSbb     = m_numSbb;
    scanInfo.scanIdx    = scanIdx;
    scanInfo.rasterPos  = blkPos.idx;
    scanInfo.sbbPos     = m_scanSbbId2SbbPos[scanIdx >> m_log2SbbSize].idx;
    scanInfo.insidePos  = scanIdx & m_sbbMask;

    // classify the position inside its sub-block
    const bool atSbbMask = ( scanInfo.insidePos == m_sbbMask );
    const bool atSbbZero = ( scanInfo.insidePos == 0 );
    if( atSbbMask && scanIdx > scanInfo.sbbSize && scanIdx < m_numCoeff - 1 )
    {
      scanInfo.spt = SCAN_SOCSBB;
    }
    else if( atSbbZero && scanIdx > 0 && scanIdx < m_numCoeff - m_sbbSize )
    {
      scanInfo.spt = SCAN_EOCSBB;
    }
    else
    {
      scanInfo.spt = SCAN_ISCSBB;
    }

    scanInfo.posX = blkPos.x;
    scanInfo.posY = blkPos.y;

    // scan index 0 has no next position
    if( !scanIdx )
    {
      return;
    }

    const int   nextScanIdx = scanIdx - 1;
    const auto& nextBlkPos  = m_scanId2BlkPos[nextScanIdx];
    const int   diag        = nextBlkPos.x + nextBlkPos.y;

    // context offsets depend on the diagonal of the next position and on the channel type
    if( m_chType == CH_L )
    {
      scanInfo.sigCtxOffsetNext = ( diag < 2 ? 8 : diag < 5 ?  4 : 0 );
      scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 16 : diag < 3 ? 11 : diag < 10 ? 6 : 1 );
    }
    else
    {
      scanInfo.sigCtxOffsetNext = ( diag < 2 ? 4 : 0 );
      scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 6 : 1 );
    }
    scanInfo.nextInsidePos = nextScanIdx & m_sbbMask;
    scanInfo.currNbInfoSbb = m_scanId2NbInfoSbb[ scanIdx ];

    if( atSbbZero )
    {
      // the next position lies in another sub-block: store its right / below sub-block
      // positions, with 0 meaning "not available"
      const int nextSbbPos  = m_scanSbbId2SbbPos[nextScanIdx >> m_log2SbbSize].idx;
      const int nextSbbPosY = nextSbbPos               / m_widthInSbb;
      const int nextSbbPosX = nextSbbPos - nextSbbPosY * m_widthInSbb;
      scanInfo.nextSbbRight = ( nextSbbPosX < m_widthInSbb  - 1 ? nextSbbPos + 1            : 0 );
      scanInfo.nextSbbBelow = ( nextSbbPosY < m_heightInSbb - 1 ? nextSbbPos + m_widthInSbb : 0 );
    }
  }
343
344
  void RateEstimator::initCtx( const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID, const FracBitsAccess& fracBitsAccess )
345
707k
  {
346
707k
    m_scanId2Pos = tuPars.m_scanId2BlkPos;
347
707k
    xSetSigSbbFracBits  ( fracBitsAccess, tuPars.m_chType );
348
707k
    xSetSigFlagBits     ( fracBitsAccess, tuPars.m_chType );
349
707k
    xSetGtxFlagBits     ( fracBitsAccess, tuPars.m_chType );
350
707k
    xSetLastCoeffOffset ( fracBitsAccess, tuPars, tu, compID );
351
707k
  }
352
353
  void RateEstimator::xSetLastCoeffOffset( const FracBitsAccess& fracBitsAccess, const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID )
354
707k
  {
355
707k
    const ChannelType chType = ( compID == COMP_Y ? CH_L : CH_C );
356
707k
    int32_t cbfDeltaBits = 0;
357
707k
    if( compID == COMP_Y && !CU::isIntra(*tu.cu) && !tu.depth )
358
254
    {
359
254
      const BinFracBits bits  = fracBitsAccess.getFracBitsArray( Ctx::QtRootCbf() );
360
254
      cbfDeltaBits            = int32_t( bits.intBits[1] ) - int32_t( bits.intBits[0] );
361
254
    }
362
706k
    else
363
706k
    {
364
706k
      BinFracBits bits;
365
706k
      bool prevLumaCbf           = false;
366
706k
      bool lastCbfIsInferred     = false;
367
706k
      bool useIntraSubPartitions = tu.cu->ispMode && isLuma(chType);
368
706k
      if( useIntraSubPartitions )
369
10.3k
      {
370
10.3k
        bool rootCbfSoFar = false;
371
10.3k
        bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
372
10.3k
        uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
373
10.3k
        if( isLastSubPartition )
374
199
        {
375
199
          TransformUnit* tuPointer = tu.cu->firstTU;
376
796
          for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
377
597
          {
378
597
            rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
379
597
            tuPointer     = tuPointer->next;
380
597
          }
381
199
          if( !rootCbfSoFar )
382
0
          {
383
0
            lastCbfIsInferred = true;
384
0
          }
385
199
        }
386
10.3k
        if( !lastCbfIsInferred )
387
10.3k
        {
388
10.3k
          prevLumaCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
389
10.3k
        }
390
10.3k
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, prevLumaCbf, true)));
391
10.3k
      }
392
696k
      else
393
696k
      {
394
696k
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.cbf[COMP_Cb])));
395
696k
      }
396
706k
      cbfDeltaBits = lastCbfIsInferred ? 0 : int32_t(bits.intBits[1]) - int32_t(bits.intBits[0]);
397
706k
    }
398
399
707k
    static const unsigned prefixCtx[] = { 0, 0, 0, 3, 6, 10, 15, 21 };
400
707k
    uint32_t              ctxBits  [ LAST_SIGNIFICANT_GROUPS ];
401
2.12M
    for( unsigned xy = 0; xy < 2; xy++ )
402
1.41M
    {
403
1.41M
      int32_t             bitOffset   = ( xy ? cbfDeltaBits : 0 );
404
1.41M
      int32_t*            lastBits    = ( xy ? m_lastBitsY : m_lastBitsX );
405
1.41M
      const unsigned      size        = ( xy ? tuPars.m_height : tuPars.m_width );
406
1.41M
      const unsigned      log2Size    = Log2( size );
407
1.41M
      const bool          useYCtx     = ( xy != 0 );
408
1.41M
      const CtxSet&       ctxSetLast  = ( useYCtx ? Ctx::LastY : Ctx::LastX )[ chType ];
409
1.41M
      const unsigned      lastShift   = ( compID == COMP_Y ? (log2Size+1)>>2 : Clip3<unsigned>(0,2,size>>3) );
410
1.41M
      const unsigned      lastOffset  = ( compID == COMP_Y ? ( prefixCtx[log2Size] ) : 0 );
411
1.41M
      uint32_t            sumFBits    = 0;
412
1.41M
      unsigned            maxCtxId    = g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size) - 1];
413
9.43M
      for( unsigned ctxId = 0; ctxId < maxCtxId; ctxId++ )
414
8.02M
      {
415
8.02M
        const BinFracBits bits  = fracBitsAccess.getFracBitsArray( ctxSetLast( lastOffset + ( ctxId >> lastShift ) ) );
416
8.02M
        ctxBits[ ctxId ]        = sumFBits + bits.intBits[0] + ( ctxId>3 ? ((ctxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
417
8.02M
        sumFBits               +=            bits.intBits[1];
418
8.02M
      }
419
1.41M
      ctxBits  [ maxCtxId ]     = sumFBits + ( maxCtxId>3 ? ((maxCtxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
420
19.6M
      for (unsigned pos = 0; pos < std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size); pos++)
421
18.1M
      {
422
18.1M
        lastBits[ pos ]         = ctxBits[ g_uiGroupIdx[ pos ] ];
423
18.1M
      }
424
1.41M
    }
425
707k
  }
426
427
  void RateEstimator::xSetSigSbbFracBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
428
707k
  {
429
707k
    const CtxSet& ctxSet = Ctx::SigCoeffGroup[ chType ];
430
2.12M
    for( unsigned ctxId = 0; ctxId < sm_maxNumSigSbbCtx; ctxId++ )
431
1.41M
    {
432
1.41M
      m_sigSbbFracBits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
433
1.41M
    }
434
707k
  }
435
436
  void RateEstimator::xSetSigFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
437
707k
  {
438
2.82M
    for( unsigned ctxSetId = 0; ctxSetId < sm_numCtxSetsSig; ctxSetId++ )
439
2.12M
    {
440
2.12M
      BinFracBits*    bits    = m_sigFracBits [ ctxSetId ];
441
2.12M
      const CtxSet&   ctxSet  = Ctx::SigFlag  [ chType + 2*ctxSetId ];
442
2.12M
      const unsigned  numCtx  = ( chType == CH_L ? 12 : 8 );
443
19.4M
      for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
444
17.3M
      {
445
17.3M
        bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
446
17.3M
      }
447
2.12M
    }
448
707k
  }
449
450
  void RateEstimator::xSetGtxFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
451
707k
  {
452
707k
    const CtxSet&   ctxSetPar   = Ctx::ParFlag [     chType ];
453
707k
    const CtxSet&   ctxSetGt1   = Ctx::GtxFlag [ 2 + chType ];
454
707k
    const CtxSet&   ctxSetGt2   = Ctx::GtxFlag [     chType ];
455
707k
    const unsigned  numCtx      = ( chType == CH_L ? 21 : 11 );
456
8.76M
    for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
457
8.05M
    {
458
8.05M
      BinFracBits     fbPar = fracBitsAccess.getFracBitsArray( ctxSetPar( ctxId ) );
459
8.05M
      BinFracBits     fbGt1 = fracBitsAccess.getFracBitsArray( ctxSetGt1( ctxId ) );
460
8.05M
      BinFracBits     fbGt2 = fracBitsAccess.getFracBitsArray( ctxSetGt2( ctxId ) );
461
8.05M
      CoeffFracBits&  cb    = m_gtxFracBits[ ctxId ];
462
8.05M
      int32_t         par0  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[0]);
463
8.05M
      int32_t         par1  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[1]);
464
8.05M
      cb.bits[0] = 0;
465
8.05M
      cb.bits[1] = fbGt1.intBits[0] + (1 << SCALE_BITS);
466
8.05M
      cb.bits[2] = fbGt1.intBits[1] + par0 + fbGt2.intBits[0];
467
8.05M
      cb.bits[3] = fbGt1.intBits[1] + par1 + fbGt2.intBits[0];
468
8.05M
      cb.bits[4] = fbGt1.intBits[1] + par0 + fbGt2.intBits[1];
469
8.05M
      cb.bits[5] = fbGt1.intBits[1] + par1 + fbGt2.intBits[1];
470
8.05M
    }
471
707k
  }
472
473
  void CommonCtx::update( const ScanInfo& scanInfo, const int prevId, int stateId, StateMem& curr )
474
111k
  {
475
111k
    uint8_t*    sbbFlags  = m_currSbbCtx[stateId].sbbFlags;
476
111k
    uint8_t*    levels    = m_currSbbCtx[stateId].levels;
477
111k
    uint16_t    maxDist   = m_nbInfo[scanInfo.scanIdx - 1].maxDist;
478
111k
    uint16_t    sbbSize   = scanInfo.sbbSize;
479
111k
    std::size_t setCpSize = ( maxDist > sbbSize ? maxDist - sbbSize : 0 ) * sizeof( uint8_t );
480
111k
    if( prevId >= 0 )
481
92.1k
    {
482
92.1k
      ::memcpy( sbbFlags, m_prevSbbCtx[prevId].sbbFlags, scanInfo.numSbb * sizeof( uint8_t ) );
483
92.1k
      ::memcpy( levels + scanInfo.scanIdx + sbbSize, m_prevSbbCtx[prevId].levels + scanInfo.scanIdx + sbbSize, setCpSize );
484
92.1k
    }
485
19.2k
    else
486
19.2k
    {
487
19.2k
      ::memset( sbbFlags, 0, scanInfo.numSbb * sizeof( uint8_t ) );
488
19.2k
      ::memset( levels + scanInfo.scanIdx + sbbSize, 0, setCpSize );
489
19.2k
    }
490
111k
    sbbFlags[scanInfo.sbbPos] = !!curr.numSig[stateId];
491
492
111k
    const int       sigNSbb = ( ( scanInfo.nextSbbRight ? sbbFlags[scanInfo.nextSbbRight] : false ) || ( scanInfo.nextSbbBelow ? sbbFlags[scanInfo.nextSbbBelow] : false ) ? 1 : 0 );
493
111k
    curr.refSbbCtxId[stateId] = stateId;
494
111k
    const BinFracBits sbbBits = m_sbbFlagBits[sigNSbb];
495
496
111k
    curr.sbbBits0[stateId] = sbbBits.intBits[0];
497
111k
    curr.sbbBits1[stateId] = sbbBits.intBits[1];
498
499
111k
    if( sigNSbb || ( ( scanInfo.nextSbbRight && scanInfo.nextSbbBelow ) ? sbbFlags[scanInfo.nextSbbBelow + 1] : false ) )
500
69.5k
    {
501
69.5k
      const int         scanBeg = scanInfo.scanIdx - scanInfo.sbbSize;
502
69.5k
      const NbInfoOut* nbOut = m_nbInfo + scanBeg;
503
69.5k
      const uint8_t* absLevels = levels + scanBeg;
504
505
1.18M
      for( int id = 0; id < scanInfo.sbbSize; id++, nbOut++ )
506
1.11M
      {
507
1.11M
        if( nbOut->num )
508
747k
        {
509
747k
          TCoeff sumAbs = 0, sumAbs1 = 0, sumNum = 0;
510
1.81M
#define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(4+(t&1),t); sumNum+=!!t; }
511
747k
          switch( nbOut->num )
512
747k
          {
513
0
          default:
514
48.2k
          case 5:
515
48.2k
            UPDATE( 4 );
516
144k
          case 4:
517
144k
            UPDATE( 3 );
518
400k
          case 3:
519
400k
            UPDATE( 2 );
520
470k
          case 2:
521
470k
            UPDATE( 1 );
522
747k
          case 1:
523
747k
            UPDATE( 0 );
524
747k
          }
525
747k
#undef UPDATE
526
747k
          curr.tplAcc[id][stateId] = ( sumNum << 5 ) | sumAbs1;
527
747k
          curr.sum1st[id][stateId] = ( uint8_t ) std::min( 255, sumAbs );
528
747k
        }
529
1.11M
      }
530
69.5k
    }
531
111k
  }
532
533
  void Quantizer::initQuantBlock(const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue)
534
1.69M
  {
535
1.69M
    CHECKD( lambda <= 0.0, "Lambda must be greater than 0" );
536
537
1.69M
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
538
1.69M
    const int         qpPer                 = qpDQ / 6;
539
1.69M
    const int         qpRem                 = qpDQ - 6 * qpPer;
540
1.69M
    const SPS&        sps                   = *tu.cs->sps;
541
1.69M
    const CompArea&   area                  = tu.blocks[ compID ];
542
1.69M
    const ChannelType chType                = toChannelType( compID );
543
1.69M
    const int         channelBitDepth       = sps.bitDepths[ chType ];
544
1.69M
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
545
1.69M
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
546
1.69M
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
547
1.69M
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
548
    // quant parameters
549
1.69M
    m_QShift                    = QUANT_SHIFT  - 1 + qpPer + transformShift;
550
1.69M
    m_QAdd                      = -( ( 3 << m_QShift ) >> 1 );
551
1.69M
    Intermediate_Int  invShift  = IQUANT_SHIFT + 1 - qpPer - transformShift;
552
1.69M
    m_QScale                    = g_quantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
553
1.69M
    const unsigned    qIdxBD    = std::min<unsigned>( maxLog2TrDynamicRange + 1, 8*sizeof(Intermediate_Int) + invShift - IQUANT_SHIFT - 1 );
554
1.69M
    m_maxQIdx                   = ( 1 << (qIdxBD-1) ) - 4;
555
1.69M
    if( m_QShift )
556
1.69M
      m_thresLast               = TCoeff((int64_t(m_DqThrVal) << (m_QShift-1)));
557
18.4E
    else
558
18.4E
      m_thresLast               = TCoeff((int64_t(m_DqThrVal>>1) << m_QShift));
559
1.69M
    m_thresSSbb                 = TCoeff((int64_t(3) << m_QShift));
560
    // distortion calculation parameters
561
18.4E
    const int64_t qScale        = (gValue==-1) ? m_QScale : gValue;
562
1.69M
    const int nomDShift =
563
1.69M
      SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift + (needsSqrt2ScaleAdjustment ? 1 : 0);
564
1.69M
    const double  qScale2       = double( qScale * qScale );
565
1.69M
    const double  nomDistFactor = ( nomDShift < 0 ? 1.0/(double(int64_t(1)<<(-nomDShift))*qScale2*lambda) : double(int64_t(1)<<nomDShift)/(qScale2*lambda) );
566
1.69M
    const uint32_t pow2dfShift   = (uint32_t)( nomDistFactor * qScale2 ) + 1;
567
1.69M
    const int     dfShift       = ceilLog2( pow2dfShift );
568
1.69M
    m_DistShift                 = 62 + m_QShift - 2*maxLog2TrDynamicRange - dfShift;
569
1.69M
    m_DistAdd                   = (int64_t(1) << m_DistShift) >> 1;
570
1.69M
    m_DistStepAdd               = ((m_DistShift+m_QShift)>=64 ? (int64_t)( nomDistFactor * pow(2,m_DistShift+m_QShift) + .5 ) : (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+m_QShift)) + .5 ));
571
1.69M
    m_DistOrgFact               = (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+1       )) + .5 );
572
1.69M
  }
573
574
  void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef) const
575
696k
  {
576
577
    //----- set basic parameters -----
578
696k
    const CompArea&     area      = tu.blocks[ compID ];
579
696k
    const int           numCoeff  = area.area();
580
696k
    const SizeType      hsId      = Log2( area.width );
581
696k
    const SizeType      vsId      = Log2( area.height );
582
696k
    const ScanElement  *scan      = getScanOrder( SCAN_GROUPED_4x4, hsId, vsId );
583
696k
    const TCoeffSig*    qCoeff    = tu.getCoeffs( compID ).buf;
584
696k
          TCoeff*       tCoeff    = recCoeff.buf;
585
586
    //----- reset coefficients and get last scan index -----
587
696k
    ::memset( tCoeff, 0, numCoeff * sizeof( TCoeff ) );
588
696k
    int lastScanIdx = tu.lastPos[compID];
589
696k
    if( lastScanIdx < 0 )
590
0
    {
591
0
      return;
592
0
    }
593
594
    //----- set dequant parameters -----
595
696k
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
596
696k
    const int         qpPer                 = qpDQ / 6;
597
696k
    const int         qpRem                 = qpDQ - 6 * qpPer;
598
696k
    const SPS&        sps                   = *tu.cs->sps;
599
696k
    const ChannelType chType                = toChannelType( compID );
600
696k
    const int         channelBitDepth       = sps.bitDepths[ chType ];
601
696k
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
602
696k
    const TCoeff      minTCoeff             = -( 1 << maxLog2TrDynamicRange );
603
696k
    const TCoeff      maxTCoeff             =  ( 1 << maxLog2TrDynamicRange ) - 1;
604
696k
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
605
696k
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
606
696k
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
607
696k
    Intermediate_Int  shift                 = IQUANT_SHIFT + 1 - qpPer - transformShift + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
608
696k
    Intermediate_Int  invQScale             = g_invQuantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
609
696k
    Intermediate_Int  add                   = (shift < 0) ? 0 : ((1 << shift) >> 1);
610
    //----- dequant coefficients -----
611
7.38M
    for( int state = 0, scanIdx = lastScanIdx; scanIdx >= 0; scanIdx-- )
612
6.68M
    {
613
6.68M
      const unsigned   rasterPos = scan[scanIdx].idx;
614
6.68M
      const TCoeffSig& level     = qCoeff[ rasterPos ];
615
6.68M
      if( level )
616
6.01M
      {
617
6.01M
        if (enableScalingLists)
618
0
          invQScale = piDequantCoef[rasterPos];//scalingfactor*levelScale
619
6.01M
        if (shift < 0 && (enableScalingLists || scanIdx == lastScanIdx))
620
372k
        {
621
372k
          invQScale <<= -shift;
622
372k
        }
623
6.01M
        Intermediate_Int qIdx = 2 * level + (level > 0 ? -(state>>1) : (state>>1));
624
6.01M
        int64_t  nomTCoeff          = ((int64_t)qIdx * (int64_t)invQScale + add) >> ((shift < 0) ? 0 : shift);
625
6.01M
        tCoeff[rasterPos]           = (TCoeff)Clip3<int64_t>(minTCoeff, maxTCoeff, nomTCoeff);
626
6.01M
      }
627
6.68M
      state = ( 32040 >> ((state<<2)+((level&1)<<1)) ) & 3;   // the 16-bit value "32040" represent the state transition table
628
6.68M
    }
629
696k
  }
630
631
  bool Quantizer::preQuantCoeff( const TCoeff absCoeff, PQData* pqData, int quanCoeff ) const
632
0
  {
633
0
    int64_t scaledOrg = int64_t( absCoeff ) * quanCoeff;
634
0
    TCoeff  qIdx      = TCoeff( ( scaledOrg + m_QAdd ) >> m_QShift );
635
636
0
    if( qIdx < 0 )
637
0
    {
638
0
      int64_t scaledAdd = m_DistStepAdd - scaledOrg * m_DistOrgFact;
639
0
      PQData& pq_a      = pqData[1];
640
0
      PQData& pq_b      = pqData[2];
641
642
0
      pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * 1 + m_DistAdd ) >> m_DistShift;
643
0
      pq_a.absLevel     = 1;
644
645
0
      pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * 2 + m_DistAdd ) >> m_DistShift;
646
0
      pq_b.absLevel     = 1;
647
      
648
0
      return true;
649
0
    }
650
     
651
0
    qIdx              = std::max<TCoeff>( 1, std::min<TCoeff>( m_maxQIdx, qIdx ) );
652
0
    int64_t scaledAdd = qIdx * m_DistStepAdd - scaledOrg * m_DistOrgFact;
653
654
0
    PQData& pq_a      = pqData[( qIdx + 0 ) & 3];
655
0
    PQData& pq_b      = pqData[( qIdx + 1 ) & 3];
656
0
    PQData& pq_c      = pqData[( qIdx + 2 ) & 3];
657
0
    PQData& pq_d      = pqData[( qIdx + 3 ) & 3];
658
659
0
    pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * ( qIdx + 0 ) + m_DistAdd ) >> m_DistShift;
660
0
    pq_a.absLevel     = ( qIdx + 1 ) >> 1;
661
662
0
    pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * ( qIdx + 1 ) + m_DistAdd ) >> m_DistShift;
663
0
    pq_b.absLevel     = ( qIdx + 2 ) >> 1;
664
665
0
    pq_c.deltaDist    = ( ( scaledAdd + 2 * m_DistStepAdd ) * ( qIdx + 2 ) + m_DistAdd ) >> m_DistShift;
666
0
    pq_c.absLevel     = ( qIdx + 3 ) >> 1;
667
668
0
    pq_d.deltaDist    = ( ( scaledAdd + 3 * m_DistStepAdd ) * ( qIdx + 3 ) + m_DistAdd ) >> m_DistShift;
669
0
    pq_d.absLevel     = ( qIdx + 4 ) >> 1;
670
671
0
    return false;
672
0
  }
673
674
  // Bit-cost table indexed as g_goRiceBits[ricePar][value], with value clamped to
  // RICEMAX - 1 by the users in this file. Every entry is a multiple of 32768.
  const int32_t g_goRiceBits[4][RICEMAX] =
  {
    // rice parameter 0
    {
       32768,  65536,  98304, 131072, 163840, 196608, 262144, 262144,
      327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216,
      393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752,
      458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752
    },
    // rice parameter 1
    {
       65536,  65536,  98304,  98304, 131072, 131072, 163840, 163840,
      196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912,
      360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448,
      425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984
    },
    // rice parameter 2
    {
       98304,  98304,  98304,  98304, 131072, 131072, 131072, 131072,
      163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608,
      229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144,
      327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680
    },
    // rice parameter 3
    {
      131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072,
      163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840,
      196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608,
      229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376
    }
  };
681
682
  static inline void initStates( const int stateId, DQIntern::StateMem& state )
683
5.65M
  {
684
5.65M
    state.rdCost[stateId]         = DQIntern::rdCostInit;
685
5.65M
    state.ctx.cff[stateId]        =  0;
686
5.65M
    state.ctx.sig[stateId]        =  0;
687
5.65M
    state.numSig[stateId]         =  0;
688
5.65M
    state.refSbbCtxId[stateId]    = -1;
689
5.65M
    state.remRegBins[stateId]     =  4;
690
5.65M
    state.cffBitsCtxOffset        =  0;
691
5.65M
    state.m_goRicePar[stateId]    =  0;
692
5.65M
    state.m_goRiceZero[stateId]   =  0;
693
5.65M
    state.sbbBits0[stateId]       =  0;
694
5.65M
    state.sbbBits1[stateId]       =  0;
695
5.65M
  }
696
697
  template<bool rrgEnsured = false>
698
  static inline void checkRdCosts( const int stateId, const DQIntern::ScanPosType spt, const DQIntern::PQData& pqDataA, const DQIntern::PQData& pqDataB, DQIntern::Decisions& decisions, int idxAZ, int idxB, const DQIntern::StateMem& state )
699
22.6M
  {
700
22.6M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
22.6M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
22.6M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
22.6M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
22.6M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
22.2M
    {
707
22.2M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
22.2M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
22.2M
      if( pqDataA.absLevel < 4 )
711
5.32M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
16.8M
      else
713
16.8M
      {
714
16.8M
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
16.8M
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
16.8M
      }
717
718
22.2M
      if( pqDataB.absLevel < 4 )
719
6.65M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
15.5M
      else
721
15.5M
      {
722
15.5M
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
15.5M
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
15.5M
      }
725
726
22.2M
      if( spt == SCAN_ISCSBB )
727
22.1M
      {
728
22.1M
        rdCostA += sigBits.intBits[1];
729
22.1M
        rdCostB += sigBits.intBits[1];
730
22.1M
        rdCostZ += sigBits.intBits[0];
731
22.1M
      }
732
66.9k
      else if( spt == SCAN_SOCSBB )
733
11.7k
      {
734
11.7k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
11.7k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
11.7k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
11.7k
      }
738
55.2k
      else if( state.numSig[stateId] )
739
54.1k
      {
740
54.1k
        rdCostA += sigBits.intBits[1];
741
54.1k
        rdCostB += sigBits.intBits[1];
742
54.1k
        rdCostZ += sigBits.intBits[0];
743
54.1k
      }
744
1.11k
      else
745
1.11k
      {
746
1.11k
        rdCostZ = rdCostInit;
747
1.11k
      }
748
22.2M
    }
749
403k
    else
750
403k
    {
751
403k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
403k
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
403k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
403k
    }
755
756
22.6M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
14.1M
    {
758
14.1M
      decisions.rdCost[idxAZ] = rdCostA;
759
14.1M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
14.1M
      decisions.prevId[idxAZ] = stateId;
761
14.1M
    }
762
8.46M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
285k
    {
764
285k
      decisions.rdCost[idxAZ] = rdCostZ;
765
285k
      decisions.absLevel[idxAZ] = 0;
766
285k
      decisions.prevId[idxAZ] = stateId;
767
285k
    }
768
769
22.6M
    if( rdCostB < decisions.rdCost[idxB] )
770
14.4M
    {
771
14.4M
      decisions.rdCost[idxB] = rdCostB;
772
14.4M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
14.4M
      decisions.prevId[idxB] = stateId;
774
14.4M
    }
775
22.6M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<true>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
699
3.99M
  {
700
3.99M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
3.99M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
3.99M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
3.99M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
3.99M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
3.99M
    {
707
3.99M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
3.99M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
3.99M
      if( pqDataA.absLevel < 4 )
711
3.99M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
0
      else
713
0
      {
714
0
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
0
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
0
      }
717
718
3.99M
      if( pqDataB.absLevel < 4 )
719
3.99M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
0
      else
721
0
      {
722
0
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
0
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
0
      }
725
726
3.99M
      if( spt == SCAN_ISCSBB )
727
3.97M
      {
728
3.97M
        rdCostA += sigBits.intBits[1];
729
3.97M
        rdCostB += sigBits.intBits[1];
730
3.97M
        rdCostZ += sigBits.intBits[0];
731
3.97M
      }
732
26.1k
      else if( spt == SCAN_SOCSBB )
733
9.72k
      {
734
9.72k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
9.72k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
9.72k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
9.72k
      }
738
16.4k
      else if( state.numSig[stateId] )
739
15.6k
      {
740
15.6k
        rdCostA += sigBits.intBits[1];
741
15.6k
        rdCostB += sigBits.intBits[1];
742
15.6k
        rdCostZ += sigBits.intBits[0];
743
15.6k
      }
744
759
      else
745
759
      {
746
759
        rdCostZ = rdCostInit;
747
759
      }
748
3.99M
    }
749
0
    else
750
0
    {
751
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
0
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
0
    }
755
756
3.99M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
2.63M
    {
758
2.63M
      decisions.rdCost[idxAZ] = rdCostA;
759
2.63M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
2.63M
      decisions.prevId[idxAZ] = stateId;
761
2.63M
    }
762
1.36M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
284k
    {
764
284k
      decisions.rdCost[idxAZ] = rdCostZ;
765
284k
      decisions.absLevel[idxAZ] = 0;
766
284k
      decisions.prevId[idxAZ] = stateId;
767
284k
    }
768
769
3.99M
    if( rdCostB < decisions.rdCost[idxB] )
770
2.91M
    {
771
2.91M
      decisions.rdCost[idxB] = rdCostB;
772
2.91M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
2.91M
      decisions.prevId[idxB] = stateId;
774
2.91M
    }
775
3.99M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<false>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
699
18.6M
  {
700
18.6M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
18.6M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
18.6M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
18.6M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
18.6M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
18.2M
    {
707
18.2M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
18.2M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
18.2M
      if( pqDataA.absLevel < 4 )
711
1.32M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
16.8M
      else
713
16.8M
      {
714
16.8M
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
16.8M
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
16.8M
      }
717
718
18.2M
      if( pqDataB.absLevel < 4 )
719
2.65M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
15.5M
      else
721
15.5M
      {
722
15.5M
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
15.5M
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
15.5M
      }
725
726
18.2M
      if( spt == SCAN_ISCSBB )
727
18.1M
      {
728
18.1M
        rdCostA += sigBits.intBits[1];
729
18.1M
        rdCostB += sigBits.intBits[1];
730
18.1M
        rdCostZ += sigBits.intBits[0];
731
18.1M
      }
732
40.8k
      else if( spt == SCAN_SOCSBB )
733
2.01k
      {
734
2.01k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
2.01k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
2.01k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
2.01k
      }
738
38.8k
      else if( state.numSig[stateId] )
739
38.4k
      {
740
38.4k
        rdCostA += sigBits.intBits[1];
741
38.4k
        rdCostB += sigBits.intBits[1];
742
38.4k
        rdCostZ += sigBits.intBits[0];
743
38.4k
      }
744
352
      else
745
352
      {
746
352
        rdCostZ = rdCostInit;
747
352
      }
748
18.2M
    }
749
403k
    else
750
403k
    {
751
403k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
403k
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
403k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
403k
    }
755
756
18.6M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
11.5M
    {
758
11.5M
      decisions.rdCost[idxAZ] = rdCostA;
759
11.5M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
11.5M
      decisions.prevId[idxAZ] = stateId;
761
11.5M
    }
762
7.10M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
440
    {
764
440
      decisions.rdCost[idxAZ] = rdCostZ;
765
440
      decisions.absLevel[idxAZ] = 0;
766
440
      decisions.prevId[idxAZ] = stateId;
767
440
    }
768
769
18.6M
    if( rdCostB < decisions.rdCost[idxB] )
770
11.5M
    {
771
11.5M
      decisions.rdCost[idxB] = rdCostB;
772
11.5M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
11.5M
      decisions.prevId[idxB] = stateId;
774
11.5M
    }
775
18.6M
  }
776
777
  // Runs checkRdCosts<true> for the four previous states in the order 0, 1, 2, 3.
  // States 0 and 1 are offered pqData[0] / pqData[2], states 2 and 3 are offered
  // pqData[3] / pqData[1]; the target slots are swapped between the states of a pair.
  void checkAllRdCosts( const DQIntern::ScanPosType spt, const DQIntern::PQData* pqData, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
  {
    const DQIntern::PQData& pq0 = pqData[0];
    const DQIntern::PQData& pq1 = pqData[1];
    const DQIntern::PQData& pq2 = pqData[2];
    const DQIntern::PQData& pq3 = pqData[3];

    checkRdCosts<true>( 0, spt, pq0, pq2, decisions, 0, 2, state );
    checkRdCosts<true>( 1, spt, pq0, pq2, decisions, 2, 0, state );
    checkRdCosts<true>( 2, spt, pq3, pq1, decisions, 1, 3, state );
    checkRdCosts<true>( 3, spt, pq3, pq1, decisions, 3, 1, state );
  }
784
785
  template<bool rrgEnsured = false>
786
  static void checkRdCostsOdd1( const int stateId, const ScanPosType spt, const int64_t deltaDist, Decisions& decisions, int idxA, int idxZ, const StateMem& state )
787
4.46M
  {
788
4.46M
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
4.46M
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
4.46M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
4.45M
    {
793
4.45M
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
4.45M
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
4.45M
      if( spt == SCAN_ISCSBB )
798
4.34M
      {
799
4.34M
        rdCostA += sigBits.intBits[1];
800
4.34M
        rdCostZ += sigBits.intBits[0];
801
4.34M
      }
802
113k
      else if( spt == SCAN_SOCSBB )
803
74.2k
      {
804
74.2k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
74.2k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
74.2k
      }
807
39.1k
      else if( state.numSig[stateId] )
808
7.92k
      {
809
7.92k
        rdCostA += sigBits.intBits[1];
810
7.92k
        rdCostZ += sigBits.intBits[0];
811
7.92k
      }
812
31.1k
      else
813
31.1k
      {
814
31.1k
        rdCostZ = rdCostInit;
815
31.1k
      }
816
4.45M
    }
817
1.62k
    else
818
1.62k
    {
819
1.62k
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
1.62k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
1.62k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
1.62k
    }
824
825
4.46M
    if( rdCostA < decisions.rdCost[idxA] )
826
2.65M
    {
827
2.65M
      decisions.rdCost[idxA] = rdCostA;
828
2.65M
      decisions.absLevel[idxA] = 1;
829
2.65M
      decisions.prevId[idxA] = stateId;
830
2.65M
    }
831
832
4.46M
    if( rdCostZ < decisions.rdCost[idxZ] )
833
3.15M
    {
834
3.15M
      decisions.rdCost[idxZ] = rdCostZ;
835
3.15M
      decisions.absLevel[idxZ] = 0;
836
3.15M
      decisions.prevId[idxZ] = stateId;
837
3.15M
    }
838
4.46M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<true>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
787
4.15M
  {
788
4.15M
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
4.15M
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
4.15M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
4.15M
    {
793
4.15M
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
4.15M
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
4.15M
      if( spt == SCAN_ISCSBB )
798
4.04M
      {
799
4.04M
        rdCostA += sigBits.intBits[1];
800
4.04M
        rdCostZ += sigBits.intBits[0];
801
4.04M
      }
802
112k
      else if( spt == SCAN_SOCSBB )
803
74.0k
      {
804
74.0k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
74.0k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
74.0k
      }
807
38.7k
      else if( state.numSig[stateId] )
808
7.92k
      {
809
7.92k
        rdCostA += sigBits.intBits[1];
810
7.92k
        rdCostZ += sigBits.intBits[0];
811
7.92k
      }
812
30.7k
      else
813
30.7k
      {
814
30.7k
        rdCostZ = rdCostInit;
815
30.7k
      }
816
4.15M
    }
817
0
    else
818
0
    {
819
0
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
0
    }
824
825
4.15M
    if( rdCostA < decisions.rdCost[idxA] )
826
2.65M
    {
827
2.65M
      decisions.rdCost[idxA] = rdCostA;
828
2.65M
      decisions.absLevel[idxA] = 1;
829
2.65M
      decisions.prevId[idxA] = stateId;
830
2.65M
    }
831
832
4.15M
    if( rdCostZ < decisions.rdCost[idxZ] )
833
3.15M
    {
834
3.15M
      decisions.rdCost[idxZ] = rdCostZ;
835
3.15M
      decisions.absLevel[idxZ] = 0;
836
3.15M
      decisions.prevId[idxZ] = stateId;
837
3.15M
    }
838
4.15M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<false>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
787
304k
  {
788
304k
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
304k
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
304k
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
302k
    {
793
302k
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
302k
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
302k
      if( spt == SCAN_ISCSBB )
798
302k
      {
799
302k
        rdCostA += sigBits.intBits[1];
800
302k
        rdCostZ += sigBits.intBits[0];
801
302k
      }
802
577
      else if( spt == SCAN_SOCSBB )
803
177
      {
804
177
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
177
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
177
      }
807
400
      else if( state.numSig[stateId] )
808
0
      {
809
0
        rdCostA += sigBits.intBits[1];
810
0
        rdCostZ += sigBits.intBits[0];
811
0
      }
812
400
      else
813
400
      {
814
400
        rdCostZ = rdCostInit;
815
400
      }
816
302k
    }
817
1.62k
    else
818
1.62k
    {
819
1.62k
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
1.62k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
1.62k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
1.62k
    }
824
825
304k
    if( rdCostA < decisions.rdCost[idxA] )
826
1.21k
    {
827
1.21k
      decisions.rdCost[idxA] = rdCostA;
828
1.21k
      decisions.absLevel[idxA] = 1;
829
1.21k
      decisions.prevId[idxA] = stateId;
830
1.21k
    }
831
832
304k
    if( rdCostZ < decisions.rdCost[idxZ] )
833
1.86k
    {
834
1.86k
      decisions.rdCost[idxZ] = rdCostZ;
835
1.86k
      decisions.absLevel[idxZ] = 0;
836
1.86k
      decisions.prevId[idxZ] = stateId;
837
1.86k
    }
838
304k
  }
839
840
  // Runs checkRdCostsOdd1<true> for the four previous states in the order 0, 1, 2, 3.
  // States 0 and 1 use pq_b_dist, states 2 and 3 use pq_a_dist; the level-1 / zero
  // target slots are swapped between the two states of each pair.
  static void checkAllRdCostsOdd1( const DQIntern::ScanPosType spt, const int64_t pq_a_dist, const int64_t pq_b_dist, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
  {
    const int64_t distLowPair  = pq_b_dist; // states 0 and 1
    const int64_t distHighPair = pq_a_dist; // states 2 and 3

    checkRdCostsOdd1<true>( 0, spt, distLowPair,  decisions, 2, 0, state );
    checkRdCostsOdd1<true>( 1, spt, distLowPair,  decisions, 0, 2, state );
    checkRdCostsOdd1<true>( 2, spt, distHighPair, decisions, 3, 1, state );
    checkRdCostsOdd1<true>( 3, spt, distHighPair, decisions, 1, 3, state );
  }
847
848
  static inline void checkRdCostStart( int32_t lastOffset, const PQData& pqData, Decisions& decisions, int idx, const StateMem& state )
849
12.4M
  {
850
12.4M
    const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[0];
851
852
12.4M
    int64_t rdCost = pqData.deltaDist + lastOffset;
853
12.4M
    if( pqData.absLevel < 4 )
854
4.31M
    {
855
4.31M
      rdCost += cffBits.bits[pqData.absLevel];
856
4.31M
    }
857
8.10M
    else
858
8.10M
    {
859
8.10M
      const unsigned value = ( pqData.absLevel - 4 ) >> 1;
860
8.10M
      rdCost += cffBits.bits[pqData.absLevel - ( value << 1 )] + g_goRiceBits[0][value < RICEMAX ? value : RICEMAX - 1];
861
8.10M
    }
862
863
12.4M
    if( rdCost < decisions.rdCost[idx] )
864
1.63M
    {
865
1.63M
      decisions.rdCost[idx]   = rdCost;
866
1.63M
      decisions.absLevel[idx] = pqData.absLevel;
867
1.63M
      decisions.prevId[idx]   = -1;
868
1.63M
    }
869
12.4M
  }
870
871
  static inline void checkRdCostSkipSbb( const int stateId, Decisions& decisions, int idx, const StateMem& state )
872
106k
  {
873
106k
    int64_t rdCost = state.rdCost[stateId] + state.sbbBits0[stateId];
874
106k
    if( rdCost < decisions.rdCost[idx] )
875
36.2k
    {
876
36.2k
      decisions.rdCost[idx]   = rdCost;
877
36.2k
      decisions.absLevel[idx] = 0;
878
36.2k
      decisions.prevId[idx]   = 4 | stateId;
879
36.2k
    }
880
106k
  }
881
882
  static inline void checkRdCostSkipSbbZeroOut( const int stateId, Decisions& decisions, int idx, const StateMem& state )
883
0
  {
884
0
    int64_t rdCost          = state.rdCost[stateId] + state.sbbBits0[stateId];
885
0
    decisions.rdCost[idx]   = rdCost;
886
0
    decisions.absLevel[idx] = 0;
887
0
    decisions.prevId[idx]   = 4 | stateId;
888
0
  }
889
890
  static inline void setRiceParam( const int stateId, const ScanInfo& scanInfo, StateMem& state, bool ge4 )
891
18.4M
  {
892
18.4M
    if( state.remRegBins[stateId] < 4 || ge4 )
893
17.2M
    {
894
17.2M
      TCoeff  sumAbs = state.sum1st[scanInfo.insidePos][stateId];
895
17.2M
      int sumSub     = state.remRegBins[stateId] < 4 ? 0 : 4 * 5;
896
17.2M
      int sumAll     = std::max( std::min( 31, ( int ) sumAbs - sumSub ), 0 );
897
17.2M
      state.m_goRicePar[stateId]
898
17.2M
                     = g_auiGoRiceParsCoeff[sumAll];
899
900
17.2M
      if( state.remRegBins[stateId] < 4 )
901
405k
      {
902
405k
        state.m_goRiceZero[stateId] = g_auiGoRicePosCoeff0( stateId, state.m_goRicePar[stateId] );
903
405k
      }
904
17.2M
    }
905
18.4M
  }
906
907
  // Advances trellis state `stateId` past the current scan position (inside a sub-block).
  //
  // `decisions` holds the winning decision per state, `prev` is a snapshot of the state memory
  // before this position and `curr` receives the updated state. The value of
  // decisions.prevId[stateId] selects the update:
  //   <= -2 : no decision was taken for this state; only the cost is copied
  //   == -1 : the path starts at this position; counters are initialised from scratch
  //   >=  0 : the path continues from state prevId of `prev`
  //
  // Afterwards the chosen level is propagated to the per-position accumulators of the
  // positions listed in scanInfo.currNbInfoSbb.invInPos, and the context offsets for the next
  // position are derived from curr.tplAcc (low 5 bits: clamped level sum, upper bits: count).
  static void update1State( int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr, DQIntern::StateMem& prev )
  {
    curr.rdCost[stateId] = decisions.rdCost[stateId];
    if( decisions.prevId[stateId] > -2 )
    {
      if( decisions.prevId[stateId] >= 0 )
      {
        // continue an existing path: inherit everything from the predecessor state
        const int prevId          = decisions.prevId[stateId];
        curr.numSig[stateId]      = prev.numSig[prevId] + !!decisions.absLevel[stateId];
        curr.refSbbCtxId[stateId] = prev.refSbbCtxId[prevId];
        curr.sbbBits0[stateId]    = prev.sbbBits0[prevId];
        curr.sbbBits1[stateId]    = prev.sbbBits1[prevId];
        curr.remRegBins[stateId]  = prev.remRegBins[prevId] - 1;

        if( curr.remRegBins[stateId] >= 4 )
        {
          // levels 0 and 1 consume that many extra bins, any larger level consumes 3
          curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
        }

        for( int i = 0; i < 16; i++ )
        {
          curr.tplAcc[i][stateId] = prev.tplAcc[i][prevId];
          curr.sum1st[i][stateId] = prev.sum1st[i][prevId];
          curr.absVal[i][stateId] = prev.absVal[i][prevId];
        }
      }
      else
      {
        // prevId == -1: the path starts here, so this is its first significant coefficient
        curr.numSig[stateId]      =  1;
        curr.refSbbCtxId[stateId] = -1;
        curr.remRegBins[stateId]  = prev.initRemRegBins;
        curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );

        for( int i = 0; i < 16; i++ )
        {
          curr.tplAcc[i][stateId] = 0;
          curr.sum1st[i][stateId] = 0;
          curr.absVal[i][stateId] = 0;
        }
      }

      if( decisions.absLevel[stateId] )
      {
        // store the level clamped to 126 / 127 (parity preserved) so that it fits into 8 bits
        curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );

        if( scanInfo.currNbInfoSbb.numInv )
        {
          // level clamped to 4 / 5 (parity preserved), contribution to the template sum
          int min4_or_5 = std::min<TCoeff>( 4 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );

          // Saturating 8-bit addition. The operands are deliberately taken as int: the level
          // can exceed 255, and narrowing it to uint8_t before the addition would wrap it
          // modulo 256 and silently defeat the saturation. For operands <= 255 the result is
          // identical to a wrap-detecting uint8_t addition.
          auto adds8 = []( int a, int b )
          {
            return ( uint8_t ) std::min( a + b, 255 );
          };

          // accumulate the current level into dependent position k
          auto update_deps = [&]( int k )
          {
            curr.tplAcc[scanInfo.currNbInfoSbb.invInPos[k]][stateId] += 32 + min4_or_5;
            curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId] = adds8( curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId], decisions.absLevel[stateId] );
          };

          // intentional fall-through: numInv == n updates dependents n-1 ... 0
          switch( scanInfo.currNbInfoSbb.numInv )
          {
          default:
          case 5:
            update_deps( 4 );
          case 4:
            update_deps( 3 );
          case 3:
            update_deps( 2 );
          case 2:
            update_deps( 1 );
          case 1:
            update_deps( 0 );
          }
        }
      }

      if( curr.remRegBins[stateId] >= 4 )
      {
        // derive the context offsets for the next scan position from its template accumulator
        TCoeff  sumAbs1 = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
        TCoeff  sumNum  = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
        int sumGt1 = sumAbs1 - sumNum;

        curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( sumAbs1 + 1 ) >> 1, 3 );
        curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
      }
      else
      {
        // tell the caller that at least one state has fewer than 4 regular bins left
        curr.anyRemRegBinsLt4 = true;
      }
    }
  }
1001
1002
  // Variant of update1State used when scanInfo.insidePos == 0 (see xDecideAndUpdate).
  //
  // In addition to the cases handled by update1State, decisions.prevId[stateId] >= 4 means
  // that the decision continues from state ( prevId - 4 ) of `skip`. After the per-state
  // bookkeeping, the 16 stored levels are written to the buffer returned by
  // commonCtx.getLevelPtrs(), the per-position accumulators are cleared, commonCtx.update()
  // is invoked and the context offsets for the next scan position are derived.
  static void update1StateEOS( const int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::StateMem& prev, DQIntern::CommonCtx& commonCtx )
  {
    curr.rdCost[stateId] = decisions.rdCost[stateId];

    const int  chosenPrev = decisions.prevId[stateId];
    const auto level      = decisions.absLevel[stateId];

    if( chosenPrev <= -2 )
    {
      return; // no decision for this state, nothing else to update
    }

    if( chosenPrev >= 4 )
    {
      // decision continues from a state of `skip`
      CHECK( decisions.absLevel[stateId] != 0, "cannot happen" );

      const int skipId          = chosenPrev - 4;
      curr.numSig    [stateId]  = 0;
      curr.remRegBins[stateId]  = skip.remRegBins[skipId];
      curr.refSbbCtxId[stateId] = skipId;

      for( int pos = 0; pos < 16; pos++ )
      {
        curr.absVal[pos][stateId] = 0;
      }
    }
    else if( chosenPrev >= 0 )
    {
      // regular continuation from state chosenPrev of `prev`
      curr.numSig[stateId]      = prev.numSig[chosenPrev] + !!level;
      curr.refSbbCtxId[stateId] = prev.refSbbCtxId[chosenPrev];
      curr.remRegBins[stateId]  = prev.remRegBins[chosenPrev] - 1;

      if( curr.remRegBins[stateId] >= 4 )
      {
        curr.remRegBins[stateId] -= ( level < 2 ? level : 3 );
      }

      for( int pos = 0; pos < 16; pos++ )
      {
        curr.absVal[pos][stateId] = prev.absVal[pos][chosenPrev];
      }
    }
    else
    {
      // chosenPrev == -1: the path starts at this position
      curr.numSig[stateId]      =  1;
      curr.refSbbCtxId[stateId] = -1;
      curr.remRegBins[stateId]  = prev.initRemRegBins;
      curr.remRegBins[stateId] -= ( level < 2 ? level : 3 );

      for( int pos = 0; pos < 16; pos++ )
      {
        curr.absVal[pos][stateId] = 0;
      }
    }

    // level of the current position, clamped to 126 / 127 with parity preserved
    curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( level & 1 ), level );

    uint8_t* levels[4];
    commonCtx.getLevelPtrs( scanInfo, levels[0], levels[1], levels[2], levels[3] );

    uint8_t* const stateLevels = levels[stateId];
    for( int pos = 0; pos < 16; pos++ )
    {
      // hand the absolute levels over to commonCtx ...
      stateLevels[pos]          = curr.absVal[pos][stateId];
      // ... and reset the per-position accumulators of this state
      curr.tplAcc[pos][stateId] = 0;
      curr.sum1st[pos][stateId] = 0;
      curr.absVal[pos][stateId] = 0;
    }

    commonCtx.update( scanInfo, curr.refSbbCtxId[stateId], stateId, curr );

    curr.numSig[stateId] = 0;

    if( curr.remRegBins[stateId] < 4 )
    {
      curr.anyRemRegBinsLt4 = true;
      return;
    }

    // context offsets for the next position: low 5 bits hold the sum, upper bits the count
    const TCoeff tplSum   = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
    const TCoeff tplCount = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
    const int    sumGt1   = tplSum - tplCount;

    curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( tplSum + 1 ) >> 1, 3 );
    curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
  }
1085
1086
  // Applies `decisions` to all four trellis states via update1State.
  // A copy of the incoming state memory serves as the read-only "previous" snapshot, so the
  // four updates do not observe each other's writes. curr.anyRemRegBinsLt4 is cleared first
  // and set again by update1State for any state with fewer than 4 remaining regular bins.
  static void updateStates( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr )
  {
    DQIntern::StateMem snapshot = curr;
    curr.anyRemRegBinsLt4       = false;

    for( int stateId = 0; stateId < 4; stateId++ )
    {
      update1State( stateId, scanInfo, decisions, curr, snapshot );
    }

    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
  }
1098
1099
  // Applies `decisions` to all four trellis states via update1StateEOS.
  // Works on a snapshot of the incoming state memory exactly like updateStates, and
  // additionally passes the skip states and the common context to the per-state update.
  static void updateStatesEOS( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::CommonCtx& commonCtx )
  {
    DQIntern::StateMem snapshot = curr;
    curr.anyRemRegBinsLt4       = false;

    for( int stateId = 0; stateId < 4; stateId++ )
    {
      update1StateEOS( stateId, scanInfo, decisions, skip, curr, snapshot, commonCtx );
    }

    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
  }
1111
}; // namespace DQIntern
1112
1113
// Initial content of the two Decisions entries kept per trellis node.
// Members are listed in the order { rdCost[4], absLevel[4], prevId[4] }.
static const DQIntern::Decisions startDec[2] =
{
  // [0]: "no decision yet" - maximal cost, level -1, prevId -2 (ignored by the state update)
  {
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
    { -1, -1, -1, -1 },
    { -2, -2, -2, -2 },
  },
  // [1]: level 0 with prevId ( 4 | stateId ), i.e. each state points to its own skip state
  {
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
    { 0, 0, 0, 0 },
    { 4, 5, 6, 7 },
  }
};
1128
1129
// Quantizes one transform block with the trellis search.
//
// Steps: (1) clear the output levels, (2) determine the first scan position worth testing,
// (3) initialise rate estimator, common context and state memory, (4) run xDecideAndUpdate
// for every scan position from firstTestPos down to 0, (5) pick the cheapest final state and
// (6) follow the stored prevId links to write the levels into the TU.
//
// On return absSum holds the sum of the written absolute levels and tu.lastPos[compID] the
// last written scan index (-1 if no coefficient exceeded the threshold).
// quantCoeff is only read when enableScalingLists is true.
void DepQuant::xQuantDQ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff )
{
  using namespace DQIntern;

  //===== reset / pre-init =====
  const CompArea&     area     = tu.blocks[compID];
  const TUParameters& tuPars   = *m_scansRom->getTUPars( area, compID );
  m_quant.initQuantBlock( tu, compID, cQP, lambda );

  TCoeffSig* const    qCoeff   = tu.getCoeffs( compID ).buf;
  const TCoeff* const tCoeff   = srcCoeff.buf;
  const int           numCoeff = area.area();
  ::memset( qCoeff, 0x00, numCoeff * sizeof( TCoeffSig ) );
  absSum = 0;

  const uint32_t width    = area.width;
  const uint32_t height   = area.height;
  const uint32_t lfnstIdx = tu.cu->lfnstIdx;

  //===== zero-out configuration =====
  // For luma blocks coded with an MTS transform (or SBT with MTS enabled and both dimensions
  // <= 32), a dimension of 32 is reduced to an effective size of 16.
  int  effWidth  = tuPars.m_width;
  int  effHeight = tuPars.m_height;
  bool zeroOut   = false;

  if( ( tu.mtsIdx[compID] > MTS_SKIP
        || ( tu.cs->sps->MTS && tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32 ) )
      && compID == COMP_Y )
  {
    effHeight = ( tuPars.m_height == 32 ) ? 16 : tuPars.m_height;
    effWidth  = ( tuPars.m_width  == 32 ) ? 16 : tuPars.m_width;
    zeroOut   = ( effHeight < tuPars.m_height || effWidth < tuPars.m_width );
  }

  const bool zeroOutforThres = zeroOut || ( 32 < tuPars.m_height || 32 < tuPars.m_width );

  //===== find first test position =====
  int firstTestPos = std::min<int>( tuPars.m_width, JVET_C0024_ZERO_OUT_TH ) * std::min<int>( tuPars.m_height, JVET_C0024_ZERO_OUT_TH ) - 1;

  if( lfnstIdx > 0 && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4 )
  {
    const bool smallSquare = ( width == 4 && height == 4 ) || ( width == 8 && height == 8 );
    firstTestPos           = smallSquare ? 7 : 15;
  }

  const TCoeff defaultQuantisationCoefficient = (TCoeff)m_quant.getQScale();
  const TCoeff thres         = m_quant.getLastThreshold();
  const int    zeroOutWidth  = ( tuPars.m_width  == 32 && zeroOut ) ? 16 : 32;
  const int    zeroOutHeight = ( tuPars.m_height == 32 && zeroOut ) ? 16 : 32;

  if( !enableScalingLists )
  {
    // one threshold for the whole block, search delegated to the (possibly SIMD) helper
    const TCoeff defaultTh = TCoeff( thres / ( defaultQuantisationCoefficient << 2 ) );

    m_findFirstPos( firstTestPos, tCoeff, tuPars, defaultTh, zeroOutforThres, zeroOutWidth, zeroOutHeight );
  }
  else
  {
    // per-coefficient threshold derived from the scaling list entry
    for( ; firstTestPos >= 0; firstTestPos-- )
    {
      const auto& blkPos = tuPars.m_scanId2BlkPos[firstTestPos];

      if( zeroOutforThres && ( blkPos.x >= zeroOutWidth || blkPos.y >= zeroOutHeight ) )
      {
        continue;
      }

      const TCoeff thresTmp = TCoeff( thres / ( 4 * quantCoeff[blkPos.idx] ) );

      if( abs( tCoeff[blkPos.idx] ) > thresTmp )
      {
        break;
      }
    }
  }

  if( firstTestPos < 0 )
  {
    // nothing above the threshold: the block stays all-zero
    tu.lastPos[compID] = -1;
    return;
  }

  //===== real init =====
  RateEstimator::initCtx( tuPars, tu, compID, ctx.getFracBitsAcess() );
  m_commonCtx.reset( tuPars, *this );

  for( int stateId = 0; stateId < 4; stateId++ )
  {
    DQIntern::initStates( stateId, m_state_curr );
    DQIntern::initStates( stateId, m_state_skip );
    m_state_curr.m_sigFracBitsArray[stateId] = RateEstimator::sigFlagBits( stateId );
  }

  m_state_curr.m_gtxFracBitsArray = RateEstimator::gtxFracBits();

  // Only sum1st has to be cleared here, because setRiceParam reads it before the first
  // updateStates{,EOS} call; tplAcc and absVal are written by updateStates{,EOS} before
  // their first access.
  memset( m_state_curr.sum1st, 0, sizeof( m_state_curr.sum1st ) );

  const int                  numCtx  = isLuma( compID ) ? 21 : 11;
  const CoeffFracBits* const cffBits = gtxFracBits();
  for( int ctxId = 0; ctxId < numCtx; ctxId++ )
  {
    m_state_curr.cffBits1[ctxId] = cffBits[ctxId].bits[1];
  }

  const int effectWidth  = std::min( 32, effWidth );
  const int effectHeight = std::min( 32, effHeight );
  m_state_curr.initRemRegBins   = ( effectWidth * effectHeight * MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT ) / 16;
  // The first coefficient uses the scalar implementation, because it is checked against the
  // init state, which prohibits some paths.
  m_state_curr.anyRemRegBinsLt4 = true;

  //===== populate trellis =====
  for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- )
  {
    const ScanInfo& scanInfo = tuPars.m_scanInfo[ scanIdx ];

    int scale = defaultQuantisationCoefficient;
    if( enableScalingLists )
    {
      // the quantizer has to be re-initialised for every position-specific scale
      scale = quantCoeff[scanInfo.rasterPos];
      m_quant.initQuantBlock( tu, compID, cQP, lambda, scale );
    }

    const bool forcedZero = zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight );
    xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos] ), scanInfo, forcedZero, scale );
  }

  //===== find best path =====
  // A final state is only accepted when its cost is below 0.
  int     prevId      = -1;
  int64_t minPathCost =  0;
  for( int stateId = 0; stateId < 4; stateId++ )
  {
    const int64_t pathCost = m_trellis[0][0].rdCost[stateId];
    if( pathCost < minPathCost )
    {
      minPathCost = pathCost;
      prevId      = stateId;
    }
  }

  //===== backward scanning =====
  // prevId bit 2 selects which of the two Decisions entries of a node is used, bits 0..1 the state.
  int scanIdx = 0;
  while( prevId >= 0 )
  {
    const auto& node = m_trellis[scanIdx][prevId >> 2];
    const int   slot = prevId & 3;

    TCoeffSig     absLevel = node.absLevel[slot];
    const int32_t blkpos   = tuPars.m_scanId2BlkPos[scanIdx].idx;

    qCoeff[ blkpos ] = TCoeffSig( tCoeff[blkpos] < 0 ? -absLevel : absLevel );
    absSum          += absLevel;
    prevId           = node.prevId[slot];

    scanIdx++;
  }

  tu.lastPos[compID] = scanIdx - 1;
}
1265
1266
// Fills `decisions` with the cheapest candidate per trellis state for one scan position.
//
// `decisions` is first reset to startDec[0]. For a zeroed-out position only the skip
// candidates are written (and only when scanInfo.spt == SCAN_EOCSBB). Otherwise the
// coefficient is pre-quantized inline, candidate levels with their distortion deltas are
// derived, and the candidates are evaluated either by the scalar per-state checks or by the
// m_checkAllRdCosts* function pointers. The pointer variants must run before any other check
// because they assume that no decision has been made yet.
void DepQuant::xDecide( const DQIntern::ScanInfo& scanInfo, const TCoeff absCoeff, const int lastOffset, DQIntern::Decisions& decisions, bool zeroOut, int quantCoeff )
{
  using namespace DQIntern;

  ::memcpy( &decisions, &startDec[0], sizeof( Decisions ) );

  StateMem&  skip     = m_state_skip;
  const bool isEocSbb = scanInfo.spt == SCAN_EOCSBB;

  if( zeroOut )
  {
    if( isEocSbb )
    {
      for( int stateId = 0; stateId < 4; stateId++ )
      {
        checkRdCostSkipSbbZeroOut( stateId, decisions, stateId, skip );
      }
    }
    return;
  }

  StateMem& prev = m_state_curr;

  /// start inline prequant
  const int64_t scaledOrg = int64_t( absCoeff ) * quantCoeff;
  TCoeff        qIdx      = TCoeff( ( scaledOrg + m_quant.m_QAdd ) >> m_quant.m_QShift );

  // the flag is not modified by setRiceParam, so it can be read once
  const bool binsLow = prev.anyRemRegBinsLt4;

  if( qIdx >= 0 )
  {
    qIdx = std::max<TCoeff>( 1, std::min<TCoeff>( m_quant.m_maxQIdx, qIdx ) );
    const int64_t scaledAdd = qIdx * m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;

    // distortion delta of the candidate with quantization index qIdx + k
    const auto candidateDist = [&]( const int k )
    {
      return ( ( scaledAdd + k * m_quant.m_DistStepAdd ) * ( qIdx + k ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    };

    // the four candidates qIdx .. qIdx + 3 are stored at index ( qIdx + k ) & 3
    PQData pqData[4];
    for( int k = 0; k < 4; k++ )
    {
      PQData& pq   = pqData[( qIdx + k ) & 3];
      pq.deltaDist = candidateDist( k );
      pq.absLevel  = ( qIdx + k + 1 ) >> 1;
    }
    /// stop inline prequant

    const bool cff02ge4 = pqData[0].absLevel >= 4/* || pqData[2].absLevel >= 4 */;
    const bool cff13ge4 = /* pqData[1].absLevel >= 4 || */ pqData[3].absLevel >= 4;

    if( !( cff02ge4 || cff13ge4 || binsLow ) )
    {
      // has to be called as a first check, assumes no decision has been made yet
      m_checkAllRdCosts( scanInfo.spt, pqData, decisions, prev );
    }
    else
    {
      if( binsLow || cff02ge4 )
      {
        setRiceParam( 0, scanInfo, prev, cff02ge4 );
        setRiceParam( 1, scanInfo, prev, cff02ge4 );
      }

      if( binsLow || cff13ge4 )
      {
        setRiceParam( 2, scanInfo, prev, cff13ge4 );
        setRiceParam( 3, scanInfo, prev, cff13ge4 );
      }

      checkRdCosts( 0, scanInfo.spt, pqData[0], pqData[2], decisions, 0, 2, prev );
      checkRdCosts( 1, scanInfo.spt, pqData[0], pqData[2], decisions, 2, 0, prev );
      checkRdCosts( 2, scanInfo.spt, pqData[3], pqData[1], decisions, 1, 3, prev );
      checkRdCosts( 3, scanInfo.spt, pqData[3], pqData[1], decisions, 3, 1, prev );
    }

    checkRdCostStart( lastOffset, pqData[0], decisions, 0, prev );
    checkRdCostStart( lastOffset, pqData[2], decisions, 2, prev );
  }
  else
  {
    // Negative index: only the candidates with indices 1 and 2 are evaluated here.
    const int64_t scaledAdd = m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;
    const int64_t pq_a_dist = ( scaledAdd + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    const int64_t pq_b_dist = ( ( scaledAdd + m_quant.m_DistStepAdd ) * 2 + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    /// stop inline prequant

    if( !binsLow )
    {
      // has to be called as a first check, assumes no decision has been made yet
      m_checkAllRdCostsOdd1( scanInfo.spt, pq_a_dist, pq_b_dist, decisions, prev );
    }
    else
    {
      setRiceParam( 0, scanInfo, prev, false );
      checkRdCostsOdd1( 0, scanInfo.spt, pq_b_dist, decisions, 2, 0, prev );

      setRiceParam( 1, scanInfo, prev, false );
      checkRdCostsOdd1( 1, scanInfo.spt, pq_b_dist, decisions, 0, 2, prev );

      setRiceParam( 2, scanInfo, prev, false );
      checkRdCostsOdd1( 2, scanInfo.spt, pq_a_dist, decisions, 3, 1, prev );

      setRiceParam( 3, scanInfo, prev, false );
      checkRdCostsOdd1( 3, scanInfo.spt, pq_a_dist, decisions, 1, 3, prev );
    }

    checkRdCostStart( lastOffset, PQData{ 1, pq_b_dist }, decisions, 2, prev );
  }

  if( isEocSbb )
  {
    // additionally offer the skip candidate of every state
    for( int stateId = 0; stateId < 4; stateId++ )
    {
      checkRdCostSkipSbb( stateId, decisions, stateId, skip );
    }
  }
}
1387
1388
// Processes one scan position: stores the decisions in the trellis node of
// scanInfo.scanIdx and advances the state memory accordingly.
//
// For scanIdx == 0 only the decisions are computed. Otherwise:
//  - at SCAN_SOCSBB the first StateMemSkipCpySize bytes of the current states are saved
//    into the skip states,
//  - at insidePos == 0 the common context is swapped, the EOS update runs and the node's
//    first Decisions entry is copied into its second entry,
//  - at any other position the regular update runs unless the position is zeroed out.
void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const DQIntern::ScanInfo& scanInfo, bool zeroOut, int quantCoeff )
{
  using namespace DQIntern;

  Decisions* const node = m_trellis[scanInfo.scanIdx];

  xDecide( scanInfo, absCoeff, lastOffset( scanInfo.scanIdx ), node[0], zeroOut, quantCoeff );

  if( !scanInfo.scanIdx )
  {
    return; // last processed position, no state update required
  }

  if( scanInfo.spt == SCAN_SOCSBB )
  {
    memcpy( &m_state_skip, &m_state_curr, DQIntern::StateMemSkipCpySize );
  }

  if( scanInfo.insidePos != 0 )
  {
    if( !zeroOut )
    {
      m_updateStates( scanInfo, node[0], m_state_curr );
    }
    return;
  }

  m_commonCtx.swap();
  m_updateStatesEOS( scanInfo, node[0], m_state_skip, m_state_curr, m_commonCtx );
  ::memcpy( &node[1], &node[0], sizeof( Decisions ) );
}
1415
1416
void DepQuant::xDequantDQ( const TransformUnit& tu,  CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* piDequantCoef )
1417
696k
{
1418
696k
  m_quant.dequantBlock( tu, compID, cQP, recCoeff, enableScalingLists, piDequantCoef );
1419
696k
}
1420
1421
17.3k
// Constructs the quantizer.
// If `other` is given it must be a DepQuant; its scan tables (m_scansRom) are then shared
// instead of being rebuilt. Every trellis node is initialised with startDec and the function
// pointers are set to the scalar implementations, which the platform init routines may
// replace when enableOpt is true and SIMD support is compiled in.
DepQuant::DepQuant( const Quant* other, bool enc, bool useScalingLists, bool enableOpt ) : QuantRDOQ2( other, useScalingLists ), RateEstimator(), m_commonCtx()
{
  const DepQuant* dq = dynamic_cast<const DepQuant*>( other );
  CHECK( other && !dq, "The DepQuant cast must be successfull!" );

  if( dq )
  {
    // share the already initialised scan tables
    m_scansRom = dq->m_scansRom;
  }
  else
  {
    m_scansRom = std::make_shared<DQIntern::Rom>();
    m_scansRom->init();
  }

  const int numNodes = MAX_TB_SIZEY * MAX_TB_SIZEY;
  for( int node = 0; node < numNodes; node++ )
  {
    // each node holds both startDec entries
    memcpy( m_trellis[node], startDec, sizeof( startDec ) );
  }

  // scalar defaults
  m_findFirstPos        = DQIntern::findFirstPos;
  m_updateStates        = DQIntern::updateStates;
  m_updateStatesEOS     = DQIntern::updateStatesEOS;
  m_checkAllRdCostsOdd1 = DQIntern::checkAllRdCostsOdd1;
  m_checkAllRdCosts     = DQIntern::checkAllRdCosts;

  if( enableOpt )
  {
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_OPT_QUANT
    initDepQuantX86();
#endif
#if defined( TARGET_SIMD_ARM ) && ENABLE_SIMD_OPT_QUANT
    initDepQuantARM();
#endif
  }
}
1457
1458
// Nothing to release explicitly; members clean up after themselves.
DepQuant::~DepQuant() = default;
1461
1462
// Quantization entry point. Three mutually exclusive paths:
//  1. selective RDOQ is enabled and xNeedRDOQ() returns false: the block is reported as
//     empty (lastPos = -1, uiAbsSum = 0),
//  2. dependent quantization is enabled for the slice and the block is not transform-skip:
//     xQuantDQ() runs with the quantization coefficients selected for this block,
//  3. otherwise: delegation to QuantRDOQ2::quant().
void DepQuant::quant( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff& uiAbsSum, const QpParam& cQP, const Ctx& ctx )
{
  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ( tu, compID, pSrc, cQP ) )
  {
    tu.lastPos[compID] = -1;
    uiAbsSum           =  0;
    return;
  }

  const bool isTransformSkip = tu.mtsIdx[compID] == MTS_SKIP;

  if( !tu.cs->slice->depQuantEnabled || isTransformSkip )
  {
    QuantRDOQ2::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  //===== scaling matrix ====
  const int       qpDQ   = cQP.Qp( isTransformSkip ) + 1;
  const int       qpRem  = qpDQ % 6; // same value as qpDQ - 6 * ( qpDQ / 6 )
  const CompArea& rect   = tu.blocks[compID];
  const int       width  = rect.width;
  const int       height = rect.height;

  uint32_t scalingListType = getScalingListType(tu.cu->predMode, compID);
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const uint32_t log2TrWidth  = Log2(width);
  const uint32_t log2TrHeight = Log2(height);

  // LFNST counts as applied for every component of a separate tree, otherwise for luma only
  const bool isLfnstApplied     = tu.cu->lfnstIdx > 0 && ( CU::isSepTree(*tu.cu) || isLuma(compID) );
  const bool enableScalingLists = getUseScalingList(width, height, isTransformSkip, isLfnstApplied);

  xQuantDQ( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum, enableScalingLists,
            Quant::getQuantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
}
1491
1492
void DepQuant::dequant( const TransformUnit& tu, CoeffBuf& dstCoeff, const ComponentID compID, const QpParam& cQP )
1493
740k
{
1494
740k
  if( tu.cs->slice->depQuantEnabled && (tu.mtsIdx[compID] != MTS_SKIP) )
1495
696k
  {
1496
696k
    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
1497
696k
    const int         qpPer           = qpDQ / 6;
1498
696k
    const int         qpRem           = qpDQ - 6 * qpPer;
1499
696k
    const CompArea    &rect           = tu.blocks[compID];
1500
696k
    const int         width           = rect.width;
1501
696k
    const int         height          = rect.height;
1502
696k
    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
1503
696k
    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
1504
696k
    const uint32_t    log2TrWidth    = Log2(width);
1505
696k
    const uint32_t    log2TrHeight   = Log2(height);
1506
696k
    const bool isLfnstApplied        = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
1507
696k
    const bool enableScalingLists    = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), isLfnstApplied);
1508
696k
    xDequantDQ( tu, dstCoeff, compID, cQP, enableScalingLists, Quant::getDequantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
1509
696k
  }
1510
43.8k
  else
1511
43.8k
  {
1512
43.8k
    QuantRDOQ::dequant( tu, dstCoeff, compID, cQP );
1513
43.8k
  }
1514
740k
}
1515
1516
void DepQuant::init( int rdoq, bool useRDOQTS, int thrVal )
1517
17.3k
{
1518
17.3k
  QuantRDOQ2::init( rdoq, useRDOQTS, thrVal );
1519
17.3k
  m_quant.init( thrVal );
1520
17.3k
}
1521
1522
} // namespace vvenc
1523
1524
//! \}
1525