Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/DepQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
#include "DepQuant.h"
44
#include "TrQuant.h"
45
#include "CodingStructure.h"
46
#include "UnitTools.h"
47
48
#include <bitset>
49
50
//! \ingroup CommonLib
51
//! \{
52
53
namespace vvenc {
54
55
56
namespace DQIntern
57
{
58
  static void findFirstPos( int& firstTestPos, const TCoeff* tCoeff, const DQIntern::TUParameters& tuPars, int defaultTh,
59
                            bool zeroOutForThres, int zeroOutWidth, int zeroOutHeight )
60
2.01M
  {
61
231M
    for( ; firstTestPos >= 0; firstTestPos-- )
62
230M
    {
63
230M
      if( zeroOutForThres && ( tuPars.m_scanId2BlkPos[firstTestPos].x >= zeroOutWidth ||
64
29.2M
                              tuPars.m_scanId2BlkPos[firstTestPos].y >= zeroOutHeight ) )
65
0
      {
66
0
        continue;
67
0
      }
68
230M
      if( abs( tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx] ) > defaultTh )
69
836k
      {
70
836k
        break;
71
836k
      }
72
230M
    }
73
2.01M
  }
74
75
  void Rom::xInitScanArrays()
76
20.7k
  {
77
20.7k
    if( m_scansInitialized )
78
0
    {
79
0
      return;
80
0
    }
81
20.7k
    ::memset( m_scanId2NbInfoSbbArray, 0, sizeof(m_scanId2NbInfoSbbArray) );
82
20.7k
    ::memset( m_scanId2NbInfoOutArray, 0, sizeof(m_scanId2NbInfoOutArray) );
83
20.7k
    ::memset( m_tuParameters,          0, sizeof(m_tuParameters) );
84
85
20.7k
    uint32_t raster2id[ MAX_CU_SIZE * MAX_CU_SIZE ];
86
20.7k
    ::memset(raster2id, 0, sizeof(raster2id));
87
88
166k
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
89
145k
    {
90
1.16M
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
91
1.01M
      {
92
1.01M
        if( (hd == 0 && vd <= 1) || (hd <= 1 && vd == 0) )
93
62.3k
        {
94
62.3k
          continue;
95
62.3k
        }
96
955k
        const uint32_t      blockWidth    = (1 << hd);
97
955k
        const uint32_t      blockHeight   = (1 << vd);
98
955k
        const uint32_t      log2CGWidth   = g_log2SbbSize[hd][vd][0];
99
955k
        const uint32_t      log2CGHeight  = g_log2SbbSize[hd][vd][1];
100
955k
        const uint32_t      groupWidth    = 1 << log2CGWidth;
101
955k
        const uint32_t      groupHeight   = 1 << log2CGHeight;
102
955k
        const uint32_t      groupSize     = groupWidth * groupHeight;
103
955k
        const SizeType      blkWidthIdx   = Log2( blockWidth );
104
955k
        const SizeType      blkHeightIdx  = Log2( blockHeight );
105
955k
        const ScanElement * scanId2RP     = getScanOrder( SCAN_GROUPED_4x4, blkWidthIdx, blkHeightIdx );
106
955k
        NbInfoSbb*&         sId2NbSbb     = m_scanId2NbInfoSbbArray[hd][vd];
107
955k
        NbInfoOut*&         sId2NbOut     = m_scanId2NbInfoOutArray[hd][vd];
108
        // consider only non-zero-out region
109
955k
        const uint32_t      blkWidthNZOut = std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockWidth  );
110
955k
        const uint32_t      blkHeightNZOut= std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockHeight );
111
955k
        const uint32_t      totalValues   = blkWidthNZOut * blkHeightNZOut;
112
113
955k
        sId2NbSbb = new NbInfoSbb[ totalValues ];
114
955k
        sId2NbOut = new NbInfoOut[ totalValues ];
115
116
188M
        for( uint32_t scanId = 0; scanId < totalValues; scanId++ )
117
187M
        {
118
187M
          raster2id[scanId2RP[scanId].idx] = scanId;
119
187M
          sId2NbSbb[scanId].numInv = 0;
120
187M
        }
121
122
188M
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
123
187M
        {
124
187M
          const int posX = scanId2RP[scanId].x;
125
187M
          const int posY = scanId2RP[scanId].y;
126
187M
          const int rpos = scanId2RP[scanId].idx;
127
187M
          {
128
            //===== inside subband neighbours =====
129
187M
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
130
187M
            int            cpos[5];
131
132
187M
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] < groupSize + begSbb ? raster2id[rpos+1           ] - begSbb : 0 ) : 0 );
133
187M
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] < groupSize + begSbb ? raster2id[rpos+2           ] - begSbb : 0 ) : 0 );
134
187M
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] < groupSize + begSbb ? raster2id[rpos+1+blockWidth] - begSbb : 0 ) : 0 );
135
187M
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] < groupSize + begSbb ? raster2id[rpos+  blockWidth] - begSbb : 0 ) : 0 );
136
187M
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] < groupSize + begSbb ? raster2id[rpos+2*blockWidth] - begSbb : 0 ) : 0 );
137
138
187M
            int num = 0;
139
187M
            int inPos[5] = { 0, };
140
141
751M
            while( true )
142
751M
            {
143
751M
              int nk = -1;
144
4.51G
              for( int k = 0; k < 5; k++ )
145
3.75G
              {
146
3.75G
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
147
845M
                {
148
845M
                  nk = k;
149
845M
                }
150
3.75G
              }
151
751M
              if( nk < 0 )
152
187M
              {
153
187M
                break;
154
187M
              }
155
564M
              inPos[ num++ ] = uint8_t( cpos[nk] );
156
564M
              cpos[nk] = 0;
157
564M
            }
158
559M
            for( int k = num; k < 5; k++ )
159
372M
            {
160
372M
              inPos[k] = 0;
161
372M
            }
162
751M
            for( int k = 0; k < num; k++ )
163
564M
            {
164
564M
              CHECK( sId2NbSbb[begSbb + inPos[k]].numInv >= 5, "" );
165
564M
              sId2NbSbb[begSbb + inPos[k]].invInPos[sId2NbSbb[begSbb + inPos[k]].numInv++] = scanId & ( groupSize - 1 );
166
564M
            }
167
187M
          }
168
187M
          {
169
            //===== outside subband neighbours =====
170
187M
            NbInfoOut&     nbOut  = sId2NbOut[ scanId ];
171
187M
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
172
187M
            int            cpos[5];
173
174
187M
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] >= groupSize + begSbb ? raster2id[rpos+1           ] : 0 ) : 0 );
175
187M
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] >= groupSize + begSbb ? raster2id[rpos+2           ] : 0 ) : 0 );
176
187M
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] >= groupSize + begSbb ? raster2id[rpos+1+blockWidth] : 0 ) : 0 );
177
187M
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] >= groupSize + begSbb ? raster2id[rpos+  blockWidth] : 0 ) : 0 );
178
187M
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] >= groupSize + begSbb ? raster2id[rpos+2*blockWidth] : 0 ) : 0 );
179
180
454M
            for( nbOut.num = 0; true; )
181
454M
            {
182
454M
              int nk = -1;
183
2.72G
              for( int k = 0; k < 5; k++ )
184
2.27G
              {
185
2.27G
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
186
392M
                {
187
392M
                  nk = k;
188
392M
                }
189
2.27G
              }
190
454M
              if( nk < 0 )
191
187M
              {
192
187M
                break;
193
187M
              }
194
267M
              nbOut.outPos[ nbOut.num++ ] = uint16_t( cpos[nk] );
195
267M
              cpos[nk] = 0;
196
267M
            }
197
856M
            for( int k = nbOut.num; k < 5; k++ )
198
669M
            {
199
669M
              nbOut.outPos[k] = 0;
200
669M
            }
201
187M
            nbOut.maxDist = ( scanId == 0 ? 0 : sId2NbOut[scanId-1].maxDist );
202
454M
            for( int k = 0; k < nbOut.num; k++ )
203
267M
            {
204
267M
              if( nbOut.outPos[k] > nbOut.maxDist )
205
28.8M
              {
206
28.8M
                nbOut.maxDist = nbOut.outPos[k];
207
28.8M
              }
208
267M
            }
209
187M
          }
210
187M
        }
211
212
        // make it relative
213
188M
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
214
187M
        {
215
187M
          NbInfoOut& nbOut  = sId2NbOut[scanId];
216
187M
          const int  begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
217
454M
          for( int k = 0; k < nbOut.num; k++ )
218
267M
          {
219
267M
            CHECK(begSbb > nbOut.outPos[k], "Position must be past sub block begin");
220
267M
            nbOut.outPos[k] -= begSbb;
221
267M
          }
222
187M
          nbOut.maxDist -= scanId;
223
187M
        }
224
225
2.86M
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
226
1.91M
        {
227
1.91M
          m_tuParameters[hd][vd][chId] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(chId) );
228
1.91M
        }
229
955k
      }
230
145k
    }
231
20.7k
    m_scansInitialized = true;
232
20.7k
  }
233
234
  void Rom::xUninitScanArrays()
235
20.7k
  {
236
20.7k
    if( !m_scansInitialized )
237
0
    {
238
0
      return;
239
0
    }
240
166k
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
241
145k
    {
242
1.16M
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
243
1.01M
      {
244
1.01M
        NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd];
245
1.01M
        NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd];
246
1.01M
        if( sId2NbSbb )
247
955k
        {
248
955k
          delete [] sId2NbSbb;
249
955k
        }
250
1.01M
        if( sId2NbOut )
251
955k
        {
252
955k
          delete [] sId2NbOut;
253
955k
        }
254
3.05M
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
255
2.03M
        {
256
2.03M
          TUParameters*& tuPars = m_tuParameters[hd][vd][chId];
257
2.03M
          if( tuPars )
258
1.91M
          {
259
1.91M
            delete tuPars;
260
1.91M
          }
261
2.03M
        }
262
1.01M
      }
263
145k
    }
264
20.7k
    m_scansInitialized = false;
265
20.7k
  }
266
267
268
  TUParameters::TUParameters( const Rom& rom, const unsigned width, const unsigned height, const ChannelType chType )
269
1.91M
  {
270
1.91M
    m_chType              = chType;
271
1.91M
    m_width               = width;
272
1.91M
    m_height              = height;
273
1.91M
    const uint32_t nonzeroWidth  = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_width);
274
1.91M
    const uint32_t nonzeroHeight = std::min<uint32_t>(JVET_C0024_ZERO_OUT_TH, m_height);
275
1.91M
    m_numCoeff                   = nonzeroWidth * nonzeroHeight;
276
1.91M
    m_log2SbbWidth        = g_log2SbbSize[ Log2(m_width) ][ Log2(m_height) ][0];
277
1.91M
    m_log2SbbHeight       = g_log2SbbSize[ Log2(m_width) ][ Log2(m_height) ][1];
278
1.91M
    m_log2SbbSize         = m_log2SbbWidth + m_log2SbbHeight;
279
1.91M
    m_sbbSize             = ( 1 << m_log2SbbSize );
280
1.91M
    m_sbbMask             = m_sbbSize - 1;
281
1.91M
    m_widthInSbb  = nonzeroWidth >> m_log2SbbWidth;
282
1.91M
    m_heightInSbb = nonzeroHeight >> m_log2SbbHeight;
283
1.91M
    m_numSbb              = m_widthInSbb * m_heightInSbb;
284
1.91M
    SizeType        hsbb  = Log2( m_widthInSbb  );
285
1.91M
    SizeType        vsbb  = Log2( m_heightInSbb );
286
1.91M
    SizeType        hsId  = Log2( m_width  );
287
1.91M
    SizeType        vsId  = Log2( m_height );
288
1.91M
    m_scanSbbId2SbbPos    = getScanOrder( SCAN_UNGROUPED   , hsbb , vsbb );
289
1.91M
    m_scanId2BlkPos       = getScanOrder( SCAN_GROUPED_4x4 , hsId , vsId );
290
1.91M
    int log2W             = Log2( m_width  );
291
1.91M
    int log2H             = Log2( m_height );
292
1.91M
    m_scanId2NbInfoSbb    = rom.getNbInfoSbb( log2W, log2H );
293
1.91M
    m_scanId2NbInfoOut    = rom.getNbInfoOut( log2W, log2H );
294
1.91M
    m_scanInfo            = new ScanInfo[ m_numCoeff ];
295
376M
    for( int scanIdx = 0; scanIdx < m_numCoeff; scanIdx++ )
296
374M
    {
297
374M
      xSetScanInfo( m_scanInfo[scanIdx], scanIdx );
298
374M
    }
299
1.91M
  }
300
301
302
  void TUParameters::xSetScanInfo( ScanInfo& scanInfo, int scanIdx )
303
374M
  {
304
374M
    scanInfo.sbbSize    = m_sbbSize;
305
374M
    scanInfo.numSbb     = m_numSbb;
306
374M
    scanInfo.scanIdx    = scanIdx;
307
374M
    scanInfo.rasterPos  = m_scanId2BlkPos[scanIdx].idx;
308
374M
    scanInfo.sbbPos     = m_scanSbbId2SbbPos[scanIdx >> m_log2SbbSize].idx;
309
374M
    scanInfo.insidePos  = scanIdx & m_sbbMask;
310
374M
    scanInfo.spt        = SCAN_ISCSBB;
311
374M
    if(  scanInfo.insidePos == m_sbbMask && scanIdx > scanInfo.sbbSize && scanIdx < m_numCoeff - 1 )
312
20.2M
      scanInfo.spt      = SCAN_SOCSBB;
313
354M
    else if( scanInfo.insidePos == 0 && scanIdx > 0 && scanIdx < m_numCoeff - m_sbbSize )
314
20.2M
      scanInfo.spt      = SCAN_EOCSBB;
315
374M
    scanInfo.posX = m_scanId2BlkPos[scanIdx].x;
316
374M
    scanInfo.posY = m_scanId2BlkPos[scanIdx].y;
317
374M
    if( scanIdx )
318
372M
    {
319
372M
      const int nextScanIdx = scanIdx - 1;
320
372M
      const int diag        = m_scanId2BlkPos[nextScanIdx].x + m_scanId2BlkPos[nextScanIdx].y;
321
372M
      if( m_chType == CH_L )
322
186M
      {
323
186M
        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 8 : diag < 5 ?  4 : 0 );
324
186M
        scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 16 : diag < 3 ? 11 : diag < 10 ? 6 : 1 );
325
186M
      }
326
186M
      else
327
186M
      {
328
186M
        scanInfo.sigCtxOffsetNext = ( diag < 2 ? 4 : 0 );
329
186M
        scanInfo.gtxCtxOffsetNext = ( diag < 1 ? 6 : 1 );
330
186M
      }
331
372M
      scanInfo.nextInsidePos      = nextScanIdx & m_sbbMask;
332
372M
      scanInfo.currNbInfoSbb      = m_scanId2NbInfoSbb[ scanIdx ];
333
372M
      if( scanInfo.insidePos == 0 )
334
21.7M
      {
335
21.7M
        const int nextSbbPos  = m_scanSbbId2SbbPos[nextScanIdx >> m_log2SbbSize].idx;
336
21.7M
        const int nextSbbPosY = nextSbbPos               / m_widthInSbb;
337
21.7M
        const int nextSbbPosX = nextSbbPos - nextSbbPosY * m_widthInSbb;
338
21.7M
        scanInfo.nextSbbRight = ( nextSbbPosX < m_widthInSbb  - 1 ? nextSbbPos + 1            : 0 );
339
21.7M
        scanInfo.nextSbbBelow = ( nextSbbPosY < m_heightInSbb - 1 ? nextSbbPos + m_widthInSbb : 0 );
340
21.7M
      }
341
372M
    }
342
374M
  }
343
344
  void RateEstimator::initCtx( const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID, const FracBitsAccess& fracBitsAccess )
345
836k
  {
346
836k
    m_scanId2Pos = tuPars.m_scanId2BlkPos;
347
836k
    xSetSigSbbFracBits  ( fracBitsAccess, tuPars.m_chType );
348
836k
    xSetSigFlagBits     ( fracBitsAccess, tuPars.m_chType );
349
836k
    xSetGtxFlagBits     ( fracBitsAccess, tuPars.m_chType );
350
836k
    xSetLastCoeffOffset ( fracBitsAccess, tuPars, tu, compID );
351
836k
  }
352
353
  void RateEstimator::xSetLastCoeffOffset( const FracBitsAccess& fracBitsAccess, const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID )
354
836k
  {
355
836k
    const ChannelType chType = ( compID == COMP_Y ? CH_L : CH_C );
356
836k
    int32_t cbfDeltaBits = 0;
357
836k
    if( compID == COMP_Y && !CU::isIntra(*tu.cu) && !tu.depth )
358
286
    {
359
286
      const BinFracBits bits  = fracBitsAccess.getFracBitsArray( Ctx::QtRootCbf() );
360
286
      cbfDeltaBits            = int32_t( bits.intBits[1] ) - int32_t( bits.intBits[0] );
361
286
    }
362
836k
    else
363
836k
    {
364
836k
      BinFracBits bits;
365
836k
      bool prevLumaCbf           = false;
366
836k
      bool lastCbfIsInferred     = false;
367
836k
      bool useIntraSubPartitions = tu.cu->ispMode && isLuma(chType);
368
836k
      if( useIntraSubPartitions )
369
11.8k
      {
370
11.8k
        bool rootCbfSoFar = false;
371
11.8k
        bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
372
11.8k
        uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
373
11.8k
        if( isLastSubPartition )
374
197
        {
375
197
          TransformUnit* tuPointer = tu.cu->firstTU;
376
788
          for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
377
591
          {
378
591
            rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
379
591
            tuPointer     = tuPointer->next;
380
591
          }
381
197
          if( !rootCbfSoFar )
382
0
          {
383
0
            lastCbfIsInferred = true;
384
0
          }
385
197
        }
386
11.8k
        if( !lastCbfIsInferred )
387
11.8k
        {
388
11.8k
          prevLumaCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
389
11.8k
        }
390
11.8k
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, prevLumaCbf, true)));
391
11.8k
      }
392
824k
      else
393
824k
      {
394
824k
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.cbf[COMP_Cb])));
395
824k
      }
396
836k
      cbfDeltaBits = lastCbfIsInferred ? 0 : int32_t(bits.intBits[1]) - int32_t(bits.intBits[0]);
397
836k
    }
398
399
836k
    static const unsigned prefixCtx[] = { 0, 0, 0, 3, 6, 10, 15, 21 };
400
836k
    uint32_t              ctxBits  [ LAST_SIGNIFICANT_GROUPS ];
401
2.51M
    for( unsigned xy = 0; xy < 2; xy++ )
402
1.67M
    {
403
1.67M
      int32_t             bitOffset   = ( xy ? cbfDeltaBits : 0 );
404
1.67M
      int32_t*            lastBits    = ( xy ? m_lastBitsY : m_lastBitsX );
405
1.67M
      const unsigned      size        = ( xy ? tuPars.m_height : tuPars.m_width );
406
1.67M
      const unsigned      log2Size    = Log2( size );
407
1.67M
      const bool          useYCtx     = ( xy != 0 );
408
1.67M
      const CtxSet&       ctxSetLast  = ( useYCtx ? Ctx::LastY : Ctx::LastX )[ chType ];
409
1.67M
      const unsigned      lastShift   = ( compID == COMP_Y ? (log2Size+1)>>2 : Clip3<unsigned>(0,2,size>>3) );
410
1.67M
      const unsigned      lastOffset  = ( compID == COMP_Y ? ( prefixCtx[log2Size] ) : 0 );
411
1.67M
      uint32_t            sumFBits    = 0;
412
1.67M
      unsigned            maxCtxId    = g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size) - 1];
413
11.2M
      for( unsigned ctxId = 0; ctxId < maxCtxId; ctxId++ )
414
9.60M
      {
415
9.60M
        const BinFracBits bits  = fracBitsAccess.getFracBitsArray( ctxSetLast( lastOffset + ( ctxId >> lastShift ) ) );
416
9.60M
        ctxBits[ ctxId ]        = sumFBits + bits.intBits[0] + ( ctxId>3 ? ((ctxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
417
9.60M
        sumFBits               +=            bits.intBits[1];
418
9.60M
      }
419
1.67M
      ctxBits  [ maxCtxId ]     = sumFBits + ( maxCtxId>3 ? ((maxCtxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
420
23.6M
      for (unsigned pos = 0; pos < std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size); pos++)
421
21.9M
      {
422
21.9M
        lastBits[ pos ]         = ctxBits[ g_uiGroupIdx[ pos ] ];
423
21.9M
      }
424
1.67M
    }
425
836k
  }
426
427
  void RateEstimator::xSetSigSbbFracBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
428
836k
  {
429
836k
    const CtxSet& ctxSet = Ctx::SigCoeffGroup[ chType ];
430
2.51M
    for( unsigned ctxId = 0; ctxId < sm_maxNumSigSbbCtx; ctxId++ )
431
1.67M
    {
432
1.67M
      m_sigSbbFracBits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
433
1.67M
    }
434
836k
  }
435
436
  void RateEstimator::xSetSigFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
437
836k
  {
438
3.34M
    for( unsigned ctxSetId = 0; ctxSetId < sm_numCtxSetsSig; ctxSetId++ )
439
2.51M
    {
440
2.51M
      BinFracBits*    bits    = m_sigFracBits [ ctxSetId ];
441
2.51M
      const CtxSet&   ctxSet  = Ctx::SigFlag  [ chType + 2*ctxSetId ];
442
2.51M
      const unsigned  numCtx  = ( chType == CH_L ? 12 : 8 );
443
22.9M
      for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
444
20.4M
      {
445
20.4M
        bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
446
20.4M
      }
447
2.51M
    }
448
836k
  }
449
450
  void RateEstimator::xSetGtxFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
451
836k
  {
452
836k
    const CtxSet&   ctxSetPar   = Ctx::ParFlag [     chType ];
453
836k
    const CtxSet&   ctxSetGt1   = Ctx::GtxFlag [ 2 + chType ];
454
836k
    const CtxSet&   ctxSetGt2   = Ctx::GtxFlag [     chType ];
455
836k
    const unsigned  numCtx      = ( chType == CH_L ? 21 : 11 );
456
10.3M
    for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
457
9.52M
    {
458
9.52M
      BinFracBits     fbPar = fracBitsAccess.getFracBitsArray( ctxSetPar( ctxId ) );
459
9.52M
      BinFracBits     fbGt1 = fracBitsAccess.getFracBitsArray( ctxSetGt1( ctxId ) );
460
9.52M
      BinFracBits     fbGt2 = fracBitsAccess.getFracBitsArray( ctxSetGt2( ctxId ) );
461
9.52M
      CoeffFracBits&  cb    = m_gtxFracBits[ ctxId ];
462
9.52M
      int32_t         par0  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[0]);
463
9.52M
      int32_t         par1  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[1]);
464
9.52M
      cb.bits[0] = 0;
465
9.52M
      cb.bits[1] = fbGt1.intBits[0] + (1 << SCALE_BITS);
466
9.52M
      cb.bits[2] = fbGt1.intBits[1] + par0 + fbGt2.intBits[0];
467
9.52M
      cb.bits[3] = fbGt1.intBits[1] + par1 + fbGt2.intBits[0];
468
9.52M
      cb.bits[4] = fbGt1.intBits[1] + par0 + fbGt2.intBits[1];
469
9.52M
      cb.bits[5] = fbGt1.intBits[1] + par1 + fbGt2.intBits[1];
470
9.52M
    }
471
836k
  }
472
473
  void CommonCtx::update( const ScanInfo& scanInfo, const int prevId, int stateId, StateMem& curr )
474
126k
  {
475
126k
    uint8_t*    sbbFlags  = m_currSbbCtx[stateId].sbbFlags;
476
126k
    uint8_t*    levels    = m_currSbbCtx[stateId].levels;
477
126k
    uint16_t    maxDist   = m_nbInfo[scanInfo.scanIdx - 1].maxDist;
478
126k
    uint16_t    sbbSize   = scanInfo.sbbSize;
479
126k
    std::size_t setCpSize = ( maxDist > sbbSize ? maxDist - sbbSize : 0 ) * sizeof( uint8_t );
480
126k
    if( prevId >= 0 )
481
104k
    {
482
104k
      ::memcpy( sbbFlags, m_prevSbbCtx[prevId].sbbFlags, scanInfo.numSbb * sizeof( uint8_t ) );
483
104k
      ::memcpy( levels + scanInfo.scanIdx + sbbSize, m_prevSbbCtx[prevId].levels + scanInfo.scanIdx + sbbSize, setCpSize );
484
104k
    }
485
21.5k
    else
486
21.5k
    {
487
21.5k
      ::memset( sbbFlags, 0, scanInfo.numSbb * sizeof( uint8_t ) );
488
21.5k
      ::memset( levels + scanInfo.scanIdx + sbbSize, 0, setCpSize );
489
21.5k
    }
490
126k
    sbbFlags[scanInfo.sbbPos] = !!curr.numSig[stateId];
491
492
126k
    const int       sigNSbb = ( ( scanInfo.nextSbbRight ? sbbFlags[scanInfo.nextSbbRight] : false ) || ( scanInfo.nextSbbBelow ? sbbFlags[scanInfo.nextSbbBelow] : false ) ? 1 : 0 );
493
126k
    curr.refSbbCtxId[stateId] = stateId;
494
126k
    const BinFracBits sbbBits = m_sbbFlagBits[sigNSbb];
495
496
126k
    curr.sbbBits0[stateId] = sbbBits.intBits[0];
497
126k
    curr.sbbBits1[stateId] = sbbBits.intBits[1];
498
499
126k
    if( sigNSbb || ( ( scanInfo.nextSbbRight && scanInfo.nextSbbBelow ) ? sbbFlags[scanInfo.nextSbbBelow + 1] : false ) )
500
76.7k
    {
501
76.7k
      const int         scanBeg = scanInfo.scanIdx - scanInfo.sbbSize;
502
76.7k
      const NbInfoOut* nbOut = m_nbInfo + scanBeg;
503
76.7k
      const uint8_t* absLevels = levels + scanBeg;
504
505
1.30M
      for( int id = 0; id < scanInfo.sbbSize; id++, nbOut++ )
506
1.22M
      {
507
1.22M
        if( nbOut->num )
508
831k
        {
509
831k
          TCoeff sumAbs = 0, sumAbs1 = 0, sumNum = 0;
510
2.02M
#define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(4+(t&1),t); sumNum+=!!t; }
511
831k
          switch( nbOut->num )
512
831k
          {
513
0
          default:
514
54.5k
          case 5:
515
54.5k
            UPDATE( 4 );
516
163k
          case 4:
517
163k
            UPDATE( 3 );
518
447k
          case 3:
519
447k
            UPDATE( 2 );
520
524k
          case 2:
521
524k
            UPDATE( 1 );
522
831k
          case 1:
523
831k
            UPDATE( 0 );
524
831k
          }
525
831k
#undef UPDATE
526
831k
          curr.tplAcc[id][stateId] = ( sumNum << 5 ) | sumAbs1;
527
831k
          curr.sum1st[id][stateId] = ( uint8_t ) std::min( 255, sumAbs );
528
831k
        }
529
1.22M
      }
530
76.7k
    }
531
126k
  }
532
533
  void Quantizer::initQuantBlock(const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue)
534
2.01M
  {
535
2.01M
    CHECKD( lambda <= 0.0, "Lambda must be greater than 0" );
536
537
2.01M
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
538
2.01M
    const int         qpPer                 = qpDQ / 6;
539
2.01M
    const int         qpRem                 = qpDQ - 6 * qpPer;
540
2.01M
    const SPS&        sps                   = *tu.cs->sps;
541
2.01M
    const CompArea&   area                  = tu.blocks[ compID ];
542
2.01M
    const ChannelType chType                = toChannelType( compID );
543
2.01M
    const int         channelBitDepth       = sps.bitDepths[ chType ];
544
2.01M
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
545
2.01M
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
546
2.01M
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
547
2.01M
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
548
    // quant parameters
549
2.01M
    m_QShift                    = QUANT_SHIFT  - 1 + qpPer + transformShift;
550
2.01M
    m_QAdd                      = -( ( 3 << m_QShift ) >> 1 );
551
2.01M
    Intermediate_Int  invShift  = IQUANT_SHIFT + 1 - qpPer - transformShift;
552
2.01M
    m_QScale                    = g_quantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
553
2.01M
    const unsigned    qIdxBD    = std::min<unsigned>( maxLog2TrDynamicRange + 1, 8*sizeof(Intermediate_Int) + invShift - IQUANT_SHIFT - 1 );
554
2.01M
    m_maxQIdx                   = ( 1 << (qIdxBD-1) ) - 4;
555
2.01M
    if( m_QShift )
556
2.01M
      m_thresLast               = TCoeff((int64_t(m_DqThrVal) << (m_QShift-1)));
557
0
    else
558
0
      m_thresLast               = TCoeff((int64_t(m_DqThrVal>>1) << m_QShift));
559
2.01M
    m_thresSSbb                 = TCoeff((int64_t(3) << m_QShift));
560
    // distortion calculation parameters
561
2.01M
    const int64_t qScale        = (gValue==-1) ? m_QScale : gValue;
562
2.01M
    const int nomDShift =
563
2.01M
      SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift + (needsSqrt2ScaleAdjustment ? 1 : 0);
564
2.01M
    const double  qScale2       = double( qScale * qScale );
565
2.01M
    const double  nomDistFactor = ( nomDShift < 0 ? 1.0/(double(int64_t(1)<<(-nomDShift))*qScale2*lambda) : double(int64_t(1)<<nomDShift)/(qScale2*lambda) );
566
2.01M
    const uint32_t pow2dfShift   = (uint32_t)( nomDistFactor * qScale2 ) + 1;
567
2.01M
    const int     dfShift       = ceilLog2( pow2dfShift );
568
2.01M
    m_DistShift                 = 62 + m_QShift - 2*maxLog2TrDynamicRange - dfShift;
569
2.01M
    m_DistAdd                   = (int64_t(1) << m_DistShift) >> 1;
570
2.01M
    m_DistStepAdd               = ((m_DistShift+m_QShift)>=64 ? (int64_t)( nomDistFactor * pow(2,m_DistShift+m_QShift) + .5 ) : (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+m_QShift)) + .5 ));
571
2.01M
    m_DistOrgFact               = (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+1       )) + .5 );
572
2.01M
  }
573
574
  void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef) const
575
824k
  {
576
577
    //----- set basic parameters -----
578
824k
    const CompArea&     area      = tu.blocks[ compID ];
579
824k
    const int           numCoeff  = area.area();
580
824k
    const SizeType      hsId      = Log2( area.width );
581
824k
    const SizeType      vsId      = Log2( area.height );
582
824k
    const ScanElement  *scan      = getScanOrder( SCAN_GROUPED_4x4, hsId, vsId );
583
824k
    const TCoeffSig*    qCoeff    = tu.getCoeffs( compID ).buf;
584
824k
          TCoeff*       tCoeff    = recCoeff.buf;
585
586
    //----- reset coefficients and get last scan index -----
587
824k
    ::memset( tCoeff, 0, numCoeff * sizeof( TCoeff ) );
588
824k
    int lastScanIdx = tu.lastPos[compID];
589
824k
    if( lastScanIdx < 0 )
590
0
    {
591
0
      return;
592
0
    }
593
594
    //----- set dequant parameters -----
595
824k
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
596
824k
    const int         qpPer                 = qpDQ / 6;
597
824k
    const int         qpRem                 = qpDQ - 6 * qpPer;
598
824k
    const SPS&        sps                   = *tu.cs->sps;
599
824k
    const ChannelType chType                = toChannelType( compID );
600
824k
    const int         channelBitDepth       = sps.bitDepths[ chType ];
601
824k
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
602
824k
    const TCoeff      minTCoeff             = -( 1 << maxLog2TrDynamicRange );
603
824k
    const TCoeff      maxTCoeff             =  ( 1 << maxLog2TrDynamicRange ) - 1;
604
824k
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
605
824k
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
606
824k
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
607
824k
    Intermediate_Int  shift                 = IQUANT_SHIFT + 1 - qpPer - transformShift + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
608
824k
    Intermediate_Int  invQScale             = g_invQuantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
609
824k
    Intermediate_Int  add                   = (shift < 0) ? 0 : ((1 << shift) >> 1);
610
    //----- dequant coefficients -----
611
8.74M
    for( int state = 0, scanIdx = lastScanIdx; scanIdx >= 0; scanIdx-- )
612
7.92M
    {
613
7.92M
      const unsigned   rasterPos = scan[scanIdx].idx;
614
7.92M
      const TCoeffSig& level     = qCoeff[ rasterPos ];
615
7.92M
      if( level )
616
7.12M
      {
617
7.12M
        if (enableScalingLists)
618
0
          invQScale = piDequantCoef[rasterPos];//scalingfactor*levelScale
619
7.12M
        if (shift < 0 && (enableScalingLists || scanIdx == lastScanIdx))
620
449k
        {
621
449k
          invQScale <<= -shift;
622
449k
        }
623
7.12M
        Intermediate_Int qIdx = 2 * level + (level > 0 ? -(state>>1) : (state>>1));
624
7.12M
        int64_t  nomTCoeff          = ((int64_t)qIdx * (int64_t)invQScale + add) >> ((shift < 0) ? 0 : shift);
625
7.12M
        tCoeff[rasterPos]           = (TCoeff)Clip3<int64_t>(minTCoeff, maxTCoeff, nomTCoeff);
626
7.12M
      }
627
7.92M
      state = ( 32040 >> ((state<<2)+((level&1)<<1)) ) & 3;   // the 16-bit value "32040" represent the state transition table
628
7.92M
    }
629
824k
  }
630
631
  bool Quantizer::preQuantCoeff( const TCoeff absCoeff, PQData* pqData, int quanCoeff ) const
632
0
  {
633
0
    int64_t scaledOrg = int64_t( absCoeff ) * quanCoeff;
634
0
    TCoeff  qIdx      = TCoeff( ( scaledOrg + m_QAdd ) >> m_QShift );
635
636
0
    if( qIdx < 0 )
637
0
    {
638
0
      int64_t scaledAdd = m_DistStepAdd - scaledOrg * m_DistOrgFact;
639
0
      PQData& pq_a      = pqData[1];
640
0
      PQData& pq_b      = pqData[2];
641
642
0
      pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * 1 + m_DistAdd ) >> m_DistShift;
643
0
      pq_a.absLevel     = 1;
644
645
0
      pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * 2 + m_DistAdd ) >> m_DistShift;
646
0
      pq_b.absLevel     = 1;
647
      
648
0
      return true;
649
0
    }
650
     
651
0
    qIdx              = std::max<TCoeff>( 1, std::min<TCoeff>( m_maxQIdx, qIdx ) );
652
0
    int64_t scaledAdd = qIdx * m_DistStepAdd - scaledOrg * m_DistOrgFact;
653
654
0
    PQData& pq_a      = pqData[( qIdx + 0 ) & 3];
655
0
    PQData& pq_b      = pqData[( qIdx + 1 ) & 3];
656
0
    PQData& pq_c      = pqData[( qIdx + 2 ) & 3];
657
0
    PQData& pq_d      = pqData[( qIdx + 3 ) & 3];
658
659
0
    pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * ( qIdx + 0 ) + m_DistAdd ) >> m_DistShift;
660
0
    pq_a.absLevel     = ( qIdx + 1 ) >> 1;
661
662
0
    pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * ( qIdx + 1 ) + m_DistAdd ) >> m_DistShift;
663
0
    pq_b.absLevel     = ( qIdx + 2 ) >> 1;
664
665
0
    pq_c.deltaDist    = ( ( scaledAdd + 2 * m_DistStepAdd ) * ( qIdx + 2 ) + m_DistAdd ) >> m_DistShift;
666
0
    pq_c.absLevel     = ( qIdx + 3 ) >> 1;
667
668
0
    pq_d.deltaDist    = ( ( scaledAdd + 3 * m_DistStepAdd ) * ( qIdx + 3 ) + m_DistAdd ) >> m_DistShift;
669
0
    pq_d.absLevel     = ( qIdx + 4 ) >> 1;
670
671
0
    return false;
672
0
  }
673
674
  const int32_t g_goRiceBits[4][RICEMAX] =
675
  {
676
    { 32768,  65536,  98304, 131072, 163840, 196608, 262144, 262144, 327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752},
677
    { 65536,  65536,  98304,  98304, 131072, 131072, 163840, 163840, 196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448, 425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984},
678
    { 98304,  98304,  98304,  98304, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144, 327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680},
679
    {131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608, 229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376}
680
  };
681
682
  static inline void initStates( const int stateId, DQIntern::StateMem& state )
683
6.69M
  {
684
6.69M
    state.rdCost[stateId]         = DQIntern::rdCostInit;
685
6.69M
    state.ctx.cff[stateId]        =  0;
686
6.69M
    state.ctx.sig[stateId]        =  0;
687
6.69M
    state.numSig[stateId]         =  0;
688
6.69M
    state.refSbbCtxId[stateId]    = -1;
689
6.69M
    state.remRegBins[stateId]     =  4;
690
6.69M
    state.cffBitsCtxOffset        =  0;
691
6.69M
    state.m_goRicePar[stateId]    =  0;
692
6.69M
    state.m_goRiceZero[stateId]   =  0;
693
6.69M
    state.sbbBits0[stateId]       =  0;
694
6.69M
    state.sbbBits1[stateId]       =  0;
695
6.69M
  }
696
697
  template<bool rrgEnsured = false>
698
  static inline void checkRdCosts( const int stateId, const DQIntern::ScanPosType spt, const DQIntern::PQData& pqDataA, const DQIntern::PQData& pqDataB, DQIntern::Decisions& decisions, int idxAZ, int idxB, const DQIntern::StateMem& state )
699
26.7M
  {
700
26.7M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
26.7M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
26.7M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
26.7M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
26.7M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
26.2M
    {
707
26.2M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
26.2M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
26.2M
      if( pqDataA.absLevel < 4 )
711
6.42M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
19.8M
      else
713
19.8M
      {
714
19.8M
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
19.8M
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
19.8M
      }
717
718
26.2M
      if( pqDataB.absLevel < 4 )
719
8.01M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
18.2M
      else
721
18.2M
      {
722
18.2M
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
18.2M
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
18.2M
      }
725
726
26.2M
      if( spt == SCAN_ISCSBB )
727
26.2M
      {
728
26.2M
        rdCostA += sigBits.intBits[1];
729
26.2M
        rdCostB += sigBits.intBits[1];
730
26.2M
        rdCostZ += sigBits.intBits[0];
731
26.2M
      }
732
74.5k
      else if( spt == SCAN_SOCSBB )
733
12.1k
      {
734
12.1k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
12.1k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
12.1k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
12.1k
      }
738
62.3k
      else if( state.numSig[stateId] )
739
61.0k
      {
740
61.0k
        rdCostA += sigBits.intBits[1];
741
61.0k
        rdCostB += sigBits.intBits[1];
742
61.0k
        rdCostZ += sigBits.intBits[0];
743
61.0k
      }
744
1.27k
      else
745
1.27k
      {
746
1.27k
        rdCostZ = rdCostInit;
747
1.27k
      }
748
26.2M
    }
749
427k
    else
750
427k
    {
751
427k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
427k
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
427k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
427k
    }
755
756
26.7M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
16.7M
    {
758
16.7M
      decisions.rdCost[idxAZ] = rdCostA;
759
16.7M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
16.7M
      decisions.prevId[idxAZ] = stateId;
761
16.7M
    }
762
10.0M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
347k
    {
764
347k
      decisions.rdCost[idxAZ] = rdCostZ;
765
347k
      decisions.absLevel[idxAZ] = 0;
766
347k
      decisions.prevId[idxAZ] = stateId;
767
347k
    }
768
769
26.7M
    if( rdCostB < decisions.rdCost[idxB] )
770
17.0M
    {
771
17.0M
      decisions.rdCost[idxB] = rdCostB;
772
17.0M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
17.0M
      decisions.prevId[idxB] = stateId;
774
17.0M
    }
775
26.7M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<true>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
699
4.83M
  {
700
4.83M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
4.83M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
4.83M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
4.83M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
4.83M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
4.83M
    {
707
4.83M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
4.83M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
4.83M
      if( pqDataA.absLevel < 4 )
711
4.83M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
0
      else
713
0
      {
714
0
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
0
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
0
      }
717
718
4.83M
      if( pqDataB.absLevel < 4 )
719
4.83M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
0
      else
721
0
      {
722
0
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
0
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
0
      }
725
726
4.83M
      if( spt == SCAN_ISCSBB )
727
4.80M
      {
728
4.80M
        rdCostA += sigBits.intBits[1];
729
4.80M
        rdCostB += sigBits.intBits[1];
730
4.80M
        rdCostZ += sigBits.intBits[0];
731
4.80M
      }
732
28.8k
      else if( spt == SCAN_SOCSBB )
733
10.0k
      {
734
10.0k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
10.0k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
10.0k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
10.0k
      }
738
18.8k
      else if( state.numSig[stateId] )
739
17.9k
      {
740
17.9k
        rdCostA += sigBits.intBits[1];
741
17.9k
        rdCostB += sigBits.intBits[1];
742
17.9k
        rdCostZ += sigBits.intBits[0];
743
17.9k
      }
744
902
      else
745
902
      {
746
902
        rdCostZ = rdCostInit;
747
902
      }
748
4.83M
    }
749
0
    else
750
0
    {
751
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
0
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
0
    }
755
756
4.83M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
3.17M
    {
758
3.17M
      decisions.rdCost[idxAZ] = rdCostA;
759
3.17M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
3.17M
      decisions.prevId[idxAZ] = stateId;
761
3.17M
    }
762
1.65M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
347k
    {
764
347k
      decisions.rdCost[idxAZ] = rdCostZ;
765
347k
      decisions.absLevel[idxAZ] = 0;
766
347k
      decisions.prevId[idxAZ] = stateId;
767
347k
    }
768
769
4.83M
    if( rdCostB < decisions.rdCost[idxB] )
770
3.52M
    {
771
3.52M
      decisions.rdCost[idxB] = rdCostB;
772
3.52M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
3.52M
      decisions.prevId[idxB] = stateId;
774
3.52M
    }
775
4.83M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<false>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
699
21.8M
  {
700
21.8M
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
21.8M
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
21.8M
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
21.8M
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
21.8M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
21.4M
    {
707
21.4M
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
21.4M
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
21.4M
      if( pqDataA.absLevel < 4 )
711
1.59M
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
19.8M
      else
713
19.8M
      {
714
19.8M
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
19.8M
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
19.8M
      }
717
718
21.4M
      if( pqDataB.absLevel < 4 )
719
3.18M
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
18.2M
      else
721
18.2M
      {
722
18.2M
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
18.2M
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
18.2M
      }
725
726
21.4M
      if( spt == SCAN_ISCSBB )
727
21.4M
      {
728
21.4M
        rdCostA += sigBits.intBits[1];
729
21.4M
        rdCostB += sigBits.intBits[1];
730
21.4M
        rdCostZ += sigBits.intBits[0];
731
21.4M
      }
732
45.6k
      else if( spt == SCAN_SOCSBB )
733
2.08k
      {
734
2.08k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
2.08k
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
2.08k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
2.08k
      }
738
43.5k
      else if( state.numSig[stateId] )
739
43.1k
      {
740
43.1k
        rdCostA += sigBits.intBits[1];
741
43.1k
        rdCostB += sigBits.intBits[1];
742
43.1k
        rdCostZ += sigBits.intBits[0];
743
43.1k
      }
744
368
      else
745
368
      {
746
368
        rdCostZ = rdCostInit;
747
368
      }
748
21.4M
    }
749
427k
    else
750
427k
    {
751
427k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
427k
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
427k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
427k
    }
755
756
21.8M
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
13.5M
    {
758
13.5M
      decisions.rdCost[idxAZ] = rdCostA;
759
13.5M
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
13.5M
      decisions.prevId[idxAZ] = stateId;
761
13.5M
    }
762
8.35M
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
571
    {
764
571
      decisions.rdCost[idxAZ] = rdCostZ;
765
571
      decisions.absLevel[idxAZ] = 0;
766
571
      decisions.prevId[idxAZ] = stateId;
767
571
    }
768
769
21.8M
    if( rdCostB < decisions.rdCost[idxB] )
770
13.5M
    {
771
13.5M
      decisions.rdCost[idxB] = rdCostB;
772
13.5M
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
13.5M
      decisions.prevId[idxB] = stateId;
774
13.5M
    }
775
21.8M
  }
776
777
  void checkAllRdCosts( const DQIntern::ScanPosType spt, const DQIntern::PQData* pqData, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
778
1.20M
  {
779
1.20M
    checkRdCosts<true>( 0, spt, pqData[0], pqData[2], decisions, 0, 2, state );
780
1.20M
    checkRdCosts<true>( 1, spt, pqData[0], pqData[2], decisions, 2, 0, state );
781
1.20M
    checkRdCosts<true>( 2, spt, pqData[3], pqData[1], decisions, 1, 3, state );
782
1.20M
    checkRdCosts<true>( 3, spt, pqData[3], pqData[1], decisions, 3, 1, state );
783
1.20M
  }
784
785
  template<bool rrgEnsured = false>
786
  static void checkRdCostsOdd1( const int stateId, const ScanPosType spt, const int64_t deltaDist, Decisions& decisions, int idxA, int idxZ, const StateMem& state )
787
5.37M
  {
788
5.37M
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
5.37M
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
5.37M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
5.37M
    {
793
5.37M
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
5.37M
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
5.37M
      if( spt == SCAN_ISCSBB )
798
5.24M
      {
799
5.24M
        rdCostA += sigBits.intBits[1];
800
5.24M
        rdCostZ += sigBits.intBits[0];
801
5.24M
      }
802
132k
      else if( spt == SCAN_SOCSBB )
803
86.4k
      {
804
86.4k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
86.4k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
86.4k
      }
807
46.5k
      else if( state.numSig[stateId] )
808
9.03k
      {
809
9.03k
        rdCostA += sigBits.intBits[1];
810
9.03k
        rdCostZ += sigBits.intBits[0];
811
9.03k
      }
812
37.4k
      else
813
37.4k
      {
814
37.4k
        rdCostZ = rdCostInit;
815
37.4k
      }
816
5.37M
    }
817
1.62k
    else
818
1.62k
    {
819
1.62k
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
1.62k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
1.62k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
1.62k
    }
824
825
5.37M
    if( rdCostA < decisions.rdCost[idxA] )
826
3.20M
    {
827
3.20M
      decisions.rdCost[idxA] = rdCostA;
828
3.20M
      decisions.absLevel[idxA] = 1;
829
3.20M
      decisions.prevId[idxA] = stateId;
830
3.20M
    }
831
832
5.37M
    if( rdCostZ < decisions.rdCost[idxZ] )
833
3.79M
    {
834
3.79M
      decisions.rdCost[idxZ] = rdCostZ;
835
3.79M
      decisions.absLevel[idxZ] = 0;
836
3.79M
      decisions.prevId[idxZ] = stateId;
837
3.79M
    }
838
5.37M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<true>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
787
5.00M
  {
788
5.00M
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
5.00M
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
5.00M
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
5.00M
    {
793
5.00M
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
5.00M
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
5.00M
      if( spt == SCAN_ISCSBB )
798
4.87M
      {
799
4.87M
        rdCostA += sigBits.intBits[1];
800
4.87M
        rdCostZ += sigBits.intBits[0];
801
4.87M
      }
802
132k
      else if( spt == SCAN_SOCSBB )
803
86.2k
      {
804
86.2k
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
86.2k
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
86.2k
      }
807
46.0k
      else if( state.numSig[stateId] )
808
9.03k
      {
809
9.03k
        rdCostA += sigBits.intBits[1];
810
9.03k
        rdCostZ += sigBits.intBits[0];
811
9.03k
      }
812
37.0k
      else
813
37.0k
      {
814
37.0k
        rdCostZ = rdCostInit;
815
37.0k
      }
816
5.00M
    }
817
0
    else
818
0
    {
819
0
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
0
    }
824
825
5.00M
    if( rdCostA < decisions.rdCost[idxA] )
826
3.20M
    {
827
3.20M
      decisions.rdCost[idxA] = rdCostA;
828
3.20M
      decisions.absLevel[idxA] = 1;
829
3.20M
      decisions.prevId[idxA] = stateId;
830
3.20M
    }
831
832
5.00M
    if( rdCostZ < decisions.rdCost[idxZ] )
833
3.79M
    {
834
3.79M
      decisions.rdCost[idxZ] = rdCostZ;
835
3.79M
      decisions.absLevel[idxZ] = 0;
836
3.79M
      decisions.prevId[idxZ] = stateId;
837
3.79M
    }
838
5.00M
  }
DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<false>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Line
Count
Source
787
367k
  {
788
367k
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
367k
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
367k
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
365k
    {
793
365k
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
365k
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
365k
      if( spt == SCAN_ISCSBB )
798
364k
      {
799
364k
        rdCostA += sigBits.intBits[1];
800
364k
        rdCostZ += sigBits.intBits[0];
801
364k
      }
802
632
      else if( spt == SCAN_SOCSBB )
803
180
      {
804
180
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
180
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
180
      }
807
452
      else if( state.numSig[stateId] )
808
0
      {
809
0
        rdCostA += sigBits.intBits[1];
810
0
        rdCostZ += sigBits.intBits[0];
811
0
      }
812
452
      else
813
452
      {
814
452
        rdCostZ = rdCostInit;
815
452
      }
816
365k
    }
817
1.62k
    else
818
1.62k
    {
819
1.62k
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
1.62k
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
1.62k
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
1.62k
    }
824
825
367k
    if( rdCostA < decisions.rdCost[idxA] )
826
1.21k
    {
827
1.21k
      decisions.rdCost[idxA] = rdCostA;
828
1.21k
      decisions.absLevel[idxA] = 1;
829
1.21k
      decisions.prevId[idxA] = stateId;
830
1.21k
    }
831
832
367k
    if( rdCostZ < decisions.rdCost[idxZ] )
833
1.86k
    {
834
1.86k
      decisions.rdCost[idxZ] = rdCostZ;
835
1.86k
      decisions.absLevel[idxZ] = 0;
836
1.86k
      decisions.prevId[idxZ] = stateId;
837
1.86k
    }
838
367k
  }
839
840
  static void checkAllRdCostsOdd1( const DQIntern::ScanPosType spt, const int64_t pq_a_dist, const int64_t pq_b_dist, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
841
1.25M
  {
842
1.25M
    checkRdCostsOdd1<true>( 0, spt, pq_b_dist, decisions, 2, 0, state );
843
1.25M
    checkRdCostsOdd1<true>( 1, spt, pq_b_dist, decisions, 0, 2, state );
844
1.25M
    checkRdCostsOdd1<true>( 2, spt, pq_a_dist, decisions, 3, 1, state );
845
1.25M
    checkRdCostsOdd1<true>( 3, spt, pq_a_dist, decisions, 1, 3, state );
846
1.25M
  }
847
848
  static inline void checkRdCostStart( int32_t lastOffset, const PQData& pqData, Decisions& decisions, int idx, const StateMem& state )
849
14.7M
  {
850
14.7M
    const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[0];
851
852
14.7M
    int64_t rdCost = pqData.deltaDist + lastOffset;
853
14.7M
    if( pqData.absLevel < 4 )
854
5.19M
    {
855
5.19M
      rdCost += cffBits.bits[pqData.absLevel];
856
5.19M
    }
857
9.51M
    else
858
9.51M
    {
859
9.51M
      const unsigned value = ( pqData.absLevel - 4 ) >> 1;
860
9.51M
      rdCost += cffBits.bits[pqData.absLevel - ( value << 1 )] + g_goRiceBits[0][value < RICEMAX ? value : RICEMAX - 1];
861
9.51M
    }
862
863
14.7M
    if( rdCost < decisions.rdCost[idx] )
864
1.93M
    {
865
1.93M
      decisions.rdCost[idx]   = rdCost;
866
1.93M
      decisions.absLevel[idx] = pqData.absLevel;
867
1.93M
      decisions.prevId[idx]   = -1;
868
1.93M
    }
869
14.7M
  }
870
871
  static inline void checkRdCostSkipSbb( const int stateId, Decisions& decisions, int idx, const StateMem& state )
872
121k
  {
873
121k
    int64_t rdCost = state.rdCost[stateId] + state.sbbBits0[stateId];
874
121k
    if( rdCost < decisions.rdCost[idx] )
875
43.4k
    {
876
43.4k
      decisions.rdCost[idx]   = rdCost;
877
43.4k
      decisions.absLevel[idx] = 0;
878
43.4k
      decisions.prevId[idx]   = 4 | stateId;
879
43.4k
    }
880
121k
  }
881
882
  static inline void checkRdCostSkipSbbZeroOut( const int stateId, Decisions& decisions, int idx, const StateMem& state )
883
0
  {
884
0
    int64_t rdCost          = state.rdCost[stateId] + state.sbbBits0[stateId];
885
0
    decisions.rdCost[idx]   = rdCost;
886
0
    decisions.absLevel[idx] = 0;
887
0
    decisions.prevId[idx]   = 4 | stateId;
888
0
  }
889
890
  static inline void setRiceParam( const int stateId, const ScanInfo& scanInfo, StateMem& state, bool ge4 )
891
21.7M
  {
892
21.7M
    if( state.remRegBins[stateId] < 4 || ge4 )
893
20.2M
    {
894
20.2M
      TCoeff  sumAbs = state.sum1st[scanInfo.insidePos][stateId];
895
20.2M
      int sumSub     = state.remRegBins[stateId] < 4 ? 0 : 4 * 5;
896
20.2M
      int sumAll     = std::max( std::min( 31, ( int ) sumAbs - sumSub ), 0 );
897
20.2M
      state.m_goRicePar[stateId]
898
20.2M
                     = g_auiGoRiceParsCoeff[sumAll];
899
900
20.2M
      if( state.remRegBins[stateId] < 4 )
901
428k
      {
902
428k
        state.m_goRiceZero[stateId] = g_auiGoRicePosCoeff0( stateId, state.m_goRicePar[stateId] );
903
428k
      }
904
20.2M
    }
905
21.7M
  }
906
907
  static void update1State( int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr, DQIntern::StateMem& prev )
908
28.6M
  {
909
28.6M
    curr.rdCost[stateId] = decisions.rdCost[stateId];
910
28.6M
    if( decisions.prevId[stateId] > -2 )
911
27.4M
    {
912
27.4M
      if( decisions.prevId[stateId] >= 0 )
913
26.0M
      {
914
26.0M
        const int prevId          = decisions.prevId[stateId];
915
26.0M
        curr.numSig[stateId]      = prev.numSig[prevId] + !!decisions.absLevel[stateId];
916
26.0M
        curr.refSbbCtxId[stateId] = prev.refSbbCtxId[prevId];
917
26.0M
        curr.sbbBits0[stateId]    = prev.sbbBits0[prevId];
918
26.0M
        curr.sbbBits1[stateId]    = prev.sbbBits1[prevId];
919
26.0M
        curr.remRegBins[stateId]  = prev.remRegBins[prevId] - 1;
920
921
26.0M
        if( curr.remRegBins[stateId] >= 4 )
922
25.8M
        {
923
25.8M
          curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
924
25.8M
        }
925
926
443M
        for( int i = 0; i < 16; i++ )
927
417M
        {
928
417M
          curr.tplAcc[i][stateId] = prev.tplAcc[i][prevId];
929
417M
          curr.sum1st[i][stateId] = prev.sum1st[i][prevId];
930
417M
          curr.absVal[i][stateId] = prev.absVal[i][prevId];
931
417M
        }
932
26.0M
      }
933
1.35M
      else
934
1.35M
      {
935
1.35M
        curr.numSig[stateId]      =  1;
936
1.35M
        curr.refSbbCtxId[stateId] = -1;
937
1.35M
        curr.remRegBins[stateId]  = prev.initRemRegBins;
938
1.35M
        curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
939
940
23.0M
        for( int i = 0; i < 16; i++ )
941
21.7M
        {
942
21.7M
          curr.tplAcc[i][stateId] = 0;
943
21.7M
          curr.sum1st[i][stateId] = 0;
944
21.7M
          curr.absVal[i][stateId] = 0;
945
21.7M
        }
946
1.35M
      }
947
948
27.4M
      if( decisions.absLevel[stateId] )
949
24.2M
      {
950
24.2M
        curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );
951
952
24.2M
        if( scanInfo.currNbInfoSbb.numInv )
953
24.2M
        {
954
24.2M
          int min4_or_5 = std::min<TCoeff>( 4 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );
955
956
24.2M
          auto adds8 = []( uint8_t a, uint8_t b )
957
70.9M
          {
958
70.9M
            uint8_t c = a + b;
959
70.9M
            if( c < a ) c = -1;
960
70.9M
            return c;
961
70.9M
          };
962
963
24.2M
          auto update_deps = [&]( int k )
964
70.9M
          {
965
70.9M
            curr.tplAcc[scanInfo.currNbInfoSbb.invInPos[k]][stateId] += 32 + min4_or_5;
966
70.9M
            curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId] = adds8( curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId], decisions.absLevel[stateId] );
967
70.9M
          };
968
969
24.2M
          switch( scanInfo.currNbInfoSbb.numInv )
970
24.2M
          {
971
0
          default:
972
4.01M
          case 5:
973
4.01M
            update_deps( 4 );
974
10.1M
          case 4:
975
10.1M
            update_deps( 3 );
976
12.2M
          case 3:
977
12.2M
            update_deps( 2 );
978
20.2M
          case 2:
979
20.2M
            update_deps( 1 );
980
24.2M
          case 1:
981
24.2M
            update_deps( 0 );
982
24.2M
          }
983
24.2M
        }
984
24.2M
      }
985
986
27.4M
      if( curr.remRegBins[stateId] >= 4 )
987
27.0M
      {
988
27.0M
        TCoeff  sumAbs1 = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
989
27.0M
        TCoeff  sumNum  = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
990
27.0M
        int sumGt1 = sumAbs1 - sumNum;
991
992
27.0M
        curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( sumAbs1 + 1 ) >> 1, 3 );
993
27.0M
        curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
994
27.0M
      }
995
415k
      else
996
415k
      {
997
415k
        curr.anyRemRegBinsLt4 = true;
998
415k
      }
999
27.4M
    }
1000
28.6M
  }
1001
1002
  static void update1StateEOS( const int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::StateMem& prev, DQIntern::CommonCtx& commonCtx )
1003
127k
  {
1004
127k
    curr.rdCost[stateId] = decisions.rdCost[stateId];
1005
1006
127k
    if( decisions.prevId[stateId] > -2 )
1007
126k
    {
1008
126k
      if( decisions.prevId[stateId] >= 4 )
1009
43.4k
      {
1010
43.4k
        CHECK( decisions.absLevel[stateId] != 0, "cannot happen" );
1011
1012
43.4k
        const int prevId          = decisions.prevId[stateId] - 4;
1013
43.4k
        curr.numSig    [stateId]  = 0;
1014
43.4k
        curr.remRegBins[stateId]  = skip.remRegBins[prevId];
1015
43.4k
        curr.refSbbCtxId[stateId] = prevId;
1016
1017
739k
        for( int i = 0; i < 16; i++ )
1018
695k
        {
1019
695k
          curr.absVal[i][stateId] = 0;
1020
695k
        }
1021
43.4k
      }
1022
82.7k
      else if( decisions.prevId[stateId] >= 0 )
1023
80.8k
      {
1024
80.8k
        const int prevId          = decisions.prevId[stateId];
1025
80.8k
        curr.numSig[stateId]      = prev.numSig[prevId] + !!decisions.absLevel[stateId];
1026
80.8k
        curr.refSbbCtxId[stateId] = prev.refSbbCtxId[prevId];
1027
80.8k
        curr.remRegBins[stateId]  = prev.remRegBins[prevId] - 1;
1028
1029
80.8k
        if( curr.remRegBins[stateId] >= 4 )
1030
67.9k
        {
1031
67.9k
          curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
1032
67.9k
        }
1033
1034
1.37M
        for( int i = 0; i < 16; i++ )
1035
1.29M
        {
1036
1.29M
          curr.absVal[i][stateId] = prev.absVal[i][prevId];
1037
1.29M
        }
1038
80.8k
      }
1039
1.90k
      else
1040
1.90k
      {
1041
1.90k
        curr.numSig[stateId]      =  1;
1042
1.90k
        curr.refSbbCtxId[stateId] = -1;
1043
1.90k
        curr.remRegBins[stateId]  = prev.initRemRegBins;
1044
1.90k
        curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
1045
1046
32.3k
        for( int i = 0; i < 16; i++ )
1047
30.4k
        {
1048
30.4k
          curr.absVal[i][stateId] = 0;
1049
30.4k
        }
1050
1.90k
      }
1051
1052
126k
      curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );
1053
1054
126k
      uint8_t* levels[4];
1055
126k
      commonCtx.getLevelPtrs( scanInfo, levels[0], levels[1], levels[2], levels[3] );
1056
2.14M
      for( int i = 0; i < 16; i++ )
1057
2.02M
      {
1058
        // save abs levels to commonCtx
1059
2.02M
        levels[stateId][i] = curr.absVal[i][stateId];
1060
        // clean the SBB ctx
1061
2.02M
        curr.tplAcc[i][stateId] = 0;
1062
2.02M
        curr.sum1st[i][stateId] = 0;
1063
2.02M
        curr.absVal[i][stateId] = 0;
1064
2.02M
      }
1065
1066
126k
      commonCtx.update( scanInfo, curr.refSbbCtxId[stateId], stateId, curr );
1067
1068
126k
      curr.numSig[stateId] = 0;
1069
1070
126k
      if( curr.remRegBins[stateId] >= 4 )
1071
113k
      {
1072
113k
        TCoeff  sumAbs1 = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
1073
113k
        TCoeff  sumNum  = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
1074
113k
        int sumGt1 = sumAbs1 - sumNum;
1075
1076
113k
        curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( sumAbs1 + 1 ) >> 1, 3 );
1077
113k
        curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
1078
113k
      }
1079
13.1k
      else
1080
13.1k
      {
1081
13.1k
        curr.anyRemRegBinsLt4 = true;
1082
13.1k
      }
1083
126k
    }
1084
127k
  }
1085
1086
  static void updateStates( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr )
1087
7.15M
  {
1088
7.15M
    DQIntern::StateMem prev = curr;
1089
7.15M
    curr.anyRemRegBinsLt4   = false;
1090
1091
7.15M
    update1State( 0, scanInfo, decisions, curr, prev );
1092
7.15M
    update1State( 1, scanInfo, decisions, curr, prev );
1093
7.15M
    update1State( 2, scanInfo, decisions, curr, prev );
1094
7.15M
    update1State( 3, scanInfo, decisions, curr, prev );
1095
1096
7.15M
    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
1097
7.15M
  }
1098
1099
  static void updateStatesEOS( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::CommonCtx& commonCtx )
1100
31.9k
  {
1101
31.9k
    DQIntern::StateMem prev = curr;
1102
31.9k
    curr.anyRemRegBinsLt4   = false;
1103
1104
31.9k
    update1StateEOS( 0, scanInfo, decisions, skip, curr, prev, commonCtx );
1105
31.9k
    update1StateEOS( 1, scanInfo, decisions, skip, curr, prev, commonCtx );
1106
31.9k
    update1StateEOS( 2, scanInfo, decisions, skip, curr, prev, commonCtx );
1107
31.9k
    update1StateEOS( 3, scanInfo, decisions, skip, curr, prev, commonCtx );
1108
1109
31.9k
    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
1110
31.9k
  }
1111
}; // namespace DQIntern
1112
1113
static const DQIntern::Decisions startDec[2] =
1114
{
1115
  DQIntern::Decisions
1116
  {
1117
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
1118
    { -1, -1, -1, -1 },
1119
    { -2, -2, -2, -2 },
1120
  },
1121
  DQIntern::Decisions
1122
  {
1123
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
1124
    { 0, 0, 0, 0 },
1125
    { 4, 5, 6, 7 },
1126
  }
1127
};
1128
1129
void DepQuant::xQuantDQ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff )
1130
2.01M
{
1131
2.01M
  using namespace DQIntern;
1132
  
1133
  //===== reset / pre-init =====
1134
2.01M
  const TUParameters& tuPars  = *m_scansRom->getTUPars( tu.blocks[compID], compID );
1135
2.01M
  m_quant.initQuantBlock    ( tu, compID, cQP, lambda );
1136
2.01M
  TCoeffSig*    qCoeff      = tu.getCoeffs( compID ).buf;
1137
2.01M
  const TCoeff* tCoeff      = srcCoeff.buf;
1138
2.01M
  const int     numCoeff    = tu.blocks[compID].area();
1139
2.01M
  ::memset( qCoeff, 0x00, numCoeff * sizeof( TCoeffSig ) );
1140
2.01M
  absSum                    = 0;
1141
1142
2.01M
  const CompArea& area      = tu.blocks[ compID ];
1143
2.01M
  const uint32_t  width     = area.width;
1144
2.01M
  const uint32_t  height    = area.height;
1145
2.01M
  const uint32_t  lfnstIdx  = tu.cu->lfnstIdx;
1146
  //===== scaling matrix ====
1147
  //const int         qpDQ = cQP.Qp + 1;
1148
  //const int         qpPer = qpDQ / 6;
1149
  //const int         qpRem = qpDQ - 6 * qpPer;
1150
1151
  //TCoeff thresTmp = thres;
1152
2.01M
  bool zeroOut = false;
1153
2.01M
  bool zeroOutforThres = false;
1154
2.01M
  int effWidth = tuPars.m_width, effHeight = tuPars.m_height;
1155
2.01M
  if( ( tu.mtsIdx[compID] > MTS_SKIP || ( tu.cs->sps->MTS && tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32 ) ) && compID == COMP_Y )
1156
0
  {
1157
0
    effHeight = ( tuPars.m_height == 32 ) ? 16 : tuPars.m_height;
1158
0
    effWidth  = ( tuPars.m_width  == 32 ) ? 16 : tuPars.m_width;
1159
0
    zeroOut   = ( effHeight < tuPars.m_height || effWidth < tuPars.m_width );
1160
0
  }
1161
2.01M
  zeroOutforThres = zeroOut || ( 32 < tuPars.m_height || 32 < tuPars.m_width );
1162
  //===== find first test position =====
1163
2.01M
  int firstTestPos = std::min<int>( tuPars.m_width, JVET_C0024_ZERO_OUT_TH ) * std::min<int>( tuPars.m_height, JVET_C0024_ZERO_OUT_TH ) - 1;
1164
2.01M
  if( lfnstIdx > 0 && tu.mtsIdx[compID] != MTS_SKIP && width >= 4 && height >= 4 )
1165
1.25M
  {
1166
1.25M
    firstTestPos = ( ( width == 4 && height == 4 ) || ( width == 8 && height == 8 ) )  ? 7 : 15 ;
1167
1.25M
  }
1168
1169
2.01M
  const TCoeff defaultQuantisationCoefficient = (TCoeff)m_quant.getQScale();
1170
2.01M
  const TCoeff thres = m_quant.getLastThreshold();
1171
2.01M
  const int zeroOutWidth  = ( tuPars.m_width  == 32 && zeroOut ) ? 16 : 32;
1172
2.01M
  const int zeroOutHeight = ( tuPars.m_height == 32 && zeroOut ) ? 16 : 32;
1173
1174
2.01M
  if( enableScalingLists )
1175
0
  {
1176
0
    for( ; firstTestPos >= 0; firstTestPos-- )
1177
0
    {
1178
0
      if( zeroOutforThres && ( tuPars.m_scanId2BlkPos[firstTestPos].x >= zeroOutWidth || tuPars.m_scanId2BlkPos[firstTestPos].y >= zeroOutHeight ) ) continue;
1179
1180
0
      const TCoeff thresTmp = TCoeff( thres / ( 4 * quantCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx] ) );
1181
1182
0
      if( abs( tCoeff[tuPars.m_scanId2BlkPos[firstTestPos].idx] ) > thresTmp ) break;
1183
0
    }
1184
0
  }
1185
2.01M
  else
1186
2.01M
  {
1187
2.01M
    const TCoeff defaultTh = TCoeff( thres / ( defaultQuantisationCoefficient << 2 ) );
1188
1189
2.01M
    m_findFirstPos( firstTestPos, tCoeff, tuPars, defaultTh, zeroOutforThres, zeroOutWidth, zeroOutHeight );
1190
2.01M
  }
1191
1192
2.01M
  if( firstTestPos < 0 )
1193
1.17M
  {
1194
1.17M
    tu.lastPos[compID] = -1;
1195
1.17M
    return;
1196
1.17M
  }
1197
1198
  //===== real init =====
1199
836k
  RateEstimator::initCtx( tuPars, tu, compID, ctx.getFracBitsAcess() );
1200
836k
  m_commonCtx.reset( tuPars, *this );
1201
4.18M
  for( int k = 0; k < 4; k++ )
1202
3.34M
  {
1203
3.34M
    DQIntern::initStates( k, m_state_curr );
1204
3.34M
    DQIntern::initStates( k, m_state_skip );
1205
3.34M
    m_state_curr.m_sigFracBitsArray[k] = RateEstimator::sigFlagBits(k);
1206
3.34M
  }
1207
1208
836k
  m_state_curr.m_gtxFracBitsArray = RateEstimator::gtxFracBits();
1209
  //memset( m_state_curr.tplAcc, 0, sizeof( m_state_curr.tplAcc ) ); // will be set in updateStates{,EOS} before first access
1210
836k
  memset( m_state_curr.sum1st, 0, sizeof( m_state_curr.sum1st ) );   // will be accessed in setRiceParam before updateState{,EOS}
1211
  //memset( m_state_curr.absVal, 0, sizeof( m_state_curr.absVal ) ); // will be set in updateStates{,EOS} before first access
1212
1213
836k
  const int numCtx = isLuma( compID ) ? 21 : 11;
1214
836k
  const CoeffFracBits* const cffBits = gtxFracBits();
1215
10.3M
  for( int i = 0; i < numCtx; i++ )
1216
9.52M
  {
1217
9.52M
    m_state_curr.cffBits1[i] = cffBits[i].bits[1];
1218
9.52M
  }
1219
1220
836k
  int effectWidth  = std::min( 32, effWidth );
1221
836k
  int effectHeight = std::min( 32, effHeight );
1222
836k
  m_state_curr.initRemRegBins   = ( effectWidth * effectHeight * MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT ) / 16;
1223
836k
  m_state_curr.anyRemRegBinsLt4 = true; // for the first coeff use scalar impl., because it check against the init state, which
1224
                                        // prohibits some paths
1225
1226
  //===== populate trellis =====
1227
8.86M
  for( int scanIdx = firstTestPos; scanIdx >= 0; scanIdx-- )
1228
8.02M
  {
1229
8.02M
    const ScanInfo& scanInfo = tuPars.m_scanInfo[ scanIdx ];
1230
8.02M
    if( enableScalingLists )
1231
0
    {
1232
0
      m_quant.initQuantBlock( tu, compID, cQP, lambda, quantCoeff[scanInfo.rasterPos] );
1233
0
      xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos] ), scanInfo, zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ), quantCoeff[scanInfo.rasterPos] );
1234
0
    }
1235
8.02M
    else
1236
8.02M
      xDecideAndUpdate( abs( tCoeff[scanInfo.rasterPos] ), scanInfo, zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight ), defaultQuantisationCoefficient );
1237
8.02M
  }
1238
1239
  //===== find best path =====
1240
836k
  int       prevId      = -1;
1241
836k
  int64_t   minPathCost =  0;
1242
4.18M
  for( int8_t stateId = 0; stateId < 4; stateId++ )
1243
3.34M
  {
1244
3.34M
    int64_t pathCost = m_trellis[0][0].rdCost[stateId];
1245
3.34M
    if( pathCost < minPathCost )
1246
1.42M
    {
1247
1.42M
      prevId      = stateId;
1248
1.42M
      minPathCost = pathCost;
1249
1.42M
    }
1250
3.34M
  }
1251
1252
  //===== backward scanning =====
1253
836k
  int scanIdx = 0;
1254
8.75M
  for( ; prevId >= 0; scanIdx++ )
1255
7.92M
  {
1256
7.92M
    TCoeffSig absLevel = m_trellis[scanIdx][prevId >> 2].absLevel[prevId & 3];
1257
7.92M
    int32_t blkpos     = tuPars.m_scanId2BlkPos[scanIdx].idx;
1258
7.92M
    qCoeff[ blkpos ]   = TCoeffSig( tCoeff[blkpos] < 0 ? -absLevel : absLevel );
1259
7.92M
    absSum            += absLevel;
1260
7.92M
    prevId             = m_trellis[scanIdx][prevId >> 2].prevId[prevId & 3];
1261
7.92M
  }
1262
1263
836k
  tu.lastPos[compID] = scanIdx - 1;
1264
836k
}
1265
1266
void DepQuant::xDecide( const DQIntern::ScanInfo& scanInfo, const TCoeff absCoeff, const int lastOffset, DQIntern::Decisions& decisions, bool zeroOut, int quantCoeff )
1267
8.02M
{
1268
8.02M
  using namespace DQIntern;
1269
1270
8.02M
  ::memcpy( &decisions, startDec, sizeof( Decisions ) );
1271
1272
8.02M
  StateMem& skip = m_state_skip;
1273
1274
8.02M
  if( zeroOut )
1275
0
  {
1276
0
    if( scanInfo.spt == SCAN_EOCSBB )
1277
0
    {
1278
0
      checkRdCostSkipSbbZeroOut( 0, decisions, 0, skip );
1279
0
      checkRdCostSkipSbbZeroOut( 1, decisions, 1, skip );
1280
0
      checkRdCostSkipSbbZeroOut( 2, decisions, 2, skip );
1281
0
      checkRdCostSkipSbbZeroOut( 3, decisions, 3, skip );
1282
0
    }
1283
0
    return;
1284
0
  }
1285
1286
8.02M
  StateMem& prev = m_state_curr;
1287
1288
  /// start inline prequant
1289
8.02M
  int64_t scaledOrg = int64_t( absCoeff ) * quantCoeff;
1290
8.02M
  TCoeff  qIdx      = TCoeff( ( scaledOrg + m_quant.m_QAdd ) >> m_quant.m_QShift );
1291
1292
8.02M
  if( qIdx < 0 )
1293
1.34M
  {
1294
1.34M
    int64_t scaledAdd = m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;
1295
1.34M
    int64_t pq_a_dist = ( ( scaledAdd + 0 * m_quant.m_DistStepAdd ) * 1 + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1296
1.34M
    int64_t pq_b_dist = ( ( scaledAdd + 1 * m_quant.m_DistStepAdd ) * 2 + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1297
    /// stop inline prequant
1298
1299
1.34M
    if( prev.anyRemRegBinsLt4 )
1300
91.7k
    {
1301
91.7k
      setRiceParam( 0, scanInfo, prev, false );
1302
91.7k
      checkRdCostsOdd1( 0, scanInfo.spt, pq_b_dist, decisions, 2, 0, prev );
1303
1304
91.7k
      setRiceParam( 1, scanInfo, prev, false );
1305
91.7k
      checkRdCostsOdd1( 1, scanInfo.spt, pq_b_dist, decisions, 0, 2, prev );
1306
1307
91.7k
      setRiceParam( 2, scanInfo, prev, false );
1308
91.7k
      checkRdCostsOdd1( 2, scanInfo.spt, pq_a_dist, decisions, 3, 1, prev );
1309
1310
91.7k
      setRiceParam( 3, scanInfo, prev, false );
1311
91.7k
      checkRdCostsOdd1( 3, scanInfo.spt, pq_a_dist, decisions, 1, 3, prev );
1312
91.7k
    }
1313
1.25M
    else
1314
1.25M
    {
1315
      // has to be called as a first check, assumes no decision has been made yet
1316
1.25M
      m_checkAllRdCostsOdd1( scanInfo.spt, pq_a_dist, pq_b_dist, decisions, prev );
1317
1.25M
    }
1318
1319
1.34M
    checkRdCostStart( lastOffset, PQData{ 1, pq_b_dist }, decisions, 2, prev );
1320
1.34M
  }
1321
6.67M
  else
1322
6.67M
  {
1323
    /// start inline prequant
1324
6.67M
    qIdx = std::max<TCoeff>( 1, std::min<TCoeff>( m_quant.m_maxQIdx, qIdx ) );
1325
6.67M
    int64_t scaledAdd = qIdx * m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;
1326
1327
6.67M
    PQData  pqData[4];
1328
1329
6.67M
    PQData& pq_a = pqData[( qIdx + 0 ) & 3];
1330
6.67M
    PQData& pq_b = pqData[( qIdx + 1 ) & 3];
1331
6.67M
    PQData& pq_c = pqData[( qIdx + 2 ) & 3];
1332
6.67M
    PQData& pq_d = pqData[( qIdx + 3 ) & 3];
1333
1334
6.67M
    pq_a.deltaDist = ( ( scaledAdd + 0 * m_quant.m_DistStepAdd ) * ( qIdx + 0 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1335
6.67M
    pq_a.absLevel = ( qIdx + 1 ) >> 1;
1336
1337
6.67M
    pq_b.deltaDist = ( ( scaledAdd + 1 * m_quant.m_DistStepAdd ) * ( qIdx + 1 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1338
6.67M
    pq_b.absLevel = ( qIdx + 2 ) >> 1;
1339
1340
6.67M
    pq_c.deltaDist = ( ( scaledAdd + 2 * m_quant.m_DistStepAdd ) * ( qIdx + 2 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1341
6.67M
    pq_c.absLevel = ( qIdx + 3 ) >> 1;
1342
1343
6.67M
    pq_d.deltaDist = ( ( scaledAdd + 3 * m_quant.m_DistStepAdd ) * ( qIdx + 3 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
1344
6.67M
    pq_d.absLevel = ( qIdx + 4 ) >> 1;
1345
    /// stop inline prequant
1346
1347
6.67M
    bool cff02ge4 = pqData[0].absLevel >= 4/* || pqData[2].absLevel >= 4 */;
1348
6.67M
    bool cff13ge4 = /* pqData[1].absLevel >= 4 || */ pqData[3].absLevel >= 4;
1349
1350
6.67M
    if( cff02ge4 || cff13ge4 || prev.anyRemRegBinsLt4 )
1351
5.47M
    {
1352
5.47M
      if( prev.anyRemRegBinsLt4 || cff02ge4 )
1353
5.21M
      {
1354
5.21M
        setRiceParam( 0, scanInfo, prev, cff02ge4 );
1355
5.21M
        setRiceParam( 1, scanInfo, prev, cff02ge4 );
1356
5.21M
      }
1357
1358
5.47M
      if( prev.anyRemRegBinsLt4 || cff13ge4 )
1359
5.47M
      {
1360
5.47M
        setRiceParam( 2, scanInfo, prev, cff13ge4 );
1361
5.47M
        setRiceParam( 3, scanInfo, prev, cff13ge4 );
1362
5.47M
      }
1363
1364
5.47M
      checkRdCosts( 0, scanInfo.spt, pqData[0], pqData[2], decisions, 0, 2, prev );
1365
5.47M
      checkRdCosts( 1, scanInfo.spt, pqData[0], pqData[2], decisions, 2, 0, prev );
1366
5.47M
      checkRdCosts( 2, scanInfo.spt, pqData[3], pqData[1], decisions, 1, 3, prev );
1367
5.47M
      checkRdCosts( 3, scanInfo.spt, pqData[3], pqData[1], decisions, 3, 1, prev );
1368
5.47M
    }
1369
1.20M
    else
1370
1.20M
    {
1371
      // has to be called as a first check, assumes no decision has been made yet
1372
1.20M
      m_checkAllRdCosts( scanInfo.spt, pqData, decisions, prev );
1373
1.20M
    }
1374
1375
6.67M
    checkRdCostStart( lastOffset, pqData[0], decisions, 0, prev );
1376
6.67M
    checkRdCostStart( lastOffset, pqData[2], decisions, 2, prev );
1377
6.67M
  }
1378
1379
8.02M
  if( scanInfo.spt == SCAN_EOCSBB )
1380
30.3k
  {
1381
30.3k
    checkRdCostSkipSbb( 0, decisions, 0, skip );
1382
30.3k
    checkRdCostSkipSbb( 1, decisions, 1, skip );
1383
30.3k
    checkRdCostSkipSbb( 2, decisions, 2, skip );
1384
30.3k
    checkRdCostSkipSbb( 3, decisions, 3, skip );
1385
30.3k
  }
1386
8.02M
}
1387
1388
void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const DQIntern::ScanInfo& scanInfo, bool zeroOut, int quantCoeff )
1389
8.02M
{
1390
8.02M
  using namespace DQIntern;
1391
1392
8.02M
  Decisions* decisions = &m_trellis[scanInfo.scanIdx][0];
1393
1394
8.02M
  xDecide( scanInfo, absCoeff, lastOffset( scanInfo.scanIdx ), *decisions, zeroOut, quantCoeff );
1395
1396
8.02M
  if( scanInfo.scanIdx )
1397
7.18M
  {
1398
7.18M
    if( scanInfo.spt == SCAN_SOCSBB )
1399
26.6k
    {
1400
26.6k
      memcpy( &m_state_skip, &m_state_curr, DQIntern::StateMemSkipCpySize );
1401
26.6k
    }
1402
1403
7.18M
    if( scanInfo.insidePos == 0 )
1404
31.9k
    {
1405
31.9k
      m_commonCtx.swap();
1406
31.9k
      m_updateStatesEOS( scanInfo, *decisions, m_state_skip, m_state_curr, m_commonCtx );
1407
31.9k
      ::memcpy( decisions + 1, decisions, sizeof( Decisions ) );
1408
31.9k
    }
1409
7.15M
    else if( !zeroOut )
1410
7.15M
    {
1411
7.15M
      m_updateStates( scanInfo, *decisions, m_state_curr );
1412
7.15M
    }
1413
7.18M
  }
1414
8.02M
}
1415
1416
void DepQuant::xDequantDQ( const TransformUnit& tu,  CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* piDequantCoef )
1417
824k
{
1418
824k
  m_quant.dequantBlock( tu, compID, cQP, recCoeff, enableScalingLists, piDequantCoef );
1419
824k
}
1420
1421
20.7k
DepQuant::DepQuant( const Quant* other, bool enc, bool useScalingLists, bool enableOpt ) : QuantRDOQ2( other, useScalingLists ), RateEstimator(), m_commonCtx()
1422
20.7k
{
1423
20.7k
  const DepQuant* dq = dynamic_cast<const DepQuant*>( other );
1424
20.7k
  CHECK( other && !dq, "The DepQuant cast must be successfull!" );
1425
1426
20.7k
  if( !dq )
1427
20.7k
  {
1428
20.7k
    m_scansRom = std::make_shared<DQIntern::Rom>();
1429
20.7k
    m_scansRom->init();
1430
20.7k
  }
1431
0
  else
1432
0
  {
1433
0
    m_scansRom = dq->m_scansRom;
1434
0
  }
1435
1436
85.0M
  for( int t = 0; t < ( MAX_TB_SIZEY * MAX_TB_SIZEY ); t++ )
1437
85.0M
  {
1438
85.0M
    memcpy( m_trellis[t], startDec, sizeof( startDec ) );
1439
85.0M
  }
1440
1441
20.7k
  m_checkAllRdCosts     = DQIntern::checkAllRdCosts;
1442
20.7k
  m_checkAllRdCostsOdd1 = DQIntern::checkAllRdCostsOdd1;
1443
20.7k
  m_updateStatesEOS     = DQIntern::updateStatesEOS;
1444
20.7k
  m_updateStates        = DQIntern::updateStates;
1445
20.7k
  m_findFirstPos        = DQIntern::findFirstPos;
1446
1447
20.7k
  if( enableOpt )
1448
20.7k
  {
1449
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_OPT_QUANT
1450
    initDepQuantX86();
1451
#endif
1452
#if defined( TARGET_SIMD_ARM ) && ENABLE_SIMD_OPT_QUANT
1453
    initDepQuantARM();
1454
#endif
1455
20.7k
  }
1456
20.7k
}
1457
1458
DepQuant::~DepQuant()
1459
20.7k
{
1460
20.7k
}
1461
1462
void DepQuant::quant( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff& uiAbsSum, const QpParam& cQP, const Ctx& ctx )
1463
2.12M
{
1464
2.12M
  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ( tu, compID, pSrc, cQP ) )
1465
0
  {
1466
0
    tu.lastPos[compID] = -1;
1467
0
    uiAbsSum           =  0;
1468
0
  }
1469
2.12M
  else if( tu.cs->slice->depQuantEnabled && tu.mtsIdx[compID] != MTS_SKIP )
1470
2.01M
  {
1471
    //===== scaling matrix ====
1472
2.01M
    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
1473
2.01M
    const int         qpPer           = qpDQ / 6;
1474
2.01M
    const int         qpRem           = qpDQ - 6 * qpPer;
1475
2.01M
    const CompArea    &rect           = tu.blocks[compID];
1476
2.01M
    const int         width           = rect.width;
1477
2.01M
    const int         height          = rect.height;
1478
2.01M
    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
1479
2.01M
    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
1480
2.01M
    const uint32_t    log2TrWidth     = Log2(width);
1481
2.01M
    const uint32_t    log2TrHeight    = Log2(height);
1482
2.01M
    const bool isLfnstApplied         = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
1483
2.01M
    const bool enableScalingLists     = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), isLfnstApplied);
1484
2.01M
    xQuantDQ( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum, enableScalingLists, Quant::getQuantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
1485
2.01M
  }
1486
108k
  else
1487
108k
  {
1488
108k
    QuantRDOQ2::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
1489
108k
  }
1490
2.12M
}
1491
1492
void DepQuant::dequant( const TransformUnit& tu, CoeffBuf& dstCoeff, const ComponentID compID, const QpParam& cQP )
1493
875k
{
1494
875k
  if( tu.cs->slice->depQuantEnabled && (tu.mtsIdx[compID] != MTS_SKIP) )
1495
824k
  {
1496
824k
    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
1497
824k
    const int         qpPer           = qpDQ / 6;
1498
824k
    const int         qpRem           = qpDQ - 6 * qpPer;
1499
824k
    const CompArea    &rect           = tu.blocks[compID];
1500
824k
    const int         width           = rect.width;
1501
824k
    const int         height          = rect.height;
1502
824k
    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
1503
824k
    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
1504
824k
    const uint32_t    log2TrWidth    = Log2(width);
1505
824k
    const uint32_t    log2TrHeight   = Log2(height);
1506
824k
    const bool isLfnstApplied        = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
1507
824k
    const bool enableScalingLists    = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), isLfnstApplied);
1508
824k
    xDequantDQ( tu, dstCoeff, compID, cQP, enableScalingLists, Quant::getDequantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
1509
824k
  }
1510
51.0k
  else
1511
51.0k
  {
1512
51.0k
    QuantRDOQ::dequant( tu, dstCoeff, compID, cQP );
1513
51.0k
  }
1514
875k
}
1515
1516
void DepQuant::init( int rdoq, bool useRDOQTS, int thrVal )
1517
20.7k
{
1518
20.7k
  QuantRDOQ2::init( rdoq, useRDOQTS, thrVal );
1519
20.7k
  m_quant.init( thrVal );
1520
20.7k
}
1521
1522
} // namespace vvenc
1523
1524
//! \}
1525