Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/DepQuant.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
#include "DepQuant.h"
44
#include "TrQuant.h"
45
#include "CodingStructure.h"
46
#include "UnitTools.h"
47
48
#include <bitset>
49
50
//! \ingroup CommonLib
51
//! \{
52
53
namespace vvenc {
54
55
56
namespace DQIntern
57
{
58
  // Walks firstTestPos backwards (towards scan index 0) until it points at a coefficient whose
  // absolute value exceeds defaultTh. When zeroOutForThres is set, positions whose x/y lie at or
  // beyond zeroOutWidth/zeroOutHeight are stepped over without looking at the coefficient.
  // On exit firstTestPos is either the index of the first such coefficient or -1 if none was found.
  static void findFirstPos( int& firstTestPos, const TCoeff* tCoeff, const DQIntern::TUParameters& tuPars, int defaultTh,
                            bool zeroOutForThres, int zeroOutWidth, int zeroOutHeight )
  {
    while( firstTestPos >= 0 )
    {
      const ScanElement& blkPos    = tuPars.m_scanId2BlkPos[firstTestPos];
      const bool         zeroedOut = zeroOutForThres && ( blkPos.x >= zeroOutWidth || blkPos.y >= zeroOutHeight );

      if( !zeroedOut && abs( tCoeff[blkPos.idx] ) > defaultTh )
      {
        return; // firstTestPos stays on the coefficient that passed the threshold
      }
      firstTestPos--;
    }
  }
74
75
  void Rom::xInitScanArrays()
76
0
  {
77
0
    if( m_scansInitialized )
78
0
    {
79
0
      return;
80
0
    }
81
0
    ::memset( m_scanId2NbInfoSbbArray, 0, sizeof(m_scanId2NbInfoSbbArray) );
82
0
    ::memset( m_scanId2NbInfoOutArray, 0, sizeof(m_scanId2NbInfoOutArray) );
83
0
    ::memset( m_tuParameters,          0, sizeof(m_tuParameters) );
84
85
0
    uint32_t raster2id[ MAX_CU_SIZE * MAX_CU_SIZE ];
86
0
    ::memset(raster2id, 0, sizeof(raster2id));
87
88
0
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
89
0
    {
90
0
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
91
0
      {
92
0
        if( (hd == 0 && vd <= 1) || (hd <= 1 && vd == 0) )
93
0
        {
94
0
          continue;
95
0
        }
96
0
        const uint32_t      blockWidth    = (1 << hd);
97
0
        const uint32_t      blockHeight   = (1 << vd);
98
0
        const uint32_t      log2CGWidth   = g_log2SbbSize[hd][vd][0];
99
0
        const uint32_t      log2CGHeight  = g_log2SbbSize[hd][vd][1];
100
0
        const uint32_t      groupWidth    = 1 << log2CGWidth;
101
0
        const uint32_t      groupHeight   = 1 << log2CGHeight;
102
0
        const uint32_t      groupSize     = groupWidth * groupHeight;
103
0
        const SizeType      blkWidthIdx   = Log2( blockWidth );
104
0
        const SizeType      blkHeightIdx  = Log2( blockHeight );
105
0
        const ScanElement * scanId2RP     = getScanOrder( SCAN_GROUPED_4x4, blkWidthIdx, blkHeightIdx );
106
0
        NbInfoSbb*&         sId2NbSbb     = m_scanId2NbInfoSbbArray[hd][vd];
107
0
        NbInfoOut*&         sId2NbOut     = m_scanId2NbInfoOutArray[hd][vd];
108
        // consider only non-zero-out region
109
0
        const uint32_t      blkWidthNZOut = std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockWidth  );
110
0
        const uint32_t      blkHeightNZOut= std::min<unsigned>( JVET_C0024_ZERO_OUT_TH, blockHeight );
111
0
        const uint32_t      totalValues   = blkWidthNZOut * blkHeightNZOut;
112
113
0
        sId2NbSbb = new NbInfoSbb[ totalValues ];
114
0
        sId2NbOut = new NbInfoOut[ totalValues ];
115
116
0
        for( uint32_t scanId = 0; scanId < totalValues; scanId++ )
117
0
        {
118
0
          raster2id[scanId2RP[scanId].idx] = scanId;
119
0
          sId2NbSbb[scanId].numInv = 0;
120
0
        }
121
122
0
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
123
0
        {
124
0
          const int posX = scanId2RP[scanId].x;
125
0
          const int posY = scanId2RP[scanId].y;
126
0
          const int rpos = scanId2RP[scanId].idx;
127
0
          {
128
            //===== inside subband neighbours =====
129
0
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
130
0
            int            cpos[5];
131
132
0
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] < groupSize + begSbb ? raster2id[rpos+1           ] - begSbb : 0 ) : 0 );
133
0
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] < groupSize + begSbb ? raster2id[rpos+2           ] - begSbb : 0 ) : 0 );
134
0
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] < groupSize + begSbb ? raster2id[rpos+1+blockWidth] - begSbb : 0 ) : 0 );
135
0
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] < groupSize + begSbb ? raster2id[rpos+  blockWidth] - begSbb : 0 ) : 0 );
136
0
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] < groupSize + begSbb ? raster2id[rpos+2*blockWidth] - begSbb : 0 ) : 0 );
137
138
0
            int num = 0;
139
0
            int inPos[5] = { 0, };
140
141
0
            while( true )
142
0
            {
143
0
              int nk = -1;
144
0
              for( int k = 0; k < 5; k++ )
145
0
              {
146
0
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
147
0
                {
148
0
                  nk = k;
149
0
                }
150
0
              }
151
0
              if( nk < 0 )
152
0
              {
153
0
                break;
154
0
              }
155
0
              inPos[ num++ ] = uint8_t( cpos[nk] );
156
0
              cpos[nk] = 0;
157
0
            }
158
0
            for( int k = num; k < 5; k++ )
159
0
            {
160
0
              inPos[k] = 0;
161
0
            }
162
0
            for( int k = 0; k < num; k++ )
163
0
            {
164
0
              CHECK( sId2NbSbb[begSbb + inPos[k]].numInv >= 5, "" );
165
0
              sId2NbSbb[begSbb + inPos[k]].invInPos[sId2NbSbb[begSbb + inPos[k]].numInv++] = scanId & ( groupSize - 1 );
166
0
            }
167
0
          }
168
0
          {
169
            //===== outside subband neighbours =====
170
0
            NbInfoOut&     nbOut  = sId2NbOut[ scanId ];
171
0
            const int      begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
172
0
            int            cpos[5];
173
174
0
            cpos[0] = ( posX + 1 < blkWidthNZOut                              ? ( raster2id[rpos+1           ] >= groupSize + begSbb ? raster2id[rpos+1           ] : 0 ) : 0 );
175
0
            cpos[1] = ( posX + 2 < blkWidthNZOut                              ? ( raster2id[rpos+2           ] >= groupSize + begSbb ? raster2id[rpos+2           ] : 0 ) : 0 );
176
0
            cpos[2] = ( posX + 1 < blkWidthNZOut && posY + 1 < blkHeightNZOut ? ( raster2id[rpos+1+blockWidth] >= groupSize + begSbb ? raster2id[rpos+1+blockWidth] : 0 ) : 0 );
177
0
            cpos[3] = ( posY + 1 < blkHeightNZOut                             ? ( raster2id[rpos+  blockWidth] >= groupSize + begSbb ? raster2id[rpos+  blockWidth] : 0 ) : 0 );
178
0
            cpos[4] = ( posY + 2 < blkHeightNZOut                             ? ( raster2id[rpos+2*blockWidth] >= groupSize + begSbb ? raster2id[rpos+2*blockWidth] : 0 ) : 0 );
179
180
0
            for( nbOut.num = 0; true; )
181
0
            {
182
0
              int nk = -1;
183
0
              for( int k = 0; k < 5; k++ )
184
0
              {
185
0
                if( cpos[k] != 0 && ( nk < 0 || cpos[k] < cpos[nk] ) )
186
0
                {
187
0
                  nk = k;
188
0
                }
189
0
              }
190
0
              if( nk < 0 )
191
0
              {
192
0
                break;
193
0
              }
194
0
              nbOut.outPos[ nbOut.num++ ] = uint16_t( cpos[nk] );
195
0
              cpos[nk] = 0;
196
0
            }
197
0
            for( int k = nbOut.num; k < 5; k++ )
198
0
            {
199
0
              nbOut.outPos[k] = 0;
200
0
            }
201
0
            nbOut.maxDist = ( scanId == 0 ? 0 : sId2NbOut[scanId-1].maxDist );
202
0
            for( int k = 0; k < nbOut.num; k++ )
203
0
            {
204
0
              if( nbOut.outPos[k] > nbOut.maxDist )
205
0
              {
206
0
                nbOut.maxDist = nbOut.outPos[k];
207
0
              }
208
0
            }
209
0
          }
210
0
        }
211
212
        // make it relative
213
0
        for( unsigned scanId = 0; scanId < totalValues; scanId++ )
214
0
        {
215
0
          NbInfoOut& nbOut  = sId2NbOut[scanId];
216
0
          const int  begSbb = scanId - ( scanId & (groupSize-1) ); // first pos in current subblock
217
0
          for( int k = 0; k < nbOut.num; k++ )
218
0
          {
219
0
            CHECK(begSbb > nbOut.outPos[k], "Position must be past sub block begin");
220
0
            nbOut.outPos[k] -= begSbb;
221
0
          }
222
0
          nbOut.maxDist -= scanId;
223
0
        }
224
225
0
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
226
0
        {
227
0
          m_tuParameters[hd][vd][chId] = new TUParameters( *this, blockWidth, blockHeight, ChannelType(chId) );
228
0
        }
229
0
      }
230
0
    }
231
0
    m_scansInitialized = true;
232
0
  }
233
234
  void Rom::xUninitScanArrays()
235
0
  {
236
0
    if( !m_scansInitialized )
237
0
    {
238
0
      return;
239
0
    }
240
0
    for( int hd = 0; hd < MAX_TU_SIZE_IDX; hd++ )
241
0
    {
242
0
      for( int vd = 0; vd < MAX_TU_SIZE_IDX; vd++ )
243
0
      {
244
0
        NbInfoSbb*& sId2NbSbb = m_scanId2NbInfoSbbArray[hd][vd];
245
0
        NbInfoOut*& sId2NbOut = m_scanId2NbInfoOutArray[hd][vd];
246
0
        if( sId2NbSbb )
247
0
        {
248
0
          delete [] sId2NbSbb;
249
0
        }
250
0
        if( sId2NbOut )
251
0
        {
252
0
          delete [] sId2NbOut;
253
0
        }
254
0
        for( int chId = 0; chId < MAX_NUM_CH; chId++ )
255
0
        {
256
0
          TUParameters*& tuPars = m_tuParameters[hd][vd][chId];
257
0
          if( tuPars )
258
0
          {
259
0
            delete tuPars;
260
0
          }
261
0
        }
262
0
      }
263
0
    }
264
0
    m_scansInitialized = false;
265
0
  }
266
267
268
  // Collects all size-dependent parameters of a width x height transform block for one channel
  // type: coefficient count of the non-zeroed-out region, sub-block geometry, scan tables, the
  // neighbour tables obtained from rom, and one precomputed ScanInfo per scan position.
  TUParameters::TUParameters( const Rom& rom, const unsigned width, const unsigned height, const ChannelType chType )
  {
    m_chType = chType;
    m_width  = width;
    m_height = height;

    const int log2W = Log2( m_width  );
    const int log2H = Log2( m_height );

    // coefficients outside the zero-out threshold are not counted
    const uint32_t nzWidth  = std::min<uint32_t>( JVET_C0024_ZERO_OUT_TH, m_width  );
    const uint32_t nzHeight = std::min<uint32_t>( JVET_C0024_ZERO_OUT_TH, m_height );
    m_numCoeff = nzWidth * nzHeight;

    // sub-block geometry
    m_log2SbbWidth  = g_log2SbbSize[ log2W ][ log2H ][0];
    m_log2SbbHeight = g_log2SbbSize[ log2W ][ log2H ][1];
    m_log2SbbSize   = m_log2SbbWidth + m_log2SbbHeight;
    m_sbbSize       = ( 1 << m_log2SbbSize );
    m_sbbMask       = m_sbbSize - 1;
    m_widthInSbb    = nzWidth  >> m_log2SbbWidth;
    m_heightInSbb   = nzHeight >> m_log2SbbHeight;
    m_numSbb        = m_widthInSbb * m_heightInSbb;

    // scan order of the sub-blocks and of the coefficients
    const SizeType sbbColsIdx = Log2( m_widthInSbb  );
    const SizeType sbbRowsIdx = Log2( m_heightInSbb );
    m_scanSbbId2SbbPos = getScanOrder( SCAN_UNGROUPED  , sbbColsIdx       , sbbRowsIdx        );
    m_scanId2BlkPos    = getScanOrder( SCAN_GROUPED_4x4, SizeType( log2W ), SizeType( log2H ) );

    // neighbour tables for this block size
    m_scanId2NbInfoSbb = rom.getNbInfoSbb( log2W, log2H );
    m_scanId2NbInfoOut = rom.getNbInfoOut( log2W, log2H );

    // per-position scan info
    m_scanInfo = new ScanInfo[ m_numCoeff ];
    for( int pos = 0; pos < m_numCoeff; pos++ )
    {
      xSetScanInfo( m_scanInfo[pos], pos );
    }
  }
300
301
302
  // Fills scanInfo for scan position scanIdx: its raster / sub-block position, the position type
  // (spt), and, for scanIdx > 0, the context offsets and neighbour data of the position that is
  // processed next (scanIdx - 1).
  void TUParameters::xSetScanInfo( ScanInfo& scanInfo, int scanIdx )
  {
    const ScanElement& blkPos = m_scanId2BlkPos[scanIdx];

    scanInfo.sbbSize    = m_sbbSize;
    scanInfo.numSbb     = m_numSbb;
    scanInfo.scanIdx    = scanIdx;
    scanInfo.rasterPos  = blkPos.idx;
    scanInfo.sbbPos     = m_scanSbbId2SbbPos[scanIdx >> m_log2SbbSize].idx;
    scanInfo.insidePos  = scanIdx & m_sbbMask;

    // position type: default unless the position sits on a sub-block boundary
    scanInfo.spt        = SCAN_ISCSBB;
    if(  scanInfo.insidePos == m_sbbMask && scanIdx > scanInfo.sbbSize && scanIdx < m_numCoeff - 1 )
    {
      scanInfo.spt      = SCAN_SOCSBB;
    }
    else if( scanInfo.insidePos == 0 && scanIdx > 0 && scanIdx < m_numCoeff - m_sbbSize )
    {
      scanInfo.spt      = SCAN_EOCSBB;
    }

    scanInfo.posX = blkPos.x;
    scanInfo.posY = blkPos.y;

    if( !scanIdx )
    {
      return; // scan position 0 has no following position, the "next" fields stay untouched
    }

    const int          nextScanIdx = scanIdx - 1;
    const ScanElement& nextPos     = m_scanId2BlkPos[nextScanIdx];
    const int          diag        = nextPos.x + nextPos.y;

    // context offsets of the next position depend on its diagonal and on the channel type
    int sigOffset;
    int gtxOffset;
    if( m_chType == CH_L )
    {
      sigOffset = ( diag < 2 ?  8 : ( diag < 5 ?  4 : 0 ) );
      gtxOffset = ( diag < 1 ? 16 : ( diag < 3 ? 11 : ( diag < 10 ? 6 : 1 ) ) );
    }
    else
    {
      sigOffset = ( diag < 2 ? 4 : 0 );
      gtxOffset = ( diag < 1 ? 6 : 1 );
    }
    scanInfo.sigCtxOffsetNext = sigOffset;
    scanInfo.gtxCtxOffsetNext = gtxOffset;

    scanInfo.nextInsidePos    = nextScanIdx & m_sbbMask;
    scanInfo.currNbInfoSbb    = m_scanId2NbInfoSbb[ scanIdx ];

    if( scanInfo.insidePos == 0 )
    {
      // the next position lies in another sub-block: record its right / below sub-block neighbours,
      // with 0 meaning "no such neighbour"
      const int nextSbbPos  = m_scanSbbId2SbbPos[nextScanIdx >> m_log2SbbSize].idx;
      const int nextSbbPosY = nextSbbPos               / m_widthInSbb;
      const int nextSbbPosX = nextSbbPos - nextSbbPosY * m_widthInSbb;
      scanInfo.nextSbbRight = ( nextSbbPosX < m_widthInSbb  - 1 ? nextSbbPos + 1            : 0 );
      scanInfo.nextSbbBelow = ( nextSbbPosY < m_heightInSbb - 1 ? nextSbbPos + m_widthInSbb : 0 );
    }
  }
343
344
  void RateEstimator::initCtx( const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID, const FracBitsAccess& fracBitsAccess )
345
0
  {
346
0
    m_scanId2Pos = tuPars.m_scanId2BlkPos;
347
0
    xSetSigSbbFracBits  ( fracBitsAccess, tuPars.m_chType );
348
0
    xSetSigFlagBits     ( fracBitsAccess, tuPars.m_chType );
349
0
    xSetGtxFlagBits     ( fracBitsAccess, tuPars.m_chType );
350
0
    xSetLastCoeffOffset ( fracBitsAccess, tuPars, tu, compID );
351
0
  }
352
353
  void RateEstimator::xSetLastCoeffOffset( const FracBitsAccess& fracBitsAccess, const TUParameters& tuPars, const TransformUnit& tu, const ComponentID compID )
354
0
  {
355
0
    const ChannelType chType = ( compID == COMP_Y ? CH_L : CH_C );
356
0
    int32_t cbfDeltaBits = 0;
357
0
    if( compID == COMP_Y && !CU::isIntra(*tu.cu) && !tu.depth )
358
0
    {
359
0
      const BinFracBits bits  = fracBitsAccess.getFracBitsArray( Ctx::QtRootCbf() );
360
0
      cbfDeltaBits            = int32_t( bits.intBits[1] ) - int32_t( bits.intBits[0] );
361
0
    }
362
0
    else
363
0
    {
364
0
      BinFracBits bits;
365
0
      bool prevLumaCbf           = false;
366
0
      bool lastCbfIsInferred     = false;
367
0
      bool useIntraSubPartitions = tu.cu->ispMode && isLuma(chType);
368
0
      if( useIntraSubPartitions )
369
0
      {
370
0
        bool rootCbfSoFar = false;
371
0
        bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID);
372
0
        uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth());
373
0
        if( isLastSubPartition )
374
0
        {
375
0
          TransformUnit* tuPointer = tu.cu->firstTU;
376
0
          for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ )
377
0
          {
378
0
            rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth);
379
0
            tuPointer     = tuPointer->next;
380
0
          }
381
0
          if( !rootCbfSoFar )
382
0
          {
383
0
            lastCbfIsInferred = true;
384
0
          }
385
0
        }
386
0
        if( !lastCbfIsInferred )
387
0
        {
388
0
          prevLumaCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth);
389
0
        }
390
0
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, prevLumaCbf, true)));
391
0
      }
392
0
      else
393
0
      {
394
0
        bits = fracBitsAccess.getFracBitsArray(Ctx::QtCbf[compID](DeriveCtx::CtxQtCbf(compID, tu.cbf[COMP_Cb])));
395
0
      }
396
0
      cbfDeltaBits = lastCbfIsInferred ? 0 : int32_t(bits.intBits[1]) - int32_t(bits.intBits[0]);
397
0
    }
398
399
0
    static const unsigned prefixCtx[] = { 0, 0, 0, 3, 6, 10, 15, 21 };
400
0
    uint32_t              ctxBits  [ LAST_SIGNIFICANT_GROUPS ];
401
0
    for( unsigned xy = 0; xy < 2; xy++ )
402
0
    {
403
0
      int32_t             bitOffset   = ( xy ? cbfDeltaBits : 0 );
404
0
      int32_t*            lastBits    = ( xy ? m_lastBitsY : m_lastBitsX );
405
0
      const unsigned      size        = ( xy ? tuPars.m_height : tuPars.m_width );
406
0
      const unsigned      log2Size    = Log2( size );
407
0
      const bool          useYCtx     = ( xy != 0 );
408
0
      const CtxSet&       ctxSetLast  = ( useYCtx ? Ctx::LastY : Ctx::LastX )[ chType ];
409
0
      const unsigned      lastShift   = ( compID == COMP_Y ? (log2Size+1)>>2 : Clip3<unsigned>(0,2,size>>3) );
410
0
      const unsigned      lastOffset  = ( compID == COMP_Y ? ( prefixCtx[log2Size] ) : 0 );
411
0
      uint32_t            sumFBits    = 0;
412
0
      unsigned            maxCtxId    = g_uiGroupIdx[std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size) - 1];
413
0
      for( unsigned ctxId = 0; ctxId < maxCtxId; ctxId++ )
414
0
      {
415
0
        const BinFracBits bits  = fracBitsAccess.getFracBitsArray( ctxSetLast( lastOffset + ( ctxId >> lastShift ) ) );
416
0
        ctxBits[ ctxId ]        = sumFBits + bits.intBits[0] + ( ctxId>3 ? ((ctxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
417
0
        sumFBits               +=            bits.intBits[1];
418
0
      }
419
0
      ctxBits  [ maxCtxId ]     = sumFBits + ( maxCtxId>3 ? ((maxCtxId-2)>>1)<<SCALE_BITS : 0 ) + bitOffset;
420
0
      for (unsigned pos = 0; pos < std::min<unsigned>(JVET_C0024_ZERO_OUT_TH, size); pos++)
421
0
      {
422
0
        lastBits[ pos ]         = ctxBits[ g_uiGroupIdx[ pos ] ];
423
0
      }
424
0
    }
425
0
  }
426
427
  void RateEstimator::xSetSigSbbFracBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
428
0
  {
429
0
    const CtxSet& ctxSet = Ctx::SigCoeffGroup[ chType ];
430
0
    for( unsigned ctxId = 0; ctxId < sm_maxNumSigSbbCtx; ctxId++ )
431
0
    {
432
0
      m_sigSbbFracBits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
433
0
    }
434
0
  }
435
436
  void RateEstimator::xSetSigFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
437
0
  {
438
0
    for( unsigned ctxSetId = 0; ctxSetId < sm_numCtxSetsSig; ctxSetId++ )
439
0
    {
440
0
      BinFracBits*    bits    = m_sigFracBits [ ctxSetId ];
441
0
      const CtxSet&   ctxSet  = Ctx::SigFlag  [ chType + 2*ctxSetId ];
442
0
      const unsigned  numCtx  = ( chType == CH_L ? 12 : 8 );
443
0
      for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
444
0
      {
445
0
        bits[ ctxId ] = fracBitsAccess.getFracBitsArray( ctxSet( ctxId ) );
446
0
      }
447
0
    }
448
0
  }
449
450
  void RateEstimator::xSetGtxFlagBits( const FracBitsAccess& fracBitsAccess, ChannelType chType )
451
0
  {
452
0
    const CtxSet&   ctxSetPar   = Ctx::ParFlag [     chType ];
453
0
    const CtxSet&   ctxSetGt1   = Ctx::GtxFlag [ 2 + chType ];
454
0
    const CtxSet&   ctxSetGt2   = Ctx::GtxFlag [     chType ];
455
0
    const unsigned  numCtx      = ( chType == CH_L ? 21 : 11 );
456
0
    for( unsigned ctxId = 0; ctxId < numCtx; ctxId++ )
457
0
    {
458
0
      BinFracBits     fbPar = fracBitsAccess.getFracBitsArray( ctxSetPar( ctxId ) );
459
0
      BinFracBits     fbGt1 = fracBitsAccess.getFracBitsArray( ctxSetGt1( ctxId ) );
460
0
      BinFracBits     fbGt2 = fracBitsAccess.getFracBitsArray( ctxSetGt2( ctxId ) );
461
0
      CoeffFracBits&  cb    = m_gtxFracBits[ ctxId ];
462
0
      int32_t         par0  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[0]);
463
0
      int32_t         par1  = (1<<SCALE_BITS) + int32_t(fbPar.intBits[1]);
464
0
      cb.bits[0] = 0;
465
0
      cb.bits[1] = fbGt1.intBits[0] + (1 << SCALE_BITS);
466
0
      cb.bits[2] = fbGt1.intBits[1] + par0 + fbGt2.intBits[0];
467
0
      cb.bits[3] = fbGt1.intBits[1] + par1 + fbGt2.intBits[0];
468
0
      cb.bits[4] = fbGt1.intBits[1] + par0 + fbGt2.intBits[1];
469
0
      cb.bits[5] = fbGt1.intBits[1] + par1 + fbGt2.intBits[1];
470
0
    }
471
0
  }
472
473
  void CommonCtx::update( const ScanInfo& scanInfo, const int prevId, int stateId, StateMem& curr )
474
0
  {
475
0
    uint8_t*    sbbFlags  = m_currSbbCtx[stateId].sbbFlags;
476
0
    uint8_t*    levels    = m_currSbbCtx[stateId].levels;
477
0
    uint16_t    maxDist   = m_nbInfo[scanInfo.scanIdx - 1].maxDist;
478
0
    uint16_t    sbbSize   = scanInfo.sbbSize;
479
0
    std::size_t setCpSize = ( maxDist > sbbSize ? maxDist - sbbSize : 0 ) * sizeof( uint8_t );
480
0
    if( prevId >= 0 )
481
0
    {
482
0
      ::memcpy( sbbFlags, m_prevSbbCtx[prevId].sbbFlags, scanInfo.numSbb * sizeof( uint8_t ) );
483
0
      ::memcpy( levels + scanInfo.scanIdx + sbbSize, m_prevSbbCtx[prevId].levels + scanInfo.scanIdx + sbbSize, setCpSize );
484
0
    }
485
0
    else
486
0
    {
487
0
      ::memset( sbbFlags, 0, scanInfo.numSbb * sizeof( uint8_t ) );
488
0
      ::memset( levels + scanInfo.scanIdx + sbbSize, 0, setCpSize );
489
0
    }
490
0
    sbbFlags[scanInfo.sbbPos] = !!curr.numSig[stateId];
491
492
0
    const int       sigNSbb = ( ( scanInfo.nextSbbRight ? sbbFlags[scanInfo.nextSbbRight] : false ) || ( scanInfo.nextSbbBelow ? sbbFlags[scanInfo.nextSbbBelow] : false ) ? 1 : 0 );
493
0
    curr.refSbbCtxId[stateId] = stateId;
494
0
    const BinFracBits sbbBits = m_sbbFlagBits[sigNSbb];
495
496
0
    curr.sbbBits0[stateId] = sbbBits.intBits[0];
497
0
    curr.sbbBits1[stateId] = sbbBits.intBits[1];
498
499
0
    if( sigNSbb || ( ( scanInfo.nextSbbRight && scanInfo.nextSbbBelow ) ? sbbFlags[scanInfo.nextSbbBelow + 1] : false ) )
500
0
    {
501
0
      const int         scanBeg = scanInfo.scanIdx - scanInfo.sbbSize;
502
0
      const NbInfoOut* nbOut = m_nbInfo + scanBeg;
503
0
      const uint8_t* absLevels = levels + scanBeg;
504
505
0
      for( int id = 0; id < scanInfo.sbbSize; id++, nbOut++ )
506
0
      {
507
0
        if( nbOut->num )
508
0
        {
509
0
          TCoeff sumAbs = 0, sumAbs1 = 0, sumNum = 0;
510
0
#define UPDATE(k) {TCoeff t=absLevels[nbOut->outPos[k]]; sumAbs+=t; sumAbs1+=std::min<TCoeff>(4+(t&1),t); sumNum+=!!t; }
511
0
          switch( nbOut->num )
512
0
          {
513
0
          default:
514
0
          case 5:
515
0
            UPDATE( 4 );
516
0
          case 4:
517
0
            UPDATE( 3 );
518
0
          case 3:
519
0
            UPDATE( 2 );
520
0
          case 2:
521
0
            UPDATE( 1 );
522
0
          case 1:
523
0
            UPDATE( 0 );
524
0
          }
525
0
#undef UPDATE
526
0
          curr.tplAcc[id][stateId] = ( sumNum << 5 ) | sumAbs1;
527
0
          curr.sum1st[id][stateId] = ( uint8_t ) std::min( 255, sumAbs );
528
0
        }
529
0
      }
530
0
    }
531
0
  }
532
533
  void Quantizer::initQuantBlock(const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, const double lambda, int gValue)
534
0
  {
535
0
    CHECKD( lambda <= 0.0, "Lambda must be greater than 0" );
536
537
0
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
538
0
    const int         qpPer                 = qpDQ / 6;
539
0
    const int         qpRem                 = qpDQ - 6 * qpPer;
540
0
    const SPS&        sps                   = *tu.cs->sps;
541
0
    const CompArea&   area                  = tu.blocks[ compID ];
542
0
    const ChannelType chType                = toChannelType( compID );
543
0
    const int         channelBitDepth       = sps.bitDepths[ chType ];
544
0
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
545
0
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
546
0
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
547
0
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
548
    // quant parameters
549
0
    m_QShift                    = QUANT_SHIFT  - 1 + qpPer + transformShift;
550
0
    m_QAdd                      = -( ( 3 << m_QShift ) >> 1 );
551
0
    Intermediate_Int  invShift  = IQUANT_SHIFT + 1 - qpPer - transformShift;
552
0
    m_QScale                    = g_quantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
553
0
    const unsigned    qIdxBD    = std::min<unsigned>( maxLog2TrDynamicRange + 1, 8*sizeof(Intermediate_Int) + invShift - IQUANT_SHIFT - 1 );
554
0
    m_maxQIdx                   = ( 1 << (qIdxBD-1) ) - 4;
555
0
    if( m_QShift )
556
0
      m_thresLast               = TCoeff((int64_t(m_DqThrVal) << (m_QShift-1)));
557
0
    else
558
0
      m_thresLast               = TCoeff((int64_t(m_DqThrVal>>1) << m_QShift));
559
0
    m_thresSSbb                 = TCoeff((int64_t(3) << m_QShift));
560
    // distortion calculation parameters
561
0
    const int64_t qScale        = (gValue==-1) ? m_QScale : gValue;
562
0
    const int nomDShift =
563
0
      SCALE_BITS - 2 * (nomTransformShift + DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth)) + m_QShift + (needsSqrt2ScaleAdjustment ? 1 : 0);
564
0
    const double  qScale2       = double( qScale * qScale );
565
0
    const double  nomDistFactor = ( nomDShift < 0 ? 1.0/(double(int64_t(1)<<(-nomDShift))*qScale2*lambda) : double(int64_t(1)<<nomDShift)/(qScale2*lambda) );
566
0
    const uint32_t pow2dfShift   = (uint32_t)( nomDistFactor * qScale2 ) + 1;
567
0
    const int     dfShift       = ceilLog2( pow2dfShift );
568
0
    m_DistShift                 = 62 + m_QShift - 2*maxLog2TrDynamicRange - dfShift;
569
0
    m_DistAdd                   = (int64_t(1) << m_DistShift) >> 1;
570
0
    m_DistStepAdd               = ((m_DistShift+m_QShift)>=64 ? (int64_t)( nomDistFactor * pow(2,m_DistShift+m_QShift) + .5 ) : (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+m_QShift)) + .5 ));
571
0
    m_DistOrgFact               = (int64_t)( nomDistFactor * double(int64_t(1)<<(m_DistShift+1       )) + .5 );
572
0
  }
573
574
  void Quantizer::dequantBlock( const TransformUnit& tu, const ComponentID compID, const QpParam& cQP, CoeffBuf& recCoeff, bool enableScalingLists, int* piDequantCoef) const
575
0
  {
576
577
    //----- set basic parameters -----
578
0
    const CompArea&     area      = tu.blocks[ compID ];
579
0
    const int           numCoeff  = area.area();
580
0
    const SizeType      hsId      = Log2( area.width );
581
0
    const SizeType      vsId      = Log2( area.height );
582
0
    const ScanElement  *scan      = getScanOrder( SCAN_GROUPED_4x4, hsId, vsId );
583
0
    const TCoeffSig*    qCoeff    = tu.getCoeffs( compID ).buf;
584
0
          TCoeff*       tCoeff    = recCoeff.buf;
585
586
    //----- reset coefficients and get last scan index -----
587
0
    ::memset( tCoeff, 0, numCoeff * sizeof( TCoeff ) );
588
0
    int lastScanIdx = tu.lastPos[compID];
589
0
    if( lastScanIdx < 0 )
590
0
    {
591
0
      return;
592
0
    }
593
594
    //----- set dequant parameters -----
595
0
    const int         qpDQ                  = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
596
0
    const int         qpPer                 = qpDQ / 6;
597
0
    const int         qpRem                 = qpDQ - 6 * qpPer;
598
0
    const SPS&        sps                   = *tu.cs->sps;
599
0
    const ChannelType chType                = toChannelType( compID );
600
0
    const int         channelBitDepth       = sps.bitDepths[ chType ];
601
0
    const int         maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange();
602
0
    const TCoeff      minTCoeff             = -( 1 << maxLog2TrDynamicRange );
603
0
    const TCoeff      maxTCoeff             =  ( 1 << maxLog2TrDynamicRange ) - 1;
604
0
    const int         nomTransformShift     = getTransformShift( channelBitDepth, area.size(), maxLog2TrDynamicRange );
605
0
    const bool    needsSqrt2ScaleAdjustment = TU::needsSqrt2Scale(tu, compID);
606
0
    const int         transformShift        = nomTransformShift + (needsSqrt2ScaleAdjustment?-1:0);
607
0
    Intermediate_Int  shift                 = IQUANT_SHIFT + 1 - qpPer - transformShift + (enableScalingLists ? LOG2_SCALING_LIST_NEUTRAL_VALUE : 0);
608
0
    Intermediate_Int  invQScale             = g_invQuantScales[needsSqrt2ScaleAdjustment?1:0][ qpRem ];
609
0
    Intermediate_Int  add                   = (shift < 0) ? 0 : ((1 << shift) >> 1);
610
    //----- dequant coefficients -----
611
0
    for( int state = 0, scanIdx = lastScanIdx; scanIdx >= 0; scanIdx-- )
612
0
    {
613
0
      const unsigned   rasterPos = scan[scanIdx].idx;
614
0
      const TCoeffSig& level     = qCoeff[ rasterPos ];
615
0
      if( level )
616
0
      {
617
0
        if (enableScalingLists)
618
0
          invQScale = piDequantCoef[rasterPos];//scalingfactor*levelScale
619
0
        if (shift < 0 && (enableScalingLists || scanIdx == lastScanIdx))
620
0
        {
621
0
          invQScale <<= -shift;
622
0
        }
623
0
        Intermediate_Int qIdx = 2 * level + (level > 0 ? -(state>>1) : (state>>1));
624
0
        int64_t  nomTCoeff          = ((int64_t)qIdx * (int64_t)invQScale + add) >> ((shift < 0) ? 0 : shift);
625
0
        tCoeff[rasterPos]           = (TCoeff)Clip3<int64_t>(minTCoeff, maxTCoeff, nomTCoeff);
626
0
      }
627
0
      state = ( 32040 >> ((state<<2)+((level&1)<<1)) ) & 3;   // the 16-bit value "32040" represent the state transition table
628
0
    }
629
0
  }
630
631
  bool Quantizer::preQuantCoeff( const TCoeff absCoeff, PQData* pqData, int quanCoeff ) const
632
0
  {
633
0
    int64_t scaledOrg = int64_t( absCoeff ) * quanCoeff;
634
0
    TCoeff  qIdx      = TCoeff( ( scaledOrg + m_QAdd ) >> m_QShift );
635
636
0
    if( qIdx < 0 )
637
0
    {
638
0
      int64_t scaledAdd = m_DistStepAdd - scaledOrg * m_DistOrgFact;
639
0
      PQData& pq_a      = pqData[1];
640
0
      PQData& pq_b      = pqData[2];
641
642
0
      pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * 1 + m_DistAdd ) >> m_DistShift;
643
0
      pq_a.absLevel     = 1;
644
645
0
      pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * 2 + m_DistAdd ) >> m_DistShift;
646
0
      pq_b.absLevel     = 1;
647
      
648
0
      return true;
649
0
    }
650
     
651
0
    qIdx              = std::max<TCoeff>( 1, std::min<TCoeff>( m_maxQIdx, qIdx ) );
652
0
    int64_t scaledAdd = qIdx * m_DistStepAdd - scaledOrg * m_DistOrgFact;
653
654
0
    PQData& pq_a      = pqData[( qIdx + 0 ) & 3];
655
0
    PQData& pq_b      = pqData[( qIdx + 1 ) & 3];
656
0
    PQData& pq_c      = pqData[( qIdx + 2 ) & 3];
657
0
    PQData& pq_d      = pqData[( qIdx + 3 ) & 3];
658
659
0
    pq_a.deltaDist    = ( ( scaledAdd + 0 * m_DistStepAdd ) * ( qIdx + 0 ) + m_DistAdd ) >> m_DistShift;
660
0
    pq_a.absLevel     = ( qIdx + 1 ) >> 1;
661
662
0
    pq_b.deltaDist    = ( ( scaledAdd + 1 * m_DistStepAdd ) * ( qIdx + 1 ) + m_DistAdd ) >> m_DistShift;
663
0
    pq_b.absLevel     = ( qIdx + 2 ) >> 1;
664
665
0
    pq_c.deltaDist    = ( ( scaledAdd + 2 * m_DistStepAdd ) * ( qIdx + 2 ) + m_DistAdd ) >> m_DistShift;
666
0
    pq_c.absLevel     = ( qIdx + 3 ) >> 1;
667
668
0
    pq_d.deltaDist    = ( ( scaledAdd + 3 * m_DistStepAdd ) * ( qIdx + 3 ) + m_DistAdd ) >> m_DistShift;
669
0
    pq_d.absLevel     = ( qIdx + 4 ) >> 1;
670
671
0
    return false;
672
0
  }
673
674
  // Bit-cost table, in the fixed-point scale of the rate terms, indexed as
  // [Rice parameter][value]; callers clip the value index to RICEMAX - 1.
  const int32_t g_goRiceBits[4][RICEMAX] =
  {
    // Rice parameter 0
    {
       32768,  65536,  98304, 131072, 163840, 196608, 262144, 262144,
      327680, 327680, 327680, 327680, 393216, 393216, 393216, 393216,
      393216, 393216, 393216, 393216, 458752, 458752, 458752, 458752,
      458752, 458752, 458752, 458752, 458752, 458752, 458752, 458752
    },
    // Rice parameter 1
    {
       65536,  65536,  98304,  98304, 131072, 131072, 163840, 163840,
      196608, 196608, 229376, 229376, 294912, 294912, 294912, 294912,
      360448, 360448, 360448, 360448, 360448, 360448, 360448, 360448,
      425984, 425984, 425984, 425984, 425984, 425984, 425984, 425984
    },
    // Rice parameter 2
    {
       98304,  98304,  98304,  98304, 131072, 131072, 131072, 131072,
      163840, 163840, 163840, 163840, 196608, 196608, 196608, 196608,
      229376, 229376, 229376, 229376, 262144, 262144, 262144, 262144,
      327680, 327680, 327680, 327680, 327680, 327680, 327680, 327680
    },
    // Rice parameter 3
    {
      131072, 131072, 131072, 131072, 131072, 131072, 131072, 131072,
      163840, 163840, 163840, 163840, 163840, 163840, 163840, 163840,
      196608, 196608, 196608, 196608, 196608, 196608, 196608, 196608,
      229376, 229376, 229376, 229376, 229376, 229376, 229376, 229376
    }
  };
681
682
  static inline void initStates( const int stateId, DQIntern::StateMem& state )
683
0
  {
684
0
    state.rdCost[stateId]         = DQIntern::rdCostInit;
685
0
    state.ctx.cff[stateId]        =  0;
686
0
    state.ctx.sig[stateId]        =  0;
687
0
    state.numSig[stateId]         =  0;
688
0
    state.refSbbCtxId[stateId]    = -1;
689
0
    state.remRegBins[stateId]     =  4;
690
0
    state.cffBitsCtxOffset        =  0;
691
0
    state.m_goRicePar[stateId]    =  0;
692
0
    state.m_goRiceZero[stateId]   =  0;
693
0
    state.sbbBits0[stateId]       =  0;
694
0
    state.sbbBits1[stateId]       =  0;
695
0
  }
696
697
  template<bool rrgEnsured = false>
698
  static inline void checkRdCosts( const int stateId, const DQIntern::ScanPosType spt, const DQIntern::PQData& pqDataA, const DQIntern::PQData& pqDataB, DQIntern::Decisions& decisions, int idxAZ, int idxB, const DQIntern::StateMem& state )
699
0
  {
700
0
    const int32_t* goRiceTab = DQIntern::g_goRiceBits[state.m_goRicePar[stateId]];
701
0
    int64_t         rdCostA = state.rdCost[stateId] + pqDataA.deltaDist;
702
0
    int64_t         rdCostB = state.rdCost[stateId] + pqDataB.deltaDist;
703
0
    int64_t         rdCostZ = state.rdCost[stateId];
704
705
0
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
706
0
    {
707
0
      const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[state.ctx.cff[stateId]];
708
0
      const BinFracBits    sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
709
710
0
      if( pqDataA.absLevel < 4 )
711
0
        rdCostA += cffBits.bits[pqDataA.absLevel];
712
0
      else
713
0
      {
714
0
        const unsigned value = ( pqDataA.absLevel - 4 ) >> 1;
715
0
        rdCostA += cffBits.bits[pqDataA.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
716
0
      }
717
718
0
      if( pqDataB.absLevel < 4 )
719
0
        rdCostB += cffBits.bits[pqDataB.absLevel];
720
0
      else
721
0
      {
722
0
        const unsigned value = ( pqDataB.absLevel - 4 ) >> 1;
723
0
        rdCostB += cffBits.bits[pqDataB.absLevel - ( value << 1 )] + goRiceTab[std::min<unsigned>( value, RICEMAX - 1 )];
724
0
      }
725
726
0
      if( spt == SCAN_ISCSBB )
727
0
      {
728
0
        rdCostA += sigBits.intBits[1];
729
0
        rdCostB += sigBits.intBits[1];
730
0
        rdCostZ += sigBits.intBits[0];
731
0
      }
732
0
      else if( spt == SCAN_SOCSBB )
733
0
      {
734
0
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
735
0
        rdCostB += state.sbbBits1[stateId] + sigBits.intBits[1];
736
0
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
737
0
      }
738
0
      else if( state.numSig[stateId] )
739
0
      {
740
0
        rdCostA += sigBits.intBits[1];
741
0
        rdCostB += sigBits.intBits[1];
742
0
        rdCostZ += sigBits.intBits[0];
743
0
      }
744
0
      else
745
0
      {
746
0
        rdCostZ = rdCostInit;
747
0
      }
748
0
    }
749
0
    else
750
0
    {
751
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[pqDataA.absLevel <= state.m_goRiceZero[stateId] ? pqDataA.absLevel - 1 : std::min<int>( pqDataA.absLevel, RICEMAX - 1 )];
752
0
      rdCostB += ( 1 << SCALE_BITS ) + goRiceTab[pqDataB.absLevel <= state.m_goRiceZero[stateId] ? pqDataB.absLevel - 1 : std::min<int>( pqDataB.absLevel, RICEMAX - 1 )];
753
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
754
0
    }
755
756
0
    if( rdCostA < rdCostZ && rdCostA < decisions.rdCost[idxAZ] )
757
0
    {
758
0
      decisions.rdCost[idxAZ] = rdCostA;
759
0
      decisions.absLevel[idxAZ] = pqDataA.absLevel;
760
0
      decisions.prevId[idxAZ] = stateId;
761
0
    }
762
0
    else if( rdCostZ < decisions.rdCost[idxAZ] )
763
0
    {
764
0
      decisions.rdCost[idxAZ] = rdCostZ;
765
0
      decisions.absLevel[idxAZ] = 0;
766
0
      decisions.prevId[idxAZ] = stateId;
767
0
    }
768
769
0
    if( rdCostB < decisions.rdCost[idxB] )
770
0
    {
771
0
      decisions.rdCost[idxB] = rdCostB;
772
0
      decisions.absLevel[idxB] = pqDataB.absLevel;
773
0
      decisions.prevId[idxB] = stateId;
774
0
    }
775
0
  }
Unexecuted instantiation: DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<true>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Unexecuted instantiation: DepQuant.cpp:void vvenc::DQIntern::checkRdCosts<false>(int, vvenc::DQIntern::ScanPosType, vvenc::DQIntern::PQData const&, vvenc::DQIntern::PQData const&, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
776
777
  /// Run checkRdCosts<true> for all four trellis states, in state order.
  /// States 0 and 1 use candidates pqData[0]/pqData[2], states 2 and 3 use
  /// pqData[3]/pqData[1]; the last two integer arguments are the decisions
  /// slots for the A/zero and the B alternative.
  void checkAllRdCosts( const DQIntern::ScanPosType spt, const DQIntern::PQData* pqData, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
  {
    const DQIntern::PQData& pq0 = pqData[0];
    const DQIntern::PQData& pq1 = pqData[1];
    const DQIntern::PQData& pq2 = pqData[2];
    const DQIntern::PQData& pq3 = pqData[3];

    checkRdCosts<true>( 0, spt, pq0, pq2, decisions, 0, 2, state );
    checkRdCosts<true>( 1, spt, pq0, pq2, decisions, 2, 0, state );
    checkRdCosts<true>( 2, spt, pq3, pq1, decisions, 1, 3, state );
    checkRdCosts<true>( 3, spt, pq3, pq1, decisions, 3, 1, state );
  }
784
785
  template<bool rrgEnsured = false>
786
  static void checkRdCostsOdd1( const int stateId, const ScanPosType spt, const int64_t deltaDist, Decisions& decisions, int idxA, int idxZ, const StateMem& state )
787
0
  {
788
0
    int64_t         rdCostA = state.rdCost[stateId] + deltaDist;
789
0
    int64_t         rdCostZ = state.rdCost[stateId];
790
791
0
    if( rrgEnsured || state.remRegBins[stateId] >= 4 )
792
0
    {
793
0
      const BinFracBits sigBits = state.m_sigFracBitsArray[stateId][state.ctx.sig[stateId]];
794
795
0
      rdCostA += state.cffBits1[state.ctx.cff[stateId]];
796
797
0
      if( spt == SCAN_ISCSBB )
798
0
      {
799
0
        rdCostA += sigBits.intBits[1];
800
0
        rdCostZ += sigBits.intBits[0];
801
0
      }
802
0
      else if( spt == SCAN_SOCSBB )
803
0
      {
804
0
        rdCostA += state.sbbBits1[stateId] + sigBits.intBits[1];
805
0
        rdCostZ += state.sbbBits1[stateId] + sigBits.intBits[0];
806
0
      }
807
0
      else if( state.numSig[stateId] )
808
0
      {
809
0
        rdCostA += sigBits.intBits[1];
810
0
        rdCostZ += sigBits.intBits[0];
811
0
      }
812
0
      else
813
0
      {
814
0
        rdCostZ = rdCostInit;
815
0
      }
816
0
    }
817
0
    else
818
0
    {
819
0
      const int32_t* goRiceTab = g_goRiceBits[state.m_goRicePar[stateId]];
820
821
0
      rdCostA += ( 1 << SCALE_BITS ) + goRiceTab[0];
822
0
      rdCostZ += goRiceTab[state.m_goRiceZero[stateId]];
823
0
    }
824
825
0
    if( rdCostA < decisions.rdCost[idxA] )
826
0
    {
827
0
      decisions.rdCost[idxA] = rdCostA;
828
0
      decisions.absLevel[idxA] = 1;
829
0
      decisions.prevId[idxA] = stateId;
830
0
    }
831
832
0
    if( rdCostZ < decisions.rdCost[idxZ] )
833
0
    {
834
0
      decisions.rdCost[idxZ] = rdCostZ;
835
0
      decisions.absLevel[idxZ] = 0;
836
0
      decisions.prevId[idxZ] = stateId;
837
0
    }
838
0
  }
Unexecuted instantiation: DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<true>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
Unexecuted instantiation: DepQuant.cpp:void vvenc::DQIntern::checkRdCostsOdd1<false>(int, vvenc::DQIntern::ScanPosType, long, vvenc::DQIntern::Decisions&, int, int, vvenc::DQIntern::StateMem const&)
839
840
  /// Run checkRdCostsOdd1<true> for all four trellis states, in state order.
  /// States 0 and 1 are evaluated with pq_b_dist, states 2 and 3 with
  /// pq_a_dist; the two integers are the decisions slots for the level-1 and
  /// the zero alternative.
  static void checkAllRdCostsOdd1( const DQIntern::ScanPosType spt, const int64_t pq_a_dist, const int64_t pq_b_dist, DQIntern::Decisions& decisions, const DQIntern::StateMem& state )
  {
    // states 0 / 1
    checkRdCostsOdd1<true>( 0, spt, pq_b_dist, decisions, 2, 0, state );
    checkRdCostsOdd1<true>( 1, spt, pq_b_dist, decisions, 0, 2, state );

    // states 2 / 3
    checkRdCostsOdd1<true>( 2, spt, pq_a_dist, decisions, 3, 1, state );
    checkRdCostsOdd1<true>( 3, spt, pq_a_dist, decisions, 1, 3, state );
  }
847
848
  static inline void checkRdCostStart( int32_t lastOffset, const PQData& pqData, Decisions& decisions, int idx, const StateMem& state )
849
0
  {
850
0
    const CoeffFracBits& cffBits = state.m_gtxFracBitsArray[0];
851
852
0
    int64_t rdCost = pqData.deltaDist + lastOffset;
853
0
    if( pqData.absLevel < 4 )
854
0
    {
855
0
      rdCost += cffBits.bits[pqData.absLevel];
856
0
    }
857
0
    else
858
0
    {
859
0
      const unsigned value = ( pqData.absLevel - 4 ) >> 1;
860
0
      rdCost += cffBits.bits[pqData.absLevel - ( value << 1 )] + g_goRiceBits[0][value < RICEMAX ? value : RICEMAX - 1];
861
0
    }
862
863
0
    if( rdCost < decisions.rdCost[idx] )
864
0
    {
865
0
      decisions.rdCost[idx]   = rdCost;
866
0
      decisions.absLevel[idx] = pqData.absLevel;
867
0
      decisions.prevId[idx]   = -1;
868
0
    }
869
0
  }
870
871
  static inline void checkRdCostSkipSbb( const int stateId, Decisions& decisions, int idx, const StateMem& state )
872
0
  {
873
0
    int64_t rdCost = state.rdCost[stateId] + state.sbbBits0[stateId];
874
0
    if( rdCost < decisions.rdCost[idx] )
875
0
    {
876
0
      decisions.rdCost[idx]   = rdCost;
877
0
      decisions.absLevel[idx] = 0;
878
0
      decisions.prevId[idx]   = 4 | stateId;
879
0
    }
880
0
  }
881
882
  static inline void checkRdCostSkipSbbZeroOut( const int stateId, Decisions& decisions, int idx, const StateMem& state )
883
0
  {
884
0
    int64_t rdCost          = state.rdCost[stateId] + state.sbbBits0[stateId];
885
0
    decisions.rdCost[idx]   = rdCost;
886
0
    decisions.absLevel[idx] = 0;
887
0
    decisions.prevId[idx]   = 4 | stateId;
888
0
  }
889
890
  static inline void setRiceParam( const int stateId, const ScanInfo& scanInfo, StateMem& state, bool ge4 )
891
0
  {
892
0
    if( state.remRegBins[stateId] < 4 || ge4 )
893
0
    {
894
0
      TCoeff  sumAbs = state.sum1st[scanInfo.insidePos][stateId];
895
0
      int sumSub     = state.remRegBins[stateId] < 4 ? 0 : 4 * 5;
896
0
      int sumAll     = std::max( std::min( 31, ( int ) sumAbs - sumSub ), 0 );
897
0
      state.m_goRicePar[stateId]
898
0
                     = g_auiGoRiceParsCoeff[sumAll];
899
900
0
      if( state.remRegBins[stateId] < 4 )
901
0
      {
902
0
        state.m_goRiceZero[stateId] = g_auiGoRicePosCoeff0( stateId, state.m_goRicePar[stateId] );
903
0
      }
904
0
    }
905
0
  }
906
907
  /// Advance one trellis state by one scan position inside a sub-block.
  ///
  /// `decisions` holds, for target state `stateId`, the chosen level and the
  /// predecessor it was reached from:
  ///   prevId >=  0 : continue from state prevId of `prev`
  ///   prevId == -1 : the path starts at this position
  ///   prevId <= -2 : no valid path; only rdCost is copied
  ///
  /// Besides the bookkeeping (numSig, remRegBins, sub-block flag costs) the
  /// per-position template accumulators are maintained: tplAcc packs
  /// (number of non-zero neighbours << 5) | capped level sum, sum1st is a
  /// saturating 8-bit sum of neighbouring levels, absVal the capped level.
  ///
  /// \param stateId   target state to update
  /// \param scanInfo  position data; its currNbInfoSbb lists the positions
  ///                  whose accumulators receive this level
  /// \param decisions per-state decisions of the current position
  /// \param curr      state memory written by this call
  /// \param prev      state memory before the update; only read here
  static void update1State( int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr, DQIntern::StateMem& prev )
  {
    curr.rdCost[stateId] = decisions.rdCost[stateId];
    if( decisions.prevId[stateId] > -2 )
    {
      if( decisions.prevId[stateId] >= 0 )
      {
        // inherit everything from the chosen predecessor state
        const int prevId          = decisions.prevId[stateId];
        curr.numSig[stateId]      = prev.numSig[prevId] + !!decisions.absLevel[stateId];
        curr.refSbbCtxId[stateId] = prev.refSbbCtxId[prevId];
        curr.sbbBits0[stateId]    = prev.sbbBits0[prevId];
        curr.sbbBits1[stateId]    = prev.sbbBits1[prevId];
        curr.remRegBins[stateId]  = prev.remRegBins[prevId] - 1;

        if( curr.remRegBins[stateId] >= 4 )
        {
          // levels 0 and 1 consume that many additional bins, larger levels 3
          curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
        }

        for( int i = 0; i < 16; i++ )
        {
          curr.tplAcc[i][stateId] = prev.tplAcc[i][prevId];
          curr.sum1st[i][stateId] = prev.sum1st[i][prevId];
          curr.absVal[i][stateId] = prev.absVal[i][prevId];
        }
      }
      else
      {
        // the path starts here: fresh bin budget and empty accumulators
        curr.numSig[stateId]      =  1;
        curr.refSbbCtxId[stateId] = -1;
        curr.remRegBins[stateId]  = prev.initRemRegBins;
        curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );

        for( int i = 0; i < 16; i++ )
        {
          curr.tplAcc[i][stateId] = 0;
          curr.sum1st[i][stateId] = 0;
          curr.absVal[i][stateId] = 0;
        }
      }

      if( decisions.absLevel[stateId] )
      {
        // the stored level is capped at 126/127, keeping its parity
        curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );

        if( scanInfo.currNbInfoSbb.numInv )
        {
          int min4_or_5 = std::min<TCoeff>( 4 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );

          // Clamp before narrowing to 8 bit: converting the raw level would
          // wrap modulo 256 (a level of 256 would add nothing) and bypass the
          // saturation in adds8.
          const uint8_t lvlSat = ( uint8_t ) std::min<TCoeff>( 255, decisions.absLevel[stateId] );

          // saturating unsigned 8-bit addition
          auto adds8 = []( uint8_t a, uint8_t b )
          {
            uint8_t c = a + b;
            if( c < a ) c = -1;
            return c;
          };

          // push this level into the accumulators of dependent position k:
          // "+32" bumps the non-zero count held in the upper bits of tplAcc
          auto update_deps = [&]( int k )
          {
            curr.tplAcc[scanInfo.currNbInfoSbb.invInPos[k]][stateId] += 32 + min4_or_5;
            curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId] = adds8( curr.sum1st[scanInfo.currNbInfoSbb.invInPos[k]][stateId], lvlSat );
          };

          // intentional cascade: numInv == n updates entries n-1 .. 0
          switch( scanInfo.currNbInfoSbb.numInv )
          {
          default:
          case 5:
            update_deps( 4 );
            // fall through
          case 4:
            update_deps( 3 );
            // fall through
          case 3:
            update_deps( 2 );
            // fall through
          case 2:
            update_deps( 1 );
            // fall through
          case 1:
            update_deps( 0 );
          }
        }
      }

      if( curr.remRegBins[stateId] >= 4 )
      {
        // select the contexts for the next position from its accumulator
        TCoeff  sumAbs1 = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
        TCoeff  sumNum  = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
        int sumGt1 = sumAbs1 - sumNum;

        curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( sumAbs1 + 1 ) >> 1, 3 );
        curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
      }
      else
      {
        curr.anyRemRegBinsLt4 = true;
      }
    }
  }
1001
1002
  /// Advance one trellis state across the last scan position of a sub-block.
  ///
  /// Predecessor encoding in decisions.prevId[stateId]:
  ///   >=  4 : the sub-block was skipped from state (prevId - 4) of `skip`
  ///   0..3  : continue from that state of `prev`
  ///   -1    : the path starts at this position
  ///   <= -2 : no valid path; only rdCost is copied
  ///
  /// After the state bookkeeping, the sub-block's capped absolute levels are
  /// written to the buffer obtained from commonCtx.getLevelPtrs(), the
  /// per-position accumulators are cleared and commonCtx.update() is called
  /// for this state before the contexts of the next position are selected.
  static void update1StateEOS( const int stateId, const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::StateMem& prev, DQIntern::CommonCtx& commonCtx )
  {
    curr.rdCost[stateId] = decisions.rdCost[stateId];

    const int decPrev = decisions.prevId[stateId];
    if( decPrev <= -2 )
    {
      return;
    }

    // state of `prev` whose absVal column is inherited, -1 for "all zero"
    int inheritFrom = -1;

    if( decPrev >= 4 )
    {
      CHECK( decisions.absLevel[stateId] != 0, "cannot happen" );

      const int skipId          = decPrev - 4;
      curr.numSig    [stateId]  = 0;
      curr.remRegBins[stateId]  = skip.remRegBins[skipId];
      curr.refSbbCtxId[stateId] = skipId;
    }
    else if( decPrev >= 0 )
    {
      curr.numSig[stateId]      = prev.numSig[decPrev] + !!decisions.absLevel[stateId];
      curr.refSbbCtxId[stateId] = prev.refSbbCtxId[decPrev];
      curr.remRegBins[stateId]  = prev.remRegBins[decPrev] - 1;

      if( curr.remRegBins[stateId] >= 4 )
      {
        curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
      }

      inheritFrom = decPrev;
    }
    else
    {
      curr.numSig[stateId]      =  1;
      curr.refSbbCtxId[stateId] = -1;
      curr.remRegBins[stateId]  = prev.initRemRegBins;
      curr.remRegBins[stateId] -= ( decisions.absLevel[stateId] < 2 ? decisions.absLevel[stateId] : 3 );
    }

    for( int i = 0; i < 16; i++ )
    {
      curr.absVal[i][stateId] = inheritFrom >= 0 ? prev.absVal[i][inheritFrom] : 0;
    }

    // level of the current position, capped at 126/127 keeping its parity
    curr.absVal[scanInfo.insidePos][stateId] = ( uint8_t ) std::min<TCoeff>( 126 + ( decisions.absLevel[stateId] & 1 ), decisions.absLevel[stateId] );

    uint8_t* levels[4];
    commonCtx.getLevelPtrs( scanInfo, levels[0], levels[1], levels[2], levels[3] );

    uint8_t* const dstLevels = levels[stateId];
    for( int i = 0; i < 16; i++ )
    {
      // save abs levels to commonCtx
      dstLevels[i] = curr.absVal[i][stateId];
      // clean the SBB ctx
      curr.tplAcc[i][stateId] = 0;
      curr.sum1st[i][stateId] = 0;
      curr.absVal[i][stateId] = 0;
    }

    commonCtx.update( scanInfo, curr.refSbbCtxId[stateId], stateId, curr );

    curr.numSig[stateId] = 0;

    if( curr.remRegBins[stateId] < 4 )
    {
      curr.anyRemRegBinsLt4 = true;
      return;
    }

    // select the contexts for the next position from its accumulator:
    // low 5 bits hold the capped level sum, upper bits the non-zero count
    const TCoeff sumAbs1 = curr.tplAcc[scanInfo.nextInsidePos][stateId] & 31;
    const TCoeff sumNum  = curr.tplAcc[scanInfo.nextInsidePos][stateId] >> 5u;
    const int    sumGt1  = sumAbs1 - sumNum;

    curr.ctx.sig[stateId] = scanInfo.sigCtxOffsetNext + std::min( ( sumAbs1 + 1 ) >> 1, 3 );
    curr.ctx.cff[stateId] = scanInfo.gtxCtxOffsetNext + std::min( sumGt1, 4 );
  }
1085
1086
  /// Apply the decisions of the current scan position to all four trellis
  /// states. The update reads from a snapshot of the state memory taken
  /// before any state is modified.
  static void updateStates( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, DQIntern::StateMem& curr )
  {
    DQIntern::StateMem snapshot = curr;

    // set again by update1State if any state drops below 4 regular bins
    curr.anyRemRegBinsLt4 = false;

    for( int stateId = 0; stateId < 4; stateId++ )
    {
      update1State( stateId, scanInfo, decisions, curr, snapshot );
    }

    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
  }
1098
1099
  /// End-of-sub-block counterpart of updateStates(): applies the decisions to
  /// all four trellis states via update1StateEOS, reading from a snapshot of
  /// the state memory and from the `skip` states.
  static void updateStatesEOS( const DQIntern::ScanInfo& scanInfo, const DQIntern::Decisions& decisions, const DQIntern::StateMem& skip, DQIntern::StateMem& curr, DQIntern::CommonCtx& commonCtx )
  {
    DQIntern::StateMem snapshot = curr;

    // set again by update1StateEOS if any state drops below 4 regular bins
    curr.anyRemRegBinsLt4 = false;

    for( int stateId = 0; stateId < 4; stateId++ )
    {
      update1StateEOS( stateId, scanInfo, decisions, skip, curr, snapshot, commonCtx );
    }

    curr.cffBitsCtxOffset = scanInfo.gtxCtxOffsetNext;
  }
1111
}; // namespace DQIntern
1112
1113
// Initial decision sets, one row per initializer list in member order:
// four costs (all rdCostInit >> 2), four levels, four predecessor ids.
//   [0] : levels -1, predecessor ids -2
//   [1] : levels 0, predecessor ids 4..7
static const DQIntern::Decisions startDec[2] =
{
  {
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
    { -1, -1, -1, -1 },
    { -2, -2, -2, -2 }
  },
  {
    { DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2, DQIntern::rdCostInit >> 2 },
    {  0,  0,  0,  0 },
    {  4,  5,  6,  7 }
  }
};
1128
1129
// Trellis-based quantization of one transform block.
//
// Steps, in order:
//   1. clear the destination levels and absSum,
//   2. walk backwards from the highest candidate scan position to find the
//      first coefficient whose magnitude exceeds the threshold derived from
//      m_quant.getLastThreshold(); if none does, set tu.lastPos[compID] = -1
//      and return,
//   3. initialise the rate estimator, common context and state memories,
//   4. run xDecideAndUpdate() for every scan position from that first position
//      down to 0,
//   5. pick the cheapest end state (cost must be below 0) and trace the chosen
//      path back through m_trellis, writing signed levels, accumulating absSum
//      and setting tu.lastPos[compID].
//
// tu                  - transform unit; its coefficient buffer and lastPos are written
// srcCoeff            - transform coefficients to quantize
// compID              - colour component
// cQP, lambda         - forwarded to m_quant.initQuantBlock()
// ctx                 - source of the fractional-bit tables for the rate estimator
// absSum              - out: sum of the absolute quantized levels
// enableScalingLists  - if true, a per-coefficient scale from quantCoeff is used
// quantCoeff          - per-coefficient scales, indexed by raster position
//                       (only read when enableScalingLists is true)
void DepQuant::xQuantDQ( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum, bool enableScalingLists, int* quantCoeff )
{
  using namespace DQIntern;

  //===== reset / pre-init =====
  const TUParameters& tuPars    = *m_scansRom->getTUPars( tu.blocks[compID], compID );
  m_quant.initQuantBlock( tu, compID, cQP, lambda );

  TCoeffSig*    dstLevels = tu.getCoeffs( compID ).buf;
  const TCoeff* srcVals   = srcCoeff.buf;
  const int     numCoeff  = tu.blocks[compID].area();

  ::memset( dstLevels, 0x00, numCoeff * sizeof( TCoeffSig ) );
  absSum = 0;

  const CompArea& blk      = tu.blocks[compID];
  const uint32_t  blkW     = blk.width;
  const uint32_t  blkH     = blk.height;
  const uint32_t  lfnstIdx = tu.cu->lfnstIdx;

  //===== zero-out region =====
  // Luma blocks that use an MTS transform (or SBT with MTS enabled, up to
  // 32x32) have a 32-sample dimension reduced to 16.
  const bool reducedLumaTransform =
    compID == COMP_Y &&
    ( tu.mtsIdx[compID] > MTS_SKIP || ( tu.cs->sps->MTS && tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32 ) );

  int  effWidth  = tuPars.m_width;
  int  effHeight = tuPars.m_height;
  bool zeroOut   = false;

  if( reducedLumaTransform )
  {
    if( tuPars.m_height == 32 )
    {
      effHeight = 16;
    }
    if( tuPars.m_width == 32 )
    {
      effWidth = 16;
    }
    zeroOut = ( effHeight < tuPars.m_height || effWidth < tuPars.m_width );
  }

  bool zeroOutforThres = zeroOut || ( 32 < tuPars.m_height || 32 < tuPars.m_width );

  //===== find first test position =====
  int firstTestPos = std::min<int>( tuPars.m_width, JVET_C0024_ZERO_OUT_TH ) * std::min<int>( tuPars.m_height, JVET_C0024_ZERO_OUT_TH ) - 1;

  if( lfnstIdx > 0 && tu.mtsIdx[compID] != MTS_SKIP && blkW >= 4 && blkH >= 4 )
  {
    // with LFNST only the first 8 (4x4 and 8x8 blocks) or 16 positions are tested
    const bool smallSquare = ( blkW == 4 && blkH == 4 ) || ( blkW == 8 && blkH == 8 );
    firstTestPos = smallSquare ? 7 : 15;
  }

  const TCoeff defaultQuantisationCoefficient = static_cast<TCoeff>( m_quant.getQScale() );
  const TCoeff thres         = m_quant.getLastThreshold();
  const int    zeroOutWidth  = ( tuPars.m_width  == 32 && zeroOut ) ? 16 : 32;
  const int    zeroOutHeight = ( tuPars.m_height == 32 && zeroOut ) ? 16 : 32;

  if( !enableScalingLists )
  {
    const TCoeff defaultTh = TCoeff( thres / ( defaultQuantisationCoefficient << 2 ) );

    m_findFirstPos( firstTestPos, srcVals, tuPars, defaultTh, zeroOutforThres, zeroOutWidth, zeroOutHeight );
  }
  else
  {
    // per-coefficient threshold, because every position has its own scale
    for( ; firstTestPos >= 0; firstTestPos-- )
    {
      const auto& blkPos = tuPars.m_scanId2BlkPos[firstTestPos];

      const bool insideZeroOut = zeroOutforThres && ( blkPos.x >= zeroOutWidth || blkPos.y >= zeroOutHeight );
      if( insideZeroOut )
      {
        continue;
      }

      const TCoeff thresTmp = TCoeff( thres / ( 4 * quantCoeff[blkPos.idx] ) );

      if( abs( srcVals[blkPos.idx] ) > thresTmp )
      {
        break;
      }
    }
  }

  if( firstTestPos < 0 )
  {
    // nothing exceeds the threshold: the block stays all-zero
    tu.lastPos[compID] = -1;
    return;
  }

  //===== real init =====
  RateEstimator::initCtx( tuPars, tu, compID, ctx.getFracBitsAcess() );
  m_commonCtx.reset( tuPars, *this );

  for( int stateId = 0; stateId < 4; stateId++ )
  {
    DQIntern::initStates( stateId, m_state_curr );
    DQIntern::initStates( stateId, m_state_skip );
    m_state_curr.m_sigFracBitsArray[stateId] = RateEstimator::sigFlagBits( stateId );
  }

  m_state_curr.m_gtxFracBitsArray = RateEstimator::gtxFracBits();

  // Only sum1st needs clearing here: it is read in setRiceParam before the
  // first updateStates{,EOS} call; tplAcc and absVal are set in
  // updateStates{,EOS} before their first access.
  memset( m_state_curr.sum1st, 0, sizeof( m_state_curr.sum1st ) );

  const int                  numCtx  = isLuma( compID ) ? 21 : 11;
  const CoeffFracBits* const gtxBits = gtxFracBits();
  for( int ctxIdx = 0; ctxIdx < numCtx; ctxIdx++ )
  {
    m_state_curr.cffBits1[ctxIdx] = gtxBits[ctxIdx].bits[1];
  }

  const int cappedWidth  = std::min( 32, effWidth );
  const int cappedHeight = std::min( 32, effHeight );
  m_state_curr.initRemRegBins   = ( cappedWidth * cappedHeight * MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT ) / 16;
  // the first coefficient uses the scalar implementation, because it checks
  // against the init state, which prohibits some paths
  m_state_curr.anyRemRegBinsLt4 = true;

  //===== populate trellis =====
  for( int pos = firstTestPos; pos >= 0; pos-- )
  {
    const ScanInfo& scanInfo   = tuPars.m_scanInfo[pos];
    const bool      forcedZero = zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight );

    int coeffScale = defaultQuantisationCoefficient;
    if( enableScalingLists )
    {
      // the quantizer has to be re-initialised for every coefficient scale
      coeffScale = quantCoeff[scanInfo.rasterPos];
      m_quant.initQuantBlock( tu, compID, cQP, lambda, coeffScale );
    }

    xDecideAndUpdate( abs( srcVals[scanInfo.rasterPos] ), scanInfo, forcedZero, coeffScale );
  }

  //===== find best path =====
  // prevId stays -1 (all-zero block) unless some end state has a negative cost
  int     prevId      = -1;
  int64_t minPathCost = 0;
  for( int stateId = 0; stateId < 4; stateId++ )
  {
    const int64_t pathCost = m_trellis[0][0].rdCost[stateId];
    if( pathCost < minPathCost )
    {
      minPathCost = pathCost;
      prevId      = stateId;
    }
  }

  //===== backward scanning =====
  int numVisited = 0;
  while( prevId >= 0 )
  {
    const Decisions& node  = m_trellis[numVisited][prevId >> 2];
    const int        lane  = prevId & 3;
    const TCoeffSig  level = node.absLevel[lane];
    const int32_t    blkpos = tuPars.m_scanId2BlkPos[numVisited].idx;

    // the level takes the sign of the source coefficient
    dstLevels[blkpos] = TCoeffSig( srcVals[blkpos] < 0 ? -level : level );
    absSum           += level;
    prevId            = node.prevId[lane];
    numVisited++;
  }

  tu.lastPos[compID] = numVisited - 1;
}
1265
1266
// Fills the decision set for one scan position.
//
// decisions is first reset to startDec[0]. For a zeroed-out position only the
// skip-sub-block checks are run (and only at SCAN_EOCSBB). Otherwise the
// coefficient is pre-quantized inline and the rd-cost checks are run for the
// candidate levels, followed by the start checks and, at SCAN_EOCSBB, the
// skip-sub-block checks.
//
// scanInfo    - info for the current scan position
// absCoeff    - absolute value of the transform coefficient
// lastOffset  - forwarded to checkRdCostStart()
// decisions   - out: decision set of this position
// zeroOut     - true if the position lies in the zero-out region
// quantCoeff  - scale applied to absCoeff
void DepQuant::xDecide( const DQIntern::ScanInfo& scanInfo, const TCoeff absCoeff, const int lastOffset, DQIntern::Decisions& decisions, bool zeroOut, int quantCoeff )
{
  using namespace DQIntern;

  ::memcpy( &decisions, startDec, sizeof( Decisions ) );

  StateMem&  skipState = m_state_skip;
  const bool endOfSbb  = ( scanInfo.spt == SCAN_EOCSBB );

  if( zeroOut )
  {
    if( endOfSbb )
    {
      for( int stateId = 0; stateId < 4; ++stateId )
      {
        checkRdCostSkipSbbZeroOut( stateId, decisions, stateId, skipState );
      }
    }
    return;
  }

  StateMem& prevState = m_state_curr;

  /// start inline prequant
  const int64_t scaledOrg = int64_t( absCoeff ) * quantCoeff;
  TCoeff        qIdx      = TCoeff( ( scaledOrg + m_quant.m_QAdd ) >> m_quant.m_QShift );

  if( qIdx < 0 )
  {
    // small coefficient: only level 1 is tested, with two distortion values
    int64_t scaledAdd = m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;
    int64_t distA     = ( ( scaledAdd + 0 * m_quant.m_DistStepAdd ) * 1 + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    int64_t distB     = ( ( scaledAdd + 1 * m_quant.m_DistStepAdd ) * 2 + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    /// stop inline prequant

    if( !prevState.anyRemRegBinsLt4 )
    {
      // has to be called as a first check, assumes no decision has been made yet
      m_checkAllRdCostsOdd1( scanInfo.spt, distA, distB, decisions, prevState );
    }
    else
    {
      setRiceParam( 0, scanInfo, prevState, false );
      checkRdCostsOdd1( 0, scanInfo.spt, distB, decisions, 2, 0, prevState );

      setRiceParam( 1, scanInfo, prevState, false );
      checkRdCostsOdd1( 1, scanInfo.spt, distB, decisions, 0, 2, prevState );

      setRiceParam( 2, scanInfo, prevState, false );
      checkRdCostsOdd1( 2, scanInfo.spt, distA, decisions, 3, 1, prevState );

      setRiceParam( 3, scanInfo, prevState, false );
      checkRdCostsOdd1( 3, scanInfo.spt, distA, decisions, 1, 3, prevState );
    }

    checkRdCostStart( lastOffset, PQData{ 1, distB }, decisions, 2, prevState );
  }
  else
  {
    /// start inline prequant
    qIdx = std::max<TCoeff>( 1, std::min<TCoeff>( m_quant.m_maxQIdx, qIdx ) );
    int64_t scaledAdd = qIdx * m_quant.m_DistStepAdd - scaledOrg * m_quant.m_DistOrgFact;

    // four consecutive quantization indices qIdx .. qIdx+3, each stored in the
    // slot given by its index modulo 4
    PQData cand[4];

    PQData& cand0 = cand[( qIdx + 0 ) & 3];
    PQData& cand1 = cand[( qIdx + 1 ) & 3];
    PQData& cand2 = cand[( qIdx + 2 ) & 3];
    PQData& cand3 = cand[( qIdx + 3 ) & 3];

    cand0.deltaDist = ( ( scaledAdd + 0 * m_quant.m_DistStepAdd ) * ( qIdx + 0 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    cand0.absLevel  = ( qIdx + 1 ) >> 1;

    cand1.deltaDist = ( ( scaledAdd + 1 * m_quant.m_DistStepAdd ) * ( qIdx + 1 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    cand1.absLevel  = ( qIdx + 2 ) >> 1;

    cand2.deltaDist = ( ( scaledAdd + 2 * m_quant.m_DistStepAdd ) * ( qIdx + 2 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    cand2.absLevel  = ( qIdx + 3 ) >> 1;

    cand3.deltaDist = ( ( scaledAdd + 3 * m_quant.m_DistStepAdd ) * ( qIdx + 3 ) + m_quant.m_DistAdd ) >> m_quant.m_DistShift;
    cand3.absLevel  = ( qIdx + 4 ) >> 1;
    /// stop inline prequant

    // only slot 0 and slot 3 are tested against 4 (the original code keeps the
    // checks on slot 2 and slot 1 commented out)
    const bool cff02ge4 = cand[0].absLevel >= 4;
    const bool cff13ge4 = cand[3].absLevel >= 4;

    if( !( cff02ge4 || cff13ge4 || prevState.anyRemRegBinsLt4 ) )
    {
      // has to be called as a first check, assumes no decision has been made yet
      m_checkAllRdCosts( scanInfo.spt, cand, decisions, prevState );
    }
    else
    {
      if( prevState.anyRemRegBinsLt4 || cff02ge4 )
      {
        setRiceParam( 0, scanInfo, prevState, cff02ge4 );
        setRiceParam( 1, scanInfo, prevState, cff02ge4 );
      }

      if( prevState.anyRemRegBinsLt4 || cff13ge4 )
      {
        setRiceParam( 2, scanInfo, prevState, cff13ge4 );
        setRiceParam( 3, scanInfo, prevState, cff13ge4 );
      }

      checkRdCosts( 0, scanInfo.spt, cand[0], cand[2], decisions, 0, 2, prevState );
      checkRdCosts( 1, scanInfo.spt, cand[0], cand[2], decisions, 2, 0, prevState );
      checkRdCosts( 2, scanInfo.spt, cand[3], cand[1], decisions, 1, 3, prevState );
      checkRdCosts( 3, scanInfo.spt, cand[3], cand[1], decisions, 3, 1, prevState );
    }

    checkRdCostStart( lastOffset, cand[0], decisions, 0, prevState );
    checkRdCostStart( lastOffset, cand[2], decisions, 2, prevState );
  }

  if( endOfSbb )
  {
    for( int stateId = 0; stateId < 4; ++stateId )
    {
      checkRdCostSkipSbb( stateId, decisions, stateId, skipState );
    }
  }
}
1387
1388
// Processes one scan position: computes its decisions with xDecide() and then
// advances the state memories (nothing is advanced for scan index 0).
//
// absCoeff    - absolute value of the transform coefficient
// scanInfo    - info for the current scan position
// zeroOut     - true if the position lies in the zero-out region
// quantCoeff  - scale forwarded to xDecide()
void DepQuant::xDecideAndUpdate( const TCoeff absCoeff, const DQIntern::ScanInfo& scanInfo, bool zeroOut, int quantCoeff )
{
  using namespace DQIntern;

  Decisions& current = m_trellis[scanInfo.scanIdx][0];

  xDecide( scanInfo, absCoeff, lastOffset( scanInfo.scanIdx ), current, zeroOut, quantCoeff );

  if( !scanInfo.scanIdx )
  {
    // last position of the backward pass: no further state is needed
    return;
  }

  if( scanInfo.spt == SCAN_SOCSBB )
  {
    // remember the current state as the skip state for this sub-block
    memcpy( &m_state_skip, &m_state_curr, DQIntern::StateMemSkipCpySize );
  }

  if( scanInfo.insidePos == 0 )
  {
    m_commonCtx.swap();
    m_updateStatesEOS( scanInfo, current, m_state_skip, m_state_curr, m_commonCtx );
    // duplicate the decisions into the second slot of this trellis position
    ::memcpy( &m_trellis[scanInfo.scanIdx][1], &current, sizeof( Decisions ) );
  }
  else if( !zeroOut )
  {
    m_updateStates( scanInfo, current, m_state_curr );
  }
}
1415
1416
void DepQuant::xDequantDQ( const TransformUnit& tu,  CoeffBuf& recCoeff, const ComponentID compID, const QpParam& cQP, bool enableScalingLists, int* piDequantCoef )
1417
0
{
1418
0
  m_quant.dequantBlock( tu, compID, cQP, recCoeff, enableScalingLists, piDequantCoef );
1419
0
}
1420
1421
0
// Constructs a dependent quantizer.
//
// other            - optional quantizer to share data with; if non-null it must
//                    be a DepQuant, whose scan ROM is then shared
// enc              - not used in this constructor body
// useScalingLists  - forwarded to the QuantRDOQ2 base
// enableOpt        - if true, the SIMD initialisers compiled in for the target
//                    are run after the scalar defaults have been installed
DepQuant::DepQuant( const Quant* other, bool enc, bool useScalingLists, bool enableOpt ) : QuantRDOQ2( other, useScalingLists ), RateEstimator(), m_commonCtx()
{
  const DepQuant* dq = dynamic_cast<const DepQuant*>( other );
  CHECK( other && !dq, "The DepQuant cast must be successfull!" );

  if( dq )
  {
    // share the scan tables of the instance we were created from
    m_scansRom = dq->m_scansRom;
  }
  else
  {
    // stand-alone instance: build and initialise its own scan tables
    m_scansRom = std::make_shared<DQIntern::Rom>();
    m_scansRom->init();
  }

  // preset both decision slots of every trellis position
  const int numTrellisPos = MAX_TB_SIZEY * MAX_TB_SIZEY;
  for( int pos = 0; pos < numTrellisPos; ++pos )
  {
    memcpy( m_trellis[pos], startDec, sizeof( startDec ) );
  }

  // scalar defaults
  m_checkAllRdCosts     = DQIntern::checkAllRdCosts;
  m_checkAllRdCostsOdd1 = DQIntern::checkAllRdCostsOdd1;
  m_updateStatesEOS     = DQIntern::updateStatesEOS;
  m_updateStates        = DQIntern::updateStates;
  m_findFirstPos        = DQIntern::findFirstPos;

  if( enableOpt )
  {
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_OPT_QUANT
    initDepQuantX86();
#endif
#if defined( TARGET_SIMD_ARM ) && ENABLE_SIMD_OPT_QUANT
    initDepQuantARM();
#endif
  }
}
1457
1458
// Nothing to release explicitly: the members clean up after themselves.
DepQuant::~DepQuant() = default;
1461
1462
// Quantizes one transform block, choosing between three paths:
//   - selective RDOQ is on and xNeedRDOQ() reports false: the block is left
//     empty (lastPos = -1, uiAbsSum = 0),
//   - dependent quantization is enabled for the slice and the block is not
//     transform-skip: xQuantDQ() is used,
//   - otherwise: QuantRDOQ2::quant() is used.
//
// tu        - transform unit to quantize
// compID    - colour component
// pSrc      - transform coefficients
// uiAbsSum  - out: sum of the absolute quantized levels
// cQP       - quantization parameters
// ctx       - entropy-coding contexts used for rate estimation
void DepQuant::quant( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff& uiAbsSum, const QpParam& cQP, const Ctx& ctx )
{
  if( tu.cs->picture->useSelectiveRdoq && !xNeedRDOQ( tu, compID, pSrc, cQP ) )
  {
    tu.lastPos[compID] = -1;
    uiAbsSum           =  0;
    return;
  }

  const bool useDepQuant = tu.cs->slice->depQuantEnabled && tu.mtsIdx[compID] != MTS_SKIP;
  if( !useDepQuant )
  {
    QuantRDOQ2::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx );
    return;
  }

  //===== scaling matrix ====
  const bool      isTransformSkip = ( tu.mtsIdx[compID] == MTS_SKIP );
  const int       qpDQ            = cQP.Qp( isTransformSkip ) + 1;
  const int       qpRem           = qpDQ % 6;
  const CompArea& rect            = tu.blocks[compID];
  const int       blkWidth        = rect.width;
  const int       blkHeight       = rect.height;

  const uint32_t scalingListType = getScalingListType( tu.cu->predMode, compID );
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");

  const uint32_t log2TrWidth  = Log2( blkWidth );
  const uint32_t log2TrHeight = Log2( blkHeight );

  // LFNST counts as applied for luma, and for any component of a separate-tree CU
  const bool isLfnstApplied     = tu.cu->lfnstIdx > 0 && ( CU::isSepTree( *tu.cu ) || isLuma( compID ) );
  const bool enableScalingLists = getUseScalingList( blkWidth, blkHeight, isTransformSkip, isLfnstApplied );

  int* const scaleTable = Quant::getQuantCoeff( scalingListType, qpRem, log2TrWidth, log2TrHeight );

  xQuantDQ( tu, pSrc, compID, cQP, Quant::m_dLambda, ctx, uiAbsSum, enableScalingLists, scaleTable );
}
1491
1492
void DepQuant::dequant( const TransformUnit& tu, CoeffBuf& dstCoeff, const ComponentID compID, const QpParam& cQP )
1493
0
{
1494
0
  if( tu.cs->slice->depQuantEnabled && (tu.mtsIdx[compID] != MTS_SKIP) )
1495
0
  {
1496
0
    const int         qpDQ            = cQP.Qp(tu.mtsIdx[compID]==MTS_SKIP) + 1;
1497
0
    const int         qpPer           = qpDQ / 6;
1498
0
    const int         qpRem           = qpDQ - 6 * qpPer;
1499
0
    const CompArea    &rect           = tu.blocks[compID];
1500
0
    const int         width           = rect.width;
1501
0
    const int         height          = rect.height;
1502
0
    uint32_t          scalingListType = getScalingListType(tu.cu->predMode, compID);
1503
0
    CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
1504
0
    const uint32_t    log2TrWidth    = Log2(width);
1505
0
    const uint32_t    log2TrHeight   = Log2(height);
1506
0
    const bool isLfnstApplied        = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID));
1507
0
    const bool enableScalingLists    = getUseScalingList(width, height, (tu.mtsIdx[compID] == MTS_SKIP), isLfnstApplied);
1508
0
    xDequantDQ( tu, dstCoeff, compID, cQP, enableScalingLists, Quant::getDequantCoeff(scalingListType, qpRem, log2TrWidth, log2TrHeight) );
1509
0
  }
1510
0
  else
1511
0
  {
1512
0
    QuantRDOQ::dequant( tu, dstCoeff, compID, cQP );
1513
0
  }
1514
0
}
1515
1516
void DepQuant::init( int rdoq, bool useRDOQTS, int thrVal )
1517
0
{
1518
0
  QuantRDOQ2::init( rdoq, useRDOQTS, thrVal );
1519
0
  m_quant.init( thrVal );
1520
0
}
1521
1522
} // namespace vvenc
1523
1524
//! \}
1525