Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/EncoderLib/EncSlice.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     EncSlice.cpp
45
    \brief    slice encoder class
46
*/
47
48
#include "EncSlice.h"
49
#include "EncStage.h"
50
#include "EncLib.h"
51
#include "EncPicture.h"
52
#include "BitAllocation.h"
53
#include "CommonLib/UnitTools.h"
54
#include "CommonLib/Picture.h"
55
#include "CommonLib/TimeProfiler.h"
56
#include "CommonLib/dtrace_codingstruct.h"
57
#include "Utilities/NoMallocThreadPool.h"
58
59
#include <math.h>
60
#include "vvenc/vvencCfg.h"
61
62
//! \ingroup EncoderLib
63
//! \{
64
65
namespace vvenc {
66
67
#ifdef TRACE_ENABLE_ITT
68
static const __itt_domain* itt_domain_encode              = __itt_domain_create( "Encode" );
69
static const __itt_string_handle* itt_handle_ctuEncode    = __itt_string_handle_create( "Encode_CTU" );
70
static const __itt_string_handle* itt_handle_rspLfVer     = __itt_string_handle_create( "RspLfVer_CTU" );
71
static const __itt_string_handle* itt_handle_lfHor        = __itt_string_handle_create( "LfHor_CTU" );
72
static const __itt_string_handle* itt_handle_sao          = __itt_string_handle_create( "SAO_CTU" );
73
static const __itt_string_handle* itt_handle_alf_stat     = __itt_string_handle_create( "ALF_CTU_STAT" );
74
static const __itt_string_handle* itt_handle_alf_derive   = __itt_string_handle_create( "ALF_DERIVE" );
75
static const __itt_string_handle* itt_handle_alf_recon    = __itt_string_handle_create( "ALF_RECONSTRUCT" );
76
static const __itt_string_handle* itt_handle_ccalf_stat   = __itt_string_handle_create( "CCALF_CTU_STAT" );
77
static const __itt_string_handle* itt_handle_ccalf_derive = __itt_string_handle_create( "CCALF_DERIVE" );
78
static const __itt_string_handle* itt_handle_ccalf_recon  = __itt_string_handle_create( "CCALF_RECONSTRUCT" );
79
#endif
80
81
void setArbitraryWppPattern( const PreCalcValues& pcv, std::vector<int>& ctuAddrMap, int stepX = 1 )
82
0
{
83
0
  ctuAddrMap.resize( pcv.sizeInCtus, 0 );
84
0
  std::vector<int> x_in_line( pcv.heightInCtus, 0 );
85
0
  int x = 0, y = 0, addr = 0;
86
0
  int y_top = 0;
87
0
  const int step = stepX; // number of CTUs in x-direction to scan 
88
0
  ctuAddrMap[addr++] = x++; // first entry (can be omitted)
89
0
  while( addr < pcv.sizeInCtus )
90
0
  {
91
    // fill entries in x-direction
92
0
    int x1 = x;
93
0
    while( x < std::min(x1 + step, (int)pcv.widthInCtus) )
94
0
    {
95
      // general WPP condition (top-right CTU availability)
96
0
      if( y > 0 && !( x_in_line[y - 1] - x >= 2 ) && x != pcv.widthInCtus - 1 )
97
0
        break;
98
0
      ctuAddrMap[addr++] = y*pcv.widthInCtus + x;
99
0
      x++;
100
0
    }
101
0
    x_in_line[y] = x;
102
        
103
0
    y += 1;
104
105
0
    if( y >= pcv.heightInCtus )
106
0
    {
107
      // go up
108
0
      if( x_in_line[y_top] >= pcv.widthInCtus )
109
0
      {
110
0
        y_top++;
111
0
        if( y_top >= pcv.heightInCtus )
112
0
        {
113
          // done
114
0
          break;
115
0
        }
116
0
      }
117
0
      y = y_top;
118
0
    }
119
0
    x = x_in_line[y];
120
121
0
    CHECK( y >= pcv.heightInCtus, "Height in CTUs is exceeded" );
122
0
  }
123
0
}
124
125
struct TileLineEncRsrc
126
{
127
  BitEstimator            m_BitEstimator;
128
  CABACWriter             m_CABACEstimator;
129
  BitEstimator            m_SaoBitEstimator;
130
  CABACWriter             m_SaoCABACEstimator;
131
  BitEstimator            m_AlfBitEstimator;
132
  CABACWriter             m_AlfCABACEstimator;
133
  ReuseUniMv              m_ReuseUniMv;
134
  BlkUniMvInfoBuffer      m_BlkUniMvInfoBuffer;
135
  AffineProfList          m_AffineProfList;
136
  IbcBvCand               m_CachedBvs;
137
  EncSampleAdaptiveOffset m_encSao;
138
  int                     m_prevQp[ MAX_NUM_CH ];
139
0
  TileLineEncRsrc( const VVEncCfg& encCfg ) : m_CABACEstimator( m_BitEstimator ), m_SaoCABACEstimator( m_SaoBitEstimator ), m_AlfCABACEstimator( m_AlfBitEstimator ) { m_AffineProfList.init( ! encCfg.m_picReordering ); }
140
};
141
142
struct PerThreadRsrc
143
{
144
  CtxCache  m_CtxCache;
145
  EncCu     m_encCu;
146
  PelStorage m_alfTempCtuBuf;
147
};
148
149
struct CtuEncParam
150
{
151
  Picture*  pic;
152
  EncSlice* encSlice;
153
  int       ctuRsAddr;
154
  int       ctuPosX;
155
  int       ctuPosY;
156
  UnitArea  ctuArea;
157
  int       tileLineResIdx;
158
159
0
  CtuEncParam() : pic( nullptr ), encSlice( nullptr ), ctuRsAddr( 0 ), ctuPosX( 0 ), ctuPosY( 0 ), ctuArea(), tileLineResIdx( 0 ) {}
160
  CtuEncParam( Picture* _p, EncSlice* _s, const int _r, const int _x, const int _y, const int _tileLineResIdx )
161
    : pic( _p )
162
    , encSlice( _s )
163
    , ctuRsAddr( _r )
164
    , ctuPosX( _x )
165
    , ctuPosY( _y )
166
    , ctuArea( pic->chromaFormat, pic->slices[0]->pps->pcv->getCtuArea( _x, _y ) )
167
0
    , tileLineResIdx( _tileLineResIdx ) {}
168
};
169
170
// ====================================================================================================================
171
// Constructor / destructor / create / destroy
172
// ====================================================================================================================
173
174
// Default constructor: all external references start out unset, they are provided by init().
EncSlice::EncSlice()
  : m_pcEncCfg( nullptr )
  , m_threadPool( nullptr )
  , m_ctuTasksDoneCounter( nullptr )
  , m_ctuEncDelay( 1 )                   // re-evaluated per picture in initPic()
  , m_pLoopFilter( nullptr )
  , m_pALF( nullptr )
  , m_pcRateCtrl( nullptr )
  , m_CABACWriter( m_BinEncoder )
  , m_encCABACTableIdx( VVENC_I_SLICE )
{
}
186
187
188
// Destructor: releases everything that init() allocated.
EncSlice::~EncSlice()
{
  // per tile-line resources
  for( auto* lineRsrc : m_TileLineEncRsrc )
  {
    delete lineRsrc;
  }
  m_TileLineEncRsrc.clear();

  // per thread resources, the ALF temp buffer has to be destroyed explicitly
  for( auto* threadRsrc : m_ThreadRsrc )
  {
    threadRsrc->m_alfTempCtuBuf.destroy();
    delete threadRsrc;
  }
  m_ThreadRsrc.clear();

  m_saoReconParams.clear();

  // SAO statistics: one array per component, one array of component pointers per entry
  for( auto ctuStats : m_saoStatData )
  {
    for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
    {
      delete[] ctuStats[ compIdx ];
    }
    delete[] ctuStats;
  }
  m_saoStatData.clear();
}
215
216
void EncSlice::init( const VVEncCfg& encCfg,
217
                     const SPS& sps,
218
                     const PPS& pps,
219
                     std::vector<int>* const globalCtuQpVector,
220
                     LoopFilter& loopFilter,
221
                     EncAdaptiveLoopFilter& alf,
222
                     RateCtrl& rateCtrl,
223
                     NoMallocThreadPool* threadPool,
224
                     WaitCounter* ctuTasksDoneCounter )
225
0
{
226
0
  m_pcEncCfg            = &encCfg;
227
0
  m_pLoopFilter         = &loopFilter;
228
0
  m_pALF                = &alf;
229
0
  m_pcRateCtrl          = &rateCtrl;
230
0
  m_threadPool          = threadPool;
231
0
  m_ctuTasksDoneCounter = ctuTasksDoneCounter;
232
0
  m_syncPicCtx.resize( encCfg.m_entropyCodingSyncEnabled ? pps.getNumTileLineIds() : 0 );
233
234
  
235
0
  const int maxCntRscr = ( encCfg.m_numThreads > 0 ) ? pps.getNumTileLineIds() : 1;
236
0
  const int maxCtuEnc  = ( encCfg.m_numThreads > 0 && threadPool ) ? threadPool->numThreads() : 1;
237
238
0
  m_ThreadRsrc.resize( maxCtuEnc,  nullptr );
239
0
  m_TileLineEncRsrc.resize( maxCntRscr, nullptr );
240
241
0
  for( PerThreadRsrc*& taskRsc : m_ThreadRsrc )
242
0
  {
243
0
    taskRsc = new PerThreadRsrc();
244
0
    taskRsc->m_encCu.init( encCfg,
245
0
                           sps,
246
0
                           globalCtuQpVector,
247
0
                           m_syncPicCtx.data(),
248
0
                           &rateCtrl );
249
0
    taskRsc->m_alfTempCtuBuf.create( pps.pcv->chrFormat, Area( 0, 0, pps.pcv->maxCUSize + (MAX_ALF_PADDING_SIZE << 1), pps.pcv->maxCUSize + (MAX_ALF_PADDING_SIZE << 1) ), pps.pcv->maxCUSize, MAX_ALF_PADDING_SIZE, 0, false );
250
0
  }
251
252
0
  for( TileLineEncRsrc*& lnRsc : m_TileLineEncRsrc )
253
0
  {
254
0
    lnRsc = new TileLineEncRsrc( encCfg );
255
0
    if( sps.saoEnabled )
256
0
    {
257
0
      lnRsc->m_encSao.init( encCfg );
258
0
    }
259
0
  }
260
261
0
  const int sizeInCtus = pps.pcv->sizeInCtus;
262
0
  m_processStates = std::vector<ProcessCtuState>( sizeInCtus );
263
0
  m_saoReconParams.resize( sizeInCtus );
264
265
0
  ::memset( m_saoDisabledRate, 0, sizeof( m_saoDisabledRate ) );
266
267
  // sao statistics
268
0
  if( encCfg.m_bUseSAO )
269
0
  {
270
0
    m_saoStatData.resize( sizeInCtus );
271
0
    for( int i = 0; i < sizeInCtus; i++ )
272
0
    {
273
0
      m_saoStatData[ i ] = new SAOStatData*[ MAX_NUM_COMP ];
274
0
      for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
275
0
      {
276
0
        m_saoStatData[ i ][ compIdx ] = new SAOStatData[ NUM_SAO_NEW_TYPES ];
277
0
      }
278
0
    }
279
0
  }
280
0
  ctuEncParams.resize( sizeInCtus );
281
0
  setArbitraryWppPattern( *pps.pcv, m_ctuAddrMap, 3 );
282
283
0
  const unsigned asuHeightInCtus = m_pALF->getAsuHeightInCtus();
284
0
  const unsigned numDeriveLines  = encCfg.m_ifpLines ? 
285
0
    std::min( ((encCfg.m_ifpLines & (~(asuHeightInCtus - 1))) + asuHeightInCtus), pps.pcv->heightInCtus ) : pps.pcv->heightInCtus;
286
0
  m_alfDeriveCtu  = numDeriveLines * pps.pcv->widthInCtus - 1;
287
0
  m_ccalfDeriveCtu = encCfg.m_ifpLines ? pps.pcv->widthInCtus * std::min((unsigned)encCfg.m_ifpLines + 1, pps.pcv->heightInCtus) - 1: pps.pcv->sizeInCtus - 1;
288
0
}
289
290
291
/**
 * \brief Per-picture setup: slice map, CABAC table index, slice QP / lambda,
 *        per-picture state of the encoder resources and the CTU encoding delay.
 *
 * \param pic  picture to be encoded
 */
void EncSlice::initPic( Picture* pic )
{
  Slice* slice = pic->cs->slice;

  // slice map: with more than one tile the first slice map of the PPS is used,
  // otherwise all CTUs of the picture are added to the slice
  const bool multipleTiles = slice->pps->numTileCols * slice->pps->numTileRows > 1;
  if( multipleTiles )
  {
    slice->sliceMap = slice->pps->sliceMap[0];
  }
  else
  {
    const PreCalcValues& pcv = *pic->cs->pcv;
    slice->sliceMap.addCtusToSlice( 0, pcv.widthInCtus, 0, pcv.heightInCtus, pcv.widthInCtus );
  }

  // this ensures that independently encoded bitstream chunks can be combined to bit-equal
  const bool      useSliceType  = ! slice->pps->cabacInitPresent || slice->pendingRasInit;
  const SliceType cabacTableIdx = useSliceType ? slice->sliceType : m_encCABACTableIdx;
  slice->encCABACTableIdx = cabacTableIdx;

  // set QP and lambda values
  xInitSliceLambdaQP( slice );

  for( auto* threadRsrc : m_ThreadRsrc )
  {
    threadRsrc->m_encCu.initPic( pic );
  }

  for( auto* lineRsrc : m_TileLineEncRsrc )
  {
    lineRsrc->m_ReuseUniMv.resetReusedUniMvs();
  }

  m_ctuEncDelay = 1;
  if( pic->useIBC )
  {
    // IBC needs unfiltered samples up to max IBC search range
    // therefore ensure that ctuDelayPerCtuSize CTUs have been encoded first
    // assuming IBC localSearchRangeX / Y = 128
    const int  ctuDelayPerCtuSize[ 3 ] = { 15, 3, 1 }; // indexed by maxCUSizeLog2 - 5
    const auto ctuSizeLog2             = pic->cs->pcv->maxCUSizeLog2;
    CHECK( ctuSizeLog2 < 5 || ctuSizeLog2 > 7, "invalid max CTUSize" );
    m_ctuEncDelay = ctuDelayPerCtuSize[ ctuSizeLog2 - 5 ];
  }
}
332
333
334
335
/**
 * \brief Pre-computes the slice QP, the slice lambda and the slice-level chroma QP deltas
 *        and passes QP and lambda on to all CU encoders.
 *
 * \param slice  slice to set up; sliceQp, sliceChromaQpDelta and chromaQpAdjEnabled are written,
 *               pic->picInitialQP may be reset
 */
void EncSlice::xInitSliceLambdaQP( Slice* slice )
{
  const VVEncCfg& cfg = *m_pcEncCfg;

  // true when the intra / periodic chroma QP offsets apply to this slice
  const auto useIntraOrPeriodicOffset = [&]() -> bool
  {
    return (slice->isIntra() && !slice->sps->IBC) || (cfg.m_sliceChromaQpOffsetPeriodicity > 0 && (slice->poc % cfg.m_sliceChromaQpOffsetPeriodicity) == 0);
  };

  // pre-compute lambda and QP
  const bool rcp = (cfg.m_RCTargetBitrate > 0 && slice->pic->picInitialQP >= 0); // 2nd pass
  int    iQP     = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, slice->pic->picInitialQP); // RC start QP
  double dQP;
  double dLambda;
  if (rcp)
  {
    // QP and lambda were already chosen for the picture
    dQP     = (double) slice->pic->picInitialQP;
    dLambda = slice->pic->picInitialLambda;
  }
  else
  {
    dQP     = xGetQPForPicture (slice);
    dLambda = xCalculateLambda (slice, slice->TLayer, dQP, dQP, iQP); // also updates iQP
  }

  int sliceChromaQpOffsetIntraOrPeriodic[2] = { cfg.m_sliceChromaQpOffsetIntraOrPeriodic[0], cfg.m_sliceChromaQpOffsetIntraOrPeriodic[1] };
  const int lookAheadRCCQpOffset = 0;   // was (m_pcEncCfg->m_RCTargetBitrate > 0 && m_pcEncCfg->m_LookAhead && CS::isDualITree (*slice->pic->cs) ? 1 : 0);

  if (cfg.m_usePerceptQPA) // adapt sliceChromaQpOffsetIntraOrPeriodic and pic->ctuAdaptedQP
  {
    const bool     cqp               = useIntraOrPeriodicOffset();
    const uint32_t startCtuTsAddr    = slice->sliceMap.ctuAddrInSlice[0];
    const uint32_t boundingCtuTsAddr = slice->pic->cs->pcv->sizeInCtus;
    int* const     chromaQpOffsets   = (slice->pps->sliceChromaQpFlag && cqp ? sliceChromaQpOffsetIntraOrPeriodic : nullptr);

    iQP = BitAllocation::applyQPAdaptationSlice (slice, m_pcEncCfg, iQP, dLambda, &slice->pic->picVA.visAct, // updates pic->picInitialQP
                                                 *m_ThreadRsrc[0]->m_encCu.getQpPtr(), m_pcRateCtrl->getIntraPQPAStats(),
                                                 chromaQpOffsets,
                                                 m_pcRateCtrl->getMinNoiseLevels(), startCtuTsAddr, boundingCtuTsAddr);
    if (iQP >= 0) // QP OK?
    {
      dLambda *= pow (2.0, ((double) iQP - dQP) / 3.0); // adjust lambda based on change of slice QP
    }
    else
    {
      iQP = (int) dQP; // revert to unadapted slice QP
    }
  }
  else if (rcp)
  {
    slice->pic->picInitialQP = -1; // no QPA - unused now
  }

  // slice-level chroma QP offsets, they stay zero when the PPS does not signal them
  int cbQP   = 0;
  int crQP   = 0;
  int cbCrQP = 0;

  if (slice->pps->sliceChromaQpFlag)
  {
    const bool useDualTreeOffsets = CS::isDualITree (*slice->pic->cs) && !cfg.m_usePerceptQPA && (cfg.m_sliceChromaQpOffsetPeriodicity == 0);

    if (useDualTreeOffsets)
    {
      cbQP   = cfg.m_chromaCbQpOffsetDualTree   + lookAheadRCCQpOffset; // QP offset for dual-tree
      crQP   = cfg.m_chromaCrQpOffsetDualTree   + lookAheadRCCQpOffset;
      cbCrQP = cfg.m_chromaCbCrQpOffsetDualTree + lookAheadRCCQpOffset;
    }
    else
    {
      const GOPEntry& gopEntry                  = *(slice->pic->gopEntry);
      const bool      bUseIntraOrPeriodicOffset = useIntraOrPeriodicOffset();

      cbQP   = (bUseIntraOrPeriodicOffset ? sliceChromaQpOffsetIntraOrPeriodic[0] : gopEntry.m_CbQPoffset) + lookAheadRCCQpOffset;
      crQP   = (bUseIntraOrPeriodicOffset ? sliceChromaQpOffsetIntraOrPeriodic[1] : gopEntry.m_CrQPoffset) + lookAheadRCCQpOffset;
      cbCrQP = (cbQP + crQP) >> 1; // use floor of average CbCr chroma QP offset for joint-CbCr coding

      // limit the sum of PPS and slice offset to [-12, 12]
      cbQP   = Clip3 (-12, 12, cbQP   + slice->pps->chromaQpOffset[COMP_Cb])         - slice->pps->chromaQpOffset[COMP_Cb];
      crQP   = Clip3 (-12, 12, crQP   + slice->pps->chromaQpOffset[COMP_Cr])         - slice->pps->chromaQpOffset[COMP_Cr];
      cbCrQP = Clip3 (-12, 12, cbCrQP + slice->pps->chromaQpOffset[COMP_JOINT_CbCr]) - slice->pps->chromaQpOffset[COMP_JOINT_CbCr];
    }
  }

  slice->sliceChromaQpDelta[COMP_Cb]         = Clip3 (-12, 12, cbQP);
  slice->sliceChromaQpDelta[COMP_Cr]         = Clip3 (-12, 12, crQP);
  slice->sliceChromaQpDelta[COMP_JOINT_CbCr] = (slice->sps->jointCbCr ? Clip3 (-12, 12, cbCrQP) : 0);

  for (auto* threadRsrc : m_ThreadRsrc)
  {
    threadRsrc->m_encCu.setUpLambda( *slice, dLambda, iQP, true, true );
  }

  slice->sliceQp            = iQP;
  slice->chromaQpAdjEnabled = slice->pps->chromaQpOffsetListLen > 0;
}
398
399
static const int highTL[6] = { -1, 0, 0, 2, 4, 5 };
400
401
int EncSlice::xGetQPForPicture( const Slice* slice )
402
0
{
403
0
  const int lumaQpBDOffset = slice->sps->qpBDOffset[ CH_L ];
404
0
  int qp;
405
406
0
  if ( m_pcEncCfg->m_costMode == VVENC_COST_LOSSLESS_CODING )
407
0
  {
408
0
    qp = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP;
409
0
  }
410
0
  else
411
0
  {
412
0
    qp = m_pcEncCfg->m_QP + slice->pic->gopAdaptedQP;
413
414
0
    if (m_pcEncCfg->m_usePerceptQPA)
415
0
    {
416
0
      const int tlayer = slice->pic->gopEntry->m_vtl;
417
418
0
      qp = (slice->isIntra() ? std::min (qp, ((qp - std::min (3, floorLog2 (m_pcEncCfg->m_GOPSize) - 4/*TODO 3 with JVET-AC0149?*/)) * 15 + 3) >> 4) : highTL[tlayer] + ((qp * (16 + std::min (2, tlayer))) >> 4) + 0/*TODO +-1?*/);
419
0
    }
420
0
    else if( slice->isIntra() )
421
0
    {
422
0
      qp += m_pcEncCfg->m_intraQPOffset;
423
0
    }
424
0
    else
425
0
    {
426
0
      if( qp != -lumaQpBDOffset )
427
0
      {
428
0
        const GOPEntry &gopEntry = *(slice->pic->gopEntry);
429
        // adjust QP according to the QP offset for the GOP entry.
430
0
        qp += gopEntry.m_QPOffset;
431
432
        // adjust QP according to QPOffsetModel for the GOP entry.
433
0
        double dqpOffset = qp * gopEntry.m_QPOffsetModelScale + gopEntry.m_QPOffsetModelOffset + 0.5;
434
0
        int qpOffset = (int)floor( Clip3<double>( 0.0, 3.0, dqpOffset ) );
435
0
        qp += qpOffset;
436
0
      }
437
0
    }
438
439
0
    if( m_pcEncCfg->m_blockImportanceMapping && !slice->pic->m_picShared->m_ctuBimQpOffset.empty() )
440
0
    {
441
0
      qp += slice->pic->m_picShared->m_picAuxQpOffset;
442
0
    }
443
0
  }
444
0
  qp = Clip3( -lumaQpBDOffset, MAX_QP, qp );
445
0
  return qp;
446
0
}
447
448
449
double EncSlice::xCalculateLambda( const Slice* slice,
450
                                   const int    depth, // slice GOP hierarchical depth.
451
                                   const double refQP, // initial slice-level QP
452
                                   const double dQP,   // initial double-precision QP
453
                                         int&   iQP )  // returned integer QP.
454
0
{
455
0
  const GOPEntry &gopEntry = *(slice->pic->gopEntry);
456
0
  const int SHIFT_QP       = 12;
457
0
  const int temporalId     = gopEntry.m_temporalId;
458
0
  std::vector<double> intraLambdaModifiers;
459
0
  for ( int i = 0; i < VVENC_MAX_TLAYER; i++ )
460
0
  {
461
0
    if( m_pcEncCfg->m_adIntraLambdaModifier[i] != 0.0 ) intraLambdaModifiers.push_back( m_pcEncCfg->m_adIntraLambdaModifier[i] );
462
0
    else break;
463
0
  }
464
465
0
  int bitdepth_luma_qp_scale = 6
466
0
                               * (slice->sps->bitDepths[ CH_L ] - 8
467
0
                                  - DISTORTION_PRECISION_ADJUSTMENT(slice->sps->bitDepths[ CH_L ]));
468
0
  double qp_temp = dQP + bitdepth_luma_qp_scale - SHIFT_QP;
469
  // Case #1: I or P-slices (key-frame)
470
0
  double dQPFactor = gopEntry.m_QPFactor;
471
0
  if( slice->sliceType == VVENC_I_SLICE )
472
0
  {
473
0
    if (m_pcEncCfg->m_dIntraQpFactor>=0.0 && gopEntry.m_sliceType != 'I')
474
0
    {
475
0
      dQPFactor = m_pcEncCfg->m_dIntraQpFactor;
476
0
    }
477
0
    else
478
0
    {
479
0
      dQPFactor = 0.57;
480
0
      if( ! m_pcEncCfg->m_lambdaFromQPEnable )
481
0
      {
482
0
        const int NumberBFrames = ( m_pcEncCfg->m_GOPSize - 1 );
483
0
        const double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05 * (double)NumberBFrames );
484
0
        dQPFactor *= dLambda_scale;
485
0
      }
486
0
    }
487
0
  }
488
0
  else if( m_pcEncCfg->m_lambdaFromQPEnable )
489
0
  {
490
0
    dQPFactor=0.57;
491
0
  }
492
493
0
  double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
494
495
0
  if( !(m_pcEncCfg->m_lambdaFromQPEnable) && depth>0 )
496
0
  {
497
0
    double qp_temp_ref = refQP + bitdepth_luma_qp_scale - SHIFT_QP;
498
0
    dLambda *= Clip3(2.00, 4.00, (qp_temp_ref / 6.0));   // (j == B_SLICE && p_cur_frm->layer != 0 )
499
0
  }
500
501
  // if hadamard is used in ME process
502
0
  if ( !m_pcEncCfg->m_bUseHADME && slice->sliceType != VVENC_I_SLICE )
503
0
  {
504
0
    dLambda *= 0.95;
505
0
  }
506
507
0
  double lambdaModifier;
508
0
  if( slice->sliceType != VVENC_I_SLICE || intraLambdaModifiers.empty())
509
0
  {
510
0
    lambdaModifier = m_pcEncCfg->m_adLambdaModifier[ temporalId ];
511
0
  }
512
0
  else
513
0
  {
514
0
    lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ];
515
0
  }
516
0
  dLambda *= lambdaModifier;
517
518
0
  iQP = Clip3( -slice->sps->qpBDOffset[ CH_L ], MAX_QP, (int) floor( dQP + 0.5 ) );
519
520
0
  if( m_pcEncCfg->m_DepQuantEnabled )
521
0
  {
522
0
    dLambda *= pow( 2.0, 0.25/3.0 ); // slight lambda adjustment for dependent quantization (due to different slope of quantizer)
523
0
  }
524
525
  // NOTE: the lambda modifiers that are sometimes applied later might be best always applied in here.
526
0
  return dLambda;
527
0
}
528
529
530
// ====================================================================================================================
531
// Public member functions
532
// ====================================================================================================================
533
534
535
/** \param pic   picture class
536
 */
537
void EncSlice::compressSlice( Picture* pic )
538
0
{
539
0
  PROFILER_SCOPE_AND_STAGE( 1, g_timeProfiler, P_COMPRESS_SLICE );
540
0
  CodingStructure& cs         = *pic->cs;
541
0
  Slice* const slice          = cs.slice;
542
0
  uint32_t  startCtuTsAddr    = slice->sliceMap.ctuAddrInSlice[0];
543
0
  uint32_t  boundingCtuTsAddr = pic->cs->pcv->sizeInCtus;
544
545
0
  cs.pcv      = slice->pps->pcv;
546
0
  cs.fracBits = 0;
547
548
0
  if( startCtuTsAddr == 0 )
549
0
  {
550
0
    cs.initStructData( slice->sliceQp );
551
0
  }
552
553
0
  for( auto* thrRsrc : m_ThreadRsrc )
554
0
  {
555
0
    thrRsrc->m_encCu.initSlice( slice );
556
0
  }
557
558
0
  for( auto* lnRsrc : m_TileLineEncRsrc )
559
0
  {
560
0
    lnRsrc->m_CABACEstimator    .initCtxModels( *slice );
561
0
    lnRsrc->m_SaoCABACEstimator .initCtxModels( *slice );
562
0
    lnRsrc->m_AlfCABACEstimator .initCtxModels( *slice );
563
0
    lnRsrc->m_AffineProfList    .resetAffineMVList();
564
0
    lnRsrc->m_BlkUniMvInfoBuffer.resetUniMvList();
565
0
    lnRsrc->m_CachedBvs         .resetIbcBvCand();
566
567
0
    if( slice->sps->saoEnabled && pic->useSAO )
568
0
    {
569
0
      lnRsrc->m_encSao          .initSlice( slice );
570
0
    }
571
0
  }
572
573
0
  if( slice->sps->fpelMmvd && !slice->picHeader->disFracMMVD )
574
0
  {
575
0
    slice->picHeader->disFracMMVD = ( pic->lwidth() * pic->lheight() > 1920 * 1080 ) ? true : false;
576
0
  }
577
578
0
  xProcessCtus( pic, startCtuTsAddr, boundingCtuTsAddr );
579
0
}
580
581
/**
 * \brief Determines the joint Cb-Cr sign flag of the picture header.
 *
 * The flag is set when the correlation between the high-pass filtered (i.e. zero-mean)
 * original Cb and Cr samples of the given area is negative. It stays at its default
 * (true) when chroma is not enabled.
 *
 * \param cs           coding structure, cs.slice->picHeader->jointCbCrSign is written
 * \param topLeftLuma  top-left position of the analyzed area in luma samples
 * \param sizeLuma     size of the analyzed area in luma samples
 */
void setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma )
{
  bool sgnFlag = true;

  if( isChromaEnabled( cs.picture->chromaFormat ) )
  {
    const CompArea cbArea = CompArea( COMP_Cb, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true );
    const CompArea crArea = CompArea( COMP_Cr, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true );

    // use the reshaped original when a filtered original buffer is present
    const bool    useRspOrig = cs.picture->getFilteredOrigBuffer().valid();
    const CPelBuf orgCb      = useRspOrig ? cs.picture->getRspOrigBuf( cbArea ): cs.picture->getOrigBuf( cbArea );
    const CPelBuf orgCr      = useRspOrig ? cs.picture->getRspOrigBuf( crArea ): cs.picture->getOrigBuf( crArea );

    // the filter reads all 8 neighbours, so samples on the picture boundary are skipped
    const int x0 = ( cbArea.x > 0 ? 0 : 1 );
    const int y0 = ( cbArea.y > 0 ? 0 : 1 );
    const int x1 = ( cbArea.x + cbArea.width  < cs.picture->Cb().width  ? cbArea.width  : cbArea.width  - 1 );
    const int y1 = ( cbArea.y + cbArea.height < cs.picture->Cb().height ? cbArea.height : cbArea.height - 1 );

    const int cbs = orgCb.stride;
    const int crs = orgCr.stride;

    // 3x3 high-pass: 12 * center - 2 * (left, right, top, bottom) - (4 diagonal neighbours)
    const auto highPass = []( const Pel* p, const int s ) -> int
    {
      return 12*(int)p[0] - 2*((int)p[-1] + (int)p[1] + (int)p[-s] + (int)p[s]) - ((int)p[-1-s] + (int)p[1-s] + (int)p[-1+s] + (int)p[1+s]);
    };

    const Pel* pCb     = orgCb.buf + y0 * cbs;
    const Pel* pCr     = orgCr.buf + y0 * crs;
    int64_t    sumCbCr = 0;

    // determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes
    for( int y = y0; y < y1; y++, pCb += cbs, pCr += crs )
    {
      for( int x = x0; x < x1; x++ )
      {
        const int cb = highPass( pCb + x, cbs );
        const int cr = highPass( pCr + x, crs );
        // widen before multiplying, the product of two filter outputs may exceed the int range
        sumCbCr += static_cast<int64_t>( cb ) * cr;
      }
    }

    sgnFlag = ( sumCbCr < 0 );
  }

  cs.slice->picHeader->jointCbCrSign = sgnFlag;
}
618
619
// Immutable position of a CTU: x / y position and the matching raster-scan address.
struct CtuPos
{
  CtuPos( int _x, int _y, int _a )
    : ctuPosX  ( _x )
    , ctuPosY  ( _y )
    , ctuRsAddr( _a )
  {
  }

  const int ctuPosX;    // first constructor argument
  const int ctuPosY;    // second constructor argument
  const int ctuRsAddr;  // third constructor argument
};
627
628
class CtuTsIterator
629
{
630
  private:
631
    const CodingStructure& cs;
632
    const int        m_startTsAddr;
633
    const int        m_endTsAddr;
634
    std::vector<int> m_ctuAddrMap;
635
          int        m_ctuTsAddr;
636
637
  private:
638
    int getNextTsAddr( const int _tsAddr ) const
639
0
    {
640
0
      const PreCalcValues& pcv  = *cs.pcv;
641
0
      const int startSliceRsRow = m_startTsAddr / pcv.widthInCtus;
642
0
      const int startSliceRsCol = m_startTsAddr % pcv.widthInCtus;
643
0
      const int endSliceRsRow   = (m_endTsAddr - 1) / pcv.widthInCtus;
644
0
      const int endSliceRsCol   = (m_endTsAddr - 1) % pcv.widthInCtus;
645
0
            int ctuTsAddr = _tsAddr;
646
0
      CHECK( ctuTsAddr > m_endTsAddr, "error: array index out of bounds" );
647
0
      while( ctuTsAddr < m_endTsAddr )
648
0
      {
649
0
        ctuTsAddr++;
650
0
        const int ctuRsAddr = ctuTsAddr; 
651
0
        if( cs.slice->pps->rectSlice
652
0
            && ( (ctuRsAddr / pcv.widthInCtus) < startSliceRsRow
653
0
              || (ctuRsAddr / pcv.widthInCtus) > endSliceRsRow
654
0
              || (ctuRsAddr % pcv.widthInCtus) < startSliceRsCol
655
0
              || (ctuRsAddr % pcv.widthInCtus) > endSliceRsCol ) )
656
0
          continue;
657
0
        break;
658
0
      }
659
0
      return ctuTsAddr;
660
0
    }
661
662
    int mapAddr( const int _addr ) const
663
0
    {
664
0
      if( _addr < 0 )
665
0
        return _addr;
666
0
      if( _addr >= m_ctuAddrMap.size() )
667
0
        return _addr;
668
0
      return m_ctuAddrMap[ _addr ];
669
0
    }
670
671
  public:
672
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e,       std::vector<int>& _m         ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( _s ) {}
673
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, bool _wpp                          ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ),                     m_ctuTsAddr( _s ) { if( _wpp ) setWppPattern(); }
674
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>& _m         ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( _s ) {}
675
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>& _m, int _c ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( std::max( _s, _c ) ) {}
676
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>* _m, bool _wpp ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuTsAddr( _s ) {  if( _wpp ) m_ctuAddrMap = *_m;  }
677
678
0
    virtual ~CtuTsIterator() { m_ctuAddrMap.clear(); }
679
680
0
    CtuTsIterator& operator++()                { m_ctuTsAddr = getNextTsAddr( m_ctuTsAddr ); return *this; }
681
0
    CtuTsIterator  operator++(int)             { auto retval = *this; ++(*this); return retval; }
682
0
    bool operator==(CtuTsIterator other) const { return m_ctuTsAddr == other.m_ctuTsAddr; }
683
0
    bool operator!=(CtuTsIterator other) const { return m_ctuTsAddr != other.m_ctuTsAddr; }
684
0
    CtuPos operator*()                   const { const int ctuRsAddr = mapAddr( m_ctuTsAddr );  return CtuPos( ctuRsAddr % cs.pcv->widthInCtus, ctuRsAddr / cs.pcv->widthInCtus, ctuRsAddr ); }
685
686
0
    CtuTsIterator begin() { return CtuTsIterator( cs, m_startTsAddr, m_endTsAddr, m_ctuAddrMap ); };
687
0
    CtuTsIterator end()   { return CtuTsIterator( cs, m_startTsAddr, m_endTsAddr, m_ctuAddrMap, m_endTsAddr ); };
688
689
    using iterator_category = std::forward_iterator_tag;
690
    using value_type        = int;
691
    using pointer           = int*;
692
    using reference         = int&;
693
    using difference_type   = ptrdiff_t;
694
695
    void setWppPattern()
696
0
    {
697
0
      const PreCalcValues& pcv = *cs.pcv;
698
0
      m_ctuAddrMap.resize( pcv.sizeInCtus, 0 );
699
0
      int addr = 0;
700
0
      for( int i = 1; i < pcv.sizeInCtus; i++ )
701
0
      {
702
0
        int x = addr % pcv.widthInCtus;
703
0
        int y = addr / pcv.widthInCtus;
704
0
        x -= 1;
705
0
        y += 1;
706
0
        if( x < 0 || y >= pcv.heightInCtus )
707
0
        {
708
0
          x += 1 + y;
709
0
          y  = 0;
710
0
        }
711
0
        if( x >= pcv.widthInCtus )
712
0
        {
713
0
          y += ( x - pcv.widthInCtus ) + 1;
714
0
          x  = pcv.widthInCtus - 1;
715
0
        }
716
0
        addr = y * pcv.widthInCtus + x;
717
0
        m_ctuAddrMap[ i ] = addr;
718
0
      }
719
0
    }
720
};
721
722
// Forwards to EncSampleAdaptiveOffset::disabledRate() with the SAO rate settings and
// the internal chroma format of the encoder configuration; m_saoDisabledRate is passed along.
void EncSlice::saoDisabledRate( CodingStructure& cs, SAOBlkParam* reconParams )
{
  const VVEncCfg& cfg = *m_pcEncCfg;

  EncSampleAdaptiveOffset::disabledRate( cs,
                                         m_saoDisabledRate,
                                         reconParams,
                                         cfg.m_saoEncodingRate,
                                         cfg.m_saoEncodingRateChroma,
                                         cfg.m_internChromaFormat );
}
726
727
// Finalizes SAO related state once a picture has been compressed: optionally stores the
// SAO disabled statistics and copies the per-component SAO decisions into the slice
// headers of all slices of the picture. Does nothing when SAO is not in use.
void EncSlice::finishCompressSlice( Picture* pic, Slice& slice )
{
  CodingStructure& cs = *pic->cs;

  // finalize: only needed when SAO is enabled in the SPS and used for this picture
  if( !slice.sps->saoEnabled || !pic->useSAO )
  {
    return;
  }

  // store disabled statistics (only when the configured thread count is zero)
  if( !m_pcEncCfg->m_numThreads )
  {
    saoDisabledRate( cs, &m_saoReconParams[ 0 ] );
  }

  // set slice header flags; both chroma components are expected to share one decision
  CHECK( m_saoEnabled[ COMP_Cb ] != m_saoEnabled[ COMP_Cr ], "Unspecified error");
  for( auto picSlice : pic->slices )
  {
    picSlice->saoEnabled[ CH_L ] = m_saoEnabled[ COMP_Y  ];
    picSlice->saoEnabled[ CH_C ] = m_saoEnabled[ COMP_Cb ];
  }
}
747
748
// Prepares and runs the per-CTU processing of one picture.
//
//  1. Initializes picture level tool state (joint CbCr modes, SAO picture decision, ALF).
//  2. Resets every entry of m_processStates to CTU_ENCODE.
//  3. Fills ctuEncParams with one entry per CTU, in the order delivered by CtuTsIterator.
//  4. With threads configured: registers one barrier task per CTU at the thread pool.
//     Without threads: repeatedly sweeps over all CTUs, calling xProcessCtuTask on each one
//     that is not yet PROCESS_DONE, until the state at index boundingCtuTsAddr - 1 is PROCESS_DONE.
//
// \param pic                picture to be processed
// \param startCtuTsAddr     first CTU address handed to the CTU iterator
// \param boundingCtuTsAddr  end CTU address handed to the CTU iterator
void EncSlice::xProcessCtus( Picture* pic, const unsigned startCtuTsAddr, const unsigned boundingCtuTsAddr )
{
  PROFILER_SCOPE_TOP_LEVEL_EXT( 1, g_timeProfiler, P_IGNORE, pic->cs );
  CodingStructure& cs      = *pic->cs;
  Slice&           slice   = *cs.slice;
  const PreCalcValues& pcv = *cs.pcv;
  const bool multiThreaded = m_pcEncCfg->m_numThreads > 0;

  // initialization
  if( slice.sps->jointCbCr )
  {
    setJointCbCrModes( cs, Position(0, 0), cs.area.lumaSize() );
  }

  if( slice.sps->saoEnabled && pic->useSAO )
  {
    // check SAO enabled or disabled
    EncSampleAdaptiveOffset::decidePicParams( cs, m_saoDisabledRate, m_saoEnabled, m_pcEncCfg->m_saoEncodingRate, m_pcEncCfg->m_saoEncodingRateChroma, m_pcEncCfg->m_internChromaFormat );

    // SAO counts as completely disabled only if it is off for every valid component
    m_saoAllDisabled = true;
    for( int compIdx = 0; compIdx < getNumberValidComponents( pcv.chrFormat ); compIdx++ )
    {
      m_saoAllDisabled &= ! m_saoEnabled[ compIdx ];
    }

    std::fill( m_saoReconParams.begin(), m_saoReconParams.end(), SAOBlkParam() );
  }
  else
  {
    m_saoAllDisabled = true;
  }

  if( slice.sps->alfEnabled )
  {
    m_pALF->initEncProcess( slice );
  }

  std::fill( m_processStates.begin(), m_processStates.end(), CTU_ENCODE );

  // fill encoder parameter list
  // (the former local copy of slice.sliceMap.ctuAddrInSlice was never read and has been removed)
  int idx = 0;
  auto ctuIter = CtuTsIterator( cs, startCtuTsAddr, boundingCtuTsAddr, &m_ctuAddrMap, multiThreaded );
  for( auto ctuPos : ctuIter )
  {
    ctuEncParams[ idx ].pic       = pic;
    ctuEncParams[ idx ].encSlice  = this;
    ctuEncParams[ idx ].ctuRsAddr = ctuPos.ctuRsAddr;
    ctuEncParams[ idx ].ctuPosX   = ctuPos.ctuPosX;
    ctuEncParams[ idx ].ctuPosY   = ctuPos.ctuPosY;
    ctuEncParams[ idx ].ctuArea   = UnitArea( pic->chromaFormat, slice.pps->pcv->getCtuArea( ctuPos.ctuPosX, ctuPos.ctuPosY ) );

    // a dedicated tile line resource index is only looked up when threads are in use
    if( multiThreaded )
    {
      ctuEncParams[idx].tileLineResIdx = slice.pps->getTileLineId( ctuPos.ctuPosX, ctuPos.ctuPosY );
    }
    else
    {
      ctuEncParams[idx].tileLineResIdx = 0;
    }
    idx++;
  }

  //for( int i = 0; i < idx; i++ )
  //{
  //  for( int j = i; j < idx; j++ )
  //  {
  //    if( ctuEncParams[i].tileLineResIdx != ctuEncParams[j].tileLineResIdx ) continue;
  //
  //    CHECK( ctuEncParams[i].ctuPosY != ctuEncParams[j].ctuPosY, "Not the same CTU line!" );
  //    CHECK( slice.pps->getTileIdx( ctuEncParams[i].ctuPosX, ctuEncParams[i].ctuPosY ) != slice.pps->getTileIdx( ctuEncParams[j].ctuPosX, ctuEncParams[j].ctuPosY ), "Not the same tile!" );
  //  }
  //}

  // NOTE(review): this verifies the number of filled entries only after the writes above
  CHECK( idx != pcv.sizeInCtus, "array index out of bounds" );

  // process ctu's until last ctu is done
  if( multiThreaded )
  {
    // one barrier task per CTU; the <true> instantiation is passed as the additional last callback
    for( auto& ctuEncParam : ctuEncParams )
    {
      m_threadPool->addBarrierTask<CtuEncParam>( EncSlice::xProcessCtuTask<false>,
                                                 &ctuEncParam,
                                                 m_ctuTasksDoneCounter,
                                                 nullptr,
                                                 {},
                                                 EncSlice::xProcessCtuTask<true> );
    }
  }
  else
  {
    // without threads: keep sweeping over all CTUs, each call attempts the next stage of one CTU
    do
    {
      for( auto& ctuEncParam : ctuEncParams )
      {
        if( m_processStates[ctuEncParam.ctuRsAddr] != PROCESS_DONE )
          EncSlice::xProcessCtuTask<false>( 0, &ctuEncParam );
      }
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_LUMA_LF,   cs, cs.getRecoBuf(), COMP_Y  );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMP_Cb );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMP_Cr );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_LUMA_SAO,   cs, cs.getRecoBuf(), COMP_Y  );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_CHROMA_SAO, cs, cs.getRecoBuf(), COMP_Cb );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_CHROMA_SAO, cs, cs.getRecoBuf(), COMP_Cr );
    }
    while( m_processStates[ boundingCtuTsAddr - 1 ] != PROCESS_DONE );
  }
}
855
856
inline bool checkCtuTaskNbTop( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
857
0
{
858
0
  return ctuPosY > 0 && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 0, -1 ) ) && processStates[ ctuRsAddr - pps.pcv->widthInCtus ] <= tskType;
859
0
}
860
861
inline bool checkCtuTaskNbBot( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
862
0
{
863
0
  return ctuPosY + 1 < pps.pcv->heightInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 0, 1 ) ) && processStates[ ctuRsAddr     + pps.pcv->widthInCtus ] <= tskType;
864
0
}
865
866
inline bool checkCtuTaskNbRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
867
0
{
868
0
  return ctuPosX + 1 < pps.pcv->widthInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 1, 0 ) ) && processStates[ ctuRsAddr + 1 ] <= tskType;
869
0
}
870
871
inline bool checkCtuTaskNbTopRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
872
0
{
873
0
  return ctuPosY > 0 && ctuPosX + 1 < pps.pcv->widthInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 1, -1 ) ) && processStates[ ctuRsAddr - pps.pcv->widthInCtus + 1 ] <= tskType;
874
0
}
875
876
inline bool checkCtuTaskNbBotRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, const int rightOffset = 1, bool override = false )
877
0
{
878
0
  return ctuPosX + rightOffset < pps.pcv->widthInCtus && ctuPosY + 1 < pps.pcv->heightInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, rightOffset, 1 ) ) && processStates[ ctuRsAddr + rightOffset + pps.pcv->widthInCtus ] <= tskType;
879
0
}
880
881
template<bool checkReadyState>
882
bool EncSlice::xProcessCtuTask( int threadIdx, CtuEncParam* ctuEncParam )
883
0
{
884
0
  Picture* pic                   = ctuEncParam->pic;
885
0
  EncSlice* encSlice             = ctuEncParam->encSlice;
886
0
  CodingStructure& cs            = *pic->cs;
887
0
  Slice&           slice         = *cs.slice;
888
0
  const PPS&       pps           = *slice.pps;
889
0
  const PreCalcValues& pcv       = *cs.pcv;
890
0
  const int ctuRsAddr            = ctuEncParam->ctuRsAddr;
891
0
  const int ctuPosX              = ctuEncParam->ctuPosX;
892
0
  const int ctuPosY              = ctuEncParam->ctuPosY;
893
0
  const int x                    = ctuPosX << pcv.maxCUSizeLog2;
894
0
  const int y                    = ctuPosY << pcv.maxCUSizeLog2;
895
0
  const int width                = std::min( pcv.maxCUSize, pcv.lumaWidth  - x );
896
0
  const int height               = std::min( pcv.maxCUSize, pcv.lumaHeight - y );
897
0
  const int ctuStride            = pcv.widthInCtus;
898
0
  const int lineIdx              = ctuEncParam->tileLineResIdx;
899
0
  ProcessCtuState* processStates = encSlice->m_processStates.data();
900
0
  const UnitArea& ctuArea        = ctuEncParam->ctuArea;
901
0
  const bool wppSyncEnabled      = cs.sps->entropyCodingSyncEnabled;
902
0
  const TaskType currState       = processStates[ ctuRsAddr ];
903
0
  const unsigned syncLines       = encSlice->m_pcEncCfg->m_ifpLines;
904
905
0
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "poc", cs.slice->poc ) );
906
0
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );
907
0
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "final", processStates[ ctuRsAddr ] == CTU_ENCODE ? 0 : 1 ) );
908
909
  // process ctu's line wise from left to right
910
0
  const bool tileParallel = encSlice->m_pcEncCfg->m_tileParallelCtuEnc;
911
0
  if( tileParallel && currState == CTU_ENCODE && ctuPosX > 0 && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX - 1, ctuPosY ) )
912
0
    ; // for CTU_ENCODE on tile boundaries, allow parallel processing of tiles
913
0
  else if( ctuPosX > 0 && processStates[ ctuRsAddr - 1 ] <= currState && currState < PROCESS_DONE )
914
0
    return false;
915
916
0
  switch( currState )
917
0
  {
918
    // encode
919
0
    case CTU_ENCODE:
920
0
      {
921
        // CTU line-wise inter-frame parallel processing synchronization
922
0
        if( syncLines )
923
0
        {
924
0
          const bool lineStart = ctuPosX == 0 || ( tileParallel && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX - 1, ctuPosY ) );
925
0
          if( lineStart && !refPicCtuLineReady( slice, ctuPosY + (int)syncLines, pcv ) )
926
0
          {
927
0
            return false;
928
0
          }
929
0
        }
930
931
        // general wpp conditions, top and top-right ctu have to be encoded
932
0
        if( encSlice->m_pcEncCfg->m_tileParallelCtuEnc && ctuPosY > 0 && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX, ctuPosY - 1 ) )
933
0
          ; // allow parallel processing of CTU-encoding on independent tiles
934
0
        else if( ctuPosY > 0                                  && processStates[ ctuRsAddr - ctuStride     ] <= CTU_ENCODE )
935
0
          return false;
936
0
        else if( ctuPosY > 0 && ctuPosX + 1 < pcv.widthInCtus && processStates[ ctuRsAddr - ctuStride + 1 ] <= CTU_ENCODE && !wppSyncEnabled )
937
0
          return false;
938
        
939
0
        if( checkReadyState )
940
0
          return true;
941
942
#ifdef TRACE_ENABLE_ITT
943
        std::stringstream ss;
944
        ss << "Encode_" << slice.poc << "_CTU_" << ctuPosY << "_" << ctuPosX;
945
        __itt_string_handle* itt_handle_ctuEncode = __itt_string_handle_create( ss.str().c_str() );
946
#endif
947
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ctuEncode );
948
949
0
        TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ];
950
0
        PerThreadRsrc* taskRsrc      = encSlice->m_ThreadRsrc[ threadIdx ];
951
0
        EncCu& encCu                 = taskRsrc->m_encCu;
952
953
0
        encCu.setCtuEncRsrc( &lineEncRsrc->m_CABACEstimator, &taskRsrc->m_CtxCache, &lineEncRsrc->m_ReuseUniMv, &lineEncRsrc->m_BlkUniMvInfoBuffer, &lineEncRsrc->m_AffineProfList, &lineEncRsrc->m_CachedBvs );
954
0
        encCu.encodeCtu( pic, lineEncRsrc->m_prevQp, ctuPosX, ctuPosY );
955
956
        // cleanup line memory when last ctu in line done to reduce overall memory consumption
957
0
        if( encSlice->m_pcEncCfg->m_ensureWppBitEqual && ( ctuPosX == pcv.widthInCtus - 1 || slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX + 1, ctuPosY ) ) )
958
0
        {
959
0
          lineEncRsrc->m_AffineProfList    .resetAffineMVList();
960
0
          lineEncRsrc->m_BlkUniMvInfoBuffer.resetUniMvList();
961
0
          lineEncRsrc->m_ReuseUniMv        .resetReusedUniMvs();
962
0
          lineEncRsrc->m_CachedBvs         .resetIbcBvCand();
963
0
        }
964
965
0
        DTRACE_UPDATE( g_trace_ctx, std::make_pair( "final", 1 ) );
966
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ctuEncode );
967
968
0
        processStates[ ctuRsAddr ] = RESHAPE_LF_VER;
969
0
      }
970
0
      break;
971
972
    // reshape + vertical loopfilter
973
0
    case RESHAPE_LF_VER:
974
0
      {
975
        // clip check to right tile border (CTU_ENCODE pre-processing delay due to IBC)
976
0
        const int tileCol = slice.pps->ctuToTileCol[ctuPosX];
977
0
        const int lastCtuPosXInTile = slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
978
0
        const int checkRight = std::min<int>( encSlice->m_ctuEncDelay, lastCtuPosXInTile - ctuPosX );
979
980
0
        const bool hasTiles = encSlice->m_pcEncCfg->m_tileParallelCtuEnc && slice.pps->getNumTiles() > 1;
981
982
        // need to check line above bcs of tiling, which allows CTU_ENCODE to run independently across tiles
983
0
        if( hasTiles )
984
0
        {
985
0
          if( ctuPosY > 0 )
986
0
          {
987
0
            for( int i = -!!ctuPosX; i <= checkRight; i++ )
988
0
              if( pps.canFilterCtuBdry( ctuPosX, ctuPosY, i, -1 ) && processStates[ctuRsAddr - ctuStride + i] <= CTU_ENCODE )
989
0
                return false;
990
0
          }
991
0
        }
992
        
993
        // ensure all surrounding ctu's are encoded (intra pred requires non-reshaped and unfiltered residual, IBC requires unfiltered samples too)
994
        // check right with max offset (due to WPP condition above, this implies top-right has been already encoded)
995
0
        for( int i = hasTiles ? -!!ctuPosX : checkRight; i <= checkRight; i++ )
996
0
          if( pps.canFilterCtuBdry( ctuPosX, ctuPosY, i, 0 ) && processStates[ctuRsAddr + i] <= CTU_ENCODE )
997
0
            return false;
998
999
        // check bottom right with 1 CTU delay (this is only required for intra pred)
1000
        // at the right picture border this will check the bottom CTU
1001
0
        const int checkBottomRight = std::min<int>( 1, lastCtuPosXInTile - ctuPosX );
1002
0
        if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CTU_ENCODE, checkBottomRight ) ) 
1003
0
          return false;
1004
1005
0
        if( checkReadyState )
1006
0
          return true;
1007
1008
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_rspLfVer );
1009
1010
        // reshape
1011
0
        if( slice.sps->lumaReshapeEnable && slice.picHeader->lmcsEnabled )
1012
0
        {
1013
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_RESHAPER, &cs, CH_L );
1014
0
          PelBuf reco = pic->getRecoBuf( COMP_Y ).subBuf( x, y, width, height );
1015
0
          reco.rspSignal( pic->reshapeData.getInvLUT() );
1016
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1017
0
        }
1018
1019
        // loopfilter
1020
0
        if( !cs.pps->deblockingFilterControlPresent || !cs.pps->deblockingFilterDisabled || cs.pps->deblockingFilterOverrideEnabled )
1021
0
        {
1022
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_DEBLOCK_FILTER, &cs, CH_L );
1023
          // calculate filter strengths
1024
0
          encSlice->m_pLoopFilter->calcFilterStrengthsCTU( cs, ctuArea, true );
1025
1026
          // vertical filter
1027
0
          PelUnitBuf reco = cs.picture->getRecoBuf();
1028
0
          encSlice->m_pLoopFilter->xDeblockArea<EDGE_VER>( cs, ctuArea, MAX_NUM_CH, reco );
1029
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1030
0
        }
1031
1032
0
        ITT_TASKEND( itt_domain_encode, itt_handle_rspLfVer );
1033
1034
0
        processStates[ ctuRsAddr ] = LF_HOR;
1035
0
      }
1036
0
      break;
1037
1038
    // horizontal loopfilter
1039
0
    case LF_HOR:
1040
0
      {
1041
        // ensure horizontal ordering (from top to bottom)
1042
0
        if( checkCtuTaskNbTop   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR ) )         
1043
0
          return false;
1044
1045
        // ensure vertical loop filter of neighbor ctu's will not modify current residual
1046
        // check top, top-right and right ctu
1047
        // (top, top-right checked implicitly due to ordering check above)
1048
0
        if( checkCtuTaskNbRgt   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, RESHAPE_LF_VER ) ) 
1049
0
          return false;
1050
1051
0
        if( checkReadyState )
1052
0
          return true;
1053
1054
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_lfHor );
1055
1056
0
        if( !cs.pps->deblockingFilterControlPresent || !cs.pps->deblockingFilterDisabled || cs.pps->deblockingFilterOverrideEnabled )
1057
0
        {
1058
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_DEBLOCK_FILTER, &cs, CH_L );
1059
0
          PelUnitBuf reco = cs.picture->getRecoBuf();
1060
0
          encSlice->m_pLoopFilter->xDeblockArea<EDGE_HOR>( cs, ctuArea, MAX_NUM_CH, reco );
1061
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1062
0
        }
1063
1064
0
        ITT_TASKEND( itt_domain_encode, itt_handle_lfHor );
1065
1066
0
        processStates[ ctuRsAddr ] = SAO_FILTER;
1067
0
      }
1068
0
      break;
1069
1070
    // SAO filter
1071
0
    case SAO_FILTER:
1072
0
      {
1073
        // general wpp conditions, top and top-right ctu have to be filtered
1074
0
        if( checkCtuTaskNbTop   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER, true ) ) return false;
1075
0
        if( checkCtuTaskNbTopRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER, true ) ) return false;
1076
1077
        // ensure loop filter of neighbor ctu's will not modify current residual
1078
        // sao processing dependents on +1 pixel to each side
1079
        // due to wpp condition above, only right, bottom and bottom-right ctu have to be checked
1080
0
        if( checkCtuTaskNbRgt   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR,    true ) ) return false;
1081
0
        if( checkCtuTaskNbBot   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR,    true ) ) return false;
1082
0
        if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR, 1, true ) ) return false;
1083
1084
0
        if( checkReadyState )
1085
0
          return true;
1086
1087
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_sao );
1088
1089
        // SAO filter
1090
0
        if( slice.sps->saoEnabled && pic->useSAO )
1091
0
        {
1092
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_SAO, &cs, CH_L );
1093
0
          TileLineEncRsrc* lineEncRsrc    = encSlice->m_TileLineEncRsrc[ lineIdx ];
1094
0
          PerThreadRsrc* taskRsrc         = encSlice->m_ThreadRsrc[ threadIdx ];
1095
0
          EncSampleAdaptiveOffset& encSao = lineEncRsrc->m_encSao;
1096
1097
0
          encSao.setCtuEncRsrc( &lineEncRsrc->m_SaoCABACEstimator, &taskRsrc->m_CtxCache );
1098
0
          encSao.storeCtuReco( cs, ctuArea, ctuPosX, ctuPosY );
1099
0
          encSao.getCtuStatistics( cs, encSlice->m_saoStatData, ctuArea, ctuRsAddr );
1100
0
          encSao.decideCtuParams( cs, encSlice->m_saoStatData, encSlice->m_saoEnabled, encSlice->m_saoAllDisabled, ctuArea, ctuRsAddr, &encSlice->m_saoReconParams[ 0 ], cs.picture->getSAO() );
1101
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1102
0
        }
1103
1104
        // ALF border extension
1105
0
        if( cs.sps->alfEnabled )
1106
0
        {
1107
          // we have to do some kind of position aware boundary padding
1108
          // it's done here because the conditions are readable
1109
0
          PelUnitBuf recoBuf = cs.picture->getRecoBuf();
1110
0
          const int fltSize  = ( MAX_ALF_FILTER_LENGTH + 1 ) >> 1;
1111
0
          const int xL       = ( ctuPosX == 0 )                 ? ( x-fltSize       ) : ( x );
1112
0
          const int xR       = ( ctuPosX+1 == pcv.widthInCtus ) ? ( x+width+fltSize ) : ( x+width );
1113
1114
0
          if( ctuPosX == 0 )                  recoBuf.extendBorderPelLft( y, height, fltSize );
1115
0
          if( ctuPosX+1 == pcv.widthInCtus )  recoBuf.extendBorderPelRgt( y, height, fltSize );
1116
0
          if( ctuPosY == 0 )                  recoBuf.extendBorderPelTop( xL, xR-xL, fltSize );
1117
0
          if( ctuPosY+1 == pcv.heightInCtus ) recoBuf.extendBorderPelBot( xL, xR-xL, fltSize );
1118
1119
0
          encSlice->m_pALF->copyCTUforALF(cs, ctuPosX, ctuPosY);
1120
0
        }
1121
1122
        // DMVR refinement can be stored now
1123
0
        if( slice.sps->DMVR && !slice.picHeader->disDmvrFlag )
1124
0
        {
1125
0
          CS::setRefinedMotionFieldCTU( cs, ctuPosX, ctuPosY );
1126
0
        }
1127
0
        ITT_TASKEND( itt_domain_encode, itt_handle_sao );
1128
1129
0
        const int tileCol = slice.pps->ctuToTileCol[ctuPosX];
1130
0
        const int lastCtuColInTileRow = slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
1131
0
        if( ctuPosX == lastCtuColInTileRow )
1132
0
        {
1133
0
          processStates[ctuRsAddr] = ALF_GET_STATISTICS;
1134
0
        }
1135
0
        else
1136
0
        {
1137
0
          processStates[ctuRsAddr] = PROCESS_DONE;
1138
0
          return true;
1139
0
        }
1140
0
      }
1141
0
      break;
1142
1143
0
    case ALF_GET_STATISTICS:
1144
0
      {
1145
        // ensure all surrounding ctu's are filtered (ALF will use pixels of adjacent CTU's)
1146
        // due to wpp condition above in SAO_FILTER, only right, bottom and bottom-right ctu have to be checked
1147
0
        if( checkCtuTaskNbRgt   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false;
1148
0
        if( checkCtuTaskNbBot   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false;
1149
0
        if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false;
1150
1151
0
        if( checkReadyState )
1152
0
          return true;
1153
1154
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_alf_stat );
1155
1156
        // ALF pre-processing
1157
0
        if( slice.sps->alfEnabled )
1158
0
        {
1159
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L );
1160
0
          PelUnitBuf recoBuf = cs.picture->getRecoBuf();
1161
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1162
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1163
0
          {
1164
0
            encSlice->m_pALF->getStatisticsCTU( *cs.picture, cs, recoBuf, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1165
0
          }
1166
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1167
0
        }
1168
1169
0
        ITT_TASKEND( itt_domain_encode, itt_handle_alf_stat );
1170
1171
        // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1172
0
        const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu;
1173
0
        processStates[ctuRsAddr] = (ctuRsAddr < deriveFilterCtu) ? ALF_RECONSTRUCT: ALF_DERIVE_FILTER;
1174
0
      }
1175
0
      break;
1176
1177
0
    case ALF_DERIVE_FILTER:
1178
0
      {
1179
0
        const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu;
1180
0
        if( ctuRsAddr == deriveFilterCtu )
1181
0
        {
1182
          // ensure statistics from all previous ctu's have been collected
1183
0
          int numCheckLines = deriveFilterCtu / pcv.widthInCtus + 1;
1184
0
          for( int y = 0; y < numCheckLines; y++ )
1185
0
          {
1186
0
            for( int tileCol = 0; tileCol < slice.pps->numTileCols; tileCol++ )
1187
0
            {
1188
0
              const int lastCtuInTileRow = y * pcv.widthInCtus + slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
1189
0
              if( processStates[lastCtuInTileRow] <= ALF_GET_STATISTICS )
1190
0
                return false;
1191
0
            }
1192
0
          }
1193
0
        }
1194
0
        else if( syncLines )
1195
0
        {
1196
          // ALF bitstream coding dependency for the sub-sequent ctu-lines
1197
0
          if( processStates[deriveFilterCtu] < ALF_RECONSTRUCT || checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_DERIVE_FILTER ) ) 
1198
0
            return false;
1199
0
        }
1200
0
        if( checkReadyState )
1201
0
          return true;
1202
1203
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_alf_derive );
1204
        // ALF post-processing
1205
0
        if( slice.sps->alfEnabled )
1206
0
        {
1207
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L );
1208
0
          if( ctuRsAddr == deriveFilterCtu )
1209
0
          {
1210
0
            encSlice->m_pALF->initDerivation( slice );
1211
0
            encSlice->m_pALF->deriveFilter( *cs.picture, cs, slice.getLambdas(), deriveFilterCtu + 1 );
1212
0
            encSlice->m_pALF->reconstructCoeffAPSs( cs, cs.slice->alfEnabled[COMP_Y], cs.slice->alfEnabled[COMP_Cb] || cs.slice->alfEnabled[COMP_Cr], false );
1213
0
          }
1214
0
          else if( syncLines )
1215
0
          {
1216
            // in sync lines mode: derive/select filter for the remaining lines
1217
0
            TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ];
1218
0
            PerThreadRsrc*   taskRsrc    = encSlice->m_ThreadRsrc[ threadIdx ];
1219
0
            const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1220
0
            for(int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++)
1221
0
            {
1222
0
              encSlice->m_pALF->selectFilterForCTU( cs, &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, ctu );
1223
0
            }
1224
0
          }
1225
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1226
0
        }
1227
1228
0
        ITT_TASKEND( itt_domain_encode, itt_handle_alf_derive );
1229
0
        processStates[ ctuRsAddr ] = ALF_RECONSTRUCT;
1230
0
      }
1231
0
      break;
1232
1233
0
    case ALF_RECONSTRUCT:
1234
0
      {
1235
        // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1236
0
        const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu;
1237
0
        if( processStates[deriveFilterCtu] < ALF_RECONSTRUCT )
1238
0
          return false;
1239
0
        else if( syncLines && ctuRsAddr > deriveFilterCtu && encSlice->m_pALF->getAsuHeightInCtus() > 1 )
1240
0
        {
1241
0
          const int asuHeightInCtus = encSlice->m_pALF->getAsuHeightInCtus();
1242
0
          const int botCtuLineInAsu = std::min( (( ctuPosY & ( ~(asuHeightInCtus - 1) ) ) + asuHeightInCtus - 1), (int)pcv.heightInCtus - 1 );
1243
0
          if( processStates[botCtuLineInAsu * ctuStride + ctuPosX] < ALF_RECONSTRUCT ) 
1244
0
            return false;
1245
0
        }
1246
1247
0
        if( checkReadyState )
1248
0
          return true;
1249
1250
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_alf_recon );
1251
1252
0
        if( slice.sps->alfEnabled )
1253
0
        {
1254
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L );
1255
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1256
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1257
0
          {
1258
0
            encSlice->m_pALF->reconstructCTU_MT( *cs.picture, cs, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1259
0
          }
1260
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1261
0
        }
1262
1263
0
        ITT_TASKEND( itt_domain_encode, itt_handle_alf_recon );
1264
0
        processStates[ctuRsAddr] = CCALF_GET_STATISTICS;
1265
0
      }
1266
      // dont break, no additional deps, can continue straigt away!
1267
      //break;
1268
1269
0
    case CCALF_GET_STATISTICS:
1270
0
      {
1271
0
        if( checkCtuTaskNbTop   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_RECONSTRUCT ) ) return false;
1272
0
        if( checkCtuTaskNbBot   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_RECONSTRUCT ) ) return false;
1273
1274
0
        if( checkReadyState )
1275
0
          return true;
1276
1277
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_stat );
1278
1279
        // ALF pre-processing
1280
0
        if( slice.sps->ccalfEnabled )
1281
0
        {
1282
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L);
1283
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1284
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1285
0
          {
1286
0
            encSlice->m_pALF->deriveStatsForCcAlfFilteringCTU( cs, COMP_Cb, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1287
0
            encSlice->m_pALF->deriveStatsForCcAlfFilteringCTU( cs, COMP_Cr, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1288
0
          }
1289
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1290
0
        }
1291
1292
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_stat );
1293
1294
        // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1295
0
        processStates[ctuRsAddr] = (ctuRsAddr < encSlice->m_ccalfDeriveCtu) ? CCALF_RECONSTRUCT: CCALF_DERIVE_FILTER;
1296
0
      }
1297
0
      break;
1298
1299
0
    case CCALF_DERIVE_FILTER:
1300
0
      {
1301
        // synchronization dependencies
1302
0
        const unsigned deriveFilterCtu = encSlice->m_ccalfDeriveCtu;
1303
0
        if( ctuRsAddr == deriveFilterCtu )
1304
0
        {
1305
          // ensure statistics from all previous ctu's have been collected
1306
0
          int numCheckLines = deriveFilterCtu / pcv.widthInCtus + 1;
1307
0
          for( int y = 0; y < numCheckLines; y++ )
1308
0
          {
1309
0
            for( int tileCol = 0; tileCol < slice.pps->numTileCols; tileCol++ )
1310
0
            {
1311
0
              const int lastCtuInTileRow = y * pcv.widthInCtus + slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
1312
0
              if( processStates[lastCtuInTileRow] <= CCALF_GET_STATISTICS )
1313
0
                return false;
1314
0
            }
1315
0
          }
1316
0
        }
1317
0
        else if( syncLines )
1318
0
        {
1319
          // ALF bitstream coding dependency for the sub-sequent CTU-lines
1320
0
          if( processStates[deriveFilterCtu] < CCALF_RECONSTRUCT || checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_DERIVE_FILTER ) ) 
1321
0
            return false;
1322
0
        }
1323
0
        if( checkReadyState )
1324
0
          return true;
1325
1326
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_derive );
1327
1328
        // start task
1329
0
        if( slice.sps->ccalfEnabled )
1330
0
        {
1331
0
          if( ctuRsAddr == deriveFilterCtu )
1332
0
          {
1333
0
            encSlice->m_pALF->deriveCcAlfFilter( *cs.picture, cs, encSlice->m_ccalfDeriveCtu + 1 );
1334
0
          }
1335
0
          else if( syncLines )
1336
0
          {
1337
            // in sync lines mode: derive/select filter for the remaining lines
1338
0
            TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ];
1339
0
            PerThreadRsrc*   taskRsrc    = encSlice->m_ThreadRsrc[ threadIdx ];
1340
0
            const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1341
0
            encSlice->m_pALF->selectCcAlfFilterForCtuLine( cs, COMP_Cb, cs.getRecoBuf(), &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, firstCtuInRow, ctuRsAddr );
1342
0
            encSlice->m_pALF->selectCcAlfFilterForCtuLine( cs, COMP_Cr, cs.getRecoBuf(), &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, firstCtuInRow, ctuRsAddr );
1343
0
          }
1344
0
        }
1345
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_derive );
1346
1347
0
        processStates[ctuRsAddr] = CCALF_RECONSTRUCT;
1348
0
      }
1349
0
      break;
1350
1351
0
    case CCALF_RECONSTRUCT:
1352
0
      {
1353
        // start ccalf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1354
0
        const unsigned deriveFilterCtu = encSlice->m_ccalfDeriveCtu;
1355
0
        if( processStates[deriveFilterCtu] < CCALF_RECONSTRUCT )
1356
0
          return false;
1357
1358
0
        if( syncLines )
1359
0
        {
1360
          // ensure line-by-line reconstruction due to line synchronization
1361
0
          if( checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_RECONSTRUCT ) ) return false;
1362
          // check bottom due to rec. buffer usage in ccalf statistics
1363
0
          if( checkCtuTaskNbBot( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_GET_STATISTICS ) ) return false;
1364
0
        }
1365
1366
0
        if( checkReadyState )
1367
0
          return true;
1368
1369
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_recon );
1370
1371
0
        if( slice.sps->ccalfEnabled )
1372
0
        {
1373
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1374
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1375
0
          {
1376
0
            encSlice->m_pALF->applyCcAlfFilterCTU( cs, COMP_Cb, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1377
0
            encSlice->m_pALF->applyCcAlfFilterCTU( cs, COMP_Cr, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1378
0
          }
1379
0
        }
1380
1381
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_recon );
1382
1383
        // extend pic border
1384
        // CCALF reconstruction stage is done per tile, ensure that all tiles in current CTU row are done  
1385
0
        if( ++(pic->m_tileColsDone->at(ctuPosY)) >= pps.numTileCols )
1386
0
        {
1387
0
          PelUnitBuf recoBuf = cs.picture->getRecoBuf();
1388
0
          const int margin = cs.picture->margin;
1389
0
          recoBuf.extendBorderPelLft( y, height, margin );
1390
0
          recoBuf.extendBorderPelRgt( y, height, margin );
1391
0
          if(ctuPosY == 0)
1392
0
            recoBuf.extendBorderPelTop( -margin, pcv.lumaWidth + 2 * margin, margin );
1393
0
          if(ctuPosY + 1 == pcv.heightInCtus)
1394
0
            recoBuf.extendBorderPelBot( -margin, pcv.lumaWidth + 2 * margin, margin );
1395
1396
          // for IFP lines synchro, do an additional increment signaling that CTU row is ready
1397
0
          if( syncLines )
1398
0
            ++(pic->m_tileColsDone->at( ctuPosY ));
1399
0
        }
1400
1401
        // perform finish only once for whole picture
1402
0
        const unsigned finishCtu = pcv.sizeInCtus - 1;
1403
0
        if( ctuRsAddr < finishCtu )
1404
0
        {
1405
0
          processStates[ctuRsAddr] = PROCESS_DONE;
1406
          // processing done => terminate thread
1407
0
          return true;
1408
0
        }
1409
0
        processStates[ctuRsAddr] = FINISH_SLICE;
1410
0
      }
1411
1412
0
    case FINISH_SLICE:
1413
0
      {
1414
0
        CHECK( ctuRsAddr != pcv.sizeInCtus - 1, "invalid state, finish slice only once for last ctu" );
1415
1416
        // ensure all coding tasks have been done for all previous ctu's
1417
0
        for( int i = 0; i < ctuRsAddr; i++ )
1418
0
          if( processStates[ i ] < FINISH_SLICE )
1419
0
            return false;
1420
1421
0
        if( checkReadyState )
1422
0
          return true;
1423
1424
0
        encSlice->finishCompressSlice( cs.picture, slice );
1425
1426
0
        processStates[ ctuRsAddr ] = PROCESS_DONE;
1427
        // processing done => terminate thread
1428
0
        return true;
1429
0
      }
1430
1431
0
    case PROCESS_DONE:
1432
0
      CHECK( true, "process state is PROCESS_DONE, but thread is still running" );
1433
0
      return true;
1434
1435
0
    default:
1436
0
      CHECK( true, "unknown process state" );
1437
0
      return true;
1438
0
  }
1439
1440
0
  return false;
1441
0
}
Unexecuted instantiation: bool vvenc::EncSlice::xProcessCtuTask<false>(int, vvenc::CtuEncParam*)
Unexecuted instantiation: bool vvenc::EncSlice::xProcessCtuTask<true>(int, vvenc::CtuEncParam*)
1442
1443
/**
 * Entropy-codes the CTUs of the picture's slice and appends the result to pic->sliceDataStreams[ 0 ].
 *
 * The CTUs are visited through slice->sliceMap.ctuAddrInSlice, from its first entry up to cs.pcv->sizeInCtus.
 * Each CTU is written with m_CABACWriter into one of several local sub-streams (one per tile, or one per
 * CTU row of a tile column when entropy coding sync is enabled). A sub-stream is terminated and byte aligned
 * at the last CTU of a tile, of a wavefront row or of the slice; the size of every sub-stream except the
 * last one is stored in the slice via addSubstreamSize(). Afterwards the sub-streams are concatenated.
 *
 * Besides the bitstream, the function updates slice->encCABACTableIdx, m_encCABACTableIdx,
 * m_entropyCodingSyncContextState and pic->sliceDataNumBins.
 *
 * \param pic picture whose coding structure (pic->cs) is coded and whose slice data stream receives the output
 */
void EncSlice::encodeSliceData( Picture* pic )
1444
0
{
1445
0
  CodingStructure& cs              = *pic->cs;
1446
0
  Slice* const slice               = cs.slice;
1447
0
  const uint32_t startCtuTsAddr    = slice->sliceMap.ctuAddrInSlice[0];
1448
0
  const uint32_t boundingCtuTsAddr = cs.pcv->sizeInCtus;
1449
0
  const bool wavefrontsEnabled     = slice->sps->entropyCodingSyncEnabled;
1450
1451
  // this ensures that independently encoded bitstream chunks can be combined to bit-equal
  // (the slice type is used as table index unless the PPS signals cabac init and no RAS init is pending)
1452
0
  const SliceType cabacTableIdx = ! slice->pps->cabacInitPresent || slice->pendingRasInit ? slice->sliceType : m_encCABACTableIdx;
1453
0
  slice->encCABACTableIdx = cabacTableIdx;
1454
1455
  // initialise entropy coder for the slice
1456
0
  m_CABACWriter.initCtxModels( *slice );
1457
1458
0
  DTRACE( g_trace_ctx, D_HEADER, "=========== POC: %d ===========\n", slice->poc );
1459
1460
0
  int prevQP[MAX_NUM_CH];
1461
0
  prevQP[0] = prevQP[1] = slice->sliceQp;
1462
1463
0
  const PreCalcValues& pcv        = *cs.pcv;
1464
0
  const uint32_t widthInCtus      = pcv.widthInCtus;
1465
0
  uint32_t uiSubStrm              = 0;
1466
0
  const int numSubstreamsColumns  = slice->pps->numTileCols;
1467
0
  const int numSubstreamRows      = slice->sps->entropyCodingSyncEnabled ? pic->cs->pcv->heightInCtus : slice->pps->numTileRows;
1468
0
  const int numSubstreams         = std::max<int>( numSubstreamRows * numSubstreamsColumns, 0/*(int)pic->brickMap->bricks.size()*/ );
1469
0
  std::vector<OutputBitstream> substreamsOut( numSubstreams );
1470
1471
0
  slice->clearSubstreamSizes();
1472
1473
0
  for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ )
1474
0
  {
1475
0
    const uint32_t ctuRsAddr            = slice->sliceMap.ctuAddrInSlice[ctuTsAddr];
1476
0
    const uint32_t ctuXPosInCtus        = ctuRsAddr % widthInCtus;
1477
0
    const uint32_t ctuYPosInCtus        = ctuRsAddr / widthInCtus;
1478
0
    const uint32_t tileXPosInCtus       = slice->pps->tileColBd[cs.pps->ctuToTileCol[ctuXPosInCtus]];
1479
0
    const uint32_t tileYPosInCtus       = slice->pps->tileRowBd[cs.pps->ctuToTileRow[ctuYPosInCtus]];
1480
1481
0
    DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );
1482
1483
0
    const Position pos (ctuXPosInCtus * pcv.maxCUSize, ctuYPosInCtus * pcv.maxCUSize);
1484
0
    const UnitArea ctuArea (cs.area.chromaFormat, Area(pos.x, pos.y, pcv.maxCUSize, pcv.maxCUSize));
1485
0
    CHECK( uiSubStrm >= numSubstreams, "array index out of bounds" );
1486
0
    m_CABACWriter.initBitstream( &substreamsOut[ uiSubStrm ] );
1487
1488
    // set up CABAC contexts' state for this CTU
    // case 1: first CTU of a tile => fresh contexts and QP predictor reset to the slice QP
1489
0
    if (ctuXPosInCtus == tileXPosInCtus && ctuYPosInCtus == tileYPosInCtus )
1490
0
    {
1491
0
      if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset
1492
0
      {
1493
0
        m_CABACWriter.initCtxModels( *slice );
1494
0
      }
1495
0
      prevQP[0] = prevQP[1] = slice->sliceQp;
1496
0
    }
    // case 2: first CTU of a CTU line inside a tile while entropy coding sync (wavefront) is enabled
1497
0
    else if (ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled)
1498
0
    {
1499
      // Synchronize cabac probabilities with the CTU above (position pos.offset( 0, -1 )) if it's available and at the start of a line.
1500
0
      if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset
1501
0
      {
1502
0
        m_CABACWriter.initCtxModels( *slice );
1503
0
      }
1504
0
      if( cs.getCURestricted( pos.offset( 0, -1 ), pos, slice->independentSliceIdx, slice->pps->getTileIdx( ctuXPosInCtus, ctuYPosInCtus ), CH_L, TREE_D ) )
1505
0
      {
1506
        // The CTU above is available, so use the context state saved after the first CTU of the previous line.
1507
0
        m_CABACWriter.getCtx() = m_entropyCodingSyncContextState;
1508
0
      }
1509
0
      prevQP[0] = prevQP[1] = slice->sliceQp;
1510
0
    }
1511
1512
0
    m_CABACWriter.coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr );
1513
1514
    // store probabilities after the first CTU in line (first CTU column of the tile) into buffer
1515
0
    if( ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled )
1516
0
    {
1517
0
      m_entropyCodingSyncContextState = m_CABACWriter.getCtx();
1518
0
    }
1519
1520
    // terminate the sub-stream, if required (end of slice-segment, end of tile, end of wavefront-CTU-row):
1521
0
    bool isMoreCTUsinSlice = ctuTsAddr != (boundingCtuTsAddr - 1);
1522
0
    bool isLastCTUinTile   = isMoreCTUsinSlice && slice->pps->getTileIdx( ctuRsAddr ) != slice->pps->getTileIdx( slice->sliceMap.ctuAddrInSlice[ctuTsAddr+1] );
1523
0
    bool isLastCTUinWPP    = wavefrontsEnabled && isMoreCTUsinSlice && !isLastCTUinTile && ( (slice->sliceMap.ctuAddrInSlice[ctuTsAddr+1] % widthInCtus) == cs.pps->tileColBd[cs.pps->ctuToTileCol[slice->sliceMap.ctuAddrInSlice[ctuTsAddr+1] % widthInCtus]] ); //TODO: adjust tile bound condition
1524
1525
0
    if (isLastCTUinWPP || !isMoreCTUsinSlice || isLastCTUinTile )         // this is the last CTU of either tile/brick/WPP/slice
1526
0
    {
1527
0
      m_CABACWriter.end_of_slice();
1528
1529
      // Byte-alignment in slice_data() when new tile
1530
0
      substreamsOut[ uiSubStrm ].writeByteAlignment();
1531
1532
0
      if (isMoreCTUsinSlice) // the size is recorded only when it is not the last substream in the slice
1533
0
      {
1534
        // write sub-stream size
        // (written bits converted to bytes plus the start code emulation count reported by the bitstream)
1535
0
        slice->addSubstreamSize( ( substreamsOut[ uiSubStrm ].getNumberOfWrittenBits() >> 3 ) + substreamsOut[ uiSubStrm ].countStartCodeEmulations() );
1536
0
      }
1537
0
      uiSubStrm++;
1538
0
    }
1539
0
  } // CTU-loop
1540
1541
  // remember the table index for the next call of this function (read above when selecting cabacTableIdx)
0
  if(slice->pps->cabacInitPresent)
1542
0
  {
1543
0
    m_encCABACTableIdx = m_CABACWriter.getCtxInitId( *slice );
1544
0
  }
1545
0
  else
1546
0
  {
1547
0
    m_encCABACTableIdx = slice->sliceType;
1548
0
  }
1549
1550
  // concatenate substreams
  // sizes were stored for every sub-stream but the last one, hence the "+ 1" (presumably
  // getNumberOfSubstreamSizes() returns the number of addSubstreamSize() calls - TODO confirm)
1551
0
  OutputBitstream& outStream = pic->sliceDataStreams[ 0/*slice->sliceIdx*/ ];
1552
0
  for ( int i = 0; i < slice->getNumberOfSubstreamSizes() + 1; i++ )
1553
0
  {
1554
0
    outStream.addSubstream( &(substreamsOut[ i ]) );
1555
0
  }
1556
0
  pic->sliceDataNumBins += m_CABACWriter.getNumBins();
1557
0
}
1558
1559
} // namespace vvenc
1560
1561
//! \}
1562