Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/EncoderLib/EncSlice.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     EncSlice.cpp
45
    \brief    slice encoder class
46
*/
47
48
#include "EncSlice.h"
49
#include "EncStage.h"
50
#include "EncLib.h"
51
#include "EncPicture.h"
52
#include "BitAllocation.h"
53
#include "CommonLib/UnitTools.h"
54
#include "CommonLib/Picture.h"
55
#include "CommonLib/TimeProfiler.h"
56
#include "CommonLib/dtrace_codingstruct.h"
57
#include "Utilities/NoMallocThreadPool.h"
58
59
#include <math.h>
60
#include "vvenc/vvencCfg.h"
61
62
//! \ingroup EncoderLib
63
//! \{
64
65
namespace vvenc {
66
67
#ifdef TRACE_ENABLE_ITT
68
// Tracing handles, only compiled when TRACE_ENABLE_ITT is defined: one domain ("Encode")
// plus one string handle per named stage label. They are created once during static initialization.
static const __itt_domain* itt_domain_encode              = __itt_domain_create( "Encode" );
69
static const __itt_string_handle* itt_handle_ctuEncode    = __itt_string_handle_create( "Encode_CTU" );
70
static const __itt_string_handle* itt_handle_rspLfVer     = __itt_string_handle_create( "RspLfVer_CTU" );
71
static const __itt_string_handle* itt_handle_lfHor        = __itt_string_handle_create( "LfHor_CTU" );
72
static const __itt_string_handle* itt_handle_sao          = __itt_string_handle_create( "SAO_CTU" );
73
static const __itt_string_handle* itt_handle_alf_stat     = __itt_string_handle_create( "ALF_CTU_STAT" );
74
static const __itt_string_handle* itt_handle_alf_derive   = __itt_string_handle_create( "ALF_DERIVE" );
75
static const __itt_string_handle* itt_handle_alf_recon    = __itt_string_handle_create( "ALF_RECONSTRUCT" );
76
static const __itt_string_handle* itt_handle_ccalf_stat   = __itt_string_handle_create( "CCALF_CTU_STAT" );
77
static const __itt_string_handle* itt_handle_ccalf_derive = __itt_string_handle_create( "CCALF_DERIVE" );
78
static const __itt_string_handle* itt_handle_ccalf_recon  = __itt_string_handle_create( "CCALF_RECONSTRUCT" );
79
#endif
80
81
void setArbitraryWppPattern( const PreCalcValues& pcv, std::vector<int>& ctuAddrMap, int stepX = 1 )
82
0
{
83
0
  ctuAddrMap.resize( pcv.sizeInCtus, 0 );
84
0
  std::vector<int> x_in_line( pcv.heightInCtus, 0 );
85
0
  int x = 0, y = 0, addr = 0;
86
0
  int y_top = 0;
87
0
  const int step = stepX; // number of CTUs in x-direction to scan 
88
0
  ctuAddrMap[addr++] = x++; // first entry (can be omitted)
89
0
  while( addr < pcv.sizeInCtus )
90
0
  {
91
    // fill entries in x-direction
92
0
    int x1 = x;
93
0
    while( x < std::min(x1 + step, (int)pcv.widthInCtus) )
94
0
    {
95
      // general WPP condition (top-right CTU availability)
96
0
      if( y > 0 && !( x_in_line[y - 1] - x >= 2 ) && x != pcv.widthInCtus - 1 )
97
0
        break;
98
0
      ctuAddrMap[addr++] = y*pcv.widthInCtus + x;
99
0
      x++;
100
0
    }
101
0
    x_in_line[y] = x;
102
        
103
0
    y += 1;
104
105
0
    if( y >= pcv.heightInCtus )
106
0
    {
107
      // go up
108
0
      if( x_in_line[y_top] >= pcv.widthInCtus )
109
0
      {
110
0
        y_top++;
111
0
        if( y_top >= pcv.heightInCtus )
112
0
        {
113
          // done
114
0
          break;
115
0
        }
116
0
      }
117
0
      y = y_top;
118
0
    }
119
0
    x = x_in_line[y];
120
121
0
    CHECK( y >= pcv.heightInCtus, "Height in CTUs is exceeded" );
122
0
  }
123
0
}
124
125
struct TileLineEncRsrc
126
{
127
  BitEstimator            m_BitEstimator;
128
  CABACWriter             m_CABACEstimator;
129
  BitEstimator            m_SaoBitEstimator;
130
  CABACWriter             m_SaoCABACEstimator;
131
  BitEstimator            m_AlfBitEstimator;
132
  CABACWriter             m_AlfCABACEstimator;
133
  ReuseUniMv              m_ReuseUniMv;
134
  BlkUniMvInfoBuffer      m_BlkUniMvInfoBuffer;
135
  AffineProfList          m_AffineProfList;
136
  IbcBvCand               m_CachedBvs;
137
  EncSampleAdaptiveOffset m_encSao;
138
  int                     m_prevQp[ MAX_NUM_CH ];
139
0
  TileLineEncRsrc( const VVEncCfg& encCfg ) : m_CABACEstimator( m_BitEstimator ), m_SaoCABACEstimator( m_SaoBitEstimator ), m_AlfCABACEstimator( m_AlfBitEstimator ) { m_AffineProfList.init( ! encCfg.m_picReordering ); }
140
};
141
142
struct PerThreadRsrc
143
{
144
  CtxCache  m_CtxCache;
145
  EncCu     m_encCu;
146
  PelStorage m_alfTempCtuBuf;
147
};
148
149
struct CtuEncParam
150
{
151
  Picture*  pic;
152
  EncSlice* encSlice;
153
  int       ctuRsAddr;
154
  int       ctuPosX;
155
  int       ctuPosY;
156
  UnitArea  ctuArea;
157
  int       tileLineResIdx;
158
159
0
  CtuEncParam() : pic( nullptr ), encSlice( nullptr ), ctuRsAddr( 0 ), ctuPosX( 0 ), ctuPosY( 0 ), ctuArea(), tileLineResIdx( 0 ) {}
160
  CtuEncParam( Picture* _p, EncSlice* _s, const int _r, const int _x, const int _y, const int _tileLineResIdx )
161
    : pic( _p )
162
    , encSlice( _s )
163
    , ctuRsAddr( _r )
164
    , ctuPosX( _x )
165
    , ctuPosY( _y )
166
    , ctuArea( pic->chromaFormat, pic->slices[0]->pps->pcv->getCtuArea( _x, _y ) )
167
0
    , tileLineResIdx( _tileLineResIdx ) {}
168
};
169
170
// ====================================================================================================================
171
// Constructor / destructor / create / destroy
172
// ====================================================================================================================
173
174
/** Default constructor: all external components are unset until init() is called. */
EncSlice::EncSlice()
  : m_pcEncCfg( nullptr )
  , m_threadPool( nullptr )
  , m_ctuTasksDoneCounter( nullptr )
  , m_ctuEncDelay( 1 )                    // same default that initPic() applies before checking IBC
  , m_pLoopFilter( nullptr )
  , m_pALF( nullptr )
  , m_pcRateCtrl( nullptr )
  , m_CABACWriter( m_BinEncoder )         // writer is constructed on top of the member bin encoder
  , m_encCABACTableIdx( VVENC_I_SLICE )
{
}
186
187
188
/** Releases everything that init() allocated: per-tile-line resources, per-thread resources
 *  (including their ALF temp buffers) and the per-CTU SAO statistics arrays.
 */
EncSlice::~EncSlice()
{
  for( auto* lnRsc : m_TileLineEncRsrc )
  {
    delete lnRsc;
  }
  m_TileLineEncRsrc.clear();

  for( auto* taskRsc : m_ThreadRsrc )
  {
    // init() pre-fills the vector with nullptr before allocating, so a slot may still be
    // empty if initialization was interrupted
    if( taskRsc != nullptr )
    {
      taskRsc->m_alfTempCtuBuf.destroy();
      delete taskRsc;
    }
  }
  m_ThreadRsrc.clear();

  m_saoReconParams.clear();

  // m_saoStatData[ ctu ][ component ] is an array allocated with new[] in init()
  for( auto ctuStats : m_saoStatData )
  {
    if( ctuStats == nullptr )
    {
      continue;
    }
    for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
    {
      delete[] ctuStats[ compIdx ];
    }
    delete[] ctuStats;
  }
  m_saoStatData.clear();
}
215
216
void EncSlice::init( const VVEncCfg& encCfg,
217
                     const SPS& sps,
218
                     const PPS& pps,
219
                     std::vector<int>* const globalCtuQpVector,
220
                     LoopFilter& loopFilter,
221
                     EncAdaptiveLoopFilter& alf,
222
                     RateCtrl& rateCtrl,
223
                     NoMallocThreadPool* threadPool,
224
                     WaitCounter* ctuTasksDoneCounter )
225
0
{
226
0
  m_pcEncCfg            = &encCfg;
227
0
  m_pLoopFilter         = &loopFilter;
228
0
  m_pALF                = &alf;
229
0
  m_pcRateCtrl          = &rateCtrl;
230
0
  m_threadPool          = threadPool;
231
0
  m_ctuTasksDoneCounter = ctuTasksDoneCounter;
232
0
  m_syncPicCtx.resize( encCfg.m_entropyCodingSyncEnabled ? pps.getNumTileLineIds() : 0 );
233
234
  
235
0
  const int maxCntRscr = ( encCfg.m_numThreads > 0 ) ? pps.getNumTileLineIds() : 1;
236
0
  const int maxCtuEnc  = ( encCfg.m_numThreads > 0 && threadPool ) ? threadPool->numThreads() : 1;
237
238
0
  m_ThreadRsrc.resize( maxCtuEnc,  nullptr );
239
0
  m_TileLineEncRsrc.resize( maxCntRscr, nullptr );
240
241
0
  for( PerThreadRsrc*& taskRsc : m_ThreadRsrc )
242
0
  {
243
0
    taskRsc = new PerThreadRsrc();
244
0
    taskRsc->m_encCu.init( encCfg,
245
0
                           sps,
246
0
                           globalCtuQpVector,
247
0
                           m_syncPicCtx.data(),
248
0
                           &rateCtrl );
249
0
    taskRsc->m_alfTempCtuBuf.create( pps.pcv->chrFormat, Area( 0, 0, pps.pcv->maxCUSize + (MAX_ALF_PADDING_SIZE << 1), pps.pcv->maxCUSize + (MAX_ALF_PADDING_SIZE << 1) ), pps.pcv->maxCUSize, MAX_ALF_PADDING_SIZE, 0, false );
250
0
  }
251
252
0
  for( TileLineEncRsrc*& lnRsc : m_TileLineEncRsrc )
253
0
  {
254
0
    lnRsc = new TileLineEncRsrc( encCfg );
255
0
    if( sps.saoEnabled )
256
0
    {
257
0
      lnRsc->m_encSao.init( encCfg );
258
0
    }
259
0
  }
260
261
0
  const int sizeInCtus = pps.pcv->sizeInCtus;
262
0
  m_processStates = std::vector<ProcessCtuState>( sizeInCtus );
263
0
  m_saoReconParams.resize( sizeInCtus );
264
265
0
  ::memset( m_saoDisabledRate, 0, sizeof( m_saoDisabledRate ) );
266
267
  // sao statistics
268
0
  if( encCfg.m_bUseSAO )
269
0
  {
270
0
    m_saoStatData.resize( sizeInCtus );
271
0
    for( int i = 0; i < sizeInCtus; i++ )
272
0
    {
273
0
      m_saoStatData[ i ] = new SAOStatData*[ MAX_NUM_COMP ];
274
0
      for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ )
275
0
      {
276
0
        m_saoStatData[ i ][ compIdx ] = new SAOStatData[ NUM_SAO_NEW_TYPES ];
277
0
      }
278
0
    }
279
0
  }
280
0
  ctuEncParams.resize( sizeInCtus );
281
0
  setArbitraryWppPattern( *pps.pcv, m_ctuAddrMap, 3 );
282
283
0
  const unsigned asuHeightInCtus = m_pALF->getAsuHeightInCtus();
284
0
  const unsigned numDeriveLines  = encCfg.m_ifpLines ? 
285
0
    std::min( ((encCfg.m_ifpLines & (~(asuHeightInCtus - 1))) + asuHeightInCtus), pps.pcv->heightInCtus ) : pps.pcv->heightInCtus;
286
0
  m_alfDeriveCtu  = numDeriveLines * pps.pcv->widthInCtus - 1;
287
0
  m_ccalfDeriveCtu = encCfg.m_ifpLines ? pps.pcv->widthInCtus * std::min((unsigned)encCfg.m_ifpLines + 1, pps.pcv->heightInCtus) - 1: pps.pcv->sizeInCtus - 1;
288
0
}
289
290
291
/** Per-picture setup: slice map, CABAC table index, slice QP/lambda, per-thread and
 *  per-tile-line state, and the CTU encoding delay stored in m_ctuEncDelay.
 *
 *  \param pic  picture to be encoded; pic->cs and pic->cs->slice must be set
 */
void EncSlice::initPic( Picture* pic )
{
  Slice* slice   = pic->cs->slice;
  const auto& pcv = *pic->cs->pcv;

  const bool hasMultipleTiles = slice->pps->numTileCols * slice->pps->numTileRows > 1;
  if( hasMultipleTiles )
  {
    slice->sliceMap = slice->pps->sliceMap[0];
  }
  else
  {
    slice->sliceMap.addCtusToSlice( 0, pcv.widthInCtus, 0, pcv.heightInCtus, pcv.widthInCtus);
  }

  // this ensures that independently encoded bitstream chunks can be combined to bit-equal
  SliceType cabacTableIdx = m_encCABACTableIdx;
  if( ! slice->pps->cabacInitPresent || slice->pendingRasInit )
  {
    cabacTableIdx = slice->sliceType;
  }
  slice->encCABACTableIdx = cabacTableIdx;

  // set QP and lambda values
  xInitSliceLambdaQP( slice );

  for( auto* thrRsc : m_ThreadRsrc )
  {
    thrRsc->m_encCu.initPic( pic );
  }

  for( auto* lnRsc : m_TileLineEncRsrc )
  {
    lnRsc->m_ReuseUniMv.resetReusedUniMvs();
  }

  if( ! pic->useIBC )
  {
    m_ctuEncDelay = 1;
    return;
  }

  // IBC needs unfiltered samples up to max IBC search range
  // therefore ensure that numCtuDelayLUT CTU's have been encoded first
  // assuming IBC localSearchRangeX / Y = 128
  const int numCtuDelayLUT[ 3 ] = { 15, 3, 1 };  // indexed by maxCUSizeLog2 - 5
  m_ctuEncDelay = 1;
  CHECK( pcv.maxCUSizeLog2 < 5 || pcv.maxCUSizeLog2 > 7, "invalid max CTUSize" );
  m_ctuEncDelay = numCtuDelayLUT[ pcv.maxCUSizeLog2 - 5 ];
}
332
333
334
335
/** Computes the slice QP, the lambda and the slice-level chroma QP deltas, and passes QP and
 *  lambda to every per-thread EncCu.
 *
 *  With a rate-control target bitrate and a non-negative pic->picInitialQP (2nd pass), QP and
 *  lambda are taken from the picture; otherwise they come from xGetQPForPicture() and
 *  xCalculateLambda(). With perceptual QPA enabled the QP is further adapted by
 *  BitAllocation::applyQPAdaptationSlice().
 *
 *  \param slice  slice to set up; sliceQp, sliceChromaQpDelta[] and chromaQpAdjEnabled are written
 */
void EncSlice::xInitSliceLambdaQP( Slice* slice )
{
  auto* const pic = slice->pic;

  // true if the intra-or-periodic chroma QP offsets apply to this slice
  const auto useIntraOrPeriodicOffset = [&]() -> bool
  {
    return (slice->isIntra() && !slice->sps->IBC) || (m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity > 0 && (slice->poc % m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity) == 0);
  };

  // pre-compute lambda and QP
  const bool rcp = (m_pcEncCfg->m_RCTargetBitrate > 0 && pic->picInitialQP >= 0); // 2nd pass
  int    iQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, pic->picInitialQP); // RC start QP
  double dQP;
  double dLambda;
  if (rcp)
  {
    dQP     = (double) pic->picInitialQP;
    dLambda = pic->picInitialLambda;
  }
  else
  {
    dQP     = xGetQPForPicture (slice);
    dLambda = xCalculateLambda (slice, slice->TLayer, dQP, dQP, iQP); // also sets iQP
  }

  int sliceChromaQpOffsetIntraOrPeriodic[2] = { m_pcEncCfg->m_sliceChromaQpOffsetIntraOrPeriodic[0], m_pcEncCfg->m_sliceChromaQpOffsetIntraOrPeriodic[1] };
  const int lookAheadRCCQpOffset = 0;   // was (m_pcEncCfg->m_RCTargetBitrate > 0 && m_pcEncCfg->m_LookAhead && CS::isDualITree (*slice->pic->cs) ? 1 : 0);

  if (m_pcEncCfg->m_usePerceptQPA) // adapt sliceChromaQpOffsetIntraOrPeriodic and pic->ctuAdaptedQP
  {
    const bool     cqp               = useIntraOrPeriodicOffset();
    const uint32_t startCtuTsAddr    = slice->sliceMap.ctuAddrInSlice[0];
    const uint32_t boundingCtuTsAddr = pic->cs->pcv->sizeInCtus;

    iQP = BitAllocation::applyQPAdaptationSlice (slice, m_pcEncCfg, iQP, dLambda, &pic->picVA.visAct, // updates pic->picInitialQP
                                                 *m_ThreadRsrc[0]->m_encCu.getQpPtr(), m_pcRateCtrl->getIntraPQPAStats(),
                                                 (slice->pps->sliceChromaQpFlag && cqp ? sliceChromaQpOffsetIntraOrPeriodic : nullptr),
                                                 m_pcRateCtrl->getMinNoiseLevels(), startCtuTsAddr, boundingCtuTsAddr);
    if (iQP >= 0) // QP OK?
    {
      dLambda *= pow (2.0, ((double) iQP - dQP) / 3.0); // adjust lambda based on change of slice QP
    }
    else
    {
      iQP = (int) dQP; // revert to unadapted slice QP
    }
  }
  else if (rcp)
  {
    pic->picInitialQP = -1; // no QPA - unused now
  }

  // slice-level chroma QP offsets
  int cbQP   = 0;
  int crQP   = 0;
  int cbCrQP = 0;

  const bool sliceChromaQp = slice->pps->sliceChromaQpFlag;
  if (sliceChromaQp && CS::isDualITree (*pic->cs) && !m_pcEncCfg->m_usePerceptQPA && (m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity == 0))
  {
    // QP offset for dual-tree
    cbQP   = m_pcEncCfg->m_chromaCbQpOffsetDualTree + lookAheadRCCQpOffset;
    crQP   = m_pcEncCfg->m_chromaCrQpOffsetDualTree + lookAheadRCCQpOffset;
    cbCrQP = m_pcEncCfg->m_chromaCbCrQpOffsetDualTree + lookAheadRCCQpOffset;
  }
  else if (sliceChromaQp)
  {
    const GOPEntry &gopEntry = *(pic->gopEntry);

    if (useIntraOrPeriodicOffset())
    {
      cbQP = sliceChromaQpOffsetIntraOrPeriodic[0] + lookAheadRCCQpOffset;
      crQP = sliceChromaQpOffsetIntraOrPeriodic[1] + lookAheadRCCQpOffset;
    }
    else
    {
      cbQP = gopEntry.m_CbQPoffset + lookAheadRCCQpOffset;
      crQP = gopEntry.m_CrQPoffset + lookAheadRCCQpOffset;
    }
    cbCrQP = (cbQP + crQP) >> 1; // use floor of average CbCr chroma QP offset for joint-CbCr coding

    // keep the sum of PPS offset and slice offset within [-12, 12]
    cbQP   = Clip3 (-12, 12, cbQP + slice->pps->chromaQpOffset[COMP_Cb]) - slice->pps->chromaQpOffset[COMP_Cb];
    crQP   = Clip3 (-12, 12, crQP + slice->pps->chromaQpOffset[COMP_Cr]) - slice->pps->chromaQpOffset[COMP_Cr];
    cbCrQP = Clip3 (-12, 12, cbCrQP + slice->pps->chromaQpOffset[COMP_JOINT_CbCr]) - slice->pps->chromaQpOffset[COMP_JOINT_CbCr];
  }

  slice->sliceChromaQpDelta[COMP_Cb] = Clip3 (-12, 12, cbQP);
  slice->sliceChromaQpDelta[COMP_Cr] = Clip3 (-12, 12, crQP);
  if (slice->sps->jointCbCr)
  {
    slice->sliceChromaQpDelta[COMP_JOINT_CbCr] = Clip3 (-12, 12, cbCrQP);
  }
  else
  {
    slice->sliceChromaQpDelta[COMP_JOINT_CbCr] = 0;
  }

  for( auto* thrRsc : m_ThreadRsrc )
  {
    thrRsc->m_encCu.setUpLambda( *slice, dLambda, iQP, true, true );
  }

  slice->sliceQp            = iQP;
  slice->chromaQpAdjEnabled = slice->pps->chromaQpOffsetListLen > 0;
}
398
399
// QP offset table used by xGetQPForPicture() for non-intra slices when perceptual QPA is enabled;
// indexed by the picture's gopEntry->m_vtl.
static const int highTL[6] = { -1, 0, 0, 2, 4, 5 };
400
401
int EncSlice::xGetQPForPicture( const Slice* slice )
402
0
{
403
0
  const int lumaQpBDOffset = slice->sps->qpBDOffset[ CH_L ];
404
0
  int qp;
405
406
0
  if ( m_pcEncCfg->m_costMode == VVENC_COST_LOSSLESS_CODING )
407
0
  {
408
0
    qp = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP;
409
0
  }
410
0
  else
411
0
  {
412
0
    qp = m_pcEncCfg->m_QP + slice->pic->gopAdaptedQP;
413
414
0
    if (m_pcEncCfg->m_usePerceptQPA)
415
0
    {
416
0
      const int tlayer = slice->pic->gopEntry->m_vtl;
417
418
0
      qp = (slice->isIntra() ? std::min (qp, ((qp - std::min (3, floorLog2 (m_pcEncCfg->m_GOPSize) - 4/*TODO 3 with JVET-AC0149?*/)) * 15 + 3) >> 4) : highTL[tlayer] + ((qp * (16 + std::min (2, tlayer))) >> 4) + 0/*TODO +-1?*/);
419
0
    }
420
0
    else if( slice->isIntra() )
421
0
    {
422
0
      qp += m_pcEncCfg->m_intraQPOffset;
423
0
    }
424
0
    else
425
0
    {
426
0
      if( qp != -lumaQpBDOffset )
427
0
      {
428
0
        const GOPEntry &gopEntry = *(slice->pic->gopEntry);
429
        // adjust QP according to the QP offset for the GOP entry.
430
0
        qp += gopEntry.m_QPOffset;
431
432
        // adjust QP according to QPOffsetModel for the GOP entry.
433
0
        double dqpOffset = qp * gopEntry.m_QPOffsetModelScale + gopEntry.m_QPOffsetModelOffset + 0.5;
434
0
        int qpOffset = (int)floor( Clip3<double>( 0.0, 3.0, dqpOffset ) );
435
0
        qp += qpOffset;
436
0
      }
437
0
    }
438
439
0
    if( m_pcEncCfg->m_blockImportanceMapping && !slice->pic->m_picShared->m_ctuBimQpOffset.empty() )
440
0
    {
441
0
      qp += slice->pic->m_picShared->m_picAuxQpOffset;
442
0
    }
443
0
  }
444
0
  qp = Clip3( -lumaQpBDOffset, MAX_QP, qp );
445
0
  return qp;
446
0
}
447
448
449
double EncSlice::xCalculateLambda( const Slice* slice,
450
                                   const int    depth, // slice GOP hierarchical depth.
451
                                   const double refQP, // initial slice-level QP
452
                                   const double dQP,   // initial double-precision QP
453
                                         int&   iQP )  // returned integer QP.
454
0
{
455
0
  const GOPEntry &gopEntry = *(slice->pic->gopEntry);
456
0
  const int SHIFT_QP       = 12;
457
0
  const int temporalId     = gopEntry.m_temporalId;
458
0
  std::vector<double> intraLambdaModifiers;
459
0
  for ( int i = 0; i < VVENC_MAX_TLAYER; i++ )
460
0
  {
461
0
    if( m_pcEncCfg->m_adIntraLambdaModifier[i] != 0.0 ) intraLambdaModifiers.push_back( m_pcEncCfg->m_adIntraLambdaModifier[i] );
462
0
    else break;
463
0
  }
464
465
0
  int bitdepth_luma_qp_scale = 6
466
0
                               * (slice->sps->bitDepths[ CH_L ] - 8
467
0
                                  - DISTORTION_PRECISION_ADJUSTMENT(slice->sps->bitDepths[ CH_L ]));
468
0
  double qp_temp = dQP + bitdepth_luma_qp_scale - SHIFT_QP;
469
  // Case #1: I or P-slices (key-frame)
470
0
  double dQPFactor = gopEntry.m_QPFactor;
471
0
  if( slice->sliceType == VVENC_I_SLICE )
472
0
  {
473
0
    if (m_pcEncCfg->m_dIntraQpFactor>=0.0 && gopEntry.m_sliceType != 'I')
474
0
    {
475
0
      dQPFactor = m_pcEncCfg->m_dIntraQpFactor;
476
0
    }
477
0
    else
478
0
    {
479
0
      dQPFactor = 0.57;
480
0
      if( ! m_pcEncCfg->m_lambdaFromQPEnable )
481
0
      {
482
0
        const int NumberBFrames = ( m_pcEncCfg->m_GOPSize - 1 );
483
0
        const double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05 * (double)NumberBFrames );
484
0
        dQPFactor *= dLambda_scale;
485
0
      }
486
0
    }
487
0
  }
488
0
  else if( m_pcEncCfg->m_lambdaFromQPEnable )
489
0
  {
490
0
    dQPFactor=0.57;
491
0
  }
492
493
0
  double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 );
494
495
0
  if( !(m_pcEncCfg->m_lambdaFromQPEnable) && depth>0 )
496
0
  {
497
0
    double qp_temp_ref = refQP + bitdepth_luma_qp_scale - SHIFT_QP;
498
0
    dLambda *= Clip3(2.00, 4.00, (qp_temp_ref / 6.0));   // (j == B_SLICE && p_cur_frm->layer != 0 )
499
0
  }
500
501
  // if hadamard is used in ME process
502
0
  if ( !m_pcEncCfg->m_bUseHADME && slice->sliceType != VVENC_I_SLICE )
503
0
  {
504
0
    dLambda *= 0.95;
505
0
  }
506
507
0
  double lambdaModifier;
508
0
  if( slice->sliceType != VVENC_I_SLICE || intraLambdaModifiers.empty())
509
0
  {
510
0
    lambdaModifier = m_pcEncCfg->m_adLambdaModifier[ temporalId ];
511
0
  }
512
0
  else
513
0
  {
514
0
    lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ];
515
0
  }
516
0
  dLambda *= lambdaModifier;
517
518
0
  iQP = Clip3( -slice->sps->qpBDOffset[ CH_L ], MAX_QP, (int) floor( dQP + 0.5 ) );
519
520
0
  if( m_pcEncCfg->m_DepQuantEnabled )
521
0
  {
522
0
    dLambda *= pow( 2.0, 0.25/3.0 ); // slight lambda adjustment for dependent quantization (due to different slope of quantizer)
523
0
  }
524
525
  // NOTE: the lambda modifiers that are sometimes applied later might be best always applied in here.
526
0
  return dLambda;
527
0
}
528
529
530
// ====================================================================================================================
531
// Public member functions
532
// ====================================================================================================================
533
534
535
/** \param pic   picture class
536
 */
537
void EncSlice::compressSlice( Picture* pic )
538
0
{
539
0
  PROFILER_SCOPE_AND_STAGE( 1, g_timeProfiler, P_COMPRESS_SLICE );
540
0
  CodingStructure& cs         = *pic->cs;
541
0
  Slice* const slice          = cs.slice;
542
0
  uint32_t  startCtuTsAddr    = slice->sliceMap.ctuAddrInSlice[0];
543
0
  uint32_t  boundingCtuTsAddr = pic->cs->pcv->sizeInCtus;
544
545
0
  cs.pcv      = slice->pps->pcv;
546
0
  cs.fracBits = 0;
547
548
0
  if( startCtuTsAddr == 0 )
549
0
  {
550
0
    cs.initStructData( slice->sliceQp );
551
0
  }
552
553
0
  for( auto* thrRsrc : m_ThreadRsrc )
554
0
  {
555
0
    thrRsrc->m_encCu.initSlice( slice );
556
0
  }
557
558
0
  for( auto* lnRsrc : m_TileLineEncRsrc )
559
0
  {
560
0
    lnRsrc->m_CABACEstimator    .initCtxModels( *slice );
561
0
    lnRsrc->m_SaoCABACEstimator .initCtxModels( *slice );
562
0
    lnRsrc->m_AlfCABACEstimator .initCtxModels( *slice );
563
0
    lnRsrc->m_AffineProfList    .resetAffineMVList();
564
0
    lnRsrc->m_BlkUniMvInfoBuffer.resetUniMvList();
565
0
    lnRsrc->m_CachedBvs         .resetIbcBvCand();
566
567
0
    if( slice->sps->saoEnabled && pic->useSAO )
568
0
    {
569
0
      lnRsrc->m_encSao          .initSlice( slice );
570
0
    }
571
0
  }
572
573
0
  if( slice->sps->fpelMmvd && !slice->picHeader->disFracMMVD )
574
0
  {
575
0
    slice->picHeader->disFracMMVD = ( pic->lwidth() * pic->lheight() > 1920 * 1080 ) ? true : false;
576
0
  }
577
578
0
  xProcessCtus( pic, startCtuTsAddr, boundingCtuTsAddr );
579
0
}
580
581
/** Decides the joint-CbCr sign flag of the picture header.
 *
 *  Both chroma planes of the given luma area are high-pass filtered with a 3x3 kernel and the
 *  products of the filtered Cb and Cr samples are summed up. The flag is set when that sum is
 *  negative. If chroma is not enabled the flag is set to true.
 *
 *  \param cs           coding structure; cs.picture is read, cs.slice->picHeader->jointCbCrSign is written
 *  \param topLeftLuma  top-left luma position of the analyzed area
 *  \param sizeLuma     luma size of the analyzed area
 */
void setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma )
{
  bool              sgnFlag = true;

  if( isChromaEnabled( cs.picture->chromaFormat) )
  {
    const CompArea  cbArea  = CompArea( COMP_Cb, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true );
    const CompArea  crArea  = CompArea( COMP_Cr, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true );

    const CPelBuf   orgCb   = cs.picture->getFilteredOrigBuffer().valid() ? cs.picture->getRspOrigBuf( cbArea ): cs.picture->getOrigBuf( cbArea );
    const CPelBuf   orgCr   = cs.picture->getFilteredOrigBuffer().valid() ? cs.picture->getRspOrigBuf( crArea ): cs.picture->getOrigBuf( crArea );

    // the 3x3 kernel reads one sample beyond the current one in every direction, so skip the
    // first/last column and row where the area touches the border of the Cb plane
    const int       x0      = ( cbArea.x > 0 ? 0 : 1 );
    const int       y0      = ( cbArea.y > 0 ? 0 : 1 );
    const int       x1      = ( cbArea.x + cbArea.width  < cs.picture->Cb().width  ? cbArea.width  : cbArea.width  - 1 );
    const int       y1      = ( cbArea.y + cbArea.height < cs.picture->Cb().height ? cbArea.height : cbArea.height - 1 );
    const int       cbs     = orgCb.stride;
    const int       crs     = orgCr.stride;
    const Pel*      pCb     = orgCb.buf + y0 * cbs;
    const Pel*      pCr     = orgCr.buf + y0 * crs;
    int64_t         sumCbCr = 0;

    // zero-mean 3x3 high-pass: 12 * center - 2 * (4 direct neighbors) - (4 diagonal neighbors)
    const auto highPass = []( const Pel* p, const int x, const int stride ) -> int
    {
      return ( 12*(int)p[x] - 2*((int)p[x-1] + (int)p[x+1] + (int)p[x-stride] + (int)p[x+stride]) - ((int)p[x-1-stride] + (int)p[x+1-stride] + (int)p[x-1+stride] + (int)p[x+1+stride]) );
    };

    // determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes
    for( int y = y0; y < y1; y++, pCb += cbs, pCr += crs )
    {
      for( int x = x0; x < x1; x++ )
      {
        const int cb = highPass( pCb, x, cbs );
        const int cr = highPass( pCr, x, crs );
        // widen before multiplying: the product of two filtered samples may not fit into int
        sumCbCr += (int64_t)cb * (int64_t)cr;
      }
    }

    sgnFlag = ( sumCbCr < 0 );
  }

  cs.slice->picHeader->jointCbCrSign = sgnFlag;
}
618
619
/** Immutable position of one CTU: column, row and raster-scan address. */
struct CtuPos
{
  const int ctuPosX;    // CTU column
  const int ctuPosY;    // CTU row
  const int ctuRsAddr;  // raster-scan address

  /** \param _x  CTU column
   *  \param _y  CTU row
   *  \param _a  raster-scan address
   */
  CtuPos( int _x, int _y, int _a )
    : ctuPosX  ( _x )
    , ctuPosY  ( _y )
    , ctuRsAddr( _a )
  {
  }
};
627
628
class CtuTsIterator
629
{
630
  private:
631
    const CodingStructure& cs;
632
    const int        m_startTsAddr;
633
    const int        m_endTsAddr;
634
    std::vector<int> m_ctuAddrMap;
635
          int        m_ctuTsAddr;
636
637
  private:
638
    int getNextTsAddr( const int _tsAddr ) const
639
0
    {
640
0
      const PreCalcValues& pcv  = *cs.pcv;
641
0
      const int startSliceRsRow = m_startTsAddr / pcv.widthInCtus;
642
0
      const int startSliceRsCol = m_startTsAddr % pcv.widthInCtus;
643
0
      const int endSliceRsRow   = (m_endTsAddr - 1) / pcv.widthInCtus;
644
0
      const int endSliceRsCol   = (m_endTsAddr - 1) % pcv.widthInCtus;
645
0
            int ctuTsAddr = _tsAddr;
646
0
      CHECK( ctuTsAddr > m_endTsAddr, "error: array index out of bounds" );
647
0
      while( ctuTsAddr < m_endTsAddr )
648
0
      {
649
0
        ctuTsAddr++;
650
0
        const int ctuRsAddr = ctuTsAddr; 
651
0
        if( cs.slice->pps->rectSlice
652
0
            && ( (ctuRsAddr / pcv.widthInCtus) < startSliceRsRow
653
0
              || (ctuRsAddr / pcv.widthInCtus) > endSliceRsRow
654
0
              || (ctuRsAddr % pcv.widthInCtus) < startSliceRsCol
655
0
              || (ctuRsAddr % pcv.widthInCtus) > endSliceRsCol ) )
656
0
          continue;
657
0
        break;
658
0
      }
659
0
      return ctuTsAddr;
660
0
    }
661
662
    int mapAddr( const int _addr ) const
663
0
    {
664
0
      if( _addr < 0 )
665
0
        return _addr;
666
0
      if( _addr >= m_ctuAddrMap.size() )
667
0
        return _addr;
668
0
      return m_ctuAddrMap[ _addr ];
669
0
    }
670
671
  public:
672
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e,       std::vector<int>& _m         ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( _s ) {}
673
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, bool _wpp                          ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ),                     m_ctuTsAddr( _s ) { if( _wpp ) setWppPattern(); }
674
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>& _m         ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( _s ) {}
675
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>& _m, int _c ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( std::max( _s, _c ) ) {}
676
0
    CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>* _m, bool _wpp ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuTsAddr( _s ) {  if( _wpp ) m_ctuAddrMap = *_m;  }
677
678
0
    virtual ~CtuTsIterator() { m_ctuAddrMap.clear(); }
679
680
0
    CtuTsIterator& operator++()                { m_ctuTsAddr = getNextTsAddr( m_ctuTsAddr ); return *this; }
681
0
    CtuTsIterator  operator++(int)             { auto retval = *this; ++(*this); return retval; }
682
0
    bool operator==(CtuTsIterator other) const { return m_ctuTsAddr == other.m_ctuTsAddr; }
683
0
    bool operator!=(CtuTsIterator other) const { return m_ctuTsAddr != other.m_ctuTsAddr; }
684
0
    CtuPos operator*()                   const { const int ctuRsAddr = mapAddr( m_ctuTsAddr );  return CtuPos( ctuRsAddr % cs.pcv->widthInCtus, ctuRsAddr / cs.pcv->widthInCtus, ctuRsAddr ); }
685
686
0
    CtuTsIterator begin() { return CtuTsIterator( cs, m_startTsAddr, m_endTsAddr, m_ctuAddrMap ); };
687
0
    CtuTsIterator end()   { return CtuTsIterator( cs, m_startTsAddr, m_endTsAddr, m_ctuAddrMap, m_endTsAddr ); };
688
689
    using iterator_category = std::forward_iterator_tag;
690
    using value_type        = int;
691
    using pointer           = int*;
692
    using reference         = int&;
693
    using difference_type   = ptrdiff_t;
694
695
    void setWppPattern()
696
0
    {
697
0
      const PreCalcValues& pcv = *cs.pcv;
698
0
      m_ctuAddrMap.resize( pcv.sizeInCtus, 0 );
699
0
      int addr = 0;
700
0
      for( int i = 1; i < pcv.sizeInCtus; i++ )
701
0
      {
702
0
        int x = addr % pcv.widthInCtus;
703
0
        int y = addr / pcv.widthInCtus;
704
0
        x -= 1;
705
0
        y += 1;
706
0
        if( x < 0 || y >= pcv.heightInCtus )
707
0
        {
708
0
          x += 1 + y;
709
0
          y  = 0;
710
0
        }
711
0
        if( x >= pcv.widthInCtus )
712
0
        {
713
0
          y += ( x - pcv.widthInCtus ) + 1;
714
0
          x  = pcv.widthInCtus - 1;
715
0
        }
716
0
        addr = y * pcv.widthInCtus + x;
717
0
        m_ctuAddrMap[ i ] = addr;
718
0
      }
719
0
    }
720
};
721
722
/** Forwards to EncSampleAdaptiveOffset::disabledRate() with the member m_saoDisabledRate and
 *  the SAO encoding rates and chroma format taken from the encoder configuration.
 *
 *  \param cs           coding structure of the picture
 *  \param reconParams  per-CTU SAO parameters passed through to disabledRate()
 */
void EncSlice::saoDisabledRate( CodingStructure& cs, SAOBlkParam* reconParams )
{
  const VVEncCfg& cfg = *m_pcEncCfg;

  EncSampleAdaptiveOffset::disabledRate( cs,
                                         m_saoDisabledRate,
                                         reconParams,
                                         cfg.m_saoEncodingRate,
                                         cfg.m_saoEncodingRateChroma,
                                         cfg.m_internChromaFormat );
}
726
727
// Finalizes SAO related state after a picture has been compressed.
// Does nothing unless SAO is enabled in the SPS and used for this picture. Otherwise it stores the
// SAO disabled statistics (only when the encoder is configured with zero threads) and copies the
// per-component SAO enable decisions into every slice of the picture.
void EncSlice::finishCompressSlice( Picture* pic, Slice& slice )
{
  CodingStructure& cs = *pic->cs;

  // finalize
  const bool saoActive = slice.sps->saoEnabled && pic->useSAO;
  if( !saoActive )
  {
    return;
  }

  // store disabled statistics
  if( m_pcEncCfg->m_numThreads == 0 )
  {
    saoDisabledRate( cs, &m_saoReconParams[ 0 ] );
  }

  // set slice header flags (both chroma components have to share one decision)
  CHECK( m_saoEnabled[ COMP_Cb ] != m_saoEnabled[ COMP_Cr ], "Unspecified error");
  for( const auto& picSlice : pic->slices )
  {
    picSlice->saoEnabled[ CH_L ] = m_saoEnabled[ COMP_Y  ];
    picSlice->saoEnabled[ CH_C ] = m_saoEnabled[ COMP_Cb ];
  }
}
747
748
// Runs the per-CTU processing pipeline for one picture.
//
// Steps:
//  1. picture level initialization: joint CbCr modes (if enabled in the SPS), picture level SAO
//     enable decision and reset of the SAO reconstruction parameters, ALF encoder process init
//  2. reset of all CTU process states to CTU_ENCODE
//  3. setup of one CtuEncParam entry per CTU, in the order delivered by CtuTsIterator
//  4. execution: with m_numThreads > 0 one barrier task per CTU is handed to the thread pool,
//     otherwise xProcessCtuTask<false> is called in a loop on all unfinished CTUs until the state
//     at index boundingCtuTsAddr - 1 reaches PROCESS_DONE
//
// pic                picture to process (its coding structure, slice and pcv are used)
// startCtuTsAddr     first CTU address handed to the CTU iterator
// boundingCtuTsAddr  bounding (exclusive end) CTU address handed to the CTU iterator
void EncSlice::xProcessCtus( Picture* pic, const unsigned startCtuTsAddr, const unsigned boundingCtuTsAddr )
{
  PROFILER_SCOPE_TOP_LEVEL_EXT( 1, g_timeProfiler, P_IGNORE, pic->cs );
  CodingStructure& cs      = *pic->cs;
  Slice&           slice   = *cs.slice;
  const PreCalcValues& pcv = *cs.pcv;

  // the same decision selects the iterator mode, the line resource index and the execution model below
  const bool multiThreaded = m_pcEncCfg->m_numThreads > 0;

  // initialization
  if( slice.sps->jointCbCr )
  {
    setJointCbCrModes( cs, Position(0, 0), cs.area.lumaSize() );
  }

  if( slice.sps->saoEnabled && pic->useSAO )
  {
    // check SAO enabled or disabled
    EncSampleAdaptiveOffset::decidePicParams( cs, m_saoDisabledRate, m_saoEnabled, m_pcEncCfg->m_saoEncodingRate, m_pcEncCfg->m_saoEncodingRateChroma, m_pcEncCfg->m_internChromaFormat );

    // SAO counts as completely disabled only if it is off for every valid component
    m_saoAllDisabled = true;
    for( int compIdx = 0; compIdx < getNumberValidComponents( pcv.chrFormat ); compIdx++ )
    {
      m_saoAllDisabled &= ! m_saoEnabled[ compIdx ];
    }

    std::fill( m_saoReconParams.begin(), m_saoReconParams.end(), SAOBlkParam() );
  }
  else
  {
    m_saoAllDisabled = true;
  }

  if( slice.sps->alfEnabled )
  {
    m_pALF->initEncProcess( slice );
  }

  std::fill( m_processStates.begin(), m_processStates.end(), CTU_ENCODE );

  // fill encoder parameter list
  // (the former local copy of slice.sliceMap.ctuAddrInSlice was never read and has been removed)
  int idx = 0;
  auto ctuIter = CtuTsIterator( cs, startCtuTsAddr, boundingCtuTsAddr, &m_ctuAddrMap, multiThreaded );
  for( auto ctuPos : ctuIter )
  {
    auto& param = ctuEncParams[ idx ];

    param.pic       = pic;
    param.encSlice  = this;
    param.ctuRsAddr = ctuPos.ctuRsAddr;
    param.ctuPosX   = ctuPos.ctuPosX;
    param.ctuPosY   = ctuPos.ctuPosY;
    param.ctuArea   = UnitArea( pic->chromaFormat, slice.pps->pcv->getCtuArea( ctuPos.ctuPosX, ctuPos.ctuPosY ) );

    // line resources are only distinguished per tile line when running multi-threaded
    if( multiThreaded )
    {
      param.tileLineResIdx = slice.pps->getTileLineId( ctuPos.ctuPosX, ctuPos.ctuPosY );
    }
    else
    {
      param.tileLineResIdx = 0;
    }
    idx++;
  }

  //for( int i = 0; i < idx; i++ )
  //{
  //  for( int j = i; j < idx; j++ )
  //  {
  //    if( ctuEncParams[i].tileLineResIdx != ctuEncParams[j].tileLineResIdx ) continue;
  //
  //    CHECK( ctuEncParams[i].ctuPosY != ctuEncParams[j].ctuPosY, "Not the same CTU line!" );
  //    CHECK( slice.pps->getTileIdx( ctuEncParams[i].ctuPosX, ctuEncParams[i].ctuPosY ) != slice.pps->getTileIdx( ctuEncParams[j].ctuPosX, ctuEncParams[j].ctuPosY ), "Not the same tile!" );
  //  }
  //}

  // exactly one parameter entry per CTU of the picture is expected
  CHECK( idx != pcv.sizeInCtus, "array index out of bounds" );

  // process ctu's until last ctu is done
  if( multiThreaded )
  {
    // one barrier task per CTU; xProcessCtuTask<true> is passed as the additional callback
    // (presumably the readiness check of the task - confirm against the thread pool interface)
    for( auto& ctuEncParam : ctuEncParams )
    {
      m_threadPool->addBarrierTask( EncSlice::xProcessCtuTask<false>,
                                    &ctuEncParam,
                                    m_ctuTasksDoneCounter,
                                    nullptr,
                                    {},
                                    EncSlice::xProcessCtuTask<true> );
    }
  }
  else
  {
    // single threaded: keep sweeping over all CTUs, each call advances a CTU by at most one stage
    do
    {
      for( auto& ctuEncParam : ctuEncParams )
      {
        if( m_processStates[ctuEncParam.ctuRsAddr] != PROCESS_DONE )
          EncSlice::xProcessCtuTask<false>( 0, &ctuEncParam );
      }
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_LUMA_LF,   cs, cs.getRecoBuf(), COMP_Y  );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMP_Cb );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMP_Cr );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_LUMA_SAO,   cs, cs.getRecoBuf(), COMP_Y  );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_CHROMA_SAO, cs, cs.getRecoBuf(), COMP_Cb );
      DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_CHROMA_SAO, cs, cs.getRecoBuf(), COMP_Cr );
    }
    while( m_processStates[ boundingCtuTsAddr - 1 ] != PROCESS_DONE );
  }
}
855
856
inline bool checkCtuTaskNbTop( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
857
0
{
858
0
  return ctuPosY > 0 && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 0, -1 ) ) && processStates[ ctuRsAddr - pps.pcv->widthInCtus ] <= tskType;
859
0
}
860
861
inline bool checkCtuTaskNbBot( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
862
0
{
863
0
  return ctuPosY + 1 < pps.pcv->heightInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 0, 1 ) ) && processStates[ ctuRsAddr     + pps.pcv->widthInCtus ] <= tskType;
864
0
}
865
866
inline bool checkCtuTaskNbRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
867
0
{
868
0
  return ctuPosX + 1 < pps.pcv->widthInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 1, 0 ) ) && processStates[ ctuRsAddr + 1 ] <= tskType;
869
0
}
870
871
inline bool checkCtuTaskNbTopRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false )
872
0
{
873
0
  return ctuPosY > 0 && ctuPosX + 1 < pps.pcv->widthInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 1, -1 ) ) && processStates[ ctuRsAddr - pps.pcv->widthInCtus + 1 ] <= tskType;
874
0
}
875
876
inline bool checkCtuTaskNbBotRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, const int rightOffset = 1, bool override = false )
877
0
{
878
0
  return ctuPosX + rightOffset < pps.pcv->widthInCtus && ctuPosY + 1 < pps.pcv->heightInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, rightOffset, 1 ) ) && processStates[ ctuRsAddr + rightOffset + pps.pcv->widthInCtus ] <= tskType;
879
0
}
880
881
template<bool checkReadyState>
882
bool EncSlice::xProcessCtuTask( int threadIdx, void* taskParam )
883
0
{
884
0
  CtuEncParam* ctuEncParam       = static_cast<CtuEncParam*>( taskParam );
885
0
  Picture* pic                   = ctuEncParam->pic;
886
0
  EncSlice* encSlice             = ctuEncParam->encSlice;
887
0
  CodingStructure& cs            = *pic->cs;
888
0
  Slice&           slice         = *cs.slice;
889
0
  const PPS&       pps           = *slice.pps;
890
0
  const PreCalcValues& pcv       = *cs.pcv;
891
0
  const int ctuRsAddr            = ctuEncParam->ctuRsAddr;
892
0
  const int ctuPosX              = ctuEncParam->ctuPosX;
893
0
  const int ctuPosY              = ctuEncParam->ctuPosY;
894
0
  const int x                    = ctuPosX << pcv.maxCUSizeLog2;
895
0
  const int y                    = ctuPosY << pcv.maxCUSizeLog2;
896
0
  const int width                = std::min( pcv.maxCUSize, pcv.lumaWidth  - x );
897
0
  const int height               = std::min( pcv.maxCUSize, pcv.lumaHeight - y );
898
0
  const int ctuStride            = pcv.widthInCtus;
899
0
  const int lineIdx              = ctuEncParam->tileLineResIdx;
900
0
  ProcessCtuState* processStates = encSlice->m_processStates.data();
901
0
  const UnitArea& ctuArea        = ctuEncParam->ctuArea;
902
0
  const bool wppSyncEnabled      = cs.sps->entropyCodingSyncEnabled;
903
0
  const TaskType currState       = processStates[ ctuRsAddr ];
904
0
  const unsigned syncLines       = encSlice->m_pcEncCfg->m_ifpLines;
905
906
0
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "poc", cs.slice->poc ) );
907
0
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );
908
0
  DTRACE_UPDATE( g_trace_ctx, std::make_pair( "final", processStates[ ctuRsAddr ] == CTU_ENCODE ? 0 : 1 ) );
909
910
  // process ctu's line wise from left to right
911
0
  const bool tileParallel = encSlice->m_pcEncCfg->m_tileParallelCtuEnc;
912
0
  if( tileParallel && currState == CTU_ENCODE && ctuPosX > 0 && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX - 1, ctuPosY ) )
913
0
    ; // for CTU_ENCODE on tile boundaries, allow parallel processing of tiles
914
0
  else if( ctuPosX > 0 && processStates[ ctuRsAddr - 1 ] <= currState && currState < PROCESS_DONE )
915
0
    return false;
916
917
0
  switch( currState )
918
0
  {
919
    // encode
920
0
    case CTU_ENCODE:
921
0
      {
922
        // CTU line-wise inter-frame parallel processing synchronization
923
0
        if( syncLines )
924
0
        {
925
0
          const bool lineStart = ctuPosX == 0 || ( tileParallel && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX - 1, ctuPosY ) );
926
0
          if( lineStart && !refPicCtuLineReady( slice, ctuPosY + (int)syncLines, pcv ) )
927
0
          {
928
0
            return false;
929
0
          }
930
0
        }
931
932
        // general wpp conditions, top and top-right ctu have to be encoded
933
0
        if( encSlice->m_pcEncCfg->m_tileParallelCtuEnc && ctuPosY > 0 && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX, ctuPosY - 1 ) )
934
0
          ; // allow parallel processing of CTU-encoding on independent tiles
935
0
        else if( ctuPosY > 0                                  && processStates[ ctuRsAddr - ctuStride     ] <= CTU_ENCODE )
936
0
          return false;
937
0
        else if( ctuPosY > 0 && ctuPosX + 1 < pcv.widthInCtus && processStates[ ctuRsAddr - ctuStride + 1 ] <= CTU_ENCODE && !wppSyncEnabled )
938
0
          return false;
939
        
940
0
        if( checkReadyState )
941
0
          return true;
942
943
#ifdef TRACE_ENABLE_ITT
944
        std::stringstream ss;
945
        ss << "Encode_" << slice.poc << "_CTU_" << ctuPosY << "_" << ctuPosX;
946
        __itt_string_handle* itt_handle_ctuEncode = __itt_string_handle_create( ss.str().c_str() );
947
#endif
948
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ctuEncode );
949
950
0
        TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ];
951
0
        PerThreadRsrc* taskRsrc      = encSlice->m_ThreadRsrc[ threadIdx ];
952
0
        EncCu& encCu                 = taskRsrc->m_encCu;
953
954
0
        encCu.setCtuEncRsrc( &lineEncRsrc->m_CABACEstimator, &taskRsrc->m_CtxCache, &lineEncRsrc->m_ReuseUniMv, &lineEncRsrc->m_BlkUniMvInfoBuffer, &lineEncRsrc->m_AffineProfList, &lineEncRsrc->m_CachedBvs );
955
0
        encCu.encodeCtu( pic, lineEncRsrc->m_prevQp, ctuPosX, ctuPosY );
956
957
        // cleanup line memory when last ctu in line done to reduce overall memory consumption
958
0
        if( encSlice->m_pcEncCfg->m_ensureWppBitEqual && ( ctuPosX == pcv.widthInCtus - 1 || slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX + 1, ctuPosY ) ) )
959
0
        {
960
0
          lineEncRsrc->m_AffineProfList    .resetAffineMVList();
961
0
          lineEncRsrc->m_BlkUniMvInfoBuffer.resetUniMvList();
962
0
          lineEncRsrc->m_ReuseUniMv        .resetReusedUniMvs();
963
0
          lineEncRsrc->m_CachedBvs         .resetIbcBvCand();
964
0
        }
965
966
0
        DTRACE_UPDATE( g_trace_ctx, std::make_pair( "final", 1 ) );
967
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ctuEncode );
968
969
0
        processStates[ ctuRsAddr ] = RESHAPE_LF_VER;
970
0
      }
971
0
      break;
972
973
    // reshape + vertical loopfilter
974
0
    case RESHAPE_LF_VER:
975
0
      {
976
        // clip check to right tile border (CTU_ENCODE pre-processing delay due to IBC)
977
0
        const int tileCol = slice.pps->ctuToTileCol[ctuPosX];
978
0
        const int lastCtuPosXInTile = slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
979
0
        const int checkRight = std::min<int>( encSlice->m_ctuEncDelay, lastCtuPosXInTile - ctuPosX );
980
981
0
        const bool hasTiles = encSlice->m_pcEncCfg->m_tileParallelCtuEnc && slice.pps->getNumTiles() > 1;
982
983
        // need to check line above bcs of tiling, which allows CTU_ENCODE to run independently across tiles
984
0
        if( hasTiles )
985
0
        {
986
0
          if( ctuPosY > 0 )
987
0
          {
988
0
            for( int i = -!!ctuPosX; i <= checkRight; i++ )
989
0
              if( pps.canFilterCtuBdry( ctuPosX, ctuPosY, i, -1 ) && processStates[ctuRsAddr - ctuStride + i] <= CTU_ENCODE )
990
0
                return false;
991
0
          }
992
0
        }
993
        
994
        // ensure all surrounding ctu's are encoded (intra pred requires non-reshaped and unfiltered residual, IBC requires unfiltered samples too)
995
        // check right with max offset (due to WPP condition above, this implies top-right has been already encoded)
996
0
        for( int i = hasTiles ? -!!ctuPosX : checkRight; i <= checkRight; i++ )
997
0
          if( pps.canFilterCtuBdry( ctuPosX, ctuPosY, i, 0 ) && processStates[ctuRsAddr + i] <= CTU_ENCODE )
998
0
            return false;
999
1000
        // check bottom right with 1 CTU delay (this is only required for intra pred)
1001
        // at the right picture border this will check the bottom CTU
1002
0
        const int checkBottomRight = std::min<int>( 1, lastCtuPosXInTile - ctuPosX );
1003
0
        if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CTU_ENCODE, checkBottomRight ) ) 
1004
0
          return false;
1005
1006
0
        if( checkReadyState )
1007
0
          return true;
1008
1009
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_rspLfVer );
1010
1011
        // reshape
1012
0
        if( slice.sps->lumaReshapeEnable && slice.picHeader->lmcsEnabled )
1013
0
        {
1014
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_RESHAPER, &cs, CH_L );
1015
0
          PelBuf reco = pic->getRecoBuf( COMP_Y ).subBuf( x, y, width, height );
1016
0
          reco.rspSignal( pic->reshapeData.getInvLUT() );
1017
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1018
0
        }
1019
1020
        // loopfilter
1021
0
        if( !cs.pps->deblockingFilterControlPresent || !cs.pps->deblockingFilterDisabled || cs.pps->deblockingFilterOverrideEnabled )
1022
0
        {
1023
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_DEBLOCK_FILTER, &cs, CH_L );
1024
          // calculate filter strengths
1025
0
          encSlice->m_pLoopFilter->calcFilterStrengthsCTU( cs, ctuArea, true );
1026
1027
          // vertical filter
1028
0
          PelUnitBuf reco = cs.picture->getRecoBuf();
1029
0
          encSlice->m_pLoopFilter->xDeblockArea<EDGE_VER>( cs, ctuArea, MAX_NUM_CH, reco );
1030
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1031
0
        }
1032
1033
0
        ITT_TASKEND( itt_domain_encode, itt_handle_rspLfVer );
1034
1035
0
        processStates[ ctuRsAddr ] = LF_HOR;
1036
0
      }
1037
0
      break;
1038
1039
    // horizontal loopfilter
1040
0
    case LF_HOR:
1041
0
      {
1042
        // ensure horizontal ordering (from top to bottom)
1043
0
        if( checkCtuTaskNbTop   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR ) )         
1044
0
          return false;
1045
1046
        // ensure vertical loop filter of neighbor ctu's will not modify current residual
1047
        // check top, top-right and right ctu
1048
        // (top, top-right checked implicitly due to ordering check above)
1049
0
        if( checkCtuTaskNbRgt   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, RESHAPE_LF_VER ) ) 
1050
0
          return false;
1051
1052
0
        if( checkReadyState )
1053
0
          return true;
1054
1055
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_lfHor );
1056
1057
0
        if( !cs.pps->deblockingFilterControlPresent || !cs.pps->deblockingFilterDisabled || cs.pps->deblockingFilterOverrideEnabled )
1058
0
        {
1059
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_DEBLOCK_FILTER, &cs, CH_L );
1060
0
          PelUnitBuf reco = cs.picture->getRecoBuf();
1061
0
          encSlice->m_pLoopFilter->xDeblockArea<EDGE_HOR>( cs, ctuArea, MAX_NUM_CH, reco );
1062
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1063
0
        }
1064
1065
0
        ITT_TASKEND( itt_domain_encode, itt_handle_lfHor );
1066
1067
0
        processStates[ ctuRsAddr ] = SAO_FILTER;
1068
0
      }
1069
0
      break;
1070
1071
    // SAO filter
1072
0
    case SAO_FILTER:
1073
0
      {
1074
        // general wpp conditions, top and top-right ctu have to be filtered
1075
0
        if( checkCtuTaskNbTop   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER, true ) ) return false;
1076
0
        if( checkCtuTaskNbTopRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER, true ) ) return false;
1077
1078
        // ensure loop filter of neighbor ctu's will not modify current residual
1079
        // sao processing dependents on +1 pixel to each side
1080
        // due to wpp condition above, only right, bottom and bottom-right ctu have to be checked
1081
0
        if( checkCtuTaskNbRgt   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR,    true ) ) return false;
1082
0
        if( checkCtuTaskNbBot   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR,    true ) ) return false;
1083
0
        if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR, 1, true ) ) return false;
1084
1085
0
        if( checkReadyState )
1086
0
          return true;
1087
1088
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_sao );
1089
1090
        // SAO filter
1091
0
        if( slice.sps->saoEnabled && pic->useSAO )
1092
0
        {
1093
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_SAO, &cs, CH_L );
1094
0
          TileLineEncRsrc* lineEncRsrc    = encSlice->m_TileLineEncRsrc[ lineIdx ];
1095
0
          PerThreadRsrc* taskRsrc         = encSlice->m_ThreadRsrc[ threadIdx ];
1096
0
          EncSampleAdaptiveOffset& encSao = lineEncRsrc->m_encSao;
1097
1098
0
          encSao.setCtuEncRsrc( &lineEncRsrc->m_SaoCABACEstimator, &taskRsrc->m_CtxCache );
1099
0
          encSao.storeCtuReco( cs, ctuArea, ctuPosX, ctuPosY );
1100
0
          encSao.getCtuStatistics( cs, encSlice->m_saoStatData, ctuArea, ctuRsAddr );
1101
0
          encSao.decideCtuParams( cs, encSlice->m_saoStatData, encSlice->m_saoEnabled, encSlice->m_saoAllDisabled, ctuArea, ctuRsAddr, &encSlice->m_saoReconParams[ 0 ], cs.picture->getSAO() );
1102
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1103
0
        }
1104
1105
        // ALF border extension
1106
0
        if( cs.sps->alfEnabled )
1107
0
        {
1108
          // we have to do some kind of position aware boundary padding
1109
          // it's done here because the conditions are readable
1110
0
          PelUnitBuf recoBuf = cs.picture->getRecoBuf();
1111
0
          const int fltSize  = ( MAX_ALF_FILTER_LENGTH + 1 ) >> 1;
1112
0
          const int xL       = ( ctuPosX == 0 )                 ? ( x-fltSize       ) : ( x );
1113
0
          const int xR       = ( ctuPosX+1 == pcv.widthInCtus ) ? ( x+width+fltSize ) : ( x+width );
1114
1115
0
          if( ctuPosX == 0 )                  recoBuf.extendBorderPelLft( y, height, fltSize );
1116
0
          if( ctuPosX+1 == pcv.widthInCtus )  recoBuf.extendBorderPelRgt( y, height, fltSize );
1117
0
          if( ctuPosY == 0 )                  recoBuf.extendBorderPelTop( xL, xR-xL, fltSize );
1118
0
          if( ctuPosY+1 == pcv.heightInCtus ) recoBuf.extendBorderPelBot( xL, xR-xL, fltSize );
1119
1120
0
          encSlice->m_pALF->copyCTUforALF(cs, ctuPosX, ctuPosY);
1121
0
        }
1122
1123
        // DMVR refinement can be stored now
1124
0
        if( slice.sps->DMVR && !slice.picHeader->disDmvrFlag )
1125
0
        {
1126
0
          CS::setRefinedMotionFieldCTU( cs, ctuPosX, ctuPosY );
1127
0
        }
1128
0
        ITT_TASKEND( itt_domain_encode, itt_handle_sao );
1129
1130
0
        const int tileCol = slice.pps->ctuToTileCol[ctuPosX];
1131
0
        const int lastCtuColInTileRow = slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
1132
0
        if( ctuPosX == lastCtuColInTileRow )
1133
0
        {
1134
0
          processStates[ctuRsAddr] = ALF_GET_STATISTICS;
1135
0
        }
1136
0
        else
1137
0
        {
1138
0
          processStates[ctuRsAddr] = PROCESS_DONE;
1139
0
          return true;
1140
0
        }
1141
0
      }
1142
0
      break;
1143
1144
0
    case ALF_GET_STATISTICS:
1145
0
      {
1146
        // ensure all surrounding ctu's are filtered (ALF will use pixels of adjacent CTU's)
1147
        // due to wpp condition above in SAO_FILTER, only right, bottom and bottom-right ctu have to be checked
1148
0
        if( checkCtuTaskNbRgt   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false;
1149
0
        if( checkCtuTaskNbBot   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false;
1150
0
        if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false;
1151
1152
0
        if( checkReadyState )
1153
0
          return true;
1154
1155
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_alf_stat );
1156
1157
        // ALF pre-processing
1158
0
        if( slice.sps->alfEnabled )
1159
0
        {
1160
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L );
1161
0
          PelUnitBuf recoBuf = cs.picture->getRecoBuf();
1162
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1163
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1164
0
          {
1165
0
            encSlice->m_pALF->getStatisticsCTU( *cs.picture, cs, recoBuf, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1166
0
          }
1167
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1168
0
        }
1169
1170
0
        ITT_TASKEND( itt_domain_encode, itt_handle_alf_stat );
1171
1172
        // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1173
0
        const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu;
1174
0
        processStates[ctuRsAddr] = (ctuRsAddr < deriveFilterCtu) ? ALF_RECONSTRUCT: ALF_DERIVE_FILTER;
1175
0
      }
1176
0
      break;
1177
1178
0
    case ALF_DERIVE_FILTER:
1179
0
      {
1180
0
        const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu;
1181
0
        if( ctuRsAddr == deriveFilterCtu )
1182
0
        {
1183
          // ensure statistics from all previous ctu's have been collected
1184
0
          int numCheckLines = deriveFilterCtu / pcv.widthInCtus + 1;
1185
0
          for( int y = 0; y < numCheckLines; y++ )
1186
0
          {
1187
0
            for( int tileCol = 0; tileCol < slice.pps->numTileCols; tileCol++ )
1188
0
            {
1189
0
              const int lastCtuInTileRow = y * pcv.widthInCtus + slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
1190
0
              if( processStates[lastCtuInTileRow] <= ALF_GET_STATISTICS )
1191
0
                return false;
1192
0
            }
1193
0
          }
1194
0
        }
1195
0
        else if( syncLines )
1196
0
        {
1197
          // ALF bitstream coding dependency for the sub-sequent ctu-lines
1198
0
          if( processStates[deriveFilterCtu] < ALF_RECONSTRUCT || checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_DERIVE_FILTER ) ) 
1199
0
            return false;
1200
0
        }
1201
0
        if( checkReadyState )
1202
0
          return true;
1203
1204
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_alf_derive );
1205
        // ALF post-processing
1206
0
        if( slice.sps->alfEnabled )
1207
0
        {
1208
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L );
1209
0
          if( ctuRsAddr == deriveFilterCtu )
1210
0
          {
1211
0
            encSlice->m_pALF->initDerivation( slice );
1212
0
            encSlice->m_pALF->deriveFilter( *cs.picture, cs, slice.getLambdas(), deriveFilterCtu + 1 );
1213
0
            encSlice->m_pALF->reconstructCoeffAPSs( cs, cs.slice->alfEnabled[COMP_Y], cs.slice->alfEnabled[COMP_Cb] || cs.slice->alfEnabled[COMP_Cr], false );
1214
0
          }
1215
0
          else if( syncLines )
1216
0
          {
1217
            // in sync lines mode: derive/select filter for the remaining lines
1218
0
            TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ];
1219
0
            PerThreadRsrc*   taskRsrc    = encSlice->m_ThreadRsrc[ threadIdx ];
1220
0
            const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1221
0
            for(int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++)
1222
0
            {
1223
0
              encSlice->m_pALF->selectFilterForCTU( cs, &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, ctu );
1224
0
            }
1225
0
          }
1226
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1227
0
        }
1228
1229
0
        ITT_TASKEND( itt_domain_encode, itt_handle_alf_derive );
1230
0
        processStates[ ctuRsAddr ] = ALF_RECONSTRUCT;
1231
0
      }
1232
0
      break;
1233
1234
0
    case ALF_RECONSTRUCT:
1235
0
      {
1236
        // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1237
0
        const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu;
1238
0
        if( processStates[deriveFilterCtu] < ALF_RECONSTRUCT )
1239
0
          return false;
1240
0
        else if( syncLines && ctuRsAddr > deriveFilterCtu && encSlice->m_pALF->getAsuHeightInCtus() > 1 )
1241
0
        {
1242
0
          const int asuHeightInCtus = encSlice->m_pALF->getAsuHeightInCtus();
1243
0
          const int botCtuLineInAsu = std::min( (( ctuPosY & ( ~(asuHeightInCtus - 1) ) ) + asuHeightInCtus - 1), (int)pcv.heightInCtus - 1 );
1244
0
          if( processStates[botCtuLineInAsu * ctuStride + ctuPosX] < ALF_RECONSTRUCT ) 
1245
0
            return false;
1246
0
        }
1247
1248
0
        if( checkReadyState )
1249
0
          return true;
1250
1251
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_alf_recon );
1252
1253
0
        if( slice.sps->alfEnabled )
1254
0
        {
1255
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L );
1256
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1257
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1258
0
          {
1259
0
            encSlice->m_pALF->reconstructCTU_MT( *cs.picture, cs, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1260
0
          }
1261
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1262
0
        }
1263
1264
0
        ITT_TASKEND( itt_domain_encode, itt_handle_alf_recon );
1265
0
        processStates[ctuRsAddr] = CCALF_GET_STATISTICS;
1266
0
      }
1267
      // dont break, no additional deps, can continue straigt away!
1268
      //break;
1269
1270
0
    case CCALF_GET_STATISTICS:
1271
0
      {
1272
0
        if( checkCtuTaskNbTop   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_RECONSTRUCT ) ) return false;
1273
0
        if( checkCtuTaskNbBot   ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_RECONSTRUCT ) ) return false;
1274
1275
0
        if( checkReadyState )
1276
0
          return true;
1277
1278
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_stat );
1279
1280
        // ALF pre-processing
1281
0
        if( slice.sps->ccalfEnabled )
1282
0
        {
1283
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L);
1284
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1285
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1286
0
          {
1287
0
            encSlice->m_pALF->deriveStatsForCcAlfFilteringCTU( cs, COMP_Cb, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1288
0
            encSlice->m_pALF->deriveStatsForCcAlfFilteringCTU( cs, COMP_Cr, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1289
0
          }
1290
0
          PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L );
1291
0
        }
1292
1293
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_stat );
1294
1295
        // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1296
0
        processStates[ctuRsAddr] = (ctuRsAddr < encSlice->m_ccalfDeriveCtu) ? CCALF_RECONSTRUCT: CCALF_DERIVE_FILTER;
1297
0
      }
1298
0
      break;
1299
1300
0
    case CCALF_DERIVE_FILTER:
1301
0
      {
1302
        // synchronization dependencies
1303
0
        const unsigned deriveFilterCtu = encSlice->m_ccalfDeriveCtu;
1304
0
        if( ctuRsAddr == deriveFilterCtu )
1305
0
        {
1306
          // ensure statistics from all previous ctu's have been collected
1307
0
          int numCheckLines = deriveFilterCtu / pcv.widthInCtus + 1;
1308
0
          for( int y = 0; y < numCheckLines; y++ )
1309
0
          {
1310
0
            for( int tileCol = 0; tileCol < slice.pps->numTileCols; tileCol++ )
1311
0
            {
1312
0
              const int lastCtuInTileRow = y * pcv.widthInCtus + slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1;
1313
0
              if( processStates[lastCtuInTileRow] <= CCALF_GET_STATISTICS )
1314
0
                return false;
1315
0
            }
1316
0
          }
1317
0
        }
1318
0
        else if( syncLines )
1319
0
        {
1320
          // ALF bitstream coding dependency for the sub-sequent CTU-lines
1321
0
          if( processStates[deriveFilterCtu] < CCALF_RECONSTRUCT || checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_DERIVE_FILTER ) ) 
1322
0
            return false;
1323
0
        }
1324
0
        if( checkReadyState )
1325
0
          return true;
1326
1327
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_derive );
1328
1329
        // start task
1330
0
        if( slice.sps->ccalfEnabled )
1331
0
        {
1332
0
          if( ctuRsAddr == deriveFilterCtu )
1333
0
          {
1334
0
            encSlice->m_pALF->deriveCcAlfFilter( *cs.picture, cs, encSlice->m_ccalfDeriveCtu + 1 );
1335
0
          }
1336
0
          else if( syncLines )
1337
0
          {
1338
            // in sync lines mode: derive/select filter for the remaining lines
1339
0
            TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ];
1340
0
            PerThreadRsrc*   taskRsrc    = encSlice->m_ThreadRsrc[ threadIdx ];
1341
0
            const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1342
0
            encSlice->m_pALF->selectCcAlfFilterForCtuLine( cs, COMP_Cb, cs.getRecoBuf(), &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, firstCtuInRow, ctuRsAddr );
1343
0
            encSlice->m_pALF->selectCcAlfFilterForCtuLine( cs, COMP_Cr, cs.getRecoBuf(), &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, firstCtuInRow, ctuRsAddr );
1344
0
          }
1345
0
        }
1346
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_derive );
1347
1348
0
        processStates[ctuRsAddr] = CCALF_RECONSTRUCT;
1349
0
      }
1350
0
      break;
1351
1352
0
    case CCALF_RECONSTRUCT:
1353
0
      {
1354
        // start ccalf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode)
1355
0
        const unsigned deriveFilterCtu = encSlice->m_ccalfDeriveCtu;
1356
0
        if( processStates[deriveFilterCtu] < CCALF_RECONSTRUCT )
1357
0
          return false;
1358
1359
0
        if( syncLines )
1360
0
        {
1361
          // ensure line-by-line reconstruction due to line synchronization
1362
0
          if( checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_RECONSTRUCT ) ) return false;
1363
          // check bottom due to rec. buffer usage in ccalf statistics
1364
0
          if( checkCtuTaskNbBot( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_GET_STATISTICS ) ) return false;
1365
0
        }
1366
1367
0
        if( checkReadyState )
1368
0
          return true;
1369
1370
0
        ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_recon );
1371
1372
0
        if( slice.sps->ccalfEnabled )
1373
0
        {
1374
0
          const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]];
1375
0
          for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ )
1376
0
          {
1377
0
            encSlice->m_pALF->applyCcAlfFilterCTU( cs, COMP_Cb, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1378
0
            encSlice->m_pALF->applyCcAlfFilterCTU( cs, COMP_Cr, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf );
1379
0
          }
1380
0
        }
1381
1382
0
        ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_recon );
1383
1384
        // extend pic border
1385
        // CCALF reconstruction stage is done per tile, ensure that all tiles in current CTU row are done  
1386
0
        if( ++(pic->m_tileColsDone->at(ctuPosY)) >= pps.numTileCols )
1387
0
        {
1388
0
          PelUnitBuf recoBuf = cs.picture->getRecoBuf();
1389
0
          const int margin = cs.picture->margin;
1390
0
          recoBuf.extendBorderPelLft( y, height, margin );
1391
0
          recoBuf.extendBorderPelRgt( y, height, margin );
1392
0
          if(ctuPosY == 0)
1393
0
            recoBuf.extendBorderPelTop( -margin, pcv.lumaWidth + 2 * margin, margin );
1394
0
          if(ctuPosY + 1 == pcv.heightInCtus)
1395
0
            recoBuf.extendBorderPelBot( -margin, pcv.lumaWidth + 2 * margin, margin );
1396
1397
          // for IFP lines synchro, do an additional increment signaling that CTU row is ready
1398
0
          if( syncLines )
1399
0
            ++(pic->m_tileColsDone->at( ctuPosY ));
1400
0
        }
1401
1402
        // perform finish only once for whole picture
1403
0
        const unsigned finishCtu = pcv.sizeInCtus - 1;
1404
0
        if( ctuRsAddr < finishCtu )
1405
0
        {
1406
0
          processStates[ctuRsAddr] = PROCESS_DONE;
1407
          // processing done => terminate thread
1408
0
          return true;
1409
0
        }
1410
0
        processStates[ctuRsAddr] = FINISH_SLICE;
1411
0
      }
1412
1413
0
    case FINISH_SLICE:
1414
0
      {
1415
0
        CHECK( ctuRsAddr != pcv.sizeInCtus - 1, "invalid state, finish slice only once for last ctu" );
1416
1417
        // ensure all coding tasks have been done for all previous ctu's
1418
0
        for( int i = 0; i < ctuRsAddr; i++ )
1419
0
          if( processStates[ i ] < FINISH_SLICE )
1420
0
            return false;
1421
1422
0
        if( checkReadyState )
1423
0
          return true;
1424
1425
0
        encSlice->finishCompressSlice( cs.picture, slice );
1426
1427
0
        processStates[ ctuRsAddr ] = PROCESS_DONE;
1428
        // processing done => terminate thread
1429
0
        return true;
1430
0
      }
1431
1432
0
    case PROCESS_DONE:
1433
0
      CHECK( true, "process state is PROCESS_DONE, but thread is still running" );
1434
0
      return true;
1435
1436
0
    default:
1437
0
      CHECK( true, "unknown process state" );
1438
0
      return true;
1439
0
  }
1440
1441
0
  return false;
1442
0
}
Unexecuted instantiation: bool vvenc::EncSlice::xProcessCtuTask<false>(int, void*)
Unexecuted instantiation: bool vvenc::EncSlice::xProcessCtuTask<true>(int, void*)
1443
1444
// Writes the entropy-coded slice data of a picture.
//
// Walks the slice's CTU list (indices ctuAddrInSlice[0] .. pcv.sizeInCtus - 1), codes every CTU
// with m_CABACWriter into the currently active sub-stream, closes a sub-stream at the end of a
// tile, of a wavefront row or of the slice, records the size of every closed sub-stream except
// the last one, and finally appends the used sub-streams to pic->sliceDataStreams[0].
// Also updates slice->encCABACTableIdx, m_encCABACTableIdx and pic->sliceDataNumBins.
//
// \param pic  picture whose coding structure (pic->cs) is written
void EncSlice::encodeSliceData( Picture* pic )
{
  CodingStructure&     cs    = *pic->cs;
  const PreCalcValues& pcv   = *cs.pcv;
  Slice* const         slice = cs.slice;

  const uint32_t firstTsAddr = slice->sliceMap.ctuAddrInSlice[0];
  const uint32_t endTsAddr   = pcv.sizeInCtus;
  const uint32_t widthInCtus = pcv.widthInCtus;
  const bool     wppOn       = slice->sps->entropyCodingSyncEnabled;

  // this ensures that independently encoded bitstream chunks can be combined to bit-equal:
  // the table index kept from the previous call is only used when the PPS signals cabac init
  // and no RAS init is pending, otherwise the slice type is used
  const bool      reuseStoredTable = slice->pps->cabacInitPresent && !slice->pendingRasInit;
  const SliceType cabacTableIdx    = reuseStoredTable ? m_encCABACTableIdx : slice->sliceType;
  slice->encCABACTableIdx = cabacTableIdx;

  // initialise entropy coder for the slice
  m_CABACWriter.initCtxModels( *slice );

  DTRACE( g_trace_ctx, D_HEADER, "=========== POC: %d ===========\n", slice->poc );

  // QP predictors handed to coding_tree_unit(); both entries restart from the slice QP
  int prevQP[MAX_NUM_CH];
  const auto resetPrevQP = [&prevQP, slice]() { prevQP[0] = prevQP[1] = slice->sliceQp; };
  resetPrevQP();

  // one sub-stream per tile column and per CTU row (wavefronts enabled) or per tile row (otherwise)
  const int substreamCols = slice->pps->numTileCols;
  const int substreamRows = wppOn ? pcv.heightInCtus : slice->pps->numTileRows;
  const int numSubstreams = std::max<int>( substreamRows * substreamCols, 0 );
  std::vector<OutputBitstream> substreamsOut( numSubstreams );
  uint32_t uiSubStrm = 0; // index of the sub-stream currently being written

  slice->clearSubstreamSizes();

  for( uint32_t ctuTsAddr = firstTsAddr; ctuTsAddr < endTsAddr; ctuTsAddr++ )
  {
    const uint32_t ctuRsAddr    = slice->sliceMap.ctuAddrInSlice[ctuTsAddr];
    const uint32_t ctuCol       = ctuRsAddr % widthInCtus;
    const uint32_t ctuRow       = ctuRsAddr / widthInCtus;
    const uint32_t tileFirstCol = slice->pps->tileColBd[cs.pps->ctuToTileCol[ctuCol]];
    const uint32_t tileFirstRow = slice->pps->tileRowBd[cs.pps->ctuToTileRow[ctuRow]];

    DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) );

    const Position ctuTopLeft( ctuCol * pcv.maxCUSize, ctuRow * pcv.maxCUSize );
    const UnitArea ctuArea( cs.area.chromaFormat, Area( ctuTopLeft.x, ctuTopLeft.y, pcv.maxCUSize, pcv.maxCUSize ) );

    CHECK( uiSubStrm >= numSubstreams, "array index out of bounds" );
    OutputBitstream& curSubstream = substreamsOut[ uiSubStrm ];
    m_CABACWriter.initBitstream( &curSubstream );

    // set up CABAC contexts' state for this CTU
    const bool atTileColStart = ( ctuCol == tileFirstCol );
    const bool atTileStart    = atTileColStart && ( ctuRow == tileFirstRow );

    if( atTileStart || ( atTileColStart && wppOn ) )
    {
      // for the very first CTU the entropy coder has already been reset above
      if( ctuTsAddr != firstTsAddr )
      {
        m_CABACWriter.initCtxModels( *slice );
      }

      // start of a wavefront row inside a tile: take over the saved context state when
      // getCURestricted() reports a usable CU at the sample directly above this CTU's top-left corner
      if( !atTileStart &&
          cs.getCURestricted( ctuTopLeft.offset( 0, -1 ), ctuTopLeft, slice->independentSliceIdx, slice->pps->getTileIdx( ctuCol, ctuRow ), CH_L, TREE_D ) )
      {
        m_CABACWriter.getCtx() = m_entropyCodingSyncContextState;
      }

      resetPrevQP();
    }

    m_CABACWriter.coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr );

    // wavefronts: keep the context state reached after the first CTU of a tile row for later synchronization
    if( atTileColStart && wppOn )
    {
      m_entropyCodingSyncContextState = m_CABACWriter.getCtx();
    }

    // terminate the sub-stream, if required (end of slice, end of tile, end of wavefront-CTU-row)
    const bool lastCtuOfSlice = ( ctuTsAddr + 1 == endTsAddr );
    bool       closeSubstream = lastCtuOfSlice;

    if( !lastCtuOfSlice )
    {
      const auto nextRsAddr = slice->sliceMap.ctuAddrInSlice[ctuTsAddr + 1];

      if( slice->pps->getTileIdx( ctuRsAddr ) != slice->pps->getTileIdx( nextRsAddr ) )
      {
        // next CTU belongs to another tile
        closeSubstream = true;
      }
      else if( wppOn )
      {
        // next CTU opens a new CTU row within the same tile //TODO: adjust tile bound condition
        const auto nextCol = nextRsAddr % widthInCtus;
        closeSubstream = ( nextCol == cs.pps->tileColBd[cs.pps->ctuToTileCol[nextCol]] );
      }
    }

    if( closeSubstream )
    {
      m_CABACWriter.end_of_slice();

      // byte-alignment in slice_data() at the end of each sub-stream
      curSubstream.writeByteAlignment();

      // the size is only recorded when this is not the last sub-stream in the slice
      if( !lastCtuOfSlice )
      {
        slice->addSubstreamSize( ( curSubstream.getNumberOfWrittenBits() >> 3 ) + curSubstream.countStartCodeEmulations() );
      }
      uiSubStrm++;
    }
  } // CTU-loop

  // remember the table index for the next call
  if( !slice->pps->cabacInitPresent )
  {
    m_encCABACTableIdx = slice->sliceType;
  }
  else
  {
    m_encCABACTableIdx = m_CABACWriter.getCtxInitId( *slice );
  }

  // concatenate substreams: one more than the number of recorded sizes, since the last one has no size entry
  OutputBitstream& outStream = pic->sliceDataStreams[ 0 ];
  const auto numUsedSubstreams = slice->getNumberOfSubstreamSizes() + 1;
  for( int idx = 0; idx < numUsedSubstreams; idx++ )
  {
    outStream.addSubstream( &substreamsOut[ idx ] );
  }

  pic->sliceDataNumBins += m_CABACWriter.getNumBins();
}
1559
1560
} // namespace vvenc
1561
1562
//! \}
1563