Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvdec/source/Lib/DecoderLib/DecLibRecon.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/** \file     DecLibRecon.cpp
44
    \brief    decoder class
45
*/
46
47
#include "DecLib.h"
48
49
#include "CommonLib/TrQuant.h"
50
#if ENABLE_SIMD_TCOEFF_OPS
51
#include "CommonLib/TrQuant_EMT.h"
52
#endif
53
#include "CommonLib/InterPrediction.h"
54
#include "CommonLib/IntraPrediction.h"
55
#include "CommonLib/Unit.h"
56
#include "CommonLib/Buffer.h"
57
#include "CommonLib/UnitTools.h"
58
59
#include "CommonLib/dtrace_next.h"
60
#include "CommonLib/dtrace_buffer.h"
61
62
namespace vvdec
63
{
64
65
// Optional instrumentation hooks: when TRACE_ENABLE_ITT is defined, the ITT domains, string handles and
// the frame counter below are only *declared* here (extern) and the ITT_TASKSTART / ITT_TASKEND macros
// forward to the __itt_task_begin / __itt_task_end calls. Otherwise both macros expand to nothing.
#ifdef TRACE_ENABLE_ITT
66
extern __itt_domain*              itt_domain_dec;
67
extern std::vector<__itt_domain*> itt_domain_decInst;   // indexed by decoder instance id (see DecLibRecon::create)
68
69
// one string handle per kind of task that gets marked in the trace
extern __itt_string_handle* itt_handle_alf;
70
extern __itt_string_handle* itt_handle_presao;
71
extern __itt_string_handle* itt_handle_sao;
72
extern __itt_string_handle* itt_handle_lfl;
73
extern __itt_string_handle* itt_handle_intra;
74
extern __itt_string_handle* itt_handle_inter;
75
extern __itt_string_handle* itt_handle_mider;
76
extern __itt_string_handle* itt_handle_lfcl;
77
extern __itt_string_handle* itt_handle_ext;
78
extern __itt_string_handle* itt_handle_dmvr;
79
extern __itt_string_handle* itt_handle_rsp;
80
81
extern __itt_string_handle* itt_handle_schedTasks;
82
extern __itt_string_handle* itt_handle_waitTasks;
83
84
// global domain for DecLib (declaration only)
85
extern __itt_domain* itt_domain_glb;
86
// global frame counter (declaration only)
87
extern __itt_counter itt_frame_counter;
88
89
// d: ITT domain, t: string handle naming the task
#define ITT_TASKSTART( d, t ) __itt_task_begin( ( d ), __itt_null, __itt_null, ( t ) )
90
// note: the handle argument t is accepted for symmetry with ITT_TASKSTART but is not used
#define ITT_TASKEND( d, t )   __itt_task_end  ( ( d ) )
91
#else
92
// tracing disabled: both macros expand to nothing
#define ITT_TASKSTART( d, t )
93
#define ITT_TASKEND( d, t )
94
#endif
95
96
//! \ingroup DecoderLib
97
//! \{
98
99
/** Re-initialises the shared task parameters for a new picture.
 *
 *  \param cs             coding structure of the picture about to be reconstructed (its address is stored)
 *  \param ctuStartState  state every task entry starts in
 *  \param tasksPerLine   number of task entries per CTU line
 *  \param _doALF         value stored in doALF
 */
void CommonTaskParam::reset( CodingStructure& cs, TaskType ctuStartState, int tasksPerLine, bool _doALF )
{
  this->cs = &cs;

  const int numCtuLines = cs.pcv->heightInCtus;

  // the state array of the previous picture is about to be replaced, so none of its entries may still be pending
  CHECKD( !ctuStates.empty() && std::any_of( ctuStates.begin(), ctuStates.end(), []( CtuState& state ) { return state != DONE; } ), "some CTUs of previous pic not done" );

  // one state entry per (line, task) pair, all starting in ctuStartState
  ctuStates = std::vector<CtuState>( numCtuLines * tasksPerLine );
  std::for_each( ctuStates.begin(), ctuStates.end(), [ctuStartState]( CtuState& state ) { state.store( ctuStartState ); } );

  // one motion history per CTU line
  perLineMiHist = std::vector<MotionHist>( numCtuLines );
  doALF         = _doALF;
}
113
114
/** Constructor: calls the architecture specific init functions of g_pelBufOP and g_tCoeffOps,
 *  depending on which ENABLE_SIMD_* / TARGET_SIMD_* macros the build has set.
 */
DecLibRecon::DecLibRecon()
{
  // buffer operations
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
  g_pelBufOP.initPelBufOpsX86();
#endif
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_ARM )
  g_pelBufOP.initPelBufOpsARM();
#endif

  // transform coefficient operations
#if ENABLE_SIMD_TCOEFF_OPS
#  if defined( TARGET_SIMD_X86 )
  g_tCoeffOps.initTCoeffOpsX86();
#  endif
#  if defined( TARGET_SIMD_ARM )
  g_tCoeffOps.initTCoeffOpsARM();
#  endif
#endif
}
131
132
/** Prepares this reconstruction instance for use.
 *
 *  \param threadPool            pool the reconstruction tasks are scheduled on (stored, not owned)
 *  \param instanceId            index of this instance; only used to pick/create the ITT domain when tracing is enabled
 *  \param upscaleOutputEnabled  stored in m_upscaleOutputEnabled
 *
 *  Allocates one PerThreadResource per decoding thread; they are released again in destroy().
 */
void DecLibRecon::create( ThreadPool* threadPool, unsigned instanceId, bool upscaleOutputEnabled )
{
  // run destructor and constructor again, so that all members are back in their freshly constructed state
  // NOTE(review): the original remark mentioned DecLibParser here - presumably copied from that class
  this->~DecLibRecon();
  new( this ) DecLibRecon;

  // use #ifdef like all other ITT sections of this file, so m_itt_decInst is set whenever it is read
#ifdef TRACE_ENABLE_ITT
  if( itt_domain_decInst.size() < instanceId + 1 )
  {
    std::string name( "DecLibRecon " + std::to_string( instanceId ) );
    itt_domain_decInst.push_back( __itt_domain_create( name.c_str() ) );
    itt_domain_decInst.back()->flags = 1;

    CHECK_FATAL( itt_domain_decInst.back() != itt_domain_decInst[instanceId], "current decLibRecon ITT-Domain is not the last in vector. Instances created in the wrong order?" );
  }
  m_itt_decInst = itt_domain_decInst[instanceId];
#endif

  m_decodeThreadPool = threadPool;
  // always at least one set of per-thread resources, also without a pool or with a pool reporting 0 threads
  m_numDecThreads    = std::max( 1, threadPool ? threadPool->numThreads() : 1 );

  m_upscaleOutputEnabled = upscaleOutputEnabled;
  m_predBufSize     = 0;
  m_dmvrMvCacheSize = 0;
  m_dmvrMvCache     = nullptr;

  m_num4x4Elements   = 0;
  m_loopFilterParam  = nullptr;
  m_motionInfo       = nullptr;

  // the first resource is default constructed, all further ones are constructed from the first one's m_cTrQuant
  m_pcThreadResource    = new PerThreadResource*[m_numDecThreads];
  m_pcThreadResource[0] = new PerThreadResource();
  for( int i = 1; i < m_numDecThreads; i++ )
  {
    m_pcThreadResource[i] = new PerThreadResource( m_pcThreadResource[0]->m_cTrQuant );
  }
}
170
171
void DecLibRecon::destroy()
172
1.87k
{
173
1.87k
  m_decodeThreadPool = nullptr;
174
175
1.87k
  if( m_predBuf )
176
713
  {
177
713
    m_predBuf.reset();
178
713
    m_predBufSize = 0;
179
713
  }
180
181
1.87k
  if( m_dmvrMvCache )
182
713
  {
183
713
    free( m_dmvrMvCache );
184
713
    m_dmvrMvCache = nullptr;
185
713
    m_dmvrMvCacheSize = 0;
186
713
  }
187
188
1.87k
  if( m_loopFilterParam )
189
713
  {
190
713
    free( m_loopFilterParam );
191
713
    m_loopFilterParam = nullptr;
192
713
  }
193
194
1.87k
  if( m_motionInfo )
195
713
  {
196
713
    free( m_motionInfo );
197
713
    m_motionInfo = nullptr;
198
713
  }
199
200
1.87k
  m_num4x4Elements = 0;
201
202
61.7k
  for( int i = 0; i < m_numDecThreads; i++ ) delete m_pcThreadResource[i];
203
1.87k
  delete[] m_pcThreadResource; m_pcThreadResource = nullptr;
204
1.87k
}
205
206
207
static void getCompatibleBuffer( const CodingStructure& cs, const CPelUnitBuf& srcBuf, PelStorage& destBuf, const UserAllocator* userAllocator )
208
713
{
209
713
  if( !destBuf.bufs.empty() )
210
0
  {
211
0
    bool compat = false;
212
0
    if( destBuf.chromaFormat == srcBuf.chromaFormat )
213
0
    {
214
0
      compat = true;
215
0
      const uint32_t numCh = getNumberValidComponents( srcBuf.chromaFormat );
216
0
      for( uint32_t i = 0; i < numCh; i++ )
217
0
      {
218
        // check this otherwise it would turn out to get very weird
219
0
        compat &= destBuf.get( ComponentID( i ) )         == srcBuf.get( ComponentID( i ) );
220
0
        compat &= destBuf.get( ComponentID( i ) ).stride  == srcBuf.get( ComponentID( i ) ).stride;
221
0
        compat &= destBuf.get( ComponentID( i ) ).width   == srcBuf.get( ComponentID( i ) ).width;
222
0
        compat &= destBuf.get( ComponentID( i ) ).height  == srcBuf.get( ComponentID( i ) ).height;
223
0
      }
224
0
    }
225
0
    if( !compat )
226
0
    {
227
0
      destBuf.destroy();
228
0
    }
229
0
  }
230
713
  if( destBuf.bufs.empty() )
231
713
  {
232
713
    destBuf.create( cs.picture->chromaFormat, cs.picture->lumaSize(), cs.pcv->maxCUWidth, cs.picture->margin, MEMORY_ALIGN_DEF_SIZE, true, userAllocator );
233
713
  }
234
713
}
235
236
/** Schedules the border-extension tasks for a reference picture.
 *
 *  \param pic      reference picture whose borders get extended
 *  \param currPic  picture currently being set up for decoding; used for the task names and as
 *                  source of the SPS handed to fillGrey() when pic turned out to be broken
 *
 *  Waits for all tasks of pic first. If pic did not reach the reconstructed state, a pending exception
 *  is moved from reconDone to parseDone, pic is flagged as erroneous and filled grey.
 *  Afterwards six extension tasks (plus a copy task when wrap-around is enabled) are added to the
 *  thread pool, all counted by pic->m_borderExtTaskCounter.
 */
void DecLibRecon::borderExtPic( Picture* pic, const Picture* currPic )
{
  // we block and wait here, so the exceptions from the reference pic don't propagate to the current picture
  pic->waitForAllTasks();
  if( pic->progress < Picture::reconstructed )   // an exception must have happened in the picture, so we need to clean it up
  {
    CHECK_FATAL( pic->progress < Picture::parsing, "Slice parsing should have started, so all structures are there" );
    try
    {
      pic->reconDone.checkAndRethrowException();
      pic->parseDone.checkAndRethrowException();  // when the error happened in the slice parsing tasks, there might not be an exception in recon done, so check parseDone also
    }
    catch( ... )
    {
      pic->error = true;
      pic->reconDone.clearException();
      // TODO: for now we set it on parseDone, so we can handle it outside:
      if( !pic->parseDone.hasException() )
      {
        pic->parseDone.setException( std::current_exception() );
      }

      pic->fillGrey( currPic->cs->sps.get() );
    }
  }

  pic->borderExtStarted = true;

  const bool wrapAround = pic->cs->sps->getUseWrapAround();
  if( wrapAround )
  {
    // copy reconstruction buffer to wrapAround buffer. All other border-extension tasks depend on this task.
    static auto copyTask = []( int, void* task_param )
    {
      ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
      Picture* picture = static_cast<Picture*>( task_param );
      picture->getRecoBuf( true ).copyFrom( picture->getRecoBuf() );
      ITT_TASKEND( itt_domain_dec, itt_handle_ext );
      return true;
    };
    pic->m_copyWrapBufDone.lock();
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string( currPic->poc ) + " copyTask Ref-POC:" + std::to_string( pic->poc ) )
                                        copyTask,
                                        pic,
                                        &pic->m_borderExtTaskCounter,
                                        &pic->m_copyWrapBufDone,
                                        { &pic->reconDone } );
  }

  // Every extension task waits for the same barrier: the wrap-around copy if there is one,
  // the reconstruction of the picture otherwise.
  auto* const extStartBarrier = wrapAround ? &pic->m_copyWrapBufDone : &pic->reconDone;

  // start actual border extension tasks
  // NOTE(review): judging by the task names the four flags of extendPicBorder() select
  // top / bottom / left+right (upper part) / left+right (lower part) - confirm against Picture::extendPicBorder.
  {
    static auto task = []( int, void* task_param )
    {
      ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
      Picture* picture = static_cast<Picture*>( task_param );
      picture->extendPicBorder( true, false, false, false );
      ITT_TASKEND( itt_domain_dec, itt_handle_ext );
      return true;
    };
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask T Ref-POC:" + std::to_string(pic->poc) )
                                        task,
                                        pic,
                                        &pic->m_borderExtTaskCounter,
                                        nullptr,
                                        { extStartBarrier } );
  }

  {
    static auto task = []( int, void* task_param )
    {
      ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
      Picture* picture = static_cast<Picture*>( task_param );
      picture->extendPicBorder( false, true, false, false );
      ITT_TASKEND( itt_domain_dec, itt_handle_ext );
      return true;
    };
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask B Ref-POC:" + std::to_string(pic->poc) )
                                        task,
                                        pic,
                                        &pic->m_borderExtTaskCounter,
                                        nullptr,
                                        { extStartBarrier } );
  }

  // luma, third flag set
  {
    static auto task = []( int, void* task_param )
    {
      ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
      Picture* picture = static_cast<Picture*>( task_param );
      picture->extendPicBorder( false, false, true, false, CH_L );
      ITT_TASKEND( itt_domain_dec, itt_handle_ext );
      return true;
    };
    // task name aligned with the "lrB Y" / "lrB UV" siblings (was "ltT")
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrT Y Ref-POC:" + std::to_string(pic->poc) )
                                        task,
                                        pic,
                                        &pic->m_borderExtTaskCounter,
                                        nullptr,
                                        { extStartBarrier } );
  }
  // luma, fourth flag set
  {
    static auto task = []( int, void* task_param )
    {
      ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
      Picture* picture = static_cast<Picture*>( task_param );
      picture->extendPicBorder( false, false, false, true, CH_L );
      ITT_TASKEND( itt_domain_dec, itt_handle_ext );
      return true;
    };
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrB Y Ref-POC:" + std::to_string(pic->poc) )
                                        task,
                                        pic,
                                        &pic->m_borderExtTaskCounter,
                                        nullptr,
                                        { extStartBarrier } );
  }

  // chroma, third flag set
  {
    static auto task = []( int, void* task_param )
    {
      ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
      Picture* picture = static_cast<Picture*>( task_param );
      picture->extendPicBorder( false, false, true, false, CH_C );
      ITT_TASKEND( itt_domain_dec, itt_handle_ext );
      return true;
    };
    // was named "lrB UV" as well, which made it indistinguishable from the task below
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrT UV Ref-POC:" + std::to_string(pic->poc) )
                                        task,
                                        pic,
                                        &pic->m_borderExtTaskCounter,
                                        nullptr,
                                        { extStartBarrier } );
  }
  // chroma, fourth flag set
  {
    static auto task = []( int, void* task_param )
    {
      ITT_TASKSTART( itt_domain_dec, itt_handle_ext );
      Picture* picture = static_cast<Picture*>( task_param );
      picture->extendPicBorder( false, false, false, true, CH_C );
      ITT_TASKEND( itt_domain_dec, itt_handle_ext );
      return true;
    };
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrB UV Ref-POC:" + std::to_string(pic->poc) )
                                        task,
                                        pic,
                                        &pic->m_borderExtTaskCounter,
                                        nullptr,
                                        { extStartBarrier } );
  }
}
386
387
/** Creates one reference buffer per sub-picture of pic and schedules the tasks filling them.
 *
 *  \param pic      reference picture the sub-picture buffers belong to
 *  \param currPic  picture currently being set up for decoding (only used for the task names)
 *
 *  Each task receives a pointer to an element of m_subPicExtTasks, so that vector must not
 *  reallocate while tasks are pending (decompressPicture() reserves its capacity beforehand).
 */
void DecLibRecon::createSubPicRefBufs( Picture* pic, const Picture* currPic )
{
  // task body: copy the sub-picture area out of the reconstruction and extend the borders of the copy
  static auto extendSubPicTask = []( int, void* task_param )
  {
    SubPicExtTask* job = static_cast<SubPicExtTask*>( task_param );
    job->subPicBuf->copyFrom( job->picture->getRecoBuf().subBuf( job->subPicArea ) );
    job->picture->extendPicBorderBuf( *job->subPicBuf );
    return true;
  };

  pic->subPicExtStarted = true;

  const PPS* pps         = pic->cs->pps.get();
  const SPS* sps         = pic->cs->sps.get();
  const int  subPicCount = pps->getNumSubPics();

  pic->m_subPicRefBufs.resize( subPicCount );

  for( int subPicIdx = 0; subPicIdx < subPicCount; ++subPicIdx )
  {
    const SubPic& subPic = pps->getSubPic( subPicIdx );
    const Area    subPicArea( subPic.getSubPicLeft(),
                              subPic.getSubPicTop(),
                              subPic.getSubPicWidthInLumaSample(),
                              subPic.getSubPicHeightInLumaSample() );

    auto& refBuf = pic->m_subPicRefBufs[subPicIdx];
    refBuf.create( pic->chromaFormat, Size( subPicArea ), sps->getMaxCUWidth(), pic->margin, MEMORY_ALIGN_DEF_SIZE );

    m_subPicExtTasks.emplace_back( SubPicExtTask{ pic, &refBuf, subPicArea } );

    // the copy may only start once the reference picture is fully reconstructed
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string( currPic->poc ) + " subPicBorderExtTask refPOC:" + std::to_string( pic->poc ) )
                                        extendSubPicTask,
                                        &m_subPicExtTasks.back(),
                                        &pic->m_borderExtTaskCounter,
                                        nullptr,
                                        { &pic->reconDone } );
  }
}
422
423
/** Exchanges the picture's reconstruction storage with m_fltBuf.
 *
 *  \param cs  coding structure of the picture whose reconstruction buffer is swapped
 */
void DecLibRecon::swapBufs( CodingStructure& cs )
{
  auto& recoStorage = cs.picture->m_bufs[PIC_RECONSTRUCTION];
  recoStorage.swap( m_fltBuf );

  // the coding structure has to be re-bound afterwards, so its recon buf refers to the swapped-in storage
  cs.rebindPicBufs();
}
428
429
/** Sets up and schedules the reconstruction of one picture.
 *
 *  \param pcPic  picture to reconstruct; remembered in m_currDecompPic until
 *                waitForPrevDecompressedPic() hands it back
 *
 *  Steps, in this order:
 *   - initialise the per-thread tools, SAO and (if enabled) ALF for the picture's SPS/PPS
 *   - (re-)allocate the prediction buffer, the DMVR MV cache and the per-4x4 arrays when the required size changed
 *   - start border extension / sub-picture buffer creation for all reference pictures that need it
 *   - add one ctuTask per (line, column group) in zig-zag order and the final finish-picture task
 *
 *  The function only schedules the work; pcPic->reconDone is locked here and released by the finish task.
 *  A failed buffer allocation is reported through CHECK_FATAL instead of leaving a null buffer behind.
 */
void DecLibRecon::decompressPicture( Picture* pcPic )
{
  m_currDecompPic = pcPic;

  CodingStructure& cs = *pcPic->cs;

  pcPic->progress = Picture::reconstructing;

#ifdef TRACE_ENABLE_ITT
  // mark start of frame
  pcPic->m_itt_decLibInst = m_itt_decInst;
  __itt_frame_begin_v3( pcPic->m_itt_decLibInst, nullptr );
#endif

  // Initialise the various objects for the new set of settings
  const SPS * sps = cs.sps.get();
  const PPS * pps = cs.pps.get();

  for( int i = 0; i < m_numDecThreads; i++ )
  {
    if( sps->getUseReshaper() )
    {
      m_pcThreadResource[i]->m_cReshaper.createDec( sps->getBitDepth() );
      m_pcThreadResource[i]->m_cReshaper.initSlice( pcPic->slices[0]->getNalUnitLayerId(), *pcPic->slices[0]->getPicHeader(), pcPic->slices[0]->getVPS_nothrow() );
    }

    m_pcThreadResource[i]->m_cIntraPred.init( sps->getChromaFormatIdc(), sps->getBitDepth() );
    m_pcThreadResource[i]->m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight() );

    // Recursive structure
    m_pcThreadResource[i]->m_cTrQuant.init( pcPic );
    m_pcThreadResource[i]->m_cCuDecoder.init( &m_pcThreadResource[i]->m_cIntraPred, &m_pcThreadResource[i]->m_cInterPred, &m_pcThreadResource[i]->m_cReshaper, &m_pcThreadResource[i]->m_cTrQuant );
  }

  getCompatibleBuffer( *pcPic->cs, pcPic->cs->getRecoBuf(), m_fltBuf, pcPic->getUserAllocator() );

  const uint32_t  log2SaoOffsetScale = (uint32_t) std::max(0, sps->getBitDepth() - MAX_SAO_TRUNCATED_BITDEPTH);
  const int maxDepth = getLog2(sps->getMaxCUWidth()) - pps->pcv->minCUWidthLog2;
  m_cSAO.create( pps->getPicWidthInLumaSamples(),
                 pps->getPicHeightInLumaSamples(),
                 sps->getChromaFormatIdc(),
                 sps->getMaxCUWidth(),
                 sps->getMaxCUHeight(),
                 maxDepth,
                 log2SaoOffsetScale,
                 m_fltBuf
               );

  if( sps->getUseALF() )
  {
    m_cALF.create( cs.picHeader.get(), sps, pps, m_numDecThreads, m_fltBuf );
  }

  const PreCalcValues* pcv = cs.pcv;

  // set reconstruction buffers in CodingStructure
  const ptrdiff_t ctuSampleSizeL = pcv->maxCUHeight * pcv->maxCUWidth;
  const ptrdiff_t ctuSampleSizeC = isChromaEnabled( pcv->chrFormat ) ? ( ctuSampleSizeL >> ( getChannelTypeScaleX( CH_C, pcv->chrFormat ) + getChannelTypeScaleY( CH_C, pcv->chrFormat ) ) ) : 0;
  const ptrdiff_t ctuSampleSize  = ctuSampleSizeL + 2 * ctuSampleSizeC;
  const size_t    predBufSize    = ctuSampleSize * pcv->sizeInCtus;
  if( predBufSize != m_predBufSize )
  {
    m_predBuf.reset( ( Pel* ) xMalloc( Pel, predBufSize ) );
    CHECK_FATAL( predBufSize > 0 && !m_predBuf, "allocation of prediction buffer failed" );
    // only remember the size once the buffer really exists, so a failed allocation is retried
    m_predBufSize = predBufSize;
  }

  pcPic->cs->m_predBuf = m_predBuf.get();

  // for the worst case of all PUs being 8x8 and using DMVR
  const size_t _maxNumDmvrMvs = pcv->num8x8CtuBlks * pcv->sizeInCtus;
  if( _maxNumDmvrMvs != m_dmvrMvCacheSize )
  {
    free( m_dmvrMvCache );   // free( nullptr ) is a no-op
    m_dmvrMvCache     = nullptr;
    m_dmvrMvCacheSize = 0;

    m_dmvrMvCache     = ( Mv* ) malloc( sizeof( Mv ) * _maxNumDmvrMvs );
    CHECK_FATAL( _maxNumDmvrMvs > 0 && !m_dmvrMvCache, "allocation of DMVR MV cache failed" );
    m_dmvrMvCacheSize = _maxNumDmvrMvs;
  }

  pcPic->cs->m_dmvrMvCache = m_dmvrMvCache;

  const auto num4x4Elements = cs.pcv->num4x4CtuBlks * cs.pcv->sizeInCtus;
  if( m_num4x4Elements != num4x4Elements )
  {
    free( m_loopFilterParam );
    free( m_motionInfo );
    m_loopFilterParam = nullptr;
    m_motionInfo      = nullptr;
    m_num4x4Elements  = 0;

    m_loopFilterParam = ( LoopFilterParam* ) malloc( sizeof( LoopFilterParam ) * num4x4Elements * 2 );
    m_motionInfo      = ( MotionInfo* )      malloc( sizeof( MotionInfo      ) * num4x4Elements );
    CHECK_FATAL( num4x4Elements > 0 && ( !m_loopFilterParam || !m_motionInfo ), "allocation of loop filter param / motion info buffers failed" );

    m_num4x4Elements = num4x4Elements;
  }
  // finished

  const int widthInCtus  = cs.pcv->widthInCtus;
  const int heightInCtus = cs.pcv->heightInCtus;

  if( sps->getIBCFlag() )
  {
    cs.initVIbcBuf( heightInCtus, sps->getChromaFormatIdc(), sps->getMaxCUHeight() );
  }
  pcPic->startProcessingTimer();

  if( m_decodeThreadPool->numThreads() > 0 )
  {
    ITT_TASKSTART( itt_domain_dec, itt_handle_schedTasks );
  }

  picBarriers.clear();
#if ALLOW_MIDER_LF_DURING_PICEXT
  CBarrierVec  picExtBarriers;
#else
  CBarrierVec &picExtBarriers = picBarriers;
#endif

  const int numSubPic = cs.pps->getNumSubPics();
  if( numSubPic > 1 )
  {
    // tasks get pointers to the elements of this vector, so reserve up-front to prevent reallocation
    m_subPicExtTasks.clear();
    m_subPicExtTasks.reserve( pcPic->slices.size() * MAX_NUM_REF_PICS * numSubPic );
  }

  std::vector<Picture*> borderExtRefPics( pcPic->buildAllRefPicsVec() );
  for( Picture* refPic : borderExtRefPics )
  {
    if( !refPic->borderExtStarted )
    {
      // TODO: (GH) Can we bypass this border extension, when all subpics (>1) are treated as pics?
      borderExtPic( refPic, pcPic );
    }

    if( !refPic->subPicExtStarted && numSubPic > 1 && refPic->m_subPicRefBufs.size() != numSubPic )
    {
      CHECK( !refPic->m_subPicRefBufs.empty(), "Wrong number of subpics already present in reference picture" );
      CHECK( cs.sps->getUseWrapAround(), "Wraparound + subpics not implemented" );

      createSubPicRefBufs( refPic, pcPic );
    }

    // collect each still pending border-extension barrier once
    if( refPic->m_borderExtTaskCounter.isBlocked() &&
        std::find( picExtBarriers.cbegin(), picExtBarriers.cend(), refPic->m_borderExtTaskCounter.donePtr() ) == picExtBarriers.cend() )
    {
      picExtBarriers.push_back( refPic->m_borderExtTaskCounter.donePtr() );
    }
  }

  // without worker threads the pending reference picture tasks have to be run right here
  if( m_decodeThreadPool->numThreads() == 0 && (
       std::any_of( picExtBarriers.cbegin(), picExtBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) ||
       std::any_of( picBarriers   .cbegin(), picBarriers   .cend(), []( const Barrier* b ) { return b->isBlocked(); } ) ) )
  {
    m_decodeThreadPool->processTasksOnMainThread();
  }

  const bool isIntra = std::all_of( pcPic->slices.begin(), pcPic->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } );

  // number of CTU columns handled by one task, and the resulting number of tasks per CTU line (rounded up)
  const int numColPerTask = std::max( std::min( widthInCtus, ( widthInCtus / std::max( m_numDecThreads * ( isIntra ? 2 : 1 ), 1 ) ) + ( isIntra ? 0 : 1 ) ), 1 );
  const int numTasksPerLine = widthInCtus / numColPerTask + !!( widthInCtus % numColPerTask );

#if ALLOW_MIDER_LF_DURING_PICEXT
  pcPic->refPicExtDepBarriers = std::move( picExtBarriers );
#endif
#if !RECO_WHILE_PARSE
  picBarriers.push_back( &cs.picture->parseDone );
#endif

  const TaskType ctuStartState = MIDER;
  const bool     doALF         = cs.sps->getUseALF() && !AdaptiveLoopFilter::getAlfSkipPic( cs );
  commonTaskParam.reset( cs, ctuStartState, numTasksPerLine, doALF );

  tasksFinishMotion = std::vector<LineTaskParam>( heightInCtus, LineTaskParam{ commonTaskParam, -1 } );
  tasksCtu          = std::vector<CtuTaskParam >( heightInCtus * numTasksPerLine, CtuTaskParam{ commonTaskParam, -1, -1, {} } );

  pcPic->reconDone.lock();

  // schedule in zig-zag scan order: walk the anti-diagonals of the (line, col) grid
  for( int i = 0; i < numTasksPerLine + heightInCtus; ++i )
  {
    int line = 0;
    for( int col = i; col >= 0; --col, ++line )
    {
      if( line < heightInCtus && col < numTasksPerLine )
      {
        CBarrierVec ctuBarriers = picBarriers;
        const int   ctuStart    = col * numColPerTask;
        const int   ctuEnd      = std::min( ctuStart + numColPerTask, widthInCtus );

#if RECO_WHILE_PARSE
        if( pcPic->parseDone.isBlocked() )
        {
          // wait for the last CTU in the current line to be parsed
          ctuBarriers.push_back( &pcPic->ctuParsedBarrier[( line + 1 ) * widthInCtus - 1] );
        }

#endif
        CtuTaskParam* param    = &tasksCtu[line * numTasksPerLine + col];
        param->taskLine        = line;
        param->taskCol         = col;
        param->ctuEnd          = ctuEnd;
        param->ctuStart        = ctuStart;
        param->numColPerTask   = numColPerTask;
        param->numTasksPerLine = numTasksPerLine;

        m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(pcPic->poc) + " ctuTask:" + std::to_string( col ) + "," + std::to_string( line ) )
                                            ctuTask<false>,
                                            param,
                                            &pcPic->m_ctuTaskCounter,
                                            nullptr,
                                            std::move( ctuBarriers ),
                                            ctuTask<true> );
      }
    }
  }

  {
    // runs once all CTU tasks are done (it depends on m_ctuTaskCounter) and unlocks reconDone
    static auto finishReconTask = []( int, void* task_param )
    {
      FinishPicTaskParam* param = static_cast<FinishPicTaskParam*>( task_param );
      CodingStructure& cs = *param->pic->cs;

      if( cs.sps->getUseALF() && !AdaptiveLoopFilter::getAlfSkipPic( cs ) )
      {
        param->decLib->swapBufs( cs );
      }

      cs.deallocTempInternals();

#ifdef TRACE_ENABLE_ITT
      // mark end of frame
      __itt_frame_end_v3( param->pic->m_itt_decLibInst, nullptr );
#endif
      param->pic->stopProcessingTimer();

      param->pic->progress = Picture::reconstructed;
      return true;
    };

    taskFinishPic = FinishPicTaskParam( this, pcPic );
    m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string( pcPic->poc ) + " finishPicTask" )
                                        finishReconTask,
                                        &taskFinishPic,
                                        &pcPic->m_divTasksCounter,
                                        &pcPic->reconDone,
                                        { pcPic->m_ctuTaskCounter.donePtr() } );
  }

  // matches the ITT_TASKSTART above
  if( m_decodeThreadPool->numThreads() > 0 )
  {
    ITT_TASKEND( itt_domain_dec, itt_handle_schedTasks );
  }
}
687
688
/** Waits until the picture passed to the last decompressPicture() call is finished.
 *
 *  \return the finished picture, or nullptr when there is no picture in flight.
 *          m_currDecompPic is reset to nullptr in either case.
 *
 *  An exception thrown while waiting is not propagated: the picture's error flag is set and
 *  cleanupOnException() is called instead.
 */
Picture* DecLibRecon::waitForPrevDecompressedPic()
{
  if( !m_currDecompPic )
    return nullptr;

  ITT_TASKSTART( itt_domain_dec, itt_handle_waitTasks );
  if( m_decodeThreadPool->numThreads() == 0 )
  {
    // no worker threads: all queued tasks have to be executed here before waiting makes sense
    m_decodeThreadPool->processTasksOnMainThread();
    CHECK_FATAL( m_currDecompPic->reconDone.isBlocked(), "can't make progress. some dependecy has not been finished" );
  }

  // std::current_exception() only yields the exception while inside the handler, so capture it there
  std::exception_ptr caughtException;
  try
  {
    m_currDecompPic->reconDone.wait();
  }
  catch( ... )
  {
    caughtException        = std::current_exception();
    m_currDecompPic->error = true;
  }

  // also check error flag, which can have been set earlier (e.g., when trying to use the picture as reference)
  if( m_currDecompPic->error || m_currDecompPic->reconDone.hasException() )
  {
    // ensure all tasks are cleared from declibRecon
    // (caughtException stays empty when the error was flagged earlier and wait() itself did not throw)
    cleanupOnException( caughtException );
  }

  ITT_TASKEND( itt_domain_dec, itt_handle_waitTasks );

  return std::exchange( m_currDecompPic, nullptr );
}
720
721
// Brings this instance back into a clean state after reconstruction of m_currDecompPic failed.
// The exception argument is currently not used by the body.
// Expects m_currDecompPic to be non-null (it is dereferenced without a check).
void DecLibRecon::cleanupOnException( std::exception_ptr exception )
722
710
{
723
  // there was an exception anywhere in m_currDecompPic
724
  // => we need to wait for all tasks to be cleared from the thread pool
725
710
  m_currDecompPic->waitForAllTasks();
726
727
710
  // drop the task states, so CommonTaskParam::reset() does not complain about unfinished CTUs of this picture
  commonTaskParam.ctuStates.clear();
728
710
}
729
730
template<bool onlyCheckReadyState>
731
bool DecLibRecon::ctuTask( int tid, void* task_param )
732
646k
{
733
646k
  CtuTaskParam* param = static_cast<CtuTaskParam*>( task_param );
734
735
646k
  const int       taskCol      = param->taskCol;
736
646k
  const int       line         = param->taskLine;
737
646k
  const int       col          = taskCol;
738
739
646k
  auto&           cs           = *param->common.cs;
740
646k
  auto&           decLib       = param->common.decLib;
741
646k
  const int       tasksPerLine = param->numTasksPerLine;
742
646k
  const int       heightInCtus = cs.pcv->heightInCtus;
743
744
646k
  CtuState&       thisCtuState =  param->common.ctuStates[line * tasksPerLine + taskCol];
745
646k
  const CtuState* thisLine     = &param->common.ctuStates[line * tasksPerLine];
746
646k
  const CtuState* lineAbove    = thisLine - tasksPerLine;
747
646k
  const CtuState* lineBelow    = thisLine + tasksPerLine;
748
749
646k
  const int       ctuStart     = param->ctuStart;
750
646k
  const int       ctuEnd       = param->ctuEnd;
751
752
646k
  try
753
646k
  {
754
646k
    if( cs.picture->m_ctuTaskCounter.hasException() )
755
3.47k
    {
756
3.47k
      std::rethrow_exception( cs.picture->m_ctuTaskCounter.getException() );
757
3.47k
    }
758
759
646k
    switch( thisCtuState.load() )
760
646k
    {
761
      // all case statements fall through to continue with next task, unless they return false due to unsatisfied preconditions
762
763
48.0k
    case MIDER:
764
48.0k
    {
765
48.0k
      if( col > 0 && thisLine[col - 1] <= MIDER_cont )
766
41.3k
        return false;
767
6.70k
      if( line > 0 )
768
2.48k
      {
769
2.48k
        if( col + 1 < tasksPerLine )
770
1.91k
        {
771
1.91k
          if( lineAbove[col + 1] <= MIDER )
772
0
            return false;
773
1.91k
        }
774
562
        else
775
562
        {
776
562
          if( lineAbove[col] <= MIDER_cont )
777
0
            return false;
778
562
        }
779
2.48k
      }
780
6.70k
      if( onlyCheckReadyState )
781
3.35k
        return true;
782
783
3.34k
      ITT_TASKSTART( itt_domain_dec, itt_handle_mider );
784
785
6.70k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
786
3.35k
      {
787
3.35k
        const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus;
788
3.35k
        CtuData& ctuData    = cs.getCtuData( ctuRsAddr );
789
3.35k
        ctuData.motion      = &decLib.m_motionInfo[cs.pcv->num4x4CtuBlks * ctuRsAddr];
790
791
3.35k
        if( !ctuData.slice->isIntra() || cs.sps->getIBCFlag() )
792
3.35k
        {
793
3.35k
          const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
794
3.35k
          decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskDeriveCtuMotionInfo( cs, ctuRsAddr, ctuArea, param->common.perLineMiHist[line] );
795
3.35k
        }
796
0
        else
797
0
        {
798
0
          memset( NO_WARNING_class_memaccess( ctuData.motion ), MI_NOT_VALID, sizeof( MotionInfo ) * cs.pcv->num4x4CtuBlks );
799
0
        }
800
801
3.35k
        thisCtuState = MIDER_cont;
802
3.35k
      }
803
804
3.34k
      thisCtuState = LF_INIT;
805
806
3.34k
      ITT_TASKEND( itt_domain_dec, itt_handle_mider );
807
3.34k
    }
808
809
3.34k
    case LF_INIT:
810
3.34k
    {
811
3.34k
      if( onlyCheckReadyState )
812
0
        return true;
813
814
3.34k
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfcl );
815
816
6.70k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
817
3.35k
      {
818
3.35k
        const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus;
819
3.35k
        CtuData& ctuData    = cs.getCtuData( ctuRsAddr );
820
3.35k
        ctuData.lfParam[0]  = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 0 )];
821
3.35k
        ctuData.lfParam[1]  = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 1 )];
822
3.35k
        memset( ctuData.lfParam[0], 0, sizeof( LoopFilterParam ) * 2 * cs.pcv->num4x4CtuBlks );
823
824
3.35k
        decLib.m_cLoopFilter.calcFilterStrengthsCTU( cs, ctuRsAddr );
825
3.35k
      }
826
827
3.34k
      thisCtuState = INTER;
828
829
3.34k
      ITT_TASKEND( itt_domain_dec, itt_handle_lfcl );
830
3.34k
    }
831
832
22.6k
    case INTER:
833
22.6k
    {
834
22.6k
      if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) )
vvdec::DecLibRecon::ctuTask<false>(int, void*)::{lambda(vvdec::Slice const*)#1}::operator()(vvdec::Slice const*) const
Line
Count
Source
834
3.61k
      if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) )
vvdec::DecLibRecon::ctuTask<true>(int, void*)::{lambda(vvdec::Slice const*)#1}::operator()(vvdec::Slice const*) const
Line
Count
Source
834
19.0k
      if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) )
835
22.6k
      {
836
        // not really necessary, but only for optimizing the wave-fronts
837
22.6k
        if( col > 1 && thisLine[col - 2] <= INTER )
838
18.6k
          return false;
839
4.00k
        if( line > 0 && lineAbove[col] <= INTER )
840
674
          return false;
841
4.00k
      }
842
843
3.35k
      if( std::any_of( cs.picture->refPicExtDepBarriers.cbegin(), cs.picture->refPicExtDepBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) )
Unexecuted instantiation: vvdec::DecLibRecon::ctuTask<false>(int, void*)::{lambda(vvdec::Barrier const*)#1}::operator()(vvdec::Barrier const*) const
Unexecuted instantiation: vvdec::DecLibRecon::ctuTask<true>(int, void*)::{lambda(vvdec::Barrier const*)#1}::operator()(vvdec::Barrier const*) const
844
0
      {
845
0
        return false;
846
0
      }
847
848
3.35k
      if( onlyCheckReadyState )
849
267
        return true;
850
851
3.08k
      ITT_TASKSTART( itt_domain_dec, itt_handle_inter );
852
853
6.16k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
854
3.08k
      {
855
3.08k
        const int ctuRsAddr    = ctu + line * cs.pcv->widthInCtus;
856
3.08k
        const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
857
3.08k
        const CtuData& ctuData = cs.getCtuData( ctuRsAddr );
858
859
3.08k
        decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskTrafoCtu( cs, ctuRsAddr, ctuArea );
860
861
3.08k
        if( !ctuData.slice->isIntra() )
862
0
        {
863
0
          decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskInterCtu( cs, ctuRsAddr, ctuArea );
864
865
0
          if( cs.picture->stillReferenced )
866
0
          {
867
0
            decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskFinishMotionInfo( cs, ctuRsAddr, ctu, line );
868
0
          }
869
0
        }
870
3.08k
      }
871
872
3.08k
      thisCtuState = INTRA;
873
874
3.08k
      ITT_TASKEND( itt_domain_dec, itt_handle_inter );
875
3.08k
    }
876
877
57.4k
    case INTRA:
878
57.4k
    {
879
57.4k
      if( col > 0 && thisLine[col - 1] <= INTRA_cont )
880
53.7k
        return false;
881
882
3.62k
      if( line > 0 )
883
1.57k
      {
884
1.57k
        if( col + 1 < tasksPerLine )
885
1.50k
        {
886
1.50k
          if( lineAbove[col + 1] <= INTRA )
887
942
            return false;
888
1.50k
        }
889
69
        else
890
69
        {
891
69
          if( lineAbove[col] <= INTRA_cont )
892
0
            return false;
893
69
        }
894
1.57k
      }
895
2.68k
      if( onlyCheckReadyState )
896
675
        return true;
897
898
2.01k
      ITT_TASKSTART( itt_domain_dec, itt_handle_intra );
899
900
4.02k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
901
2.01k
      {
902
2.01k
        const int ctuRsAddr    = ctu + line * cs.pcv->widthInCtus;
903
2.01k
        const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
904
2.01k
        decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskCriticalIntraKernel( cs, ctuRsAddr, ctuArea );
905
906
2.01k
        thisCtuState = INTRA_cont;
907
2.01k
      }
908
909
2.01k
      thisCtuState = RSP;
910
911
2.01k
      ITT_TASKEND( itt_domain_dec, itt_handle_intra );
912
2.01k
    }
913
914
510k
    case RSP:
915
510k
    {
916
      // RIRZIIIII
917
      // IIIIIXXXX
918
      //
919
      // - Z can be reshaped when it is no more an intra prediction source for X in the next line
920
921
922
510k
      if     ( line + 1 < heightInCtus && col + 1 < tasksPerLine && lineBelow[col + 1] < INTRA_cont )
923
417k
        return false;
924
92.3k
      else if( line + 1 < heightInCtus &&                           lineBelow[col]     < RSP )
925
91.7k
        return false;
926
548
      else if(                            col + 1 < tasksPerLine && thisLine [col + 1] < INTRA_cont ) // need this for the last line
927
20
        return false;
928
929
528
      if( onlyCheckReadyState )
930
151
        return true;
931
932
377
      ITT_TASKSTART( itt_domain_dec, itt_handle_rsp );
933
934
531
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
935
154
      {
936
154
        decLib.m_pcThreadResource[tid]->m_cReshaper.rspCtuBcw( cs, ctu, line );
937
154
      }
938
939
377
      ITT_TASKEND( itt_domain_dec, itt_handle_rsp );
940
941
377
      thisCtuState = LF_V;
942
377
    }
943
944
418
    case LF_V:
945
418
    {
946
418
      if( col > 0 && thisLine[col - 1] < LF_V )
947
31
        return false;
948
387
      if( onlyCheckReadyState )
949
10
        return true;
950
951
377
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
952
953
531
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
954
154
      {
955
154
        decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_VER );
956
957
154
        thisCtuState = LF_V_cont;
958
154
      }
959
960
377
      thisCtuState = LF_H;
961
962
377
      ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
963
377
    }
964
965
8.07k
    case LF_H:
966
8.07k
    {
967
8.07k
      if( line > 0 && lineAbove[col] < LF_H )
968
9
        return false;
969
970
8.06k
      if( line > 0 && col + 1 < tasksPerLine && lineAbove[col + 1] < LF_V_cont )
971
39
        return false;
972
973
8.02k
      if(             col + 1 < tasksPerLine && thisLine[col + 1] < LF_V_cont )
974
7.62k
        return false;
975
976
395
      if( onlyCheckReadyState )
977
65
        return true;
978
979
330
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
980
981
437
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
982
107
      {
983
107
        decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_HOR );
984
107
      }
985
986
330
      thisCtuState = PRESAO;
987
988
330
      ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
989
330
    }
990
991
5.19k
    case PRESAO:
992
5.19k
    {
993
      // only last CTU processes full line
994
5.19k
      if( col == tasksPerLine - 1 )
995
733
      {
996
733
        if( line > 0 && lineAbove[col] <= PRESAO )
997
146
          return false;
998
999
928
        for( int c = 0; c < tasksPerLine; ++c )
1000
918
        {
1001
918
          if( thisLine[c] < PRESAO )
1002
4
            return false;
1003
1004
914
          if( line + 1 < heightInCtus && lineBelow[c] < PRESAO )
1005
573
            return false;
1006
914
        }
1007
10
        if( onlyCheckReadyState )
1008
4
          return true;
1009
1010
6
        ITT_TASKSTART( itt_domain_dec, itt_handle_presao );
1011
1012
6
        if( cs.sps->getUseSAO() )
1013
6
        {
1014
6
          decLib.m_cSAO.SAOPrepareCTULine( cs, getLineArea( cs, line, true ) );
1015
6
        }
1016
1017
6
        ITT_TASKEND( itt_domain_dec, itt_handle_presao );
1018
6
      }
1019
4.46k
      else if( thisLine[tasksPerLine - 1] <= PRESAO )   // wait for last CTU to finish PRESAO
1020
4.21k
      {
1021
4.21k
        return false;
1022
4.21k
      }
1023
249
      if( onlyCheckReadyState )
1024
12
        return true;
1025
1026
237
      thisCtuState = SAO;
1027
237
    }
1028
1029
237
    case SAO:
1030
237
    {
1031
237
      if( onlyCheckReadyState )
1032
0
        return true;
1033
1034
      // only last CTU processes full line
1035
237
      if( cs.sps->getUseSAO() )
1036
18
      {
1037
18
        ITT_TASKSTART( itt_domain_dec, itt_handle_sao );
1038
1039
36
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1040
18
        {
1041
18
          const UnitArea  ctuArea = getCtuArea( cs, ctu, line, true );
1042
18
          decLib.m_cSAO.SAOProcessCTU( cs, ctuArea );
1043
18
        }
1044
1045
18
        ITT_TASKEND( itt_domain_dec, itt_handle_sao );
1046
18
      }
1047
237
      if( param->common.doALF )
1048
18
      {
1049
18
        ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
1050
1051
36
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1052
18
        {
1053
18
          AdaptiveLoopFilter::prepareCTU( cs, ctu, line );
1054
1055
18
          thisCtuState = SAO_cont;
1056
18
        }
1057
1058
18
        ITT_TASKEND( itt_domain_dec, itt_handle_alf );
1059
18
      }
1060
1061
237
      thisCtuState = ALF;
1062
237
    }
1063
1064
731
    case ALF:
1065
731
    {
1066
731
      if( param->common.doALF )
1067
512
      {
1068
512
        const bool a = line > 0;
1069
512
        const bool b = line + 1 < heightInCtus;
1070
512
        const bool c = col > 0;
1071
512
        const bool d = col + 1 < tasksPerLine;
1072
1073
512
        if( a )
1074
120
        {
1075
120
          if( c && lineAbove[col - 1] < ALF ) return false;
1076
111
          if(      lineAbove[col    ] < ALF ) return false;
1077
108
          if( d && lineAbove[col + 1] < SAO_cont ) return false;
1078
108
        }
1079
1080
500
        if( b )
1081
487
        {
1082
487
          if( c && lineBelow[col - 1] < ALF ) return false;
1083
85
          if(      lineBelow[col    ] < ALF ) return false;
1084
30
          if( d && lineBelow[col + 1] < SAO_cont ) return false;
1085
30
        }
1086
1087
37
        if( c && thisLine[col - 1] < ALF ) return false;
1088
36
        if( d && thisLine[col + 1] < SAO_cont ) return false;
1089
1090
31
        if( onlyCheckReadyState )
1091
14
          return true;
1092
1093
17
        ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
1094
35
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1095
18
        {
1096
18
          decLib.m_cALF.processCTU( cs, ctu, line, tid );
1097
18
        }
1098
17
        ITT_TASKEND( itt_domain_dec, itt_handle_alf );
1099
17
      }
1100
219
      else if( onlyCheckReadyState )
1101
0
        return true;
1102
1103
236
      thisCtuState = DONE;
1104
236
    }
1105
1106
236
    default:
1107
236
      CHECKD( thisCtuState != DONE, "Wrong CTU state" );
1108
646k
    }   // end switch
1109
646k
  }
1110
646k
  catch( ... )
1111
646k
  {
1112
3.84k
    std::rethrow_exception( std::current_exception() );
1113
3.84k
  }
1114
1115
18
  return true;
1116
646k
}
bool vvdec::DecLibRecon::ctuTask<false>(int, void*)
Line
Count
Source
732
4.55k
{
733
4.55k
  CtuTaskParam* param = static_cast<CtuTaskParam*>( task_param );
734
735
4.55k
  const int       taskCol      = param->taskCol;
736
4.55k
  const int       line         = param->taskLine;
737
4.55k
  const int       col          = taskCol;
738
739
4.55k
  auto&           cs           = *param->common.cs;
740
4.55k
  auto&           decLib       = param->common.decLib;
741
4.55k
  const int       tasksPerLine = param->numTasksPerLine;
742
4.55k
  const int       heightInCtus = cs.pcv->heightInCtus;
743
744
4.55k
  CtuState&       thisCtuState =  param->common.ctuStates[line * tasksPerLine + taskCol];
745
4.55k
  const CtuState* thisLine     = &param->common.ctuStates[line * tasksPerLine];
746
4.55k
  const CtuState* lineAbove    = thisLine - tasksPerLine;
747
4.55k
  const CtuState* lineBelow    = thisLine + tasksPerLine;
748
749
4.55k
  const int       ctuStart     = param->ctuStart;
750
4.55k
  const int       ctuEnd       = param->ctuEnd;
751
752
4.55k
  try
753
4.55k
  {
754
4.55k
    if( cs.picture->m_ctuTaskCounter.hasException() )
755
4
    {
756
4
      std::rethrow_exception( cs.picture->m_ctuTaskCounter.getException() );
757
4
    }
758
759
4.55k
    switch( thisCtuState.load() )
760
4.55k
    {
761
      // all case statements fall through to continue with next task, unless they return false due to unsatisfied preconditions
762
763
3.35k
    case MIDER:
764
3.35k
    {
765
3.35k
      if( col > 0 && thisLine[col - 1] <= MIDER_cont )
766
0
        return false;
767
3.35k
      if( line > 0 )
768
1.23k
      {
769
1.23k
        if( col + 1 < tasksPerLine )
770
958
        {
771
958
          if( lineAbove[col + 1] <= MIDER )
772
0
            return false;
773
958
        }
774
280
        else
775
280
        {
776
280
          if( lineAbove[col] <= MIDER_cont )
777
0
            return false;
778
280
        }
779
1.23k
      }
780
3.35k
      if( onlyCheckReadyState )
781
0
        return true;
782
783
3.35k
      ITT_TASKSTART( itt_domain_dec, itt_handle_mider );
784
785
6.71k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
786
3.35k
      {
787
3.35k
        const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus;
788
3.35k
        CtuData& ctuData    = cs.getCtuData( ctuRsAddr );
789
3.35k
        ctuData.motion      = &decLib.m_motionInfo[cs.pcv->num4x4CtuBlks * ctuRsAddr];
790
791
3.35k
        if( !ctuData.slice->isIntra() || cs.sps->getIBCFlag() )
792
3.35k
        {
793
3.35k
          const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
794
3.35k
          decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskDeriveCtuMotionInfo( cs, ctuRsAddr, ctuArea, param->common.perLineMiHist[line] );
795
3.35k
        }
796
0
        else
797
0
        {
798
0
          memset( NO_WARNING_class_memaccess( ctuData.motion ), MI_NOT_VALID, sizeof( MotionInfo ) * cs.pcv->num4x4CtuBlks );
799
0
        }
800
801
3.35k
        thisCtuState = MIDER_cont;
802
3.35k
      }
803
804
3.35k
      thisCtuState = LF_INIT;
805
806
3.35k
      ITT_TASKEND( itt_domain_dec, itt_handle_mider );
807
3.35k
    }
808
809
3.35k
    case LF_INIT:
810
3.35k
    {
811
3.35k
      if( onlyCheckReadyState )
812
0
        return true;
813
814
3.35k
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfcl );
815
816
6.71k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
817
3.35k
      {
818
3.35k
        const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus;
819
3.35k
        CtuData& ctuData    = cs.getCtuData( ctuRsAddr );
820
3.35k
        ctuData.lfParam[0]  = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 0 )];
821
3.35k
        ctuData.lfParam[1]  = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 1 )];
822
3.35k
        memset( ctuData.lfParam[0], 0, sizeof( LoopFilterParam ) * 2 * cs.pcv->num4x4CtuBlks );
823
824
3.35k
        decLib.m_cLoopFilter.calcFilterStrengthsCTU( cs, ctuRsAddr );
825
3.35k
      }
826
827
3.35k
      thisCtuState = INTER;
828
829
3.35k
      ITT_TASKEND( itt_domain_dec, itt_handle_lfcl );
830
3.35k
    }
831
832
3.62k
    case INTER:
833
3.62k
    {
834
3.62k
      if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) )
835
3.61k
      {
836
        // not really necessary, but only for optimizing the wave-fronts
837
3.61k
        if( col > 1 && thisLine[col - 2] <= INTER )
838
518
          return false;
839
3.10k
        if( line > 0 && lineAbove[col] <= INTER )
840
19
          return false;
841
3.10k
      }
842
843
3.08k
      if( std::any_of( cs.picture->refPicExtDepBarriers.cbegin(), cs.picture->refPicExtDepBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) )
844
0
      {
845
0
        return false;
846
0
      }
847
848
3.08k
      if( onlyCheckReadyState )
849
0
        return true;
850
851
3.08k
      ITT_TASKSTART( itt_domain_dec, itt_handle_inter );
852
853
6.16k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
854
3.08k
      {
855
3.08k
        const int ctuRsAddr    = ctu + line * cs.pcv->widthInCtus;
856
3.08k
        const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
857
3.08k
        const CtuData& ctuData = cs.getCtuData( ctuRsAddr );
858
859
3.08k
        decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskTrafoCtu( cs, ctuRsAddr, ctuArea );
860
861
3.08k
        if( !ctuData.slice->isIntra() )
862
0
        {
863
0
          decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskInterCtu( cs, ctuRsAddr, ctuArea );
864
865
0
          if( cs.picture->stillReferenced )
866
0
          {
867
0
            decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskFinishMotionInfo( cs, ctuRsAddr, ctu, line );
868
0
          }
869
0
        }
870
3.08k
      }
871
872
3.08k
      thisCtuState = INTRA;
873
874
3.08k
      ITT_TASKEND( itt_domain_dec, itt_handle_inter );
875
3.08k
    }
876
877
3.76k
    case INTRA:
878
3.76k
    {
879
3.76k
      if( col > 0 && thisLine[col - 1] <= INTRA_cont )
880
1.68k
        return false;
881
882
2.07k
      if( line > 0 )
883
576
      {
884
576
        if( col + 1 < tasksPerLine )
885
540
        {
886
540
          if( lineAbove[col + 1] <= INTRA )
887
65
            return false;
888
540
        }
889
36
        else
890
36
        {
891
36
          if( lineAbove[col] <= INTRA_cont )
892
0
            return false;
893
36
        }
894
576
      }
895
2.01k
      if( onlyCheckReadyState )
896
0
        return true;
897
898
2.01k
      ITT_TASKSTART( itt_domain_dec, itt_handle_intra );
899
900
4.02k
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
901
2.01k
      {
902
2.01k
        const int ctuRsAddr    = ctu + line * cs.pcv->widthInCtus;
903
2.01k
        const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
904
2.01k
        decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskCriticalIntraKernel( cs, ctuRsAddr, ctuArea );
905
906
2.01k
        thisCtuState = INTRA_cont;
907
2.01k
      }
908
909
2.01k
      thisCtuState = RSP;
910
911
2.01k
      ITT_TASKEND( itt_domain_dec, itt_handle_intra );
912
2.01k
    }
913
914
2.16k
    case RSP:
915
2.16k
    {
916
      // RIRZIIIII
917
      // IIIIIXXXX
918
      //
919
      // - Z can be reshaped when it is no more an intra prediction source for X in the next line
920
921
922
2.16k
      if     ( line + 1 < heightInCtus && col + 1 < tasksPerLine && lineBelow[col + 1] < INTRA_cont )
923
1.39k
        return false;
924
767
      else if( line + 1 < heightInCtus &&                           lineBelow[col]     < RSP )
925
238
        return false;
926
529
      else if(                            col + 1 < tasksPerLine && thisLine [col + 1] < INTRA_cont ) // need this for the last line
927
3
        return false;
928
929
526
      if( onlyCheckReadyState )
930
0
        return true;
931
932
526
      ITT_TASKSTART( itt_domain_dec, itt_handle_rsp );
933
934
680
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
935
154
      {
936
154
        decLib.m_pcThreadResource[tid]->m_cReshaper.rspCtuBcw( cs, ctu, line );
937
154
      }
938
939
526
      ITT_TASKEND( itt_domain_dec, itt_handle_rsp );
940
941
526
      thisCtuState = LF_V;
942
526
    }
943
944
536
    case LF_V:
945
536
    {
946
536
      if( col > 0 && thisLine[col - 1] < LF_V )
947
10
        return false;
948
526
      if( onlyCheckReadyState )
949
0
        return true;
950
951
526
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
952
953
680
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
954
154
      {
955
154
        decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_VER );
956
957
154
        thisCtuState = LF_V_cont;
958
154
      }
959
960
526
      thisCtuState = LF_H;
961
962
526
      ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
963
526
    }
964
965
591
    case LF_H:
966
591
    {
967
591
      if( line > 0 && lineAbove[col] < LF_H )
968
2
        return false;
969
970
589
      if( line > 0 && col + 1 < tasksPerLine && lineAbove[col + 1] < LF_V_cont )
971
0
        return false;
972
973
589
      if(             col + 1 < tasksPerLine && thisLine[col + 1] < LF_V_cont )
974
110
        return false;
975
976
479
      if( onlyCheckReadyState )
977
0
        return true;
978
979
479
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
980
981
586
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
982
107
      {
983
107
        decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_HOR );
984
107
      }
985
986
479
      thisCtuState = PRESAO;
987
988
479
      ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
989
479
    }
990
991
494
    case PRESAO:
992
494
    {
993
      // only last CTU processes full line
994
494
      if( col == tasksPerLine - 1 )
995
36
      {
996
36
        if( line > 0 && lineAbove[col] <= PRESAO )
997
3
          return false;
998
999
51
        for( int c = 0; c < tasksPerLine; ++c )
1000
45
        {
1001
45
          if( thisLine[c] < PRESAO )
1002
4
            return false;
1003
1004
41
          if( line + 1 < heightInCtus && lineBelow[c] < PRESAO )
1005
23
            return false;
1006
41
        }
1007
6
        if( onlyCheckReadyState )
1008
0
          return true;
1009
1010
6
        ITT_TASKSTART( itt_domain_dec, itt_handle_presao );
1011
1012
6
        if( cs.sps->getUseSAO() )
1013
6
        {
1014
6
          decLib.m_cSAO.SAOPrepareCTULine( cs, getLineArea( cs, line, true ) );
1015
6
        }
1016
1017
6
        ITT_TASKEND( itt_domain_dec, itt_handle_presao );
1018
6
      }
1019
458
      else if( thisLine[tasksPerLine - 1] <= PRESAO )   // wait for last CTU to finish PRESAO
1020
75
      {
1021
75
        return false;
1022
75
      }
1023
389
      if( onlyCheckReadyState )
1024
0
        return true;
1025
1026
389
      thisCtuState = SAO;
1027
389
    }
1028
1029
389
    case SAO:
1030
389
    {
1031
389
      if( onlyCheckReadyState )
1032
0
        return true;
1033
1034
      // only last CTU processes full line
1035
389
      if( cs.sps->getUseSAO() )
1036
18
      {
1037
18
        ITT_TASKSTART( itt_domain_dec, itt_handle_sao );
1038
1039
36
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1040
18
        {
1041
18
          const UnitArea  ctuArea = getCtuArea( cs, ctu, line, true );
1042
18
          decLib.m_cSAO.SAOProcessCTU( cs, ctuArea );
1043
18
        }
1044
1045
18
        ITT_TASKEND( itt_domain_dec, itt_handle_sao );
1046
18
      }
1047
389
      if( param->common.doALF )
1048
18
      {
1049
18
        ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
1050
1051
36
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1052
18
        {
1053
18
          AdaptiveLoopFilter::prepareCTU( cs, ctu, line );
1054
1055
18
          thisCtuState = SAO_cont;
1056
18
        }
1057
1058
18
        ITT_TASKEND( itt_domain_dec, itt_handle_alf );
1059
18
      }
1060
1061
389
      thisCtuState = ALF;
1062
389
    }
1063
1064
403
    case ALF:
1065
403
    {
1066
403
      if( param->common.doALF )
1067
32
      {
1068
32
        const bool a = line > 0;
1069
32
        const bool b = line + 1 < heightInCtus;
1070
32
        const bool c = col > 0;
1071
32
        const bool d = col + 1 < tasksPerLine;
1072
1073
32
        if( a )
1074
22
        {
1075
22
          if( c && lineAbove[col - 1] < ALF ) return false;
1076
20
          if(      lineAbove[col    ] < ALF ) return false;
1077
19
          if( d && lineAbove[col + 1] < SAO_cont ) return false;
1078
19
        }
1079
1080
29
        if( b )
1081
23
        {
1082
23
          if( c && lineBelow[col - 1] < ALF ) return false;
1083
14
          if(      lineBelow[col    ] < ALF ) return false;
1084
12
          if( d && lineBelow[col + 1] < SAO_cont ) return false;
1085
12
        }
1086
1087
18
        if( c && thisLine[col - 1] < ALF ) return false;
1088
18
        if( d && thisLine[col + 1] < SAO_cont ) return false;
1089
1090
18
        if( onlyCheckReadyState )
1091
0
          return true;
1092
1093
18
        ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
1094
36
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1095
18
        {
1096
18
          decLib.m_cALF.processCTU( cs, ctu, line, tid );
1097
18
        }
1098
18
        ITT_TASKEND( itt_domain_dec, itt_handle_alf );
1099
18
      }
1100
371
      else if( onlyCheckReadyState )
1101
0
        return true;
1102
1103
389
      thisCtuState = DONE;
1104
389
    }
1105
1106
389
    default:
1107
389
      CHECKD( thisCtuState != DONE, "Wrong CTU state" );
1108
4.55k
    }   // end switch
1109
4.55k
  }
1110
4.55k
  catch( ... )
1111
4.55k
  {
1112
376
    std::rethrow_exception( std::current_exception() );
1113
376
  }
1114
1115
18
  return true;
1116
4.55k
}
// Coverage listing of the ctuTask<true> instantiation (source lines 732-1116).
//
// One step of the per-task CTU reconstruction state machine. `task_param` is
// cast to a CtuTaskParam that identifies one task (taskLine / taskCol) covering
// the CTU columns [ctuStart, ctuEnd) of that CTU line. The task's current stage
// is read from param->common.ctuStates and the switch below falls through the
// stages MIDER -> LF_INIT -> INTER -> INTRA -> RSP -> LF_V -> LF_H -> PRESAO ->
// SAO -> ALF -> DONE for as long as the preconditions on the neighbouring
// tasks' states are satisfied.
//
// Returns false as soon as a stage's precondition is not met, true otherwise.
// Throws: rethrows an exception already stored in cs.picture->m_ctuTaskCounter,
// and propagates anything thrown while processing.
//
// NOTE(review): the signature is not part of this listing; `tid`,
// `task_param` and `onlyCheckReadyState` are used below without a visible
// declaration. `onlyCheckReadyState` is presumably the bool template argument
// (true for this instantiation): every stage returns true right after its
// precondition checks instead of doing the work - TODO confirm.
// NOTE(review): all `<` / `<=` comparisons on states assume the CtuState
// enumerators are ordered by pipeline progress (enum not visible here).
bool vvdec::DecLibRecon::ctuTask<true>(int, void*)
Line
Count
Source
732
641k
{
733
641k
  CtuTaskParam* param = static_cast<CtuTaskParam*>( task_param );
734
735
641k
  const int       taskCol      = param->taskCol;
736
641k
  const int       line         = param->taskLine;
737
641k
  const int       col          = taskCol;
738
739
641k
  auto&           cs           = *param->common.cs;
740
641k
  auto&           decLib       = param->common.decLib;
741
641k
  const int       tasksPerLine = param->numTasksPerLine;
742
641k
  const int       heightInCtus = cs.pcv->heightInCtus;
743
  // ctuStates is indexed as [line * tasksPerLine + col]. lineAbove / lineBelow
  // are only pointer arithmetic here; every dereference below is guarded by
  // `line > 0` resp. `line + 1 < heightInCtus`.
744
641k
  CtuState&       thisCtuState =  param->common.ctuStates[line * tasksPerLine + taskCol];
745
641k
  const CtuState* thisLine     = &param->common.ctuStates[line * tasksPerLine];
746
641k
  const CtuState* lineAbove    = thisLine - tasksPerLine;
747
641k
  const CtuState* lineBelow    = thisLine + tasksPerLine;
748
749
641k
  const int       ctuStart     = param->ctuStart;
750
641k
  const int       ctuEnd       = param->ctuEnd;
751
752
641k
  try
753
641k
  {
    // If an exception has already been recorded on the picture's task counter,
    // rethrow it here instead of continuing with this task.
754
641k
    if( cs.picture->m_ctuTaskCounter.hasException() )
755
3.47k
    {
756
3.47k
      std::rethrow_exception( cs.picture->m_ctuTaskCounter.getException() );
757
3.47k
    }
758
759
641k
    switch( thisCtuState.load() )
760
641k
    {
761
      // all case statements fall through to continue with next task, unless they return false due to unsatisfied preconditions
762
    // Stage MIDER: motion info derivation. Needs the left neighbour to be past
    // MIDER_cont and, for line > 0, the above-right neighbour to be past MIDER
    // (or, in the last column, the above neighbour to be past MIDER_cont).
763
44.7k
    case MIDER:
764
44.7k
    {
765
44.7k
      if( col > 0 && thisLine[col - 1] <= MIDER_cont )
766
41.3k
        return false;
767
3.35k
      if( line > 0 )
768
1.24k
      {
769
1.24k
        if( col + 1 < tasksPerLine )
770
960
        {
771
960
          if( lineAbove[col + 1] <= MIDER )
772
0
            return false;
773
960
        }
774
282
        else
775
282
        {
776
282
          if( lineAbove[col] <= MIDER_cont )
777
0
            return false;
778
282
        }
779
1.24k
      }
780
3.35k
      if( onlyCheckReadyState )
781
3.35k
        return true;
782
783
18.4E
      ITT_TASKSTART( itt_domain_dec, itt_handle_mider );
784
785
18.4E
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
786
0
      {
787
0
        const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus;
788
0
        CtuData& ctuData    = cs.getCtuData( ctuRsAddr );
789
0
        ctuData.motion      = &decLib.m_motionInfo[cs.pcv->num4x4CtuBlks * ctuRsAddr];
790
        // Motion info is derived when the CTU's slice is not intra or IBC is
        // enabled in the SPS; otherwise the CTU's motion buffer is filled with
        // MI_NOT_VALID.
791
0
        if( !ctuData.slice->isIntra() || cs.sps->getIBCFlag() )
792
0
        {
793
0
          const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
794
0
          decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskDeriveCtuMotionInfo( cs, ctuRsAddr, ctuArea, param->common.perLineMiHist[line] );
795
0
        }
796
0
        else
797
0
        {
798
0
          memset( NO_WARNING_class_memaccess( ctuData.motion ), MI_NOT_VALID, sizeof( MotionInfo ) * cs.pcv->num4x4CtuBlks );
799
0
        }
800
        // Intermediate state published after every processed CTU; neighbours
        // compare against MIDER_cont in their precondition checks above.
801
0
        thisCtuState = MIDER_cont;
802
0
      }
803
804
18.4E
      thisCtuState = LF_INIT;
805
806
18.4E
      ITT_TASKEND( itt_domain_dec, itt_handle_mider );
807
18.4E
    }
808
    // Stage LF_INIT: set up and compute the loop filter parameters. No
    // neighbour precondition is checked in this stage.
809
18.4E
    case LF_INIT:
810
18.4E
    {
811
18.4E
      if( onlyCheckReadyState )
812
0
        return true;
813
814
18.4E
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfcl );
815
816
18.4E
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
817
0
      {
818
0
        const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus;
819
0
        CtuData& ctuData    = cs.getCtuData( ctuRsAddr );
        // lfParam[0] and lfParam[1] point at adjacent num4x4CtuBlks-sized
        // ranges of m_loopFilterParam (indices 2*ctuRsAddr+0 and +1), so the
        // single memset starting at lfParam[0] clears both.
820
0
        ctuData.lfParam[0]  = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 0 )];
821
0
        ctuData.lfParam[1]  = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 1 )];
822
0
        memset( ctuData.lfParam[0], 0, sizeof( LoopFilterParam ) * 2 * cs.pcv->num4x4CtuBlks );
823
824
0
        decLib.m_cLoopFilter.calcFilterStrengthsCTU( cs, ctuRsAddr );
825
0
      }
826
827
18.4E
      thisCtuState = INTER;
828
829
18.4E
      ITT_TASKEND( itt_domain_dec, itt_handle_lfcl );
830
18.4E
    }
831
    // Stage INTER: transform (TaskTrafoCtu) for every CTU plus inter
    // prediction for CTUs whose slice is not intra. Blocked while any of the
    // picture's refPicExtDepBarriers is still blocked.
832
19.0k
    case INTER:
833
19.0k
    {
      // Extra ordering constraints applied only when every slice of the
      // picture is intra.
834
19.0k
      if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) )
835
19.0k
      {
836
        // not really necessary, but only for optimizing the wave-fronts
837
19.0k
        if( col > 1 && thisLine[col - 2] <= INTER )
838
18.1k
          return false;
839
908
        if( line > 0 && lineAbove[col] <= INTER )
840
655
          return false;
841
908
      }
842
843
265
      if( std::any_of( cs.picture->refPicExtDepBarriers.cbegin(), cs.picture->refPicExtDepBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) )
844
0
      {
845
0
        return false;
846
0
      }
847
848
265
      if( onlyCheckReadyState )
849
267
        return true;
850
851
18.4E
      ITT_TASKSTART( itt_domain_dec, itt_handle_inter );
852
853
18.4E
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
854
0
      {
855
0
        const int ctuRsAddr    = ctu + line * cs.pcv->widthInCtus;
856
0
        const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
857
0
        const CtuData& ctuData = cs.getCtuData( ctuRsAddr );
858
859
0
        decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskTrafoCtu( cs, ctuRsAddr, ctuArea );
860
861
0
        if( !ctuData.slice->isIntra() )
862
0
        {
863
0
          decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskInterCtu( cs, ctuRsAddr, ctuArea );
864
          // TaskFinishMotionInfo is only called while the picture is flagged
          // as stillReferenced.
865
0
          if( cs.picture->stillReferenced )
866
0
          {
867
0
            decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskFinishMotionInfo( cs, ctuRsAddr, ctu, line );
868
0
          }
869
0
        }
870
0
      }
871
872
18.4E
      thisCtuState = INTRA;
873
874
18.4E
      ITT_TASKEND( itt_domain_dec, itt_handle_inter );
875
18.4E
    }
876
    // Stage INTRA: same neighbour pattern as MIDER, using INTRA / INTRA_cont:
    // left neighbour past INTRA_cont and, for line > 0, above-right neighbour
    // past INTRA (last column: above neighbour past INTRA_cont).
877
53.6k
    case INTRA:
878
53.6k
    {
879
53.6k
      if( col > 0 && thisLine[col - 1] <= INTRA_cont )
880
52.0k
        return false;
881
882
1.55k
      if( line > 0 )
883
996
      {
884
996
        if( col + 1 < tasksPerLine )
885
963
        {
886
963
          if( lineAbove[col + 1] <= INTRA )
887
877
            return false;
888
963
        }
889
33
        else
890
33
        {
891
33
          if( lineAbove[col] <= INTRA_cont )
892
0
            return false;
893
33
        }
894
996
      }
895
674
      if( onlyCheckReadyState )
896
675
        return true;
897
898
18.4E
      ITT_TASKSTART( itt_domain_dec, itt_handle_intra );
899
900
18.4E
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
901
0
      {
902
0
        const int ctuRsAddr    = ctu + line * cs.pcv->widthInCtus;
903
0
        const UnitArea ctuArea = getCtuArea( cs, ctu, line, true );
904
0
        decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskCriticalIntraKernel( cs, ctuRsAddr, ctuArea );
905
        // Intermediate state published after every processed CTU.
906
0
        thisCtuState = INTRA_cont;
907
0
      }
908
909
18.4E
      thisCtuState = RSP;
910
911
18.4E
      ITT_TASKEND( itt_domain_dec, itt_handle_intra );
912
18.4E
    }
913
    // Stage RSP: reshaping via m_cReshaper.rspCtuBcw. The checks form an
    // if / else-if chain, so only the first condition whose guard applies is
    // evaluated: below-right neighbour at least INTRA_cont, else below
    // neighbour at least RSP, else right neighbour at least INTRA_cont.
914
508k
    case RSP:
915
508k
    {
916
      // RIRZIIIII
917
      // IIIIIXXXX
918
      //
919
      // - Z can be reshaped when it is no more an intra prediction source for X in the next line
920
921
922
508k
      if     ( line + 1 < heightInCtus && col + 1 < tasksPerLine && lineBelow[col + 1] < INTRA_cont )
923
416k
        return false;
924
91.6k
      else if( line + 1 < heightInCtus &&                           lineBelow[col]     < RSP )
925
91.5k
        return false;
926
139
      else if(                            col + 1 < tasksPerLine && thisLine [col + 1] < INTRA_cont ) // need this for the last line
927
17
        return false;
928
929
2
      if( onlyCheckReadyState )
930
151
        return true;
931
932
18.4E
      ITT_TASKSTART( itt_domain_dec, itt_handle_rsp );
933
934
18.4E
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
935
0
      {
936
0
        decLib.m_pcThreadResource[tid]->m_cReshaper.rspCtuBcw( cs, ctu, line );
937
0
      }
938
939
18.4E
      ITT_TASKEND( itt_domain_dec, itt_handle_rsp );
940
941
18.4E
      thisCtuState = LF_V;
942
18.4E
    }
943
    // Stage LF_V: loop filter call with EDGE_VER. Needs the left neighbour to
    // have reached LF_V.
944
18.4E
    case LF_V:
945
18.4E
    {
946
18.4E
      if( col > 0 && thisLine[col - 1] < LF_V )
947
21
        return false;
948
18.4E
      if( onlyCheckReadyState )
949
10
        return true;
950
951
18.4E
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
952
953
18.4E
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
954
0
      {
955
0
        decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_VER );
956
        // Intermediate state published after every processed CTU; LF_H of the
        // neighbouring tasks compares against LF_V_cont.
957
0
        thisCtuState = LF_V_cont;
958
0
      }
959
960
18.4E
      thisCtuState = LF_H;
961
962
18.4E
      ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
963
18.4E
    }
964
    // Stage LF_H: loop filter call with EDGE_HOR. Needs the above neighbour to
    // have reached LF_H, and the above-right and right neighbours to have
    // reached LF_V_cont.
965
7.47k
    case LF_H:
966
7.47k
    {
967
7.47k
      if( line > 0 && lineAbove[col] < LF_H )
968
7
        return false;
969
970
7.47k
      if( line > 0 && col + 1 < tasksPerLine && lineAbove[col + 1] < LF_V_cont )
971
39
        return false;
972
973
7.58k
      if(             col + 1 < tasksPerLine && thisLine[col + 1] < LF_V_cont )
974
7.51k
        return false;
975
976
18.4E
      if( onlyCheckReadyState )
977
65
        return true;
978
979
18.4E
      ITT_TASKSTART( itt_domain_dec, itt_handle_lfl );
980
981
18.4E
      for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
982
0
      {
983
0
        decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_HOR );
984
0
      }
985
986
18.4E
      thisCtuState = PRESAO;
987
988
18.4E
      ITT_TASKEND( itt_domain_dec, itt_handle_lfl );
989
18.4E
    }
990
    // Stage PRESAO: the task in the last column prepares SAO for the whole CTU
    // line once the line above is past PRESAO and every task of this line and
    // of the line below has reached PRESAO. All other tasks wait until the
    // last-column task of their line is past PRESAO.
991
4.69k
    case PRESAO:
992
4.69k
    {
993
      // only last CTU processes full line
994
4.69k
      if( col == tasksPerLine - 1 )
995
697
      {
996
697
        if( line > 0 && lineAbove[col] <= PRESAO )
997
143
          return false;
998
999
877
        for( int c = 0; c < tasksPerLine; ++c )
1000
873
        {
1001
873
          if( thisLine[c] < PRESAO )
1002
0
            return false;
1003
1004
873
          if( line + 1 < heightInCtus && lineBelow[c] < PRESAO )
1005
550
            return false;
1006
873
        }
1007
4
        if( onlyCheckReadyState )
1008
4
          return true;
1009
1010
0
        ITT_TASKSTART( itt_domain_dec, itt_handle_presao );
1011
1012
0
        if( cs.sps->getUseSAO() )
1013
0
        {
1014
0
          decLib.m_cSAO.SAOPrepareCTULine( cs, getLineArea( cs, line, true ) );
1015
0
        }
1016
1017
0
        ITT_TASKEND( itt_domain_dec, itt_handle_presao );
1018
0
      }
1019
4.00k
      else if( thisLine[tasksPerLine - 1] <= PRESAO )   // wait for last CTU to finish PRESAO
1020
4.14k
      {
1021
4.14k
        return false;
1022
4.14k
      }
1023
18.4E
      if( onlyCheckReadyState )
1024
12
        return true;
1025
1026
18.4E
      thisCtuState = SAO;
1027
18.4E
    }
1028
    // Stage SAO: SAO processing (if enabled in the SPS) followed by ALF
    // preparation (if param->common.doALF). No neighbour precondition is
    // checked in this stage.
1029
18.4E
    case SAO:
1030
18.4E
    {
1031
18.4E
      if( onlyCheckReadyState )
1032
0
        return true;
1033
      // NOTE(review): the comment below looks stale - no column check is
      // visible in this stage; each task loops over its own [ctuStart, ctuEnd).
1034
      // only last CTU processes full line
1035
18.4E
      if( cs.sps->getUseSAO() )
1036
0
      {
1037
0
        ITT_TASKSTART( itt_domain_dec, itt_handle_sao );
1038
1039
0
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1040
0
        {
1041
0
          const UnitArea  ctuArea = getCtuArea( cs, ctu, line, true );
1042
0
          decLib.m_cSAO.SAOProcessCTU( cs, ctuArea );
1043
0
        }
1044
1045
0
        ITT_TASKEND( itt_domain_dec, itt_handle_sao );
1046
0
      }
1047
18.4E
      if( param->common.doALF )
1048
0
      {
1049
0
        ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
1050
1051
0
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1052
0
        {
1053
0
          AdaptiveLoopFilter::prepareCTU( cs, ctu, line );
1054
          // Intermediate state published after every prepared CTU; the ALF
          // stage of neighbouring tasks compares against SAO_cont.
1055
0
          thisCtuState = SAO_cont;
1056
0
        }
1057
1058
0
        ITT_TASKEND( itt_domain_dec, itt_handle_alf );
1059
0
      }
1060
1061
18.4E
      thisCtuState = ALF;
1062
18.4E
    }
1063
    // Stage ALF: only does work when param->common.doALF is set. The left,
    // above-left, above, below-left and below neighbours must have reached
    // ALF; the right, above-right and below-right neighbours must have reached
    // SAO_cont.
1064
328
    case ALF:
1065
328
    {
1066
328
      if( param->common.doALF )
1067
480
      {
        // a / b: a line above / below exists; c / d: a task to the left /
        // right exists in this line.
1068
480
        const bool a = line > 0;
1069
480
        const bool b = line + 1 < heightInCtus;
1070
480
        const bool c = col > 0;
1071
480
        const bool d = col + 1 < tasksPerLine;
1072
1073
480
        if( a )
1074
98
        {
1075
98
          if( c && lineAbove[col - 1] < ALF ) return false;
1076
91
          if(      lineAbove[col    ] < ALF ) return false;
1077
89
          if( d && lineAbove[col + 1] < SAO_cont ) return false;
1078
89
        }
1079
1080
471
        if( b )
1081
464
        {
1082
464
          if( c && lineBelow[col - 1] < ALF ) return false;
1083
71
          if(      lineBelow[col    ] < ALF ) return false;
1084
18
          if( d && lineBelow[col + 1] < SAO_cont ) return false;
1085
18
        }
1086
1087
19
        if( c && thisLine[col - 1] < ALF ) return false;
1088
18
        if( d && thisLine[col + 1] < SAO_cont ) return false;
1089
1090
13
        if( onlyCheckReadyState )
1091
14
          return true;
1092
1093
18.4E
        ITT_TASKSTART( itt_domain_dec, itt_handle_alf );
1094
18.4E
        for( int ctu = ctuStart; ctu < ctuEnd; ctu++ )
1095
0
        {
1096
0
          decLib.m_cALF.processCTU( cs, ctu, line, tid );
1097
0
        }
1098
18.4E
        ITT_TASKEND( itt_domain_dec, itt_handle_alf );
1099
18.4E
      }
1100
18.4E
      else if( onlyCheckReadyState )
1101
0
        return true;
1102
1103
18.4E
      thisCtuState = DONE;
1104
18.4E
    }
1105
    // Reached by fall-through after ALF, or directly for any state without a
    // case label above; in both situations the state is expected to be DONE.
1106
18.4E
    default:
1107
18.4E
      CHECKD( thisCtuState != DONE, "Wrong CTU state" );
1108
641k
    }   // end switch
1109
641k
  }
  // The catch-all handler only re-raises the exception that was just caught,
  // so it still propagates to the caller.
1110
641k
  catch( ... )
1111
641k
  {
1112
3.47k
    std::rethrow_exception( std::current_exception() );
1113
3.47k
  }
1114
1115
0
  return true;
1116
641k
}
1117
1118
}