/src/vvdec/source/Lib/DecoderLib/DecLibRecon.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | /** \file DecLibRecon.cpp |
44 | | \brief decoder class |
45 | | */ |
46 | | |
47 | | #include "DecLib.h" |
48 | | |
49 | | #include "CommonLib/TrQuant.h" |
50 | | #if ENABLE_SIMD_TCOEFF_OPS |
51 | | #include "CommonLib/TrQuant_EMT.h" |
52 | | #endif |
53 | | #include "CommonLib/InterPrediction.h" |
54 | | #include "CommonLib/IntraPrediction.h" |
55 | | #include "CommonLib/Unit.h" |
56 | | #include "CommonLib/Buffer.h" |
57 | | #include "CommonLib/UnitTools.h" |
58 | | |
59 | | #include "CommonLib/dtrace_next.h" |
60 | | #include "CommonLib/dtrace_buffer.h" |
61 | | |
62 | | namespace vvdec |
63 | | { |
64 | | |
// ITT tracing support. When TRACE_ENABLE_ITT is defined, this section declares the ITT
// domains, string handles and the frame counter (all `extern`, i.e. defined elsewhere) and
// maps ITT_TASKSTART / ITT_TASKEND onto __itt_task_begin / __itt_task_end.
// Otherwise both macros expand to nothing, so call sites need no #ifdef of their own.
65 | | #ifdef TRACE_ENABLE_ITT |
66 | | extern __itt_domain* itt_domain_dec; |
67 | | extern std::vector<__itt_domain*> itt_domain_decInst; |
68 | | |
69 | | extern __itt_string_handle* itt_handle_alf; |
70 | | extern __itt_string_handle* itt_handle_presao; |
71 | | extern __itt_string_handle* itt_handle_sao; |
72 | | extern __itt_string_handle* itt_handle_lfl; |
73 | | extern __itt_string_handle* itt_handle_intra; |
74 | | extern __itt_string_handle* itt_handle_inter; |
75 | | extern __itt_string_handle* itt_handle_mider; |
76 | | extern __itt_string_handle* itt_handle_lfcl; |
77 | | extern __itt_string_handle* itt_handle_ext; |
78 | | extern __itt_string_handle* itt_handle_dmvr; |
79 | | extern __itt_string_handle* itt_handle_rsp; |
80 | | |
81 | | extern __itt_string_handle* itt_handle_schedTasks; |
82 | | extern __itt_string_handle* itt_handle_waitTasks; |
83 | | |
84 | | // global domain for DecLib (only declared here, defined elsewhere) |
85 | | extern __itt_domain* itt_domain_glb; |
86 | | // global frame counter (only declared here, defined elsewhere) |
87 | | extern __itt_counter itt_frame_counter; |
88 | | |
// ITT_TASKEND accepts the handle `t` only for symmetry with ITT_TASKSTART; it is not forwarded.
89 | | #define ITT_TASKSTART( d, t ) __itt_task_begin( ( d ), __itt_null, __itt_null, ( t ) ) |
90 | | #define ITT_TASKEND( d, t ) __itt_task_end ( ( d ) ) |
91 | | #else |
92 | | #define ITT_TASKSTART( d, t ) |
93 | | #define ITT_TASKEND( d, t ) |
94 | | #endif |
95 | | |
96 | | //! \ingroup DecoderLib |
97 | | //! \{ |
98 | | |
// Prepares the shared task parameters for the reconstruction of a new picture.
//   cs            - coding structure of the picture; its address is stored in this->cs
//   ctuStartState - value every entry of ctuStates is initialised to
//   tasksPerLine  - number of tasks per CTU line; ctuStates gets heightInCtus * tasksPerLine entries
//   _doALF        - copied into doALF
99 | | void CommonTaskParam::reset( CodingStructure& cs, TaskType ctuStartState, int tasksPerLine, bool _doALF ) |
100 | 713 | { |
101 | 713 | this->cs = &cs; |
102 | | |
103 | 713 | const int heightInCtus = cs.pcv->heightInCtus; |
// Sanity check on the states left over from the previous picture: all of them are expected to be DONE.
// (CHECKD is presumably a debug-build-only check -- confirm against its definition.)
104 | 713 | CHECKD( !ctuStates.empty() && std::any_of( ctuStates.begin(), ctuStates.end(), []( CtuState& s ) { return s != DONE; } ), "some CTUs of previous pic not done" ); |
// The vector is replaced by a freshly constructed one and every state is then set explicitly via store().
105 | 713 | ctuStates = std::vector<CtuState>( heightInCtus * tasksPerLine ); |
106 | 713 | for( auto& ctu: ctuStates ) |
107 | 9.24k | { |
108 | 9.24k | ctu.store( ctuStartState ); |
109 | 9.24k | } |
// one MotionHist entry per CTU line
110 | 713 | perLineMiHist = std::vector<MotionHist>( heightInCtus ); |
111 | 713 | doALF = _doALF; |
112 | 713 | } |
113 | | |
// Constructor. Its body only calls the architecture-specific init routines of the global
// g_pelBufOP and g_tCoeffOps objects, and only when the matching ENABLE_SIMD_* option is
// enabled and the matching TARGET_SIMD_* macro is defined.
114 | | DecLibRecon::DecLibRecon() |
115 | 5.61k | { |
116 | 5.61k | #if ENABLE_SIMD_OPT_BUFFER |
117 | 5.61k | # if defined( TARGET_SIMD_X86 ) |
118 | 5.61k | g_pelBufOP.initPelBufOpsX86(); |
119 | 5.61k | # endif |
120 | | # if defined( TARGET_SIMD_ARM ) |
121 | | g_pelBufOP.initPelBufOpsARM(); |
122 | | # endif |
123 | 5.61k | #endif |
124 | 5.61k | #if ENABLE_SIMD_TCOEFF_OPS && defined( TARGET_SIMD_X86 ) |
125 | 5.61k | g_tCoeffOps.initTCoeffOpsX86(); |
126 | 5.61k | #endif |
127 | | #if ENABLE_SIMD_TCOEFF_OPS && defined( TARGET_SIMD_ARM ) |
128 | | g_tCoeffOps.initTCoeffOpsARM(); |
129 | | #endif |
130 | 5.61k | } |
131 | | |
// (Re-)initialises this reconstruction instance.
//   threadPool           - pool used for all reconstruction tasks; may be nullptr as far as this
//                          function is concerned (the thread count then falls back to 1)
//   instanceId           - index of this instance; only used to select/create the ITT domain
//   upscaleOutputEnabled - stored in m_upscaleOutputEnabled
// The object is destroyed and re-constructed in place first, then the cached sizes and raw
// buffers are reset and one PerThreadResource per decoder thread is allocated. Resources
// 1..n-1 are constructed from the m_cTrQuant of resource 0.
// The allocations made here are released by destroy().
132 | | void DecLibRecon::create( ThreadPool* threadPool, unsigned instanceId, bool upscaleOutputEnabled ) |
133 | 1.87k | { |
// NOTE(review): the comment below mentions DecLibParser although this is DecLibRecon -- possibly copied; confirm.
134 | | // run constructor again to ensure all variables, especially in DecLibParser have been reset |
135 | 1.87k | this->~DecLibRecon(); |
136 | 1.87k | new( this ) DecLibRecon; |
137 | | |
138 | | |
// Guarded with #ifdef like every other ITT section of this file: m_itt_decInst is read under
// `#ifdef TRACE_ENABLE_ITT` in decompressPicture(), so it has to be assigned under the same condition.
139 | | #ifdef TRACE_ENABLE_ITT |
140 | | if( itt_domain_decInst.size() < instanceId + 1 ) |
141 | | { |
142 | | std::string name( "DecLibRecon " + std::to_string( instanceId ) ); |
143 | | itt_domain_decInst.push_back( __itt_domain_create( name.c_str() ) ); |
144 | | itt_domain_decInst.back()->flags = 1; |
145 | | |
146 | | CHECK_FATAL( itt_domain_decInst.back() != itt_domain_decInst[instanceId], "current decLibRecon ITT-Domain is not the last in vector. Instances created in the wrong order?" ); |
147 | | } |
148 | | m_itt_decInst = itt_domain_decInst[instanceId]; |
149 | | #endif |
150 | | |
151 | 1.87k | m_decodeThreadPool = threadPool; |
// at least one set of per-thread resources is always needed, even without a pool or with zero pool threads
152 | 1.87k | m_numDecThreads = std::max( 1, threadPool ? threadPool->numThreads() : 1 ); |
153 | | |
154 | 1.87k | m_upscaleOutputEnabled = upscaleOutputEnabled; |
155 | 1.87k | m_predBufSize = 0; |
156 | 1.87k | m_dmvrMvCacheSize = 0; |
157 | 1.87k | m_dmvrMvCache = nullptr; |
158 | | |
159 | 1.87k | m_num4x4Elements = 0; |
160 | 1.87k | m_loopFilterParam = nullptr; |
161 | 1.87k | m_motionInfo = nullptr; |
162 | | |
163 | 1.87k | m_pcThreadResource = new PerThreadResource*[m_numDecThreads]; |
164 | 1.87k | m_pcThreadResource[0] = new PerThreadResource(); |
165 | 59.8k | for( int i = 1; i < m_numDecThreads; i++ ) |
166 | 57.9k | { |
167 | 57.9k | m_pcThreadResource[i] = new PerThreadResource( m_pcThreadResource[0]->m_cTrQuant ); |
168 | 57.9k | } |
169 | 1.87k | } |
170 | | |
// Releases everything allocated by create() and decompressPicture(): the prediction buffer,
// the DMVR MV cache, the loop-filter parameter and motion-info arrays (allocated with
// malloc(), hence free()) and the per-thread resources. The thread pool itself is not owned
// and is only forgotten here.
// Safe to call repeatedly: all pointers are reset to nullptr, and the per-thread loop is
// skipped once m_pcThreadResource has been released.
171 | | void DecLibRecon::destroy() |
172 | 1.87k | { |
173 | 1.87k | m_decodeThreadPool = nullptr; |
174 | | |
175 | 1.87k | if( m_predBuf ) |
176 | 713 | { |
177 | 713 | m_predBuf.reset(); |
178 | 713 | m_predBufSize = 0; |
179 | 713 | } |
180 | | |
181 | 1.87k | if( m_dmvrMvCache ) |
182 | 713 | { |
183 | 713 | free( m_dmvrMvCache ); |
184 | 713 | m_dmvrMvCache = nullptr; |
185 | 713 | m_dmvrMvCacheSize = 0; |
186 | 713 | } |
187 | | |
188 | 1.87k | if( m_loopFilterParam ) |
189 | 713 | { |
190 | 713 | free( m_loopFilterParam ); |
191 | 713 | m_loopFilterParam = nullptr; |
192 | 713 | } |
193 | | |
194 | 1.87k | if( m_motionInfo ) |
195 | 713 | { |
196 | 713 | free( m_motionInfo ); |
197 | 713 | m_motionInfo = nullptr; |
198 | 713 | } |
199 | | |
200 | 1.87k | m_num4x4Elements = 0; |
201 | | |
// m_numDecThreads is not reset below, so the array pointer itself has to be checked:
// without the guard a second destroy() would index a null m_pcThreadResource.
202 | 61.7k | for( int i = 0; m_pcThreadResource && i < m_numDecThreads; i++ ) delete m_pcThreadResource[i]; |
203 | 1.87k | delete[] m_pcThreadResource; m_pcThreadResource = nullptr; |
204 | 1.87k | } |
205 | | |
206 | | |
// Ensures that destBuf can be used alongside srcBuf for the picture of `cs`.
//   cs            - supplies the picture (chroma format, luma size, margin) and pcv->maxCUWidth
//                   used when destBuf has to be created
//   srcBuf        - buffer an already existing destBuf is compared against
//   destBuf       - storage that is kept, or destroyed and re-created
//   userAllocator - forwarded to PelStorage::create()
// An existing destBuf is kept only if its chroma format matches and every valid component
// compares equal to the source component and has the same stride, width and height.
207 | | static void getCompatibleBuffer( const CodingStructure& cs, const CPelUnitBuf& srcBuf, PelStorage& destBuf, const UserAllocator* userAllocator ) |
208 | 713 | { |
209 | 713 | if( !destBuf.bufs.empty() ) |
210 | 0 | { |
211 | 0 | bool compat = false; |
212 | 0 | if( destBuf.chromaFormat == srcBuf.chromaFormat ) |
213 | 0 | { |
214 | 0 | compat = true; |
215 | 0 | const uint32_t numCh = getNumberValidComponents( srcBuf.chromaFormat ); |
216 | 0 | for( uint32_t i = 0; i < numCh; i++ ) |
217 | 0 | { |
218 | | // check this otherwise it would turn out to get very weird |
219 | 0 | compat &= destBuf.get( ComponentID( i ) ) == srcBuf.get( ComponentID( i ) ); |
220 | 0 | compat &= destBuf.get( ComponentID( i ) ).stride == srcBuf.get( ComponentID( i ) ).stride; |
221 | 0 | compat &= destBuf.get( ComponentID( i ) ).width == srcBuf.get( ComponentID( i ) ).width; |
222 | 0 | compat &= destBuf.get( ComponentID( i ) ).height == srcBuf.get( ComponentID( i ) ).height; |
223 | 0 | } |
224 | 0 | } |
225 | 0 | if( !compat ) |
226 | 0 | { |
227 | 0 | destBuf.destroy(); |
228 | 0 | } |
229 | 0 | } |
// Not an else-branch on purpose: an incompatible buffer was destroyed above and is re-created here.
230 | 713 | if( destBuf.bufs.empty() ) |
231 | 713 | { |
232 | 713 | destBuf.create( cs.picture->chromaFormat, cs.picture->lumaSize(), cs.pcv->maxCUWidth, cs.picture->margin, MEMORY_ALIGN_DEF_SIZE, true, userAllocator ); |
233 | 713 | } |
234 | 713 | } |
235 | | |
// Starts the border extension of reference picture `pic` on behalf of `currPic`.
//   pic     - reference picture whose borders are extended
//   currPic - picture currently being set up; used for the task names and, if `pic` has to
//             be replaced by a grey picture, as the source of the SPS
// The function first blocks until all tasks of `pic` have finished. If `pic` did not reach
// the `reconstructed` state, it is flagged as erroneous, the pending exception is moved to
// parseDone and the picture is filled with grey. Afterwards six independent border extension
// tasks are scheduled (top, bottom, and left/right of the top and bottom part for luma and
// chroma separately). With wrap-around enabled, a copy task runs first and the extension
// tasks wait for it instead of waiting for reconDone directly.
// All tasks are counted in pic->m_borderExtTaskCounter.
236 | | void DecLibRecon::borderExtPic( Picture* pic, const Picture* currPic ) |
237 | 0 | { |
238 | | // we block and wait here, so the exceptions from the reference pic don't propagate to the current picture |
239 | 0 | pic->waitForAllTasks(); |
240 | 0 | if( pic->progress < Picture::reconstructed ) // an exception must have happened in the picture, so we need to clean it up |
241 | 0 | { |
242 | 0 | CHECK_FATAL( pic->progress < Picture::parsing, "Slice parsing should have started, so all structures are there" ); |
243 | 0 | try |
244 | 0 | { |
245 | 0 | pic->reconDone.checkAndRethrowException(); |
246 | 0 | pic->parseDone.checkAndRethrowException(); // when the error happened in the slice parsing tasks, there might not be an exception in recon done, so check parseDone also |
247 | 0 | } |
248 | 0 | catch( ... ) |
249 | 0 | { |
250 | 0 | pic->error = true; |
251 | 0 | pic->reconDone.clearException(); |
252 | | // TODO: for now we set it on parseDone, so we can handle it outside: |
253 | 0 | if( !pic->parseDone.hasException() ) |
254 | 0 | { |
255 | 0 | pic->parseDone.setException( std::current_exception() ); |
256 | 0 | } |
257 |

258 | 0 | pic->fillGrey( currPic->cs->sps.get() ); |
259 | 0 | } |
260 | 0 | } |
261 | | |
262 | 0 | pic->borderExtStarted = true; |
263 |

264 | 0 | const bool wrapAround = pic->cs->sps->getUseWrapAround(); |
265 | 0 | if( wrapAround ) |
266 | 0 | { |
267 | | // copy reconstruction buffer to wrapAround buffer. All other border-extension tasks depend on this task. |
268 | 0 | static auto copyTask = []( int, void* task_param ) |
269 | 0 | { |
270 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_ext ); |
271 | 0 | Picture* picture = static_cast<Picture*>( task_param ); |
272 | 0 | picture->getRecoBuf( true ).copyFrom( picture->getRecoBuf() ); |
273 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_ext ); |
274 | 0 | return true; |
275 | 0 | }; |
276 | 0 | pic->m_copyWrapBufDone.lock(); |
277 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string( currPic->poc ) + " copyTask Ref-POC:" + std::to_string( pic->poc ) ) |
278 | 0 | copyTask, |
279 | 0 | pic, |
280 | 0 | &pic->m_borderExtTaskCounter, |
281 | 0 | &pic->m_copyWrapBufDone, |
282 | 0 | { &pic->reconDone } ); |
283 | 0 | } |
284 | | |
285 | | // start actual border extension tasks |
// task "T": first flag of extendPicBorder() set
286 | 0 | { |
287 | 0 | static auto task = []( int, void* task_param ) |
288 | 0 | { |
289 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_ext ); |
290 | 0 | Picture* picture = static_cast<Picture*>( task_param ); |
291 | 0 | picture->extendPicBorder( true, false, false, false ); |
292 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_ext ); |
293 | 0 | return true; |
294 | 0 | }; |
295 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask T Ref-POC:" + std::to_string(pic->poc) ) |
296 | 0 | task, |
297 | 0 | pic, |
298 | 0 | &pic->m_borderExtTaskCounter, |
299 | 0 | nullptr, |
300 | 0 | { wrapAround ? &pic->m_copyWrapBufDone : &pic->reconDone } ); |
301 | 0 | } |
302 |

// task "B": second flag set
303 | 0 | { |
304 | 0 | static auto task = []( int, void* task_param ) |
305 | 0 | { |
306 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_ext ); |
307 | 0 | Picture* picture = static_cast<Picture*>( task_param ); |
308 | 0 | picture->extendPicBorder( false, true, false, false ); |
309 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_ext ); |
310 | 0 | return true; |
311 | 0 | }; |
312 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask B Ref-POC:" + std::to_string(pic->poc) ) |
313 | 0 | task, |
314 | 0 | pic, |
315 | 0 | &pic->m_borderExtTaskCounter, |
316 | 0 | nullptr, |
317 | 0 | { wrapAround ? &pic->m_copyWrapBufDone : &pic->reconDone } ); |
318 | 0 | } |
319 |

// task "lrT Y": third flag set, luma only. The third flag presumably selects the left/right
// extension of the top part (by symmetry with "T"/"B" and "lrB") -- confirm against extendPicBorder().
320 | 0 | { |
321 | 0 | static auto task = []( int, void* task_param ) |
322 | 0 | { |
323 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_ext ); |
324 | 0 | Picture* picture = static_cast<Picture*>( task_param ); |
325 | 0 | picture->extendPicBorder( false, false, true, false, CH_L ); |
326 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_ext ); |
327 | 0 | return true; |
328 | 0 | }; |
329 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrT Y Ref-POC:" + std::to_string(pic->poc) ) |
330 | 0 | task, |
331 | 0 | pic, |
332 | 0 | &pic->m_borderExtTaskCounter, |
333 | 0 | nullptr, |
334 | 0 | { wrapAround ? &pic->m_copyWrapBufDone : &pic->reconDone } ); |
335 | 0 | } |
// task "lrB Y": fourth flag set, luma only
336 | 0 | { |
337 | 0 | static auto task = []( int, void* task_param ) |
338 | 0 | { |
339 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_ext ); |
340 | 0 | Picture* picture = static_cast<Picture*>( task_param ); |
341 | 0 | picture->extendPicBorder( false, false, false, true, CH_L ); |
342 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_ext ); |
343 | 0 | return true; |
344 | 0 | }; |
345 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrB Y Ref-POC:" + std::to_string(pic->poc) ) |
346 | 0 | task, |
347 | 0 | pic, |
348 | 0 | &pic->m_borderExtTaskCounter, |
349 | 0 | nullptr, |
350 | 0 | { wrapAround ? &pic->m_copyWrapBufDone : &pic->reconDone } ); |
351 | 0 | } |
352 |

// task "lrT UV": third flag set, chroma only (name used to duplicate the "lrB UV" task below)
353 | 0 | { |
354 | 0 | static auto task = []( int, void* task_param ) |
355 | 0 | { |
356 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_ext ); |
357 | 0 | Picture* picture = static_cast<Picture*>( task_param ); |
358 | 0 | picture->extendPicBorder( false, false, true, false, CH_C ); |
359 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_ext ); |
360 | 0 | return true; |
361 | 0 | }; |
362 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrT UV Ref-POC:" + std::to_string(pic->poc) ) |
363 | 0 | task, |
364 | 0 | pic, |
365 | 0 | &pic->m_borderExtTaskCounter, |
366 | 0 | nullptr, |
367 | 0 | { wrapAround ? &pic->m_copyWrapBufDone : &pic->reconDone } ); |
368 | 0 | } |
// task "lrB UV": fourth flag set, chroma only
369 | 0 | { |
370 | 0 | static auto task = []( int, void* task_param ) |
371 | 0 | { |
372 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_ext ); |
373 | 0 | Picture* picture = static_cast<Picture*>( task_param ); |
374 | 0 | picture->extendPicBorder( false, false, false, true, CH_C ); |
375 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_ext ); |
376 | 0 | return true; |
377 | 0 | }; |
378 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(currPic->poc) + " borderExtTask lrB UV Ref-POC:" + std::to_string(pic->poc) ) |
379 | 0 | task, |
380 | 0 | pic, |
381 | 0 | &pic->m_borderExtTaskCounter, |
382 | 0 | nullptr, |
383 | 0 | { wrapAround ? &pic->m_copyWrapBufDone : &pic->reconDone } ); |
384 | 0 | } |
385 | 0 | } |
386 | | |
// Creates one reference buffer per sub-picture for reference picture `pic` and schedules the
// tasks that fill them.
//   pic     - reference picture; receives the buffers in m_subPicRefBufs
//   currPic - picture currently being set up; only used for the task names
// For every sub-picture of pic's PPS a buffer of the sub-picture size is created (with the
// picture margin), and a task is scheduled that copies the sub-picture area out of the
// reconstruction buffer and extends the border of the copy. Each task waits for
// pic->reconDone and is counted in pic->m_borderExtTaskCounter.
387 | | void DecLibRecon::createSubPicRefBufs( Picture* pic, const Picture* currPic ) |
388 | 0 | { |
389 | 0 | pic->subPicExtStarted = true; |
390 |

391 | 0 | const PPS* pps = pic->cs->pps.get(); |
392 | 0 | const SPS* sps = pic->cs->sps.get(); |
393 | 0 | const int numSubPic = pps->getNumSubPics(); |
394 |

395 | 0 | pic->m_subPicRefBufs.resize( numSubPic ); |
396 | 0 | for( int i = 0; i < numSubPic; ++i ) |
397 | 0 | { |
398 | 0 | const SubPic& currSubPic = pps->getSubPic( i ); |
399 | 0 | const Area subPicArea( currSubPic.getSubPicLeft(), |
400 | 0 | currSubPic.getSubPicTop(), |
401 | 0 | currSubPic.getSubPicWidthInLumaSample(), |
402 | 0 | currSubPic.getSubPicHeightInLumaSample() ); |
403 |

404 | 0 | pic->m_subPicRefBufs[i].create( pic->chromaFormat, Size( subPicArea ), sps->getMaxCUWidth(), pic->margin, MEMORY_ALIGN_DEF_SIZE ); |
405 |

406 | 0 | static auto task = []( int, void* task_param ) |
407 | 0 | { |
408 | 0 | SubPicExtTask* t = static_cast<SubPicExtTask*>( task_param ); |
409 | 0 | t->subPicBuf->copyFrom( t->picture->getRecoBuf().subBuf( t->subPicArea ) ); |
410 | 0 | t->picture->extendPicBorderBuf( *t->subPicBuf ); |
411 | 0 | return true; |
412 | 0 | }; |
// The address of the new element is handed to the task, so m_subPicExtTasks must not reallocate
// while tasks are pending. decompressPicture() reserves capacity up front for this purpose.
// NOTE(review): confirm that the reserved size covers every call made for one picture.
413 | 0 | m_subPicExtTasks.emplace_back( SubPicExtTask{ pic, &pic->m_subPicRefBufs[i], subPicArea } ); |
414 | 0 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string( currPic->poc ) + " subPicBorderExtTask refPOC:" + std::to_string( pic->poc ) ) |
415 | 0 | task, |
416 | 0 | &m_subPicExtTasks.back(), |
417 | 0 | &pic->m_borderExtTaskCounter, |
418 | 0 | nullptr, |
419 | 0 | { &pic->reconDone } ); |
420 | 0 | } |
421 | 0 | } |
422 | | |
// Exchanges the picture's reconstruction buffer with m_fltBuf and re-binds the buffers of the
// coding structure afterwards. Called from the finish task of decompressPicture() when ALF
// was applied to the picture.
423 | | void DecLibRecon::swapBufs( CodingStructure& cs ) |
424 | 3 | { |
425 | 3 | cs.picture->m_bufs[PIC_RECONSTRUCTION].swap( m_fltBuf ); |
426 | 3 | cs.rebindPicBufs(); // ensure the recon buf in the coding structure points to the correct buffer |
427 | 3 | } |
428 | | |
// Sets up all per-picture resources for `pcPic` and schedules its reconstruction tasks on the
// thread pool. The picture is remembered in m_currDecompPic.
// Steps, in order:
//   1. mark the picture as `reconstructing` and initialise the per-thread tools
//      (reshaper if enabled, intra/inter prediction, TrQuant, CU decoder)
//   2. make sure m_fltBuf matches the picture, create SAO and (if enabled) ALF
//   3. (re-)allocate the prediction buffer, the DMVR MV cache and the loop-filter parameter /
//      motion-info arrays whenever the required size differs from the cached one
//   4. start border extension and sub-picture buffer creation for reference pictures that
//      have not been handled yet and collect the barriers to wait for
//   5. schedule one ctuTask per (CTU line, column group) in zig-zag order, followed by a
//      finish task that depends on all CTU tasks and unlocks pcPic->reconDone
// Apart from the zero-thread case, where pending reference picture tasks are processed on the
// calling thread, this function only schedules work.
// NOTE(review): m_decodeThreadPool is dereferenced unconditionally here, although create()
// tolerates a null thread pool -- confirm that callers always pass a pool.
429 | | void DecLibRecon::decompressPicture( Picture* pcPic ) |
430 | 713 | { |
431 | 713 | m_currDecompPic = pcPic; |
432 | | |
433 | 713 | CodingStructure& cs = *pcPic->cs; |
434 | | |
435 | 713 | pcPic->progress = Picture::reconstructing; |
436 | | |
437 | | #ifdef TRACE_ENABLE_ITT |
438 | | // mark start of frame |
439 | | pcPic->m_itt_decLibInst = m_itt_decInst; |
440 | | __itt_frame_begin_v3( pcPic->m_itt_decLibInst, nullptr ); |
441 | | #endif |
442 | | |
443 | | // Initialise the various objects for the new set of settings |
444 | 713 | const SPS * sps = cs.sps.get(); |
445 | 713 | const PPS * pps = cs.pps.get(); |
446 | | |
447 | 23.5k | for( int i = 0; i < m_numDecThreads; i++ ) |
448 | 22.8k | { |
449 | 22.8k | if( sps->getUseReshaper() ) |
450 | 4.67k | { |
451 | 4.67k | m_pcThreadResource[i]->m_cReshaper.createDec( sps->getBitDepth() ); |
452 | 4.67k | m_pcThreadResource[i]->m_cReshaper.initSlice( pcPic->slices[0]->getNalUnitLayerId(), *pcPic->slices[0]->getPicHeader(), pcPic->slices[0]->getVPS_nothrow() ); |
453 | 4.67k | } |
454 | | |
455 | 22.8k | m_pcThreadResource[i]->m_cIntraPred.init( sps->getChromaFormatIdc(), sps->getBitDepth() ); |
456 | 22.8k | m_pcThreadResource[i]->m_cInterPred.init( &m_cRdCost, sps->getChromaFormatIdc(), sps->getMaxCUHeight() ); |
457 | | |
458 | | // Recursive structure |
459 | 22.8k | m_pcThreadResource[i]->m_cTrQuant.init( pcPic ); |
460 | 22.8k | m_pcThreadResource[i]->m_cCuDecoder.init( &m_pcThreadResource[i]->m_cIntraPred, &m_pcThreadResource[i]->m_cInterPred, &m_pcThreadResource[i]->m_cReshaper, &m_pcThreadResource[i]->m_cTrQuant ); |
461 | 22.8k | } |
462 | | |
463 | 713 | getCompatibleBuffer( *pcPic->cs, pcPic->cs->getRecoBuf(), m_fltBuf, pcPic->getUserAllocator() ); |
464 | | |
465 | 713 | const uint32_t log2SaoOffsetScale = (uint32_t) std::max(0, sps->getBitDepth() - MAX_SAO_TRUNCATED_BITDEPTH); |
466 | 713 | const int maxDepth = getLog2(sps->getMaxCUWidth()) - pps->pcv->minCUWidthLog2; |
467 | 713 | m_cSAO.create( pps->getPicWidthInLumaSamples(), |
468 | 713 | pps->getPicHeightInLumaSamples(), |
469 | 713 | sps->getChromaFormatIdc(), |
470 | 713 | sps->getMaxCUWidth(), |
471 | 713 | sps->getMaxCUHeight(), |
472 | 713 | maxDepth, |
473 | 713 | log2SaoOffsetScale, |
474 | 713 | m_fltBuf |
475 | 713 | ); |
476 | | |
477 | 713 | if( sps->getUseALF() ) |
478 | 713 | { |
479 | 713 | m_cALF.create( cs.picHeader.get(), sps, pps, m_numDecThreads, m_fltBuf ); |
480 | 713 | } |
481 | | |
482 | 713 | const PreCalcValues* pcv = cs.pcv; |
483 | | |
484 | | // set reconstruction buffers in CodingStructure |
// samples per CTU: one luma block plus two chroma blocks (chroma size derived from the format's scale factors)
485 | 713 | const ptrdiff_t ctuSampleSizeL = pcv->maxCUHeight * pcv->maxCUWidth; |
486 | 713 | const ptrdiff_t ctuSampleSizeC = isChromaEnabled( pcv->chrFormat ) ? ( ctuSampleSizeL >> ( getChannelTypeScaleX( CH_C, pcv->chrFormat ) + getChannelTypeScaleY( CH_C, pcv->chrFormat ) ) ) : 0; |
487 | 713 | const ptrdiff_t ctuSampleSize = ctuSampleSizeL + 2 * ctuSampleSizeC; |
488 | 713 | const size_t predBufSize = ctuSampleSize * pcv->sizeInCtus; |
489 | 713 | if( predBufSize != m_predBufSize ) |
490 | 713 | { |
491 | 713 | m_predBuf.reset( ( Pel* ) xMalloc( Pel, predBufSize ) ); |
492 | 713 | m_predBufSize = predBufSize; |
493 | 713 | } |
494 | | |
495 | 713 | pcPic->cs->m_predBuf = m_predBuf.get(); |
496 | | |
497 | | // for the worst case of all PUs being 8x8 and using DMVR |
498 | 713 | const size_t _maxNumDmvrMvs = pcv->num8x8CtuBlks * pcv->sizeInCtus; |
499 | 713 | if( _maxNumDmvrMvs != m_dmvrMvCacheSize ) |
500 | 713 | { |
501 | 713 | if( m_dmvrMvCache ) free( m_dmvrMvCache ); |
502 | 713 | m_dmvrMvCacheSize = _maxNumDmvrMvs; |
// NOTE(review): the results of malloc() here and for the two arrays below are not checked for nullptr.
503 | 713 | m_dmvrMvCache = ( Mv* ) malloc( sizeof( Mv ) * _maxNumDmvrMvs ); |
504 | 713 | } |
505 | | |
506 | 713 | pcPic->cs->m_dmvrMvCache = m_dmvrMvCache; |
507 | | |
508 | 713 | if( m_num4x4Elements != cs.pcv->num4x4CtuBlks * cs.pcv->sizeInCtus ) |
509 | 713 | { |
510 | 713 | if( m_loopFilterParam ) free( m_loopFilterParam ); |
511 | 713 | if( m_motionInfo ) free( m_motionInfo ); |
512 | | |
513 | 713 | m_num4x4Elements = cs.pcv->num4x4CtuBlks * cs.pcv->sizeInCtus; |
514 | | |
// two LoopFilterParam entries but only one MotionInfo entry are allocated per 4x4 element
515 | 713 | m_loopFilterParam = ( LoopFilterParam* ) malloc( sizeof( LoopFilterParam ) * m_num4x4Elements * 2 ); |
516 | 713 | m_motionInfo = ( MotionInfo* ) malloc( sizeof( MotionInfo ) * m_num4x4Elements ); |
517 | 713 | } |
518 | | // finished |
519 | | |
520 | 713 | const int widthInCtus = cs.pcv->widthInCtus; |
521 | 713 | const int heightInCtus = cs.pcv->heightInCtus; |
522 | | |
523 | 713 | if( sps->getIBCFlag() ) |
524 | 706 | { |
525 | 706 | cs.initVIbcBuf( heightInCtus, sps->getChromaFormatIdc(), sps->getMaxCUHeight() ); |
526 | 706 | } |
527 | 713 | pcPic->startProcessingTimer(); |
528 | | |
// the scheduling phase is only traced when the pool has worker threads (matching ITT_TASKEND at the end)
529 | 713 | if( m_decodeThreadPool->numThreads() > 0 ) |
530 | 713 | { |
531 | 713 | ITT_TASKSTART( itt_domain_dec, itt_handle_schedTasks ); |
532 | 713 | } |
533 | | |
534 | 713 | picBarriers.clear(); |
// With ALLOW_MIDER_LF_DURING_PICEXT the reference picture barriers are collected separately and
// handed to the picture (refPicExtDepBarriers); otherwise they are added to picBarriers,
// which every CTU task waits for.
535 | 713 | #if ALLOW_MIDER_LF_DURING_PICEXT |
536 | 713 | CBarrierVec picExtBarriers; |
537 | | #else |
538 | | CBarrierVec &picExtBarriers = picBarriers; |
539 | | #endif |
540 | | |
541 | 713 | const int numSubPic = cs.pps->getNumSubPics(); |
542 | 713 | if( numSubPic > 1 ) |
543 | 0 | { |
// capacity is reserved up front because createSubPicRefBufs() hands out pointers to the elements
544 | 0 | m_subPicExtTasks.clear(); |
545 | 0 | m_subPicExtTasks.reserve( pcPic->slices.size() * MAX_NUM_REF_PICS * numSubPic ); |
546 | 0 | } |
547 | | |
548 | 713 | std::vector<Picture*> borderExtRefPics( pcPic->buildAllRefPicsVec() ); |
549 | 713 | for( Picture* refPic : borderExtRefPics ) |
550 | 0 | { |
551 | 0 | if( !refPic->borderExtStarted ) |
552 | 0 | { |
553 | | // TODO: (GH) Can we bypass this border extension, when all subpics (>1) are treated as pics? |
554 | 0 | borderExtPic( refPic, pcPic ); |
555 | 0 | } |
556 |

557 | 0 | if( !refPic->subPicExtStarted && numSubPic > 1 && refPic->m_subPicRefBufs.size() != numSubPic ) |
558 | 0 | { |
559 | 0 | CHECK( !refPic->m_subPicRefBufs.empty(), "Wrong number of subpics already present in reference picture" ); |
560 | 0 | CHECK( cs.sps->getUseWrapAround(), "Wraparound + subpics not implemented" ); |
561 |

562 | 0 | createSubPicRefBufs( refPic, pcPic ); |
563 | 0 | } |
564 | | |
// remember the barrier of every reference picture whose extension tasks are still running (each barrier only once)
565 | 0 | if( refPic->m_borderExtTaskCounter.isBlocked() && |
566 | 0 | std::find( picExtBarriers.cbegin(), picExtBarriers.cend(), refPic->m_borderExtTaskCounter.donePtr() ) == picExtBarriers.cend() ) |
567 | 0 | { |
568 | 0 | picExtBarriers.push_back( refPic->m_borderExtTaskCounter.donePtr() ); |
569 | 0 | } |
570 | 0 | } |
571 | | |
// without worker threads nobody else can unblock these barriers, so the pending tasks are run right here
572 | 713 | if( m_decodeThreadPool->numThreads() == 0 && ( |
573 | 0 | std::any_of( picExtBarriers.cbegin(), picExtBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) || |
574 | 0 | std::any_of( picBarriers .cbegin(), picBarriers .cend(), []( const Barrier* b ) { return b->isBlocked(); } ) ) ) |
575 | 0 | { |
576 | 0 | m_decodeThreadPool->processTasksOnMainThread(); |
577 | 0 | } |
578 | | |
579 | 713 | const bool isIntra = std::all_of( pcPic->slices.begin(), pcPic->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ); |
580 | | |
// CTU columns per task: widthInCtus / ( 2 * threads ) for all-intra pictures, widthInCtus / threads + 1
// otherwise, clamped to [1, widthInCtus]. numTasksPerLine is widthInCtus / numColPerTask rounded up.
581 | 713 | const int numColPerTask = std::max( std::min( widthInCtus, ( widthInCtus / std::max( m_numDecThreads * ( isIntra ? 2 : 1 ), 1 ) ) + ( isIntra ? 0 : 1 ) ), 1 ); |
582 | 713 | const int numTasksPerLine = widthInCtus / numColPerTask + !!( widthInCtus % numColPerTask ); |
583 | | |
584 | 713 | #if ALLOW_MIDER_LF_DURING_PICEXT |
585 | 713 | pcPic->refPicExtDepBarriers = std::move( picExtBarriers ); |
586 | 713 | #endif |
587 | | #if !RECO_WHILE_PARSE |
588 | | picBarriers.push_back( &cs.picture->parseDone ); |
589 | | #endif |
590 | | |
591 | 713 | const TaskType ctuStartState = MIDER; |
592 | 713 | const bool doALF = cs.sps->getUseALF() && !AdaptiveLoopFilter::getAlfSkipPic( cs ); |
593 | 713 | commonTaskParam.reset( cs, ctuStartState, numTasksPerLine, doALF ); |
594 | | |
595 | 713 | tasksFinishMotion = std::vector<LineTaskParam>( heightInCtus, LineTaskParam{ commonTaskParam, -1 } ); |
596 | 713 | tasksCtu = std::vector<CtuTaskParam >( heightInCtus * numTasksPerLine, CtuTaskParam{ commonTaskParam, -1, -1, {} } ); |
597 | | |
// locked here and unlocked by the finish task scheduled at the end of this function
598 | 713 | pcPic->reconDone.lock(); |
599 | | |
// the raster scan variant is disabled; the zig-zag variant in the #else branch is compiled
600 | | #if 0 |
601 | | // schedule in raster scan order |
602 | | for( int line = 0; line < heightInCtus; ++line ) |
603 | | { |
604 | | for( int col = 0; col < widthInCtus; ++col ) |
605 | | { |
606 | | #else |
607 | | // schedule in zig-zag scan order |
// i enumerates the anti-diagonals: the inner loop visits all ( line, col ) with line + col == i,
// positions outside the task grid are filtered by the range check below
608 | 5.41k | for( int i = 0; i < numTasksPerLine + heightInCtus; ++i ) |
609 | 4.70k | { |
610 | 4.70k | int line = 0; |
611 | 25.1k | for( int col = i; col >= 0; --col, ++line ) |
612 | 20.4k | { |
613 | 20.4k | #endif |
614 | 20.4k | if( line < heightInCtus && col < numTasksPerLine ) |
615 | 9.14k | { |
616 | 9.14k | CBarrierVec ctuBarriers = picBarriers; |
617 | 9.14k | const int ctuStart = col * numColPerTask; |
618 | 9.14k | const int ctuEnd = std::min( ctuStart + numColPerTask, widthInCtus ); |
619 | | |
620 | 9.14k | #if RECO_WHILE_PARSE |
621 | 9.14k | if( pcPic->parseDone.isBlocked() ) |
622 | 9.04k | { |
623 | | // wait for the last CTU in the current line to be parsed |
624 | 9.04k | ctuBarriers.push_back( &pcPic->ctuParsedBarrier[( line + 1 ) * widthInCtus - 1] ); |
625 | 9.04k | } |
626 | | |
627 | 9.14k | #endif |
628 | 9.14k | CtuTaskParam* param = &tasksCtu[line * numTasksPerLine + col]; |
629 | 9.14k | param->taskLine = line; |
630 | 9.14k | param->taskCol = col; |
631 | 9.14k | param->ctuEnd = ctuEnd; |
632 | 9.14k | param->ctuStart = ctuStart; |
633 | 9.14k | param->numColPerTask = numColPerTask; |
634 | 9.14k | param->numTasksPerLine = numTasksPerLine; |
635 | | |
// ctuTask<false> is passed as the task function, ctuTask<true> as the additional last argument
// (by its template parameter name the variant that only checks the ready state)
636 | 9.14k | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string(pcPic->poc) + " ctuTask:" + std::to_string( col ) + "," + std::to_string( line ) ) |
637 | 9.14k | ctuTask<false>, |
638 | 9.14k | param, |
639 | 9.14k | &pcPic->m_ctuTaskCounter, |
640 | 9.14k | nullptr, |
641 | 9.14k | std::move( ctuBarriers ), |
642 | 9.14k | ctuTask<true> ); |
643 | 9.14k | } |
644 | 20.4k | } |
645 | 4.70k | } |
646 | | |
647 | 713 | { |
// Final task of the picture: swaps in the filtered buffer if ALF was applied, releases the
// temporary internals of the coding structure, stops the timer and marks the picture as reconstructed.
648 | 713 | static auto finishReconTask = []( int, void* task_param ) |
649 | 713 | { |
650 | 3 | FinishPicTaskParam* param = static_cast<FinishPicTaskParam*>( task_param ); |
651 | 3 | CodingStructure& cs = *param->pic->cs; |
652 | | |
653 | 3 | if( cs.sps->getUseALF() && !AdaptiveLoopFilter::getAlfSkipPic( cs ) ) |
654 | 3 | { |
655 | 3 | param->decLib->swapBufs( cs ); |
656 | 3 | } |
657 | | |
658 | 3 | cs.deallocTempInternals(); |
659 | | |
660 | | #ifdef TRACE_ENABLE_ITT |
661 | | // mark end of frame |
662 | | __itt_frame_end_v3( param->pic->m_itt_decLibInst, nullptr ); |
663 | | #endif |
664 | 3 | param->pic->stopProcessingTimer(); |
665 | | |
666 | 3 | param->pic->progress = Picture::reconstructed; |
667 | 3 | return true; |
668 | 3 | }; |
669 | | |
// waits for the done-barrier of all CTU tasks and is given reconDone as the barrier of this task
670 | 713 | taskFinishPic = FinishPicTaskParam( this, pcPic ); |
671 | 713 | m_decodeThreadPool->addBarrierTask( TP_TASK_NAME_ARG( "POC:" + std::to_string( pcPic->poc ) + " finishPicTask" ) |
672 | 713 | finishReconTask, |
673 | 713 | &taskFinishPic, |
674 | 713 | &pcPic->m_divTasksCounter, |
675 | 713 | &pcPic->reconDone, |
676 | 713 | { pcPic->m_ctuTaskCounter.donePtr() } ); |
677 | 713 | } |
678 | | |
// counterpart of the ITT_TASKSTART above: the trace region is only closed when worker threads exist
679 | 713 | if( m_decodeThreadPool->numThreads() == 0 ) |
680 | 0 | { |
681 | 0 | } |
682 | 713 | else |
683 | 713 | { |
684 | 713 | ITT_TASKEND( itt_domain_dec, itt_handle_schedTasks ); |
685 | 713 | } |
686 | 713 | } |
687 | | |
// Waits until the picture handed to the last decompressPicture() call is finished.
// Returns that picture and clears m_currDecompPic, or returns nullptr if no picture is pending.
// Without worker threads the scheduled tasks are executed on the calling thread first.
// An exception thrown by reconDone.wait() is not propagated: the picture is flagged via its
// `error` member instead, and cleanupOnException() is called.
688 | | Picture* DecLibRecon::waitForPrevDecompressedPic() |
689 | 2.54k | { |
690 | 2.54k | if( !m_currDecompPic ) |
691 | 1.83k | return nullptr; |
692 | | |
693 | 713 | ITT_TASKSTART( itt_domain_dec, itt_handle_waitTasks ); |
694 | 713 | if( m_decodeThreadPool->numThreads() == 0 ) |
695 | 0 | { |
696 | 0 | m_decodeThreadPool->processTasksOnMainThread(); |
697 | 0 | CHECK_FATAL( m_currDecompPic->reconDone.isBlocked(), "can't make progress. some dependecy has not been finished" ); |
698 | 0 | } |
699 | | |
700 | 713 | try |
701 | 713 | { |
702 | 713 | m_currDecompPic->reconDone.wait(); |
703 | 713 | } |
704 | 713 | catch( ... ) |
705 | 713 | { |
706 | 710 | m_currDecompPic->error = true; |
707 | 710 | } |
708 | | |
709 | | // also check error flag, which can have been set earlier (e.g., when trying to use the picture as reference) |
710 | 713 | if( m_currDecompPic->error || m_currDecompPic->reconDone.hasException() ) |
711 | 710 | { |
712 | | // ensure all tasks are cleared from declibRecon |
// NOTE(review): this call is outside the catch handler above, so std::current_exception() yields a
// null exception_ptr here. cleanupOnException() does not use its argument, so this has no effect.
713 | 710 | cleanupOnException( std::current_exception() ); |
714 | 710 | } |
715 | | |
716 | 713 | ITT_TASKEND( itt_domain_dec, itt_handle_waitTasks ); |
717 | | |
718 | 713 | return std::exchange( m_currDecompPic, nullptr ); |
719 | 713 | } |
720 | | |
// Cleans up after a failed picture: waits for all tasks of m_currDecompPic and then drops the
// CTU states, so that the "previous pic not done" check in CommonTaskParam::reset() is skipped
// for the next picture (that check only runs on a non-empty vector).
//   exception - currently unused
// Requires m_currDecompPic to be non-null.
721 | | void DecLibRecon::cleanupOnException( std::exception_ptr exception ) |
722 | 710 | { |
723 | | // there was an exception anywhere in m_currDecompPic |
724 | | // => we need to wait for all tasks to be cleared from the thread pool |
725 | 710 | m_currDecompPic->waitForAllTasks(); |
726 | | |
727 | 710 | commonTaskParam.ctuStates.clear(); |
728 | 710 | } |
729 | | |
730 | | template<bool onlyCheckReadyState> |
731 | | bool DecLibRecon::ctuTask( int tid, void* task_param ) |
732 | 646k | { |
733 | 646k | CtuTaskParam* param = static_cast<CtuTaskParam*>( task_param ); |
734 | | |
735 | 646k | const int taskCol = param->taskCol; |
736 | 646k | const int line = param->taskLine; |
737 | 646k | const int col = taskCol; |
738 | | |
739 | 646k | auto& cs = *param->common.cs; |
740 | 646k | auto& decLib = param->common.decLib; |
741 | 646k | const int tasksPerLine = param->numTasksPerLine; |
742 | 646k | const int heightInCtus = cs.pcv->heightInCtus; |
743 | | |
744 | 646k | CtuState& thisCtuState = param->common.ctuStates[line * tasksPerLine + taskCol]; |
745 | 646k | const CtuState* thisLine = ¶m->common.ctuStates[line * tasksPerLine]; |
746 | 646k | const CtuState* lineAbove = thisLine - tasksPerLine; |
747 | 646k | const CtuState* lineBelow = thisLine + tasksPerLine; |
748 | | |
749 | 646k | const int ctuStart = param->ctuStart; |
750 | 646k | const int ctuEnd = param->ctuEnd; |
751 | | |
752 | 646k | try |
753 | 646k | { |
754 | 646k | if( cs.picture->m_ctuTaskCounter.hasException() ) |
755 | 3.47k | { |
756 | 3.47k | std::rethrow_exception( cs.picture->m_ctuTaskCounter.getException() ); |
757 | 3.47k | } |
758 | | |
759 | 646k | switch( thisCtuState.load() ) |
760 | 646k | { |
761 | | // all case statements fall through to continue with next task, unless they return false due to unsatisfied preconditions |
762 | | |
763 | 48.0k | case MIDER: |
764 | 48.0k | { |
765 | 48.0k | if( col > 0 && thisLine[col - 1] <= MIDER_cont ) |
766 | 41.3k | return false; |
767 | 6.70k | if( line > 0 ) |
768 | 2.48k | { |
769 | 2.48k | if( col + 1 < tasksPerLine ) |
770 | 1.91k | { |
771 | 1.91k | if( lineAbove[col + 1] <= MIDER ) |
772 | 0 | return false; |
773 | 1.91k | } |
774 | 562 | else |
775 | 562 | { |
776 | 562 | if( lineAbove[col] <= MIDER_cont ) |
777 | 0 | return false; |
778 | 562 | } |
779 | 2.48k | } |
780 | 6.70k | if( onlyCheckReadyState ) |
781 | 3.35k | return true; |
782 | | |
783 | 3.34k | ITT_TASKSTART( itt_domain_dec, itt_handle_mider ); |
784 | | |
785 | 6.70k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
786 | 3.35k | { |
787 | 3.35k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; |
788 | 3.35k | CtuData& ctuData = cs.getCtuData( ctuRsAddr ); |
789 | 3.35k | ctuData.motion = &decLib.m_motionInfo[cs.pcv->num4x4CtuBlks * ctuRsAddr]; |
790 | | |
791 | 3.35k | if( !ctuData.slice->isIntra() || cs.sps->getIBCFlag() ) |
792 | 3.35k | { |
793 | 3.35k | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); |
794 | 3.35k | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskDeriveCtuMotionInfo( cs, ctuRsAddr, ctuArea, param->common.perLineMiHist[line] ); |
795 | 3.35k | } |
796 | 0 | else |
797 | 0 | { |
798 | 0 | memset( NO_WARNING_class_memaccess( ctuData.motion ), MI_NOT_VALID, sizeof( MotionInfo ) * cs.pcv->num4x4CtuBlks ); |
799 | 0 | } |
800 | | |
801 | 3.35k | thisCtuState = MIDER_cont; |
802 | 3.35k | } |
803 | | |
804 | 3.34k | thisCtuState = LF_INIT; |
805 | | |
806 | 3.34k | ITT_TASKEND( itt_domain_dec, itt_handle_mider ); |
807 | 3.34k | } |
808 | | |
809 | 3.34k | case LF_INIT: |
810 | 3.34k | { |
811 | 3.34k | if( onlyCheckReadyState ) |
812 | 0 | return true; |
813 | | |
814 | 3.34k | ITT_TASKSTART( itt_domain_dec, itt_handle_lfcl ); |
815 | | |
816 | 6.70k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
817 | 3.35k | { |
818 | 3.35k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; |
819 | 3.35k | CtuData& ctuData = cs.getCtuData( ctuRsAddr ); |
820 | 3.35k | ctuData.lfParam[0] = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 0 )]; |
821 | 3.35k | ctuData.lfParam[1] = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 1 )]; |
822 | 3.35k | memset( ctuData.lfParam[0], 0, sizeof( LoopFilterParam ) * 2 * cs.pcv->num4x4CtuBlks ); |
823 | | |
824 | 3.35k | decLib.m_cLoopFilter.calcFilterStrengthsCTU( cs, ctuRsAddr ); |
825 | 3.35k | } |
826 | | |
827 | 3.34k | thisCtuState = INTER; |
828 | | |
829 | 3.34k | ITT_TASKEND( itt_domain_dec, itt_handle_lfcl ); |
830 | 3.34k | } |
831 | | |
832 | 22.6k | case INTER: |
833 | 22.6k | { |
834 | 22.6k | if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) )vvdec::DecLibRecon::ctuTask<false>(int, void*)::{lambda(vvdec::Slice const*)#1}::operator()(vvdec::Slice const*) constLine | Count | Source | 834 | 3.61k | if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) ) |
vvdec::DecLibRecon::ctuTask<true>(int, void*)::{lambda(vvdec::Slice const*)#1}::operator()(vvdec::Slice const*) constLine | Count | Source | 834 | 19.0k | if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) ) |
|
835 | 22.6k | { |
836 | | // not really necessary, but only for optimizing the wave-fronts |
837 | 22.6k | if( col > 1 && thisLine[col - 2] <= INTER ) |
838 | 18.6k | return false; |
839 | 4.00k | if( line > 0 && lineAbove[col] <= INTER ) |
840 | 674 | return false; |
841 | 4.00k | } |
842 | | |
843 | 3.35k | if( std::any_of( cs.picture->refPicExtDepBarriers.cbegin(), cs.picture->refPicExtDepBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) )Unexecuted instantiation: vvdec::DecLibRecon::ctuTask<false>(int, void*)::{lambda(vvdec::Barrier const*)#1}::operator()(vvdec::Barrier const*) constUnexecuted instantiation: vvdec::DecLibRecon::ctuTask<true>(int, void*)::{lambda(vvdec::Barrier const*)#1}::operator()(vvdec::Barrier const*) const |
844 | 0 | { |
845 | 0 | return false; |
846 | 0 | } |
847 | | |
848 | 3.35k | if( onlyCheckReadyState ) |
849 | 267 | return true; |
850 | | |
851 | 3.08k | ITT_TASKSTART( itt_domain_dec, itt_handle_inter ); |
852 | | |
853 | 6.16k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
854 | 3.08k | { |
855 | 3.08k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; |
856 | 3.08k | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); |
857 | 3.08k | const CtuData& ctuData = cs.getCtuData( ctuRsAddr ); |
858 | | |
859 | 3.08k | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskTrafoCtu( cs, ctuRsAddr, ctuArea ); |
860 | | |
861 | 3.08k | if( !ctuData.slice->isIntra() ) |
862 | 0 | { |
863 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskInterCtu( cs, ctuRsAddr, ctuArea ); |
864 | |
|
865 | 0 | if( cs.picture->stillReferenced ) |
866 | 0 | { |
867 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskFinishMotionInfo( cs, ctuRsAddr, ctu, line ); |
868 | 0 | } |
869 | 0 | } |
870 | 3.08k | } |
871 | | |
872 | 3.08k | thisCtuState = INTRA; |
873 | | |
874 | 3.08k | ITT_TASKEND( itt_domain_dec, itt_handle_inter ); |
875 | 3.08k | } |
876 | | |
877 | 57.4k | case INTRA: |
878 | 57.4k | { |
879 | 57.4k | if( col > 0 && thisLine[col - 1] <= INTRA_cont ) |
880 | 53.7k | return false; |
881 | | |
882 | 3.62k | if( line > 0 ) |
883 | 1.57k | { |
884 | 1.57k | if( col + 1 < tasksPerLine ) |
885 | 1.50k | { |
886 | 1.50k | if( lineAbove[col + 1] <= INTRA ) |
887 | 942 | return false; |
888 | 1.50k | } |
889 | 69 | else |
890 | 69 | { |
891 | 69 | if( lineAbove[col] <= INTRA_cont ) |
892 | 0 | return false; |
893 | 69 | } |
894 | 1.57k | } |
895 | 2.68k | if( onlyCheckReadyState ) |
896 | 675 | return true; |
897 | | |
898 | 2.01k | ITT_TASKSTART( itt_domain_dec, itt_handle_intra ); |
899 | | |
900 | 4.02k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
901 | 2.01k | { |
902 | 2.01k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; |
903 | 2.01k | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); |
904 | 2.01k | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskCriticalIntraKernel( cs, ctuRsAddr, ctuArea ); |
905 | | |
906 | 2.01k | thisCtuState = INTRA_cont; |
907 | 2.01k | } |
908 | | |
909 | 2.01k | thisCtuState = RSP; |
910 | | |
911 | 2.01k | ITT_TASKEND( itt_domain_dec, itt_handle_intra ); |
912 | 2.01k | } |
913 | | |
914 | 510k | case RSP: |
915 | 510k | { |
916 | | // RIRZIIIII |
917 | | // IIIIIXXXX |
918 | | // |
919 | | // - Z can be reshaped when it is no more an intra prediction source for X in the next line |
920 | | |
921 | | |
922 | 510k | if ( line + 1 < heightInCtus && col + 1 < tasksPerLine && lineBelow[col + 1] < INTRA_cont ) |
923 | 417k | return false; |
924 | 92.3k | else if( line + 1 < heightInCtus && lineBelow[col] < RSP ) |
925 | 91.7k | return false; |
926 | 548 | else if( col + 1 < tasksPerLine && thisLine [col + 1] < INTRA_cont ) // need this for the last line |
927 | 20 | return false; |
928 | | |
929 | 528 | if( onlyCheckReadyState ) |
930 | 151 | return true; |
931 | | |
932 | 377 | ITT_TASKSTART( itt_domain_dec, itt_handle_rsp ); |
933 | | |
934 | 531 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
935 | 154 | { |
936 | 154 | decLib.m_pcThreadResource[tid]->m_cReshaper.rspCtuBcw( cs, ctu, line ); |
937 | 154 | } |
938 | | |
939 | 377 | ITT_TASKEND( itt_domain_dec, itt_handle_rsp ); |
940 | | |
941 | 377 | thisCtuState = LF_V; |
942 | 377 | } |
943 | | |
944 | 418 | case LF_V: |
945 | 418 | { |
946 | 418 | if( col > 0 && thisLine[col - 1] < LF_V ) |
947 | 31 | return false; |
948 | 387 | if( onlyCheckReadyState ) |
949 | 10 | return true; |
950 | | |
951 | 377 | ITT_TASKSTART( itt_domain_dec, itt_handle_lfl ); |
952 | | |
953 | 531 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
954 | 154 | { |
955 | 154 | decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_VER ); |
956 | | |
957 | 154 | thisCtuState = LF_V_cont; |
958 | 154 | } |
959 | | |
960 | 377 | thisCtuState = LF_H; |
961 | | |
962 | 377 | ITT_TASKEND( itt_domain_dec, itt_handle_lfl ); |
963 | 377 | } |
964 | | |
965 | 8.07k | case LF_H: |
966 | 8.07k | { |
967 | 8.07k | if( line > 0 && lineAbove[col] < LF_H ) |
968 | 9 | return false; |
969 | | |
970 | 8.06k | if( line > 0 && col + 1 < tasksPerLine && lineAbove[col + 1] < LF_V_cont ) |
971 | 39 | return false; |
972 | | |
973 | 8.02k | if( col + 1 < tasksPerLine && thisLine[col + 1] < LF_V_cont ) |
974 | 7.62k | return false; |
975 | | |
976 | 395 | if( onlyCheckReadyState ) |
977 | 65 | return true; |
978 | | |
979 | 330 | ITT_TASKSTART( itt_domain_dec, itt_handle_lfl ); |
980 | | |
981 | 437 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
982 | 107 | { |
983 | 107 | decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_HOR ); |
984 | 107 | } |
985 | | |
986 | 330 | thisCtuState = PRESAO; |
987 | | |
988 | 330 | ITT_TASKEND( itt_domain_dec, itt_handle_lfl ); |
989 | 330 | } |
990 | | |
991 | 5.19k | case PRESAO: |
992 | 5.19k | { |
993 | | // only last CTU processes full line |
994 | 5.19k | if( col == tasksPerLine - 1 ) |
995 | 733 | { |
996 | 733 | if( line > 0 && lineAbove[col] <= PRESAO ) |
997 | 146 | return false; |
998 | | |
999 | 928 | for( int c = 0; c < tasksPerLine; ++c ) |
1000 | 918 | { |
1001 | 918 | if( thisLine[c] < PRESAO ) |
1002 | 4 | return false; |
1003 | | |
1004 | 914 | if( line + 1 < heightInCtus && lineBelow[c] < PRESAO ) |
1005 | 573 | return false; |
1006 | 914 | } |
1007 | 10 | if( onlyCheckReadyState ) |
1008 | 4 | return true; |
1009 | | |
1010 | 6 | ITT_TASKSTART( itt_domain_dec, itt_handle_presao ); |
1011 | | |
1012 | 6 | if( cs.sps->getUseSAO() ) |
1013 | 6 | { |
1014 | 6 | decLib.m_cSAO.SAOPrepareCTULine( cs, getLineArea( cs, line, true ) ); |
1015 | 6 | } |
1016 | | |
1017 | 6 | ITT_TASKEND( itt_domain_dec, itt_handle_presao ); |
1018 | 6 | } |
1019 | 4.46k | else if( thisLine[tasksPerLine - 1] <= PRESAO ) // wait for last CTU to finish PRESAO |
1020 | 4.21k | { |
1021 | 4.21k | return false; |
1022 | 4.21k | } |
1023 | 249 | if( onlyCheckReadyState ) |
1024 | 12 | return true; |
1025 | | |
1026 | 237 | thisCtuState = SAO; |
1027 | 237 | } |
1028 | | |
1029 | 237 | case SAO: |
1030 | 237 | { |
1031 | 237 | if( onlyCheckReadyState ) |
1032 | 0 | return true; |
1033 | | |
1034 | | // only last CTU processes full line |
1035 | 237 | if( cs.sps->getUseSAO() ) |
1036 | 18 | { |
1037 | 18 | ITT_TASKSTART( itt_domain_dec, itt_handle_sao ); |
1038 | | |
1039 | 36 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
1040 | 18 | { |
1041 | 18 | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); |
1042 | 18 | decLib.m_cSAO.SAOProcessCTU( cs, ctuArea ); |
1043 | 18 | } |
1044 | | |
1045 | 18 | ITT_TASKEND( itt_domain_dec, itt_handle_sao ); |
1046 | 18 | } |
1047 | 237 | if( param->common.doALF ) |
1048 | 18 | { |
1049 | 18 | ITT_TASKSTART( itt_domain_dec, itt_handle_alf ); |
1050 | | |
1051 | 36 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
1052 | 18 | { |
1053 | 18 | AdaptiveLoopFilter::prepareCTU( cs, ctu, line ); |
1054 | | |
1055 | 18 | thisCtuState = SAO_cont; |
1056 | 18 | } |
1057 | | |
1058 | 18 | ITT_TASKEND( itt_domain_dec, itt_handle_alf ); |
1059 | 18 | } |
1060 | | |
1061 | 237 | thisCtuState = ALF; |
1062 | 237 | } |
1063 | | |
1064 | 731 | case ALF: |
1065 | 731 | { |
1066 | 731 | if( param->common.doALF ) |
1067 | 512 | { |
1068 | 512 | const bool a = line > 0; |
1069 | 512 | const bool b = line + 1 < heightInCtus; |
1070 | 512 | const bool c = col > 0; |
1071 | 512 | const bool d = col + 1 < tasksPerLine; |
1072 | | |
1073 | 512 | if( a ) |
1074 | 120 | { |
1075 | 120 | if( c && lineAbove[col - 1] < ALF ) return false; |
1076 | 111 | if( lineAbove[col ] < ALF ) return false; |
1077 | 108 | if( d && lineAbove[col + 1] < SAO_cont ) return false; |
1078 | 108 | } |
1079 | | |
1080 | 500 | if( b ) |
1081 | 487 | { |
1082 | 487 | if( c && lineBelow[col - 1] < ALF ) return false; |
1083 | 85 | if( lineBelow[col ] < ALF ) return false; |
1084 | 30 | if( d && lineBelow[col + 1] < SAO_cont ) return false; |
1085 | 30 | } |
1086 | | |
1087 | 37 | if( c && thisLine[col - 1] < ALF ) return false; |
1088 | 36 | if( d && thisLine[col + 1] < SAO_cont ) return false; |
1089 | | |
1090 | 31 | if( onlyCheckReadyState ) |
1091 | 14 | return true; |
1092 | | |
1093 | 17 | ITT_TASKSTART( itt_domain_dec, itt_handle_alf ); |
1094 | 35 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) |
1095 | 18 | { |
1096 | 18 | decLib.m_cALF.processCTU( cs, ctu, line, tid ); |
1097 | 18 | } |
1098 | 17 | ITT_TASKEND( itt_domain_dec, itt_handle_alf ); |
1099 | 17 | } |
1100 | 219 | else if( onlyCheckReadyState ) |
1101 | 0 | return true; |
1102 | | |
1103 | 236 | thisCtuState = DONE; |
1104 | 236 | } |
1105 | | |
1106 | 236 | default: |
1107 | 236 | CHECKD( thisCtuState != DONE, "Wrong CTU state" ); |
1108 | 646k | } // end switch |
1109 | 646k | } |
1110 | 646k | catch( ... ) |
1111 | 646k | { |
1112 | 3.84k | std::rethrow_exception( std::current_exception() ); |
1113 | 3.84k | } |
1114 | | |
1115 | 18 | return true; |
1116 | 646k | } bool vvdec::DecLibRecon::ctuTask<false>(int, void*) Line | Count | Source | 732 | 4.55k | { | 733 | 4.55k | CtuTaskParam* param = static_cast<CtuTaskParam*>( task_param ); | 734 | | | 735 | 4.55k | const int taskCol = param->taskCol; | 736 | 4.55k | const int line = param->taskLine; | 737 | 4.55k | const int col = taskCol; | 738 | | | 739 | 4.55k | auto& cs = *param->common.cs; | 740 | 4.55k | auto& decLib = param->common.decLib; | 741 | 4.55k | const int tasksPerLine = param->numTasksPerLine; | 742 | 4.55k | const int heightInCtus = cs.pcv->heightInCtus; | 743 | | | 744 | 4.55k | CtuState& thisCtuState = param->common.ctuStates[line * tasksPerLine + taskCol]; | 745 | 4.55k | const CtuState* thisLine = ¶m->common.ctuStates[line * tasksPerLine]; | 746 | 4.55k | const CtuState* lineAbove = thisLine - tasksPerLine; | 747 | 4.55k | const CtuState* lineBelow = thisLine + tasksPerLine; | 748 | | | 749 | 4.55k | const int ctuStart = param->ctuStart; | 750 | 4.55k | const int ctuEnd = param->ctuEnd; | 751 | | | 752 | 4.55k | try | 753 | 4.55k | { | 754 | 4.55k | if( cs.picture->m_ctuTaskCounter.hasException() ) | 755 | 4 | { | 756 | 4 | std::rethrow_exception( cs.picture->m_ctuTaskCounter.getException() ); | 757 | 4 | } | 758 | | | 759 | 4.55k | switch( thisCtuState.load() ) | 760 | 4.55k | { | 761 | | // all case statements fall through to continue with next task, unless they return false due to unsatisfied preconditions | 762 | | | 763 | 3.35k | case MIDER: | 764 | 3.35k | { | 765 | 3.35k | if( col > 0 && thisLine[col - 1] <= MIDER_cont ) | 766 | 0 | return false; | 767 | 3.35k | if( line > 0 ) | 768 | 1.23k | { | 769 | 1.23k | if( col + 1 < tasksPerLine ) | 770 | 958 | { | 771 | 958 | if( lineAbove[col + 1] <= MIDER ) | 772 | 0 | return false; | 773 | 958 | } | 774 | 280 | else | 775 | 280 | { | 776 | 280 | if( lineAbove[col] <= MIDER_cont ) | 777 | 0 | return false; | 778 | 280 | } | 779 | 1.23k | } | 780 | 3.35k | if( onlyCheckReadyState ) | 781 | 0 | 
return true; | 782 | | | 783 | 3.35k | ITT_TASKSTART( itt_domain_dec, itt_handle_mider ); | 784 | | | 785 | 6.71k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 786 | 3.35k | { | 787 | 3.35k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 788 | 3.35k | CtuData& ctuData = cs.getCtuData( ctuRsAddr ); | 789 | 3.35k | ctuData.motion = &decLib.m_motionInfo[cs.pcv->num4x4CtuBlks * ctuRsAddr]; | 790 | | | 791 | 3.35k | if( !ctuData.slice->isIntra() || cs.sps->getIBCFlag() ) | 792 | 3.35k | { | 793 | 3.35k | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 794 | 3.35k | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskDeriveCtuMotionInfo( cs, ctuRsAddr, ctuArea, param->common.perLineMiHist[line] ); | 795 | 3.35k | } | 796 | 0 | else | 797 | 0 | { | 798 | 0 | memset( NO_WARNING_class_memaccess( ctuData.motion ), MI_NOT_VALID, sizeof( MotionInfo ) * cs.pcv->num4x4CtuBlks ); | 799 | 0 | } | 800 | | | 801 | 3.35k | thisCtuState = MIDER_cont; | 802 | 3.35k | } | 803 | | | 804 | 3.35k | thisCtuState = LF_INIT; | 805 | | | 806 | 3.35k | ITT_TASKEND( itt_domain_dec, itt_handle_mider ); | 807 | 3.35k | } | 808 | | | 809 | 3.35k | case LF_INIT: | 810 | 3.35k | { | 811 | 3.35k | if( onlyCheckReadyState ) | 812 | 0 | return true; | 813 | | | 814 | 3.35k | ITT_TASKSTART( itt_domain_dec, itt_handle_lfcl ); | 815 | | | 816 | 6.71k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 817 | 3.35k | { | 818 | 3.35k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 819 | 3.35k | CtuData& ctuData = cs.getCtuData( ctuRsAddr ); | 820 | 3.35k | ctuData.lfParam[0] = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 0 )]; | 821 | 3.35k | ctuData.lfParam[1] = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 1 )]; | 822 | 3.35k | memset( ctuData.lfParam[0], 0, sizeof( LoopFilterParam ) * 2 * cs.pcv->num4x4CtuBlks ); | 823 | | | 824 | 3.35k | decLib.m_cLoopFilter.calcFilterStrengthsCTU( cs, ctuRsAddr ); | 825 | 3.35k | } | 826 
| | | 827 | 3.35k | thisCtuState = INTER; | 828 | | | 829 | 3.35k | ITT_TASKEND( itt_domain_dec, itt_handle_lfcl ); | 830 | 3.35k | } | 831 | | | 832 | 3.62k | case INTER: | 833 | 3.62k | { | 834 | 3.62k | if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) ) | 835 | 3.61k | { | 836 | | // not really necessary, but only for optimizing the wave-fronts | 837 | 3.61k | if( col > 1 && thisLine[col - 2] <= INTER ) | 838 | 518 | return false; | 839 | 3.10k | if( line > 0 && lineAbove[col] <= INTER ) | 840 | 19 | return false; | 841 | 3.10k | } | 842 | | | 843 | 3.08k | if( std::any_of( cs.picture->refPicExtDepBarriers.cbegin(), cs.picture->refPicExtDepBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) ) | 844 | 0 | { | 845 | 0 | return false; | 846 | 0 | } | 847 | | | 848 | 3.08k | if( onlyCheckReadyState ) | 849 | 0 | return true; | 850 | | | 851 | 3.08k | ITT_TASKSTART( itt_domain_dec, itt_handle_inter ); | 852 | | | 853 | 6.16k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 854 | 3.08k | { | 855 | 3.08k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 856 | 3.08k | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 857 | 3.08k | const CtuData& ctuData = cs.getCtuData( ctuRsAddr ); | 858 | | | 859 | 3.08k | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskTrafoCtu( cs, ctuRsAddr, ctuArea ); | 860 | | | 861 | 3.08k | if( !ctuData.slice->isIntra() ) | 862 | 0 | { | 863 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskInterCtu( cs, ctuRsAddr, ctuArea ); | 864 | |
| 865 | 0 | if( cs.picture->stillReferenced ) | 866 | 0 | { | 867 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskFinishMotionInfo( cs, ctuRsAddr, ctu, line ); | 868 | 0 | } | 869 | 0 | } | 870 | 3.08k | } | 871 | | | 872 | 3.08k | thisCtuState = INTRA; | 873 | | | 874 | 3.08k | ITT_TASKEND( itt_domain_dec, itt_handle_inter ); | 875 | 3.08k | } | 876 | | | 877 | 3.76k | case INTRA: | 878 | 3.76k | { | 879 | 3.76k | if( col > 0 && thisLine[col - 1] <= INTRA_cont ) | 880 | 1.68k | return false; | 881 | | | 882 | 2.07k | if( line > 0 ) | 883 | 576 | { | 884 | 576 | if( col + 1 < tasksPerLine ) | 885 | 540 | { | 886 | 540 | if( lineAbove[col + 1] <= INTRA ) | 887 | 65 | return false; | 888 | 540 | } | 889 | 36 | else | 890 | 36 | { | 891 | 36 | if( lineAbove[col] <= INTRA_cont ) | 892 | 0 | return false; | 893 | 36 | } | 894 | 576 | } | 895 | 2.01k | if( onlyCheckReadyState ) | 896 | 0 | return true; | 897 | | | 898 | 2.01k | ITT_TASKSTART( itt_domain_dec, itt_handle_intra ); | 899 | | | 900 | 4.02k | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 901 | 2.01k | { | 902 | 2.01k | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 903 | 2.01k | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 904 | 2.01k | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskCriticalIntraKernel( cs, ctuRsAddr, ctuArea ); | 905 | | | 906 | 2.01k | thisCtuState = INTRA_cont; | 907 | 2.01k | } | 908 | | | 909 | 2.01k | thisCtuState = RSP; | 910 | | | 911 | 2.01k | ITT_TASKEND( itt_domain_dec, itt_handle_intra ); | 912 | 2.01k | } | 913 | | | 914 | 2.16k | case RSP: | 915 | 2.16k | { | 916 | | // RIRZIIIII | 917 | | // IIIIIXXXX | 918 | | // | 919 | | // - Z can be reshaped when it is no more an intra prediction source for X in the next line | 920 | | | 921 | | | 922 | 2.16k | if ( line + 1 < heightInCtus && col + 1 < tasksPerLine && lineBelow[col + 1] < INTRA_cont ) | 923 | 1.39k | return false; | 924 | 767 | else if( line + 1 < heightInCtus && lineBelow[col] < RSP 
) | 925 | 238 | return false; | 926 | 529 | else if( col + 1 < tasksPerLine && thisLine [col + 1] < INTRA_cont ) // need this for the last line | 927 | 3 | return false; | 928 | | | 929 | 526 | if( onlyCheckReadyState ) | 930 | 0 | return true; | 931 | | | 932 | 526 | ITT_TASKSTART( itt_domain_dec, itt_handle_rsp ); | 933 | | | 934 | 680 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 935 | 154 | { | 936 | 154 | decLib.m_pcThreadResource[tid]->m_cReshaper.rspCtuBcw( cs, ctu, line ); | 937 | 154 | } | 938 | | | 939 | 526 | ITT_TASKEND( itt_domain_dec, itt_handle_rsp ); | 940 | | | 941 | 526 | thisCtuState = LF_V; | 942 | 526 | } | 943 | | | 944 | 536 | case LF_V: | 945 | 536 | { | 946 | 536 | if( col > 0 && thisLine[col - 1] < LF_V ) | 947 | 10 | return false; | 948 | 526 | if( onlyCheckReadyState ) | 949 | 0 | return true; | 950 | | | 951 | 526 | ITT_TASKSTART( itt_domain_dec, itt_handle_lfl ); | 952 | | | 953 | 680 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 954 | 154 | { | 955 | 154 | decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_VER ); | 956 | | | 957 | 154 | thisCtuState = LF_V_cont; | 958 | 154 | } | 959 | | | 960 | 526 | thisCtuState = LF_H; | 961 | | | 962 | 526 | ITT_TASKEND( itt_domain_dec, itt_handle_lfl ); | 963 | 526 | } | 964 | | | 965 | 591 | case LF_H: | 966 | 591 | { | 967 | 591 | if( line > 0 && lineAbove[col] < LF_H ) | 968 | 2 | return false; | 969 | | | 970 | 589 | if( line > 0 && col + 1 < tasksPerLine && lineAbove[col + 1] < LF_V_cont ) | 971 | 0 | return false; | 972 | | | 973 | 589 | if( col + 1 < tasksPerLine && thisLine[col + 1] < LF_V_cont ) | 974 | 110 | return false; | 975 | | | 976 | 479 | if( onlyCheckReadyState ) | 977 | 0 | return true; | 978 | | | 979 | 479 | ITT_TASKSTART( itt_domain_dec, itt_handle_lfl ); | 980 | | | 981 | 586 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 982 | 107 | { | 983 | 107 | decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_HOR ); 
| 984 | 107 | } | 985 | | | 986 | 479 | thisCtuState = PRESAO; | 987 | | | 988 | 479 | ITT_TASKEND( itt_domain_dec, itt_handle_lfl ); | 989 | 479 | } | 990 | | | 991 | 494 | case PRESAO: | 992 | 494 | { | 993 | | // only last CTU processes full line | 994 | 494 | if( col == tasksPerLine - 1 ) | 995 | 36 | { | 996 | 36 | if( line > 0 && lineAbove[col] <= PRESAO ) | 997 | 3 | return false; | 998 | | | 999 | 51 | for( int c = 0; c < tasksPerLine; ++c ) | 1000 | 45 | { | 1001 | 45 | if( thisLine[c] < PRESAO ) | 1002 | 4 | return false; | 1003 | | | 1004 | 41 | if( line + 1 < heightInCtus && lineBelow[c] < PRESAO ) | 1005 | 23 | return false; | 1006 | 41 | } | 1007 | 6 | if( onlyCheckReadyState ) | 1008 | 0 | return true; | 1009 | | | 1010 | 6 | ITT_TASKSTART( itt_domain_dec, itt_handle_presao ); | 1011 | | | 1012 | 6 | if( cs.sps->getUseSAO() ) | 1013 | 6 | { | 1014 | 6 | decLib.m_cSAO.SAOPrepareCTULine( cs, getLineArea( cs, line, true ) ); | 1015 | 6 | } | 1016 | | | 1017 | 6 | ITT_TASKEND( itt_domain_dec, itt_handle_presao ); | 1018 | 6 | } | 1019 | 458 | else if( thisLine[tasksPerLine - 1] <= PRESAO ) // wait for last CTU to finish PRESAO | 1020 | 75 | { | 1021 | 75 | return false; | 1022 | 75 | } | 1023 | 389 | if( onlyCheckReadyState ) | 1024 | 0 | return true; | 1025 | | | 1026 | 389 | thisCtuState = SAO; | 1027 | 389 | } | 1028 | | | 1029 | 389 | case SAO: | 1030 | 389 | { | 1031 | 389 | if( onlyCheckReadyState ) | 1032 | 0 | return true; | 1033 | | | 1034 | | // only last CTU processes full line | 1035 | 389 | if( cs.sps->getUseSAO() ) | 1036 | 18 | { | 1037 | 18 | ITT_TASKSTART( itt_domain_dec, itt_handle_sao ); | 1038 | | | 1039 | 36 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 1040 | 18 | { | 1041 | 18 | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 1042 | 18 | decLib.m_cSAO.SAOProcessCTU( cs, ctuArea ); | 1043 | 18 | } | 1044 | | | 1045 | 18 | ITT_TASKEND( itt_domain_dec, itt_handle_sao ); | 1046 | 18 | } | 1047 | 389 | if( 
param->common.doALF ) | 1048 | 18 | { | 1049 | 18 | ITT_TASKSTART( itt_domain_dec, itt_handle_alf ); | 1050 | | | 1051 | 36 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 1052 | 18 | { | 1053 | 18 | AdaptiveLoopFilter::prepareCTU( cs, ctu, line ); | 1054 | | | 1055 | 18 | thisCtuState = SAO_cont; | 1056 | 18 | } | 1057 | | | 1058 | 18 | ITT_TASKEND( itt_domain_dec, itt_handle_alf ); | 1059 | 18 | } | 1060 | | | 1061 | 389 | thisCtuState = ALF; | 1062 | 389 | } | 1063 | | | 1064 | 403 | case ALF: | 1065 | 403 | { | 1066 | 403 | if( param->common.doALF ) | 1067 | 32 | { | 1068 | 32 | const bool a = line > 0; | 1069 | 32 | const bool b = line + 1 < heightInCtus; | 1070 | 32 | const bool c = col > 0; | 1071 | 32 | const bool d = col + 1 < tasksPerLine; | 1072 | | | 1073 | 32 | if( a ) | 1074 | 22 | { | 1075 | 22 | if( c && lineAbove[col - 1] < ALF ) return false; | 1076 | 20 | if( lineAbove[col ] < ALF ) return false; | 1077 | 19 | if( d && lineAbove[col + 1] < SAO_cont ) return false; | 1078 | 19 | } | 1079 | | | 1080 | 29 | if( b ) | 1081 | 23 | { | 1082 | 23 | if( c && lineBelow[col - 1] < ALF ) return false; | 1083 | 14 | if( lineBelow[col ] < ALF ) return false; | 1084 | 12 | if( d && lineBelow[col + 1] < SAO_cont ) return false; | 1085 | 12 | } | 1086 | | | 1087 | 18 | if( c && thisLine[col - 1] < ALF ) return false; | 1088 | 18 | if( d && thisLine[col + 1] < SAO_cont ) return false; | 1089 | | | 1090 | 18 | if( onlyCheckReadyState ) | 1091 | 0 | return true; | 1092 | | | 1093 | 18 | ITT_TASKSTART( itt_domain_dec, itt_handle_alf ); | 1094 | 36 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 1095 | 18 | { | 1096 | 18 | decLib.m_cALF.processCTU( cs, ctu, line, tid ); | 1097 | 18 | } | 1098 | 18 | ITT_TASKEND( itt_domain_dec, itt_handle_alf ); | 1099 | 18 | } | 1100 | 371 | else if( onlyCheckReadyState ) | 1101 | 0 | return true; | 1102 | | | 1103 | 389 | thisCtuState = DONE; | 1104 | 389 | } | 1105 | | | 1106 | 389 | default: | 1107 | 389 | CHECKD( 
thisCtuState != DONE, "Wrong CTU state" ); | 1108 | 4.55k | } // end switch | 1109 | 4.55k | } | 1110 | 4.55k | catch( ... ) | 1111 | 4.55k | { | 1112 | 376 | std::rethrow_exception( std::current_exception() ); | 1113 | 376 | } | 1114 | | | 1115 | 18 | return true; | 1116 | 4.55k | } |
bool vvdec::DecLibRecon::ctuTask<true>(int, void*) Line | Count | Source | 732 | 641k | { | 733 | 641k | CtuTaskParam* param = static_cast<CtuTaskParam*>( task_param ); | 734 | | | 735 | 641k | const int taskCol = param->taskCol; | 736 | 641k | const int line = param->taskLine; | 737 | 641k | const int col = taskCol; | 738 | | | 739 | 641k | auto& cs = *param->common.cs; | 740 | 641k | auto& decLib = param->common.decLib; | 741 | 641k | const int tasksPerLine = param->numTasksPerLine; | 742 | 641k | const int heightInCtus = cs.pcv->heightInCtus; | 743 | | | 744 | 641k | CtuState& thisCtuState = param->common.ctuStates[line * tasksPerLine + taskCol]; | 745 | 641k | const CtuState* thisLine = ¶m->common.ctuStates[line * tasksPerLine]; | 746 | 641k | const CtuState* lineAbove = thisLine - tasksPerLine; | 747 | 641k | const CtuState* lineBelow = thisLine + tasksPerLine; | 748 | | | 749 | 641k | const int ctuStart = param->ctuStart; | 750 | 641k | const int ctuEnd = param->ctuEnd; | 751 | | | 752 | 641k | try | 753 | 641k | { | 754 | 641k | if( cs.picture->m_ctuTaskCounter.hasException() ) | 755 | 3.47k | { | 756 | 3.47k | std::rethrow_exception( cs.picture->m_ctuTaskCounter.getException() ); | 757 | 3.47k | } | 758 | | | 759 | 641k | switch( thisCtuState.load() ) | 760 | 641k | { | 761 | | // all case statements fall through to continue with next task, unless they return false due to unsatisfied preconditions | 762 | | | 763 | 44.7k | case MIDER: | 764 | 44.7k | { | 765 | 44.7k | if( col > 0 && thisLine[col - 1] <= MIDER_cont ) | 766 | 41.3k | return false; | 767 | 3.35k | if( line > 0 ) | 768 | 1.24k | { | 769 | 1.24k | if( col + 1 < tasksPerLine ) | 770 | 960 | { | 771 | 960 | if( lineAbove[col + 1] <= MIDER ) | 772 | 0 | return false; | 773 | 960 | } | 774 | 282 | else | 775 | 282 | { | 776 | 282 | if( lineAbove[col] <= MIDER_cont ) | 777 | 0 | return false; | 778 | 282 | } | 779 | 1.24k | } | 780 | 3.35k | if( onlyCheckReadyState ) | 781 | 3.35k | return true; | 
782 | | | 783 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_mider ); | 784 | | | 785 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 786 | 0 | { | 787 | 0 | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 788 | 0 | CtuData& ctuData = cs.getCtuData( ctuRsAddr ); | 789 | 0 | ctuData.motion = &decLib.m_motionInfo[cs.pcv->num4x4CtuBlks * ctuRsAddr]; | 790 | |
| 791 | 0 | if( !ctuData.slice->isIntra() || cs.sps->getIBCFlag() ) | 792 | 0 | { | 793 | 0 | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 794 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskDeriveCtuMotionInfo( cs, ctuRsAddr, ctuArea, param->common.perLineMiHist[line] ); | 795 | 0 | } | 796 | 0 | else | 797 | 0 | { | 798 | 0 | memset( NO_WARNING_class_memaccess( ctuData.motion ), MI_NOT_VALID, sizeof( MotionInfo ) * cs.pcv->num4x4CtuBlks ); | 799 | 0 | } | 800 | |
| 801 | 0 | thisCtuState = MIDER_cont; | 802 | 0 | } | 803 | | | 804 | 18.4E | thisCtuState = LF_INIT; | 805 | | | 806 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_mider ); | 807 | 18.4E | } | 808 | | | 809 | 18.4E | case LF_INIT: | 810 | 18.4E | { | 811 | 18.4E | if( onlyCheckReadyState ) | 812 | 0 | return true; | 813 | | | 814 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_lfcl ); | 815 | | | 816 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 817 | 0 | { | 818 | 0 | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 819 | 0 | CtuData& ctuData = cs.getCtuData( ctuRsAddr ); | 820 | 0 | ctuData.lfParam[0] = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 0 )]; | 821 | 0 | ctuData.lfParam[1] = &decLib.m_loopFilterParam[cs.pcv->num4x4CtuBlks * ( 2 * ctuRsAddr + 1 )]; | 822 | 0 | memset( ctuData.lfParam[0], 0, sizeof( LoopFilterParam ) * 2 * cs.pcv->num4x4CtuBlks ); | 823 | |
| 824 | 0 | decLib.m_cLoopFilter.calcFilterStrengthsCTU( cs, ctuRsAddr ); | 825 | 0 | } | 826 | | | 827 | 18.4E | thisCtuState = INTER; | 828 | | | 829 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_lfcl ); | 830 | 18.4E | } | 831 | | | 832 | 19.0k | case INTER: | 833 | 19.0k | { | 834 | 19.0k | if( std::all_of( cs.picture->slices.begin(), cs.picture->slices.end(), []( const Slice* pcSlice ) { return pcSlice->isIntra(); } ) ) | 835 | 19.0k | { | 836 | | // not really necessary, but only for optimizing the wave-fronts | 837 | 19.0k | if( col > 1 && thisLine[col - 2] <= INTER ) | 838 | 18.1k | return false; | 839 | 908 | if( line > 0 && lineAbove[col] <= INTER ) | 840 | 655 | return false; | 841 | 908 | } | 842 | | | 843 | 265 | if( std::any_of( cs.picture->refPicExtDepBarriers.cbegin(), cs.picture->refPicExtDepBarriers.cend(), []( const Barrier* b ) { return b->isBlocked(); } ) ) | 844 | 0 | { | 845 | 0 | return false; | 846 | 0 | } | 847 | | | 848 | 265 | if( onlyCheckReadyState ) | 849 | 267 | return true; | 850 | | | 851 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_inter ); | 852 | | | 853 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 854 | 0 | { | 855 | 0 | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 856 | 0 | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 857 | 0 | const CtuData& ctuData = cs.getCtuData( ctuRsAddr ); | 858 | |
| 859 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskTrafoCtu( cs, ctuRsAddr, ctuArea ); | 860 | |
| 861 | 0 | if( !ctuData.slice->isIntra() ) | 862 | 0 | { | 863 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskInterCtu( cs, ctuRsAddr, ctuArea ); | 864 | |
| 865 | 0 | if( cs.picture->stillReferenced ) | 866 | 0 | { | 867 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskFinishMotionInfo( cs, ctuRsAddr, ctu, line ); | 868 | 0 | } | 869 | 0 | } | 870 | 0 | } | 871 | | | 872 | 18.4E | thisCtuState = INTRA; | 873 | | | 874 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_inter ); | 875 | 18.4E | } | 876 | | | 877 | 53.6k | case INTRA: | 878 | 53.6k | { | 879 | 53.6k | if( col > 0 && thisLine[col - 1] <= INTRA_cont ) | 880 | 52.0k | return false; | 881 | | | 882 | 1.55k | if( line > 0 ) | 883 | 996 | { | 884 | 996 | if( col + 1 < tasksPerLine ) | 885 | 963 | { | 886 | 963 | if( lineAbove[col + 1] <= INTRA ) | 887 | 877 | return false; | 888 | 963 | } | 889 | 33 | else | 890 | 33 | { | 891 | 33 | if( lineAbove[col] <= INTRA_cont ) | 892 | 0 | return false; | 893 | 33 | } | 894 | 996 | } | 895 | 674 | if( onlyCheckReadyState ) | 896 | 675 | return true; | 897 | | | 898 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_intra ); | 899 | | | 900 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 901 | 0 | { | 902 | 0 | const int ctuRsAddr = ctu + line * cs.pcv->widthInCtus; | 903 | 0 | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 904 | 0 | decLib.m_pcThreadResource[tid]->m_cCuDecoder.TaskCriticalIntraKernel( cs, ctuRsAddr, ctuArea ); | 905 | |
| 906 | 0 | thisCtuState = INTRA_cont; | 907 | 0 | } | 908 | | | 909 | 18.4E | thisCtuState = RSP; | 910 | | | 911 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_intra ); | 912 | 18.4E | } | 913 | | | 914 | 508k | case RSP: | 915 | 508k | { | 916 | | // RIRZIIIII | 917 | | // IIIIIXXXX | 918 | | // | 919 | | // - Z can be reshaped when it is no more an intra prediction source for X in the next line | 920 | | | 921 | | | 922 | 508k | if ( line + 1 < heightInCtus && col + 1 < tasksPerLine && lineBelow[col + 1] < INTRA_cont ) | 923 | 416k | return false; | 924 | 91.6k | else if( line + 1 < heightInCtus && lineBelow[col] < RSP ) | 925 | 91.5k | return false; | 926 | 139 | else if( col + 1 < tasksPerLine && thisLine [col + 1] < INTRA_cont ) // need this for the last line | 927 | 17 | return false; | 928 | | | 929 | 2 | if( onlyCheckReadyState ) | 930 | 151 | return true; | 931 | | | 932 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_rsp ); | 933 | | | 934 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 935 | 0 | { | 936 | 0 | decLib.m_pcThreadResource[tid]->m_cReshaper.rspCtuBcw( cs, ctu, line ); | 937 | 0 | } | 938 | | | 939 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_rsp ); | 940 | | | 941 | 18.4E | thisCtuState = LF_V; | 942 | 18.4E | } | 943 | | | 944 | 18.4E | case LF_V: | 945 | 18.4E | { | 946 | 18.4E | if( col > 0 && thisLine[col - 1] < LF_V ) | 947 | 21 | return false; | 948 | 18.4E | if( onlyCheckReadyState ) | 949 | 10 | return true; | 950 | | | 951 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_lfl ); | 952 | | | 953 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 954 | 0 | { | 955 | 0 | decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_VER ); | 956 | |
| 957 | 0 | thisCtuState = LF_V_cont; | 958 | 0 | } | 959 | | | 960 | 18.4E | thisCtuState = LF_H; | 961 | | | 962 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_lfl ); | 963 | 18.4E | } | 964 | | | 965 | 7.47k | case LF_H: | 966 | 7.47k | { | 967 | 7.47k | if( line > 0 && lineAbove[col] < LF_H ) | 968 | 7 | return false; | 969 | | | 970 | 7.47k | if( line > 0 && col + 1 < tasksPerLine && lineAbove[col + 1] < LF_V_cont ) | 971 | 39 | return false; | 972 | | | 973 | 7.58k | if( col + 1 < tasksPerLine && thisLine[col + 1] < LF_V_cont ) | 974 | 7.51k | return false; | 975 | | | 976 | 18.4E | if( onlyCheckReadyState ) | 977 | 65 | return true; | 978 | | | 979 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_lfl ); | 980 | | | 981 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 982 | 0 | { | 983 | 0 | decLib.m_cLoopFilter.loopFilterCTU( cs, MAX_NUM_CHANNEL_TYPE, ctu, line, EDGE_HOR ); | 984 | 0 | } | 985 | | | 986 | 18.4E | thisCtuState = PRESAO; | 987 | | | 988 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_lfl ); | 989 | 18.4E | } | 990 | | | 991 | 4.69k | case PRESAO: | 992 | 4.69k | { | 993 | | // only last CTU processes full line | 994 | 4.69k | if( col == tasksPerLine - 1 ) | 995 | 697 | { | 996 | 697 | if( line > 0 && lineAbove[col] <= PRESAO ) | 997 | 143 | return false; | 998 | | | 999 | 877 | for( int c = 0; c < tasksPerLine; ++c ) | 1000 | 873 | { | 1001 | 873 | if( thisLine[c] < PRESAO ) | 1002 | 0 | return false; | 1003 | | | 1004 | 873 | if( line + 1 < heightInCtus && lineBelow[c] < PRESAO ) | 1005 | 550 | return false; | 1006 | 873 | } | 1007 | 4 | if( onlyCheckReadyState ) | 1008 | 4 | return true; | 1009 | | | 1010 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_presao ); | 1011 | |
| 1012 | 0 | if( cs.sps->getUseSAO() ) | 1013 | 0 | { | 1014 | 0 | decLib.m_cSAO.SAOPrepareCTULine( cs, getLineArea( cs, line, true ) ); | 1015 | 0 | } | 1016 | |
| 1017 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_presao ); | 1018 | 0 | } | 1019 | 4.00k | else if( thisLine[tasksPerLine - 1] <= PRESAO ) // wait for last CTU to finish PRESAO | 1020 | 4.14k | { | 1021 | 4.14k | return false; | 1022 | 4.14k | } | 1023 | 18.4E | if( onlyCheckReadyState ) | 1024 | 12 | return true; | 1025 | | | 1026 | 18.4E | thisCtuState = SAO; | 1027 | 18.4E | } | 1028 | | | 1029 | 18.4E | case SAO: | 1030 | 18.4E | { | 1031 | 18.4E | if( onlyCheckReadyState ) | 1032 | 0 | return true; | 1033 | | | 1034 | | // only last CTU processes full line | 1035 | 18.4E | if( cs.sps->getUseSAO() ) | 1036 | 0 | { | 1037 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_sao ); | 1038 | |
| 1039 | 0 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 1040 | 0 | { | 1041 | 0 | const UnitArea ctuArea = getCtuArea( cs, ctu, line, true ); | 1042 | 0 | decLib.m_cSAO.SAOProcessCTU( cs, ctuArea ); | 1043 | 0 | } | 1044 | |
| 1045 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_sao ); | 1046 | 0 | } | 1047 | 18.4E | if( param->common.doALF ) | 1048 | 0 | { | 1049 | 0 | ITT_TASKSTART( itt_domain_dec, itt_handle_alf ); | 1050 | |
| 1051 | 0 | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 1052 | 0 | { | 1053 | 0 | AdaptiveLoopFilter::prepareCTU( cs, ctu, line ); | 1054 | |
| 1055 | 0 | thisCtuState = SAO_cont; | 1056 | 0 | } | 1057 | |
| 1058 | 0 | ITT_TASKEND( itt_domain_dec, itt_handle_alf ); | 1059 | 0 | } | 1060 | | | 1061 | 18.4E | thisCtuState = ALF; | 1062 | 18.4E | } | 1063 | | | 1064 | 328 | case ALF: | 1065 | 328 | { | 1066 | 328 | if( param->common.doALF ) | 1067 | 480 | { | 1068 | 480 | const bool a = line > 0; | 1069 | 480 | const bool b = line + 1 < heightInCtus; | 1070 | 480 | const bool c = col > 0; | 1071 | 480 | const bool d = col + 1 < tasksPerLine; | 1072 | | | 1073 | 480 | if( a ) | 1074 | 98 | { | 1075 | 98 | if( c && lineAbove[col - 1] < ALF ) return false; | 1076 | 91 | if( lineAbove[col ] < ALF ) return false; | 1077 | 89 | if( d && lineAbove[col + 1] < SAO_cont ) return false; | 1078 | 89 | } | 1079 | | | 1080 | 471 | if( b ) | 1081 | 464 | { | 1082 | 464 | if( c && lineBelow[col - 1] < ALF ) return false; | 1083 | 71 | if( lineBelow[col ] < ALF ) return false; | 1084 | 18 | if( d && lineBelow[col + 1] < SAO_cont ) return false; | 1085 | 18 | } | 1086 | | | 1087 | 19 | if( c && thisLine[col - 1] < ALF ) return false; | 1088 | 18 | if( d && thisLine[col + 1] < SAO_cont ) return false; | 1089 | | | 1090 | 13 | if( onlyCheckReadyState ) | 1091 | 14 | return true; | 1092 | | | 1093 | 18.4E | ITT_TASKSTART( itt_domain_dec, itt_handle_alf ); | 1094 | 18.4E | for( int ctu = ctuStart; ctu < ctuEnd; ctu++ ) | 1095 | 0 | { | 1096 | 0 | decLib.m_cALF.processCTU( cs, ctu, line, tid ); | 1097 | 0 | } | 1098 | 18.4E | ITT_TASKEND( itt_domain_dec, itt_handle_alf ); | 1099 | 18.4E | } | 1100 | 18.4E | else if( onlyCheckReadyState ) | 1101 | 0 | return true; | 1102 | | | 1103 | 18.4E | thisCtuState = DONE; | 1104 | 18.4E | } | 1105 | | | 1106 | 18.4E | default: | 1107 | 18.4E | CHECKD( thisCtuState != DONE, "Wrong CTU state" ); | 1108 | 641k | } // end switch | 1109 | 641k | } | 1110 | 641k | catch( ... ) | 1111 | 641k | { | 1112 | 3.47k | std::rethrow_exception( std::current_exception() ); | 1113 | 3.47k | } | 1114 | | | 1115 | 0 | return true; | 1116 | 641k | } |
|
1117 | | |
1118 | | } |