/src/vvenc/source/Lib/EncoderLib/EncSlice.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file EncSlice.cpp |
45 | | \brief slice encoder class |
46 | | */ |
47 | | |
48 | | #include "EncSlice.h" |
49 | | #include "EncStage.h" |
50 | | #include "EncLib.h" |
51 | | #include "EncPicture.h" |
52 | | #include "BitAllocation.h" |
53 | | #include "CommonLib/UnitTools.h" |
54 | | #include "CommonLib/Picture.h" |
55 | | #include "CommonLib/TimeProfiler.h" |
56 | | #include "CommonLib/dtrace_codingstruct.h" |
57 | | #include "Utilities/NoMallocThreadPool.h" |
58 | | |
59 | | #include <math.h> |
60 | | #include "vvenc/vvencCfg.h" |
61 | | |
62 | | //! \ingroup EncoderLib |
63 | | //! \{ |
64 | | |
65 | | namespace vvenc { |
66 | | |
// Tracing handles: one domain ("Encode") and one string handle per named processing stage.
// They exist only when TRACE_ENABLE_ITT is defined.
// NOTE(review): none of them is referenced in the part of the file visible here — presumably
// they are used by the CTU task code further down; confirm before removing any.
67 | | #ifdef TRACE_ENABLE_ITT |
68 | | static const __itt_domain* itt_domain_encode = __itt_domain_create( "Encode" ); |
69 | | static const __itt_string_handle* itt_handle_ctuEncode = __itt_string_handle_create( "Encode_CTU" ); |
70 | | static const __itt_string_handle* itt_handle_rspLfVer = __itt_string_handle_create( "RspLfVer_CTU" ); |
71 | | static const __itt_string_handle* itt_handle_lfHor = __itt_string_handle_create( "LfHor_CTU" ); |
72 | | static const __itt_string_handle* itt_handle_sao = __itt_string_handle_create( "SAO_CTU" ); |
73 | | static const __itt_string_handle* itt_handle_alf_stat = __itt_string_handle_create( "ALF_CTU_STAT" ); |
74 | | static const __itt_string_handle* itt_handle_alf_derive = __itt_string_handle_create( "ALF_DERIVE" ); |
75 | | static const __itt_string_handle* itt_handle_alf_recon = __itt_string_handle_create( "ALF_RECONSTRUCT" ); |
76 | | static const __itt_string_handle* itt_handle_ccalf_stat = __itt_string_handle_create( "CCALF_CTU_STAT" ); |
77 | | static const __itt_string_handle* itt_handle_ccalf_derive = __itt_string_handle_create( "CCALF_DERIVE" ); |
78 | | static const __itt_string_handle* itt_handle_ccalf_recon = __itt_string_handle_create( "CCALF_RECONSTRUCT" ); |
79 | | #endif |
80 | | |
81 | | void setArbitraryWppPattern( const PreCalcValues& pcv, std::vector<int>& ctuAddrMap, int stepX = 1 ) |
82 | 0 | { |
83 | 0 | ctuAddrMap.resize( pcv.sizeInCtus, 0 ); |
84 | 0 | std::vector<int> x_in_line( pcv.heightInCtus, 0 ); |
85 | 0 | int x = 0, y = 0, addr = 0; |
86 | 0 | int y_top = 0; |
87 | 0 | const int step = stepX; // number of CTUs in x-direction to scan |
88 | 0 | ctuAddrMap[addr++] = x++; // first entry (can be omitted) |
89 | 0 | while( addr < pcv.sizeInCtus ) |
90 | 0 | { |
91 | | // fill entries in x-direction |
92 | 0 | int x1 = x; |
93 | 0 | while( x < std::min(x1 + step, (int)pcv.widthInCtus) ) |
94 | 0 | { |
95 | | // general WPP condition (top-right CTU availability) |
96 | 0 | if( y > 0 && !( x_in_line[y - 1] - x >= 2 ) && x != pcv.widthInCtus - 1 ) |
97 | 0 | break; |
98 | 0 | ctuAddrMap[addr++] = y*pcv.widthInCtus + x; |
99 | 0 | x++; |
100 | 0 | } |
101 | 0 | x_in_line[y] = x; |
102 | | |
103 | 0 | y += 1; |
104 | |
|
105 | 0 | if( y >= pcv.heightInCtus ) |
106 | 0 | { |
107 | | // go up |
108 | 0 | if( x_in_line[y_top] >= pcv.widthInCtus ) |
109 | 0 | { |
110 | 0 | y_top++; |
111 | 0 | if( y_top >= pcv.heightInCtus ) |
112 | 0 | { |
113 | | // done |
114 | 0 | break; |
115 | 0 | } |
116 | 0 | } |
117 | 0 | y = y_top; |
118 | 0 | } |
119 | 0 | x = x_in_line[y]; |
120 | |
|
121 | 0 | CHECK( y >= pcv.heightInCtus, "Height in CTUs is exceeded" ); |
122 | 0 | } |
123 | 0 | } |
124 | | |
125 | | struct TileLineEncRsrc |
126 | | { |
127 | | BitEstimator m_BitEstimator; |
128 | | CABACWriter m_CABACEstimator; |
129 | | BitEstimator m_SaoBitEstimator; |
130 | | CABACWriter m_SaoCABACEstimator; |
131 | | BitEstimator m_AlfBitEstimator; |
132 | | CABACWriter m_AlfCABACEstimator; |
133 | | ReuseUniMv m_ReuseUniMv; |
134 | | BlkUniMvInfoBuffer m_BlkUniMvInfoBuffer; |
135 | | AffineProfList m_AffineProfList; |
136 | | IbcBvCand m_CachedBvs; |
137 | | EncSampleAdaptiveOffset m_encSao; |
138 | | int m_prevQp[ MAX_NUM_CH ]; |
139 | 0 | TileLineEncRsrc( const VVEncCfg& encCfg ) : m_CABACEstimator( m_BitEstimator ), m_SaoCABACEstimator( m_SaoBitEstimator ), m_AlfCABACEstimator( m_AlfBitEstimator ) { m_AffineProfList.init( ! encCfg.m_picReordering ); } |
140 | | }; |
141 | | |
// State held once per entry of EncSlice::m_ThreadRsrc (allocated in EncSlice::init,
// released in EncSlice::~EncSlice).
142 | | struct PerThreadRsrc |
143 | | { |
// NOTE(review): not touched in the visible part of the file — presumably handed to the CTU tasks.
144 | | CtxCache m_CtxCache; |
// CU encoder; EncSlice calls init / initPic / initSlice / setUpLambda on it.
145 | | EncCu m_encCu; |
// Buffer created in EncSlice::init with a square area of maxCUSize + 2 * MAX_ALF_PADDING_SIZE
// and destroyed explicitly in EncSlice::~EncSlice.
146 | | PelStorage m_alfTempCtuBuf; |
147 | | }; |
148 | | |
149 | | struct CtuEncParam |
150 | | { |
151 | | Picture* pic; |
152 | | EncSlice* encSlice; |
153 | | int ctuRsAddr; |
154 | | int ctuPosX; |
155 | | int ctuPosY; |
156 | | UnitArea ctuArea; |
157 | | int tileLineResIdx; |
158 | | |
159 | 0 | CtuEncParam() : pic( nullptr ), encSlice( nullptr ), ctuRsAddr( 0 ), ctuPosX( 0 ), ctuPosY( 0 ), ctuArea(), tileLineResIdx( 0 ) {} |
160 | | CtuEncParam( Picture* _p, EncSlice* _s, const int _r, const int _x, const int _y, const int _tileLineResIdx ) |
161 | | : pic( _p ) |
162 | | , encSlice( _s ) |
163 | | , ctuRsAddr( _r ) |
164 | | , ctuPosX( _x ) |
165 | | , ctuPosY( _y ) |
166 | | , ctuArea( pic->chromaFormat, pic->slices[0]->pps->pcv->getCtuArea( _x, _y ) ) |
167 | 0 | , tileLineResIdx( _tileLineResIdx ) {} |
168 | | }; |
169 | | |
170 | | // ==================================================================================================================== |
171 | | // Constructor / destructor / create / destroy |
172 | | // ==================================================================================================================== |
173 | | |
174 | | EncSlice::EncSlice() |
175 | 0 | : m_pcEncCfg ( nullptr) |
176 | 0 | , m_threadPool ( nullptr ) |
177 | 0 | , m_ctuTasksDoneCounter( nullptr ) |
178 | 0 | , m_ctuEncDelay ( 1 ) |
179 | 0 | , m_pLoopFilter ( nullptr ) |
180 | 0 | , m_pALF ( nullptr ) |
181 | 0 | , m_pcRateCtrl ( nullptr ) |
182 | 0 | , m_CABACWriter ( m_BinEncoder ) |
183 | 0 | , m_encCABACTableIdx ( VVENC_I_SLICE ) |
184 | 0 | { |
185 | 0 | } |
186 | | |
187 | | |
188 | | EncSlice::~EncSlice() |
189 | 0 | { |
190 | 0 | for( auto* lnRsc : m_TileLineEncRsrc ) |
191 | 0 | { |
192 | 0 | delete lnRsc; |
193 | 0 | } |
194 | 0 | m_TileLineEncRsrc.clear(); |
195 | |
|
196 | 0 | for( auto* taskRsc: m_ThreadRsrc ) |
197 | 0 | { |
198 | 0 | taskRsc->m_alfTempCtuBuf.destroy(); |
199 | 0 | delete taskRsc; |
200 | 0 | } |
201 | 0 | m_ThreadRsrc.clear(); |
202 | |
|
203 | 0 | m_saoReconParams.clear(); |
204 | |
|
205 | 0 | for( int i = 0; i < m_saoStatData.size(); i++ ) |
206 | 0 | { |
207 | 0 | for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ ) |
208 | 0 | { |
209 | 0 | delete[] m_saoStatData[ i ][ compIdx ]; |
210 | 0 | } |
211 | 0 | delete[] m_saoStatData[ i ]; |
212 | 0 | } |
213 | 0 | m_saoStatData.clear(); |
214 | 0 | } |
215 | | |
216 | | void EncSlice::init( const VVEncCfg& encCfg, |
217 | | const SPS& sps, |
218 | | const PPS& pps, |
219 | | std::vector<int>* const globalCtuQpVector, |
220 | | LoopFilter& loopFilter, |
221 | | EncAdaptiveLoopFilter& alf, |
222 | | RateCtrl& rateCtrl, |
223 | | NoMallocThreadPool* threadPool, |
224 | | WaitCounter* ctuTasksDoneCounter ) |
225 | 0 | { |
226 | 0 | m_pcEncCfg = &encCfg; |
227 | 0 | m_pLoopFilter = &loopFilter; |
228 | 0 | m_pALF = &alf; |
229 | 0 | m_pcRateCtrl = &rateCtrl; |
230 | 0 | m_threadPool = threadPool; |
231 | 0 | m_ctuTasksDoneCounter = ctuTasksDoneCounter; |
232 | 0 | m_syncPicCtx.resize( encCfg.m_entropyCodingSyncEnabled ? pps.getNumTileLineIds() : 0 ); |
233 | | |
234 | | |
235 | 0 | const int maxCntRscr = ( encCfg.m_numThreads > 0 ) ? pps.getNumTileLineIds() : 1; |
236 | 0 | const int maxCtuEnc = ( encCfg.m_numThreads > 0 && threadPool ) ? threadPool->numThreads() : 1; |
237 | |
|
238 | 0 | m_ThreadRsrc.resize( maxCtuEnc, nullptr ); |
239 | 0 | m_TileLineEncRsrc.resize( maxCntRscr, nullptr ); |
240 | |
|
241 | 0 | for( PerThreadRsrc*& taskRsc : m_ThreadRsrc ) |
242 | 0 | { |
243 | 0 | taskRsc = new PerThreadRsrc(); |
244 | 0 | taskRsc->m_encCu.init( encCfg, |
245 | 0 | sps, |
246 | 0 | globalCtuQpVector, |
247 | 0 | m_syncPicCtx.data(), |
248 | 0 | &rateCtrl ); |
249 | 0 | taskRsc->m_alfTempCtuBuf.create( pps.pcv->chrFormat, Area( 0, 0, pps.pcv->maxCUSize + (MAX_ALF_PADDING_SIZE << 1), pps.pcv->maxCUSize + (MAX_ALF_PADDING_SIZE << 1) ), pps.pcv->maxCUSize, MAX_ALF_PADDING_SIZE, 0, false ); |
250 | 0 | } |
251 | |
|
252 | 0 | for( TileLineEncRsrc*& lnRsc : m_TileLineEncRsrc ) |
253 | 0 | { |
254 | 0 | lnRsc = new TileLineEncRsrc( encCfg ); |
255 | 0 | if( sps.saoEnabled ) |
256 | 0 | { |
257 | 0 | lnRsc->m_encSao.init( encCfg ); |
258 | 0 | } |
259 | 0 | } |
260 | |
|
261 | 0 | const int sizeInCtus = pps.pcv->sizeInCtus; |
262 | 0 | m_processStates = std::vector<ProcessCtuState>( sizeInCtus ); |
263 | 0 | m_saoReconParams.resize( sizeInCtus ); |
264 | |
|
265 | 0 | ::memset( m_saoDisabledRate, 0, sizeof( m_saoDisabledRate ) ); |
266 | | |
267 | | // sao statistics |
268 | 0 | if( encCfg.m_bUseSAO ) |
269 | 0 | { |
270 | 0 | m_saoStatData.resize( sizeInCtus ); |
271 | 0 | for( int i = 0; i < sizeInCtus; i++ ) |
272 | 0 | { |
273 | 0 | m_saoStatData[ i ] = new SAOStatData*[ MAX_NUM_COMP ]; |
274 | 0 | for( int compIdx = 0; compIdx < MAX_NUM_COMP; compIdx++ ) |
275 | 0 | { |
276 | 0 | m_saoStatData[ i ][ compIdx ] = new SAOStatData[ NUM_SAO_NEW_TYPES ]; |
277 | 0 | } |
278 | 0 | } |
279 | 0 | } |
280 | 0 | ctuEncParams.resize( sizeInCtus ); |
281 | 0 | setArbitraryWppPattern( *pps.pcv, m_ctuAddrMap, 3 ); |
282 | |
|
283 | 0 | const unsigned asuHeightInCtus = m_pALF->getAsuHeightInCtus(); |
284 | 0 | const unsigned numDeriveLines = encCfg.m_ifpLines ? |
285 | 0 | std::min( ((encCfg.m_ifpLines & (~(asuHeightInCtus - 1))) + asuHeightInCtus), pps.pcv->heightInCtus ) : pps.pcv->heightInCtus; |
286 | 0 | m_alfDeriveCtu = numDeriveLines * pps.pcv->widthInCtus - 1; |
287 | 0 | m_ccalfDeriveCtu = encCfg.m_ifpLines ? pps.pcv->widthInCtus * std::min((unsigned)encCfg.m_ifpLines + 1, pps.pcv->heightInCtus) - 1: pps.pcv->sizeInCtus - 1; |
288 | 0 | } |
289 | | |
290 | | |
291 | | void EncSlice::initPic( Picture* pic ) |
292 | 0 | { |
293 | 0 | Slice* slice = pic->cs->slice; |
294 | |
|
295 | 0 | if( slice->pps->numTileCols * slice->pps->numTileRows > 1 ) |
296 | 0 | { |
297 | 0 | slice->sliceMap = slice->pps->sliceMap[0]; |
298 | 0 | } |
299 | 0 | else |
300 | 0 | { |
301 | 0 | slice->sliceMap.addCtusToSlice( 0, pic->cs->pcv->widthInCtus, 0, pic->cs->pcv->heightInCtus, pic->cs->pcv->widthInCtus); |
302 | 0 | } |
303 | | |
304 | | // this ensures that independently encoded bitstream chunks can be combined to bit-equal |
305 | 0 | const SliceType cabacTableIdx = ! slice->pps->cabacInitPresent || slice->pendingRasInit ? slice->sliceType : m_encCABACTableIdx; |
306 | 0 | slice->encCABACTableIdx = cabacTableIdx; |
307 | | |
308 | | // set QP and lambda values |
309 | 0 | xInitSliceLambdaQP( slice ); |
310 | |
|
311 | 0 | for( auto* thrRsc : m_ThreadRsrc ) |
312 | 0 | { |
313 | 0 | thrRsc->m_encCu.initPic( pic ); |
314 | 0 | } |
315 | |
|
316 | 0 | for( auto* lnRsc : m_TileLineEncRsrc ) |
317 | 0 | { |
318 | 0 | lnRsc->m_ReuseUniMv.resetReusedUniMvs(); |
319 | 0 | } |
320 | |
|
321 | 0 | m_ctuEncDelay = 1; |
322 | 0 | if( pic->useIBC ) |
323 | 0 | { |
324 | | // IBC needs unfiltered samples up to max IBC search range |
325 | | // therefore ensure that numCtuDelayLUT CTU's have been enocded first |
326 | | // assuming IBC localSearchRangeX / Y = 128 |
327 | 0 | const int numCtuDelayLUT[ 3 ] = { 15, 3, 1 }; |
328 | 0 | CHECK( pic->cs->pcv->maxCUSizeLog2 < 5 || pic->cs->pcv->maxCUSizeLog2 > 7, "invalid max CTUSize" ); |
329 | 0 | m_ctuEncDelay = numCtuDelayLUT[ pic->cs->pcv->maxCUSizeLog2 - 5 ]; |
330 | 0 | } |
331 | 0 | } |
332 | | |
333 | | |
334 | | |
335 | | void EncSlice::xInitSliceLambdaQP( Slice* slice ) |
336 | 0 | { |
337 | | // pre-compute lambda and QP |
338 | 0 | const bool rcp = (m_pcEncCfg->m_RCTargetBitrate > 0 && slice->pic->picInitialQP >= 0); // 2nd pass |
339 | 0 | int iQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, slice->pic->picInitialQP); // RC start QP |
340 | 0 | double dQP = (rcp ? (double) slice->pic->picInitialQP : xGetQPForPicture (slice)); |
341 | 0 | double dLambda = (rcp ? slice->pic->picInitialLambda : xCalculateLambda (slice, slice->TLayer, dQP, dQP, iQP)); |
342 | 0 | int sliceChromaQpOffsetIntraOrPeriodic[2] = { m_pcEncCfg->m_sliceChromaQpOffsetIntraOrPeriodic[0], m_pcEncCfg->m_sliceChromaQpOffsetIntraOrPeriodic[1] }; |
343 | 0 | const int lookAheadRCCQpOffset = 0; // was (m_pcEncCfg->m_RCTargetBitrate > 0 && m_pcEncCfg->m_LookAhead && CS::isDualITree (*slice->pic->cs) ? 1 : 0); |
344 | 0 | int cbQP = 0, crQP = 0, cbCrQP = 0; |
345 | |
|
346 | 0 | if (m_pcEncCfg->m_usePerceptQPA) // adapt sliceChromaQpOffsetIntraOrPeriodic and pic->ctuAdaptedQP |
347 | 0 | { |
348 | 0 | const bool cqp = (slice->isIntra() && !slice->sps->IBC) || (m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity > 0 && (slice->poc % m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity) == 0); |
349 | 0 | const uint32_t startCtuTsAddr = slice->sliceMap.ctuAddrInSlice[0]; |
350 | 0 | const uint32_t boundingCtuTsAddr = slice->pic->cs->pcv->sizeInCtus; |
351 | |
|
352 | 0 | if ((iQP = BitAllocation::applyQPAdaptationSlice (slice, m_pcEncCfg, iQP, dLambda, &slice->pic->picVA.visAct, // updates pic->picInitialQP |
353 | 0 | *m_ThreadRsrc[0]->m_encCu.getQpPtr(), m_pcRateCtrl->getIntraPQPAStats(), |
354 | 0 | (slice->pps->sliceChromaQpFlag && cqp ? sliceChromaQpOffsetIntraOrPeriodic : nullptr), |
355 | 0 | m_pcRateCtrl->getMinNoiseLevels(), startCtuTsAddr, boundingCtuTsAddr)) >= 0) // QP OK? |
356 | 0 | { |
357 | 0 | dLambda *= pow (2.0, ((double) iQP - dQP) / 3.0); // adjust lambda based on change of slice QP |
358 | 0 | } |
359 | 0 | else iQP = (int) dQP; // revert to unadapted slice QP |
360 | 0 | } |
361 | 0 | else if (rcp) |
362 | 0 | { |
363 | 0 | slice->pic->picInitialQP = -1; // no QPA - unused now |
364 | 0 | } |
365 | |
|
366 | 0 | if (slice->pps->sliceChromaQpFlag && CS::isDualITree (*slice->pic->cs) && !m_pcEncCfg->m_usePerceptQPA && (m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity == 0)) |
367 | 0 | { |
368 | 0 | cbQP = m_pcEncCfg->m_chromaCbQpOffsetDualTree + lookAheadRCCQpOffset; // QP offset for dual-tree |
369 | 0 | crQP = m_pcEncCfg->m_chromaCrQpOffsetDualTree + lookAheadRCCQpOffset; |
370 | 0 | cbCrQP = m_pcEncCfg->m_chromaCbCrQpOffsetDualTree + lookAheadRCCQpOffset; |
371 | 0 | } |
372 | 0 | else if (slice->pps->sliceChromaQpFlag) |
373 | 0 | { |
374 | 0 | const GOPEntry &gopEntry = *(slice->pic->gopEntry); |
375 | 0 | const bool bUseIntraOrPeriodicOffset = (slice->isIntra() && !slice->sps->IBC) || (m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity > 0 && (slice->poc % m_pcEncCfg->m_sliceChromaQpOffsetPeriodicity) == 0); |
376 | |
|
377 | 0 | cbQP = (bUseIntraOrPeriodicOffset ? sliceChromaQpOffsetIntraOrPeriodic[0] : gopEntry.m_CbQPoffset) + lookAheadRCCQpOffset; |
378 | 0 | crQP = (bUseIntraOrPeriodicOffset ? sliceChromaQpOffsetIntraOrPeriodic[1] : gopEntry.m_CrQPoffset) + lookAheadRCCQpOffset; |
379 | 0 | cbCrQP = (cbQP + crQP) >> 1; // use floor of average CbCr chroma QP offset for joint-CbCr coding |
380 | |
|
381 | 0 | cbQP = Clip3 (-12, 12, cbQP + slice->pps->chromaQpOffset[COMP_Cb]) - slice->pps->chromaQpOffset[COMP_Cb]; |
382 | 0 | crQP = Clip3 (-12, 12, crQP + slice->pps->chromaQpOffset[COMP_Cr]) - slice->pps->chromaQpOffset[COMP_Cr]; |
383 | 0 | cbCrQP = Clip3 (-12, 12, cbCrQP + slice->pps->chromaQpOffset[COMP_JOINT_CbCr]) - slice->pps->chromaQpOffset[COMP_JOINT_CbCr]; |
384 | 0 | } |
385 | |
|
386 | 0 | slice->sliceChromaQpDelta[COMP_Cb] = Clip3 (-12, 12, cbQP); |
387 | 0 | slice->sliceChromaQpDelta[COMP_Cr] = Clip3 (-12, 12, crQP); |
388 | 0 | slice->sliceChromaQpDelta[COMP_JOINT_CbCr] = (slice->sps->jointCbCr ? Clip3 (-12, 12, cbCrQP) : 0); |
389 | |
|
390 | 0 | for( auto& thrRsc : m_ThreadRsrc ) |
391 | 0 | { |
392 | 0 | thrRsc->m_encCu.setUpLambda( *slice, dLambda, iQP, true, true ); |
393 | 0 | } |
394 | |
|
395 | 0 | slice->sliceQp = iQP; |
396 | 0 | slice->chromaQpAdjEnabled = slice->pps->chromaQpOffsetListLen > 0; |
397 | 0 | } |
398 | | |
399 | | static const int highTL[6] = { -1, 0, 0, 2, 4, 5 }; |
400 | | |
401 | | int EncSlice::xGetQPForPicture( const Slice* slice ) |
402 | 0 | { |
403 | 0 | const int lumaQpBDOffset = slice->sps->qpBDOffset[ CH_L ]; |
404 | 0 | int qp; |
405 | |
|
406 | 0 | if ( m_pcEncCfg->m_costMode == VVENC_COST_LOSSLESS_CODING ) |
407 | 0 | { |
408 | 0 | qp = LOSSLESS_AND_MIXED_LOSSLESS_RD_COST_TEST_QP; |
409 | 0 | } |
410 | 0 | else |
411 | 0 | { |
412 | 0 | qp = m_pcEncCfg->m_QP + slice->pic->gopAdaptedQP; |
413 | |
|
414 | 0 | if (m_pcEncCfg->m_usePerceptQPA) |
415 | 0 | { |
416 | 0 | const int tlayer = slice->pic->gopEntry->m_vtl; |
417 | |
|
418 | 0 | qp = (slice->isIntra() ? std::min (qp, ((qp - std::min (3, floorLog2 (m_pcEncCfg->m_GOPSize) - 4/*TODO 3 with JVET-AC0149?*/)) * 15 + 3) >> 4) : highTL[tlayer] + ((qp * (16 + std::min (2, tlayer))) >> 4) + 0/*TODO +-1?*/); |
419 | 0 | } |
420 | 0 | else if( slice->isIntra() ) |
421 | 0 | { |
422 | 0 | qp += m_pcEncCfg->m_intraQPOffset; |
423 | 0 | } |
424 | 0 | else |
425 | 0 | { |
426 | 0 | if( qp != -lumaQpBDOffset ) |
427 | 0 | { |
428 | 0 | const GOPEntry &gopEntry = *(slice->pic->gopEntry); |
429 | | // adjust QP according to the QP offset for the GOP entry. |
430 | 0 | qp += gopEntry.m_QPOffset; |
431 | | |
432 | | // adjust QP according to QPOffsetModel for the GOP entry. |
433 | 0 | double dqpOffset = qp * gopEntry.m_QPOffsetModelScale + gopEntry.m_QPOffsetModelOffset + 0.5; |
434 | 0 | int qpOffset = (int)floor( Clip3<double>( 0.0, 3.0, dqpOffset ) ); |
435 | 0 | qp += qpOffset; |
436 | 0 | } |
437 | 0 | } |
438 | |
|
439 | 0 | if( m_pcEncCfg->m_blockImportanceMapping && !slice->pic->m_picShared->m_ctuBimQpOffset.empty() ) |
440 | 0 | { |
441 | 0 | qp += slice->pic->m_picShared->m_picAuxQpOffset; |
442 | 0 | } |
443 | 0 | } |
444 | 0 | qp = Clip3( -lumaQpBDOffset, MAX_QP, qp ); |
445 | 0 | return qp; |
446 | 0 | } |
447 | | |
448 | | |
449 | | double EncSlice::xCalculateLambda( const Slice* slice, |
450 | | const int depth, // slice GOP hierarchical depth. |
451 | | const double refQP, // initial slice-level QP |
452 | | const double dQP, // initial double-precision QP |
453 | | int& iQP ) // returned integer QP. |
454 | 0 | { |
455 | 0 | const GOPEntry &gopEntry = *(slice->pic->gopEntry); |
456 | 0 | const int SHIFT_QP = 12; |
457 | 0 | const int temporalId = gopEntry.m_temporalId; |
458 | 0 | std::vector<double> intraLambdaModifiers; |
459 | 0 | for ( int i = 0; i < VVENC_MAX_TLAYER; i++ ) |
460 | 0 | { |
461 | 0 | if( m_pcEncCfg->m_adIntraLambdaModifier[i] != 0.0 ) intraLambdaModifiers.push_back( m_pcEncCfg->m_adIntraLambdaModifier[i] ); |
462 | 0 | else break; |
463 | 0 | } |
464 | |
|
465 | 0 | int bitdepth_luma_qp_scale = 6 |
466 | 0 | * (slice->sps->bitDepths[ CH_L ] - 8 |
467 | 0 | - DISTORTION_PRECISION_ADJUSTMENT(slice->sps->bitDepths[ CH_L ])); |
468 | 0 | double qp_temp = dQP + bitdepth_luma_qp_scale - SHIFT_QP; |
469 | | // Case #1: I or P-slices (key-frame) |
470 | 0 | double dQPFactor = gopEntry.m_QPFactor; |
471 | 0 | if( slice->sliceType == VVENC_I_SLICE ) |
472 | 0 | { |
473 | 0 | if (m_pcEncCfg->m_dIntraQpFactor>=0.0 && gopEntry.m_sliceType != 'I') |
474 | 0 | { |
475 | 0 | dQPFactor = m_pcEncCfg->m_dIntraQpFactor; |
476 | 0 | } |
477 | 0 | else |
478 | 0 | { |
479 | 0 | dQPFactor = 0.57; |
480 | 0 | if( ! m_pcEncCfg->m_lambdaFromQPEnable ) |
481 | 0 | { |
482 | 0 | const int NumberBFrames = ( m_pcEncCfg->m_GOPSize - 1 ); |
483 | 0 | const double dLambda_scale = 1.0 - Clip3( 0.0, 0.5, 0.05 * (double)NumberBFrames ); |
484 | 0 | dQPFactor *= dLambda_scale; |
485 | 0 | } |
486 | 0 | } |
487 | 0 | } |
488 | 0 | else if( m_pcEncCfg->m_lambdaFromQPEnable ) |
489 | 0 | { |
490 | 0 | dQPFactor=0.57; |
491 | 0 | } |
492 | |
|
493 | 0 | double dLambda = dQPFactor*pow( 2.0, qp_temp/3.0 ); |
494 | |
|
495 | 0 | if( !(m_pcEncCfg->m_lambdaFromQPEnable) && depth>0 ) |
496 | 0 | { |
497 | 0 | double qp_temp_ref = refQP + bitdepth_luma_qp_scale - SHIFT_QP; |
498 | 0 | dLambda *= Clip3(2.00, 4.00, (qp_temp_ref / 6.0)); // (j == B_SLICE && p_cur_frm->layer != 0 ) |
499 | 0 | } |
500 | | |
501 | | // if hadamard is used in ME process |
502 | 0 | if ( !m_pcEncCfg->m_bUseHADME && slice->sliceType != VVENC_I_SLICE ) |
503 | 0 | { |
504 | 0 | dLambda *= 0.95; |
505 | 0 | } |
506 | |
|
507 | 0 | double lambdaModifier; |
508 | 0 | if( slice->sliceType != VVENC_I_SLICE || intraLambdaModifiers.empty()) |
509 | 0 | { |
510 | 0 | lambdaModifier = m_pcEncCfg->m_adLambdaModifier[ temporalId ]; |
511 | 0 | } |
512 | 0 | else |
513 | 0 | { |
514 | 0 | lambdaModifier = intraLambdaModifiers[ (temporalId < intraLambdaModifiers.size()) ? temporalId : (intraLambdaModifiers.size()-1) ]; |
515 | 0 | } |
516 | 0 | dLambda *= lambdaModifier; |
517 | |
|
518 | 0 | iQP = Clip3( -slice->sps->qpBDOffset[ CH_L ], MAX_QP, (int) floor( dQP + 0.5 ) ); |
519 | |
|
520 | 0 | if( m_pcEncCfg->m_DepQuantEnabled ) |
521 | 0 | { |
522 | 0 | dLambda *= pow( 2.0, 0.25/3.0 ); // slight lambda adjustment for dependent quantization (due to different slope of quantizer) |
523 | 0 | } |
524 | | |
525 | | // NOTE: the lambda modifiers that are sometimes applied later might be best always applied in here. |
526 | 0 | return dLambda; |
527 | 0 | } |
528 | | |
529 | | |
530 | | // ==================================================================================================================== |
531 | | // Public member functions |
532 | | // ==================================================================================================================== |
533 | | |
534 | | |
535 | | /** \param pic picture class |
536 | | */ |
537 | | void EncSlice::compressSlice( Picture* pic ) |
538 | 0 | { |
539 | 0 | PROFILER_SCOPE_AND_STAGE( 1, g_timeProfiler, P_COMPRESS_SLICE ); |
540 | 0 | CodingStructure& cs = *pic->cs; |
541 | 0 | Slice* const slice = cs.slice; |
542 | 0 | uint32_t startCtuTsAddr = slice->sliceMap.ctuAddrInSlice[0]; |
543 | 0 | uint32_t boundingCtuTsAddr = pic->cs->pcv->sizeInCtus; |
544 | |
|
545 | 0 | cs.pcv = slice->pps->pcv; |
546 | 0 | cs.fracBits = 0; |
547 | |
|
548 | 0 | if( startCtuTsAddr == 0 ) |
549 | 0 | { |
550 | 0 | cs.initStructData( slice->sliceQp ); |
551 | 0 | } |
552 | |
|
553 | 0 | for( auto* thrRsrc : m_ThreadRsrc ) |
554 | 0 | { |
555 | 0 | thrRsrc->m_encCu.initSlice( slice ); |
556 | 0 | } |
557 | |
|
558 | 0 | for( auto* lnRsrc : m_TileLineEncRsrc ) |
559 | 0 | { |
560 | 0 | lnRsrc->m_CABACEstimator .initCtxModels( *slice ); |
561 | 0 | lnRsrc->m_SaoCABACEstimator .initCtxModels( *slice ); |
562 | 0 | lnRsrc->m_AlfCABACEstimator .initCtxModels( *slice ); |
563 | 0 | lnRsrc->m_AffineProfList .resetAffineMVList(); |
564 | 0 | lnRsrc->m_BlkUniMvInfoBuffer.resetUniMvList(); |
565 | 0 | lnRsrc->m_CachedBvs .resetIbcBvCand(); |
566 | |
|
567 | 0 | if( slice->sps->saoEnabled && pic->useSAO ) |
568 | 0 | { |
569 | 0 | lnRsrc->m_encSao .initSlice( slice ); |
570 | 0 | } |
571 | 0 | } |
572 | |
|
573 | 0 | if( slice->sps->fpelMmvd && !slice->picHeader->disFracMMVD ) |
574 | 0 | { |
575 | 0 | slice->picHeader->disFracMMVD = ( pic->lwidth() * pic->lheight() > 1920 * 1080 ) ? true : false; |
576 | 0 | } |
577 | |
|
578 | 0 | xProcessCtus( pic, startCtuTsAddr, boundingCtuTsAddr ); |
579 | 0 | } |
580 | | |
581 | | void setJointCbCrModes( CodingStructure& cs, const Position topLeftLuma, const Size sizeLuma ) |
582 | 0 | { |
583 | 0 | bool sgnFlag = true; |
584 | |
|
585 | 0 | if( isChromaEnabled( cs.picture->chromaFormat) ) |
586 | 0 | { |
587 | 0 | const CompArea cbArea = CompArea( COMP_Cb, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true ); |
588 | 0 | const CompArea crArea = CompArea( COMP_Cr, cs.picture->chromaFormat, Area(topLeftLuma,sizeLuma), true ); |
589 | |
|
590 | 0 | const CPelBuf orgCb = cs.picture->getFilteredOrigBuffer().valid() ? cs.picture->getRspOrigBuf( cbArea ): cs.picture->getOrigBuf( cbArea ); |
591 | 0 | const CPelBuf orgCr = cs.picture->getFilteredOrigBuffer().valid() ? cs.picture->getRspOrigBuf( crArea ): cs.picture->getOrigBuf( crArea ); |
592 | 0 | const int x0 = ( cbArea.x > 0 ? 0 : 1 ); |
593 | 0 | const int y0 = ( cbArea.y > 0 ? 0 : 1 ); |
594 | 0 | const int x1 = ( cbArea.x + cbArea.width < cs.picture->Cb().width ? cbArea.width : cbArea.width - 1 ); |
595 | 0 | const int y1 = ( cbArea.y + cbArea.height < cs.picture->Cb().height ? cbArea.height : cbArea.height - 1 ); |
596 | 0 | const int cbs = orgCb.stride; |
597 | 0 | const int crs = orgCr.stride; |
598 | 0 | const Pel* pCb = orgCb.buf + y0 * cbs; |
599 | 0 | const Pel* pCr = orgCr.buf + y0 * crs; |
600 | 0 | int64_t sumCbCr = 0; |
601 | | |
602 | | // determine inter-chroma transform sign from correlation between high-pass filtered (i.e., zero-mean) Cb and Cr planes |
603 | 0 | for( int y = y0; y < y1; y++, pCb += cbs, pCr += crs ) |
604 | 0 | { |
605 | 0 | for( int x = x0; x < x1; x++ ) |
606 | 0 | { |
607 | 0 | int cb = ( 12*(int)pCb[x] - 2*((int)pCb[x-1] + (int)pCb[x+1] + (int)pCb[x-cbs] + (int)pCb[x+cbs]) - ((int)pCb[x-1-cbs] + (int)pCb[x+1-cbs] + (int)pCb[x-1+cbs] + (int)pCb[x+1+cbs]) ); |
608 | 0 | int cr = ( 12*(int)pCr[x] - 2*((int)pCr[x-1] + (int)pCr[x+1] + (int)pCr[x-crs] + (int)pCr[x+crs]) - ((int)pCr[x-1-crs] + (int)pCr[x+1-crs] + (int)pCr[x-1+crs] + (int)pCr[x+1+crs]) ); |
609 | 0 | sumCbCr += cb*cr; |
610 | 0 | } |
611 | 0 | } |
612 | |
|
613 | 0 | sgnFlag = ( sumCbCr < 0 ); |
614 | 0 | } |
615 | |
|
616 | 0 | cs.slice->picHeader->jointCbCrSign = sgnFlag; |
617 | 0 | } |
618 | | |
/// Immutable CTU coordinate: column, row and raster-scan address.
struct CtuPos
{
  const int ctuPosX;
  const int ctuPosY;
  const int ctuRsAddr;

  /// \param _x CTU column  \param _y CTU row  \param _a raster-scan address
  CtuPos( int _x, int _y, int _a )
    : ctuPosX  ( _x )
    , ctuPosY  ( _y )
    , ctuRsAddr( _a )
  {
  }
};
627 | | |
628 | | class CtuTsIterator |
629 | | { |
630 | | private: |
631 | | const CodingStructure& cs; |
632 | | const int m_startTsAddr; |
633 | | const int m_endTsAddr; |
634 | | std::vector<int> m_ctuAddrMap; |
635 | | int m_ctuTsAddr; |
636 | | |
637 | | private: |
638 | | int getNextTsAddr( const int _tsAddr ) const |
639 | 0 | { |
640 | 0 | const PreCalcValues& pcv = *cs.pcv; |
641 | 0 | const int startSliceRsRow = m_startTsAddr / pcv.widthInCtus; |
642 | 0 | const int startSliceRsCol = m_startTsAddr % pcv.widthInCtus; |
643 | 0 | const int endSliceRsRow = (m_endTsAddr - 1) / pcv.widthInCtus; |
644 | 0 | const int endSliceRsCol = (m_endTsAddr - 1) % pcv.widthInCtus; |
645 | 0 | int ctuTsAddr = _tsAddr; |
646 | 0 | CHECK( ctuTsAddr > m_endTsAddr, "error: array index out of bounds" ); |
647 | 0 | while( ctuTsAddr < m_endTsAddr ) |
648 | 0 | { |
649 | 0 | ctuTsAddr++; |
650 | 0 | const int ctuRsAddr = ctuTsAddr; |
651 | 0 | if( cs.slice->pps->rectSlice |
652 | 0 | && ( (ctuRsAddr / pcv.widthInCtus) < startSliceRsRow |
653 | 0 | || (ctuRsAddr / pcv.widthInCtus) > endSliceRsRow |
654 | 0 | || (ctuRsAddr % pcv.widthInCtus) < startSliceRsCol |
655 | 0 | || (ctuRsAddr % pcv.widthInCtus) > endSliceRsCol ) ) |
656 | 0 | continue; |
657 | 0 | break; |
658 | 0 | } |
659 | 0 | return ctuTsAddr; |
660 | 0 | } |
661 | | |
662 | | int mapAddr( const int _addr ) const |
663 | 0 | { |
664 | 0 | if( _addr < 0 ) |
665 | 0 | return _addr; |
666 | 0 | if( _addr >= m_ctuAddrMap.size() ) |
667 | 0 | return _addr; |
668 | 0 | return m_ctuAddrMap[ _addr ]; |
669 | 0 | } |
670 | | |
671 | | public: |
672 | 0 | CtuTsIterator( const CodingStructure& _cs, int _s, int _e, std::vector<int>& _m ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( _s ) {} |
673 | 0 | CtuTsIterator( const CodingStructure& _cs, int _s, int _e, bool _wpp ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuTsAddr( _s ) { if( _wpp ) setWppPattern(); } |
674 | 0 | CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>& _m ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( _s ) {} |
675 | 0 | CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>& _m, int _c ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuAddrMap( _m ), m_ctuTsAddr( std::max( _s, _c ) ) {} |
676 | 0 | CtuTsIterator( const CodingStructure& _cs, int _s, int _e, const std::vector<int>* _m, bool _wpp ) : cs( _cs ), m_startTsAddr( _s ), m_endTsAddr( _e ), m_ctuTsAddr( _s ) { if( _wpp ) m_ctuAddrMap = *_m; } |
677 | | |
678 | 0 | virtual ~CtuTsIterator() { m_ctuAddrMap.clear(); } |
679 | | |
680 | 0 | CtuTsIterator& operator++() { m_ctuTsAddr = getNextTsAddr( m_ctuTsAddr ); return *this; } |
681 | 0 | CtuTsIterator operator++(int) { auto retval = *this; ++(*this); return retval; } |
682 | 0 | bool operator==(CtuTsIterator other) const { return m_ctuTsAddr == other.m_ctuTsAddr; } |
683 | 0 | bool operator!=(CtuTsIterator other) const { return m_ctuTsAddr != other.m_ctuTsAddr; } |
684 | 0 | CtuPos operator*() const { const int ctuRsAddr = mapAddr( m_ctuTsAddr ); return CtuPos( ctuRsAddr % cs.pcv->widthInCtus, ctuRsAddr / cs.pcv->widthInCtus, ctuRsAddr ); } |
685 | | |
686 | 0 | CtuTsIterator begin() { return CtuTsIterator( cs, m_startTsAddr, m_endTsAddr, m_ctuAddrMap ); }; |
687 | 0 | CtuTsIterator end() { return CtuTsIterator( cs, m_startTsAddr, m_endTsAddr, m_ctuAddrMap, m_endTsAddr ); }; |
688 | | |
// Iterator trait aliases describing this class as a forward iterator.
// NOTE(review): operator*() returns a CtuPos by value, while value_type / pointer / reference
// below are expressed in terms of int - confirm whether generic code relying on these traits
// should see CtuPos instead.
using iterator_category = std::forward_iterator_tag;
using value_type = int;
using pointer = int*;
using reference = int&;
using difference_type = ptrdiff_t;
694 | | |
695 | | void setWppPattern() |
696 | 0 | { |
697 | 0 | const PreCalcValues& pcv = *cs.pcv; |
698 | 0 | m_ctuAddrMap.resize( pcv.sizeInCtus, 0 ); |
699 | 0 | int addr = 0; |
700 | 0 | for( int i = 1; i < pcv.sizeInCtus; i++ ) |
701 | 0 | { |
702 | 0 | int x = addr % pcv.widthInCtus; |
703 | 0 | int y = addr / pcv.widthInCtus; |
704 | 0 | x -= 1; |
705 | 0 | y += 1; |
706 | 0 | if( x < 0 || y >= pcv.heightInCtus ) |
707 | 0 | { |
708 | 0 | x += 1 + y; |
709 | 0 | y = 0; |
710 | 0 | } |
711 | 0 | if( x >= pcv.widthInCtus ) |
712 | 0 | { |
713 | 0 | y += ( x - pcv.widthInCtus ) + 1; |
714 | 0 | x = pcv.widthInCtus - 1; |
715 | 0 | } |
716 | 0 | addr = y * pcv.widthInCtus + x; |
717 | 0 | m_ctuAddrMap[ i ] = addr; |
718 | 0 | } |
719 | 0 | } |
720 | | }; |
721 | | |
722 | | void EncSlice::saoDisabledRate( CodingStructure& cs, SAOBlkParam* reconParams ) |
723 | 0 | { |
724 | 0 | EncSampleAdaptiveOffset::disabledRate( cs, m_saoDisabledRate, reconParams, m_pcEncCfg->m_saoEncodingRate, m_pcEncCfg->m_saoEncodingRateChroma, m_pcEncCfg->m_internChromaFormat ); |
725 | 0 | } |
726 | | |
727 | | void EncSlice::finishCompressSlice( Picture* pic, Slice& slice ) |
728 | 0 | { |
729 | 0 | CodingStructure& cs = *pic->cs; |
730 | | |
731 | | // finalize |
732 | 0 | if( slice.sps->saoEnabled && pic->useSAO ) |
733 | 0 | { |
734 | | // store disabled statistics |
735 | 0 | if( !m_pcEncCfg->m_numThreads ) |
736 | 0 | saoDisabledRate( cs, &m_saoReconParams[ 0 ] ); |
737 | | |
738 | | // set slice header flags |
739 | 0 | CHECK( m_saoEnabled[ COMP_Cb ] != m_saoEnabled[ COMP_Cr ], "Unspecified error"); |
740 | 0 | for( auto s : pic->slices ) |
741 | 0 | { |
742 | 0 | s->saoEnabled[ CH_L ] = m_saoEnabled[ COMP_Y ]; |
743 | 0 | s->saoEnabled[ CH_C ] = m_saoEnabled[ COMP_Cb ]; |
744 | 0 | } |
745 | 0 | } |
746 | 0 | } |
747 | | |
748 | | void EncSlice::xProcessCtus( Picture* pic, const unsigned startCtuTsAddr, const unsigned boundingCtuTsAddr ) |
749 | 0 | { |
750 | 0 | PROFILER_SCOPE_TOP_LEVEL_EXT( 1, g_timeProfiler, P_IGNORE, pic->cs ); |
751 | 0 | CodingStructure& cs = *pic->cs; |
752 | 0 | Slice& slice = *cs.slice; |
753 | 0 | const PreCalcValues& pcv = *cs.pcv; |
754 | | |
755 | | // initialization |
756 | 0 | if( slice.sps->jointCbCr ) |
757 | 0 | { |
758 | 0 | setJointCbCrModes( cs, Position(0, 0), cs.area.lumaSize() ); |
759 | 0 | } |
760 | |
|
761 | 0 | if( slice.sps->saoEnabled && pic->useSAO ) |
762 | 0 | { |
763 | | // check SAO enabled or disabled |
764 | 0 | EncSampleAdaptiveOffset::decidePicParams( cs, m_saoDisabledRate, m_saoEnabled, m_pcEncCfg->m_saoEncodingRate, m_pcEncCfg->m_saoEncodingRateChroma, m_pcEncCfg->m_internChromaFormat ); |
765 | |
|
766 | 0 | m_saoAllDisabled = true; |
767 | 0 | for( int compIdx = 0; compIdx < getNumberValidComponents( pcv.chrFormat ); compIdx++ ) |
768 | 0 | { |
769 | 0 | m_saoAllDisabled &= ! m_saoEnabled[ compIdx ]; |
770 | 0 | } |
771 | |
|
772 | 0 | std::fill( m_saoReconParams.begin(), m_saoReconParams.end(), SAOBlkParam() ); |
773 | 0 | } |
774 | 0 | else |
775 | 0 | { |
776 | 0 | m_saoAllDisabled = true; |
777 | 0 | } |
778 | |
|
779 | 0 | if( slice.sps->alfEnabled ) |
780 | 0 | { |
781 | 0 | m_pALF->initEncProcess( slice ); |
782 | 0 | } |
783 | |
|
784 | 0 | std::fill( m_processStates.begin(), m_processStates.end(), CTU_ENCODE ); |
785 | | |
786 | | // fill encoder parameter list |
787 | 0 | int idx = 0; |
788 | 0 | const std::vector<int> base = slice.sliceMap.ctuAddrInSlice; |
789 | 0 | auto ctuIter = CtuTsIterator( cs, startCtuTsAddr, boundingCtuTsAddr, &m_ctuAddrMap, m_pcEncCfg->m_numThreads > 0 ); |
790 | 0 | for( auto ctuPos : ctuIter ) |
791 | 0 | { |
792 | 0 | ctuEncParams[ idx ].pic = pic; |
793 | 0 | ctuEncParams[ idx ].encSlice = this; |
794 | 0 | ctuEncParams[ idx ].ctuRsAddr = ctuPos.ctuRsAddr; |
795 | 0 | ctuEncParams[ idx ].ctuPosX = ctuPos.ctuPosX; |
796 | 0 | ctuEncParams[ idx ].ctuPosY = ctuPos.ctuPosY; |
797 | 0 | ctuEncParams[ idx ].ctuArea = UnitArea( pic->chromaFormat, slice.pps->pcv->getCtuArea( ctuPos.ctuPosX, ctuPos.ctuPosY ) ); |
798 | |
|
799 | 0 | if( m_pcEncCfg->m_numThreads > 0 ) |
800 | 0 | { |
801 | 0 | ctuEncParams[idx].tileLineResIdx = slice.pps->getTileLineId( ctuPos.ctuPosX, ctuPos.ctuPosY ); |
802 | 0 | } |
803 | 0 | else |
804 | 0 | { |
805 | 0 | ctuEncParams[idx].tileLineResIdx = 0; |
806 | 0 | } |
807 | 0 | idx++; |
808 | 0 | } |
809 | | |
810 | | //for( int i = 0; i < idx; i++ ) |
811 | | //{ |
812 | | // for( int j = i; j < idx; j++ ) |
813 | | // { |
814 | | // if( ctuEncParams[i].tileLineResIdx != ctuEncParams[j].tileLineResIdx ) continue; |
815 | | // |
816 | | // CHECK( ctuEncParams[i].ctuPosY != ctuEncParams[j].ctuPosY, "Not the same CTU line!" ); |
817 | | // CHECK( slice.pps->getTileIdx( ctuEncParams[i].ctuPosX, ctuEncParams[i].ctuPosY ) != slice.pps->getTileIdx( ctuEncParams[j].ctuPosX, ctuEncParams[j].ctuPosY ), "Not the same tile!" ); |
818 | | // } |
819 | | //} |
820 | |
|
821 | 0 | CHECK( idx != pcv.sizeInCtus, "array index out of bounds" ); |
822 | | |
823 | | // process ctu's until last ctu is done |
824 | 0 | if( m_pcEncCfg->m_numThreads > 0 ) |
825 | 0 | { |
826 | 0 | for( auto& ctuEncParam : ctuEncParams ) |
827 | 0 | { |
828 | 0 | m_threadPool->addBarrierTask<CtuEncParam>( EncSlice::xProcessCtuTask<false>, |
829 | 0 | &ctuEncParam, |
830 | 0 | m_ctuTasksDoneCounter, |
831 | 0 | nullptr, |
832 | 0 | {}, |
833 | 0 | EncSlice::xProcessCtuTask<true> ); |
834 | 0 | } |
835 | 0 | } |
836 | 0 | else |
837 | 0 | { |
838 | 0 | do |
839 | 0 | { |
840 | 0 | for( auto& ctuEncParam : ctuEncParams ) |
841 | 0 | { |
842 | 0 | if( m_processStates[ctuEncParam.ctuRsAddr] != PROCESS_DONE ) |
843 | 0 | EncSlice::xProcessCtuTask<false>( 0, &ctuEncParam ); |
844 | 0 | } |
845 | 0 | DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_LUMA_LF, cs, cs.getRecoBuf(), COMP_Y ); |
846 | 0 | DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMP_Cb ); |
847 | 0 | DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == SAO_FILTER && m_processStates[ boundingCtuTsAddr - 1 ] == SAO_FILTER, D_REC_CB_CHROMA_LF, cs, cs.getRecoBuf(), COMP_Cr ); |
848 | 0 | DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_LUMA_SAO, cs, cs.getRecoBuf(), COMP_Y ); |
849 | 0 | DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_CHROMA_SAO, cs, cs.getRecoBuf(), COMP_Cb ); |
850 | 0 | DTRACE_PIC_COMP_COND( m_processStates[ 0 ] == ALF_GET_STATISTICS && m_processStates[ boundingCtuTsAddr - 1 ] == ALF_GET_STATISTICS, D_REC_CB_CHROMA_SAO, cs, cs.getRecoBuf(), COMP_Cr ); |
851 | 0 | } |
852 | 0 | while( m_processStates[ boundingCtuTsAddr - 1 ] != PROCESS_DONE ); |
853 | 0 | } |
854 | 0 | } |
855 | | |
856 | | inline bool checkCtuTaskNbTop( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false ) |
857 | 0 | { |
858 | 0 | return ctuPosY > 0 && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 0, -1 ) ) && processStates[ ctuRsAddr - pps.pcv->widthInCtus ] <= tskType; |
859 | 0 | } |
860 | | |
861 | | inline bool checkCtuTaskNbBot( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false ) |
862 | 0 | { |
863 | 0 | return ctuPosY + 1 < pps.pcv->heightInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 0, 1 ) ) && processStates[ ctuRsAddr + pps.pcv->widthInCtus ] <= tskType; |
864 | 0 | } |
865 | | |
866 | | inline bool checkCtuTaskNbRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false ) |
867 | 0 | { |
868 | 0 | return ctuPosX + 1 < pps.pcv->widthInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 1, 0 ) ) && processStates[ ctuRsAddr + 1 ] <= tskType; |
869 | 0 | } |
870 | | |
871 | | inline bool checkCtuTaskNbTopRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, bool override = false ) |
872 | 0 | { |
873 | 0 | return ctuPosY > 0 && ctuPosX + 1 < pps.pcv->widthInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, 1, -1 ) ) && processStates[ ctuRsAddr - pps.pcv->widthInCtus + 1 ] <= tskType; |
874 | 0 | } |
875 | | |
876 | | inline bool checkCtuTaskNbBotRgt( const PPS& pps, const int& ctuPosX, const int& ctuPosY, const int& ctuRsAddr, const ProcessCtuState* processStates, const TaskType tskType, const int rightOffset = 1, bool override = false ) |
877 | 0 | { |
878 | 0 | return ctuPosX + rightOffset < pps.pcv->widthInCtus && ctuPosY + 1 < pps.pcv->heightInCtus && ( override || pps.canFilterCtuBdry( ctuPosX, ctuPosY, rightOffset, 1 ) ) && processStates[ ctuRsAddr + rightOffset + pps.pcv->widthInCtus ] <= tskType; |
879 | 0 | } |
880 | | |
881 | | template<bool checkReadyState> |
882 | | bool EncSlice::xProcessCtuTask( int threadIdx, CtuEncParam* ctuEncParam ) |
883 | 0 | { |
884 | 0 | Picture* pic = ctuEncParam->pic; |
885 | 0 | EncSlice* encSlice = ctuEncParam->encSlice; |
886 | 0 | CodingStructure& cs = *pic->cs; |
887 | 0 | Slice& slice = *cs.slice; |
888 | 0 | const PPS& pps = *slice.pps; |
889 | 0 | const PreCalcValues& pcv = *cs.pcv; |
890 | 0 | const int ctuRsAddr = ctuEncParam->ctuRsAddr; |
891 | 0 | const int ctuPosX = ctuEncParam->ctuPosX; |
892 | 0 | const int ctuPosY = ctuEncParam->ctuPosY; |
893 | 0 | const int x = ctuPosX << pcv.maxCUSizeLog2; |
894 | 0 | const int y = ctuPosY << pcv.maxCUSizeLog2; |
895 | 0 | const int width = std::min( pcv.maxCUSize, pcv.lumaWidth - x ); |
896 | 0 | const int height = std::min( pcv.maxCUSize, pcv.lumaHeight - y ); |
897 | 0 | const int ctuStride = pcv.widthInCtus; |
898 | 0 | const int lineIdx = ctuEncParam->tileLineResIdx; |
899 | 0 | ProcessCtuState* processStates = encSlice->m_processStates.data(); |
900 | 0 | const UnitArea& ctuArea = ctuEncParam->ctuArea; |
901 | 0 | const bool wppSyncEnabled = cs.sps->entropyCodingSyncEnabled; |
902 | 0 | const TaskType currState = processStates[ ctuRsAddr ]; |
903 | 0 | const unsigned syncLines = encSlice->m_pcEncCfg->m_ifpLines; |
904 | |
|
905 | 0 | DTRACE_UPDATE( g_trace_ctx, std::make_pair( "poc", cs.slice->poc ) ); |
906 | 0 | DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) ); |
907 | 0 | DTRACE_UPDATE( g_trace_ctx, std::make_pair( "final", processStates[ ctuRsAddr ] == CTU_ENCODE ? 0 : 1 ) ); |
908 | | |
909 | | // process ctu's line wise from left to right |
910 | 0 | const bool tileParallel = encSlice->m_pcEncCfg->m_tileParallelCtuEnc; |
911 | 0 | if( tileParallel && currState == CTU_ENCODE && ctuPosX > 0 && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX - 1, ctuPosY ) ) |
912 | 0 | ; // for CTU_ENCODE on tile boundaries, allow parallel processing of tiles |
913 | 0 | else if( ctuPosX > 0 && processStates[ ctuRsAddr - 1 ] <= currState && currState < PROCESS_DONE ) |
914 | 0 | return false; |
915 | | |
916 | 0 | switch( currState ) |
917 | 0 | { |
918 | | // encode |
919 | 0 | case CTU_ENCODE: |
920 | 0 | { |
921 | | // CTU line-wise inter-frame parallel processing synchronization |
922 | 0 | if( syncLines ) |
923 | 0 | { |
924 | 0 | const bool lineStart = ctuPosX == 0 || ( tileParallel && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX - 1, ctuPosY ) ); |
925 | 0 | if( lineStart && !refPicCtuLineReady( slice, ctuPosY + (int)syncLines, pcv ) ) |
926 | 0 | { |
927 | 0 | return false; |
928 | 0 | } |
929 | 0 | } |
930 | | |
931 | | // general wpp conditions, top and top-right ctu have to be encoded |
932 | 0 | if( encSlice->m_pcEncCfg->m_tileParallelCtuEnc && ctuPosY > 0 && slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX, ctuPosY - 1 ) ) |
933 | 0 | ; // allow parallel processing of CTU-encoding on independent tiles |
934 | 0 | else if( ctuPosY > 0 && processStates[ ctuRsAddr - ctuStride ] <= CTU_ENCODE ) |
935 | 0 | return false; |
936 | 0 | else if( ctuPosY > 0 && ctuPosX + 1 < pcv.widthInCtus && processStates[ ctuRsAddr - ctuStride + 1 ] <= CTU_ENCODE && !wppSyncEnabled ) |
937 | 0 | return false; |
938 | | |
939 | 0 | if( checkReadyState ) |
940 | 0 | return true; |
941 | | |
942 | | #ifdef TRACE_ENABLE_ITT |
943 | | std::stringstream ss; |
944 | | ss << "Encode_" << slice.poc << "_CTU_" << ctuPosY << "_" << ctuPosX; |
945 | | __itt_string_handle* itt_handle_ctuEncode = __itt_string_handle_create( ss.str().c_str() ); |
946 | | #endif |
947 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_ctuEncode ); |
948 | |
|
949 | 0 | TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ]; |
950 | 0 | PerThreadRsrc* taskRsrc = encSlice->m_ThreadRsrc[ threadIdx ]; |
951 | 0 | EncCu& encCu = taskRsrc->m_encCu; |
952 | |
|
953 | 0 | encCu.setCtuEncRsrc( &lineEncRsrc->m_CABACEstimator, &taskRsrc->m_CtxCache, &lineEncRsrc->m_ReuseUniMv, &lineEncRsrc->m_BlkUniMvInfoBuffer, &lineEncRsrc->m_AffineProfList, &lineEncRsrc->m_CachedBvs ); |
954 | 0 | encCu.encodeCtu( pic, lineEncRsrc->m_prevQp, ctuPosX, ctuPosY ); |
955 | | |
956 | | // cleanup line memory when last ctu in line done to reduce overall memory consumption |
957 | 0 | if( encSlice->m_pcEncCfg->m_ensureWppBitEqual && ( ctuPosX == pcv.widthInCtus - 1 || slice.pps->getTileIdx( ctuPosX, ctuPosY ) != slice.pps->getTileIdx( ctuPosX + 1, ctuPosY ) ) ) |
958 | 0 | { |
959 | 0 | lineEncRsrc->m_AffineProfList .resetAffineMVList(); |
960 | 0 | lineEncRsrc->m_BlkUniMvInfoBuffer.resetUniMvList(); |
961 | 0 | lineEncRsrc->m_ReuseUniMv .resetReusedUniMvs(); |
962 | 0 | lineEncRsrc->m_CachedBvs .resetIbcBvCand(); |
963 | 0 | } |
964 | |
|
965 | 0 | DTRACE_UPDATE( g_trace_ctx, std::make_pair( "final", 1 ) ); |
966 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_ctuEncode ); |
967 | |
|
968 | 0 | processStates[ ctuRsAddr ] = RESHAPE_LF_VER; |
969 | 0 | } |
970 | 0 | break; |
971 | | |
972 | | // reshape + vertical loopfilter |
973 | 0 | case RESHAPE_LF_VER: |
974 | 0 | { |
975 | | // clip check to right tile border (CTU_ENCODE pre-processing delay due to IBC) |
976 | 0 | const int tileCol = slice.pps->ctuToTileCol[ctuPosX]; |
977 | 0 | const int lastCtuPosXInTile = slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1; |
978 | 0 | const int checkRight = std::min<int>( encSlice->m_ctuEncDelay, lastCtuPosXInTile - ctuPosX ); |
979 | |
|
980 | 0 | const bool hasTiles = encSlice->m_pcEncCfg->m_tileParallelCtuEnc && slice.pps->getNumTiles() > 1; |
981 | | |
982 | | // need to check line above bcs of tiling, which allows CTU_ENCODE to run independently across tiles |
983 | 0 | if( hasTiles ) |
984 | 0 | { |
985 | 0 | if( ctuPosY > 0 ) |
986 | 0 | { |
987 | 0 | for( int i = -!!ctuPosX; i <= checkRight; i++ ) |
988 | 0 | if( pps.canFilterCtuBdry( ctuPosX, ctuPosY, i, -1 ) && processStates[ctuRsAddr - ctuStride + i] <= CTU_ENCODE ) |
989 | 0 | return false; |
990 | 0 | } |
991 | 0 | } |
992 | | |
993 | | // ensure all surrounding ctu's are encoded (intra pred requires non-reshaped and unfiltered residual, IBC requires unfiltered samples too) |
994 | | // check right with max offset (due to WPP condition above, this implies top-right has been already encoded) |
995 | 0 | for( int i = hasTiles ? -!!ctuPosX : checkRight; i <= checkRight; i++ ) |
996 | 0 | if( pps.canFilterCtuBdry( ctuPosX, ctuPosY, i, 0 ) && processStates[ctuRsAddr + i] <= CTU_ENCODE ) |
997 | 0 | return false; |
998 | | |
999 | | // check bottom right with 1 CTU delay (this is only required for intra pred) |
1000 | | // at the right picture border this will check the bottom CTU |
1001 | 0 | const int checkBottomRight = std::min<int>( 1, lastCtuPosXInTile - ctuPosX ); |
1002 | 0 | if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CTU_ENCODE, checkBottomRight ) ) |
1003 | 0 | return false; |
1004 | | |
1005 | 0 | if( checkReadyState ) |
1006 | 0 | return true; |
1007 | | |
1008 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_rspLfVer ); |
1009 | | |
1010 | | // reshape |
1011 | 0 | if( slice.sps->lumaReshapeEnable && slice.picHeader->lmcsEnabled ) |
1012 | 0 | { |
1013 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_RESHAPER, &cs, CH_L ); |
1014 | 0 | PelBuf reco = pic->getRecoBuf( COMP_Y ).subBuf( x, y, width, height ); |
1015 | 0 | reco.rspSignal( pic->reshapeData.getInvLUT() ); |
1016 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1017 | 0 | } |
1018 | | |
1019 | | // loopfilter |
1020 | 0 | if( !cs.pps->deblockingFilterControlPresent || !cs.pps->deblockingFilterDisabled || cs.pps->deblockingFilterOverrideEnabled ) |
1021 | 0 | { |
1022 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_DEBLOCK_FILTER, &cs, CH_L ); |
1023 | | // calculate filter strengths |
1024 | 0 | encSlice->m_pLoopFilter->calcFilterStrengthsCTU( cs, ctuArea, true ); |
1025 | | |
1026 | | // vertical filter |
1027 | 0 | PelUnitBuf reco = cs.picture->getRecoBuf(); |
1028 | 0 | encSlice->m_pLoopFilter->xDeblockArea<EDGE_VER>( cs, ctuArea, MAX_NUM_CH, reco ); |
1029 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1030 | 0 | } |
1031 | |
|
1032 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_rspLfVer ); |
1033 | |
|
1034 | 0 | processStates[ ctuRsAddr ] = LF_HOR; |
1035 | 0 | } |
1036 | 0 | break; |
1037 | | |
1038 | | // horizontal loopfilter |
1039 | 0 | case LF_HOR: |
1040 | 0 | { |
1041 | | // ensure horizontal ordering (from top to bottom) |
1042 | 0 | if( checkCtuTaskNbTop ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR ) ) |
1043 | 0 | return false; |
1044 | | |
1045 | | // ensure vertical loop filter of neighbor ctu's will not modify current residual |
1046 | | // check top, top-right and right ctu |
1047 | | // (top, top-right checked implicitly due to ordering check above) |
1048 | 0 | if( checkCtuTaskNbRgt ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, RESHAPE_LF_VER ) ) |
1049 | 0 | return false; |
1050 | | |
1051 | 0 | if( checkReadyState ) |
1052 | 0 | return true; |
1053 | | |
1054 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_lfHor ); |
1055 | |
|
1056 | 0 | if( !cs.pps->deblockingFilterControlPresent || !cs.pps->deblockingFilterDisabled || cs.pps->deblockingFilterOverrideEnabled ) |
1057 | 0 | { |
1058 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_DEBLOCK_FILTER, &cs, CH_L ); |
1059 | 0 | PelUnitBuf reco = cs.picture->getRecoBuf(); |
1060 | 0 | encSlice->m_pLoopFilter->xDeblockArea<EDGE_HOR>( cs, ctuArea, MAX_NUM_CH, reco ); |
1061 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1062 | 0 | } |
1063 | |
|
1064 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_lfHor ); |
1065 | |
|
1066 | 0 | processStates[ ctuRsAddr ] = SAO_FILTER; |
1067 | 0 | } |
1068 | 0 | break; |
1069 | | |
1070 | | // SAO filter |
1071 | 0 | case SAO_FILTER: |
1072 | 0 | { |
1073 | | // general wpp conditions, top and top-right ctu have to be filtered |
1074 | 0 | if( checkCtuTaskNbTop ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER, true ) ) return false; |
1075 | 0 | if( checkCtuTaskNbTopRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER, true ) ) return false; |
1076 | | |
1077 | | // ensure loop filter of neighbor ctu's will not modify current residual |
1078 | | // sao processing dependents on +1 pixel to each side |
1079 | | // due to wpp condition above, only right, bottom and bottom-right ctu have to be checked |
1080 | 0 | if( checkCtuTaskNbRgt ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR, true ) ) return false; |
1081 | 0 | if( checkCtuTaskNbBot ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR, true ) ) return false; |
1082 | 0 | if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, LF_HOR, 1, true ) ) return false; |
1083 | | |
1084 | 0 | if( checkReadyState ) |
1085 | 0 | return true; |
1086 | | |
1087 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_sao ); |
1088 | | |
1089 | | // SAO filter |
1090 | 0 | if( slice.sps->saoEnabled && pic->useSAO ) |
1091 | 0 | { |
1092 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_SAO, &cs, CH_L ); |
1093 | 0 | TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ]; |
1094 | 0 | PerThreadRsrc* taskRsrc = encSlice->m_ThreadRsrc[ threadIdx ]; |
1095 | 0 | EncSampleAdaptiveOffset& encSao = lineEncRsrc->m_encSao; |
1096 | |
|
1097 | 0 | encSao.setCtuEncRsrc( &lineEncRsrc->m_SaoCABACEstimator, &taskRsrc->m_CtxCache ); |
1098 | 0 | encSao.storeCtuReco( cs, ctuArea, ctuPosX, ctuPosY ); |
1099 | 0 | encSao.getCtuStatistics( cs, encSlice->m_saoStatData, ctuArea, ctuRsAddr ); |
1100 | 0 | encSao.decideCtuParams( cs, encSlice->m_saoStatData, encSlice->m_saoEnabled, encSlice->m_saoAllDisabled, ctuArea, ctuRsAddr, &encSlice->m_saoReconParams[ 0 ], cs.picture->getSAO() ); |
1101 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1102 | 0 | } |
1103 | | |
1104 | | // ALF border extension |
1105 | 0 | if( cs.sps->alfEnabled ) |
1106 | 0 | { |
1107 | | // we have to do some kind of position aware boundary padding |
1108 | | // it's done here because the conditions are readable |
1109 | 0 | PelUnitBuf recoBuf = cs.picture->getRecoBuf(); |
1110 | 0 | const int fltSize = ( MAX_ALF_FILTER_LENGTH + 1 ) >> 1; |
1111 | 0 | const int xL = ( ctuPosX == 0 ) ? ( x-fltSize ) : ( x ); |
1112 | 0 | const int xR = ( ctuPosX+1 == pcv.widthInCtus ) ? ( x+width+fltSize ) : ( x+width ); |
1113 | |
|
1114 | 0 | if( ctuPosX == 0 ) recoBuf.extendBorderPelLft( y, height, fltSize ); |
1115 | 0 | if( ctuPosX+1 == pcv.widthInCtus ) recoBuf.extendBorderPelRgt( y, height, fltSize ); |
1116 | 0 | if( ctuPosY == 0 ) recoBuf.extendBorderPelTop( xL, xR-xL, fltSize ); |
1117 | 0 | if( ctuPosY+1 == pcv.heightInCtus ) recoBuf.extendBorderPelBot( xL, xR-xL, fltSize ); |
1118 | |
|
1119 | 0 | encSlice->m_pALF->copyCTUforALF(cs, ctuPosX, ctuPosY); |
1120 | 0 | } |
1121 | | |
1122 | | // DMVR refinement can be stored now |
1123 | 0 | if( slice.sps->DMVR && !slice.picHeader->disDmvrFlag ) |
1124 | 0 | { |
1125 | 0 | CS::setRefinedMotionFieldCTU( cs, ctuPosX, ctuPosY ); |
1126 | 0 | } |
1127 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_sao ); |
1128 | |
|
1129 | 0 | const int tileCol = slice.pps->ctuToTileCol[ctuPosX]; |
1130 | 0 | const int lastCtuColInTileRow = slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1; |
1131 | 0 | if( ctuPosX == lastCtuColInTileRow ) |
1132 | 0 | { |
1133 | 0 | processStates[ctuRsAddr] = ALF_GET_STATISTICS; |
1134 | 0 | } |
1135 | 0 | else |
1136 | 0 | { |
1137 | 0 | processStates[ctuRsAddr] = PROCESS_DONE; |
1138 | 0 | return true; |
1139 | 0 | } |
1140 | 0 | } |
1141 | 0 | break; |
1142 | | |
1143 | 0 | case ALF_GET_STATISTICS: |
1144 | 0 | { |
1145 | | // ensure all surrounding ctu's are filtered (ALF will use pixels of adjacent CTU's) |
1146 | | // due to wpp condition above in SAO_FILTER, only right, bottom and bottom-right ctu have to be checked |
1147 | 0 | if( checkCtuTaskNbRgt ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false; |
1148 | 0 | if( checkCtuTaskNbBot ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false; |
1149 | 0 | if( checkCtuTaskNbBotRgt( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, SAO_FILTER ) ) return false; |
1150 | | |
1151 | 0 | if( checkReadyState ) |
1152 | 0 | return true; |
1153 | | |
1154 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_alf_stat ); |
1155 | | |
1156 | | // ALF pre-processing |
1157 | 0 | if( slice.sps->alfEnabled ) |
1158 | 0 | { |
1159 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L ); |
1160 | 0 | PelUnitBuf recoBuf = cs.picture->getRecoBuf(); |
1161 | 0 | const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]]; |
1162 | 0 | for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ ) |
1163 | 0 | { |
1164 | 0 | encSlice->m_pALF->getStatisticsCTU( *cs.picture, cs, recoBuf, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf ); |
1165 | 0 | } |
1166 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1167 | 0 | } |
1168 | |
|
1169 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_alf_stat ); |
1170 | | |
1171 | | // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode) |
1172 | 0 | const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu; |
1173 | 0 | processStates[ctuRsAddr] = (ctuRsAddr < deriveFilterCtu) ? ALF_RECONSTRUCT: ALF_DERIVE_FILTER; |
1174 | 0 | } |
1175 | 0 | break; |
1176 | | |
1177 | 0 | case ALF_DERIVE_FILTER: |
1178 | 0 | { |
1179 | 0 | const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu; |
1180 | 0 | if( ctuRsAddr == deriveFilterCtu ) |
1181 | 0 | { |
1182 | | // ensure statistics from all previous ctu's have been collected |
1183 | 0 | int numCheckLines = deriveFilterCtu / pcv.widthInCtus + 1; |
1184 | 0 | for( int y = 0; y < numCheckLines; y++ ) |
1185 | 0 | { |
1186 | 0 | for( int tileCol = 0; tileCol < slice.pps->numTileCols; tileCol++ ) |
1187 | 0 | { |
1188 | 0 | const int lastCtuInTileRow = y * pcv.widthInCtus + slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1; |
1189 | 0 | if( processStates[lastCtuInTileRow] <= ALF_GET_STATISTICS ) |
1190 | 0 | return false; |
1191 | 0 | } |
1192 | 0 | } |
1193 | 0 | } |
1194 | 0 | else if( syncLines ) |
1195 | 0 | { |
1196 | | // ALF bitstream coding dependency for the sub-sequent ctu-lines |
1197 | 0 | if( processStates[deriveFilterCtu] < ALF_RECONSTRUCT || checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_DERIVE_FILTER ) ) |
1198 | 0 | return false; |
1199 | 0 | } |
1200 | 0 | if( checkReadyState ) |
1201 | 0 | return true; |
1202 | | |
1203 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_alf_derive ); |
1204 | | // ALF post-processing |
1205 | 0 | if( slice.sps->alfEnabled ) |
1206 | 0 | { |
1207 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L ); |
1208 | 0 | if( ctuRsAddr == deriveFilterCtu ) |
1209 | 0 | { |
1210 | 0 | encSlice->m_pALF->initDerivation( slice ); |
1211 | 0 | encSlice->m_pALF->deriveFilter( *cs.picture, cs, slice.getLambdas(), deriveFilterCtu + 1 ); |
1212 | 0 | encSlice->m_pALF->reconstructCoeffAPSs( cs, cs.slice->alfEnabled[COMP_Y], cs.slice->alfEnabled[COMP_Cb] || cs.slice->alfEnabled[COMP_Cr], false ); |
1213 | 0 | } |
1214 | 0 | else if( syncLines ) |
1215 | 0 | { |
1216 | | // in sync lines mode: derive/select filter for the remaining lines |
1217 | 0 | TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ]; |
1218 | 0 | PerThreadRsrc* taskRsrc = encSlice->m_ThreadRsrc[ threadIdx ]; |
1219 | 0 | const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]]; |
1220 | 0 | for(int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++) |
1221 | 0 | { |
1222 | 0 | encSlice->m_pALF->selectFilterForCTU( cs, &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, ctu ); |
1223 | 0 | } |
1224 | 0 | } |
1225 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1226 | 0 | } |
1227 | |
|
1228 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_alf_derive ); |
1229 | 0 | processStates[ ctuRsAddr ] = ALF_RECONSTRUCT; |
1230 | 0 | } |
1231 | 0 | break; |
1232 | | |
1233 | 0 | case ALF_RECONSTRUCT: |
1234 | 0 | { |
1235 | | // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode) |
1236 | 0 | const unsigned deriveFilterCtu = encSlice->m_alfDeriveCtu; |
1237 | 0 | if( processStates[deriveFilterCtu] < ALF_RECONSTRUCT ) |
1238 | 0 | return false; |
1239 | 0 | else if( syncLines && ctuRsAddr > deriveFilterCtu && encSlice->m_pALF->getAsuHeightInCtus() > 1 ) |
1240 | 0 | { |
1241 | 0 | const int asuHeightInCtus = encSlice->m_pALF->getAsuHeightInCtus(); |
1242 | 0 | const int botCtuLineInAsu = std::min( (( ctuPosY & ( ~(asuHeightInCtus - 1) ) ) + asuHeightInCtus - 1), (int)pcv.heightInCtus - 1 ); |
1243 | 0 | if( processStates[botCtuLineInAsu * ctuStride + ctuPosX] < ALF_RECONSTRUCT ) |
1244 | 0 | return false; |
1245 | 0 | } |
1246 | | |
1247 | 0 | if( checkReadyState ) |
1248 | 0 | return true; |
1249 | | |
1250 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_alf_recon ); |
1251 | |
|
1252 | 0 | if( slice.sps->alfEnabled ) |
1253 | 0 | { |
1254 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L ); |
1255 | 0 | const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]]; |
1256 | 0 | for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ ) |
1257 | 0 | { |
1258 | 0 | encSlice->m_pALF->reconstructCTU_MT( *cs.picture, cs, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf ); |
1259 | 0 | } |
1260 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1261 | 0 | } |
1262 | |
|
1263 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_alf_recon ); |
1264 | 0 | processStates[ctuRsAddr] = CCALF_GET_STATISTICS; |
1265 | 0 | } |
1266 | | // dont break, no additional deps, can continue straigt away! |
1267 | | //break; |
1268 | | |
1269 | 0 | case CCALF_GET_STATISTICS: |
1270 | 0 | { |
1271 | 0 | if( checkCtuTaskNbTop ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_RECONSTRUCT ) ) return false; |
1272 | 0 | if( checkCtuTaskNbBot ( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, ALF_RECONSTRUCT ) ) return false; |
1273 | | |
1274 | 0 | if( checkReadyState ) |
1275 | 0 | return true; |
1276 | | |
1277 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_stat ); |
1278 | | |
1279 | | // ALF pre-processing |
1280 | 0 | if( slice.sps->ccalfEnabled ) |
1281 | 0 | { |
1282 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_ALF, &cs, CH_L); |
1283 | 0 | const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]]; |
1284 | 0 | for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ ) |
1285 | 0 | { |
1286 | 0 | encSlice->m_pALF->deriveStatsForCcAlfFilteringCTU( cs, COMP_Cb, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf ); |
1287 | 0 | encSlice->m_pALF->deriveStatsForCcAlfFilteringCTU( cs, COMP_Cr, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf ); |
1288 | 0 | } |
1289 | 0 | PROFILER_EXT_ACCUM_AND_START_NEW_SET( 1, _TPROF, P_IGNORE, &cs, CH_L ); |
1290 | 0 | } |
1291 | |
|
1292 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_stat ); |
1293 | | |
1294 | | // start alf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode) |
1295 | 0 | processStates[ctuRsAddr] = (ctuRsAddr < encSlice->m_ccalfDeriveCtu) ? CCALF_RECONSTRUCT: CCALF_DERIVE_FILTER; |
1296 | 0 | } |
1297 | 0 | break; |
1298 | | |
1299 | 0 | case CCALF_DERIVE_FILTER: |
1300 | 0 | { |
1301 | | // synchronization dependencies |
1302 | 0 | const unsigned deriveFilterCtu = encSlice->m_ccalfDeriveCtu; |
1303 | 0 | if( ctuRsAddr == deriveFilterCtu ) |
1304 | 0 | { |
1305 | | // ensure statistics from all previous ctu's have been collected |
1306 | 0 | int numCheckLines = deriveFilterCtu / pcv.widthInCtus + 1; |
1307 | 0 | for( int y = 0; y < numCheckLines; y++ ) |
1308 | 0 | { |
1309 | 0 | for( int tileCol = 0; tileCol < slice.pps->numTileCols; tileCol++ ) |
1310 | 0 | { |
1311 | 0 | const int lastCtuInTileRow = y * pcv.widthInCtus + slice.pps->tileColBd[tileCol] + slice.pps->tileColWidth[tileCol] - 1; |
1312 | 0 | if( processStates[lastCtuInTileRow] <= CCALF_GET_STATISTICS ) |
1313 | 0 | return false; |
1314 | 0 | } |
1315 | 0 | } |
1316 | 0 | } |
1317 | 0 | else if( syncLines ) |
1318 | 0 | { |
1319 | | // ALF bitstream coding dependency for the sub-sequent CTU-lines |
1320 | 0 | if( processStates[deriveFilterCtu] < CCALF_RECONSTRUCT || checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_DERIVE_FILTER ) ) |
1321 | 0 | return false; |
1322 | 0 | } |
1323 | 0 | if( checkReadyState ) |
1324 | 0 | return true; |
1325 | | |
1326 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_derive ); |
1327 | | |
1328 | | // start task |
1329 | 0 | if( slice.sps->ccalfEnabled ) |
1330 | 0 | { |
1331 | 0 | if( ctuRsAddr == deriveFilterCtu ) |
1332 | 0 | { |
1333 | 0 | encSlice->m_pALF->deriveCcAlfFilter( *cs.picture, cs, encSlice->m_ccalfDeriveCtu + 1 ); |
1334 | 0 | } |
1335 | 0 | else if( syncLines ) |
1336 | 0 | { |
1337 | | // in sync lines mode: derive/select filter for the remaining lines |
1338 | 0 | TileLineEncRsrc* lineEncRsrc = encSlice->m_TileLineEncRsrc[ lineIdx ]; |
1339 | 0 | PerThreadRsrc* taskRsrc = encSlice->m_ThreadRsrc[ threadIdx ]; |
1340 | 0 | const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]]; |
1341 | 0 | encSlice->m_pALF->selectCcAlfFilterForCtuLine( cs, COMP_Cb, cs.getRecoBuf(), &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, firstCtuInRow, ctuRsAddr ); |
1342 | 0 | encSlice->m_pALF->selectCcAlfFilterForCtuLine( cs, COMP_Cr, cs.getRecoBuf(), &lineEncRsrc->m_AlfCABACEstimator, &taskRsrc->m_CtxCache, firstCtuInRow, ctuRsAddr ); |
1343 | 0 | } |
1344 | 0 | } |
1345 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_derive ); |
1346 | |
|
1347 | 0 | processStates[ctuRsAddr] = CCALF_RECONSTRUCT; |
1348 | 0 | } |
1349 | 0 | break; |
1350 | | |
1351 | 0 | case CCALF_RECONSTRUCT: |
1352 | 0 | { |
1353 | | // start ccalf filter derivation either for a sub-set of CTUs (syncLines mode) or for the whole picture (regular mode) |
1354 | 0 | const unsigned deriveFilterCtu = encSlice->m_ccalfDeriveCtu; |
1355 | 0 | if( processStates[deriveFilterCtu] < CCALF_RECONSTRUCT ) |
1356 | 0 | return false; |
1357 | | |
1358 | 0 | if( syncLines ) |
1359 | 0 | { |
1360 | | // ensure line-by-line reconstruction due to line synchronization |
1361 | 0 | if( checkCtuTaskNbTop( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_RECONSTRUCT ) ) return false; |
1362 | | // check bottom due to rec. buffer usage in ccalf statistics |
1363 | 0 | if( checkCtuTaskNbBot( pps, ctuPosX, ctuPosY, ctuRsAddr, processStates, CCALF_GET_STATISTICS ) ) return false; |
1364 | 0 | } |
1365 | | |
1366 | 0 | if( checkReadyState ) |
1367 | 0 | return true; |
1368 | | |
1369 | 0 | ITT_TASKSTART( itt_domain_encode, itt_handle_ccalf_recon ); |
1370 | |
|
1371 | 0 | if( slice.sps->ccalfEnabled ) |
1372 | 0 | { |
1373 | 0 | const int firstCtuInRow = ctuRsAddr + 1 - slice.pps->tileColWidth[slice.pps->ctuToTileCol[ctuPosX]]; |
1374 | 0 | for( int ctu = firstCtuInRow; ctu <= ctuRsAddr; ctu++ ) |
1375 | 0 | { |
1376 | 0 | encSlice->m_pALF->applyCcAlfFilterCTU( cs, COMP_Cb, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf ); |
1377 | 0 | encSlice->m_pALF->applyCcAlfFilterCTU( cs, COMP_Cr, ctu, encSlice->m_ThreadRsrc[ threadIdx ]->m_alfTempCtuBuf ); |
1378 | 0 | } |
1379 | 0 | } |
1380 | |
|
1381 | 0 | ITT_TASKEND( itt_domain_encode, itt_handle_ccalf_recon ); |
1382 | | |
1383 | | // extend pic border |
1384 | | // CCALF reconstruction stage is done per tile, ensure that all tiles in current CTU row are done |
1385 | 0 | if( ++(pic->m_tileColsDone->at(ctuPosY)) >= pps.numTileCols ) |
1386 | 0 | { |
1387 | 0 | PelUnitBuf recoBuf = cs.picture->getRecoBuf(); |
1388 | 0 | const int margin = cs.picture->margin; |
1389 | 0 | recoBuf.extendBorderPelLft( y, height, margin ); |
1390 | 0 | recoBuf.extendBorderPelRgt( y, height, margin ); |
1391 | 0 | if(ctuPosY == 0) |
1392 | 0 | recoBuf.extendBorderPelTop( -margin, pcv.lumaWidth + 2 * margin, margin ); |
1393 | 0 | if(ctuPosY + 1 == pcv.heightInCtus) |
1394 | 0 | recoBuf.extendBorderPelBot( -margin, pcv.lumaWidth + 2 * margin, margin ); |
1395 | | |
1396 | | // for IFP lines synchro, do an additional increment signaling that CTU row is ready |
1397 | 0 | if( syncLines ) |
1398 | 0 | ++(pic->m_tileColsDone->at( ctuPosY )); |
1399 | 0 | } |
1400 | | |
1401 | | // perform finish only once for whole picture |
1402 | 0 | const unsigned finishCtu = pcv.sizeInCtus - 1; |
1403 | 0 | if( ctuRsAddr < finishCtu ) |
1404 | 0 | { |
1405 | 0 | processStates[ctuRsAddr] = PROCESS_DONE; |
1406 | | // processing done => terminate thread |
1407 | 0 | return true; |
1408 | 0 | } |
1409 | 0 | processStates[ctuRsAddr] = FINISH_SLICE; |
1410 | 0 | } |
1411 | | |
1412 | 0 | case FINISH_SLICE: |
1413 | 0 | { |
1414 | 0 | CHECK( ctuRsAddr != pcv.sizeInCtus - 1, "invalid state, finish slice only once for last ctu" ); |
1415 | | |
1416 | | // ensure all coding tasks have been done for all previous ctu's |
1417 | 0 | for( int i = 0; i < ctuRsAddr; i++ ) |
1418 | 0 | if( processStates[ i ] < FINISH_SLICE ) |
1419 | 0 | return false; |
1420 | | |
1421 | 0 | if( checkReadyState ) |
1422 | 0 | return true; |
1423 | | |
1424 | 0 | encSlice->finishCompressSlice( cs.picture, slice ); |
1425 | |
|
1426 | 0 | processStates[ ctuRsAddr ] = PROCESS_DONE; |
1427 | | // processing done => terminate thread |
1428 | 0 | return true; |
1429 | 0 | } |
1430 | | |
1431 | 0 | case PROCESS_DONE: |
1432 | 0 | CHECK( true, "process state is PROCESS_DONE, but thread is still running" ); |
1433 | 0 | return true; |
1434 | | |
1435 | 0 | default: |
1436 | 0 | CHECK( true, "unknown process state" ); |
1437 | 0 | return true; |
1438 | 0 | } |
1439 | | |
1440 | 0 | return false; |
1441 | 0 | } Unexecuted instantiation: bool vvenc::EncSlice::xProcessCtuTask<false>(int, vvenc::CtuEncParam*) Unexecuted instantiation: bool vvenc::EncSlice::xProcessCtuTask<true>(int, vvenc::CtuEncParam*) |
1442 | | |
1443 | | /* Entropy-codes every CTU listed in the slice map of pic->cs with m_CABACWriter into one substream per tile or (with entropy coding sync) per CTU row inside a tile column, records the size of every substream except the last one in the slice, appends the substreams to pic->sliceDataStreams[ 0 ] and adds the bin count of the writer to pic->sliceDataNumBins. */ void EncSlice::encodeSliceData( Picture* pic ) |
1444 | 0 | { |
1445 | 0 | CodingStructure& cs = *pic->cs; |
1446 | 0 | Slice* const slice = cs.slice; |
1447 | 0 | const uint32_t startCtuTsAddr = slice->sliceMap.ctuAddrInSlice[0]; |
1448 | 0 | const uint32_t boundingCtuTsAddr = cs.pcv->sizeInCtus; |
1449 | 0 | const bool wavefrontsEnabled = slice->sps->entropyCodingSyncEnabled; |
1450 | | |
1451 | | // select the CABAC init table: the slice type is used when cabacInitPresent is off or a RAS init is pending, otherwise the table index kept from the previously coded slice; this ensures that independently encoded bitstream chunks can be combined bit-exactly |
1452 | 0 | const SliceType cabacTableIdx = ! slice->pps->cabacInitPresent || slice->pendingRasInit ? slice->sliceType : m_encCABACTableIdx; |
1453 | 0 | slice->encCABACTableIdx = cabacTableIdx; |
1454 | | |
1455 | | // initialise entropy coder for the slice |
1456 | 0 | m_CABACWriter.initCtxModels( *slice ); |
1457 |

1458 | 0 | DTRACE( g_trace_ctx, D_HEADER, "=========== POC: %d ===========\n", slice->poc ); |
1459 |

1460 | 0 | int prevQP[MAX_NUM_CH]; |
1461 | 0 | prevQP[0] = prevQP[1] = slice->sliceQp; |
1462 |

1463 | 0 | const PreCalcValues& pcv = *cs.pcv; |
1464 | 0 | const uint32_t widthInCtus = pcv.widthInCtus; |
1465 | 0 | uint32_t uiSubStrm = 0; |
1466 | 0 | const int numSubstreamsColumns = slice->pps->numTileCols; |
1467 | 0 | const int numSubstreamRows = slice->sps->entropyCodingSyncEnabled ? pic->cs->pcv->heightInCtus : slice->pps->numTileRows; |
1468 | 0 | const int numSubstreams = std::max<int>( numSubstreamRows * numSubstreamsColumns, 0/*(int)pic->brickMap->bricks.size()*/ ); |
1469 | 0 | std::vector<OutputBitstream> substreamsOut( numSubstreams ); |
1470 |

1471 | 0 | slice->clearSubstreamSizes(); |
1472 |

1473 | 0 | for( uint32_t ctuTsAddr = startCtuTsAddr; ctuTsAddr < boundingCtuTsAddr; ctuTsAddr++ ) |
1474 | 0 | { |
1475 | 0 | const uint32_t ctuRsAddr = slice->sliceMap.ctuAddrInSlice[ctuTsAddr]; |
1476 | 0 | const uint32_t ctuXPosInCtus = ctuRsAddr % widthInCtus; |
1477 | 0 | const uint32_t ctuYPosInCtus = ctuRsAddr / widthInCtus; |
1478 | 0 | const uint32_t tileXPosInCtus = slice->pps->tileColBd[cs.pps->ctuToTileCol[ctuXPosInCtus]]; |
1479 | 0 | const uint32_t tileYPosInCtus = slice->pps->tileRowBd[cs.pps->ctuToTileRow[ctuYPosInCtus]]; |
1480 |

1481 | 0 | DTRACE_UPDATE( g_trace_ctx, std::make_pair( "ctu", ctuRsAddr ) ); |
1482 |

1483 | 0 | const Position pos (ctuXPosInCtus * pcv.maxCUSize, ctuYPosInCtus * pcv.maxCUSize); |
1484 | 0 | const UnitArea ctuArea (cs.area.chromaFormat, Area(pos.x, pos.y, pcv.maxCUSize, pcv.maxCUSize)); |
1485 | 0 | CHECK( uiSubStrm >= numSubstreams, "array index out of bounds" ); |
1486 | 0 | m_CABACWriter.initBitstream( &substreamsOut[ uiSubStrm ] ); |
1487 | | |
1488 | | // set up the CABAC context state for this CTU: the first CTU of a tile restarts from the initial models and the slice QP |
1489 | 0 | if (ctuXPosInCtus == tileXPosInCtus && ctuYPosInCtus == tileYPosInCtus ) |
1490 | 0 | { |
1491 | 0 | if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset |
1492 | 0 | { |
1493 | 0 | m_CABACWriter.initCtxModels( *slice ); |
1494 | 0 | } |
1495 | 0 | prevQP[0] = prevQP[1] = slice->sliceQp; |
1496 | 0 | } |
1497 | 0 | else if (ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled) |
1498 | 0 | { |
1499 | | // Start of a CTU line inside the tile with wavefronts enabled: reset the models, then synchronize the CABAC probabilities with the CTU directly above (pos.offset( 0, -1 )) if that CTU is available. |
1500 | 0 | if (ctuTsAddr != startCtuTsAddr) // if it is the first CTU, then the entropy coder has already been reset |
1501 | 0 | { |
1502 | 0 | m_CABACWriter.initCtxModels( *slice ); |
1503 | 0 | } |
1504 | 0 | if( cs.getCURestricted( pos.offset( 0, -1 ), pos, slice->independentSliceIdx, slice->pps->getTileIdx( ctuXPosInCtus, ctuYPosInCtus ), CH_L, TREE_D ) ) |
1505 | 0 | { |
1506 | | // The CTU above is available, so continue from the context state stored in m_entropyCodingSyncContextState. |
1507 | 0 | m_CABACWriter.getCtx() = m_entropyCodingSyncContextState; |
1508 | 0 | } |
1509 | 0 | prevQP[0] = prevQP[1] = slice->sliceQp; |
1510 | 0 | } |
1511 |

1512 | 0 | m_CABACWriter.coding_tree_unit( cs, ctuArea, prevQP, ctuRsAddr ); |
1513 | | |
1514 | | // store the probabilities reached after coding the first CTU of the line (within the tile) into the sync buffer used by the next CTU line |
1515 | 0 | if( ctuXPosInCtus == tileXPosInCtus && wavefrontsEnabled ) |
1516 | 0 | { |
1517 | 0 | m_entropyCodingSyncContextState = m_CABACWriter.getCtx(); |
1518 | 0 | } |
1519 | | |
1520 | | // terminate the sub-stream, if required (end of slice-segment, end of tile, end of wavefront-CTU-row): |
1521 | 0 | bool isMoreCTUsinSlice = ctuTsAddr != (boundingCtuTsAddr - 1); |
1522 | 0 | bool isLastCTUinTile = isMoreCTUsinSlice && slice->pps->getTileIdx( ctuRsAddr ) != slice->pps->getTileIdx( slice->sliceMap.ctuAddrInSlice[ctuTsAddr+1] ); |
1523 | 0 | bool isLastCTUinWPP = wavefrontsEnabled && isMoreCTUsinSlice && !isLastCTUinTile && ( (slice->sliceMap.ctuAddrInSlice[ctuTsAddr+1] % widthInCtus) == cs.pps->tileColBd[cs.pps->ctuToTileCol[slice->sliceMap.ctuAddrInSlice[ctuTsAddr+1] % widthInCtus]] ); //TODO: adjust tile bound condition |
1524 |

1525 | 0 | if (isLastCTUinWPP || !isMoreCTUsinSlice || isLastCTUinTile ) // this is the last CTU of either tile/brick/WPP/slice |
1526 | 0 | { |
1527 | 0 | m_CABACWriter.end_of_slice(); |
1528 | | |
1529 | | // Byte-alignment in slice_data() when new tile |
1530 | 0 | substreamsOut[ uiSubStrm ].writeByteAlignment(); |
1531 |

1532 | 0 | if (isMoreCTUsinSlice) // a size entry is recorded only when this is not the last substream in the slice |
1533 | 0 | { |
1534 | | // write sub-stream size: number of written bits >> 3 plus the value returned by countStartCodeEmulations() |
1535 | 0 | slice->addSubstreamSize( ( substreamsOut[ uiSubStrm ].getNumberOfWrittenBits() >> 3 ) + substreamsOut[ uiSubStrm ].countStartCodeEmulations() ); |
1536 | 0 | } |
1537 | 0 | uiSubStrm++; |
1538 | 0 | } |
1539 | 0 | } // CTU-loop |
1540 | | |
1541 | 0 | if(slice->pps->cabacInitPresent) |
1542 | 0 | { |
1543 | 0 | m_encCABACTableIdx = m_CABACWriter.getCtxInitId( *slice ); |
1544 | 0 | } |
1545 | 0 | else |
1546 | 0 | { |
1547 | 0 | m_encCABACTableIdx = slice->sliceType; |
1548 | 0 | } |
1549 | | |
1550 | | // concatenate substreams: one more than the number of recorded sizes, because the last substream has no size entry |
1551 | 0 | OutputBitstream& outStream = pic->sliceDataStreams[ 0/*slice->sliceIdx*/ ]; |
1552 | 0 | for ( int i = 0; i < slice->getNumberOfSubstreamSizes() + 1; i++ ) |
1553 | 0 | { |
1554 | 0 | outStream.addSubstream( &(substreamsOut[ i ]) ); |
1555 | 0 | } |
1556 | 0 | pic->sliceDataNumBins += m_CABACWriter.getNumBins(); |
1557 | 0 | } |
1558 | | |
1559 | | } // namespace vvenc |
1560 | | |
1561 | | //! \} |
1562 | | |