/src/vvenc/source/Lib/EncoderLib/IntraSearch.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file IntraSearch.cpp |
45 | | * \brief encoder intra search class |
46 | | */ |
47 | | |
48 | | #include "IntraSearch.h" |
49 | | #include "EncPicture.h" |
50 | | #include "CommonLib/CommonDef.h" |
51 | | #include "CommonLib/Rom.h" |
52 | | #include "CommonLib/Picture.h" |
53 | | #include "CommonLib/UnitTools.h" |
54 | | #include "CommonLib/dtrace_next.h" |
55 | | #include "CommonLib/dtrace_buffer.h" |
56 | | #include "CommonLib/Reshape.h" |
57 | | #include <math.h> |
58 | | #include "vvenc/vvencCfg.h" |
59 | | |
60 | | //! \ingroup EncoderLib |
61 | | //! \{ |
62 | | |
63 | | namespace vvenc { |
64 | | |
65 | | #define PLTCtx(c) SubCtx( Ctx::Palette, c ) |
66 | | |
67 | | IntraSearch::IntraSearch() |
68 | 0 | : m_pSaveCS (nullptr) |
69 | 0 | , m_pcEncCfg (nullptr) |
70 | 0 | , m_pcTrQuant (nullptr) |
71 | 0 | , m_pcRdCost (nullptr) |
72 | 0 | , m_CABACEstimator(nullptr) |
73 | 0 | , m_CtxCache (nullptr) |
74 | 0 | { |
75 | 0 | } |
76 | | |
77 | | void IntraSearch::init(const VVEncCfg &encCfg, TrQuant *pTrQuant, RdCost *pRdCost, SortedPelUnitBufs<SORTED_BUFS> *pSortedPelUnitBufs, XUCache &unitCache ) |
78 | 0 | { |
79 | 0 | IntraPrediction::init( encCfg.m_internChromaFormat, encCfg.m_internalBitDepth[ CH_L ] ); |
80 | |
|
81 | 0 | m_pcEncCfg = &encCfg; |
82 | 0 | m_pcTrQuant = pTrQuant; |
83 | 0 | m_pcRdCost = pRdCost; |
84 | 0 | m_SortedPelUnitBufs = pSortedPelUnitBufs; |
85 | |
|
86 | 0 | const ChromaFormat chrFormat = encCfg.m_internChromaFormat; |
87 | 0 | const int maxCUSize = encCfg.m_CTUSize; |
88 | |
|
89 | 0 | Area area = Area( 0, 0, maxCUSize, maxCUSize ); |
90 | |
|
91 | 0 | m_pTempCS = new CodingStructure( unitCache, nullptr ); |
92 | 0 | m_pBestCS = new CodingStructure( unitCache, nullptr ); |
93 | |
|
94 | 0 | m_pTempCS->createForSearch( chrFormat, area ); |
95 | 0 | m_pBestCS->createForSearch( chrFormat, area ); |
96 | |
|
97 | 0 | const int uiNumSaveLayersToAllocate = 3; |
98 | 0 | m_pSaveCS = new CodingStructure*[uiNumSaveLayersToAllocate]; |
99 | 0 | for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ ) |
100 | 0 | { |
101 | 0 | m_pSaveCS[ layer ] = new CodingStructure( unitCache, nullptr ); |
102 | 0 | m_pSaveCS[ layer ]->createForSearch( chrFormat, Area( 0, 0, maxCUSize, maxCUSize ) ); |
103 | 0 | m_pSaveCS[ layer ]->initStructData(); |
104 | 0 | } |
105 | |
|
106 | 0 | CompArea chromaArea( COMP_Cb, chrFormat, area, true ); |
107 | 0 | for( int i = 0; i < 5; i++ ) |
108 | 0 | { |
109 | 0 | m_orgResiCb[i].create( chromaArea ); |
110 | 0 | m_orgResiCr[i].create( chromaArea ); |
111 | 0 | } |
112 | 0 | } |
113 | | |
114 | | void IntraSearch::destroy() |
115 | 0 | { |
116 | 0 | if ( m_pSaveCS ) |
117 | 0 | { |
118 | 0 | const int uiNumSaveLayersToAllocate = 3; |
119 | 0 | for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ ) |
120 | 0 | { |
121 | 0 | if ( m_pSaveCS[ layer ] ) { m_pSaveCS[ layer ]->destroy(); delete m_pSaveCS[ layer ]; } |
122 | 0 | } |
123 | 0 | delete[] m_pSaveCS; |
124 | 0 | m_pSaveCS = nullptr; |
125 | 0 | } |
126 | |
|
127 | 0 | if( m_pTempCS ) |
128 | 0 | { |
129 | 0 | m_pTempCS->destroy(); |
130 | 0 | delete m_pTempCS; m_pTempCS = nullptr; |
131 | 0 | } |
132 | |
|
133 | 0 | if( m_pBestCS ) |
134 | 0 | { |
135 | 0 | m_pBestCS->destroy(); |
136 | 0 | delete m_pBestCS; m_pBestCS = nullptr; |
137 | 0 | } |
138 | 0 | } |
139 | | |
140 | | IntraSearch::~IntraSearch() |
141 | 0 | { |
142 | 0 | destroy(); |
143 | 0 | } |
144 | | |
145 | | void IntraSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache *ctxCache ) |
146 | 0 | { |
147 | 0 | m_CABACEstimator = cabacEstimator; |
148 | 0 | m_CtxCache = ctxCache; |
149 | 0 | } |
150 | | |
151 | | ////////////////////////////////////////////////////////////////////////// |
152 | | // INTRA PREDICTION |
153 | | ////////////////////////////////////////////////////////////////////////// |
154 | | static constexpr double COST_UNKNOWN = -65536.0; |
155 | | |
156 | | double IntraSearch::xFindInterCUCost( CodingUnit &cu ) |
157 | 0 | { |
158 | 0 | if( CU::isConsIntra(cu) && !cu.slice->isIntra() ) |
159 | 0 | { |
160 | | //search corresponding inter CU cost |
161 | 0 | for( int i = 0; i < m_numCuInSCIPU; i++ ) |
162 | 0 | { |
163 | 0 | if( cu.lumaPos() == m_cuAreaInSCIPU[i].pos() && cu.lumaSize() == m_cuAreaInSCIPU[i].size() ) |
164 | 0 | { |
165 | 0 | return m_cuCostInSCIPU[i]; |
166 | 0 | } |
167 | 0 | } |
168 | 0 | } |
169 | 0 | return COST_UNKNOWN; |
170 | 0 | } |
171 | | |
172 | | void IntraSearch::xEstimateLumaRdModeList(int& numModesForFullRD, |
173 | | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, |
174 | | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& HadModeList, |
175 | | static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandCostList, |
176 | | static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandHadList, CodingUnit& cu, bool testMip ) |
177 | 0 | { |
178 | 0 | PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTRA_EST_RD_CAND, cu.cs, CH_L ); |
179 | 0 | const uint16_t intra_ctx_size = Ctx::IntraLumaMpmFlag.size() + Ctx::IntraLumaPlanarFlag.size() + Ctx::MultiRefLineIdx.size() + Ctx::ISPMode.size() + Ctx::MipFlag.size(); |
180 | 0 | const TempCtx ctxStartIntraCtx(m_CtxCache, SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), m_CABACEstimator->getCtx())); |
181 | 0 | const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE; |
182 | 0 | const int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes |
183 | |
|
184 | 0 | CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search"); |
185 | |
|
186 | 0 | const SPS& sps = *cu.cs->sps; |
187 | 0 | const bool fastMip = sps.MIP && m_pcEncCfg->m_useFastMIP; |
188 | | |
189 | | // this should always be true |
190 | 0 | CHECK( !cu.Y().valid(), "CU is not valid" ); |
191 | |
|
192 | 0 | const CompArea& area = cu.Y(); |
193 | |
|
194 | 0 | const UnitArea localUnitArea(area.chromaFormat, Area(0, 0, area.width, area.height)); |
195 | 0 | if( testMip) |
196 | 0 | { |
197 | 0 | numModesForFullRD += fastMip ? numModesForFullRD - std::min( m_pcEncCfg->m_useFastMIP, numModesForFullRD ) |
198 | 0 | : numModesForFullRD; |
199 | 0 | m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD + 1 ); |
200 | 0 | } |
201 | 0 | else |
202 | 0 | { |
203 | 0 | m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD ); |
204 | 0 | } |
205 | |
|
206 | 0 | CPelBuf piOrg = cu.cs->getOrgBuf(COMP_Y); |
207 | 0 | PelBuf piPred = m_SortedPelUnitBufs->getTestBuf(COMP_Y); |
208 | |
|
209 | 0 | const ReshapeData& reshapeData = cu.cs->picture->reshapeData; |
210 | 0 | if (cu.cs->picHeader->lmcsEnabled && reshapeData.getCTUFlag()) |
211 | 0 | { |
212 | 0 | piOrg = cu.cs->getRspOrgBuf(); |
213 | 0 | } |
214 | 0 | DistParam distParam = m_pcRdCost->setDistParam( piOrg, piPred, sps.bitDepths[ CH_L ], DF_HAD_2SAD); // Use HAD (SATD) cost |
215 | |
|
216 | 0 | const int numHadCand = (testMip ? 2 : 1) * 3; |
217 | | |
218 | | //*** Derive (regular) candidates using Hadamard |
219 | 0 | cu.mipFlag = false; |
220 | 0 | cu.multiRefIdx = 0; |
221 | | |
222 | | //===== init pattern for luma prediction ===== |
223 | 0 | initIntraPatternChType(cu, cu.Y(), true); |
224 | |
|
225 | 0 | bool satdChecked[NUM_INTRA_MODE] = { false }; |
226 | |
|
227 | 0 | unsigned mpmLst[NUM_MOST_PROBABLE_MODES]; |
228 | 0 | CU::getIntraMPMs(cu, mpmLst); |
229 | |
|
230 | 0 | const int decMsk = ( 1 << m_pcEncCfg->m_IntraEstDecBit ) - 1; |
231 | |
|
232 | 0 | m_parentCandList.resize( 0 ); |
233 | 0 | m_parentCandList.reserve( ( numModesAvailable >> m_pcEncCfg->m_IntraEstDecBit ) + 2 ); |
234 | |
|
235 | 0 | for( unsigned mode = 0; mode < numModesAvailable; mode++ ) |
236 | 0 | { |
237 | | // Skip checking extended Angular modes in the first round of SATD |
238 | 0 | if( mode > DC_IDX && ( mode & decMsk ) ) |
239 | 0 | { |
240 | 0 | continue; |
241 | 0 | } |
242 | | |
243 | 0 | m_parentCandList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ) ); |
244 | 0 | } |
245 | | |
246 | 0 | for( int decDst = 1 << m_pcEncCfg->m_IntraEstDecBit; decDst > 0; decDst >>= 1 ) |
247 | 0 | { |
248 | 0 | for( unsigned idx = 0; idx < m_parentCandList.size(); idx++ ) |
249 | 0 | { |
250 | 0 | int modeParent = m_parentCandList[idx].modeId; |
251 | |
|
252 | 0 | int off = decDst & decMsk; |
253 | 0 | int inc = decDst << 1; |
254 | |
|
255 | 0 | #if 1 // INTRA_AS_IN_VTM |
256 | 0 | if( off != 0 && ( modeParent <= ( DC_IDX + 1 ) || modeParent >= ( NUM_LUMA_MODE - 1 ) ) ) |
257 | 0 | { |
258 | 0 | continue; |
259 | 0 | } |
260 | | |
261 | 0 | #endif |
262 | 0 | for( int mode = modeParent - off; mode < modeParent + off + 1; mode += inc ) |
263 | 0 | { |
264 | 0 | if( satdChecked[mode] || mode < 0 || mode >= NUM_LUMA_MODE ) |
265 | 0 | { |
266 | 0 | continue; |
267 | 0 | } |
268 | | |
269 | 0 | cu.intraDir[0] = mode; |
270 | |
|
271 | 0 | initPredIntraParams( cu, cu.Y(), sps ); |
272 | 0 | distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf; |
273 | 0 | predIntraAng( COMP_Y, piPred, cu ); |
274 | | |
275 | | // Use the min between SAD and HAD as the cost criterion |
276 | | // SAD is scaled by 2 to align with the scaling of HAD |
277 | 0 | Distortion minSadHad = distParam.distFunc( distParam ); |
278 | |
|
279 | 0 | uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst ); |
280 | | |
281 | | //restore ctx |
282 | 0 | m_CABACEstimator->getCtx() = SubCtx( CtxSet( Ctx::IntraLumaMpmFlag(), intra_ctx_size ), ctxStartIntraCtx ); |
283 | |
|
284 | 0 | double cost = ( double ) minSadHad + ( double ) fracModeBits * sqrtLambdaForFirstPass; |
285 | 0 | DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, mode ); |
286 | |
|
287 | 0 | int insertPos = -1; |
288 | 0 | updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos ); |
289 | 0 | updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), ( double ) minSadHad, HadModeList, CandHadList, numHadCand ); |
290 | 0 | m_SortedPelUnitBufs->insert( insertPos, ( int ) RdModeList.size() ); |
291 | |
|
292 | 0 | satdChecked[mode] = true; |
293 | 0 | } |
294 | 0 | } |
295 | |
|
296 | 0 | m_parentCandList.resize( RdModeList.size() ); |
297 | 0 | std::copy( RdModeList.cbegin(), RdModeList.cend(), m_parentCandList.begin() ); |
298 | 0 | } |
299 | |
|
300 | 0 | const bool isFirstLineOfCtu = (((cu.block(COMP_Y).y)&((cu.cs->sps)->CTUSize - 1)) == 0); |
301 | 0 | if( m_pcEncCfg->m_MRL && ! isFirstLineOfCtu ) |
302 | 0 | { |
303 | 0 | cu.multiRefIdx = 1; |
304 | 0 | unsigned multiRefMPM [NUM_MOST_PROBABLE_MODES]; |
305 | 0 | CU::getIntraMPMs(cu, multiRefMPM); |
306 | |
|
307 | 0 | for (int mRefNum = 1; mRefNum < MRL_NUM_REF_LINES; mRefNum++) |
308 | 0 | { |
309 | 0 | int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum]; |
310 | |
|
311 | 0 | cu.multiRefIdx = multiRefIdx; |
312 | 0 | initIntraPatternChType(cu, cu.Y(), true); |
313 | |
|
314 | 0 | for (int x = 1; x < NUM_MOST_PROBABLE_MODES; x++) |
315 | 0 | { |
316 | 0 | cu.intraDir[0] = multiRefMPM[x]; |
317 | 0 | initPredIntraParams(cu, cu.Y(), sps); |
318 | 0 | distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf; |
319 | 0 | predIntraAng(COMP_Y, piPred, cu); |
320 | | |
321 | | // Use the min between SAD and SATD as the cost criterion |
322 | | // SAD is scaled by 2 to align with the scaling of HAD |
323 | 0 | Distortion minSadHad = distParam.distFunc(distParam); |
324 | | |
325 | | // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. |
326 | 0 | uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst ); |
327 | | |
328 | | //restore ctx |
329 | 0 | m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx); |
330 | |
|
331 | 0 | double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass; |
332 | | // DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMRL: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, cu.intraDir[0]); |
333 | |
|
334 | 0 | int insertPos = -1; |
335 | 0 | updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos ); |
336 | 0 | updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), (double)minSadHad, HadModeList, CandHadList, numHadCand ); |
337 | 0 | m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size()); |
338 | 0 | } |
339 | 0 | } |
340 | 0 | cu.multiRefIdx = 0; |
341 | 0 | } |
342 | |
|
343 | 0 | if (testMip) |
344 | 0 | { |
345 | 0 | cu.mipFlag = true; |
346 | 0 | cu.multiRefIdx = 0; |
347 | |
|
348 | 0 | double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE }; |
349 | |
|
350 | 0 | initIntraPatternChType(cu, cu.Y()); |
351 | 0 | initIntraMip( cu ); |
352 | |
|
353 | 0 | const int transpOff = getNumModesMip( cu.Y() ); |
354 | 0 | const int numModesFull = (transpOff << 1); |
355 | 0 | for( uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++ ) |
356 | 0 | { |
357 | 0 | const bool isTransposed = (uiModeFull >= transpOff ? true : false); |
358 | 0 | const uint32_t uiMode = (isTransposed ? uiModeFull - transpOff : uiModeFull); |
359 | |
|
360 | 0 | cu.mipTransposedFlag = isTransposed; |
361 | 0 | cu.intraDir[CH_L] = uiMode; |
362 | 0 | distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf; |
363 | 0 | predIntraMip(piPred, cu); |
364 | | |
365 | | // Use the min between SAD and HAD as the cost criterion |
366 | | // SAD is scaled by 2 to align with the scaling of HAD |
367 | 0 | Distortion minSadHad = distParam.distFunc(distParam); |
368 | |
|
369 | 0 | uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst ); |
370 | | |
371 | | //restore ctx |
372 | 0 | m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx); |
373 | |
|
374 | 0 | double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass; |
375 | 0 | mipHadCost[uiModeFull] = cost; |
376 | 0 | DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiModeFull); |
377 | |
|
378 | 0 | int insertPos = -1; |
379 | 0 | updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList, CandCostList, numModesForFullRD+1, &insertPos ); |
380 | 0 | updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), 0.8*(double)minSadHad, HadModeList, CandHadList, numHadCand ); |
381 | 0 | m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size()); |
382 | 0 | } |
383 | |
|
384 | 0 | const double thresholdHadCost = 1.0 + 1.4 / sqrt((double)(cu.lwidth()*cu.lheight())); |
385 | 0 | xReduceHadCandList(RdModeList, CandCostList, *m_SortedPelUnitBufs, numModesForFullRD, thresholdHadCost, mipHadCost, cu, fastMip); |
386 | 0 | } |
387 | |
|
388 | 0 | if( m_pcEncCfg->m_bFastUDIUseMPMEnabled ) |
389 | 0 | { |
390 | 0 | const int numMPMs = NUM_MOST_PROBABLE_MODES; |
391 | 0 | unsigned intraMpms[numMPMs]; |
392 | |
|
393 | 0 | cu.multiRefIdx = 0; |
394 | |
|
395 | 0 | const int numCand = CU::getIntraMPMs( cu, intraMpms ); |
396 | 0 | ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0); |
397 | |
|
398 | 0 | for( int j = 0; j < numCand; j++ ) |
399 | 0 | { |
400 | 0 | bool mostProbableModeIncluded = false; |
401 | 0 | mostProbableMode.modeId = intraMpms[j]; |
402 | |
|
403 | 0 | for( int i = 0; i < numModesForFullRD; i++ ) |
404 | 0 | { |
405 | 0 | mostProbableModeIncluded |= ( mostProbableMode == RdModeList[i] ); |
406 | 0 | } |
407 | 0 | if( !mostProbableModeIncluded ) |
408 | 0 | { |
409 | 0 | numModesForFullRD++; |
410 | 0 | RdModeList.push_back( mostProbableMode ); |
411 | 0 | CandCostList.push_back(0); |
412 | 0 | } |
413 | 0 | } |
414 | 0 | } |
415 | 0 | } |
416 | | |
417 | | bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, double bestCost) |
418 | 0 | { |
419 | 0 | CodingStructure &cs = *cu.cs; |
420 | 0 | const int width = partitioner.currArea().lwidth(); |
421 | 0 | const int height = partitioner.currArea().lheight(); |
422 | | |
423 | | //===== loop over partitions ===== |
424 | |
|
425 | 0 | const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); |
426 | | |
427 | | // variables for saving fast intra modes scan results across multiple LFNST passes |
428 | 0 | double costInterCU = xFindInterCUCost( cu ); |
429 | |
|
430 | 0 | bool validReturn = false; |
431 | | |
432 | | //===== determine set of modes to be tested (using prediction signal only) ===== |
433 | 0 | int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes |
434 | 0 | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> RdModeList; |
435 | 0 | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> HadModeList; |
436 | 0 | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList; |
437 | 0 | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList; |
438 | |
|
439 | 0 | int numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[Log2(width) - MIN_CU_LOG2][Log2(height) - MIN_CU_LOG2]; |
440 | 0 | if (m_pcEncCfg->m_numIntraModesFullRD > 0) |
441 | 0 | numModesForFullRD=m_pcEncCfg->m_numIntraModesFullRD; |
442 | |
|
443 | | #if INTRA_FULL_SEARCH |
444 | | numModesForFullRD = numModesAvailable; |
445 | | #endif |
446 | 0 | const SPS& sps = *cu.cs->sps; |
447 | 0 | const bool mipAllowed = sps.MIP && cu.lwidth() <= sps.getMaxTbSize() && cu.lheight() <= sps.getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.lumaSize())); |
448 | 0 | const int SizeThr = 8 >> std::max( 0, m_pcEncCfg->m_useFastMIP - 1 ); |
449 | 0 | const bool testMip = mipAllowed && ( cu.lwidth() <= ( SizeThr * cu.lheight() ) && cu.lheight() <= ( SizeThr * cu.lwidth() ) ) && ( cu.lwidth() <= MIP_MAX_WIDTH && cu.lheight() <= MIP_MAX_HEIGHT ); |
450 | 0 | bool testISP = sps.ISP && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()); |
451 | 0 | if (testISP) |
452 | 0 | { |
453 | 0 | int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT)); |
454 | 0 | int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT)); |
455 | 0 | m_ispTestedModes[0].init(numTotalPartsHor, numTotalPartsVer, 0); |
456 | | // the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with |
457 | | // ISP due to size restrictions |
458 | 0 | numTotalPartsHor = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0; |
459 | 0 | numTotalPartsVer = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0; |
460 | 0 | for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++) |
461 | 0 | { |
462 | 0 | m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer, 0); |
463 | 0 | } |
464 | 0 | testISP = m_ispTestedModes[0].numTotalParts[0]; |
465 | 0 | } |
466 | 0 | else |
467 | 0 | { |
468 | 0 | m_ispTestedModes[0].init(0, 0, 0); |
469 | 0 | } |
470 | |
|
471 | 0 | xEstimateLumaRdModeList(numModesForFullRD, RdModeList, HadModeList, CandCostList, CandHadList, cu, testMip); |
472 | |
|
473 | 0 | CHECK( (size_t)numModesForFullRD != RdModeList.size(), "Inconsistent state!" ); |
474 | | |
475 | | // after this point, don't use numModesForFullRD |
476 | 0 | if( m_pcEncCfg->m_usePbIntraFast && !cs.slice->isIntra() && RdModeList.size() < numModesAvailable ) |
477 | 0 | { |
478 | 0 | double pbintraRatio = m_pcEncCfg->m_usePbIntraFast == 1 && ( cs.area.lwidth() >= 16 && cs.area.lheight() >= 16 ) ? 1.2 : PBINTRA_RATIO; |
479 | |
|
480 | 0 | int maxSize = -1; |
481 | 0 | ModeInfo bestMipMode; |
482 | 0 | int bestMipIdx = -1; |
483 | 0 | for( int idx = 0; idx < RdModeList.size(); idx++ ) |
484 | 0 | { |
485 | 0 | if( RdModeList[idx].mipFlg ) |
486 | 0 | { |
487 | 0 | bestMipMode = RdModeList[idx]; |
488 | 0 | bestMipIdx = idx; |
489 | 0 | break; |
490 | 0 | } |
491 | 0 | } |
492 | 0 | const int numHadCand = 3; |
493 | 0 | for (int k = numHadCand - 1; k >= 0; k--) |
494 | 0 | { |
495 | 0 | if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; } |
496 | 0 | } |
497 | 0 | if (maxSize > 0) |
498 | 0 | { |
499 | 0 | RdModeList.resize(std::min<size_t>(RdModeList.size(), maxSize)); |
500 | 0 | if( bestMipIdx >= 0 ) |
501 | 0 | { |
502 | 0 | if( RdModeList.size() <= bestMipIdx ) |
503 | 0 | { |
504 | 0 | RdModeList.push_back(bestMipMode); |
505 | 0 | m_SortedPelUnitBufs->swap( maxSize, bestMipIdx ); |
506 | 0 | } |
507 | 0 | } |
508 | 0 | } |
509 | 0 | if (maxSize == 0) |
510 | 0 | { |
511 | 0 | cs.dist = MAX_DISTORTION; |
512 | 0 | cs.interHad = 0; |
513 | 0 | return false; |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | //===== check modes (using r-d costs) ===== |
518 | 0 | ModeInfo bestPUMode; |
519 | |
|
520 | 0 | CodingStructure *csTemp = m_pTempCS; |
521 | 0 | CodingStructure *csBest = m_pBestCS; |
522 | |
|
523 | 0 | csTemp->slice = csBest->slice = cs.slice; |
524 | 0 | csTemp->picture = csBest->picture = cs.picture; |
525 | 0 | csTemp->compactResize( cu ); |
526 | 0 | csBest->compactResize( cu ); |
527 | 0 | csTemp->initStructData(); |
528 | 0 | csBest->initStructData(); |
529 | |
|
530 | 0 | int bestLfnstIdx = 0; |
531 | 0 | const bool useBDPCM = cs.picture->useBDPCM; |
532 | 0 | int NumBDPCMCand = (useBDPCM && sps.BDPCM && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType))) ? 2 : 0; |
533 | 0 | int bestbdpcmMode = 0; |
534 | 0 | int bestISP = 0; |
535 | 0 | int bestMrl = 0; |
536 | 0 | bool bestMip = 0; |
537 | 0 | int EndMode = (int)RdModeList.size(); |
538 | 0 | bool useISPlfnst = testISP && sps.LFNST; |
539 | 0 | bool noLFNST_ts = false; |
540 | 0 | double bestCostIsp[2] = { MAX_DOUBLE, MAX_DOUBLE }; |
541 | 0 | bool disableMTS = false; |
542 | 0 | bool disableLFNST = false; |
543 | 0 | bool disableDCT2test = false; |
544 | 0 | if (m_pcEncCfg->m_FastIntraTools) |
545 | 0 | { |
546 | 0 | int speedIntra = 0; |
547 | 0 | xSpeedUpIntra(bestCost, EndMode, speedIntra, cu); |
548 | 0 | disableMTS = (speedIntra >> 2 ) & 0x1; |
549 | 0 | disableLFNST = (speedIntra >> 1) & 0x1; |
550 | 0 | disableDCT2test = speedIntra>>3; |
551 | 0 | if (disableLFNST) |
552 | 0 | { |
553 | 0 | noLFNST_ts = true; |
554 | 0 | useISPlfnst = false; |
555 | 0 | } |
556 | 0 | if (speedIntra & 0x1) |
557 | 0 | { |
558 | 0 | testISP = false; |
559 | 0 | } |
560 | 0 | } |
561 | |
|
562 | 0 | for (int mode_cur = 0; mode_cur < EndMode + NumBDPCMCand; mode_cur++) |
563 | 0 | { |
564 | 0 | int mode = mode_cur; |
565 | 0 | if (mode_cur >= EndMode) |
566 | 0 | { |
567 | 0 | mode = mode_cur - EndMode ? -1 : -2; |
568 | 0 | testISP = false; |
569 | 0 | } |
570 | | // set CU/PU to luma prediction mode |
571 | 0 | ModeInfo testMode; |
572 | 0 | int noISP = 0; |
573 | 0 | int endISP = testISP ? 2 : 0; |
574 | 0 | bool noLFNST = false || noLFNST_ts; |
575 | 0 | if (mode && useISPlfnst) |
576 | 0 | { |
577 | 0 | noLFNST |= (bestCostIsp[0] > (bestCostIsp[1] * 1.4)); |
578 | 0 | if (mode > 2) |
579 | 0 | { |
580 | 0 | endISP = 0; |
581 | 0 | testISP = false; |
582 | 0 | } |
583 | 0 | } |
584 | 0 | if (testISP) |
585 | 0 | { |
586 | 0 | xSpeedUpISP(1, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, bestLfnstIdx); |
587 | 0 | } |
588 | 0 | int startISP = 0; |
589 | 0 | if (disableDCT2test && mode && bestISP) |
590 | 0 | { |
591 | 0 | startISP = endISP ? 1 : 0; |
592 | 0 | } |
593 | 0 | for (int ispM = startISP; ispM <= endISP; ispM++) |
594 | 0 | { |
595 | 0 | if (ispM && (ispM == noISP)) |
596 | 0 | { |
597 | 0 | continue; |
598 | 0 | } |
599 | | |
600 | 0 | if (mode < 0) |
601 | 0 | { |
602 | 0 | cu.bdpcmM[CH_L] = -mode; |
603 | 0 | testMode = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmM[CH_L] == 2 ? VER_IDX : HOR_IDX); |
604 | 0 | } |
605 | 0 | else |
606 | 0 | { |
607 | 0 | testMode = RdModeList[mode]; |
608 | 0 | cu.bdpcmM[CH_L] = 0; |
609 | 0 | } |
610 | |
|
611 | 0 | cu.ispMode = ispM; |
612 | 0 | cu.mipFlag = testMode.mipFlg; |
613 | 0 | cu.mipTransposedFlag = testMode.mipTrFlg; |
614 | 0 | cu.multiRefIdx = testMode.mRefId; |
615 | 0 | cu.intraDir[CH_L] = testMode.modeId; |
616 | 0 | if (cu.ispMode && xSpeedUpISP(0, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, 0) ) |
617 | 0 | { |
618 | 0 | continue; |
619 | 0 | } |
620 | 0 | if (m_pcEncCfg->m_FastIntraTools && (cu.ispMode || sps.LFNST || sps.MTS)) |
621 | 0 | { |
622 | 0 | m_ispTestedModes[0].intraWasTested = true; |
623 | 0 | } |
624 | 0 | CHECK(cu.mipFlag && cu.multiRefIdx, "Error: combination of MIP and MRL not supported"); |
625 | 0 | CHECK(cu.multiRefIdx && (cu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported"); |
626 | 0 | CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported"); |
627 | 0 | CHECK(cu.ispMode && cu.multiRefIdx, "Error: combination of ISP and MRL not supported"); |
628 | | |
629 | | // determine residual for partition |
630 | 0 | cs.initSubStructure(*csTemp, partitioner.chType, cs.area, true); |
631 | 0 | int doISP = (((cu.ispMode == 0) && noLFNST) || (useISPlfnst && mode && cu.ispMode && (bestLfnstIdx == 0)) || disableLFNST) ? -mode : mode; |
632 | 0 | xIntraCodingLumaQT(*csTemp, partitioner, m_SortedPelUnitBufs->getBufFromSortedList(mode), bestCost, doISP, disableMTS); |
633 | |
|
634 | 0 | DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x, |
635 | 0 | cu.blocks[0].y, width, height, csTemp->cost, testMode.modeId, testMode.ispMod, |
636 | 0 | cu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag); |
637 | |
|
638 | 0 | if (cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMP_Y]) |
639 | 0 | { |
640 | 0 | csTemp->cost = MAX_DOUBLE; |
641 | 0 | csTemp->costDbOffset = 0; |
642 | 0 | } |
643 | 0 | if (useISPlfnst) |
644 | 0 | { |
645 | 0 | int n = (cu.ispMode == 0) ? 0 : 1; |
646 | 0 | bestCostIsp[n] = csTemp->cost < bestCostIsp[n] ? csTemp->cost : bestCostIsp[n]; |
647 | 0 | } |
648 | | |
649 | | // check r-d cost |
650 | 0 | if (csTemp->cost < csBest->cost) |
651 | 0 | { |
652 | 0 | validReturn = true; |
653 | 0 | std::swap(csTemp, csBest); |
654 | 0 | bestPUMode = testMode; |
655 | 0 | bestLfnstIdx = csBest->cus[0]->lfnstIdx; |
656 | 0 | bestISP = csBest->cus[0]->ispMode; |
657 | 0 | bestMip = csBest->cus[0]->mipFlag; |
658 | 0 | bestMrl = csBest->cus[0]->multiRefIdx; |
659 | 0 | bestbdpcmMode = cu.bdpcmM[CH_L]; |
660 | 0 | m_ispTestedModes[bestLfnstIdx].bestSplitSoFar = ISPType(bestISP); |
661 | 0 | if (csBest->cost < bestCost) |
662 | 0 | { |
663 | 0 | bestCost = csBest->cost; |
664 | 0 | } |
665 | 0 | if ((csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] == MTS_SKIP) && ( floorLog2(csBest->getTU(partitioner.chType)->blocks[COMP_Y].area()) >= 6 )) |
666 | 0 | { |
667 | 0 | noLFNST_ts = 1; |
668 | 0 | } |
669 | 0 | } |
670 | | |
671 | | // reset context models |
672 | 0 | m_CABACEstimator->getCtx() = ctxStart; |
673 | |
|
674 | 0 | csTemp->releaseIntermediateData(); |
675 | |
|
676 | 0 | if (m_pcEncCfg->m_fastLocalDualTreeMode && CU::isConsIntra(cu) && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0) |
677 | 0 | { |
678 | 0 | if( (m_pcEncCfg->m_fastLocalDualTreeMode == 2) || (csBest->cost > costInterCU * 1.5)) |
679 | 0 | { |
680 | | //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%) |
681 | 0 | EndMode = 0; |
682 | 0 | break; |
683 | 0 | } |
684 | 0 | } |
685 | 0 | } |
686 | 0 | } // Mode loop |
687 | | |
688 | 0 | if (m_pcEncCfg->m_FastIntraTools && (sps.ISP|| sps.LFNST || sps.MTS)) |
689 | 0 | { |
690 | 0 | int bestMode = csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] ? 4 : 0; |
691 | 0 | bestMode |= bestLfnstIdx ? 2 : 0; |
692 | 0 | bestMode |= bestISP ? 1 : 0; |
693 | 0 | m_ispTestedModes[0].bestIntraMode = bestMode; |
694 | 0 | } |
695 | 0 | cu.ispMode = bestISP; |
696 | 0 | if( validReturn ) |
697 | 0 | { |
698 | 0 | cs.useSubStructure( *csBest, partitioner.chType, TREE_D, cu.singleChan( CH_L ), true ); |
699 | 0 | const ReshapeData& reshapeData = cs.picture->reshapeData; |
700 | 0 | if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) |
701 | 0 | { |
702 | 0 | cs.getRspRecoBuf().copyFrom(csBest->getRspRecoBuf()); |
703 | 0 | } |
704 | | |
705 | | //=== update PU data ==== |
706 | 0 | cu.lfnstIdx = bestLfnstIdx; |
707 | 0 | cu.mipTransposedFlag = bestPUMode.mipTrFlg; |
708 | 0 | cu.intraDir[CH_L] = bestPUMode.modeId; |
709 | 0 | cu.bdpcmM[CH_L] = bestbdpcmMode; |
710 | 0 | cu.mipFlag = bestMip; |
711 | 0 | cu.multiRefIdx = bestMrl; |
712 | 0 | } |
713 | 0 | else |
714 | 0 | { |
715 | 0 | THROW("fix this"); |
716 | 0 | } |
717 | | |
718 | 0 | csBest->releaseIntermediateData(); |
719 | |
|
720 | 0 | return validReturn; |
721 | 0 | } |
722 | | |
// Rate-distortion search for the chroma intra prediction mode of `cu`.
//
// Steps visible in this body:
//   1. fetch the candidate list with CU::getIntraChromaCandModes;
//   2. rank candidates by a SAD/HAD cost on Cb+Cr and mark the worst-ranked ones as disabled;
//   3. for every remaining candidate (plus up to two BDPCM variants) run xIntraChromaCodingQT and
//      xGetIntraFracBitsQT, keeping the cheapest result in the temporary structure m_pSaveCS[0];
//   4. copy the kept result back into `cs`, the picture reco buffer and the original TUs.
//
// cu              CU under test; intraDir[1], bdpcmM[CH_C] and lfnstIdx are overwritten while searching
//                 and finally set to the values recorded for the cheapest candidate (or to the zero
//                 initialisers if no candidate was accepted). cs.dist is set to the recorded distortion.
// partitioner     current partitioning state; split/exited here when the CU uses a separate tree.
// maxCostAllowed  only used as the start value of bestCostSoFar and in the final ISP check: when luma
//                 uses ISP and no accepted candidate cost dropped below it, cu.ispMode is reset to 0.
//
// The CABAC estimator context is restored to its state at entry before returning.
723 | | void IntraSearch::estIntraPredChromaQT( CodingUnit& cu, Partitioner& partitioner, const double maxCostAllowed ) |
724 | 0 | { |
725 | 0 | PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_CHROMA, cu.cs, CH_C ); |
726 | 0 | const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() ); |
727 | 0 | CodingStructure &cs = *cu.cs; |
728 | 0 | bool lumaUsesISP = !CU::isSepTree(cu) && cu.ispMode; |
729 | 0 | PartSplit ispType = lumaUsesISP ? CU::getISPType(cu, COMP_Y) : TU_NO_ISP; |
730 | 0 | double bestCostSoFar = maxCostAllowed; |
731 | 0 | const uint32_t numberValidComponents = getNumberValidComponents( cu.chromaFormat ); |
732 | 0 | const bool useBDPCM = cs.picture->useBDPCM; |
733 | |
|
734 | 0 | uint32_t uiBestMode = 0; |
735 | 0 | Distortion uiBestDist = 0; |
736 | 0 | double dBestCost = MAX_DOUBLE; |
737 | | |
738 | | //----- init mode list ---- |
739 | 0 | { |
740 | 0 | uint32_t uiMinMode = 0; |
741 | 0 | uint32_t uiMaxMode = NUM_CHROMA_MODE; |
742 | |
|
// Number of worst-ranked candidates to disable: half of the list when
// m_reduceIntraChromaModesFullRD is set, a quarter otherwise.
743 | 0 | const int reducedModeNumber = uiMaxMode >> (m_pcEncCfg->m_reduceIntraChromaModesFullRD ? 1 : 2); |
744 | | //----- check chroma modes ----- |
745 | 0 | uint32_t chromaCandModes[ NUM_CHROMA_MODE ]; |
746 | 0 | CU::getIntraChromaCandModes( cu, chromaCandModes ); |
747 | | |
748 | | // create a temporary CS |
749 | 0 | CodingStructure &saveCS = *m_pSaveCS[0]; |
750 | 0 | saveCS.pcv = cs.pcv; |
751 | 0 | saveCS.picture = cs.picture; |
752 | 0 | saveCS.area.repositionTo( cs.area ); |
753 | 0 | saveCS.clearTUs(); |
754 | |
|
755 | 0 | if( !CU::isSepTree(cu) && cu.ispMode ) |
756 | 0 | { |
757 | 0 | saveCS.clearCUs(); |
758 | 0 | } |
759 | |
|
// With a separate tree the chroma TUs are created here, split by TU_MAX_TR_SPLIT when the
// partitioner allows it.
760 | 0 | if( CU::isSepTree(cu) ) |
761 | 0 | { |
762 | 0 | if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) |
763 | 0 | { |
764 | 0 | partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); |
765 | |
|
766 | 0 | do |
767 | 0 | { |
768 | 0 | cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ).depth = partitioner.currTrDepth; |
769 | 0 | } while( partitioner.nextPart( cs ) ); |
770 | |
|
771 | 0 | partitioner.exitCurrSplit(); |
772 | 0 | } |
773 | 0 | else |
774 | 0 | cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ); |
775 | 0 | } |
776 | | |
777 | | // create a store for the TUs |
// orgTUs[j] and saveCS.tus[j] are filled in lock-step here; the copy loops further down rely on
// both containers having matching indices.
778 | 0 | std::vector<TransformUnit*> orgTUs; |
779 | 0 | for( const auto &ptu : cs.tus ) |
780 | 0 | { |
781 | | // for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs |
782 | 0 | if (lumaUsesISP || cu.contains(*ptu, CH_C)) |
783 | 0 | { |
784 | 0 | saveCS.addTU( *ptu, partitioner.chType, nullptr ); |
785 | 0 | orgTUs.push_back( ptu ); |
786 | 0 | } |
787 | 0 | } |
788 | | |
789 | | // SATD pre-selecting. |
790 | 0 | int satdModeList [NUM_CHROMA_MODE] = { 0 }; |
791 | 0 | int64_t satdSortedCost[NUM_CHROMA_MODE] = { 0 }; |
792 | 0 | bool modeDisable[NUM_INTRA_MODE + 1] = { false }; // use intra mode idx to check whether enable |
793 | |
|
// NOTE(review): this `cs` shadows the function-level `cs` declared above; both are bound to *cu.cs.
794 | 0 | CodingStructure& cs = *(cu.cs); |
795 | 0 | CompArea areaCb = cu.Cb(); |
796 | 0 | CompArea areaCr = cu.Cr(); |
797 | 0 | CPelBuf orgCb = cs.getOrgBuf (COMP_Cb); |
798 | 0 | PelBuf predCb = cs.getPredBuf(COMP_Cb); |
799 | 0 | CPelBuf orgCr = cs.getOrgBuf (COMP_Cr); |
800 | 0 | PelBuf predCr = cs.getPredBuf(COMP_Cr); |
801 | |
|
802 | 0 | DistParam distParamSadCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_SAD); |
803 | 0 | DistParam distParamSatdCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_HAD); |
804 | 0 | DistParam distParamSadCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_SAD); |
805 | 0 | DistParam distParamSatdCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_HAD); |
806 | |
|
807 | 0 | cu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation. |
808 | |
|
809 | 0 | initIntraPatternChType(cu, cu.Cb()); |
810 | 0 | initIntraPatternChType(cu, cu.Cr()); |
811 | 0 | loadLMLumaRecPels(cu, cu.Cb()); |
812 | |
|
// Pre-selection cost per candidate: min(2*SAD, HAD) on Cb plus min(2*SAD, HAD) on Cr.
// Candidates skipped by either `continue` below are still entered in satdModeList but keep
// their zero-initialised cost.
813 | 0 | for (int idx = uiMinMode; idx < uiMaxMode; idx++) |
814 | 0 | { |
815 | 0 | int mode = chromaCandModes[idx]; |
816 | 0 | satdModeList[idx] = mode; |
817 | 0 | if (CU::isLMCMode(mode) && ( !CU::isLMCModeEnabled(cu, mode) || cu.slice->lmChromaCheckDisable ) ) |
818 | 0 | { |
819 | 0 | continue; |
820 | 0 | } |
821 | 0 | if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM |
822 | 0 | { |
823 | 0 | continue; |
824 | 0 | } |
825 | | |
826 | 0 | cu.intraDir[1] = mode; // temporary assigned, for SATD checking. |
827 | |
|
828 | 0 | const bool isLMCMode = CU::isLMCMode(mode); |
829 | 0 | if( isLMCMode ) |
830 | 0 | { |
831 | 0 | predIntraChromaLM(COMP_Cb, predCb, cu, areaCb, mode); |
832 | 0 | } |
833 | 0 | else |
834 | 0 | { |
835 | 0 | initPredIntraParams(cu, cu.Cb(), *cs.sps); |
836 | 0 | predIntraAng(COMP_Cb, predCb, cu); |
837 | 0 | } |
838 | 0 | int64_t sadCb = distParamSadCb.distFunc(distParamSadCb) * 2; |
839 | 0 | int64_t satdCb = distParamSatdCb.distFunc(distParamSatdCb); |
840 | 0 | int64_t sad = std::min(sadCb, satdCb); |
841 | |
|
842 | 0 | if( isLMCMode ) |
843 | 0 | { |
844 | 0 | predIntraChromaLM(COMP_Cr, predCr, cu, areaCr, mode); |
845 | 0 | } |
846 | 0 | else |
847 | 0 | { |
848 | 0 | initPredIntraParams(cu, cu.Cr(), *cs.sps); |
849 | 0 | predIntraAng(COMP_Cr, predCr, cu); |
850 | 0 | } |
851 | 0 | int64_t sadCr = distParamSadCr.distFunc(distParamSadCr) * 2; |
852 | 0 | int64_t satdCr = distParamSatdCr.distFunc(distParamSatdCr); |
853 | 0 | sad += std::min(sadCr, satdCr); |
854 | 0 | satdSortedCost[idx] = sad; |
855 | 0 | } |
856 | | |
857 | | // sort the mode based on the cost from small to large. |
858 | 0 | for (int i = uiMinMode; i <= uiMaxMode - 1; i++) |
859 | 0 | { |
860 | 0 | for (int j = i + 1; j <= uiMaxMode - 1; j++) |
861 | 0 | { |
862 | 0 | if (satdSortedCost[j] < satdSortedCost[i]) |
863 | 0 | { |
864 | 0 | std::swap( satdModeList[i], satdModeList[j]); |
865 | 0 | std::swap( satdSortedCost[i], satdSortedCost[j]); |
866 | 0 | } |
867 | 0 | } |
868 | 0 | } |
869 | |
|
870 | 0 | for (int i = 0; i < reducedModeNumber; i++) |
871 | 0 | { |
872 | 0 | modeDisable[satdModeList[uiMaxMode - 1 - i]] = true; // disable the last reducedModeNumber modes |
873 | 0 | } |
874 | |
|
875 | 0 | int bestLfnstIdx = 0; |
876 | | // save the dist |
877 | 0 | Distortion baseDist = cs.dist; |
878 | 0 | int32_t bestbdpcmMode = 0; |
// Two extra loop iterations are appended for chroma BDPCM when it is permitted by the
// condition below; otherwise only the regular candidates are visited.
879 | 0 | uint32_t numbdpcmModes = ( useBDPCM && CU::bdpcmAllowed(cu, COMP_Cb) |
880 | 0 | && ((partitioner.chType == CH_C) || (cu.ispMode == 0 && cu.lfnstIdx == 0 && cu.firstTU->mtsIdx[COMP_Y] == MTS_SKIP))) ? 2 : 0; |
881 | 0 | for (int mode_cur = uiMinMode; mode_cur < (int)(uiMaxMode + numbdpcmModes); mode_cur++) |
882 | 0 | { |
883 | 0 | int mode = mode_cur; |
// mode_cur == uiMaxMode maps to mode -2 (bdpcmM = 2, tested with chromaCandModes[1]);
// mode_cur >  uiMaxMode maps to mode -1 (bdpcmM = 1, tested with chromaCandModes[2]).
// The -1 variant is skipped when the currently saved best TU has neither Cb nor Cr in MTS_SKIP.
884 | 0 | if (mode_cur >= uiMaxMode) |
885 | 0 | { |
886 | 0 | mode = mode_cur > uiMaxMode ? -1 : -2; //set bdpcm mode |
887 | 0 | if ((mode == -1) && (saveCS.tus[0]->mtsIdx[COMP_Cb] != MTS_SKIP) && (saveCS.tus[0]->mtsIdx[COMP_Cr] != MTS_SKIP)) |
888 | 0 | { |
889 | 0 | continue; |
890 | 0 | } |
891 | 0 | } |
892 | 0 | int chromaIntraMode; |
893 | 0 | if (mode < 0) |
894 | 0 | { |
895 | 0 | cu.bdpcmM[CH_C] = -mode; |
896 | 0 | chromaIntraMode = cu.bdpcmM[CH_C] == 2 ? chromaCandModes[1] : chromaCandModes[2]; |
897 | 0 | } |
898 | 0 | else |
899 | 0 | { |
900 | 0 | cu.bdpcmM[CH_C] = 0; |
901 | 0 | chromaIntraMode = chromaCandModes[mode]; |
902 | 0 | if (CU::isLMCMode(chromaIntraMode) && ( !CU::isLMCModeEnabled(cu, chromaIntraMode) || cu.slice->lmChromaCheckDisable ) ) |
903 | 0 | { |
904 | 0 | continue; |
905 | 0 | } |
// The pre-selection result is only honoured when CU::isLMCModeEnabled returns true for this mode.
906 | 0 | if (modeDisable[chromaIntraMode] && CU::isLMCModeEnabled(cu, chromaIntraMode)) // when CCLM is disable, then MDLM is disable. not use satd checking |
907 | 0 | { |
908 | 0 | continue; |
909 | 0 | } |
910 | 0 | } |
911 | 0 | cs.dist = baseDist; |
912 | | //----- restore context models ----- |
913 | 0 | m_CABACEstimator->getCtx() = ctxStart; |
914 | | |
915 | | //----- chroma coding ----- |
916 | 0 | cu.intraDir[1] = chromaIntraMode; |
917 | 0 | m_ispTestedModes[0].IspType = ispType; |
918 | 0 | m_ispTestedModes[0].subTuCounter = -1; |
919 | 0 | xIntraChromaCodingQT( cs, partitioner ); |
// A distortion of MAX_UINT after chroma coding is treated as "candidate rejected" when luma uses ISP.
920 | 0 | if (lumaUsesISP && cs.dist == MAX_UINT) |
921 | 0 | { |
922 | 0 | continue; |
923 | 0 | } |
924 | | |
925 | 0 | if (cs.sps->transformSkip) |
926 | 0 | { |
927 | 0 | m_CABACEstimator->getCtx() = ctxStart; |
928 | 0 | } |
929 | 0 | m_ispTestedModes[0].IspType = ispType; |
930 | 0 | m_ispTestedModes[0].subTuCounter = -1; |
931 | 0 | uint64_t fracBits = xGetIntraFracBitsQT( cs, partitioner, false ); |
932 | 0 | Distortion uiDist = cs.dist; |
// Only the distortion added by this chroma candidate (uiDist - baseDist) enters the RD cost.
933 | 0 | double dCost = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist ); |
934 | | |
935 | | //----- compare ----- |
936 | 0 | if( dCost < dBestCost ) |
937 | 0 | { |
938 | 0 | if (lumaUsesISP && (dCost < bestCostSoFar)) |
939 | 0 | { |
940 | 0 | bestCostSoFar = dCost; |
941 | 0 | } |
// Keep the new best: reco samples go to saveCS and to the picture buffer, TU data goes to saveCS.
942 | 0 | for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ ) |
943 | 0 | { |
944 | 0 | const CompArea& area = cu.blocks[i]; |
945 | 0 | saveCS.getRecoBuf ( area ).copyFrom( cs.getRecoBuf ( area ) ); |
946 | 0 | cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf ( area ) ); |
947 | 0 | for( uint32_t j = 0; j < saveCS.tus.size(); j++ ) |
948 | 0 | { |
949 | 0 | saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID ); |
950 | 0 | } |
951 | 0 | } |
952 | 0 | dBestCost = dCost; |
953 | 0 | uiBestDist = uiDist; |
954 | 0 | uiBestMode = chromaIntraMode; |
955 | 0 | bestLfnstIdx = cu.lfnstIdx; |
956 | 0 | bestbdpcmMode = cu.bdpcmM[CH_C]; |
957 | |
|
958 | 0 | } |
959 | 0 | } |
960 | 0 | cu.lfnstIdx = bestLfnstIdx; |
961 | 0 | cu.bdpcmM[CH_C]= bestbdpcmMode; |
962 | |
|
// Write the saved best result back into cs, the picture buffer and the original TUs.
963 | 0 | for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ ) |
964 | 0 | { |
965 | 0 | const CompArea& area = cu.blocks[i]; |
966 | |
|
967 | 0 | cs.getRecoBuf ( area ).copyFrom( saveCS.getRecoBuf( area ) ); |
968 | 0 | cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf ( area ) ); |
969 | |
|
970 | 0 | for( uint32_t j = 0; j < saveCS.tus.size(); j++ ) |
971 | 0 | { |
972 | 0 | orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID ); |
973 | 0 | } |
974 | 0 | } |
975 | 0 | } |
976 | 0 | cu.intraDir[1] = uiBestMode; |
977 | 0 | cs.dist = uiBestDist; |
978 | | |
979 | | //----- restore context models ----- |
980 | 0 | m_CABACEstimator->getCtx() = ctxStart; |
// bestCostSoFar still equals maxCostAllowed if no accepted candidate was cheaper: drop ISP then.
981 | 0 | if (lumaUsesISP && bestCostSoFar >= maxCostAllowed) |
982 | 0 | { |
983 | 0 | cu.ispMode = 0; |
984 | 0 | } |
985 | 0 | } |
986 | | |
// Appends one (area, cost) pair to the m_cuAreaInSCIPU / m_cuCostInSCIPU arrays and advances
// m_numCuInSCIPU. Once NUM_INTER_CU_INFO_SAVE entries are stored, further calls are ignored
// without any error or overwrite.
987 | | void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost ) |
988 | 0 | { |
989 | 0 | if( m_numCuInSCIPU < NUM_INTER_CU_INFO_SAVE ) |
990 | 0 | { |
991 | 0 | m_cuAreaInSCIPU[m_numCuInSCIPU] = area; |
992 | 0 | m_cuCostInSCIPU[m_numCuInSCIPU] = cost; |
993 | 0 | m_numCuInSCIPU++; |
994 | 0 | } |
995 | 0 | } |
996 | | |
// Resets the per-SCIPU store: every one of the NUM_INTER_CU_INFO_SAVE slots gets a
// default-constructed Area and a cost of 0, and the entry counter m_numCuInSCIPU returns to 0.
997 | | void IntraSearch::initCuAreaCostInSCIPU() |
998 | 0 | { |
999 | 0 | for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ ) |
1000 | 0 | { |
1001 | 0 | m_cuAreaInSCIPU[i] = Area(); |
1002 | 0 | m_cuCostInSCIPU[i] = 0; |
1003 | 0 | } |
1004 | 0 | m_numCuInSCIPU = 0; |
1005 | 0 | } |
1006 | | // ------------------------------------------------------------------------------------------------------------------- |
1007 | | // Intra search |
1008 | | // ------------------------------------------------------------------------------------------------------------------- |
1009 | | |
// Feeds the intra CU header syntax elements to m_CABACEstimator for bit estimation.
//
// luma == true : on the first partition only, codes pred_mode (under the conditions below),
//                bdpcm_mode for the partitioner's channel type and intra_luma_pred_mode.
// luma == false: on the first chroma partition only, codes bdpcm_mode for chroma and
//                intra_chroma_pred_mode.
// Nothing is coded for partitions that are not the first one.
1010 | | void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool luma ) |
1011 | 0 | { |
1012 | 0 | CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType ); |
1013 | |
|
1014 | 0 | if (luma) |
1015 | 0 | { |
// With ISP the first partition is identified by the shared sub-TU counter being 0;
// otherwise by the current area starting at the luma position of cs.area.
1016 | 0 | bool isFirst = cu.ispMode ? m_ispTestedModes[0].subTuCounter == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos(); |
1017 | | |
1018 | | // CU header |
1019 | 0 | if( isFirst ) |
1020 | 0 | { |
1021 | 0 | if ((!cs.slice->isIntra() || cs.slice->sps->IBC || cs.slice->sps->PLT) && cu.Y().valid()) |
1022 | 0 | { |
1023 | 0 | m_CABACEstimator->pred_mode ( cu ); |
1024 | 0 | } |
1025 | 0 | m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) ); |
1026 | 0 | } |
1027 | | |
1028 | | // luma prediction mode |
1029 | 0 | if (isFirst) |
1030 | 0 | { |
// pred_mode is coded here instead when the CU has no valid luma block.
1031 | 0 | if ( !cu.Y().valid()) |
1032 | 0 | { |
1033 | 0 | m_CABACEstimator->pred_mode( cu ); |
1034 | 0 | } |
1035 | 0 | m_CABACEstimator->intra_luma_pred_mode( cu ); |
1036 | 0 | } |
1037 | 0 | } |
1038 | 0 | else // if (chroma) |
1039 | 0 | { |
1040 | 0 | bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos(); |
1041 | |
|
1042 | 0 | if( isFirst ) |
1043 | 0 | { |
1044 | 0 | m_CABACEstimator->bdpcm_mode(cu, ComponentID(CH_C)); |
1045 | 0 | m_CABACEstimator->intra_chroma_pred_mode( cu ); |
1046 | 0 | } |
1047 | 0 | } |
1048 | 0 | } |
1049 | | |
// Recursively walks the transform tree below the partitioner's current area and feeds the
// coded-block-flag (cbf) syntax elements to m_CABACEstimator.
//
// luma == false: chroma cbfs are coded at every visited depth (subject to the ISP conditions below).
// luma == true : the luma cbf is coded at leaf TUs only; with ISP, the cbf of the last sub-partition
//                is not coded when all previous sub-partitions have a zero luma cbf (it is inferred).
// The partitioner is split while descending and restored with exitCurrSplit() before returning.
// Throws (via THROW) when a TU is deeper than the current depth but no split can be performed.
1050 | | void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool luma ) |
1051 | 0 | { |
1052 | 0 | const UnitArea& currArea = partitioner.currArea(); |
1053 | 0 | int subTuCounter = m_ispTestedModes[0].subTuCounter; |
1054 | 0 | TransformUnit &currTU = *cs.getTU(currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter); |
1055 | 0 | CodingUnit &currCU = *currTU.cu; |
1056 | 0 | const uint32_t currDepth = partitioner.currTrDepth; |
1057 | 0 | const bool subdiv = currTU.depth > currDepth; |
1058 | 0 | ComponentID compID = partitioner.chType == CH_L ? COMP_Y : COMP_Cb; |
1059 | |
|
1060 | 0 | if (!luma) |
1061 | 0 | { |
1062 | 0 | const bool chromaCbfISP = currArea.blocks[COMP_Cb].valid() && currCU.ispMode && !subdiv; |
1063 | 0 | if (!currCU.ispMode || chromaCbfISP) |
1064 | 0 | { |
1065 | 0 | const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat); |
1066 | 0 | const uint32_t cbfDepth = (chromaCbfISP ? currDepth - 1 : currDepth); |
1067 | |
|
1068 | 0 | for (uint32_t ch = COMP_Cb; ch < numberValidComponents; ch++) |
1069 | 0 | { |
// This loop-local compID shadows the function-level compID declared above.
1070 | 0 | const ComponentID compID = ComponentID(ch); |
1071 | 0 | if (currDepth == 0 || TU::getCbfAtDepth(currTU, compID, currDepth - 1) || chromaCbfISP) |
1072 | 0 | { |
// For Cr the Cb cbf at this depth is passed as the "previous cbf" argument.
1073 | 0 | const bool prevCbf = (compID == COMP_Cr ? TU::getCbfAtDepth(currTU, COMP_Cb, currDepth) : false); |
1074 | 0 | m_CABACEstimator->cbf_comp(currCU, TU::getCbfAtDepth(currTU, compID, currDepth), currArea.blocks[compID], cbfDepth, prevCbf); |
1075 | 0 | } |
1076 | 0 | } |
1077 | 0 | } |
1078 | 0 | } |
1079 | |
|
1080 | 0 | if (subdiv) |
1081 | 0 | { |
1082 | 0 | if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) |
1083 | 0 | { |
1084 | 0 | partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); |
1085 | 0 | } |
1086 | 0 | else if (currCU.ispMode && isLuma(compID)) |
1087 | 0 | { |
1088 | 0 | partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs); |
1089 | 0 | } |
1090 | 0 | else |
1091 | 0 | THROW("Cannot perform an implicit split!"); |
1092 | | |
// NOTE(review): the local subTuCounter incremented in this loop is not passed to the recursive
// call and is not read again in this function; each recursion re-reads
// m_ispTestedModes[0].subTuCounter instead. Confirm whether the increment is still needed.
1093 | 0 | do |
1094 | 0 | { |
1095 | 0 | xEncSubdivCbfQT(cs, partitioner, luma); //? |
1096 | 0 | subTuCounter += subTuCounter != -1 ? 1 : 0; |
1097 | 0 | } while (partitioner.nextPart(cs)); |
1098 | |
|
1099 | 0 | partitioner.exitCurrSplit(); |
1100 | 0 | } |
1101 | 0 | else |
1102 | 0 | { |
1103 | | //===== Cbfs ===== |
1104 | 0 | if (luma) |
1105 | 0 | { |
1106 | 0 | bool previousCbf = false; |
1107 | 0 | bool lastCbfIsInferred = false; |
1108 | 0 | if (m_ispTestedModes[0].IspType != TU_NO_ISP) |
1109 | 0 | { |
1110 | 0 | bool rootCbfSoFar = false; |
// Number of ISP sub-partitions: CU size divided by TU size along the split direction.
1111 | 0 | uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2(currTU.lheight()) |
1112 | 0 | : currCU.lwidth() >> floorLog2(currTU.lwidth()); |
1113 | 0 | if (subTuCounter == nTus - 1) |
1114 | 0 | { |
// Last sub-partition: OR together the luma cbfs of all preceding sub-partitions.
1115 | 0 | TransformUnit* tuPointer = currCU.firstTU; |
1116 | 0 | for (int tuIdx = 0; tuIdx < nTus - 1; tuIdx++) |
1117 | 0 | { |
1118 | 0 | rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, currDepth); |
1119 | 0 | tuPointer = tuPointer->next; |
1120 | 0 | } |
1121 | 0 | if (!rootCbfSoFar) |
1122 | 0 | { |
1123 | 0 | lastCbfIsInferred = true; |
1124 | 0 | } |
1125 | 0 | } |
1126 | 0 | if (!lastCbfIsInferred) |
1127 | 0 | { |
1128 | 0 | previousCbf = TU::getPrevTuCbfAtDepth(currTU, COMP_Y, partitioner.currTrDepth); |
1129 | 0 | } |
1130 | 0 | } |
1131 | 0 | if (!lastCbfIsInferred) |
1132 | 0 | { |
1133 | 0 | m_CABACEstimator->cbf_comp(currCU, TU::getCbfAtDepth(currTU, COMP_Y, currDepth), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode); |
1134 | 0 | } |
1135 | 0 | } |
1136 | 0 | } |
1137 | 0 | } |
// Recursively walks the transform tree below the partitioner's current area and feeds the residual
// data of component `compID` to m_CABACEstimator.
//
// At a leaf TU whose `compID` block is valid:
//   - for Cr, joint_cb_cr is coded first with a mask built from the Cb (bit 1) and Cr (bit 0) cbfs;
//   - if the component's cbf is set, residual_coding is coded; for luma, mts_idx follows it.
// cuCtx is only forwarded on the luma path.
// NOTE(review): the parameters subTuIdx and ispType are not read in this body; the sub-TU index and
// ISP type actually used come from m_ispTestedModes[0].
// Throws (via THROW) when a TU is deeper than the current depth but no split can be performed.
1138 | | void IntraSearch::xEncCoeffQT(CodingStructure& cs, Partitioner& partitioner, const ComponentID compID, CUCtx* cuCtx, const int subTuIdx, const PartSplit ispType) |
1139 | 0 | { |
1140 | 0 | const UnitArea& currArea = partitioner.currArea(); |
1141 | |
|
1142 | 0 | int subTuCounter = m_ispTestedModes[0].subTuCounter; |
1143 | 0 | TransformUnit& currTU = *cs.getTU(currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter); |
1144 | 0 | uint32_t currDepth = partitioner.currTrDepth; |
1145 | 0 | const bool subdiv = currTU.depth > currDepth; |
1146 | |
|
1147 | 0 | if (subdiv) |
1148 | 0 | { |
1149 | 0 | if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) |
1150 | 0 | { |
1151 | 0 | partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); |
1152 | 0 | } |
1153 | 0 | else if (currTU.cu->ispMode) |
1154 | 0 | { |
1155 | 0 | partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs); |
1156 | 0 | } |
1157 | 0 | else |
1158 | 0 | THROW("Implicit TU split not available!"); |
1159 | | |
1160 | 0 | do |
1161 | 0 | { |
1162 | 0 | xEncCoeffQT(cs, partitioner, compID, cuCtx, subTuCounter, m_ispTestedModes[0].IspType); |
1163 | 0 | subTuCounter += subTuCounter != -1 ? 1 : 0; |
1164 | 0 | } while( partitioner.nextPart( cs ) ); |
1165 | |
|
1166 | 0 | partitioner.exitCurrSplit(); |
1167 | 0 | } |
// The `else` below binds to the `if( currArea.blocks[compID].valid() )` that follows the blank row.
1168 | 0 | else |
1169 | | |
1170 | 0 | if( currArea.blocks[compID].valid() ) |
1171 | 0 | { |
1172 | 0 | if( compID == COMP_Cr ) |
1173 | 0 | { |
1174 | 0 | const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 ); |
1175 | 0 | m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); |
1176 | 0 | } |
1177 | 0 | if( TU::getCbf( currTU, compID ) ) |
1178 | 0 | { |
1179 | 0 | if( isLuma(compID) ) |
1180 | 0 | { |
1181 | 0 | m_CABACEstimator->residual_coding( currTU, compID, cuCtx ); |
1182 | 0 | m_CABACEstimator->mts_idx( *currTU.cu, cuCtx ); |
1183 | 0 | } |
1184 | 0 | else |
1185 | 0 | m_CABACEstimator->residual_coding( currTU, compID ); |
1186 | 0 | } |
1187 | 0 | } |
1188 | 0 | } |
1189 | | |
// Estimates the fractional bit cost of the intra CU data reachable from the partitioner's current area.
//
// Resets the estimator's bit counter, then codes in order: the intra header (xEncIntraHeader), the
// cbf flags (xEncSubdivCbfQT) and the coefficients (xEncCoeffQT) — COMP_Y when `luma` is true,
// COMP_Cb followed by COMP_Cr otherwise. For luma, residual_lfnst_mode is additionally coded when
// cuCtx is non-null and the ISP/LFNST condition below holds.
//
// cuCtx  may be null (the chroma search calls this function without it — presumably the declaration
//        supplies a default; confirm in the header).
// Returns m_CABACEstimator->getEstFracBits() after the coding calls above.
1190 | | uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool luma, CUCtx *cuCtx ) |
1191 | 0 | { |
1192 | 0 | m_CABACEstimator->resetBits(); |
1193 | |
|
1194 | 0 | xEncIntraHeader( cs, partitioner, luma ); |
1195 | 0 | xEncSubdivCbfQT( cs, partitioner, luma ); |
1196 | |
|
1197 | 0 | if( luma ) |
1198 | 0 | { |
1199 | 0 | xEncCoeffQT( cs, partitioner, COMP_Y, cuCtx ); |
1200 | |
|
1201 | 0 | CodingUnit &cu = *cs.cus[0]; |
// LFNST index is coded: always without ISP; with ISP and a non-zero lfnstIdx only at sub-TU 0;
// with ISP and lfnstIdx == 0 only at the last sub-TU (numTotalParts - 1).
// The `cu.ispMode - 1` index is only evaluated when cu.ispMode is non-zero (short-circuit).
1202 | 0 | if (cuCtx /*&& CU::isSepTree(cu)*/ |
1203 | 0 | && (!cu.ispMode || (cu.lfnstIdx && m_ispTestedModes[0].subTuCounter == 0) |
1204 | 0 | || (!cu.lfnstIdx |
1205 | 0 | && m_ispTestedModes[0].subTuCounter == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1))) |
1206 | 0 | { |
1207 | 0 | m_CABACEstimator->residual_lfnst_mode( cu, *cuCtx ); |
1208 | 0 | } |
1209 | 0 | } |
1210 | 0 | else |
1211 | 0 | { |
1212 | 0 | xEncCoeffQT( cs, partitioner, COMP_Cb ); |
1213 | 0 | xEncCoeffQT( cs, partitioner, COMP_Cr ); |
1214 | 0 | } |
1215 | |
|
1216 | 0 | uint64_t fracBits = m_CABACEstimator->getEstFracBits(); |
1217 | 0 | return fracBits; |
1218 | 0 | } |
1219 | | |
// Estimates the fractional bit cost of one chroma component of a single TU.
//
// Joint CbCr TU (currTU.jointCbCr != 0): codes both chroma cbfs, joint_cb_cr when either cbf is set,
// and the residual of each component whose cbf is set; `compID` is not consulted on this path.
// Otherwise: codes the cbf of `compID` (for Cr, also joint_cb_cr with the Cb/Cr cbf mask and with
// the Cb cbf passed as previous cbf), followed by the residual when that cbf is set.
//
// Returns m_CABACEstimator->getEstFracBits() after resetting the bit counter at entry.
1220 | | uint64_t IntraSearch::xGetIntraFracBitsQTChroma(const TransformUnit& currTU, const ComponentID compID, CUCtx *cuCtx) |
1221 | 0 | { |
1222 | 0 | m_CABACEstimator->resetBits(); |
1223 | |
|
1224 | 0 | if ( currTU.jointCbCr ) |
1225 | 0 | { |
// cbfMask: bit 1 = Cb cbf, bit 0 = Cr cbf.
1226 | 0 | const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 ); |
1227 | 0 | m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask>>1, currTU.blocks[ COMP_Cb ], currTU.depth, false ); |
1228 | 0 | m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask &1, currTU.blocks[ COMP_Cr ], currTU.depth, cbfMask>>1 ); |
1229 | 0 | if( cbfMask ) |
1230 | 0 | m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); |
1231 | 0 | if (cbfMask >> 1) |
1232 | 0 | m_CABACEstimator->residual_coding( currTU, COMP_Cb, cuCtx ); |
1233 | 0 | if (cbfMask & 1) |
1234 | 0 | m_CABACEstimator->residual_coding( currTU, COMP_Cr, cuCtx ); |
1235 | 0 | } |
1236 | 0 | else |
1237 | 0 | { |
1238 | 0 | if ( compID == COMP_Cb ) |
1239 | 0 | m_CABACEstimator->cbf_comp( *currTU.cu, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false ); |
1240 | 0 | else |
1241 | 0 | { |
1242 | 0 | const bool cbCbf = TU::getCbf( currTU, COMP_Cb ); |
1243 | 0 | const bool crCbf = TU::getCbf( currTU, compID ); |
1244 | 0 | const int cbfMask = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 ); |
1245 | 0 | m_CABACEstimator->cbf_comp( *currTU.cu, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf ); |
1246 | 0 | m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); |
1247 | 0 | } |
1248 | 0 | } |
1249 | |
|
// Residual of the requested component for the non-joint case (the joint case coded it above).
1250 | 0 | if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) ) |
1251 | 0 | { |
1252 | 0 | m_CABACEstimator->residual_coding( currTU, compID, cuCtx ); |
1253 | 0 | } |
1254 | |
|
1255 | 0 | uint64_t fracBits = m_CABACEstimator->getEstFracBits(); |
1256 | 0 | return fracBits; |
1257 | 0 | } |
1258 | | |
// Codes one component block of one TU and accumulates its distortion.
//
// Luma   : builds the intra prediction (ISP-aware, MIP, angular, or copied from predBuf), forms the
//          residual, runs transformNxN / invTransformNxN and reconstructs.
// Chroma : runs transformNxN / invTransformNxN on the residual buffer and reconstructs; with
//          tu.jointCbCr set (only valid together with compID == COMP_Cb) a single residual is coded
//          and both Cb and Cr are reconstructed. No chroma prediction or residual is generated in
//          this body — presumably both buffers are filled by the caller; confirm.
//
// tu        TU to code; coefficients and cbfs are updated through the called helpers.
// compID    component to code; returns immediately if tu.blocks[compID] is not valid.
// ruiDist   in/out: the block distortion is added to it. It is instead overwritten with MAX_INT when
//           the last ISP sub-partition leaves all luma cbfs zero, or with MAX_DISTORTION when the
//           coded joint-CbCr cbf mask does not match tu.jointCbCr; in both cases the function
//           returns before reconstruction.
// predBuf   optional; when non-null its Y() plane is used as luma prediction on the path where the
//           prediction region equals the TB, and initIntraPatternChType is skipped for non-ISP CUs.
// loadTr    forwarded unchanged to transformNxN.
// NOTE(review): checkCrossCPrediction and numSig are not read anywhere in this body.
1259 | | void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID compID, const bool checkCrossCPrediction, Distortion &ruiDist, uint32_t *numSig, PelUnitBuf *predBuf, const bool loadTr) |
1260 | 0 | { |
1261 | 0 | if (!tu.blocks[compID].valid()) |
1262 | 0 | { |
1263 | 0 | return; |
1264 | 0 | } |
1265 | | |
1266 | 0 | CodingStructure &cs = *tu.cs; |
1267 | 0 | const CompArea &area = tu.blocks[compID]; |
1268 | 0 | const SPS &sps = *cs.sps; |
1269 | 0 | const ReshapeData& reshapeData = cs.picture->reshapeData; |
1270 | |
|
1271 | 0 | const ChannelType chType = toChannelType(compID); |
1272 | 0 | const int bitDepth = sps.bitDepths[chType]; |
1273 | |
|
1274 | 0 | CPelBuf piOrg = cs.getOrgBuf (area); |
1275 | 0 | PelBuf piPred = cs.getPredBuf (area); |
1276 | 0 | PelBuf piResi = cs.getResiBuf (area); |
1277 | 0 | PelBuf piReco = cs.getRecoBuf (area); |
1278 | |
|
1279 | 0 | const CodingUnit& cu = *tu.cu; |
1280 | | |
1281 | | //===== init availability pattern ===== |
1282 | 0 | CHECK( tu.jointCbCr && compID == COMP_Cr, "wrong combination of compID and jointCbCr" ); |
1283 | 0 | bool jointCbCr = tu.jointCbCr && compID == COMP_Cb; |
1284 | |
|
1285 | 0 | if ( isLuma(compID) ) |
1286 | 0 | { |
1287 | 0 | bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu ); |
1288 | 0 | bool firstTBInPredReg = false; |
1289 | 0 | CompArea areaPredReg(COMP_Y, tu.chromaFormat, area); |
1290 | 0 | if (tu.cu->ispMode ) |
1291 | 0 | { |
1292 | 0 | firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, area); |
1293 | 0 | if (predRegDiffFromTB) |
1294 | 0 | { |
// When the prediction region differs from the TB, reference samples are initialised (and the
// prediction generated further below) only for the first TB of that region.
1295 | 0 | if (firstTBInPredReg) |
1296 | 0 | { |
1297 | 0 | CU::adjustPredArea(areaPredReg); |
1298 | 0 | initIntraPatternChTypeISP(*tu.cu, areaPredReg, piReco); |
1299 | 0 | } |
1300 | 0 | } |
1301 | 0 | else |
1302 | 0 | initIntraPatternChTypeISP(*tu.cu, area, piReco); |
1303 | 0 | } |
1304 | 0 | else if( !predBuf ) |
1305 | 0 | { |
1306 | 0 | initIntraPatternChType(*tu.cu, area); |
1307 | 0 | } |
1308 | | |
1309 | | //===== get prediction signal ===== |
1310 | 0 | if (predRegDiffFromTB) |
1311 | 0 | { |
1312 | 0 | if (firstTBInPredReg) |
1313 | 0 | { |
1314 | 0 | PelBuf piPredReg = cs.getPredBuf(areaPredReg); |
1315 | 0 | predIntraAng(compID, piPredReg, cu); |
1316 | 0 | } |
1317 | 0 | } |
1318 | 0 | else |
1319 | 0 | { |
1320 | 0 | if( predBuf ) |
1321 | 0 | { |
1322 | 0 | piPred.copyFrom( predBuf->Y() ); |
1323 | 0 | } |
1324 | 0 | else if( CU::isMIP( cu, CH_L ) ) |
1325 | 0 | { |
1326 | 0 | initIntraMip( cu ); |
1327 | 0 | predIntraMip( piPred, cu ); |
1328 | 0 | } |
1329 | 0 | else |
1330 | 0 | { |
1331 | 0 | predIntraAng(compID, piPred, cu); |
1332 | 0 | } |
1333 | 0 | } |
1334 | 0 | } |
1335 | 0 | DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), CU::getFinalIntraMode(cu, chType) ); |
1336 | 0 | const Slice &slice = *cs.slice; |
// `flag` gates the LMCS chroma-residual-scaling steps below; it is narrowed further to blocks
// with more than 4 samples before it is used.
1337 | 0 | bool flag = cs.picHeader->lmcsEnabled && (slice.isIntra() || (!slice.isIntra() && reshapeData.getCTUFlag())); |
1338 | |
|
1339 | 0 | if (isLuma(compID)) |
1340 | 0 | { |
1341 | | //===== get residual signal ===== |
1342 | 0 | if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() ) |
1343 | 0 | { |
1344 | 0 | piResi.subtract(cs.getRspOrgBuf(area), piPred); |
1345 | 0 | } |
1346 | 0 | else |
1347 | 0 | { |
1348 | 0 | piResi.subtract( piOrg, piPred ); |
1349 | 0 | } |
1350 | 0 | } |
1351 | | |
1352 | | //===== transform and quantization ===== |
1353 | | //--- init rate estimation arrays for RDOQ --- |
1354 | | //--- transform and quantization --- |
1355 | 0 | TCoeff uiAbsSum = 0; |
1356 | 0 | const QpParam cQP(tu, compID); |
1357 | |
|
1358 | 0 | m_pcTrQuant->selectLambda(compID); |
1359 | |
|
1360 | 0 | flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4); |
1361 | 0 | if (flag && isChroma(compID) && cs.picHeader->lmcsChromaResidualScale ) |
1362 | 0 | { |
// Lambda is scaled by 1 / cRescale^2, with cRescale = 2^CSCALE_FP_PREC / tu.chromaAdj.
1363 | 0 | int cResScaleInv = tu.chromaAdj; |
1364 | 0 | double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv; |
1365 | 0 | m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) ); |
1366 | 0 | } |
1367 | |
|
1368 | 0 | if ( jointCbCr ) |
1369 | 0 | { |
1370 | | // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks |
1371 | 0 | const int absIct = abs( TU::getICTMode(tu) ); |
1372 | 0 | const double lfact = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 ); |
1373 | 0 | m_pcTrQuant->scaleLambda( lfact ); |
1374 | 0 | } |
1375 | 0 | if ( sps.jointCbCr && isChroma(compID) && (tu.cu->cs->slice->sliceQp > 18) ) |
1376 | 0 | { |
1377 | 0 | m_pcTrQuant->scaleLambda( 1.3 ); |
1378 | 0 | } |
1379 | |
|
1380 | 0 | if( isLuma(compID) ) |
1381 | 0 | { |
1382 | 0 | m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); |
1383 | |
|
1384 | 0 | DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum ); |
1385 | 0 | if (tu.cu->ispMode && isLuma(compID) && CU::isISPLast(*tu.cu, area, area.compID) && CU::allLumaCBFsAreZero(*tu.cu)) |
1386 | 0 | { |
1387 | | // ISP has to have at least one non-zero CBF |
// Early return with a sentinel distortion; no reconstruction is performed for this TU.
1388 | 0 | ruiDist = MAX_INT; |
1389 | 0 | return; |
1390 | 0 | } |
1391 | | //--- inverse transform --- |
1392 | 0 | if (uiAbsSum > 0) |
1393 | 0 | { |
1394 | 0 | m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP); |
1395 | 0 | } |
1396 | 0 | else |
1397 | 0 | { |
1398 | 0 | piResi.fill(0); |
1399 | 0 | } |
1400 | 0 | } |
1401 | 0 | else // chroma |
1402 | 0 | { |
1403 | 0 | PelBuf crPred = cs.getPredBuf ( COMP_Cr ); |
1404 | 0 | PelBuf crResi = cs.getResiBuf ( COMP_Cr ); |
1405 | 0 | PelBuf crReco = cs.getRecoBuf ( COMP_Cr ); |
1406 | |
|
1407 | 0 | int codedCbfMask = 0; |
// In joint mode the coded component is Cb when bit 1 of tu.jointCbCr is set, Cr otherwise.
1408 | 0 | ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID); |
1409 | 0 | const QpParam qpCbCr(tu, codeCompId); |
1410 | |
|
1411 | 0 | if( tu.jointCbCr ) |
1412 | 0 | { |
1413 | 0 | ComponentID otherCompId = ( codeCompId==COMP_Cr ? COMP_Cb : COMP_Cr ); |
1414 | 0 | tu.getCoeffs( otherCompId ).fill(0); // do we need that? |
1415 | 0 | TU::setCbfAtDepth (tu, otherCompId, tu.depth, false ); |
1416 | 0 | } |
1417 | 0 | PelBuf& codeResi = ( codeCompId == COMP_Cr ? crResi : piResi ); |
1418 | 0 | uiAbsSum = 0; |
1419 | 0 | m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); |
1420 | 0 | DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum ); |
1421 | 0 | if( uiAbsSum > 0 ) |
1422 | 0 | { |
1423 | 0 | m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr); |
1424 | 0 | codedCbfMask += ( codeCompId == COMP_Cb ? 2 : 1 ); |
1425 | 0 | } |
1426 | 0 | else |
1427 | 0 | { |
1428 | 0 | codeResi.fill(0); |
1429 | 0 | } |
1430 | |
|
1431 | 0 | if( tu.jointCbCr ) |
1432 | 0 | { |
1433 | 0 | if( tu.jointCbCr == 3 && codedCbfMask == 2 ) |
1434 | 0 | { |
1435 | 0 | codedCbfMask = 3; |
1436 | 0 | TU::setCbfAtDepth (tu, COMP_Cr, tu.depth, true ); |
1437 | 0 | } |
// The requested joint mode cannot be signalled with the cbfs actually produced: reject it.
1438 | 0 | if( tu.jointCbCr != codedCbfMask ) |
1439 | 0 | { |
1440 | 0 | ruiDist = MAX_DISTORTION; |
1441 | 0 | return; |
1442 | 0 | } |
1443 | 0 | m_pcTrQuant->invTransformICT( tu, piResi, crResi ); |
1444 | 0 | uiAbsSum = codedCbfMask; |
1445 | 0 | } |
1446 | | |
1447 | | //===== reconstruction ===== |
1448 | 0 | if ( flag && uiAbsSum > 0 && cs.picHeader->lmcsChromaResidualScale ) |
1449 | 0 | { |
1450 | 0 | piResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[compID]); |
1451 | |
|
1452 | 0 | if( jointCbCr ) |
1453 | 0 | { |
1454 | 0 | crResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cr]); |
1455 | 0 | } |
1456 | 0 | } |
1457 | |
|
1458 | 0 | if( jointCbCr ) |
1459 | 0 | { |
1460 | 0 | crReco.reconstruct(crPred, crResi, cs.slice->clpRngs[ COMP_Cr ]); |
1461 | 0 | } |
1462 | 0 | } |
1463 | 0 | piReco.reconstruct(piPred, piResi, cs.slice->clpRngs[ compID ]); |
1464 | | |
1465 | | |
1466 | | |
1467 | | //===== update distortion ===== |
// Weighted SSE (DF_SSE_WTD, with the original luma as extra input) is used when LMCS applies to
// this block or m_lumaLevelToDeltaQPEnabled is set; plain SSE otherwise. In joint CbCr mode the
// Cr distortion is added as well.
1468 | 0 | const bool reshapeIntraCMD = m_pcEncCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ; |
1469 | 0 | if(((cs.picHeader->lmcsEnabled && (reshapeData.getCTUFlag() || (isChroma(compID) && reshapeIntraCMD))) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled ) ) |
1470 | 0 | { |
1471 | 0 | const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] ); |
1472 | 0 | if( compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled ) |
1473 | 0 | { |
// Luma distortion is measured on the reconstruction mapped through the inverse LUT.
1474 | 0 | PelBuf tmpRecLuma = cs.getRspRecoBuf(area); |
1475 | 0 | tmpRecLuma.rspSignal( piReco, reshapeData.getInvLUT()); |
1476 | 0 | ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma); |
1477 | 0 | } |
1478 | 0 | else |
1479 | 0 | { |
1480 | 0 | ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma ); |
1481 | 0 | if( jointCbCr ) |
1482 | 0 | { |
1483 | 0 | CPelBuf crOrg = cs.getOrgBuf ( COMP_Cr ); |
1484 | 0 | PelBuf crReco = cs.getRecoBuf ( COMP_Cr ); |
1485 | 0 | ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE_WTD, &orgLuma ); |
1486 | 0 | } |
1487 | 0 | } |
1488 | 0 | } |
1489 | 0 | else |
1490 | 0 | { |
1491 | 0 | ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE ); |
1492 | 0 | if( jointCbCr ) |
1493 | 0 | { |
1494 | 0 | CPelBuf crOrg = cs.getOrgBuf ( COMP_Cr ); |
1495 | 0 | PelBuf crReco = cs.getRecoBuf ( COMP_Cr ); |
1496 | 0 | ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE ); |
1497 | 0 | } |
1498 | 0 | } |
1499 | 0 | } |
1500 | | |
// Luma residual-coding RD search for the current partitioner area.
//
// When at least one of LFNST, MTS or transform skip (TS) is eligible, the
// function loops over the transform candidates (modeId) and the LFNST indices,
// keeps the cheapest combination (backed up in one of the m_pSaveCS scratch
// structures) and restores it into cs. Otherwise a single test is run
// (xTestISP() for ISP CUs, xIntraCodingTUBlock() otherwise).
//
// cs            coding structure under test; cs.dist and cs.fracBits are
//               incremented and cs.cost is set to the best cost found (it stays
//               MAX_DOUBLE if every candidate was rejected).
// partitioner   supplies the current area, channel type and transform depth.
// predBuf       forwarded to xPreCheckMTS() and xIntraCodingTUBlock().
// bestCostSoFar reference cost for the DCT2 early termination; also the initial
//               bound handed to xTestISP().
// numMode       a negative value disables MTS and LFNST testing; the absolute
//               value is used afterwards for the ISP cost bookkeeping.
// disableMTS    when true, no MTS candidates are tested.
1501 | | void IntraSearch::xIntraCodingLumaQT(CodingStructure& cs, Partitioner& partitioner, PelUnitBuf* predBuf, const double bestCostSoFar, int numMode, bool disableMTS) |
1502 | 0 | { |
1503 | 0 | PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_RD_SEARCH_LUMA, &cs, partitioner.chType ); |
1504 | 0 | const UnitArea& currArea = partitioner.currArea(); |
1505 | 0 | uint32_t currDepth = partitioner.currTrDepth; |
1506 | 0 | Distortion singleDistLuma = 0; |
1507 | 0 | uint32_t numSig = 0; |
1508 | 0 | const SPS &sps = *cs.sps; |
1509 | 0 | CodingUnit &cu = *cs.cus[0]; |
1510 | 0 | bool mtsAllowed = (numMode < 0) || disableMTS ? false : CU::isMTSAllowed(cu, COMP_Y); |
1511 | 0 | uint64_t singleFracBits = 0; |
1512 | 0 | bool splitCbfLumaSum = false; |
1513 | 0 | double bestCostForISP = bestCostSoFar; |
1514 | 0 | double dSingleCost = MAX_DOUBLE; |
1515 | 0 | int endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (currArea.lwidth() < 8 || currArea.lheight() < 8)) |
1516 | 0 | || (currArea.lwidth() > sps.getMaxTbSize() || currArea.lheight() > sps.getMaxTbSize()) || !sps.LFNST || (numMode < 0) ? 0 : 2; |
1517 | 0 | const bool useTS = cs.picture->useTS; |
1518 | 0 | numMode = (numMode < 0) ? -numMode : numMode; |
1519 | 
// LFNST is not tested for MIP blocks whose luma size fails allowLfnstWithMip().
1520 | 0 | if (cu.mipFlag && !allowLfnstWithMip(cu.lumaSize())) |
1521 | 0 | { |
1522 | 0 | endLfnstIdx = 0; |
1523 | 0 | } |
1524 | 0 | int bestMTS = 0; |
1525 | 0 | int EndMTS = mtsAllowed ? m_pcEncCfg->m_MTSIntraMaxCand : 0; |
1526 | 0 | if (cu.ispMode && (EndMTS || endLfnstIdx)) |
1527 | 0 | { |
1528 | 0 | EndMTS = 0; |
1529 | 0 | if ((m_ispTestedModes[1].numTotalParts[cu.ispMode - 1] == 0) |
1530 | 0 | && (m_ispTestedModes[2].numTotalParts[cu.ispMode - 1] == 0)) |
1531 | 0 | { |
1532 | 0 | endLfnstIdx = 0; |
1533 | 0 | } |
1534 | 0 | } |
1535 | 0 | if (cu.bdpcmM[CH_L]) |
1536 | 0 | { |
1537 | 0 | endLfnstIdx = 0; |
1538 | 0 | EndMTS = 0; |
1539 | 0 | } |
1540 | 0 | bool checkTransformSkip = sps.transformSkip; |
1541 | 
// TS is tested only for non-ISP, non-BDPCM, non-SBT CUs whose luma block fits within the SPS transform-skip size limit.
1542 | 0 | SizeType transformSkipMaxSize = 1 << sps.log2MaxTransformSkipBlockSize; |
1543 | 0 | bool tsAllowed = useTS && cu.cs->sps->transformSkip && (!cu.ispMode) && (!cu.bdpcmM[CH_L]) && (!cu.sbtInfo); |
1544 | 0 | tsAllowed &= cu.blocks[COMP_Y].width <= transformSkipMaxSize && cu.blocks[COMP_Y].height <= transformSkipMaxSize; |
1545 | 0 | if (tsAllowed) |
1546 | 0 | { |
1547 | 0 | EndMTS += 1; |
1548 | 0 | } |
1549 | 0 | if (endLfnstIdx || EndMTS) |
1550 | 0 | { |
1551 | 0 | bool splitCbfLuma = false; |
1552 | 0 | const PartSplit ispType = CU::getISPType(cu, COMP_Y); |
1553 | 0 | CUCtx cuCtx; |
1554 | 0 | cuCtx.isDQPCoded = true; |
1555 | 0 | cuCtx.isChromaQpAdjCoded = true; |
1556 | 0 | cs.cost = 0.0; |
1557 | 0 | Distortion singleDistTmpLuma = 0; |
1558 | 0 | uint64_t singleTmpFracBits = 0; |
1559 | 0 | double singleCostTmp = 0; |
1560 | 0 | const TempCtx ctxStart (m_CtxCache, m_CABACEstimator->getCtx()); |
1561 | 0 | TempCtx ctxBest (m_CtxCache); |
1562 | 0 | CodingStructure &saveCS = *m_pSaveCS[cu.ispMode?0:1]; |
1563 | 0 | TransformUnit * tmpTU = nullptr; |
1564 | 0 | int bestLfnstIdx = 0; |
1565 | 0 | int startLfnstIdx = 0; |
1566 | | // encoder speed-up switches for the LFNST / DCT2 search (set from m_LFNST below) |
1567 | 0 | bool rapidLFNST = false; |
1568 | 0 | bool rapidDCT = false; |
1569 | 0 | double thresholdDCT = 1; |
1570 | 
// With m_MTS == 2 the DCT2 early-termination threshold is raised by 1.4 / sqrt(block area).
1571 | 0 | if (m_pcEncCfg->m_MTS == 2) |
1572 | 0 | { |
1573 | 0 | thresholdDCT += 1.4 / sqrt(cu.lwidth() * cu.lheight()); |
1574 | 0 | } |
1575 | 
// m_LFNST > 1 enables the rapidLFNST shortcuts; m_LFNST > 2 additionally enables rapidDCT and caps the tested LFNST index at 1.
1576 | 0 | if (m_pcEncCfg->m_LFNST > 1) |
1577 | 0 | { |
1578 | 0 | rapidLFNST = true; |
1579 | 

1580 | 0 | if (m_pcEncCfg->m_LFNST > 2) |
1581 | 0 | { |
1582 | 0 | rapidDCT = true; |
1583 | 0 | endLfnstIdx = endLfnstIdx ? 1 : 0; |
1584 | 0 | } |
1585 | 0 | } |
1586 | 
// Point the scratch CS at the same pcv, picture and area as cs; it holds the best candidate found so far.
1587 | 0 | saveCS.pcv = cs.pcv; |
1588 | 0 | saveCS.picture = cs.picture; |
1589 | 0 | saveCS.area.repositionTo( cs.area); |
1590 | 

1591 | 0 | if (cu.ispMode) |
1592 | 0 | { |
1593 | 0 | partitioner.splitCurrArea(ispType, cs); |
1594 | 0 | } |
1595 | 

1596 | 0 | TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]); |
1597 | 
// For ISP, add one TU per sub-partition to saveCS; otherwise reuse (or create) a single scratch TU covering currArea.
1598 | 0 | if (cu.ispMode) |
1599 | 0 | { |
1600 | 0 | saveCS.clearTUs(); |
1601 | 0 | do |
1602 | 0 | { |
1603 | 0 | saveCS.addTU( |
1604 | 0 | CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), |
1605 | 0 | partitioner.chType, cs.cus[0]); |
1606 | 0 | } while (partitioner.nextPart(cs)); |
1607 | 

1608 | 0 | partitioner.exitCurrSplit(); |
1609 | 0 | } |
1610 | 0 | else |
1611 | 0 | { |
1612 | 0 | tmpTU = saveCS.tus.empty() ? &saveCS.addTU( currArea, partitioner.chType, nullptr ) : saveCS.tus.front(); |
1613 | 0 | tmpTU->initData(); |
1614 | 0 | tmpTU->UnitArea::operator=( currArea ); |
1615 | 0 | } |
1616 | | 
1617 | 
// Candidate list: entry 0 (transform index 0) always comes first, then index 1 (TS) if allowed, then the MTS candidates.
1618 | 0 | std::vector<TrMode> trModes{ TrMode(0, true) }; |
1619 | 0 | if (tsAllowed) |
1620 | 0 | { |
1621 | 0 | trModes.push_back(TrMode(1, true)); |
1622 | 0 | } |
1623 | 0 | double dct2Cost = MAX_DOUBLE; |
1624 | 0 | double trGrpStopThreshold = 1.001; |
1625 | 0 | double trGrpBestCost = MAX_DOUBLE; |
1626 | 
// With LFNST configured, the order of the two mixed DST7/DCT8 candidates depends on whether the intra mode is below 34.
1627 | 0 | if (mtsAllowed) |
1628 | 0 | { |
1629 | 0 | if (m_pcEncCfg->m_LFNST) |
1630 | 0 | { |
1631 | 0 | uint32_t uiIntraMode = cs.cus[0]->intraDir[partitioner.chType]; |
1632 | 0 | int MTScur = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7; |
1633 | 

1634 | 0 | trModes.push_back(TrMode( 2, true)); |
1635 | 0 | trModes.push_back(TrMode(MTScur, true)); |
1636 | 

1637 | 0 | MTScur = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8; |
1638 | 

1639 | 0 | trModes.push_back(TrMode(MTScur, true)); |
1640 | 0 | trModes.push_back(TrMode(MTS_DST7_DST7 + 3, true)); |
1641 | 0 | } |
1642 | 0 | else |
1643 | 0 | { |
1644 | 0 | for (int i = 2; i < 6; i++) |
1645 | 0 | { |
1646 | 0 | trModes.push_back(TrMode(i, true)); |
1647 | 0 | } |
1648 | 0 | } |
1649 | 0 | } |
1650 | 
// Pre-screen the candidates with xPreCheckMTS(); if MTS is not allowed and candidate 1 was deselected, only modeId 0 is tested.
1651 | 0 | if ((EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed)) |
1652 | 0 | { |
1653 | 0 | xPreCheckMTS(tu, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, predBuf); |
1654 | 0 | if (!mtsAllowed && !trModes[1].second) |
1655 | 0 | { |
1656 | 0 | EndMTS = 0; |
1657 | 0 | } |
1658 | 0 | } |
1659 | 

1660 | 0 | bool NStopMTS = true; |
1661 | 
// Outer loop: transform candidates (modeId). Inner loop: LFNST indices, which are only combined with modeId 0.
1662 | 0 | for (int modeId = 0; modeId <= EndMTS && NStopMTS; modeId++) |
1663 | 0 | { |
1664 | 0 | if (modeId > 1) |
1665 | 0 | { |
1666 | 0 | trGrpBestCost = MAX_DOUBLE; |
1667 | 0 | } |
1668 | 0 | for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++) |
1669 | 0 | { |
1670 | 0 | if (lfnstIdx && modeId) |
1671 | 0 | { |
1672 | 0 | continue; |
1673 | 0 | } |
1674 | 0 | if (mtsAllowed || tsAllowed) |
1675 | 0 | { |
1676 | 0 | if (m_pcEncCfg->m_TS && bestMTS == MTS_SKIP) |
1677 | 0 | { |
1678 | 0 | break; |
1679 | 0 | } |
1680 | 0 | if (!m_pcEncCfg->m_LFNST && !trModes[modeId].second && mtsAllowed) |
1681 | 0 | { |
1682 | 0 | continue; |
1683 | 0 | } |
1684 | | // select the transform index of this candidate for the luma TU |
1685 | 0 | tu.mtsIdx[COMP_Y] = trModes[modeId].first; |
1686 | 0 | } |
1687 | | // for ISP, skip LFNST indices whose numTotalParts entry in m_ispTestedModes is 0 |
1688 | 0 | if (cu.ispMode && lfnstIdx) |
1689 | 0 | { |
1690 | 0 | if (m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] == 0) |
1691 | 0 | { |
1692 | 0 | if (lfnstIdx == 2) |
1693 | 0 | { |
1694 | 0 | endLfnstIdx = 1; |
1695 | 0 | } |
1696 | 0 | continue; |
1697 | 0 | } |
1698 | 0 | } |
1699 | | // set the LFNST index on the CU and reset the per-candidate flags in cuCtx |
1700 | 0 | cu.lfnstIdx = lfnstIdx; |
1701 | 0 | cuCtx.lfnstLastScanPos = false; |
1702 | 0 | cuCtx.violatesLfnstConstrained[CH_L] = false; |
1703 | 0 | cuCtx.violatesLfnstConstrained[CH_C] = false; |
1704 | 
// Every candidate after the first one starts from the CABAC context saved in ctxStart.
1705 | 0 | if ((lfnstIdx != startLfnstIdx) || (modeId)) |
1706 | 0 | { |
1707 | 0 | m_CABACEstimator->getCtx() = ctxStart; |
1708 | 0 | } |
1709 | 

1710 | 0 | singleDistTmpLuma = 0; |
1711 | 

1712 | 0 | if (cu.ispMode) |
1713 | 0 | { |
1714 | 0 | splitCbfLuma = false; |
1715 | 

1716 | 0 | partitioner.splitCurrArea(ispType, cs); |
1717 | 

1718 | 0 | singleCostTmp = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLuma, singleTmpFracBits, singleDistTmpLuma, cuCtx); |
1719 | 

1720 | 0 | partitioner.exitCurrSplit(); |
1721 | 

1722 | 0 | if (modeId && (singleCostTmp == MAX_DOUBLE)) |
1723 | 0 | { |
1724 | 0 | m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] = 0; |
1725 | 0 | } |
1726 | 
// The ISP cost is recorded when numMode == 1, or when m_ISP >= 2 and numMode <= 1.
1727 | 0 | bool storeCost = (numMode == 1) ? true : false; |
1728 | 

1729 | 0 | if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1)) |
1730 | 0 | { |
1731 | 0 | storeCost = true; |
1732 | 0 | } |
1733 | 

1734 | 0 | if (storeCost) |
1735 | 0 | { |
1736 | 0 | m_ispTestedModes[0].bestCost[cu.ispMode - 1] = singleCostTmp; |
1737 | 0 | } |
1738 | 0 | } |
1739 | 0 | else |
1740 | 0 | { |
1741 | 0 | bool TrLoad = (EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed && (lfnstIdx == 0)) ? true : false; |
1742 | 
// TrLoad is true exactly in the configurations where xPreCheckMTS() was called above (restricted to lfnstIdx 0 in the TS-only case).
1743 | 0 | xIntraCodingTUBlock(tu, COMP_Y, false, singleDistTmpLuma, &numSig, predBuf, TrLoad); |
1744 | 

1745 | 0 | cuCtx.mtsLastScanPos = false; |
1746 | | //----- determine rate and r-d cost ----- |
1747 | 0 | if ((sps.LFNST ? (modeId == EndMTS && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMP_Y, currDepth)) |
1748 | 0 | { |
1749 | 0 | singleCostTmp = MAX_DOUBLE; |
1750 | 0 | } |
1751 | 0 | else |
1752 | 0 | { |
1753 | 0 | m_ispTestedModes[0].IspType = TU_NO_ISP; |
1754 | 0 | m_ispTestedModes[0].subTuCounter = -1; |
1755 | 0 | singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, &cuCtx); |
1756 | 
// A candidate with mtsIdx above MTS_SKIP is only valid if cuCtx.mtsLastScanPos is set after the bit estimation.
1757 | 0 | if (tu.mtsIdx[COMP_Y] > MTS_SKIP) |
1758 | 0 | { |
1759 | 0 | if (!cuCtx.mtsLastScanPos) |
1760 | 0 | { |
1761 | 0 | singleCostTmp = MAX_DOUBLE; |
1762 | 0 | } |
1763 | 0 | else |
1764 | 0 | { |
1765 | 0 | singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); |
1766 | 0 | } |
1767 | 0 | } |
1768 | 0 | else |
1769 | 0 | { |
1770 | 0 | singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); |
1771 | 0 | } |
1772 | 0 | } |
1773 | 
// Early termination: if the first candidate (modeId 0, lfnstIdx 0) costs more than bestCostSoFar * thresholdDCT, drop the remaining transform candidates (and LFNST when rapidDCT).
1774 | 0 | if (((EndMTS && (m_pcEncCfg->m_MTS == 2)) || rapidLFNST) && modeId == 0 && lfnstIdx == 0) |
1775 | 0 | { |
1776 | 0 | if (singleCostTmp > bestCostSoFar * thresholdDCT) |
1777 | 0 | { |
1778 | 0 | EndMTS = 0; |
1779 | 

1780 | 0 | if (rapidDCT) |
1781 | 0 | { |
1782 | 0 | endLfnstIdx = 0; // break the loop but do not cpy best |
1783 | 0 | } |
1784 | 0 | } |
1785 | 0 | } |
1786 | 
// LFNST candidate for which cuCtx.lfnstLastScanPos was not set: the cost is invalidated if the relevant cbf is non-zero.
1787 | 0 | if (lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode) |
1788 | 0 | { |
1789 | 0 | bool rootCbfL = false; |
1790 | 

1791 | 0 | for (uint32_t t = 0; t < getNumberValidTBlocks(*cu.cs->pcv); t++) |
1792 | 0 | { |
1793 | 0 | rootCbfL |= tu.cbf[t] != 0; |
1794 | 0 | } |
1795 | 

1796 | 0 | if (rapidLFNST && !rootCbfL) |
1797 | 0 | { |
1798 | 0 | endLfnstIdx = lfnstIdx; // break the loop |
1799 | 0 | } |
1800 | 0 | bool cbfAtZeroDepth = CU::isSepTree(cu) |
1801 | 0 | ? rootCbfL |
1802 | 0 | : (cs.area.chromaFormat != CHROMA_400 && std::min(cu.firstTU->blocks[1].width, cu.firstTU->blocks[1].height) < 4) |
1803 | 0 | ? TU::getCbfAtDepth(tu, COMP_Y, currDepth) |
1804 | 0 | : rootCbfL; |
1805 | 

1806 | 0 | if (cbfAtZeroDepth) |
1807 | 0 | { |
1808 | 0 | singleCostTmp = MAX_DOUBLE; |
1809 | 0 | } |
1810 | 0 | } |
1811 | 0 | } |
1812 | 
// New best candidate: record its cost, distortion, bits and transform/LFNST indices.
1813 | 0 | if (singleCostTmp < dSingleCost) |
1814 | 0 | { |
1815 | 0 | trGrpBestCost = singleCostTmp; |
1816 | 0 | dSingleCost = singleCostTmp; |
1817 | 0 | singleDistLuma = singleDistTmpLuma; |
1818 | 0 | singleFracBits = singleTmpFracBits; |
1819 | 0 | bestLfnstIdx = lfnstIdx; |
1820 | 0 | bestMTS = modeId; |
1821 | 

1822 | 0 | if (dSingleCost < bestCostForISP) |
1823 | 0 | { |
1824 | 0 | bestCostForISP = dSingleCost; |
1825 | 0 | } |
1826 | 

1827 | 0 | splitCbfLumaSum = splitCbfLuma; |
1828 | 
// If the first non-ISP candidate leaves no luma cbf, no further transform candidates are tested (nor LFNST when rapidLFNST).
1829 | 0 | if (lfnstIdx == 0 && modeId == 0 && cu.ispMode == 0) |
1830 | 0 | { |
1831 | 0 | dct2Cost = singleCostTmp; |
1832 | 

1833 | 0 | if (!TU::getCbfAtDepth(tu, COMP_Y, currDepth)) |
1834 | 0 | { |
1835 | 0 | if (rapidLFNST) |
1836 | 0 | { |
1837 | 0 | endLfnstIdx = 0; // break the loop but do not cpy best |
1838 | 0 | } |
1839 | 

1840 | 0 | EndMTS = 0; |
1841 | 0 | } |
1842 | 0 | } |
1843 | 
// Back up reconstruction, TU data and CABAC context unless the best candidate is the (endLfnstIdx, EndMTS) combination.
1844 | 0 | if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS) |
1845 | 0 | { |
1846 | 0 | if (cu.ispMode) |
1847 | 0 | { |
1848 | 0 | saveCS.getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y())); |
1849 | 

1850 | 0 | for (uint32_t j = 0; j < cs.tus.size(); j++) |
1851 | 0 | { |
1852 | 0 | saveCS.tus[j]->copyComponentFrom(*cs.tus[j], COMP_Y); |
1853 | 0 | } |
1854 | 0 | } |
1855 | 0 | else |
1856 | 0 | { |
1857 | 0 | saveCS.getPredBuf(tu.Y()).copyFrom(cs.getPredBuf(tu.Y())); |
1858 | 0 | saveCS.getRecoBuf(tu.Y()).copyFrom(cs.getRecoBuf(tu.Y())); |
1859 | 

1860 | 0 | tmpTU->copyComponentFrom(tu, COMP_Y); |
1861 | 0 | } |
1862 | 

1863 | 0 | ctxBest = m_CABACEstimator->getCtx(); |
1864 | 0 | } |
1865 | | 
1866 | 0 | } |
1867 | 0 | else |
1868 | 0 | { |
1869 | 0 | if( rapidLFNST ) |
1870 | 0 | { |
1871 | 0 | endLfnstIdx = lfnstIdx; // break the loop |
1872 | 0 | } |
1873 | 0 | } |
1874 | 0 | } |
1875 | 0 | if (m_pcEncCfg->m_LFNST && m_pcEncCfg->m_MTS == 2 && modeId && modeId != EndMTS) |
1876 | 0 | { |
1877 | 0 | NStopMTS = false; |
1878 | 
// The modeId loop continues only when the nested conditions hold and dct2Cost / trGrpBestCost < trGrpStopThreshold.
1879 | 0 | if (bestMTS || bestLfnstIdx) |
1880 | 0 | { |
1881 | 0 | if ((modeId > 1 && bestMTS == modeId) || modeId == 1) |
1882 | 0 | { |
1883 | 0 | NStopMTS = (dct2Cost / trGrpBestCost) < trGrpStopThreshold; |
1884 | 0 | } |
1885 | 0 | } |
1886 | 0 | } |
1887 | 0 | } |
1888 | 
// Commit the winner: copy the backed-up state from saveCS into cs under the same condition that was used for the backup.
1889 | 0 | cu.lfnstIdx = bestLfnstIdx; |
1890 | 0 | if (dSingleCost != MAX_DOUBLE) |
1891 | 0 | { |
1892 | 0 | if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS) |
1893 | 0 | { |
1894 | 0 | if (cu.ispMode) |
1895 | 0 | { |
1896 | 0 | const UnitArea& currArea = partitioner.currArea(); |
1897 | 0 | cs.getRecoBuf(currArea.Y()).copyFrom(saveCS.getRecoBuf(currArea.Y())); |
1898 | 
// If cs holds a different number of TUs than the backup, add TUs to cs until the counts match.
1899 | 0 | if (saveCS.tus.size() != cs.tus.size()) |
1900 | 0 | { |
1901 | 0 | partitioner.splitCurrArea(ispType, cs); |
1902 | 

1903 | 0 | do |
1904 | 0 | { |
1905 | 0 | partitioner.nextPart(cs); |
1906 | 0 | cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), |
1907 | 0 | partitioner.chType, cs.cus[0]); |
1908 | 0 | } while (saveCS.tus.size() != cs.tus.size()); |
1909 | 

1910 | 0 | partitioner.exitCurrSplit(); |
1911 | 0 | } |
1912 | 

1913 | 0 | for (uint32_t j = 0; j < saveCS.tus.size(); j++) |
1914 | 0 | { |
1915 | 0 | cs.tus[j]->copyComponentFrom(*saveCS.tus[j], COMP_Y); |
1916 | 0 | } |
1917 | 0 | } |
1918 | 0 | else |
1919 | 0 | { |
1920 | 0 | cs.getRecoBuf(tu.Y()).copyFrom(saveCS.getRecoBuf(tu.Y())); |
1921 | 

1922 | 0 | tu.copyComponentFrom(*tmpTU, COMP_Y); |
1923 | 0 | } |
1924 | 

1925 | 0 | m_CABACEstimator->getCtx() = ctxBest; |
1926 | 0 | } |
1927 | | 
1928 | | // otherwise this would've happened in useSubStructure |
1929 | 0 | cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y())); |
1930 | 0 | } |
1931 | 0 | } |
1932 | 0 | else |
1933 | 0 | { |
1934 | 0 | if (cu.ispMode) |
1935 | 0 | { |
1936 | 0 | const PartSplit ispType = CU::getISPType(cu, COMP_Y); |
1937 | 0 | partitioner.splitCurrArea(ispType, cs); |
1938 | 
// No LFNST/MTS/TS candidates to compare: a single xTestISP() pass yields the cost, bits and distortion directly.
1939 | 0 | CUCtx cuCtx; |
1940 | 0 | dSingleCost = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLumaSum, singleFracBits, singleDistLuma, cuCtx); |
1941 | 0 | partitioner.exitCurrSplit(); |
1942 | 0 | bool storeCost = (numMode == 1) ? true : false; |
1943 | 0 | if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1)) |
1944 | 0 | { |
1945 | 0 | storeCost = true; |
1946 | 0 | } |
1947 | 0 | if (storeCost) |
1948 | 0 | { |
1949 | 0 | m_ispTestedModes[0].bestCost[cu.ispMode - 1] = dSingleCost; |
1950 | 0 | } |
1951 | 0 | } |
1952 | 0 | else |
1953 | 0 | { |
1954 | 0 | TransformUnit& tu = |
1955 | 0 | cs.addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]); |
1956 | 0 | tu.depth = currDepth; |
1957 | 
// Single test: code the luma TU once and derive the RD cost from its bits and distortion.
1958 | 0 | CHECK(!tu.Y().valid(), "Invalid TU"); |
1959 | 0 | xIntraCodingTUBlock(tu, COMP_Y, false, singleDistLuma, &numSig, predBuf); |
1960 | | //----- determine rate and r-d cost ----- |
1961 | 0 | m_ispTestedModes[0].IspType = TU_NO_ISP; |
1962 | 0 | m_ispTestedModes[0].subTuCounter = -1; |
1963 | 0 | singleFracBits = xGetIntraFracBitsQT(cs, partitioner, true); |
1964 | 0 | dSingleCost = m_pcRdCost->calcRdCost(singleFracBits, singleDistLuma); |
1965 | 0 | } |
1966 | 0 | } |
1967 | | // for ISP, set the luma cbf at currDepth to splitCbfLumaSum on every TU whose luma block lies inside currArea |
1968 | 0 | if (cu.ispMode) |
1969 | 0 | { |
1970 | 0 | for (auto& ptu : cs.tus) |
1971 | 0 | { |
1972 | 0 | if (currArea.Y().contains(ptu->Y())) |
1973 | 0 | { |
1974 | 0 | TU::setCbfAtDepth(*ptu, COMP_Y, currDepth, splitCbfLumaSum ? 1 : 0); |
1975 | 0 | } |
1976 | 0 | } |
1977 | 0 | } |
1978 | 0 | cs.dist += singleDistLuma; |
1979 | 0 | cs.fracBits += singleFracBits; |
1980 | 0 | cs.cost = dSingleCost; |
1981 | 

1982 | 0 | STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!cs.slice->isIntra() + cs.slice->depth] ); |
1983 | 0 | STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !cs.slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( cs.area.lheight() )][Log2( cs.area.lwidth() )] ); |
1984 | 0 | } |
1985 | | |
1986 | | ChromaCbfs IntraSearch::xIntraChromaCodingQT(CodingStructure& cs, Partitioner& partitioner) |
1987 | 0 | { |
1988 | 0 | UnitArea currArea = partitioner.currArea(); |
1989 | |
|
1990 | 0 | if( !currArea.Cb().valid() ) |
1991 | 0 | return ChromaCbfs(false); |
1992 | | |
1993 | 0 | TransformUnit& currTU = *cs.getTU( currArea.chromaPos(), CH_C ); |
1994 | 0 | const CodingUnit& cu = *cs.getCU( currArea.chromaPos(), CH_C, TREE_D ); |
1995 | 0 | ChromaCbfs cbfs(false); |
1996 | 0 | uint32_t currDepth = partitioner.currTrDepth; |
1997 | 0 | const bool useTS = cs.picture->useTS; |
1998 | 0 | if (currDepth == currTU.depth) |
1999 | 0 | { |
2000 | 0 | if (!currArea.Cb().valid() || !currArea.Cr().valid()) |
2001 | 0 | { |
2002 | 0 | return cbfs; |
2003 | 0 | } |
2004 | | |
2005 | 0 | CodingStructure& saveCS = *m_pSaveCS[1]; |
2006 | 0 | saveCS.pcv = cs.pcv; |
2007 | 0 | saveCS.picture = cs.picture; |
2008 | 0 | saveCS.area.repositionTo(cs.area); |
2009 | |
|
2010 | 0 | TransformUnit& tmpTU = saveCS.tus.empty() ? saveCS.addTU(currArea, partitioner.chType, nullptr) : *saveCS.tus.front(); |
2011 | 0 | tmpTU.initData(); |
2012 | 0 | tmpTU.UnitArea::operator=(currArea); |
2013 | 0 | const unsigned numTBlocks = getNumberValidTBlocks(*cs.pcv); |
2014 | |
|
2015 | 0 | CompArea& cbArea = currTU.blocks[COMP_Cb]; |
2016 | 0 | CompArea& crArea = currTU.blocks[COMP_Cr]; |
2017 | 0 | double bestCostCb = MAX_DOUBLE; |
2018 | 0 | double bestCostCr = MAX_DOUBLE; |
2019 | 0 | Distortion bestDistCb = 0; |
2020 | 0 | Distortion bestDistCr = 0; |
2021 | |
|
2022 | 0 | TempCtx ctxStartTU(m_CtxCache); |
2023 | 0 | TempCtx ctxStart(m_CtxCache); |
2024 | 0 | TempCtx ctxBest(m_CtxCache); |
2025 | |
|
2026 | 0 | ctxStartTU = m_CABACEstimator->getCtx(); |
2027 | 0 | ctxStart = m_CABACEstimator->getCtx(); |
2028 | 0 | currTU.jointCbCr = 0; |
2029 | | |
2030 | | // Do predictions here to avoid repeating the "default0Save1Load2" stuff |
2031 | 0 | int predMode = cu.bdpcmM[CH_C] ? BDPCM_IDX : CU::getFinalIntraMode(cu, CH_C); |
2032 | |
|
2033 | 0 | PelBuf piPredCb = cs.getPredBuf(COMP_Cb); |
2034 | 0 | PelBuf piPredCr = cs.getPredBuf(COMP_Cr); |
2035 | |
|
2036 | 0 | initIntraPatternChType(*currTU.cu, cbArea); |
2037 | 0 | initIntraPatternChType(*currTU.cu, crArea); |
2038 | |
|
2039 | 0 | if (CU::isLMCMode(predMode)) |
2040 | 0 | { |
2041 | 0 | loadLMLumaRecPels(cu, cbArea); |
2042 | 0 | predIntraChromaLM(COMP_Cb, piPredCb, cu, cbArea, predMode); |
2043 | 0 | predIntraChromaLM(COMP_Cr, piPredCr, cu, crArea, predMode); |
2044 | 0 | } |
2045 | 0 | else |
2046 | 0 | { |
2047 | 0 | predIntraAng(COMP_Cb, piPredCb, cu); |
2048 | 0 | predIntraAng(COMP_Cr, piPredCr, cu); |
2049 | 0 | } |
2050 | | |
2051 | | // determination of chroma residuals including reshaping and cross-component prediction |
2052 | | //----- get chroma residuals ----- |
2053 | 0 | PelBuf resiCb = cs.getResiBuf(COMP_Cb); |
2054 | 0 | PelBuf resiCr = cs.getResiBuf(COMP_Cr); |
2055 | 0 | resiCb.subtract(cs.getOrgBuf(COMP_Cb), piPredCb); |
2056 | 0 | resiCr.subtract(cs.getOrgBuf(COMP_Cr), piPredCr); |
2057 | | |
2058 | | //----- get reshape parameter ---- |
2059 | 0 | ReshapeData& reshapeData = cs.picture->reshapeData; |
2060 | 0 | bool doReshaping = (cs.picHeader->lmcsEnabled && cs.picHeader->lmcsChromaResidualScale && (cs.slice->isIntra() || reshapeData.getCTUFlag()) && (cbArea.width * cbArea.height > 4)); |
2061 | 0 | if (doReshaping) |
2062 | 0 | { |
2063 | 0 | const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].size())); |
2064 | 0 | const CompArea& areaY = CompArea(COMP_Y, currTU.chromaFormat, area); |
2065 | 0 | currTU.chromaAdj = reshapeData.calculateChromaAdjVpduNei(currTU, areaY, currTU.cu->treeType); |
2066 | 0 | } |
2067 | | |
2068 | | //===== store original residual signals (std and crossCompPred) ===== |
2069 | 0 | for( int k = 0; k < 5; k++ ) |
2070 | 0 | { |
2071 | 0 | m_orgResiCb[k].compactResize( cbArea ); |
2072 | 0 | m_orgResiCr[k].compactResize( crArea ); |
2073 | 0 | } |
2074 | 0 | for (int k = 0; k < 1; k += 4) |
2075 | 0 | { |
2076 | 0 | m_orgResiCb[k].copyFrom(resiCb); |
2077 | 0 | m_orgResiCr[k].copyFrom(resiCr); |
2078 | |
|
2079 | 0 | if (doReshaping) |
2080 | 0 | { |
2081 | 0 | int cResScaleInv = currTU.chromaAdj; |
2082 | 0 | m_orgResiCb[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cb]); |
2083 | 0 | m_orgResiCr[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cr]); |
2084 | 0 | } |
2085 | 0 | } |
2086 | |
|
2087 | 0 | CUCtx cuCtx; |
2088 | 0 | cuCtx.isDQPCoded = true; |
2089 | 0 | cuCtx.isChromaQpAdjCoded = true; |
2090 | 0 | cuCtx.lfnstLastScanPos = false; |
2091 | |
|
2092 | 0 | CodingStructure& saveCScur = *m_pSaveCS[2]; |
2093 | |
|
2094 | 0 | saveCScur.pcv = cs.pcv; |
2095 | 0 | saveCScur.picture = cs.picture; |
2096 | 0 | saveCScur.area.repositionTo(cs.area); |
2097 | |
|
2098 | 0 | TransformUnit& tmpTUcur = saveCScur.tus.empty() ? saveCScur.addTU(currArea, partitioner.chType, nullptr) : *saveCScur.tus.front(); |
2099 | 0 | tmpTUcur.initData(); |
2100 | 0 | tmpTUcur.UnitArea::operator=(currArea); |
2101 | |
|
2102 | 0 | TempCtx ctxBestTUL(m_CtxCache); |
2103 | |
|
2104 | 0 | const SPS& sps = *cs.sps; |
2105 | 0 | double bestCostCbcur = MAX_DOUBLE; |
2106 | 0 | double bestCostCrcur = MAX_DOUBLE; |
2107 | 0 | Distortion bestDistCbcur = 0; |
2108 | 0 | Distortion bestDistCrcur = 0; |
2109 | |
|
2110 | 0 | int endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8)) |
2111 | 0 | || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) || !sps.LFNST ? 0 : 2; |
2112 | 0 | int startLfnstIdx = 0; |
2113 | 0 | int bestLfnstIdx = 0; |
2114 | 0 | bool testLFNST = sps.LFNST; |
2115 | | |
2116 | | // speedUps LFNST |
2117 | 0 | bool rapidLFNST = false; |
2118 | 0 | if (m_pcEncCfg->m_LFNST > 1) |
2119 | 0 | { |
2120 | 0 | rapidLFNST = true; |
2121 | 0 | if (m_pcEncCfg->m_LFNST > 2) |
2122 | 0 | { |
2123 | 0 | endLfnstIdx = endLfnstIdx ? 1 : 0; |
2124 | 0 | } |
2125 | 0 | } |
2126 | 0 | int ts_used = 0; |
2127 | 0 | bool testTS = false; |
2128 | 0 | if (partitioner.chType != CH_C) |
2129 | 0 | { |
2130 | 0 | startLfnstIdx = currTU.cu->lfnstIdx; |
2131 | 0 | endLfnstIdx = currTU.cu->lfnstIdx; |
2132 | 0 | bestLfnstIdx = currTU.cu->lfnstIdx; |
2133 | 0 | testLFNST = false; |
2134 | 0 | rapidLFNST = false; |
2135 | 0 | ts_used = currTU.mtsIdx[COMP_Y]; |
2136 | 0 | } |
2137 | 0 | if (cu.bdpcmM[CH_C]) |
2138 | 0 | { |
2139 | 0 | endLfnstIdx = 0; |
2140 | 0 | testLFNST = false; |
2141 | 0 | } |
2142 | |
|
2143 | 0 | double dSingleCostAll = MAX_DOUBLE; |
2144 | 0 | double singleCostTmpAll = 0; |
2145 | |
|
2146 | 0 | for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++) |
2147 | 0 | { |
2148 | 0 | if (rapidLFNST && lfnstIdx) |
2149 | 0 | { |
2150 | 0 | if ((lfnstIdx == 2) && (bestLfnstIdx == 0)) |
2151 | 0 | { |
2152 | 0 | continue; |
2153 | 0 | } |
2154 | 0 | } |
2155 | | |
2156 | 0 | currTU.cu->lfnstIdx = lfnstIdx; |
2157 | 0 | if (lfnstIdx) |
2158 | 0 | { |
2159 | 0 | m_CABACEstimator->getCtx() = ctxStartTU; |
2160 | 0 | } |
2161 | |
|
2162 | 0 | cuCtx.lfnstLastScanPos = false; |
2163 | 0 | cuCtx.violatesLfnstConstrained[CH_L] = false; |
2164 | 0 | cuCtx.violatesLfnstConstrained[CH_C] = false; |
2165 | |
|
2166 | 0 | for (uint32_t c = COMP_Cb; c < numTBlocks; c++) |
2167 | 0 | { |
2168 | 0 | const ComponentID compID = ComponentID(c); |
2169 | 0 | const CompArea& area = currTU.blocks[compID]; |
2170 | 0 | double dSingleCost = MAX_DOUBLE; |
2171 | 0 | Distortion singleDistCTmp = 0; |
2172 | 0 | double singleCostTmp = 0; |
2173 | 0 | bool tsAllowed = useTS && TU::isTSAllowed(currTU, compID) && m_pcEncCfg->m_useChromaTS && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C]; |
2174 | 0 | if ((partitioner.chType == CH_L) && (!ts_used)) |
2175 | 0 | { |
2176 | 0 | tsAllowed = false; |
2177 | 0 | } |
2178 | 0 | uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests |
2179 | 0 | std::vector<TrMode> trModes; |
2180 | 0 | if (nNumTransformCands > 1) |
2181 | 0 | { |
2182 | 0 | trModes.push_back(TrMode(0, true)); // DCT2 |
2183 | 0 | trModes.push_back(TrMode(1, true)); // TS |
2184 | 0 | testTS = true; |
2185 | 0 | } |
2186 | 0 | bool cbfDCT2 = true; |
2187 | 0 | const bool isLastMode = testLFNST || cs.sps->jointCbCr || tsAllowed ? false : true; |
2188 | 0 | int bestModeId = 0; |
2189 | 0 | ctxStart = m_CABACEstimator->getCtx(); |
2190 | 0 | for (int modeId = 0; modeId < nNumTransformCands; modeId++) |
2191 | 0 | { |
2192 | 0 | if (doReshaping || lfnstIdx || modeId) |
2193 | 0 | { |
2194 | 0 | resiCb.copyFrom(m_orgResiCb[0]); |
2195 | 0 | resiCr.copyFrom(m_orgResiCr[0]); |
2196 | 0 | } |
2197 | 0 | if (modeId == 0) |
2198 | 0 | { |
2199 | 0 | if ( tsAllowed) |
2200 | 0 | { |
2201 | 0 | xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, compID); |
2202 | 0 | } |
2203 | 0 | } |
2204 | |
|
2205 | 0 | currTU.mtsIdx[compID] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : modeId; |
2206 | |
|
2207 | 0 | if (modeId) |
2208 | 0 | { |
2209 | 0 | if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP) |
2210 | 0 | { |
2211 | 0 | break; |
2212 | 0 | } |
2213 | 0 | m_CABACEstimator->getCtx() = ctxStart; |
2214 | 0 | } |
2215 | 0 | singleDistCTmp = 0; |
2216 | 0 | if (tsAllowed) |
2217 | 0 | { |
2218 | 0 | xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp, 0, 0, true); |
2219 | 0 | if ((modeId == 0) && (!trModes[modeId + 1].second)) |
2220 | 0 | { |
2221 | 0 | nNumTransformCands = 1; |
2222 | 0 | } |
2223 | 0 | } |
2224 | 0 | else |
2225 | 0 | { |
2226 | 0 | xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp); |
2227 | 0 | } |
2228 | 0 | if (((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmM[CH_C]) |
2229 | 0 | && !TU::getCbf(currTU, compID))) // In order not to code TS flag when cbf is zero, the case for TS with |
2230 | | // cbf being zero is forbidden. |
2231 | 0 | { |
2232 | 0 | singleCostTmp = MAX_DOUBLE; |
2233 | 0 | } |
2234 | 0 | else |
2235 | 0 | { |
2236 | 0 | uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID, &cuCtx); |
2237 | 0 | singleCostTmp = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp); |
2238 | 0 | } |
2239 | |
|
2240 | 0 | if (singleCostTmp < dSingleCost) |
2241 | 0 | { |
2242 | 0 | dSingleCost = singleCostTmp; |
2243 | |
|
2244 | 0 | if (compID == COMP_Cb) |
2245 | 0 | { |
2246 | 0 | bestCostCb = singleCostTmp; |
2247 | 0 | bestDistCb = singleDistCTmp; |
2248 | 0 | } |
2249 | 0 | else |
2250 | 0 | { |
2251 | 0 | bestCostCr = singleCostTmp; |
2252 | 0 | bestDistCr = singleDistCTmp; |
2253 | 0 | } |
2254 | 0 | bestModeId = modeId; |
2255 | 0 | if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2) |
2256 | 0 | { |
2257 | 0 | cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth); |
2258 | 0 | } |
2259 | 0 | if (!isLastMode) |
2260 | 0 | { |
2261 | 0 | saveCS.getRecoBuf(area).copyFrom(cs.getRecoBuf(area)); |
2262 | 0 | tmpTU.copyComponentFrom(currTU, compID); |
2263 | 0 | ctxBest = m_CABACEstimator->getCtx(); |
2264 | 0 | } |
2265 | 0 | } |
2266 | 0 | } |
2267 | 0 | if (testTS && ((c == COMP_Cb && bestModeId < (nNumTransformCands - 1)) )) |
2268 | 0 | { |
2269 | 0 | m_CABACEstimator->getCtx() = ctxBest; |
2270 | |
|
2271 | 0 | currTU.copyComponentFrom(tmpTU, COMP_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf |
2272 | 0 | } |
2273 | 0 | } |
2274 | |
|
2275 | 0 | singleCostTmpAll = bestCostCb + bestCostCr; |
2276 | |
|
2277 | 0 | bool rootCbfL = false; |
2278 | 0 | if (testLFNST) |
2279 | 0 | { |
2280 | 0 | for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++) |
2281 | 0 | { |
2282 | 0 | rootCbfL |= bool(tmpTU.cbf[t]); |
2283 | 0 | } |
2284 | 0 | if (rapidLFNST && !rootCbfL) |
2285 | 0 | { |
2286 | 0 | endLfnstIdx = lfnstIdx; // end this |
2287 | 0 | } |
2288 | 0 | } |
2289 | |
|
2290 | 0 | if (testLFNST && lfnstIdx && !cuCtx.lfnstLastScanPos) |
2291 | 0 | { |
2292 | 0 | bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) |
2293 | 0 | ? rootCbfL : (cs.area.chromaFormat != CHROMA_400 |
2294 | 0 | && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4) |
2295 | 0 | ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL; |
2296 | 0 | if (cbfAtZeroDepth) |
2297 | 0 | { |
2298 | 0 | singleCostTmpAll = MAX_DOUBLE; |
2299 | 0 | } |
2300 | 0 | } |
2301 | 0 | if ((testLFNST || testTS) && (singleCostTmpAll < dSingleCostAll)) |
2302 | 0 | { |
2303 | 0 | bestLfnstIdx = lfnstIdx; |
2304 | 0 | if ((lfnstIdx != endLfnstIdx) || testTS) |
2305 | 0 | { |
2306 | 0 | dSingleCostAll = singleCostTmpAll; |
2307 | |
|
2308 | 0 | bestCostCbcur = bestCostCb; |
2309 | 0 | bestCostCrcur = bestCostCr; |
2310 | 0 | bestDistCbcur = bestDistCb; |
2311 | 0 | bestDistCrcur = bestDistCr; |
2312 | |
|
2313 | 0 | saveCScur.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea)); |
2314 | 0 | saveCScur.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea)); |
2315 | |
|
2316 | 0 | tmpTUcur.copyComponentFrom(tmpTU, COMP_Cb); |
2317 | 0 | tmpTUcur.copyComponentFrom(tmpTU, COMP_Cr); |
2318 | 0 | } |
2319 | 0 | ctxBestTUL = m_CABACEstimator->getCtx(); |
2320 | 0 | } |
2321 | 0 | } |
2322 | 0 | if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS) |
2323 | 0 | { |
2324 | 0 | bestCostCb = bestCostCbcur; |
2325 | 0 | bestCostCr = bestCostCrcur; |
2326 | 0 | bestDistCb = bestDistCbcur; |
2327 | 0 | bestDistCr = bestDistCrcur; |
2328 | 0 | currTU.cu->lfnstIdx = bestLfnstIdx; |
2329 | 0 | if (!cs.sps->jointCbCr) |
2330 | 0 | { |
2331 | 0 | cs.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea)); |
2332 | 0 | cs.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea)); |
2333 | |
|
2334 | 0 | currTU.copyComponentFrom(tmpTUcur, COMP_Cb); |
2335 | 0 | currTU.copyComponentFrom(tmpTUcur, COMP_Cr); |
2336 | |
|
2337 | 0 | m_CABACEstimator->getCtx() = ctxBestTUL; |
2338 | 0 | } |
2339 | 0 | } |
2340 | |
|
2341 | 0 | Distortion bestDistCbCr = bestDistCb + bestDistCr; |
2342 | |
|
2343 | 0 | if (cs.sps->jointCbCr) |
2344 | 0 | { |
2345 | 0 | if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS) |
2346 | 0 | { |
2347 | 0 | saveCS.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea)); |
2348 | 0 | saveCS.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea)); |
2349 | |
|
2350 | 0 | tmpTU.copyComponentFrom(tmpTUcur, COMP_Cb); |
2351 | 0 | tmpTU.copyComponentFrom(tmpTUcur, COMP_Cr); |
2352 | 0 | m_CABACEstimator->getCtx() = ctxBestTUL; |
2353 | 0 | ctxBest = m_CABACEstimator->getCtx(); |
2354 | 0 | } |
2355 | | // Test using joint chroma residual coding |
2356 | 0 | double bestCostCbCr = bestCostCb + bestCostCr; |
2357 | 0 | int bestJointCbCr = 0; |
2358 | 0 | bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cr)) || |
2359 | 0 | (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cb)) || |
2360 | 0 | (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2)); |
2361 | 0 | bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cr)) || |
2362 | 0 | (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cb)) || |
2363 | 0 | (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP)); |
2364 | 0 | bool lastIsBest = false; |
2365 | 0 | bool noLFNST1 = false; |
2366 | 0 | if (rapidLFNST && (startLfnstIdx != endLfnstIdx)) |
2367 | 0 | { |
2368 | 0 | if (bestLfnstIdx == 2) |
2369 | 0 | { |
2370 | 0 | noLFNST1 = true; |
2371 | 0 | } |
2372 | 0 | else |
2373 | 0 | { |
2374 | 0 | endLfnstIdx = 1; |
2375 | 0 | } |
2376 | 0 | } |
2377 | |
|
2378 | 0 | for (int lfnstIdxj = startLfnstIdx; lfnstIdxj <= endLfnstIdx; lfnstIdxj++) |
2379 | 0 | { |
2380 | 0 | if (rapidLFNST && noLFNST1 && (lfnstIdxj == 1)) |
2381 | 0 | { |
2382 | 0 | continue; |
2383 | 0 | } |
2384 | 0 | currTU.cu->lfnstIdx = lfnstIdxj; |
2385 | 0 | std::vector<int> jointCbfMasksToTest; |
2386 | 0 | if (TU::getCbf(tmpTU, COMP_Cb) || TU::getCbf(tmpTU, COMP_Cr)) |
2387 | 0 | { |
2388 | 0 | jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, m_orgResiCb, m_orgResiCr); |
2389 | 0 | } |
2390 | 0 | for (int cbfMask : jointCbfMasksToTest) |
2391 | 0 | { |
2392 | 0 | currTU.jointCbCr = (uint8_t)cbfMask; |
2393 | 0 | ComponentID codeCompId = ((currTU.jointCbCr >> 1) ? COMP_Cb : COMP_Cr); |
2394 | 0 | ComponentID otherCompId = ((codeCompId == COMP_Cb) ? COMP_Cr : COMP_Cb); |
2395 | 0 | bool tsAllowed = useTS && TU::isTSAllowed(currTU, codeCompId) && (m_pcEncCfg->m_useChromaTS) && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C]; |
2396 | 0 | if ((partitioner.chType == CH_L)&& tsAllowed && (currTU.mtsIdx[COMP_Y] != MTS_SKIP)) |
2397 | 0 | { |
2398 | 0 | tsAllowed = false; |
2399 | 0 | } |
2400 | 0 | if (!tsAllowed) |
2401 | 0 | { |
2402 | 0 | checkTSOnly = false; |
2403 | 0 | } |
2404 | 0 | uint8_t numTransformCands = 1 + (tsAllowed && !(checkDCTOnly || checkTSOnly)? 1 : 0); // DCT + TS = 2 tests |
2405 | 0 | std::vector<TrMode> trModes; |
2406 | 0 | if (numTransformCands > 1) |
2407 | 0 | { |
2408 | 0 | trModes.push_back(TrMode(0, true)); // DCT2 |
2409 | 0 | trModes.push_back(TrMode(1, true));//TS |
2410 | 0 | } |
2411 | 0 | else |
2412 | 0 | { |
2413 | 0 | currTU.mtsIdx[codeCompId] = checkTSOnly || currTU.cu->bdpcmM[CH_C] ? 1 : 0; |
2414 | 0 | } |
2415 | |
|
2416 | 0 | for (int modeId = 0; modeId < numTransformCands; modeId++) |
2417 | 0 | { |
2418 | 0 | Distortion distTmp = 0; |
2419 | 0 | currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : MTS_DCT2_DCT2; |
2420 | 0 | if (numTransformCands > 1) |
2421 | 0 | { |
2422 | 0 | currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : trModes[modeId].first; |
2423 | 0 | } |
2424 | 0 | currTU.mtsIdx[otherCompId] = MTS_DCT2_DCT2; |
2425 | |
|
2426 | 0 | m_CABACEstimator->getCtx() = ctxStartTU; |
2427 | |
|
2428 | 0 | resiCb.copyFrom(m_orgResiCb[cbfMask]); |
2429 | 0 | resiCr.copyFrom(m_orgResiCr[cbfMask]); |
2430 | 0 | if ((modeId == 0) && (numTransformCands > 1)) |
2431 | 0 | { |
2432 | 0 | xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, COMP_Cb); |
2433 | 0 | currTU.mtsIdx[codeCompId] = trModes[modeId].first; |
2434 | 0 | currTU.mtsIdx[(codeCompId == COMP_Cr) ? COMP_Cb : COMP_Cr] = MTS_DCT2_DCT2; |
2435 | 0 | } |
2436 | 0 | cuCtx.lfnstLastScanPos = false; |
2437 | 0 | cuCtx.violatesLfnstConstrained[CH_L] = false; |
2438 | 0 | cuCtx.violatesLfnstConstrained[CH_C] = false; |
2439 | 0 | if (numTransformCands > 1) |
2440 | 0 | { |
2441 | 0 | xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0, 0, true); |
2442 | 0 | if ((modeId == 0) && !trModes[modeId + 1].second) |
2443 | 0 | { |
2444 | 0 | numTransformCands = 1; |
2445 | 0 | } |
2446 | 0 | } |
2447 | 0 | else |
2448 | 0 | { |
2449 | 0 | xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0); |
2450 | 0 | } |
2451 | |
|
2452 | 0 | double costTmp = std::numeric_limits<double>::max(); |
2453 | 0 | if (distTmp < MAX_DISTORTION) |
2454 | 0 | { |
2455 | 0 | uint64_t bits = xGetIntraFracBitsQTChroma(currTU, COMP_Cb, &cuCtx); |
2456 | 0 | costTmp = m_pcRdCost->calcRdCost(bits, distTmp); |
2457 | 0 | } |
2458 | 0 | else if (!currTU.mtsIdx[codeCompId]) |
2459 | 0 | { |
2460 | 0 | numTransformCands = 1; |
2461 | 0 | } |
2462 | 0 | bool rootCbfL = false; |
2463 | 0 | for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++) |
2464 | 0 | { |
2465 | 0 | rootCbfL |= bool(tmpTU.cbf[t]); |
2466 | 0 | } |
2467 | 0 | if (rapidLFNST && !rootCbfL) |
2468 | 0 | { |
2469 | 0 | endLfnstIdx = lfnstIdxj; |
2470 | 0 | } |
2471 | 0 | if (testLFNST && currTU.cu->lfnstIdx && !cuCtx.lfnstLastScanPos) |
2472 | 0 | { |
2473 | 0 | bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) ? rootCbfL |
2474 | 0 | : (cs.area.chromaFormat != CHROMA_400 && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4) |
2475 | 0 | ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL; |
2476 | 0 | if (cbfAtZeroDepth) |
2477 | 0 | { |
2478 | 0 | costTmp = MAX_DOUBLE; |
2479 | 0 | } |
2480 | 0 | } |
2481 | 0 | if (costTmp < bestCostCbCr) |
2482 | 0 | { |
2483 | 0 | bestCostCbCr = costTmp; |
2484 | 0 | bestDistCbCr = distTmp; |
2485 | 0 | bestJointCbCr = currTU.jointCbCr; |
2486 | | |
2487 | | // store data |
2488 | 0 | bestLfnstIdx = lfnstIdxj; |
2489 | 0 | if ((cbfMask != jointCbfMasksToTest.back() || (lfnstIdxj != endLfnstIdx)) || (modeId != (numTransformCands - 1))) |
2490 | 0 | { |
2491 | 0 | saveCS.getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea)); |
2492 | 0 | saveCS.getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea)); |
2493 | |
|
2494 | 0 | tmpTU.copyComponentFrom(currTU, COMP_Cb); |
2495 | 0 | tmpTU.copyComponentFrom(currTU, COMP_Cr); |
2496 | |
|
2497 | 0 | ctxBest = m_CABACEstimator->getCtx(); |
2498 | 0 | } |
2499 | 0 | else |
2500 | 0 | { |
2501 | 0 | lastIsBest = true; |
2502 | 0 | cs.cus[0]->lfnstIdx = bestLfnstIdx; |
2503 | 0 | } |
2504 | 0 | } |
2505 | 0 | } |
2506 | 0 | } |
2507 | | |
2508 | | // Retrieve the best CU data (unless it was the very last one tested) |
2509 | 0 | } |
2510 | 0 | if (!lastIsBest) |
2511 | 0 | { |
2512 | 0 | cs.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea)); |
2513 | 0 | cs.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea)); |
2514 | |
|
2515 | 0 | cs.cus[0]->lfnstIdx = bestLfnstIdx; |
2516 | 0 | currTU.copyComponentFrom(tmpTU, COMP_Cb); |
2517 | 0 | currTU.copyComponentFrom(tmpTU, COMP_Cr); |
2518 | 0 | m_CABACEstimator->getCtx() = ctxBest; |
2519 | 0 | } |
2520 | 0 | currTU.jointCbCr = (TU::getCbf(currTU, COMP_Cb) || TU::getCbf(currTU, COMP_Cr)) ? bestJointCbCr : 0; |
2521 | 0 | } // jointCbCr |
2522 | |
|
2523 | 0 | cs.dist += bestDistCbCr; |
2524 | 0 | cuCtx.violatesLfnstConstrained[CH_L] = false; |
2525 | 0 | cuCtx.violatesLfnstConstrained[CH_C] = false; |
2526 | 0 | cuCtx.lfnstLastScanPos = false; |
2527 | 0 | cuCtx.violatesMtsCoeffConstraint = false; |
2528 | 0 | cuCtx.mtsLastScanPos = false; |
2529 | 0 | cbfs.cbf(COMP_Cb) = TU::getCbf(currTU, COMP_Cb); |
2530 | 0 | cbfs.cbf(COMP_Cr) = TU::getCbf(currTU, COMP_Cr); |
2531 | 0 | } |
2532 | 0 | else |
2533 | 0 | { |
2534 | 0 | unsigned numValidTBlocks = getNumberValidTBlocks(*cs.pcv); |
2535 | 0 | ChromaCbfs SplitCbfs(false); |
2536 | |
|
2537 | 0 | if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) |
2538 | 0 | { |
2539 | 0 | partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); |
2540 | 0 | } |
2541 | 0 | else if (currTU.cu->ispMode) |
2542 | 0 | { |
2543 | 0 | partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs); |
2544 | 0 | } |
2545 | 0 | else |
2546 | 0 | THROW("Implicit TU split not available"); |
2547 | | |
2548 | 0 | do |
2549 | 0 | { |
2550 | 0 | ChromaCbfs subCbfs = xIntraChromaCodingQT(cs, partitioner); |
2551 | |
|
2552 | 0 | for (uint32_t ch = COMP_Cb; ch < numValidTBlocks; ch++) |
2553 | 0 | { |
2554 | 0 | const ComponentID compID = ComponentID(ch); |
2555 | 0 | SplitCbfs.cbf(compID) |= subCbfs.cbf(compID); |
2556 | 0 | } |
2557 | 0 | } while (partitioner.nextPart(cs)); |
2558 | |
|
2559 | 0 | partitioner.exitCurrSplit(); |
2560 | | |
2561 | | /*if (lumaUsesISP && cs.dist == MAX_UINT) //ahenkel |
2562 | | { |
2563 | | return cbfs; |
2564 | | }*/ |
2565 | 0 | { |
2566 | 0 | cbfs.Cb |= SplitCbfs.Cb; |
2567 | 0 | cbfs.Cr |= SplitCbfs.Cr; |
2568 | |
|
2569 | 0 | if (1) //(!lumaUsesISP) |
2570 | 0 | { |
2571 | 0 | for (auto& ptu : cs.tus) |
2572 | 0 | { |
2573 | 0 | if (currArea.Cb().contains(ptu->Cb()) || (!ptu->Cb().valid() && currArea.Y().contains(ptu->Y()))) |
2574 | 0 | { |
2575 | 0 | TU::setCbfAtDepth(*ptu, COMP_Cb, currDepth, SplitCbfs.Cb); |
2576 | 0 | TU::setCbfAtDepth(*ptu, COMP_Cr, currDepth, SplitCbfs.Cr); |
2577 | 0 | } |
2578 | 0 | } |
2579 | 0 | } |
2580 | 0 | } |
2581 | 0 | } |
2582 | 0 | return cbfs; |
2583 | 0 | } |
2584 | | |
2585 | | uint64_t IntraSearch::xFracModeBitsIntraLuma(const CodingUnit& cu, const unsigned* mpmLst) |
2586 | 0 | { |
2587 | 0 | m_CABACEstimator->resetBits(); |
2588 | |
|
2589 | 0 | if (!cu.ciip) |
2590 | 0 | { |
2591 | 0 | m_CABACEstimator->intra_luma_pred_mode(cu, mpmLst); |
2592 | 0 | } |
2593 | |
|
2594 | 0 | return m_CABACEstimator->getEstFracBits(); |
2595 | 0 | } |
2596 | | |
2597 | | template<typename T, size_t N, int M> |
2598 | | void IntraSearch::xReduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, SortedPelUnitBufs<M>& sortedPelBuffer, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const CodingUnit& cu, const bool fastMip) |
2599 | 0 | { |
2600 | 0 | const int maxCandPerType = numModesForFullRD >> 1; |
2601 | 0 | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList; |
2602 | 0 | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList; |
2603 | 0 | const double minCost = candCostList[0]; |
2604 | 0 | bool keepOneMip = candModeList.size() > numModesForFullRD; |
2605 | 0 | const int maxNumConv = 3; |
2606 | |
|
2607 | 0 | int numConv = 0; |
2608 | 0 | int numMip = 0; |
2609 | 0 | for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++) |
2610 | 0 | { |
2611 | 0 | bool addMode = false; |
2612 | 0 | const ModeInfo& orgMode = candModeList[idx]; |
2613 | |
|
2614 | 0 | if (!orgMode.mipFlg) |
2615 | 0 | { |
2616 | 0 | addMode = (numConv < maxNumConv); |
2617 | 0 | numConv += addMode ? 1:0; |
2618 | 0 | } |
2619 | 0 | else |
2620 | 0 | { |
2621 | 0 | addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip ); |
2622 | 0 | keepOneMip = false; |
2623 | 0 | numMip += addMode ? 1:0; |
2624 | 0 | } |
2625 | 0 | if( addMode ) |
2626 | 0 | { |
2627 | 0 | tempRdModeList.push_back(orgMode); |
2628 | 0 | tempCandCostList.push_back(candCostList[idx]); |
2629 | 0 | } |
2630 | 0 | } |
2631 | | |
2632 | | // sort Pel Buffer |
2633 | 0 | int i = -1; |
2634 | 0 | for( auto &m: tempRdModeList) |
2635 | 0 | { |
2636 | 0 | if( ! (m == candModeList.at( ++i )) ) |
2637 | 0 | { |
2638 | 0 | for( int j = i; j < (int)candModeList.size()-1; ) |
2639 | 0 | { |
2640 | 0 | if( m == candModeList.at( ++j ) ) |
2641 | 0 | { |
2642 | 0 | sortedPelBuffer.swap( i, j); |
2643 | 0 | break; |
2644 | 0 | } |
2645 | 0 | } |
2646 | 0 | } |
2647 | 0 | } |
2648 | 0 | sortedPelBuffer.reduceTo( (int)tempRdModeList.size() ); |
2649 | |
|
2650 | 0 | if ((cu.lwidth() > 8 && cu.lheight() > 8)) |
2651 | 0 | { |
2652 | | // Sort MIP candidates by Hadamard cost |
2653 | 0 | const int transpOff = getNumModesMip(cu.Y()); |
2654 | 0 | static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0); |
2655 | 0 | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0); |
2656 | 0 | for (uint8_t mode : { 0, 1, 2 }) |
2657 | 0 | { |
2658 | 0 | uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0); |
2659 | 0 | updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3); |
2660 | 0 | } |
2661 | | |
2662 | | // Append MIP mode to RD mode list |
2663 | 0 | const int modeListSize = int(tempRdModeList.size()); |
2664 | 0 | for (int idx = 0; idx < 3; idx++) |
2665 | 0 | { |
2666 | 0 | const bool isTransposed = (sortedMipModes[idx] >= transpOff ? true : false); |
2667 | 0 | const uint32_t mipIdx = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]); |
2668 | 0 | const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx ); |
2669 | 0 | bool alreadyIncluded = false; |
2670 | 0 | for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++) |
2671 | 0 | { |
2672 | 0 | if (tempRdModeList[modeListIdx] == mipMode) |
2673 | 0 | { |
2674 | 0 | alreadyIncluded = true; |
2675 | 0 | break; |
2676 | 0 | } |
2677 | 0 | } |
2678 | |
|
2679 | 0 | if (!alreadyIncluded) |
2680 | 0 | { |
2681 | 0 | tempRdModeList.push_back(mipMode); |
2682 | 0 | tempCandCostList.push_back(0); |
2683 | 0 | if( fastMip ) break; |
2684 | 0 | } |
2685 | 0 | } |
2686 | 0 | } |
2687 | |
|
2688 | 0 | candModeList = tempRdModeList; |
2689 | 0 | candCostList = tempCandCostList; |
2690 | 0 | numModesForFullRD = int(candModeList.size()); |
2691 | 0 | } |
2692 | | |
2693 | | void IntraSearch::xPreCheckMTS(TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, PelUnitBuf *predBuf, const ComponentID& compID) |
2694 | 0 | { |
2695 | 0 | if (compID == COMP_Y) |
2696 | 0 | { |
2697 | 0 | CodingStructure& cs = *tu.cs; |
2698 | 0 | const CompArea& area = tu.blocks[compID]; |
2699 | 0 | const ReshapeData& reshapeData = cs.picture->reshapeData; |
2700 | 0 | const CodingUnit& cu = *cs.getCU(area.pos(), CH_L,TREE_D); |
2701 | 0 | PelBuf piPred = cs.getPredBuf(area); |
2702 | 0 | PelBuf piResi = cs.getResiBuf(area); |
2703 | |
|
2704 | 0 | initIntraPatternChType(*tu.cu, area); |
2705 | 0 | if (predBuf) |
2706 | 0 | { |
2707 | 0 | piPred.copyFrom(predBuf->Y()); |
2708 | 0 | } |
2709 | 0 | else if (CU::isMIP(cu, CH_L)) |
2710 | 0 | { |
2711 | 0 | initIntraMip(cu); |
2712 | 0 | predIntraMip(piPred, cu); |
2713 | 0 | } |
2714 | 0 | else |
2715 | 0 | { |
2716 | 0 | predIntraAng(COMP_Y, piPred, cu); |
2717 | 0 | } |
2718 | | |
2719 | | //===== get residual signal ===== |
2720 | 0 | if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) |
2721 | 0 | { |
2722 | 0 | piResi.subtract(cs.getRspOrgBuf(), piPred); |
2723 | 0 | } |
2724 | 0 | else |
2725 | 0 | { |
2726 | 0 | CPelBuf piOrg = cs.getOrgBuf(COMP_Y); |
2727 | 0 | piResi.subtract(piOrg, piPred); |
2728 | 0 | } |
2729 | 0 | m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, compID); |
2730 | 0 | } |
2731 | 0 | else |
2732 | 0 | { |
2733 | 0 | ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID); |
2734 | 0 | m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, codeCompId); |
2735 | 0 | } |
2736 | 0 | } |
2737 | | |
2738 | | double IntraSearch::xTestISP(CodingStructure& cs, Partitioner& subTuPartitioner, double bestCostForISP, PartSplit ispType, bool& splitcbf, uint64_t& singleFracBits, Distortion& singleDistLuma, CUCtx& cuCtx) |
2739 | 0 | { |
2740 | 0 | int subTuCounter = 0; |
2741 | 0 | bool earlySkipISP = false; |
2742 | 0 | bool splitCbfLuma = false; |
2743 | 0 | CodingUnit& cu = *cs.cus[0]; |
2744 | |
|
2745 | 0 | Distortion singleDistTmpLumaSUM = 0; |
2746 | 0 | uint64_t singleTmpFracBitsSUM = 0; |
2747 | 0 | double singleCostTmpSUM = 0; |
2748 | 0 | cuCtx.isDQPCoded = true; |
2749 | 0 | cuCtx.isChromaQpAdjCoded = true; |
2750 | |
|
2751 | 0 | do |
2752 | 0 | { |
2753 | 0 | Distortion singleDistTmpLuma = 0; |
2754 | 0 | uint64_t singleTmpFracBits = 0; |
2755 | 0 | double singleCostTmp = 0; |
2756 | 0 | TransformUnit& tmpTUcur = ((cs.tus.size() < (subTuCounter + 1))) |
2757 | 0 | ? cs.addTU(CS::getArea(cs, subTuPartitioner.currArea(), subTuPartitioner.chType, |
2758 | 0 | subTuPartitioner.treeType), |
2759 | 0 | subTuPartitioner.chType, cs.cus[0]) |
2760 | 0 | : *cs.tus[subTuCounter]; |
2761 | 0 | tmpTUcur.depth = subTuPartitioner.currTrDepth; |
2762 | | |
2763 | | // Encode TU |
2764 | 0 | xIntraCodingTUBlock(tmpTUcur, COMP_Y, false, singleDistTmpLuma, 0); |
2765 | 0 | cuCtx.mtsLastScanPos = false; |
2766 | |
|
2767 | 0 | if (singleDistTmpLuma == MAX_INT) // all zero CBF skip |
2768 | 0 | { |
2769 | 0 | earlySkipISP = true; |
2770 | 0 | singleCostTmpSUM = MAX_DOUBLE; |
2771 | 0 | break; |
2772 | 0 | } |
2773 | | |
2774 | 0 | if (m_pcRdCost->calcRdCost(singleTmpFracBitsSUM, singleDistTmpLumaSUM + singleDistTmpLuma) > bestCostForISP) |
2775 | 0 | { |
2776 | 0 | earlySkipISP = true; |
2777 | 0 | } |
2778 | 0 | else |
2779 | 0 | { |
2780 | 0 | m_ispTestedModes[0].IspType = ispType; |
2781 | 0 | m_ispTestedModes[0].subTuCounter = subTuCounter; |
2782 | 0 | singleTmpFracBits = xGetIntraFracBitsQT(cs, subTuPartitioner, true, &cuCtx); |
2783 | 0 | } |
2784 | 0 | singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); |
2785 | |
|
2786 | 0 | singleCostTmpSUM += singleCostTmp; |
2787 | 0 | singleDistTmpLumaSUM += singleDistTmpLuma; |
2788 | 0 | singleTmpFracBitsSUM += singleTmpFracBits; |
2789 | |
|
2790 | 0 | subTuCounter++; |
2791 | |
|
2792 | 0 | splitCbfLuma |= TU::getCbfAtDepth( *cs.getTU(subTuPartitioner.currArea().lumaPos(), subTuPartitioner.chType, subTuCounter - 1), |
2793 | 0 | COMP_Y, subTuPartitioner.currTrDepth); |
2794 | 0 | int nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1]; |
2795 | 0 | bool doStop = (m_pcEncCfg->m_ISP != 1) || (subTuCounter < nSubPartitions); |
2796 | 0 | if (doStop) |
2797 | 0 | { |
2798 | 0 | if (singleCostTmpSUM > bestCostForISP) |
2799 | 0 | { |
2800 | 0 | earlySkipISP = true; |
2801 | 0 | break; |
2802 | 0 | } |
2803 | 0 | if (subTuCounter < nSubPartitions) |
2804 | 0 | { |
2805 | 0 | double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91; |
2806 | 0 | if (singleCostTmpSUM > bestCostForISP * threshold) |
2807 | 0 | { |
2808 | 0 | earlySkipISP = true; |
2809 | 0 | break; |
2810 | 0 | } |
2811 | 0 | } |
2812 | 0 | } |
2813 | 0 | } while (subTuPartitioner.nextPart(cs)); |
2814 | 0 | singleDistLuma = singleDistTmpLumaSUM; |
2815 | 0 | singleFracBits = singleTmpFracBitsSUM; |
2816 | |
|
2817 | 0 | splitcbf = splitCbfLuma; |
2818 | 0 | return earlySkipISP ? MAX_DOUBLE : singleCostTmpSUM; |
2819 | 0 | } |
2820 | | |
2821 | | int IntraSearch::xSpeedUpISP(int speed, bool& testISP, int mode, int& noISP, int& endISP, CodingUnit& cu, static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, const ModeInfo& bestPUMode, int bestISP, int bestLfnstIdx) |
2822 | 0 | { |
2823 | 0 | if (speed) |
2824 | 0 | { |
2825 | 0 | if (mode >= 1) |
2826 | 0 | { |
2827 | 0 | if (m_ispTestedModes[0].splitIsFinished[1] && m_ispTestedModes[0].splitIsFinished[0]) |
2828 | 0 | { |
2829 | 0 | testISP = false; |
2830 | 0 | endISP = 0; |
2831 | 0 | } |
2832 | 0 | else |
2833 | 0 | { |
2834 | 0 | if (m_pcEncCfg->m_ISP >= 2) |
2835 | 0 | { |
2836 | 0 | if (mode == 1) //best Hor||Ver |
2837 | 0 | { |
2838 | 0 | int bestDir = 0; |
2839 | 0 | for (int d = 0; d < 2; d++) |
2840 | 0 | { |
2841 | 0 | int d2 = d ? 0 : 1; |
2842 | 0 | if ((m_ispTestedModes[0].bestCost[d] <= m_ispTestedModes[0].bestCost[d2]) |
2843 | 0 | && (m_ispTestedModes[0].bestCost[d] != MAX_DOUBLE)) |
2844 | 0 | { |
2845 | 0 | bestDir = d + 1; |
2846 | 0 | m_ispTestedModes[0].splitIsFinished[d2] = true; |
2847 | 0 | } |
2848 | 0 | } |
2849 | 0 | m_ispTestedModes[0].bestModeSoFar = bestDir; |
2850 | 0 | if (m_ispTestedModes[0].bestModeSoFar <= 0) |
2851 | 0 | { |
2852 | 0 | m_ispTestedModes[0].splitIsFinished[1] = true; |
2853 | 0 | m_ispTestedModes[0].splitIsFinished[0] = true; |
2854 | 0 | testISP = false; |
2855 | 0 | endISP = 0; |
2856 | 0 | } |
2857 | 0 | } |
2858 | 0 | if (m_ispTestedModes[0].bestModeSoFar == 2) |
2859 | 0 | { |
2860 | 0 | noISP = 1; |
2861 | 0 | } |
2862 | 0 | else |
2863 | 0 | { |
2864 | 0 | endISP = 1; |
2865 | 0 | } |
2866 | 0 | } |
2867 | 0 | } |
2868 | 0 | } |
2869 | 0 | if (testISP) |
2870 | 0 | { |
2871 | 0 | if (mode == 2) |
2872 | 0 | { |
2873 | 0 | for (int d = 0; d < 2; d++) |
2874 | 0 | { |
2875 | 0 | int d2 = d ? 0 : 1; |
2876 | 0 | if (m_ispTestedModes[0].bestCost[d] == MAX_DOUBLE) |
2877 | 0 | { |
2878 | 0 | m_ispTestedModes[0].splitIsFinished[d] = true; |
2879 | 0 | } |
2880 | 0 | if ((m_ispTestedModes[0].bestCost[d2] < 1.3 * m_ispTestedModes[0].bestCost[d]) |
2881 | 0 | && (int(m_ispTestedModes[0].bestSplitSoFar) != (d + 1))) |
2882 | 0 | { |
2883 | 0 | if (d) |
2884 | 0 | { |
2885 | 0 | endISP = 1; |
2886 | 0 | } |
2887 | 0 | else |
2888 | 0 | { |
2889 | 0 | noISP = 1; |
2890 | 0 | } |
2891 | 0 | m_ispTestedModes[0].splitIsFinished[d] = true; |
2892 | 0 | } |
2893 | 0 | } |
2894 | 0 | } |
2895 | 0 | else |
2896 | 0 | { |
2897 | 0 | if (m_ispTestedModes[0].splitIsFinished[0]) |
2898 | 0 | { |
2899 | 0 | noISP = 1; |
2900 | 0 | } |
2901 | 0 | if (m_ispTestedModes[0].splitIsFinished[1]) |
2902 | 0 | { |
2903 | 0 | endISP = 1; |
2904 | 0 | } |
2905 | 0 | } |
2906 | 0 | } |
2907 | 0 | if ((noISP == 1) && (endISP == 1)) |
2908 | 0 | { |
2909 | 0 | endISP = 0; |
2910 | 0 | } |
2911 | 0 | } |
2912 | 0 | else |
2913 | 0 | { |
2914 | 0 | bool stopFound = false; |
2915 | 0 | if (m_pcEncCfg->m_ISP >= 3) |
2916 | 0 | { |
2917 | 0 | if (mode) |
2918 | 0 | { |
2919 | 0 | if ((bestISP == 0) || ((bestPUMode.modeId != RdModeList[mode - 1].modeId) |
2920 | 0 | && (bestPUMode.modeId != RdModeList[mode].modeId))) |
2921 | 0 | { |
2922 | 0 | stopFound = true; |
2923 | 0 | } |
2924 | 0 | } |
2925 | 0 | } |
2926 | 0 | if (cu.mipFlag || cu.multiRefIdx) |
2927 | 0 | { |
2928 | 0 | cu.mipFlag = false; |
2929 | 0 | cu.multiRefIdx = 0; |
2930 | 0 | if (!stopFound) |
2931 | 0 | { |
2932 | 0 | for (int k = 0; k < mode; k++) |
2933 | 0 | { |
2934 | 0 | if (cu.intraDir[CH_L] == RdModeList[k].modeId) |
2935 | 0 | { |
2936 | 0 | stopFound = true; |
2937 | 0 | break; |
2938 | 0 | } |
2939 | 0 | } |
2940 | 0 | } |
2941 | 0 | } |
2942 | 0 | if (stopFound) |
2943 | 0 | { |
2944 | 0 | testISP = false; |
2945 | 0 | endISP = 0; |
2946 | 0 | return 1; |
2947 | 0 | } |
2948 | 0 | if (!stopFound && (m_pcEncCfg->m_ISP >= 2) && (cu.intraDir[CH_L] == DC_IDX)) |
2949 | 0 | { |
2950 | 0 | stopFound = true; |
2951 | 0 | endISP = 0; |
2952 | 0 | return 1; |
2953 | 0 | } |
2954 | 0 | } |
2955 | 0 | return 0; |
2956 | 0 | } |
2957 | | |
2958 | | void IntraSearch::xSpeedUpIntra(double bestcost, int& EndMode, int& speedIntra, CodingUnit& cu) |
2959 | 0 | { |
2960 | 0 | int bestIdxbefore = m_ispTestedModes[0].bestIntraMode; |
2961 | 0 | if (m_ispTestedModes[0].isIntra) |
2962 | 0 | { |
2963 | 0 | if (bestIdxbefore == 1)//ISP |
2964 | 0 | { |
2965 | 0 | speedIntra = 14; |
2966 | 0 | } |
2967 | 0 | if (bestIdxbefore == 4)//MTS |
2968 | 0 | { |
2969 | 0 | speedIntra = 3; |
2970 | 0 | } |
2971 | 0 | } |
2972 | 0 | else if (!cu.cs->slice->isIntra()) |
2973 | 0 | { |
2974 | 0 | if (bestcost != MAX_DOUBLE) |
2975 | 0 | { |
2976 | 0 | speedIntra = 10; |
2977 | 0 | } |
2978 | 0 | } |
2979 | 0 | if (m_ispTestedModes[0].bestBefore[0] == -1) |
2980 | 0 | { |
2981 | 0 | speedIntra |= 7; |
2982 | 0 | if (m_pcEncCfg->m_FastIntraTools == 2) |
2983 | 0 | { |
2984 | 0 | EndMode = 1; |
2985 | 0 | } |
2986 | 0 | } |
2987 | 0 | if (!cu.cs->slice->isIntra()) |
2988 | 0 | { |
2989 | 0 | if ((m_ispTestedModes[0].bestBefore[1] == 1) || (m_ispTestedModes[0].bestBefore[2] == 1)) |
2990 | 0 | { |
2991 | 0 | speedIntra |= 2; |
2992 | 0 | } |
2993 | 0 | if ((m_ispTestedModes[0].bestBefore[1] == 4) || (m_ispTestedModes[0].bestBefore[2] == 4)) |
2994 | 0 | { |
2995 | 0 | speedIntra |= 3; |
2996 | 0 | } |
2997 | 0 | if ((m_ispTestedModes[0].bestBefore[1] == 2) || (m_ispTestedModes[0].bestBefore[2] == 2)) |
2998 | 0 | { |
2999 | 0 | speedIntra |= 1; |
3000 | 0 | } |
3001 | 0 | } |
3002 | 0 | } |
3003 | | |
3004 | | } // namespace vvenc |
3005 | | |
3006 | | //! \} |
3007 | | |