/work/vvenc/source/Lib/EncoderLib/IntraSearch.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file IntraSearch.cpp |
45 | | * \brief encoder intra search class |
46 | | */ |
47 | | |
48 | | #include "IntraSearch.h" |
49 | | #include "EncPicture.h" |
50 | | #include "CommonLib/CommonDef.h" |
51 | | #include "CommonLib/Rom.h" |
52 | | #include "CommonLib/Picture.h" |
53 | | #include "CommonLib/UnitTools.h" |
54 | | #include "CommonLib/dtrace_next.h" |
55 | | #include "CommonLib/dtrace_buffer.h" |
56 | | #include "CommonLib/Reshape.h" |
57 | | #include <math.h> |
58 | | #include "vvenc/vvencCfg.h" |
59 | | |
60 | | //! \ingroup EncoderLib |
61 | | //! \{ |
62 | | |
63 | | namespace vvenc { |
64 | | |
// Shorthand macro: PLTCtx(c) expands to SubCtx( Ctx::Palette, c ).
// It is not referenced in the part of the file visible in this listing.
65 | | #define PLTCtx(c) SubCtx( Ctx::Palette, c ) |
66 | | |
// Default constructor. Sets the save-layer array, the configuration / transform / RD-cost
// pointers and the CABAC estimator / context cache pointers to nullptr; all real set-up
// is deferred to init() and setCtuEncRsrc().
// NOTE(review): m_pTempCS and m_pBestCS are tested in destroy() but are not initialised
// in this list - confirm they carry default member initialisers in IntraSearch.h.
67 | | IntraSearch::IntraSearch() |
68 | 20.7k | : m_pSaveCS (nullptr) |
69 | 20.7k | , m_pcEncCfg (nullptr) |
70 | 20.7k | , m_pcTrQuant (nullptr) |
71 | 20.7k | , m_pcRdCost (nullptr) |
72 | 20.7k | , m_CABACEstimator(nullptr) |
73 | 20.7k | , m_CtxCache (nullptr) |
74 | 20.7k | { |
75 | 20.7k | } |
76 | | |
// Prepares the search object for one encoder configuration.
//   encCfg             - encoder configuration; its address is stored in m_pcEncCfg.
//   pTrQuant, pRdCost  - stored as-is in m_pcTrQuant / m_pcRdCost (destroy() does not delete them).
//   pSortedPelUnitBufs - stored as-is in m_SortedPelUnitBufs (destroy() does not delete it).
//   unitCache          - handed to every CodingStructure constructed here.
// Allocates m_pTempCS, m_pBestCS and three "save" coding structures, each created for a
// square area of m_CTUSize x m_CTUSize, and creates five Cb plus five Cr residual buffers.
// NOTE(review): a second init() without an intervening destroy() would overwrite the
// pointers allocated here - confirm callers never do that.
77 | | void IntraSearch::init(const VVEncCfg &encCfg, TrQuant *pTrQuant, RdCost *pRdCost, SortedPelUnitBufs<SORTED_BUFS> *pSortedPelUnitBufs, XUCache &unitCache ) |
78 | 20.7k | { |
// Initialise the IntraPrediction base with the internal chroma format and the luma bit depth.
79 | 20.7k | IntraPrediction::init( encCfg.m_internChromaFormat, encCfg.m_internalBitDepth[ CH_L ] ); |
80 | |
81 | 20.7k | m_pcEncCfg = &encCfg; |
82 | 20.7k | m_pcTrQuant = pTrQuant; |
83 | 20.7k | m_pcRdCost = pRdCost; |
84 | 20.7k | m_SortedPelUnitBufs = pSortedPelUnitBufs; |
85 | |
86 | 20.7k | const ChromaFormat chrFormat = encCfg.m_internChromaFormat; |
87 | 20.7k | const int maxCUSize = encCfg.m_CTUSize; |
88 | |
// All working structures below are dimensioned for one CTU-sized block at the origin.
89 | 20.7k | Area area = Area( 0, 0, maxCUSize, maxCUSize ); |
90 | |
91 | 20.7k | m_pTempCS = new CodingStructure( unitCache, nullptr ); |
92 | 20.7k | m_pBestCS = new CodingStructure( unitCache, nullptr ); |
93 | |
94 | 20.7k | m_pTempCS->createForSearch( chrFormat, area ); |
95 | 20.7k | m_pBestCS->createForSearch( chrFormat, area ); |
96 | |
// The layer count (3) is repeated as a local constant in destroy(); the two must stay in sync.
97 | 20.7k | const int uiNumSaveLayersToAllocate = 3; |
98 | 20.7k | m_pSaveCS = new CodingStructure*[uiNumSaveLayersToAllocate]; |
99 | 83.0k | for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ ) |
100 | 62.3k | { |
101 | 62.3k | m_pSaveCS[ layer ] = new CodingStructure( unitCache, nullptr ); |
102 | 62.3k | m_pSaveCS[ layer ]->createForSearch( chrFormat, Area( 0, 0, maxCUSize, maxCUSize ) ); |
103 | 62.3k | m_pSaveCS[ layer ]->initStructData(); |
104 | 62.3k | } |
105 | |
// Residual buffers for Cb and Cr, five of each, sized from the Cb component area of the CTU.
106 | 20.7k | CompArea chromaArea( COMP_Cb, chrFormat, area, true ); |
107 | 124k | for( int i = 0; i < 5; i++ ) |
108 | 103k | { |
109 | 103k | m_orgResiCb[i].create( chromaArea ); |
110 | 103k | m_orgResiCr[i].create( chromaArea ); |
111 | 103k | } |
112 | 20.7k | } |
113 | | |
// Releases the coding structures allocated with new in init(): the save-layer array and
// its elements, m_pTempCS and m_pBestCS. Each of the three top-level pointers is
// null-checked and reset to nullptr afterwards.
// NOTE(review): m_orgResiCb / m_orgResiCr, created in init(), are not touched here -
// presumably they are released by their own destructors; confirm.
114 | | void IntraSearch::destroy() |
115 | 20.7k | { |
116 | 20.7k | if ( m_pSaveCS ) |
117 | 20.7k | { |
// Must match the layer count used in init().
118 | 20.7k | const int uiNumSaveLayersToAllocate = 3; |
119 | 83.0k | for( int layer = 0; layer < uiNumSaveLayersToAllocate; layer++ ) |
120 | 62.3k | { |
121 | 62.3k | if ( m_pSaveCS[ layer ] ) { m_pSaveCS[ layer ]->destroy(); delete m_pSaveCS[ layer ]; } |
122 | 62.3k | } |
123 | 20.7k | delete[] m_pSaveCS; |
124 | 20.7k | m_pSaveCS = nullptr; |
125 | 20.7k | } |
126 | |
127 | 20.7k | if( m_pTempCS ) |
128 | 20.7k | { |
129 | 20.7k | m_pTempCS->destroy(); |
130 | 20.7k | delete m_pTempCS; m_pTempCS = nullptr; |
131 | 20.7k | } |
132 | |
133 | 20.7k | if( m_pBestCS ) |
134 | 20.7k | { |
135 | 20.7k | m_pBestCS->destroy(); |
136 | 20.7k | delete m_pBestCS; m_pBestCS = nullptr; |
137 | 20.7k | } |
138 | 20.7k | } |
139 | | |
// Destructor: delegates all clean-up to destroy().
140 | | IntraSearch::~IntraSearch() |
141 | 20.7k | { |
142 | 20.7k | destroy(); |
143 | 20.7k | } |
144 | | |
// Stores the CABAC estimator and context cache pointers that the estimation functions
// below use (e.g. m_CABACEstimator->getCtx() and TempCtx( m_CtxCache, ... )).
// The pointers are only stored; nothing is allocated or released here.
145 | | void IntraSearch::setCtuEncRsrc( CABACWriter* cabacEstimator, CtxCache *ctxCache ) |
146 | 4.05k | { |
147 | 4.05k | m_CABACEstimator = cabacEstimator; |
148 | 4.05k | m_CtxCache = ctxCache; |
149 | 4.05k | } |
150 | | |
151 | | ////////////////////////////////////////////////////////////////////////// |
152 | | // INTRA PREDICTION |
153 | | ////////////////////////////////////////////////////////////////////////// |
// Sentinel returned by xFindInterCUCost() when no stored cost matches the CU;
// estIntraPredLumaQT() compares against it before using the value.
154 | | static constexpr double COST_UNKNOWN = -65536.0; |
155 | | |
// Looks up a previously stored cost for the given CU.
// The lookup is attempted only when CU::isConsIntra(cu) holds and the slice is not an
// intra slice. It scans the first m_numCuInSCIPU entries of m_cuAreaInSCIPU for one whose
// position and size equal the CU's luma position and size.
// Returns the matching m_cuCostInSCIPU entry, or COST_UNKNOWN when nothing matches.
156 | | double IntraSearch::xFindInterCUCost( CodingUnit &cu ) |
157 | 27.1k | { |
158 | 27.1k | if( CU::isConsIntra(cu) && !cu.slice->isIntra() ) |
159 | 0 | { |
160 | | //search corresponding inter CU cost |
161 | 0 | for( int i = 0; i < m_numCuInSCIPU; i++ ) |
162 | 0 | { |
163 | 0 | if( cu.lumaPos() == m_cuAreaInSCIPU[i].pos() && cu.lumaSize() == m_cuAreaInSCIPU[i].size() ) |
164 | 0 | { |
165 | 0 | return m_cuCostInSCIPU[i]; |
166 | 0 | } |
167 | 0 | } |
168 | 0 | } |
169 | 27.1k | return COST_UNKNOWN; |
170 | 27.1k | } |
171 | | |
// Builds the list of luma intra candidates that will later go through full RD testing.
// Every candidate is predicted, its distortion is measured with the DF_HAD_2SAD distortion
// function, and its cost is  distortion + fracModeBits * sqrtLambdaForFirstPass.
//   numModesForFullRD [in/out] - requested number of candidates; enlarged when testMip is
//                                set and incremented for every MPM appended at the end.
//   RdModeList / CandCostList  - candidates and their costs, maintained via updateCandList().
//   HadModeList / CandHadList  - up to numHadCand candidates ranked by distortion only
//                                (MIP distortion is scaled by 0.8 before insertion).
//   cu                         - mipFlag, mipTransposedFlag, multiRefIdx and intraDir[0]
//                                are overwritten while candidates are tried.
//   testMip                    - also evaluate MIP candidates.
// Stages: (1) regular modes, coarse-to-fine, (2) MRL candidates from the MPM list,
// (3) MIP modes, (4) append MPMs that are missing from the list.
172 | | void IntraSearch::xEstimateLumaRdModeList(int& numModesForFullRD, |
173 | | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, |
174 | | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& HadModeList, |
175 | | static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandCostList, |
176 | | static_vector<double, FAST_UDI_MAX_RDMODE_NUM>& CandHadList, CodingUnit& cu, bool testMip ) |
177 | 27.1k | { |
178 | 27.1k | PROFILER_SCOPE_AND_STAGE_EXT( 1, _TPROF, P_INTRA_EST_RD_CAND, cu.cs, CH_L ); |
// Snapshot of the intra-related context models (a span of intra_ctx_size contexts starting
// at IntraLumaMpmFlag); it is copied back after every bit estimation below.
179 | 27.1k | const uint16_t intra_ctx_size = Ctx::IntraLumaMpmFlag.size() + Ctx::IntraLumaPlanarFlag.size() + Ctx::MultiRefLineIdx.size() + Ctx::ISPMode.size() + Ctx::MipFlag.size(); |
180 | 27.1k | const TempCtx ctxStartIntraCtx(m_CtxCache, SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), m_CABACEstimator->getCtx())); |
181 | 27.1k | const double sqrtLambdaForFirstPass = m_pcRdCost->getMotionLambda() * FRAC_BITS_SCALE; |
182 | 27.1k | const int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes |
183 | |
184 | 27.1k | CHECK(numModesForFullRD >= numModesAvailable, "Too many modes for full RD search"); |
185 | |
186 | 27.1k | const SPS& sps = *cu.cs->sps; |
187 | 27.1k | const bool fastMip = sps.MIP && m_pcEncCfg->m_useFastMIP; |
188 | |
189 | | // this should always be true |
190 | 27.1k | CHECK( !cu.Y().valid(), "CU is not valid" ); |
191 | |
192 | 27.1k | const CompArea& area = cu.Y(); |
193 | |
194 | 27.1k | const UnitArea localUnitArea(area.chromaFormat, Area(0, 0, area.width, area.height)); |
// With MIP the candidate budget grows: doubled when fastMip is off, otherwise increased by
// numModesForFullRD - min( m_useFastMIP, numModesForFullRD ). One extra buffer is prepared.
195 | 27.1k | if( testMip) |
196 | 20.7k | { |
197 | 20.7k | numModesForFullRD += fastMip ? numModesForFullRD - std::min( m_pcEncCfg->m_useFastMIP, numModesForFullRD ) |
198 | 20.7k | : numModesForFullRD; |
199 | 20.7k | m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD + 1 ); |
200 | 20.7k | } |
201 | 6.39k | else |
202 | 6.39k | { |
203 | 6.39k | m_SortedPelUnitBufs->prepare( localUnitArea, numModesForFullRD ); |
204 | 6.39k | } |
205 | |
206 | 27.1k | CPelBuf piOrg = cu.cs->getOrgBuf(COMP_Y); |
207 | 27.1k | PelBuf piPred = m_SortedPelUnitBufs->getTestBuf(COMP_Y); |
208 | |
// When LMCS is enabled for the picture and the CTU flag is set, compare against the
// buffer returned by getRspOrgBuf() instead of the plain original.
209 | 27.1k | const ReshapeData& reshapeData = cu.cs->picture->reshapeData; |
210 | 27.1k | if (cu.cs->picHeader->lmcsEnabled && reshapeData.getCTUFlag()) |
211 | 0 | { |
212 | 0 | piOrg = cu.cs->getRspOrgBuf(); |
213 | 0 | } |
214 | 27.1k | DistParam distParam = m_pcRdCost->setDistParam( piOrg, piPred, sps.bitDepths[ CH_L ], DF_HAD_2SAD); // Use HAD (SATD) cost |
215 | |
// Size limit of the distortion-only list: 3 entries, or 6 when MIP is tested.
216 | 27.1k | const int numHadCand = (testMip ? 2 : 1) * 3; |
217 | |
218 | | //*** Derive (regular) candidates using Hadamard |
219 | 27.1k | cu.mipFlag = false; |
220 | 27.1k | cu.multiRefIdx = 0; |
221 | |
222 | | //===== init pattern for luma prediction ===== |
223 | 27.1k | initIntraPatternChType(cu, cu.Y(), true); |
224 | |
225 | 27.1k | bool satdChecked[NUM_INTRA_MODE] = { false }; |
226 | |
227 | 27.1k | unsigned mpmLst[NUM_MOST_PROBABLE_MODES]; |
228 | 27.1k | CU::getIntraMPMs(cu, mpmLst); |
229 | |
// decMsk has the low m_IntraEstDecBit bits set; it selects the decimated mode grid.
230 | 27.1k | const int decMsk = ( 1 << m_pcEncCfg->m_IntraEstDecBit ) - 1; |
231 | |
232 | 27.1k | m_parentCandList.resize( 0 ); |
233 | 27.1k | m_parentCandList.reserve( ( numModesAvailable >> m_pcEncCfg->m_IntraEstDecBit ) + 2 ); |
234 | |
// Seed list: every mode up to and including DC_IDX, plus the modes above DC_IDX whose
// low m_IntraEstDecBit bits are all zero.
235 | 1.84M | for( unsigned mode = 0; mode < numModesAvailable; mode++ ) |
236 | 1.81M | { |
237 | | // Skip checking extended Angular modes in the first round of SATD |
238 | 1.81M | if( mode > DC_IDX && ( mode & decMsk ) ) |
239 | 1.33M | { |
240 | 1.33M | continue; |
241 | 1.33M | } |
242 | |
243 | 488k | m_parentCandList.push_back( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ) ); |
244 | 488k | } |
245 | |
// Coarse-to-fine refinement. In the first pass off is 0, so each parent itself is tested;
// in later passes the two neighbours at distance decDst around each parent are tested.
// After each pass the parents are replaced by the current RdModeList.
246 | 108k | for( int decDst = 1 << m_pcEncCfg->m_IntraEstDecBit; decDst > 0; decDst >>= 1 ) |
247 | 81.4k | { |
248 | 733k | for( unsigned idx = 0; idx < m_parentCandList.size(); idx++ ) |
249 | 651k | { |
250 | 651k | int modeParent = m_parentCandList[idx].modeId; |
251 | |
252 | 651k | int off = decDst & decMsk; |
253 | 651k | int inc = decDst << 1; |
254 | |
// In refinement passes, parents at or below DC_IDX + 1 and at or above NUM_LUMA_MODE - 1
// are not refined.
255 | 651k | #if 1 // INTRA_AS_IN_VTM |
256 | 651k | if( off != 0 && ( modeParent <= ( DC_IDX + 1 ) || modeParent >= ( NUM_LUMA_MODE - 1 ) ) ) |
257 | 105k | { |
258 | 105k | continue; |
259 | 105k | } |
260 | |
261 | 545k | #endif |
262 | 1.14M | for( int mode = modeParent - off; mode < modeParent + off + 1; mode += inc ) |
263 | 602k | { |
// NOTE(review): satdChecked[mode] is read before the range checks on mode in the same
// condition; if mode can ever fall outside [0, NUM_INTRA_MODE) here, the array read
// happens first. Confirm the guard above makes that impossible for all m_IntraEstDecBit.
264 | 602k | if( satdChecked[mode] || mode < 0 || mode >= NUM_LUMA_MODE ) |
265 | 2.57k | { |
266 | 2.57k | continue; |
267 | 2.57k | } |
268 | |
269 | 600k | cu.intraDir[0] = mode; |
270 | |
271 | 600k | initPredIntraParams( cu, cu.Y(), sps ); |
// Predict into the current test buffer and point the distortion parameters at it.
272 | 600k | distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf; |
273 | 600k | predIntraAng( COMP_Y, piPred, cu ); |
274 | |
275 | | // Use the min between SAD and HAD as the cost criterion |
276 | | // SAD is scaled by 2 to align with the scaling of HAD |
277 | 600k | Distortion minSadHad = distParam.distFunc( distParam ); |
278 | |
279 | 600k | uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst ); |
280 | |
281 | | //restore ctx |
282 | 600k | m_CABACEstimator->getCtx() = SubCtx( CtxSet( Ctx::IntraLumaMpmFlag(), intra_ctx_size ), ctxStartIntraCtx ); |
283 | |
284 | 600k | double cost = ( double ) minSadHad + ( double ) fracModeBits * sqrtLambdaForFirstPass; |
285 | 600k | DTRACE( g_trace_ctx, D_INTRA_COST, "IntraHAD: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, mode ); |
286 | |
// insertPos is filled by the cost-list update and then passed to the sorted prediction
// buffers together with the new list size.
287 | 600k | int insertPos = -1; |
288 | 600k | updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos ); |
289 | 600k | updateCandList( ModeInfo( false, false, 0, NOT_INTRA_SUBPARTITIONS, mode ), ( double ) minSadHad, HadModeList, CandHadList, numHadCand ); |
290 | 600k | m_SortedPelUnitBufs->insert( insertPos, ( int ) RdModeList.size() ); |
291 | |
292 | 600k | satdChecked[mode] = true; |
293 | 600k | } |
294 | 545k | } |
295 | |
296 | 81.4k | m_parentCandList.resize( RdModeList.size() ); |
297 | 81.4k | std::copy( RdModeList.cbegin(), RdModeList.cend(), m_parentCandList.begin() ); |
298 | 81.4k | } |
299 | |
// Multi-reference-line candidates: skipped when the CU's luma y position is a multiple of
// the CTU size (first line of the CTU) or when m_MRL is off.
300 | 27.1k | const bool isFirstLineOfCtu = (((cu.block(COMP_Y).y)&((cu.cs->sps)->CTUSize - 1)) == 0); |
301 | 27.1k | if( m_pcEncCfg->m_MRL && ! isFirstLineOfCtu ) |
302 | 16.5k | { |
303 | 16.5k | cu.multiRefIdx = 1; |
304 | 16.5k | unsigned multiRefMPM [NUM_MOST_PROBABLE_MODES]; |
305 | 16.5k | CU::getIntraMPMs(cu, multiRefMPM); |
306 | |
307 | 49.5k | for (int mRefNum = 1; mRefNum < MRL_NUM_REF_LINES; mRefNum++) |
308 | 33.0k | { |
309 | 33.0k | int multiRefIdx = MULTI_REF_LINE_IDX[mRefNum]; |
310 | |
311 | 33.0k | cu.multiRefIdx = multiRefIdx; |
312 | 33.0k | initIntraPatternChType(cu, cu.Y(), true); |
313 | |
// Entry 0 of the MPM list is skipped; the CHECK in estIntraPredLumaQT() rejects MRL
// combined with PLANAR_IDX, so entry 0 is presumably planar - confirm against getIntraMPMs().
314 | 198k | for (int x = 1; x < NUM_MOST_PROBABLE_MODES; x++) |
315 | 165k | { |
316 | 165k | cu.intraDir[0] = multiRefMPM[x]; |
317 | 165k | initPredIntraParams(cu, cu.Y(), sps); |
318 | 165k | distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf; |
319 | 165k | predIntraAng(COMP_Y, piPred, cu); |
320 | |
321 | | // Use the min between SAD and SATD as the cost criterion |
322 | | // SAD is scaled by 2 to align with the scaling of HAD |
323 | 165k | Distortion minSadHad = distParam.distFunc(distParam); |
324 | |
325 | | // NB xFracModeBitsIntra will not affect the mode for chroma that may have already been pre-estimated. |
326 | 165k | uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst ); |
327 | |
328 | | //restore ctx |
329 | 165k | m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx); |
330 | |
331 | 165k | double cost = (double) minSadHad + (double) fracModeBits * sqrtLambdaForFirstPass; |
332 | | // DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMRL: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, cu.intraDir[0]); |
333 | |
334 | 165k | int insertPos = -1; |
335 | 165k | updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList, CandCostList, numModesForFullRD, &insertPos ); |
336 | 165k | updateCandList( ModeInfo( false, false, multiRefIdx, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), (double)minSadHad, HadModeList, CandHadList, numHadCand ); |
337 | 165k | m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size()); |
338 | 165k | } |
339 | 33.0k | } |
340 | 16.5k | cu.multiRefIdx = 0; |
341 | 16.5k | } |
342 | |
// MIP candidates: every MIP mode is tried twice, once plain and once transposed.
343 | 27.1k | if (testMip) |
344 | 20.7k | { |
345 | 20.7k | cu.mipFlag = true; |
346 | 20.7k | cu.multiRefIdx = 0; |
347 | |
// NOTE(review): this initialiser sets only element 0 to MAX_DOUBLE; the remaining elements
// are zero-initialised. Entries [0, numModesFull) are overwritten in the loop below -
// confirm xReduceHadCandList() never reads beyond that range.
348 | 20.7k | double mipHadCost[MAX_NUM_MIP_MODE] = { MAX_DOUBLE }; |
349 | |
350 | 20.7k | initIntraPatternChType(cu, cu.Y()); |
351 | 20.7k | initIntraMip( cu ); |
352 | |
// Indices [0, transpOff) are the non-transposed modes, [transpOff, 2*transpOff) the transposed ones.
353 | 20.7k | const int transpOff = getNumModesMip( cu.Y() ); |
354 | 20.7k | const int numModesFull = (transpOff << 1); |
355 | 271k | for( uint32_t uiModeFull = 0; uiModeFull < numModesFull; uiModeFull++ ) |
356 | 250k | { |
357 | 250k | const bool isTransposed = (uiModeFull >= transpOff ? true : false); |
358 | 250k | const uint32_t uiMode = (isTransposed ? uiModeFull - transpOff : uiModeFull); |
359 | |
360 | 250k | cu.mipTransposedFlag = isTransposed; |
361 | 250k | cu.intraDir[CH_L] = uiMode; |
362 | 250k | distParam.cur.buf = piPred.buf = m_SortedPelUnitBufs->getTestBuf().Y().buf; |
363 | 250k | predIntraMip(piPred, cu); |
364 | |
365 | | // Use the min between SAD and HAD as the cost criterion |
366 | | // SAD is scaled by 2 to align with the scaling of HAD |
367 | 250k | Distortion minSadHad = distParam.distFunc(distParam); |
368 | |
369 | 250k | uint64_t fracModeBits = xFracModeBitsIntraLuma( cu, mpmLst ); |
370 | |
371 | | //restore ctx |
372 | 250k | m_CABACEstimator->getCtx() = SubCtx(CtxSet(Ctx::IntraLumaMpmFlag(), intra_ctx_size), ctxStartIntraCtx); |
373 | |
374 | 250k | double cost = double(minSadHad) + double(fracModeBits) * sqrtLambdaForFirstPass; |
375 | 250k | mipHadCost[uiModeFull] = cost; |
376 | 250k | DTRACE(g_trace_ctx, D_INTRA_COST, "IntraMIP: %u, %llu, %f (%d)\n", minSadHad, fracModeBits, cost, uiModeFull); |
377 | |
// The cost list is allowed one more entry here (numModesForFullRD+1), matching the extra
// buffer prepared above; the distortion-only list receives the distortion scaled by 0.8.
378 | 250k | int insertPos = -1; |
379 | 250k | updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), cost, RdModeList, CandCostList, numModesForFullRD+1, &insertPos ); |
380 | 250k | updateCandList( ModeInfo( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, cu.intraDir[0] ), 0.8*(double)minSadHad, HadModeList, CandHadList, numHadCand ); |
381 | 250k | m_SortedPelUnitBufs->insert(insertPos, (int)RdModeList.size()); |
382 | 250k | } |
383 | |
// Threshold for the list reduction: 1 + 1.4 / sqrt( luma width * luma height ).
384 | 20.7k | const double thresholdHadCost = 1.0 + 1.4 / sqrt((double)(cu.lwidth()*cu.lheight())); |
385 | 20.7k | xReduceHadCandList(RdModeList, CandCostList, *m_SortedPelUnitBufs, numModesForFullRD, thresholdHadCost, mipHadCost, cu, fastMip); |
386 | 20.7k | } |
387 | |
// Make sure the MPMs reported by getIntraMPMs() (numCand of them, reference line 0) are in
// the list: a missing one is appended with cost 0 and numModesForFullRD is incremented.
// NOTE(review): the inner loop indexes RdModeList[i] for i < numModesForFullRD, which
// assumes RdModeList.size() >= numModesForFullRD at this point - confirm.
388 | 27.1k | if( m_pcEncCfg->m_bFastUDIUseMPMEnabled ) |
389 | 27.1k | { |
390 | 27.1k | const int numMPMs = NUM_MOST_PROBABLE_MODES; |
391 | 27.1k | unsigned intraMpms[numMPMs]; |
392 | |
393 | 27.1k | cu.multiRefIdx = 0; |
394 | |
395 | 27.1k | const int numCand = CU::getIntraMPMs( cu, intraMpms ); |
396 | 27.1k | ModeInfo mostProbableMode(false, false, 0, NOT_INTRA_SUBPARTITIONS, 0); |
397 | |
398 | 55.1k | for( int j = 0; j < numCand; j++ ) |
399 | 27.9k | { |
400 | 27.9k | bool mostProbableModeIncluded = false; |
401 | 27.9k | mostProbableMode.modeId = intraMpms[j]; |
402 | |
403 | 142k | for( int i = 0; i < numModesForFullRD; i++ ) |
404 | 114k | { |
405 | 114k | mostProbableModeIncluded |= ( mostProbableMode == RdModeList[i] ); |
406 | 114k | } |
407 | 27.9k | if( !mostProbableModeIncluded ) |
408 | 187 | { |
409 | 187 | numModesForFullRD++; |
410 | 187 | RdModeList.push_back( mostProbableMode ); |
411 | 187 | CandCostList.push_back(0); |
412 | 187 | } |
413 | 27.9k | } |
414 | 27.1k | } |
415 | 27.1k | } |
416 | | |
// Full RD search over luma intra modes for one CU.
//   cu          - CU under test; on success its lfnstIdx, mipFlag, mipTransposedFlag,
//                 multiRefIdx, intraDir[CH_L], bdpcmM[CH_L] and ispMode hold the winner.
//   partitioner - supplies the current area and channel type.
//   bestCost    - passed by value; cost bound handed to xSpeedUpIntra() and
//                 xIntraCodingLumaQT(), lowered locally whenever a better mode is found.
// Returns true when at least one tested mode beat csBest's cost, in which case the best
// sub-structure is copied into cu.cs. Returns false from the m_usePbIntraFast early exit
// (with cs.dist set to MAX_DISTORTION). If the mode loop finishes without any valid mode,
// THROW("fix this") is raised.
// Steps: build the candidate list (xEstimateLumaRdModeList), optionally prune it against
// cs.interHad, then test each candidate - and up to two BDPCM candidates - with and
// without ISP through xIntraCodingLumaQT().
417 | | bool IntraSearch::estIntraPredLumaQT(CodingUnit &cu, Partitioner &partitioner, double bestCost) |
418 | 27.1k | { |
419 | 27.1k | CodingStructure &cs = *cu.cs; |
420 | 27.1k | const int width = partitioner.currArea().lwidth(); |
421 | 27.1k | const int height = partitioner.currArea().lheight(); |
422 | |
423 | | //===== loop over partitions ===== |
424 | |
// Context snapshot restored after every tested mode.
425 | 27.1k | const TempCtx ctxStart ( m_CtxCache, m_CABACEstimator->getCtx() ); |
426 | |
427 | | // variables for saving fast intra modes scan results across multiple LFNST passes |
428 | 27.1k | double costInterCU = xFindInterCUCost( cu ); |
429 | |
430 | 27.1k | bool validReturn = false; |
431 | |
432 | | //===== determine set of modes to be tested (using prediction signal only) ===== |
433 | 27.1k | int numModesAvailable = NUM_LUMA_MODE; // total number of Intra modes |
434 | 27.1k | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> RdModeList; |
435 | 27.1k | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> HadModeList; |
436 | 27.1k | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandCostList; |
437 | 27.1k | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> CandHadList; |
438 | |
// Default candidate count comes from a table indexed by log2 width / log2 height;
// a positive m_numIntraModesFullRD overrides it.
439 | 27.1k | int numModesForFullRD = g_aucIntraModeNumFast_UseMPM_2D[Log2(width) - MIN_CU_LOG2][Log2(height) - MIN_CU_LOG2]; |
440 | 27.1k | if (m_pcEncCfg->m_numIntraModesFullRD > 0) |
441 | 0 | numModesForFullRD=m_pcEncCfg->m_numIntraModesFullRD; |
442 | |
443 | | #if INTRA_FULL_SEARCH |
444 | | numModesForFullRD = numModesAvailable; |
445 | | #endif |
446 | 27.1k | const SPS& sps = *cu.cs->sps; |
// MIP is tested only if it is enabled, the block fits the maximum transform size and
// MIP_MAX_WIDTH / MIP_MAX_HEIGHT, and neither side exceeds SizeThr times the other;
// SizeThr is 8 shifted right by max( 0, m_useFastMIP - 1 ).
447 | 27.1k | const bool mipAllowed = sps.MIP && cu.lwidth() <= sps.getMaxTbSize() && cu.lheight() <= sps.getMaxTbSize() && ((cu.lfnstIdx == 0) || allowLfnstWithMip(cu.lumaSize())); |
448 | 27.1k | const int SizeThr = 8 >> std::max( 0, m_pcEncCfg->m_useFastMIP - 1 ); |
449 | 27.1k | const bool testMip = mipAllowed && ( cu.lwidth() <= ( SizeThr * cu.lheight() ) && cu.lheight() <= ( SizeThr * cu.lwidth() ) ) && ( cu.lwidth() <= MIP_MAX_WIDTH && cu.lheight() <= MIP_MAX_HEIGHT ); |
450 | 27.1k | bool testISP = sps.ISP && CU::canUseISP(width, height, cu.cs->sps->getMaxTbSize()); |
// Initialise the per-LFNST-index ISP bookkeeping. Entry 0 gets the full sub-partition
// counts; entries 1.. get 0 for a direction where LFNST cannot be combined with ISP.
451 | 27.1k | if (testISP) |
452 | 27.1k | { |
453 | 27.1k | int numTotalPartsHor = (int)width >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_VERT_SPLIT)); |
454 | 27.1k | int numTotalPartsVer = (int)height >> floorLog2(CU::getISPSplitDim(width, height, TU_1D_HORZ_SPLIT)); |
455 | 27.1k | m_ispTestedModes[0].init(numTotalPartsHor, numTotalPartsVer, 0); |
456 | | // the total number of subpartitions is modified to take into account the cases where LFNST cannot be combined with |
457 | | // ISP due to size restrictions |
458 | 27.1k | numTotalPartsHor = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), HOR_INTRA_SUBPARTITIONS) ? numTotalPartsHor : 0; |
459 | 27.1k | numTotalPartsVer = sps.LFNST && CU::canUseLfnstWithISP(cu.Y(), VER_INTRA_SUBPARTITIONS) ? numTotalPartsVer : 0; |
460 | 81.4k | for (int j = 1; j < NUM_LFNST_NUM_PER_SET; j++) |
461 | 54.3k | { |
462 | 54.3k | m_ispTestedModes[j].init(numTotalPartsHor, numTotalPartsVer, 0); |
463 | 54.3k | } |
464 | 27.1k | testISP = m_ispTestedModes[0].numTotalParts[0]; |
465 | 27.1k | } |
466 | 0 | else |
467 | 0 | { |
468 | 0 | m_ispTestedModes[0].init(0, 0, 0); |
469 | 0 | } |
470 | |
471 | 27.1k | xEstimateLumaRdModeList(numModesForFullRD, RdModeList, HadModeList, CandCostList, CandHadList, cu, testMip); |
472 | |
473 | 27.1k | CHECK( (size_t)numModesForFullRD != RdModeList.size(), "Inconsistent state!" ); |
474 | |
// Pruning against the inter result (non-intra slices only): maxSize ends up as the
// smallest k in [0, 3) for which CandHadList has no entry k or entry k exceeds
// cs.interHad * pbintraRatio; it stays -1 when no such k exists.
// NOTE(review): several comparisons below mix size_t with int (RdModeList.size() vs
// numModesAvailable / idx / bestMipIdx, CandHadList.size() vs k + 1).
475 | | // after this point, don't use numModesForFullRD |
476 | 27.1k | if( m_pcEncCfg->m_usePbIntraFast && !cs.slice->isIntra() && RdModeList.size() < numModesAvailable ) |
477 | 0 | { |
478 | 0 | double pbintraRatio = m_pcEncCfg->m_usePbIntraFast == 1 && ( cs.area.lwidth() >= 16 && cs.area.lheight() >= 16 ) ? 1.2 : PBINTRA_RATIO; |
479 |
|
480 | 0 | int maxSize = -1; |
481 | 0 | ModeInfo bestMipMode; |
482 | 0 | int bestMipIdx = -1; |
// Remember the first MIP candidate in the list so it can be kept after truncation.
483 | 0 | for( int idx = 0; idx < RdModeList.size(); idx++ ) |
484 | 0 | { |
485 | 0 | if( RdModeList[idx].mipFlg ) |
486 | 0 | { |
487 | 0 | bestMipMode = RdModeList[idx]; |
488 | 0 | bestMipIdx = idx; |
489 | 0 | break; |
490 | 0 | } |
491 | 0 | } |
492 | 0 | const int numHadCand = 3; |
493 | 0 | for (int k = numHadCand - 1; k >= 0; k--) |
494 | 0 | { |
495 | 0 | if (CandHadList.size() < (k + 1) || CandHadList[k] > cs.interHad * pbintraRatio) { maxSize = k; } |
496 | 0 | } |
// Truncate to maxSize entries; if that dropped the remembered MIP candidate, append it
// again and swap its prediction buffer into position maxSize.
497 | 0 | if (maxSize > 0) |
498 | 0 | { |
499 | 0 | RdModeList.resize(std::min<size_t>(RdModeList.size(), maxSize)); |
500 | 0 | if( bestMipIdx >= 0 ) |
501 | 0 | { |
502 | 0 | if( RdModeList.size() <= bestMipIdx ) |
503 | 0 | { |
504 | 0 | RdModeList.push_back(bestMipMode); |
505 | 0 | m_SortedPelUnitBufs->swap( maxSize, bestMipIdx ); |
506 | 0 | } |
507 | 0 | } |
508 | 0 | } |
// maxSize == 0: even the best distortion-only candidate is above the threshold, so the
// intra search is abandoned for this CU.
509 | 0 | if (maxSize == 0) |
510 | 0 | { |
511 | 0 | cs.dist = MAX_DISTORTION; |
512 | 0 | cs.interHad = 0; |
513 | 0 | return false; |
514 | 0 | } |
515 | 0 | } |
516 | |
517 | | //===== check modes (using r-d costs) ===== |
518 | 27.1k | ModeInfo bestPUMode; |
519 | |
// csTemp receives each trial; it is swapped with csBest whenever the trial is cheaper.
520 | 27.1k | CodingStructure *csTemp = m_pTempCS; |
521 | 27.1k | CodingStructure *csBest = m_pBestCS; |
522 | |
523 | 27.1k | csTemp->slice = csBest->slice = cs.slice; |
524 | 27.1k | csTemp->picture = csBest->picture = cs.picture; |
525 | 27.1k | csTemp->compactResize( cu ); |
526 | 27.1k | csBest->compactResize( cu ); |
527 | 27.1k | csTemp->initStructData(); |
528 | 27.1k | csBest->initStructData(); |
529 | |
530 | 27.1k | int bestLfnstIdx = 0; |
531 | 27.1k | const bool useBDPCM = cs.picture->useBDPCM; |
// Two extra loop iterations are appended for BDPCM when it is enabled and allowed.
532 | 27.1k | int NumBDPCMCand = (useBDPCM && sps.BDPCM && CU::bdpcmAllowed(cu, ComponentID(partitioner.chType))) ? 2 : 0; |
533 | 27.1k | int bestbdpcmMode = 0; |
534 | 27.1k | int bestISP = 0; |
535 | 27.1k | int bestMrl = 0; |
536 | 27.1k | bool bestMip = 0; |
537 | 27.1k | int EndMode = (int)RdModeList.size(); |
538 | 27.1k | bool useISPlfnst = testISP && sps.LFNST; |
539 | 27.1k | bool noLFNST_ts = false; |
540 | 27.1k | double bestCostIsp[2] = { MAX_DOUBLE, MAX_DOUBLE }; |
541 | 27.1k | bool disableMTS = false; |
542 | 27.1k | bool disableLFNST = false; |
543 | 27.1k | bool disableDCT2test = false; |
// xSpeedUpIntra() fills speedIntra, decoded here as: bit 0 -> skip ISP testing,
// bit 1 -> disable LFNST, bit 2 -> disable MTS, bits 3 and above -> disableDCT2test.
544 | 27.1k | if (m_pcEncCfg->m_FastIntraTools) |
545 | 27.1k | { |
546 | 27.1k | int speedIntra = 0; |
547 | 27.1k | xSpeedUpIntra(bestCost, EndMode, speedIntra, cu); |
548 | 27.1k | disableMTS = (speedIntra >> 2 ) & 0x1; |
549 | 27.1k | disableLFNST = (speedIntra >> 1) & 0x1; |
550 | 27.1k | disableDCT2test = speedIntra>>3; |
551 | 27.1k | if (disableLFNST) |
552 | 24.3k | { |
553 | 27.1k | noLFNST_ts = true; |
554 | 24.3k | useISPlfnst = false; |
555 | 24.3k | } |
556 | 27.1k | if (speedIntra & 0x1) |
557 | 24.3k | { |
558 | 24.3k | testISP = false; |
559 | 24.3k | } |
560 | 27.1k | } |
561 | |
562 | 145k | for (int mode_cur = 0; mode_cur < EndMode + NumBDPCMCand; mode_cur++) |
563 | 118k | { |
564 | 118k | int mode = mode_cur; |
// Iterations past the regular list are the BDPCM candidates: the first gets mode -2,
// the second mode -1. ISP is switched off from here on.
565 | 118k | if (mode_cur >= EndMode) |
566 | 7.68k | { |
567 | 7.68k | mode = mode_cur - EndMode ? -1 : -2; |
568 | 7.68k | testISP = false; |
569 | 7.68k | } |
570 | | // set CU/PU to luma prediction mode |
571 | 118k | ModeInfo testMode; |
572 | 118k | int noISP = 0; |
573 | 118k | int endISP = testISP ? 2 : 0; |
574 | 118k | bool noLFNST = false || noLFNST_ts; |
// From the second candidate on, LFNST is skipped once the best non-ISP cost exceeds
// 1.4 times the best ISP cost; ISP itself is only tried for list indices 0..2.
575 | 118k | if (mode && useISPlfnst) |
576 | 9.36k | { |
577 | 9.36k | noLFNST |= (bestCostIsp[0] > (bestCostIsp[1] * 1.4)); |
578 | 9.36k | if (mode > 2) |
579 | 2.53k | { |
580 | 2.53k | endISP = 0; |
581 | 2.53k | testISP = false; |
582 | 2.53k | } |
583 | 9.36k | } |
584 | 118k | if (testISP) |
585 | 5.95k | { |
586 | 5.95k | xSpeedUpISP(1, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, bestLfnstIdx); |
587 | 5.95k | } |
588 | 118k | int startISP = 0; |
589 | 118k | if (disableDCT2test && mode && bestISP) |
590 | 0 | { |
591 | 0 | startISP = endISP ? 1 : 0; |
592 | 0 | } |
// ispM is written to cu.ispMode: 0 means no ISP, 1..endISP are the ISP variants.
593 | 246k | for (int ispM = startISP; ispM <= endISP; ispM++) |
594 | 127k | { |
595 | 127k | if (ispM && (ispM == noISP)) |
596 | 51 | { |
597 | 51 | continue; |
598 | 51 | } |
599 | |
// BDPCM candidate: bdpcmM is 2 for mode -2 (tested with VER_IDX) and 1 for mode -1
// (tested with HOR_IDX).
600 | 127k | if (mode < 0) |
601 | 7.68k | { |
602 | 7.68k | cu.bdpcmM[CH_L] = -mode; |
603 | 7.68k | testMode = ModeInfo(false, false, 0, NOT_INTRA_SUBPARTITIONS, cu.bdpcmM[CH_L] == 2 ? VER_IDX : HOR_IDX); |
604 | 7.68k | } |
605 | 119k | else |
606 | 119k | { |
607 | 119k | testMode = RdModeList[mode]; |
608 | 119k | cu.bdpcmM[CH_L] = 0; |
609 | 119k | } |
610 | |
611 | 127k | cu.ispMode = ispM; |
612 | 127k | cu.mipFlag = testMode.mipFlg; |
613 | 127k | cu.mipTransposedFlag = testMode.mipTrFlg; |
614 | 127k | cu.multiRefIdx = testMode.mRefId; |
615 | 127k | cu.intraDir[CH_L] = testMode.modeId; |
616 | 127k | if (cu.ispMode && xSpeedUpISP(0, testISP, mode, noISP, endISP, cu, RdModeList, bestPUMode, bestISP, 0) ) |
617 | 3.11k | { |
618 | 3.11k | continue; |
619 | 3.11k | } |
620 | 124k | if (m_pcEncCfg->m_FastIntraTools && (cu.ispMode || sps.LFNST || sps.MTS)) |
621 | 124k | { |
622 | 124k | m_ispTestedModes[0].intraWasTested = true; |
623 | 124k | } |
624 | 124k | CHECK(cu.mipFlag && cu.multiRefIdx, "Error: combination of MIP and MRL not supported"); |
625 | 124k | CHECK(cu.multiRefIdx && (cu.intraDir[0] == PLANAR_IDX), "Error: combination of MRL and Planar mode not supported"); |
626 | 124k | CHECK(cu.ispMode && cu.mipFlag, "Error: combination of ISP and MIP not supported"); |
627 | 124k | CHECK(cu.ispMode && cu.multiRefIdx, "Error: combination of ISP and MRL not supported"); |
628 | |
629 | | // determine residual for partition |
630 | 124k | cs.initSubStructure(*csTemp, partitioner.chType, cs.area, true); |
// The sign of doISP carries a flag into xIntraCodingLumaQT(): the mode index is negated
// when LFNST should not be tried for this trial (for mode 0 the negation has no effect).
// NOTE(review): for BDPCM candidates mode is negative and is also passed to
// getBufFromSortedList() - confirm that accessor accepts negative indices.
631 | 124k | int doISP = (((cu.ispMode == 0) && noLFNST) || (useISPlfnst && mode && cu.ispMode && (bestLfnstIdx == 0)) || disableLFNST) ? -mode : mode; |
632 | 124k | xIntraCodingLumaQT(*csTemp, partitioner, m_SortedPelUnitBufs->getBufFromSortedList(mode), bestCost, doISP, disableMTS); |
633 | |
634 | 124k | DTRACE(g_trace_ctx, D_INTRA_COST, "IntraCost T [x=%d,y=%d,w=%d,h=%d] %f (%d,%d,%d,%d,%d,%d) \n", cu.blocks[0].x, |
635 | 124k | cu.blocks[0].y, width, height, csTemp->cost, testMode.modeId, testMode.ispMod, |
636 | 124k | cu.multiRefIdx, cu.mipFlag, cu.lfnstIdx, cu.mtsFlag); |
637 | |
// An ISP trial whose first TU has no luma CBF is discarded by forcing its cost to MAX_DOUBLE.
638 | 124k | if (cu.ispMode && !csTemp->cus[0]->firstTU->cbf[COMP_Y]) |
639 | 1.91k | { |
640 | 1.91k | csTemp->cost = MAX_DOUBLE; |
641 | 1.91k | csTemp->costDbOffset = 0; |
642 | 1.91k | } |
// Track the best cost separately for non-ISP (index 0) and ISP (index 1) trials.
643 | 124k | if (useISPlfnst) |
644 | 17.8k | { |
645 | 17.8k | int n = (cu.ispMode == 0) ? 0 : 1; |
646 | 17.8k | bestCostIsp[n] = csTemp->cost < bestCostIsp[n] ? csTemp->cost : bestCostIsp[n]; |
647 | 17.8k | } |
648 | |
649 | | // check r-d cost |
650 | 124k | if (csTemp->cost < csBest->cost) |
651 | 34.2k | { |
652 | 34.2k | validReturn = true; |
653 | 34.2k | std::swap(csTemp, csBest); |
654 | 34.2k | bestPUMode = testMode; |
655 | 34.2k | bestLfnstIdx = csBest->cus[0]->lfnstIdx; |
656 | 34.2k | bestISP = csBest->cus[0]->ispMode; |
657 | 34.2k | bestMip = csBest->cus[0]->mipFlag; |
658 | 34.2k | bestMrl = csBest->cus[0]->multiRefIdx; |
659 | 34.2k | bestbdpcmMode = cu.bdpcmM[CH_L]; |
660 | 34.2k | m_ispTestedModes[bestLfnstIdx].bestSplitSoFar = ISPType(bestISP); |
661 | 34.2k | if (csBest->cost < bestCost) |
662 | 34.2k | { |
663 | 34.2k | bestCost = csBest->cost; |
664 | 34.2k | } |
// If the new best uses transform skip on a luma block whose area has floorLog2 >= 6,
// LFNST is skipped for the remaining non-ISP trials.
665 | 34.2k | if ((csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] == MTS_SKIP) && ( floorLog2(csBest->getTU(partitioner.chType)->blocks[COMP_Y].area()) >= 6 )) |
666 | 4.62k | { |
667 | 4.62k | noLFNST_ts = 1; |
668 | 4.62k | } |
669 | 34.2k | } |
670 | |
671 | | // reset context models |
672 | 124k | m_CABACEstimator->getCtx() = ctxStart; |
673 | |
674 | 124k | csTemp->releaseIntermediateData(); |
675 | |
// Early termination for constrained-intra CUs in non-intra slices when a stored inter
// cost is known: always in mode 2, otherwise when the best intra cost exceeds 1.5x it.
// NOTE(review): the break leaves only the ispM loop; EndMode = 0 is what stops further
// regular candidates in the outer loop - confirm the behaviour intended for any
// remaining BDPCM iterations.
676 | 124k | if (m_pcEncCfg->m_fastLocalDualTreeMode && CU::isConsIntra(cu) && !cu.slice->isIntra() && csBest->cost != MAX_DOUBLE && costInterCU != COST_UNKNOWN && mode >= 0) |
677 | 0 | { |
678 | 0 | if( (m_pcEncCfg->m_fastLocalDualTreeMode == 2) || (csBest->cost > costInterCU * 1.5)) |
679 | 0 | { |
680 | | //Note: only try one intra mode, which is especially useful to reduce EncT for LDB case (around 4%) |
681 | 0 | EndMode = 0; |
682 | 0 | break; |
683 | 0 | } |
684 | 0 | } |
685 | 124k | } |
686 | 118k | } // Mode loop |
687 | |
// Record which tools the winner used as a bit mask: 4 = non-zero MTS index, 2 = LFNST, 1 = ISP.
688 | 27.1k | if (m_pcEncCfg->m_FastIntraTools && (sps.ISP|| sps.LFNST || sps.MTS)) |
689 | 27.1k | { |
690 | 27.1k | int bestMode = csBest->getTU(partitioner.chType)->mtsIdx[COMP_Y] ? 4 : 0; |
691 | 27.1k | bestMode |= bestLfnstIdx ? 2 : 0; |
692 | 27.1k | bestMode |= bestISP ? 1 : 0; |
693 | 27.1k | m_ispTestedModes[0].bestIntraMode = bestMode; |
694 | 27.1k | } |
695 | 27.1k | cu.ispMode = bestISP; |
696 | 27.1k | if( validReturn ) |
697 | 27.1k | { |
// Copy the winning sub-structure back into the caller's coding structure.
698 | 27.1k | cs.useSubStructure( *csBest, partitioner.chType, TREE_D, cu.singleChan( CH_L ), true ); |
699 | 27.1k | const ReshapeData& reshapeData = cs.picture->reshapeData; |
700 | 27.1k | if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) |
701 | 0 | { |
702 | 0 | cs.getRspRecoBuf().copyFrom(csBest->getRspRecoBuf()); |
703 | 0 | } |
704 | |
705 | | //=== update PU data ==== |
706 | 27.1k | cu.lfnstIdx = bestLfnstIdx; |
707 | 27.1k | cu.mipTransposedFlag = bestPUMode.mipTrFlg; |
708 | 27.1k | cu.intraDir[CH_L] = bestPUMode.modeId; |
709 | 27.1k | cu.bdpcmM[CH_L] = bestbdpcmMode; |
710 | 27.1k | cu.mipFlag = bestMip; |
711 | 27.1k | cu.multiRefIdx = bestMrl; |
712 | 27.1k | } |
713 | 0 | else |
714 | 0 | { |
715 | 0 | THROW("fix this"); |
716 | 0 | } |
717 | |
718 | 27.1k | csBest->releaseIntermediateData(); |
719 | |
720 | 27.1k | return validReturn; |
721 | 27.1k | } |
722 | | |
// Chroma intra mode decision for one CU.
// The chroma candidate modes are first ranked with a cheap SAD/HAD estimate, the
// worst-ranked ones are disabled, and every remaining candidate (plus up to two
// BDPCM variants) is coded with xIntraChromaCodingQT and costed with
// xGetIntraFracBitsQT. The cheapest candidate's reconstruction and TU data are
// kept in a scratch CodingStructure and copied back at the end.
//   cu             - CU under test; on return cu.intraDir[1], cu.bdpcmM[CH_C] and
//                    cu.lfnstIdx hold the values recorded for the cheapest candidate,
//                    and cu.ispMode may be cleared (see the end of the function).
//   partitioner    - provides the current area, channel type and tree type used to
//                    create the TUs and passed on to the coding/bit-estimation helpers.
//   maxCostAllowed - only consulted when luma uses ISP: if no evaluated chroma
//                    candidate costs less than this value, cu.ispMode is set to 0.
// Side effects visible here: cs.dist is set to the winning distortion, the CABAC
// estimator context is restored to its state on entry, and the picture
// reconstruction buffer is updated for the chroma blocks.
723 | | void IntraSearch::estIntraPredChromaQT( CodingUnit& cu, Partitioner& partitioner, const double maxCostAllowed ) |
724 | 60.9k | { |
725 | 60.9k | PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_CHROMA, cu.cs, CH_C ); |
// Snapshot of the estimator context; every candidate starts from this state.
726 | 60.9k | const TempCtx ctxStart( m_CtxCache, m_CABACEstimator->getCtx() ); |
727 | 60.9k | CodingStructure &cs = *cu.cs; |
// Luma ISP only matters here when luma and chroma share one tree.
728 | 60.9k | bool lumaUsesISP = !CU::isSepTree(cu) && cu.ispMode; |
729 | 60.9k | PartSplit ispType = lumaUsesISP ? CU::getISPType(cu, COMP_Y) : TU_NO_ISP; |
730 | 60.9k | double bestCostSoFar = maxCostAllowed; |
731 | 60.9k | const uint32_t numberValidComponents = getNumberValidComponents( cu.chromaFormat ); |
732 | 60.9k | const bool useBDPCM = cs.picture->useBDPCM; |
733 | | |
734 | 60.9k | uint32_t uiBestMode = 0; |
735 | 60.9k | Distortion uiBestDist = 0; |
736 | 60.9k | double dBestCost = MAX_DOUBLE; |
737 | | |
738 | | //----- init mode list ---- |
739 | 60.9k | { |
740 | 60.9k | uint32_t uiMinMode = 0; |
741 | 60.9k | uint32_t uiMaxMode = NUM_CHROMA_MODE; |
742 | | |
// Number of worst-ranked modes to disable after the pre-check: half of the
// list when m_reduceIntraChromaModesFullRD is set, a quarter otherwise.
743 | 60.9k | const int reducedModeNumber = uiMaxMode >> (m_pcEncCfg->m_reduceIntraChromaModesFullRD ? 1 : 2); |
744 | | //----- check chroma modes ----- |
745 | 60.9k | uint32_t chromaCandModes[ NUM_CHROMA_MODE ]; |
746 | 60.9k | CU::getIntraChromaCandModes( cu, chromaCandModes ); |
747 | | |
748 | | // create a temporary CS (scratch storage for the best candidate found so far) |
749 | 60.9k | CodingStructure &saveCS = *m_pSaveCS[0]; |
750 | 60.9k | saveCS.pcv = cs.pcv; |
751 | 60.9k | saveCS.picture = cs.picture; |
752 | 60.9k | saveCS.area.repositionTo( cs.area ); |
753 | 60.9k | saveCS.clearTUs(); |
754 | | |
755 | 60.9k | if( !CU::isSepTree(cu) && cu.ispMode ) |
756 | 0 | { |
757 | 0 | saveCS.clearCUs(); |
758 | 0 | } |
759 | | |
// With a separate chroma tree the TUs for this area are created here: one TU
// per part if the partitioner allows a TU_MAX_TR_SPLIT, otherwise a single TU.
760 | 60.9k | if( CU::isSepTree(cu) ) |
761 | 60.9k | { |
762 | 60.9k | if( partitioner.canSplit( TU_MAX_TR_SPLIT, cs ) ) |
763 | 0 | { |
764 | 0 | partitioner.splitCurrArea( TU_MAX_TR_SPLIT, cs ); |
765 | 

766 | 0 | do |
767 | 0 | { |
768 | 0 | cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ).depth = partitioner.currTrDepth; |
769 | 0 | } while( partitioner.nextPart( cs ) ); |
770 | 

771 | 0 | partitioner.exitCurrSplit(); |
772 | 0 | } |
773 | 60.9k | else |
774 | 60.9k | cs.addTU( CS::getArea( cs, partitioner.currArea(), partitioner.chType, partitioner.treeType ), partitioner.chType, &cu ); |
775 | 60.9k | } |
776 | | |
777 | | // create a store for the TUs: orgTUs[j] in cs pairs with saveCS.tus[j] |
778 | 60.9k | std::vector<TransformUnit*> orgTUs; |
779 | 60.9k | for( const auto &ptu : cs.tus ) |
780 | 60.9k | { |
781 | | // for split TUs in HEVC, add the TUs without Chroma parts for correct setting of Cbfs |
782 | 60.9k | if (lumaUsesISP || cu.contains(*ptu, CH_C)) |
783 | 60.9k | { |
784 | 60.9k | saveCS.addTU( *ptu, partitioner.chType, nullptr ); |
785 | 60.9k | orgTUs.push_back( ptu ); |
786 | 60.9k | } |
787 | 60.9k | } |
788 | | |
789 | | // SATD pre-selecting. |
// satdModeList / satdSortedCost are parallel arrays indexed by candidate slot;
// both start zero-initialised.
790 | 60.9k | int satdModeList [NUM_CHROMA_MODE] = { 0 }; |
791 | 60.9k | int64_t satdSortedCost[NUM_CHROMA_MODE] = { 0 }; |
792 | 60.9k | bool modeDisable[NUM_INTRA_MODE + 1] = { false }; // use intra mode idx to check whether enable |
793 | | |
// NOTE(review): this re-declares `cs` inside the inner scope and shadows the
// function-level reference above; both are bound to *cu.cs.
794 | 60.9k | CodingStructure& cs = *(cu.cs); |
795 | 60.9k | CompArea areaCb = cu.Cb(); |
796 | 60.9k | CompArea areaCr = cu.Cr(); |
797 | 60.9k | CPelBuf orgCb = cs.getOrgBuf (COMP_Cb); |
798 | 60.9k | PelBuf predCb = cs.getPredBuf(COMP_Cb); |
799 | 60.9k | CPelBuf orgCr = cs.getOrgBuf (COMP_Cr); |
800 | 60.9k | PelBuf predCr = cs.getPredBuf(COMP_Cr); |
801 | | |
// One DF_SAD and one DF_HAD distortion setup per chroma component, each
// comparing the original samples with the prediction buffer.
802 | 60.9k | DistParam distParamSadCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_SAD); |
803 | 60.9k | DistParam distParamSatdCb = m_pcRdCost->setDistParam( orgCb, predCb, cu.cs->sps->bitDepths[ CH_C ], DF_HAD); |
804 | 60.9k | DistParam distParamSadCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_SAD); |
805 | 60.9k | DistParam distParamSatdCr = m_pcRdCost->setDistParam( orgCr, predCr, cu.cs->sps->bitDepths[ CH_C ], DF_HAD); |
806 | | |
807 | 60.9k | cu.intraDir[1] = MDLM_L_IDX; // temporary assigned, just to indicate this is a MDLM mode. for luma down-sampling operation. |
808 | | |
809 | 60.9k | initIntraPatternChType(cu, cu.Cb()); |
810 | 60.9k | initIntraPatternChType(cu, cu.Cr()); |
811 | 60.9k | loadLMLumaRecPels(cu, cu.Cb()); |
812 | | |
// Pre-check loop: for every candidate that is not skipped below, predict Cb and
// Cr and store min(2*SAD, HAD) summed over both components. Candidates that are
// skipped keep their initial estimate of 0.
813 | 548k | for (int idx = uiMinMode; idx < uiMaxMode; idx++) |
814 | 487k | { |
815 | 487k | int mode = chromaCandModes[idx]; |
816 | 487k | satdModeList[idx] = mode; |
817 | 487k | if (CU::isLMCMode(mode) && ( !CU::isLMCModeEnabled(cu, mode) || cu.slice->lmChromaCheckDisable ) ) |
818 | 52.6k | { |
819 | 52.6k | continue; |
820 | 52.6k | } |
821 | 434k | if ((mode == LM_CHROMA_IDX) || (mode == PLANAR_IDX) || (mode == DM_CHROMA_IDX)) // only pre-check regular modes and MDLM modes, not including DM ,Planar, and LM |
822 | 106k | { |
823 | 106k | continue; |
824 | 106k | } |
825 | | |
826 | 328k | cu.intraDir[1] = mode; // temporary assigned, for SATD checking. |
827 | | |
828 | 328k | const bool isLMCMode = CU::isLMCMode(mode); |
829 | 328k | if( isLMCMode ) |
830 | 86.7k | { |
831 | 86.7k | predIntraChromaLM(COMP_Cb, predCb, cu, areaCb, mode); |
832 | 86.7k | } |
833 | 241k | else |
834 | 241k | { |
835 | 241k | initPredIntraParams(cu, cu.Cb(), *cs.sps); |
836 | 241k | predIntraAng(COMP_Cb, predCb, cu); |
837 | 241k | } |
838 | 328k | int64_t sadCb = distParamSadCb.distFunc(distParamSadCb) * 2; |
839 | 328k | int64_t satdCb = distParamSatdCb.distFunc(distParamSatdCb); |
840 | 328k | int64_t sad = std::min(sadCb, satdCb); |
841 | | |
842 | 328k | if( isLMCMode ) |
843 | 86.7k | { |
844 | 86.7k | predIntraChromaLM(COMP_Cr, predCr, cu, areaCr, mode); |
845 | 86.7k | } |
846 | 241k | else |
847 | 241k | { |
848 | 241k | initPredIntraParams(cu, cu.Cr(), *cs.sps); |
849 | 241k | predIntraAng(COMP_Cr, predCr, cu); |
850 | 241k | } |
851 | 328k | int64_t sadCr = distParamSadCr.distFunc(distParamSadCr) * 2; |
852 | 328k | int64_t satdCr = distParamSatdCr.distFunc(distParamSatdCr); |
853 | 328k | sad += std::min(sadCr, satdCr); |
854 | 328k | satdSortedCost[idx] = sad; |
855 | 328k | } |
856 | | |
857 | | // sort the mode based on the cost from small to large. |
// In-place exchange sort; mode list and cost list are swapped together.
858 | 548k | for (int i = uiMinMode; i <= uiMaxMode - 1; i++) |
859 | 487k | { |
860 | 2.19M | for (int j = i + 1; j <= uiMaxMode - 1; j++) |
861 | 1.70M | { |
862 | 1.70M | if (satdSortedCost[j] < satdSortedCost[i]) |
863 | 104k | { |
864 | 104k | std::swap( satdModeList[i], satdModeList[j]); |
865 | 104k | std::swap( satdSortedCost[i], satdSortedCost[j]); |
866 | 104k | } |
867 | 1.70M | } |
868 | 487k | } |
869 | | |
870 | 304k | for (int i = 0; i < reducedModeNumber; i++) |
871 | 243k | { |
872 | 243k | modeDisable[satdModeList[uiMaxMode - 1 - i]] = true; // disable the last reducedModeNumber modes |
873 | 243k | } |
874 | | |
875 | 60.9k | int bestLfnstIdx = 0; |
876 | | // save the dist |
877 | 60.9k | Distortion baseDist = cs.dist; |
878 | 60.9k | int32_t bestbdpcmMode = 0; |
// Two extra loop iterations are appended for BDPCM when it is enabled for the
// picture, allowed for Cb, and either this is a chroma-channel pass or luma
// uses no ISP, no LFNST and transform skip on its first TU.
879 | 60.9k | uint32_t numbdpcmModes = ( useBDPCM && CU::bdpcmAllowed(cu, COMP_Cb) |
880 | 40.3k | && ((partitioner.chType == CH_C) || (cu.ispMode == 0 && cu.lfnstIdx == 0 && cu.firstTU->mtsIdx[COMP_Y] == MTS_SKIP))) ? 2 : 0; |
// Full evaluation loop. mode >= 0 indexes chromaCandModes; mode -2 / -1 are the
// two BDPCM iterations (cu.bdpcmM[CH_C] becomes 2 / 1 respectively).
881 | 629k | for (int mode_cur = uiMinMode; mode_cur < (int)(uiMaxMode + numbdpcmModes); mode_cur++) |
882 | 568k | { |
883 | 568k | int mode = mode_cur; |
884 | 568k | if (mode_cur >= uiMaxMode) |
885 | 80.7k | { |
886 | 80.7k | mode = mode_cur > uiMaxMode ? -1 : -2; //set bdpcm mode |
// The second BDPCM iteration is skipped unless the currently saved best result
// uses MTS_SKIP for Cb or Cr on its first TU.
887 | 80.7k | if ((mode == -1) && (saveCS.tus[0]->mtsIdx[COMP_Cb] != MTS_SKIP) && (saveCS.tus[0]->mtsIdx[COMP_Cr] != MTS_SKIP)) |
888 | 40.3k | { |
889 | 40.3k | continue; |
890 | 40.3k | } |
891 | 80.7k | } |
892 | 527k | int chromaIntraMode; |
893 | 527k | if (mode < 0) |
894 | 40.3k | { |
895 | 40.3k | cu.bdpcmM[CH_C] = -mode; |
896 | 40.3k | chromaIntraMode = cu.bdpcmM[CH_C] == 2 ? chromaCandModes[1] : chromaCandModes[2]; |
897 | 40.3k | } |
898 | 487k | else |
899 | 487k | { |
900 | 487k | cu.bdpcmM[CH_C] = 0; |
901 | 487k | chromaIntraMode = chromaCandModes[mode]; |
902 | 487k | if (CU::isLMCMode(chromaIntraMode) && ( !CU::isLMCModeEnabled(cu, chromaIntraMode) || cu.slice->lmChromaCheckDisable ) ) |
903 | 52.6k | { |
904 | 52.6k | continue; |
905 | 52.6k | } |
906 | 434k | if (modeDisable[chromaIntraMode] && CU::isLMCModeEnabled(cu, chromaIntraMode)) // when CCLM is disabled, MDLM is disabled too, so the SATD-based disabling is not applied |
907 | 173k | { |
908 | 173k | continue; |
909 | 173k | } |
910 | 434k | } |
// Each candidate starts from the distortion and context state seen on entry.
911 | 301k | cs.dist = baseDist; |
912 | | //----- restore context models ----- |
913 | 301k | m_CABACEstimator->getCtx() = ctxStart; |
914 | | |
915 | | //----- chroma coding ----- |
916 | 301k | cu.intraDir[1] = chromaIntraMode; |
917 | 301k | m_ispTestedModes[0].IspType = ispType; |
918 | 301k | m_ispTestedModes[0].subTuCounter = -1; |
919 | 301k | xIntraChromaCodingQT( cs, partitioner ); |
// With luma ISP, a distortion of MAX_UINT marks the candidate as unusable.
920 | 301k | if (lumaUsesISP && cs.dist == MAX_UINT) |
921 | 0 | { |
922 | 0 | continue; |
923 | 0 | } |
924 | | |
// When transform skip is enabled in the SPS the context is restored once more
// before the bits are counted.
925 | 301k | if (cs.sps->transformSkip) |
926 | 301k | { |
927 | 301k | m_CABACEstimator->getCtx() = ctxStart; |
928 | 301k | } |
929 | 301k | m_ispTestedModes[0].IspType = ispType; |
930 | 301k | m_ispTestedModes[0].subTuCounter = -1; |
931 | 301k | uint64_t fracBits = xGetIntraFracBitsQT( cs, partitioner, false ); |
932 | 301k | Distortion uiDist = cs.dist; |
// Cost uses only the distortion added by this candidate (uiDist - baseDist).
933 | 301k | double dCost = m_pcRdCost->calcRdCost( fracBits, uiDist - baseDist ); |
934 | | |
935 | | //----- compare ----- |
936 | 301k | if( dCost < dBestCost ) |
937 | 109k | { |
938 | 109k | if (lumaUsesISP && (dCost < bestCostSoFar)) |
939 | 0 | { |
940 | 0 | bestCostSoFar = dCost; |
941 | 0 | } |
// New best: copy the reconstruction into saveCS and the picture, and the TU
// component data from cs (orgTUs) into saveCS.
942 | 327k | for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ ) |
943 | 218k | { |
944 | 218k | const CompArea& area = cu.blocks[i]; |
945 | 218k | saveCS.getRecoBuf ( area ).copyFrom( cs.getRecoBuf ( area ) ); |
946 | 218k | cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf ( area ) ); |
947 | 437k | for( uint32_t j = 0; j < saveCS.tus.size(); j++ ) |
948 | 218k | { |
949 | 218k | saveCS.tus[j]->copyComponentFrom( *orgTUs[j], area.compID ); |
950 | 218k | } |
951 | 218k | } |
952 | 109k | dBestCost = dCost; |
953 | 109k | uiBestDist = uiDist; |
954 | 109k | uiBestMode = chromaIntraMode; |
955 | 109k | bestLfnstIdx = cu.lfnstIdx; |
956 | 109k | bestbdpcmMode = cu.bdpcmM[CH_C]; |
957 | | |
958 | 109k | } |
959 | 301k | } |
960 | 60.9k | cu.lfnstIdx = bestLfnstIdx; |
961 | 60.9k | cu.bdpcmM[CH_C]= bestbdpcmMode; |
962 | | |
// Copy the saved best result back: reconstruction into cs and the picture, TU
// component data from saveCS into the original TUs.
963 | 182k | for( uint32_t i = getFirstComponentOfChannel( CH_C ); i < numberValidComponents; i++ ) |
964 | 121k | { |
965 | 121k | const CompArea& area = cu.blocks[i]; |
966 | | |
967 | 121k | cs.getRecoBuf ( area ).copyFrom( saveCS.getRecoBuf( area ) ); |
968 | 121k | cs.picture->getRecoBuf( area ).copyFrom( cs.getRecoBuf ( area ) ); |
969 | | |
970 | 243k | for( uint32_t j = 0; j < saveCS.tus.size(); j++ ) |
971 | 121k | { |
972 | 121k | orgTUs[ j ]->copyComponentFrom( *saveCS.tus[ j ], area.compID ); |
973 | 121k | } |
974 | 121k | } |
975 | 60.9k | } |
976 | 60.9k | cu.intraDir[1] = uiBestMode; |
977 | 60.9k | cs.dist = uiBestDist; |
978 | | |
979 | | //----- restore context models ----- |
980 | 60.9k | m_CABACEstimator->getCtx() = ctxStart; |
// No chroma candidate came in under maxCostAllowed: drop ISP for this CU.
981 | 60.9k | if (lumaUsesISP && bestCostSoFar >= maxCostAllowed) |
982 | 0 | { |
983 | 0 | cu.ispMode = 0; |
984 | 0 | } |
985 | 60.9k | } |
986 | | |
// Appends one (area, cost) pair to the parallel arrays m_cuAreaInSCIPU /
// m_cuCostInSCIPU and bumps m_numCuInSCIPU.
//   area - area to record.
//   cost - cost to record for that area.
// Once m_numCuInSCIPU has reached NUM_INTER_CU_INFO_SAVE the call does nothing;
// the pair is dropped without any error or report.
987 | | void IntraSearch::saveCuAreaCostInSCIPU( Area area, double cost ) |
988 | 0 | { |
989 | 0 | if( m_numCuInSCIPU < NUM_INTER_CU_INFO_SAVE ) |
990 | 0 | { |
991 | 0 | m_cuAreaInSCIPU[m_numCuInSCIPU] = area; |
992 | 0 | m_cuCostInSCIPU[m_numCuInSCIPU] = cost; |
993 | 0 | m_numCuInSCIPU++; |
994 | 0 | } |
995 | 0 | } |
996 | | |
// Clears the store filled by saveCuAreaCostInSCIPU: every one of the
// NUM_INTER_CU_INFO_SAVE slots gets a default-constructed Area and a cost of 0,
// and the entry count m_numCuInSCIPU is reset to 0.
997 | | void IntraSearch::initCuAreaCostInSCIPU() |
998 | 0 | { |
999 | 0 | for( int i = 0; i < NUM_INTER_CU_INFO_SAVE; i++ ) |
1000 | 0 | { |
1001 | 0 | m_cuAreaInSCIPU[i] = Area(); |
1002 | 0 | m_cuCostInSCIPU[i] = 0; |
1003 | 0 | } |
1004 | 0 | m_numCuInSCIPU = 0; |
1005 | 0 | } |
1006 | | // ------------------------------------------------------------------------------------------------------------------- |
1007 | | // Intra search |
1008 | | // ------------------------------------------------------------------------------------------------------------------- |
1009 | | |
// Passes the CU-level intra header syntax elements to m_CABACEstimator.
//   cs          - coding structure; the CU is looked up via cs.getCU with the
//                 partitioner's channel and tree type.
//   partitioner - supplies the current area used for the "first part" checks.
//   luma        - true: luma header (pred_mode, bdpcm_mode, intra_luma_pred_mode);
//                 false: chroma header (bdpcm_mode, intra_chroma_pred_mode).
// Nothing is emitted unless the current part is the first one of the CU.
1010 | | void IntraSearch::xEncIntraHeader( CodingStructure &cs, Partitioner &partitioner, const bool luma ) |
1011 | 502k | { |
1012 | 502k | CodingUnit &cu = *cs.getCU( partitioner.chType, partitioner.treeType ); |
1013 | | |
1014 | 502k | if (luma) |
1015 | 200k | { |
// With ISP the first part is identified by the sub-TU counter, otherwise by the
// current luma position matching the coding structure's luma position.
1016 | 200k | bool isFirst = cu.ispMode ? m_ispTestedModes[0].subTuCounter == 0 : partitioner.currArea().lumaPos() == cs.area.lumaPos(); |
1017 | | |
1018 | | // CU header |
1019 | 200k | if( isFirst ) |
1020 | 196k | { |
// pred_mode is emitted here only when the slice is not intra or the SPS enables
// IBC or PLT, and the CU has a valid luma block.
1021 | 196k | if ((!cs.slice->isIntra() || cs.slice->sps->IBC || cs.slice->sps->PLT) && cu.Y().valid()) |
1022 | 196k | { |
1023 | 196k | m_CABACEstimator->pred_mode ( cu ); |
1024 | 196k | } |
1025 | 196k | m_CABACEstimator->bdpcm_mode ( cu, ComponentID(partitioner.chType) ); |
1026 | 196k | } |
1027 | | |
1028 | | // luma prediction mode |
1029 | 200k | if (isFirst) |
1030 | 196k | { |
// Without a valid luma block pred_mode is emitted unconditionally at this point.
1031 | 196k | if ( !cu.Y().valid()) |
1032 | 0 | { |
1033 | 0 | m_CABACEstimator->pred_mode( cu ); |
1034 | 0 | } |
1035 | 196k | m_CABACEstimator->intra_luma_pred_mode( cu ); |
1036 | 196k | } |
1037 | 200k | } |
1038 | 301k | else // if (chroma) |
1039 | 301k | { |
// First chroma part: the current area has a valid Cb block located at the
// coding structure's chroma position.
1040 | 301k | bool isFirst = partitioner.currArea().Cb().valid() && partitioner.currArea().chromaPos() == cs.area.chromaPos(); |
1041 | | |
1042 | 301k | if( isFirst ) |
1043 | 301k | { |
1044 | 301k | m_CABACEstimator->bdpcm_mode(cu, ComponentID(CH_C)); |
1045 | 301k | m_CABACEstimator->intra_chroma_pred_mode( cu ); |
1046 | 301k | } |
1047 | 301k | } |
1048 | 502k | } |
1049 | | |
// Passes the coded-block-flag syntax for the partitioner's current area to
// m_CABACEstimator, recursing into sub-partitions while the TU found for the
// area is deeper than the current transform depth.
//   cs          - coding structure used to look up the TU and to drive splits.
//   partitioner - current area / depth; split and restored inside this call.
//   luma        - true: emit the luma CBF at leaf level; false: emit Cb/Cr CBFs.
// Throws (THROW) when a deeper TU exists but neither a TU_MAX_TR_SPLIT nor a
// luma ISP split can be applied.
1050 | | void IntraSearch::xEncSubdivCbfQT( CodingStructure &cs, Partitioner &partitioner, const bool luma ) |
1051 | 502k | { |
1052 | 502k | const UnitArea& currArea = partitioner.currArea(); |
1053 | 502k | int subTuCounter = m_ispTestedModes[0].subTuCounter; |
1054 | 502k | TransformUnit &currTU = *cs.getTU(currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter); |
1055 | 502k | CodingUnit &currCU = *currTU.cu; |
1056 | 502k | const uint32_t currDepth = partitioner.currTrDepth; |
// The TU lies below the current depth, so this area still has to be split.
1057 | 502k | const bool subdiv = currTU.depth > currDepth; |
1058 | 502k | ComponentID compID = partitioner.chType == CH_L ? COMP_Y : COMP_Cb; |
1059 | | |
1060 | 502k | if (!luma) |
1061 | 301k | { |
// Chroma CBFs are emitted either for non-ISP CUs, or for ISP CUs when the area
// has a valid Cb block and no further subdivision follows.
1062 | 301k | const bool chromaCbfISP = currArea.blocks[COMP_Cb].valid() && currCU.ispMode && !subdiv; |
1063 | 301k | if (!currCU.ispMode || chromaCbfISP) |
1064 | 301k | { |
1065 | 301k | const uint32_t numberValidComponents = getNumberValidComponents(currArea.chromaFormat); |
1066 | 301k | const uint32_t cbfDepth = (chromaCbfISP ? currDepth - 1 : currDepth); |
1067 | | |
1068 | 905k | for (uint32_t ch = COMP_Cb; ch < numberValidComponents; ch++) |
1069 | 603k | { |
// Loop-local compID; it shadows the function-level compID declared above.
1070 | 603k | const ComponentID compID = ComponentID(ch); |
1071 | 603k | if (currDepth == 0 || TU::getCbfAtDepth(currTU, compID, currDepth - 1) || chromaCbfISP) |
1072 | 603k | { |
// For Cr the Cb flag at this depth is handed over as the previous CBF.
1073 | 603k | const bool prevCbf = (compID == COMP_Cr ? TU::getCbfAtDepth(currTU, COMP_Cb, currDepth) : false); |
1074 | 603k | m_CABACEstimator->cbf_comp(currCU, TU::getCbfAtDepth(currTU, compID, currDepth), currArea.blocks[compID], cbfDepth, prevCbf); |
1075 | 603k | } |
1076 | 603k | } |
1077 | 301k | } |
1078 | 301k | } |
1079 | | |
1080 | 502k | if (subdiv) |
1081 | 0 | { |
1082 | 0 | if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) |
1083 | 0 | { |
1084 | 0 | partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); |
1085 | 0 | } |
1086 | 0 | else if (currCU.ispMode && isLuma(compID)) |
1087 | 0 | { |
1088 | 0 | partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs); |
1089 | 0 | } |
1090 | 0 | else |
1091 | 0 | THROW("Cannot perform an implicit split!"); |
1092 | | |
// NOTE(review): the increment below only changes this call's local copy; the
// recursive call takes no counter argument and re-reads
// m_ispTestedModes[0].subTuCounter on entry. Confirm whether that is intended.
1093 | 0 | do |
1094 | 0 | { |
1095 | 0 | xEncSubdivCbfQT(cs, partitioner, luma); //? |
1096 | 0 | subTuCounter += subTuCounter != -1 ? 1 : 0; |
1097 | 0 | } while (partitioner.nextPart(cs)); |
1098 | 

1099 | 0 | partitioner.exitCurrSplit(); |
1100 | 0 | } |
1101 | 502k | else |
1102 | 502k | { |
1103 | | //===== Cbfs ===== |
1104 | 502k | if (luma) |
1105 | 200k | { |
1106 | 200k | bool previousCbf = false; |
1107 | 200k | bool lastCbfIsInferred = false; |
1108 | 200k | if (m_ispTestedModes[0].IspType != TU_NO_ISP) |
1109 | 15.1k | { |
1110 | 15.1k | bool rootCbfSoFar = false; |
// Number of ISP sub-TUs: CU size divided by TU size along the split direction.
1111 | 15.1k | uint32_t nTus = currCU.ispMode == HOR_INTRA_SUBPARTITIONS ? currCU.lheight() >> floorLog2(currTU.lheight()) |
1112 | 15.1k | : currCU.lwidth() >> floorLog2(currTU.lwidth()); |
// On the last sub-TU, if none of the earlier sub-TUs has a luma CBF, the last
// flag is treated as inferred and is not emitted.
1113 | 15.1k | if (subTuCounter == nTus - 1) |
1114 | 1.41k | { |
1115 | 1.41k | TransformUnit* tuPointer = currCU.firstTU; |
1116 | 5.66k | for (int tuIdx = 0; tuIdx < nTus - 1; tuIdx++) |
1117 | 4.24k | { |
1118 | 4.24k | rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, currDepth); |
1119 | 4.24k | tuPointer = tuPointer->next; |
1120 | 4.24k | } |
1121 | 1.41k | if (!rootCbfSoFar) |
1122 | 0 | { |
1123 | 0 | lastCbfIsInferred = true; |
1124 | 0 | } |
1125 | 1.41k | } |
1126 | 15.1k | if (!lastCbfIsInferred) |
1127 | 15.1k | { |
1128 | 15.1k | previousCbf = TU::getPrevTuCbfAtDepth(currTU, COMP_Y, partitioner.currTrDepth); |
1129 | 15.1k | } |
1130 | 15.1k | } |
1131 | 200k | if (!lastCbfIsInferred) |
1132 | 200k | { |
1133 | 200k | m_CABACEstimator->cbf_comp(currCU, TU::getCbfAtDepth(currTU, COMP_Y, currDepth), currTU.Y(), currTU.depth, previousCbf, currCU.ispMode); |
1134 | 200k | } |
1135 | 200k | } |
1136 | 502k | } |
1137 | 502k | } |
// Passes the residual syntax of one component for the partitioner's current area
// to m_CABACEstimator, recursing into sub-partitions while the TU found for the
// area is deeper than the current transform depth.
//   cs          - coding structure used to look up the TU and to drive splits.
//   partitioner - current area / depth; split and restored inside this call.
//   compID      - component whose coefficients are coded.
//   cuCtx       - CU context forwarded to residual_coding / mts_idx for luma.
//   subTuIdx    - not read in this body; the sub-TU index is taken from
//                 m_ispTestedModes[0].subTuCounter instead.
//   ispType     - not read in this body; m_ispTestedModes[0].IspType is used.
// Callers in this file omit the trailing arguments, so they are presumably
// defaulted in the declaration (not visible here).
// Throws (THROW) when a deeper TU exists but no split can be applied.
1138 | | void IntraSearch::xEncCoeffQT(CodingStructure& cs, Partitioner& partitioner, const ComponentID compID, CUCtx* cuCtx, const int subTuIdx, const PartSplit ispType) |
1139 | 804k | { |
1140 | 804k | const UnitArea& currArea = partitioner.currArea(); |
1141 | | |
1142 | 804k | int subTuCounter = m_ispTestedModes[0].subTuCounter; |
1143 | 804k | TransformUnit& currTU = *cs.getTU(currArea.blocks[partitioner.chType], partitioner.chType, subTuCounter); |
1144 | 804k | uint32_t currDepth = partitioner.currTrDepth; |
1145 | 804k | const bool subdiv = currTU.depth > currDepth; |
1146 | | |
1147 | 804k | if (subdiv) |
1148 | 0 | { |
1149 | 0 | if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) |
1150 | 0 | { |
1151 | 0 | partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); |
1152 | 0 | } |
1153 | 0 | else if (currTU.cu->ispMode) |
1154 | 0 | { |
1155 | 0 | partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs); |
1156 | 0 | } |
1157 | 0 | else |
1158 | 0 | THROW("Implicit TU split not available!"); |
1159 | | |
// The local counter is passed as subTuIdx, which the callee does not read.
1160 | 0 | do |
1161 | 0 | { |
1162 | 0 | xEncCoeffQT(cs, partitioner, compID, cuCtx, subTuCounter, m_ispTestedModes[0].IspType); |
1163 | 0 | subTuCounter += subTuCounter != -1 ? 1 : 0; |
1164 | 0 | } while( partitioner.nextPart( cs ) ); |
1165 | 

1166 | 0 | partitioner.exitCurrSplit(); |
1167 | 0 | } |
// The `else` below has no braces: its body is the `if` statement that follows.
1168 | 804k | else |
1169 | | |
1170 | 804k | if( currArea.blocks[compID].valid() ) |
1171 | 804k | { |
// For Cr the joint Cb-Cr flag is emitted first; mask bit 1 (value 2) is the Cb
// CBF and bit 0 (value 1) is the Cr CBF.
1172 | 804k | if( compID == COMP_Cr ) |
1173 | 301k | { |
1174 | 301k | const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 ); |
1175 | 301k | m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); |
1176 | 301k | } |
// Coefficients are coded only when the component's CBF is set; luma also codes
// mts_idx and forwards cuCtx, chroma calls residual_coding without cuCtx.
1177 | 804k | if( TU::getCbf( currTU, compID ) ) |
1178 | 243k | { |
1179 | 243k | if( isLuma(compID) ) |
1180 | 26.8k | { |
1181 | 26.8k | m_CABACEstimator->residual_coding( currTU, compID, cuCtx ); |
1182 | 26.8k | m_CABACEstimator->mts_idx( *currTU.cu, cuCtx ); |
1183 | 26.8k | } |
1184 | 216k | else |
1185 | 216k | m_CABACEstimator->residual_coding( currTU, compID ); |
1186 | 243k | } |
1187 | 804k | } |
1188 | 804k | } |
1189 | | |
// Estimates the bits for the intra syntax of the partitioner's current area:
// resets the estimator's bit counter, then emits the header (xEncIntraHeader),
// the CBFs (xEncSubdivCbfQT) and the coefficients (xEncCoeffQT).
//   cs          - coding structure being costed.
//   partitioner - current area / channel state, forwarded to the helpers.
//   luma        - true: cost Y (plus the LFNST index when applicable);
//                 false: cost Cb and Cr.
//   cuCtx       - CU context for luma residual/LFNST coding; may be null, in
//                 which case no LFNST index is emitted. One caller in this file
//                 omits it, so it is presumably defaulted in the declaration.
// Returns the value of m_CABACEstimator->getEstFracBits() after the emission.
1190 | | uint64_t IntraSearch::xGetIntraFracBitsQT( CodingStructure &cs, Partitioner &partitioner, const bool luma, CUCtx *cuCtx ) |
1191 | 502k | { |
1192 | 502k | m_CABACEstimator->resetBits(); |
1193 | | |
1194 | 502k | xEncIntraHeader( cs, partitioner, luma ); |
1195 | 502k | xEncSubdivCbfQT( cs, partitioner, luma ); |
1196 | | |
1197 | 502k | if( luma ) |
1198 | 200k | { |
1199 | 200k | xEncCoeffQT( cs, partitioner, COMP_Y, cuCtx ); |
1200 | | |
1201 | 200k | CodingUnit &cu = *cs.cus[0]; |
// The LFNST index is emitted when a cuCtx is given and one of these holds:
//  - the CU does not use ISP;
//  - ISP with a non-zero lfnstIdx, on sub-TU 0;
//  - ISP with lfnstIdx == 0, on the last sub-TU (numTotalParts - 1).
1202 | 200k | if (cuCtx /*&& CU::isSepTree(cu)*/ |
1203 | 126k | && (!cu.ispMode || (cu.lfnstIdx && m_ispTestedModes[0].subTuCounter == 0) |
1204 | 9.57k | || (!cu.lfnstIdx |
1205 | 8.16k | && m_ispTestedModes[0].subTuCounter == m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1] - 1))) |
1206 | 117k | { |
1207 | 117k | m_CABACEstimator->residual_lfnst_mode( cu, *cuCtx ); |
1208 | 117k | } |
1209 | 200k | } |
1210 | 301k | else |
1211 | 301k | { |
1212 | 301k | xEncCoeffQT( cs, partitioner, COMP_Cb ); |
1213 | 301k | xEncCoeffQT( cs, partitioner, COMP_Cr ); |
1214 | 301k | } |
1215 | | |
1216 | 502k | uint64_t fracBits = m_CABACEstimator->getEstFracBits(); |
1217 | 502k | return fracBits; |
1218 | 502k | } |
1219 | | |
// Estimates the bits for the chroma CBF / joint-CbCr / residual syntax of a
// single TU after resetting the estimator's bit counter.
//   currTU - transform unit to cost; its jointCbCr field selects the path.
//   compID - chroma component being costed on the non-joint path.
//   cuCtx  - CU context forwarded to residual_coding.
// Returns the value of m_CABACEstimator->getEstFracBits() after the emission.
1220 | | uint64_t IntraSearch::xGetIntraFracBitsQTChroma(const TransformUnit& currTU, const ComponentID compID, CUCtx *cuCtx) |
1221 | 1.88M | { |
1222 | 1.88M | m_CABACEstimator->resetBits(); |
1223 | | |
// Joint Cb-Cr path: both CBFs, the joint flag (only when some CBF is set) and
// the residual of each component whose CBF bit is set are emitted in one go.
// Mask bit 1 (value 2) is the Cb CBF, bit 0 (value 1) the Cr CBF.
1224 | 1.88M | if ( currTU.jointCbCr ) |
1225 | 279k | { |
1226 | 279k | const int cbfMask = ( TU::getCbf( currTU, COMP_Cb ) ? 2 : 0 ) + ( TU::getCbf( currTU, COMP_Cr ) ? 1 : 0 ); |
1227 | 279k | m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask>>1, currTU.blocks[ COMP_Cb ], currTU.depth, false ); |
1228 | 279k | m_CABACEstimator->cbf_comp( *currTU.cu, cbfMask &1, currTU.blocks[ COMP_Cr ], currTU.depth, cbfMask>>1 ); |
1229 | 279k | if( cbfMask ) |
1230 | 279k | m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); |
1231 | 279k | if (cbfMask >> 1) |
1232 | 278k | m_CABACEstimator->residual_coding( currTU, COMP_Cb, cuCtx ); |
1233 | 279k | if (cbfMask & 1) |
1234 | 279k | m_CABACEstimator->residual_coding( currTU, COMP_Cr, cuCtx ); |
1235 | 279k | } |
1236 | 1.60M | else |
1237 | 1.60M | { |
// Separate path: Cb emits only its own CBF; any other compID emits its CBF
// (with the Cb CBF as previous flag) followed by the joint Cb-Cr flag.
1238 | 1.60M | if ( compID == COMP_Cb ) |
1239 | 802k | m_CABACEstimator->cbf_comp( *currTU.cu, TU::getCbf( currTU, compID ), currTU.blocks[ compID ], currTU.depth, false ); |
1240 | 802k | else |
1241 | 802k | { |
1242 | 802k | const bool cbCbf = TU::getCbf( currTU, COMP_Cb ); |
1243 | 802k | const bool crCbf = TU::getCbf( currTU, compID ); |
1244 | 802k | const int cbfMask = ( cbCbf ? 2 : 0 ) + ( crCbf ? 1 : 0 ); |
1245 | 802k | m_CABACEstimator->cbf_comp( *currTU.cu, crCbf, currTU.blocks[ compID ], currTU.depth, cbCbf ); |
1246 | 802k | m_CABACEstimator->joint_cb_cr( currTU, cbfMask ); |
1247 | 802k | } |
1248 | 1.60M | } |
1249 | | |
// Residual of the requested component on the separate path, when its CBF is set.
1250 | 1.88M | if( !currTU.jointCbCr && TU::getCbf( currTU, compID ) ) |
1251 | 563k | { |
1252 | 563k | m_CABACEstimator->residual_coding( currTU, compID, cuCtx ); |
1253 | 563k | } |
1254 | | |
1255 | 1.88M | uint64_t fracBits = m_CABACEstimator->getEstFracBits(); |
1256 | 1.88M | return fracBits; |
1257 | 1.88M | } |
1258 | | |
1259 | | void IntraSearch::xIntraCodingTUBlock(TransformUnit &tu, const ComponentID compID, const bool checkCrossCPrediction, Distortion &ruiDist, uint32_t *numSig, PelUnitBuf *predBuf, const bool loadTr) |
1260 | 2.09M | { |
1261 | 2.09M | if (!tu.blocks[compID].valid()) |
1262 | 0 | { |
1263 | 0 | return; |
1264 | 0 | } |
1265 | | |
1266 | 2.09M | CodingStructure &cs = *tu.cs; |
1267 | 2.09M | const CompArea &area = tu.blocks[compID]; |
1268 | 2.09M | const SPS &sps = *cs.sps; |
1269 | 2.09M | const ReshapeData& reshapeData = cs.picture->reshapeData; |
1270 | | |
1271 | 2.09M | const ChannelType chType = toChannelType(compID); |
1272 | 2.09M | const int bitDepth = sps.bitDepths[chType]; |
1273 | | |
1274 | 2.09M | CPelBuf piOrg = cs.getOrgBuf (area); |
1275 | 2.09M | PelBuf piPred = cs.getPredBuf (area); |
1276 | 2.09M | PelBuf piResi = cs.getResiBuf (area); |
1277 | 2.09M | PelBuf piReco = cs.getRecoBuf (area); |
1278 | | |
1279 | 2.09M | const CodingUnit& cu = *tu.cu; |
1280 | | |
1281 | | //===== init availability pattern ===== |
1282 | 2.09M | CHECK( tu.jointCbCr && compID == COMP_Cr, "wrong combination of compID and jointCbCr" ); |
1283 | 2.09M | bool jointCbCr = tu.jointCbCr && compID == COMP_Cb; |
1284 | | |
1285 | 2.09M | if ( isLuma(compID) ) |
1286 | 206k | { |
1287 | 206k | bool predRegDiffFromTB = CU::isPredRegDiffFromTB(*tu.cu ); |
1288 | 206k | bool firstTBInPredReg = false; |
1289 | 206k | CompArea areaPredReg(COMP_Y, tu.chromaFormat, area); |
1290 | 206k | if (tu.cu->ispMode ) |
1291 | 20.7k | { |
1292 | 20.7k | firstTBInPredReg = CU::isFirstTBInPredReg(*tu.cu, area); |
1293 | 20.7k | if (predRegDiffFromTB) |
1294 | 0 | { |
1295 | 0 | if (firstTBInPredReg) |
1296 | 0 | { |
1297 | 0 | CU::adjustPredArea(areaPredReg); |
1298 | 0 | initIntraPatternChTypeISP(*tu.cu, areaPredReg, piReco); |
1299 | 0 | } |
1300 | 0 | } |
1301 | 20.7k | else |
1302 | 20.7k | initIntraPatternChTypeISP(*tu.cu, area, piReco); |
1303 | 20.7k | } |
1304 | 185k | else if( !predBuf ) |
1305 | 31.6k | { |
1306 | 31.6k | initIntraPatternChType(*tu.cu, area); |
1307 | 31.6k | } |
1308 | | |
1309 | | //===== get prediction signal ===== |
1310 | 206k | if (predRegDiffFromTB) |
1311 | 0 | { |
1312 | 0 | if (firstTBInPredReg) |
1313 | 0 | { |
1314 | 0 | PelBuf piPredReg = cs.getPredBuf(areaPredReg); |
1315 | 0 | predIntraAng(compID, piPredReg, cu); |
1316 | 0 | } |
1317 | 0 | } |
1318 | 206k | else |
1319 | 206k | { |
1320 | 206k | if( predBuf ) |
1321 | 154k | { |
1322 | 154k | piPred.copyFrom( predBuf->Y() ); |
1323 | 154k | } |
1324 | 52.3k | else if( CU::isMIP( cu, CH_L ) ) |
1325 | 23.7k | { |
1326 | 23.7k | initIntraMip( cu ); |
1327 | 23.7k | predIntraMip( piPred, cu ); |
1328 | 23.7k | } |
1329 | 28.6k | else |
1330 | 28.6k | { |
1331 | 28.6k | predIntraAng(compID, piPred, cu); |
1332 | 28.6k | } |
1333 | 206k | } |
1334 | 206k | } |
1335 | 2.09M | DTRACE( g_trace_ctx, D_PRED, "@(%4d,%4d) [%2dx%2d] IMode=%d\n", tu.lx(), tu.ly(), tu.lwidth(), tu.lheight(), CU::getFinalIntraMode(cu, chType) ); |
1336 | 2.09M | const Slice &slice = *cs.slice; |
1337 | 2.09M | bool flag = cs.picHeader->lmcsEnabled && (slice.isIntra() || (!slice.isIntra() && reshapeData.getCTUFlag())); |
1338 | | |
1339 | 2.09M | if (isLuma(compID)) |
1340 | 206k | { |
1341 | | //===== get residual signal ===== |
1342 | 206k | if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag() ) |
1343 | 0 | { |
1344 | 0 | piResi.subtract(cs.getRspOrgBuf(area), piPred); |
1345 | 0 | } |
1346 | 206k | else |
1347 | 206k | { |
1348 | 206k | piResi.subtract( piOrg, piPred ); |
1349 | 206k | } |
1350 | 206k | } |
1351 | | |
1352 | | //===== transform and quantization ===== |
1353 | | //--- init rate estimation arrays for RDOQ --- |
1354 | | //--- transform and quantization --- |
1355 | 2.09M | TCoeff uiAbsSum = 0; |
1356 | 2.09M | const QpParam cQP(tu, compID); |
1357 | | |
1358 | 2.09M | m_pcTrQuant->selectLambda(compID); |
1359 | | |
1360 | 2.09M | flag =flag && (tu.blocks[compID].width*tu.blocks[compID].height > 4); |
1361 | 2.09M | if (flag && isChroma(compID) && cs.picHeader->lmcsChromaResidualScale ) |
1362 | 0 | { |
1363 | 0 | int cResScaleInv = tu.chromaAdj; |
1364 | 0 | double cRescale = (double)(1 << CSCALE_FP_PREC) / (double)cResScaleInv; |
1365 | 0 | m_pcTrQuant->scaleLambda( 1.0/(cRescale*cRescale) ); |
1366 | 0 | } |
1367 | | |
1368 | 2.09M | if ( jointCbCr ) |
1369 | 283k | { |
1370 | | // Lambda is loosened for the joint mode with respect to single modes as the same residual is used for both chroma blocks |
1371 | 283k | const int absIct = abs( TU::getICTMode(tu) ); |
1372 | 283k | const double lfact = ( absIct == 1 || absIct == 3 ? 0.8 : 0.5 ); |
1373 | 283k | m_pcTrQuant->scaleLambda( lfact ); |
1374 | 283k | } |
1375 | 2.09M | if ( sps.jointCbCr && isChroma(compID) && (tu.cu->cs->slice->sliceQp > 18) ) |
1376 | 1.26M | { |
1377 | 1.26M | m_pcTrQuant->scaleLambda( 1.3 ); |
1378 | 1.26M | } |
1379 | | |
1380 | 2.09M | if( isLuma(compID) ) |
1381 | 206k | { |
1382 | 206k | m_pcTrQuant->transformNxN(tu, compID, cQP, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); |
1383 | | |
1384 | 206k | DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), compID, uiAbsSum ); |
1385 | 206k | if (tu.cu->ispMode && isLuma(compID) && CU::isISPLast(*tu.cu, area, area.compID) && CU::allLumaCBFsAreZero(*tu.cu)) |
1386 | 0 | { |
1387 | | // ISP has to have at least one non-zero CBF |
1388 | 0 | ruiDist = MAX_INT; |
1389 | 0 | return; |
1390 | 0 | } |
1391 | | //--- inverse transform --- |
1392 | 206k | if (uiAbsSum > 0) |
1393 | 32.4k | { |
1394 | 32.4k | m_pcTrQuant->invTransformNxN(tu, compID, piResi, cQP); |
1395 | 32.4k | } |
1396 | 174k | else |
1397 | 174k | { |
1398 | 174k | piResi.fill(0); |
1399 | 174k | } |
1400 | 206k | } |
1401 | 1.88M | else // chroma |
1402 | 1.88M | { |
1403 | 1.88M | PelBuf crPred = cs.getPredBuf ( COMP_Cr ); |
1404 | 1.88M | PelBuf crResi = cs.getResiBuf ( COMP_Cr ); |
1405 | 1.88M | PelBuf crReco = cs.getRecoBuf ( COMP_Cr ); |
1406 | | |
1407 | 1.88M | int codedCbfMask = 0; |
1408 | 1.88M | ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID); |
1409 | 1.88M | const QpParam qpCbCr(tu, codeCompId); |
1410 | | |
1411 | 1.88M | if( tu.jointCbCr ) |
1412 | 283k | { |
1413 | 283k | ComponentID otherCompId = ( codeCompId==COMP_Cr ? COMP_Cb : COMP_Cr ); |
1414 | 283k | tu.getCoeffs( otherCompId ).fill(0); // do we need that? |
1415 | 283k | TU::setCbfAtDepth (tu, otherCompId, tu.depth, false ); |
1416 | 283k | } |
1417 | 1.88M | PelBuf& codeResi = ( codeCompId == COMP_Cr ? crResi : piResi ); |
1418 | 1.88M | uiAbsSum = 0; |
1419 | 1.88M | m_pcTrQuant->transformNxN(tu, codeCompId, qpCbCr, uiAbsSum, m_CABACEstimator->getCtx(), loadTr); |
1420 | 1.88M | DTRACE( g_trace_ctx, D_TU_ABS_SUM, "%d: comp=%d, abssum=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_TU_ABS_SUM ), codeCompId, uiAbsSum ); |
1421 | 1.88M | if( uiAbsSum > 0 ) |
1422 | 842k | { |
1423 | 842k | m_pcTrQuant->invTransformNxN(tu, codeCompId, codeResi, qpCbCr); |
1424 | 842k | codedCbfMask += ( codeCompId == COMP_Cb ? 2 : 1 ); |
1425 | 842k | } |
1426 | 1.04M | else |
1427 | 1.04M | { |
1428 | 1.04M | codeResi.fill(0); |
1429 | 1.04M | } |
1430 | | |
1431 | 1.88M | if( tu.jointCbCr ) |
1432 | 283k | { |
1433 | 283k | if( tu.jointCbCr == 3 && codedCbfMask == 2 ) |
1434 | 278k | { |
1435 | 278k | codedCbfMask = 3; |
1436 | 278k | TU::setCbfAtDepth (tu, COMP_Cr, tu.depth, true ); |
1437 | 278k | } |
1438 | 283k | if( tu.jointCbCr != codedCbfMask ) |
1439 | 3.55k | { |
1440 | 3.55k | ruiDist = MAX_DISTORTION; |
1441 | 3.55k | return; |
1442 | 3.55k | } |
1443 | 279k | m_pcTrQuant->invTransformICT( tu, piResi, crResi ); |
1444 | 279k | uiAbsSum = codedCbfMask; |
1445 | 279k | } |
1446 | | |
1447 | | //===== reconstruction ===== |
1448 | 1.88M | if ( flag && uiAbsSum > 0 && cs.picHeader->lmcsChromaResidualScale ) |
1449 | 0 | { |
1450 | 0 | piResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[compID]); |
1451 | |
|
1452 | 0 | if( jointCbCr ) |
1453 | 0 | { |
1454 | 0 | crResi.scaleSignal(tu.chromaAdj, 0, slice.clpRngs[COMP_Cr]); |
1455 | 0 | } |
1456 | 0 | } |
1457 | | |
1458 | 1.88M | if( jointCbCr ) |
1459 | 279k | { |
1460 | 279k | crReco.reconstruct(crPred, crResi, cs.slice->clpRngs[ COMP_Cr ]); |
1461 | 279k | } |
1462 | 1.88M | } |
1463 | 2.09M | piReco.reconstruct(piPred, piResi, cs.slice->clpRngs[ compID ]); |
1464 | | |
1465 | | |
1466 | | |
1467 | | //===== update distortion ===== |
1468 | 2.09M | const bool reshapeIntraCMD = m_pcEncCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ; |
1469 | 2.09M | if(((cs.picHeader->lmcsEnabled && (reshapeData.getCTUFlag() || (isChroma(compID) && reshapeIntraCMD))) || m_pcEncCfg->m_lumaLevelToDeltaQPEnabled ) ) |
1470 | 0 | { |
1471 | 0 | const CPelBuf orgLuma = cs.getOrgBuf( cs.area.blocks[COMP_Y] ); |
1472 | 0 | if( compID == COMP_Y && !m_pcEncCfg->m_lumaLevelToDeltaQPEnabled ) |
1473 | 0 | { |
1474 | 0 | PelBuf tmpRecLuma = cs.getRspRecoBuf(area); |
1475 | 0 | tmpRecLuma.rspSignal( piReco, reshapeData.getInvLUT()); |
1476 | 0 | ruiDist += m_pcRdCost->getDistPart(piOrg, tmpRecLuma, sps.bitDepths[toChannelType(compID)], compID, DF_SSE_WTD, &orgLuma); |
1477 | 0 | } |
1478 | 0 | else |
1479 | 0 | { |
1480 | 0 | ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE_WTD, &orgLuma ); |
1481 | 0 | if( jointCbCr ) |
1482 | 0 | { |
1483 | 0 | CPelBuf crOrg = cs.getOrgBuf ( COMP_Cr ); |
1484 | 0 | PelBuf crReco = cs.getRecoBuf ( COMP_Cr ); |
1485 | 0 | ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE_WTD, &orgLuma ); |
1486 | 0 | } |
1487 | 0 | } |
1488 | 0 | } |
1489 | 2.09M | else |
1490 | 2.09M | { |
1491 | 2.09M | ruiDist += m_pcRdCost->getDistPart( piOrg, piReco, bitDepth, compID, DF_SSE ); |
1492 | 2.09M | if( jointCbCr ) |
1493 | 279k | { |
1494 | 279k | CPelBuf crOrg = cs.getOrgBuf ( COMP_Cr ); |
1495 | 279k | PelBuf crReco = cs.getRecoBuf ( COMP_Cr ); |
1496 | 279k | ruiDist += m_pcRdCost->getDistPart( crOrg, crReco, bitDepth, COMP_Cr, DF_SSE ); |
1497 | 279k | } |
1498 | 2.09M | } |
1499 | 2.09M | } |
1500 | | |
// Rate-distortion search over luma transform candidates for the first CU of cs
// (cs.cus[0]) on the partitioner's current area.
//
// A candidate is a pair (modeId, lfnstIdx): modeId runs from 0 to EndMTS and indexes
// trModes, lfnstIdx runs from startLfnstIdx (0) to endLfnstIdx. When cu.ispMode is set
// each candidate is evaluated through xTestISP, otherwise through xIntraCodingTUBlock
// followed by xGetIntraFracBitsQT. The candidate with the lowest cost is kept in cs;
// its distortion and bits are added to cs.dist and cs.fracBits and its cost is written
// to cs.cost.
//
//   cs            - coding structure that is searched and updated (TUs are added to it).
//   partitioner   - supplies the current area, transform depth, channel type and tree
//                   type; it is split and restored around the ISP tests.
//   predBuf       - forwarded unchanged to xPreCheckMTS and xIntraCodingTUBlock.
//   bestCostSoFar - initial cost bound handed to xTestISP and reference value for the
//                   early-termination check after the first candidate.
//   numMode       - a negative value disables the MTS and LFNST candidates; afterwards
//                   only the magnitude is used (to decide whether the ISP cost is stored).
//   disableMTS    - when true, no MTS candidates are tested.
1501 | | void IntraSearch::xIntraCodingLumaQT(CodingStructure& cs, Partitioner& partitioner, PelUnitBuf* predBuf, const double bestCostSoFar, int numMode, bool disableMTS) |
1502 | 124k | { |
1503 | 124k | PROFILER_SCOPE_AND_STAGE_EXT( 0, _TPROF, P_INTRA_RD_SEARCH_LUMA, &cs, partitioner.chType ); |
1504 | 124k | const UnitArea& currArea = partitioner.currArea(); |
1505 | 124k | uint32_t currDepth = partitioner.currTrDepth; |
1506 | 124k | Distortion singleDistLuma = 0; |
1507 | 124k | uint32_t numSig = 0; |
1508 | 124k | const SPS &sps = *cs.sps; |
1509 | 124k | CodingUnit &cu = *cs.cus[0]; |
1510 | 124k | bool mtsAllowed = (numMode < 0) || disableMTS ? false : CU::isMTSAllowed(cu, COMP_Y); |
1511 | 124k | uint64_t singleFracBits = 0; |
1512 | 124k | bool splitCbfLumaSum = false; |
1513 | 124k | double bestCostForISP = bestCostSoFar; |
1514 | 124k | double dSingleCost = MAX_DOUBLE; |
// LFNST indices 0..2 are tested unless one of the exclusions below applies: separate-tree
// chroma partition with a luma dimension below 8, area larger than the maximum TB size,
// LFNST disabled in the SPS, or a negative numMode. In those cases only index 0 is tested.
1515 | 124k | int endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (currArea.lwidth() < 8 || currArea.lheight() < 8)) |
1516 | 124k | || (currArea.lwidth() > sps.getMaxTbSize() || currArea.lheight() > sps.getMaxTbSize()) || !sps.LFNST || (numMode < 0) ? 0 : 2; |
1517 | 124k | const bool useTS = cs.picture->useTS; |
// The sign of numMode has been consumed above; from here on only its magnitude is used.
1518 | 124k | numMode = (numMode < 0) ? -numMode : numMode; |
1519 | | 
1520 | 124k | if (cu.mipFlag && !allowLfnstWithMip(cu.lumaSize())) |
1521 | 2.08k | { |
1522 | 2.08k | endLfnstIdx = 0; |
1523 | 2.08k | } |
1524 | 124k | int bestMTS = 0; |
1525 | 124k | int EndMTS = mtsAllowed ? m_pcEncCfg->m_MTSIntraMaxCand : 0; |
// ISP: MTS candidates are never tested; LFNST is dropped as well when neither LFNST
// index 1 nor 2 has any parts recorded in m_ispTestedModes for this ISP mode.
1526 | 124k | if (cu.ispMode && (EndMTS || endLfnstIdx)) |
1527 | 5.63k | { |
1528 | 5.63k | EndMTS = 0; |
1529 | 5.63k | if ((m_ispTestedModes[1].numTotalParts[cu.ispMode - 1] == 0) |
1530 | 302 | && (m_ispTestedModes[2].numTotalParts[cu.ispMode - 1] == 0)) |
1531 | 302 | { |
1532 | 302 | endLfnstIdx = 0; |
1533 | 302 | } |
1534 | 5.63k | } |
// Luma BDPCM: neither LFNST nor MTS candidates are tested.
1535 | 124k | if (cu.bdpcmM[CH_L]) |
1536 | 7.68k | { |
1537 | 7.68k | endLfnstIdx = 0; |
1538 | 7.68k | EndMTS = 0; |
1539 | 7.68k | } |
1540 | 124k | bool checkTransformSkip = sps.transformSkip; |
1541 | | 
1542 | 124k | SizeType transformSkipMaxSize = 1 << sps.log2MaxTransformSkipBlockSize; |
// Transform skip is a candidate only when enabled for the picture and in the SPS, the CU
// uses no ISP, BDPCM or SBT, and the luma block fits the maximum transform-skip size.
// It then adds one more modeId to the search.
1543 | 124k | bool tsAllowed = useTS && cu.cs->sps->transformSkip && (!cu.ispMode) && (!cu.bdpcmM[CH_L]) && (!cu.sbtInfo); |
1544 | 124k | tsAllowed &= cu.blocks[COMP_Y].width <= transformSkipMaxSize && cu.blocks[COMP_Y].height <= transformSkipMaxSize; |
1545 | 124k | if (tsAllowed) |
1546 | 15.4k | { |
1547 | 15.4k | EndMTS += 1; |
1548 | 15.4k | } |
// More than one candidate to compare: run the search loop and keep the best candidate
// backed up in saveCS. The else branch further down handles the single-candidate case.
1549 | 124k | if (endLfnstIdx || EndMTS) |
1550 | 49.8k | { |
1551 | 49.8k | bool splitCbfLuma = false; |
1552 | 49.8k | const PartSplit ispType = CU::getISPType(cu, COMP_Y); |
1553 | 49.8k | CUCtx cuCtx; |
1554 | 49.8k | cuCtx.isDQPCoded = true; |
1555 | 49.8k | cuCtx.isChromaQpAdjCoded = true; |
1556 | 49.8k | cs.cost = 0.0; |
1557 | 49.8k | Distortion singleDistTmpLuma = 0; |
1558 | 49.8k | uint64_t singleTmpFracBits = 0; |
1559 | 49.8k | double singleCostTmp = 0; |
1560 | 49.8k | const TempCtx ctxStart (m_CtxCache, m_CABACEstimator->getCtx()); |
1561 | 49.8k | TempCtx ctxBest (m_CtxCache); |
1562 | 49.8k | CodingStructure &saveCS = *m_pSaveCS[cu.ispMode?0:1]; |
1563 | 49.8k | TransformUnit * tmpTU = nullptr; |
1564 | 49.8k | int bestLfnstIdx = 0; |
1565 | 49.8k | int startLfnstIdx = 0; |
1566 | | // speedUps LFNST |
1567 | 49.8k | bool rapidLFNST = false; |
1568 | 49.8k | bool rapidDCT = false; |
1569 | 49.8k | double thresholdDCT = 1; |
1570 | | 
// m_MTS == 2 loosens the early-termination threshold by a term that shrinks with block area.
1571 | 49.8k | if (m_pcEncCfg->m_MTS == 2) |
1572 | 0 | { |
1573 | 0 | thresholdDCT += 1.4 / sqrt(cu.lwidth() * cu.lheight()); |
1574 | 0 | } |
1575 | | 
1576 | 49.8k | if (m_pcEncCfg->m_LFNST > 1) |
1577 | 0 | { |
1578 | 0 | rapidLFNST = true; |
1579 | 

1580 | 0 | if (m_pcEncCfg->m_LFNST > 2) |
1581 | 0 | { |
1582 | 0 | rapidDCT = true; |
1583 | 0 | endLfnstIdx = endLfnstIdx ? 1 : 0; |
1584 | 0 | } |
1585 | 0 | } |
1586 | | 
1587 | 49.8k | saveCS.pcv = cs.pcv; |
1588 | 49.8k | saveCS.picture = cs.picture; |
1589 | 49.8k | saveCS.area.repositionTo( cs.area); |
1590 | | 
1591 | 49.8k | if (cu.ispMode) |
1592 | 5.33k | { |
1593 | 5.33k | partitioner.splitCurrArea(ispType, cs); |
1594 | 5.33k | } |
1595 | | 
1596 | 49.8k | TransformUnit& tu = cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]); |
1597 | | 
// ISP: give saveCS one TU per sub-partition of the split. Otherwise reuse (or create)
// a single TU in saveCS that serves as the backup slot for the best candidate.
1598 | 49.8k | if (cu.ispMode) |
1599 | 5.33k | { |
1600 | 5.33k | saveCS.clearTUs(); |
1601 | 5.33k | do |
1602 | 21.3k | { |
1603 | 21.3k | saveCS.addTU( |
1604 | 21.3k | CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), |
1605 | 21.3k | partitioner.chType, cs.cus[0]); |
1606 | 21.3k | } while (partitioner.nextPart(cs)); |
1607 | | 
1608 | 5.33k | partitioner.exitCurrSplit(); |
1609 | 5.33k | } |
1610 | 44.5k | else |
1611 | 44.5k | { |
1612 | 44.5k | tmpTU = saveCS.tus.empty() ? &saveCS.addTU( currArea, partitioner.chType, nullptr ) : saveCS.tus.front(); |
1613 | 44.5k | tmpTU->initData(); |
1614 | 44.5k | tmpTU->UnitArea::operator=( currArea ); |
1615 | 44.5k | } |
1616 | | 
1617 | | 
// trModes[i].first is the value written to tu.mtsIdx for modeId i; trModes[i].second is
// checked below as the "candidate enabled" flag.
1618 | 49.8k | std::vector<TrMode> trModes{ TrMode(0, true) }; |
1619 | 49.8k | if (tsAllowed) |
1620 | 15.4k | { |
1621 | 15.4k | trModes.push_back(TrMode(1, true)); |
1622 | 15.4k | } |
1623 | 49.8k | double dct2Cost = MAX_DOUBLE; |
1624 | 49.8k | double trGrpStopThreshold = 1.001; |
1625 | 49.8k | double trGrpBestCost = MAX_DOUBLE; |
1626 | | 
1627 | 49.8k | if (mtsAllowed) |
1628 | 0 | { |
1629 | 0 | if (m_pcEncCfg->m_LFNST) |
1630 | 0 | { |
1631 | 0 | uint32_t uiIntraMode = cs.cus[0]->intraDir[partitioner.chType]; |
1632 | 0 | int MTScur = (uiIntraMode < 34) ? MTS_DST7_DCT8 : MTS_DCT8_DST7; |
1633 | 

1634 | 0 | trModes.push_back(TrMode( 2, true)); |
1635 | 0 | trModes.push_back(TrMode(MTScur, true)); |
1636 | 

1637 | 0 | MTScur = (uiIntraMode < 34) ? MTS_DCT8_DST7 : MTS_DST7_DCT8; |
1638 | 

1639 | 0 | trModes.push_back(TrMode(MTScur, true)); |
1640 | 0 | trModes.push_back(TrMode(MTS_DST7_DST7 + 3, true)); |
1641 | 0 | } |
1642 | 0 | else |
1643 | 0 | { |
1644 | 0 | for (int i = 2; i < 6; i++) |
1645 | 0 | { |
1646 | 0 | trModes.push_back(TrMode(i, true)); |
1647 | 0 | } |
1648 | 0 | } |
1649 | 0 | } |
1650 | | 
// NOTE(review): xPreCheckMTS presumably updates the enabled flags in trModes - confirm.
// If MTS is off and the second entry ends up disabled, only modeId 0 remains.
1651 | 49.8k | if ((EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed)) |
1652 | 15.4k | { |
1653 | 15.4k | xPreCheckMTS(tu, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, predBuf); |
1654 | 15.4k | if (!mtsAllowed && !trModes[1].second) |
1655 | 2.89k | { |
1656 | 2.89k | EndMTS = 0; |
1657 | 2.89k | } |
1658 | 15.4k | } |
1659 | | 
1660 | 49.8k | bool NStopMTS = true; |
1661 | | 
// Candidate search. Note that EndMTS and endLfnstIdx are lowered inside the loops to
// terminate the search early, so the loop bounds are not constant.
1662 | 99.7k | for (int modeId = 0; modeId <= EndMTS && NStopMTS; modeId++) |
1663 | 49.8k | { |
1664 | 49.8k | if (modeId > 1) |
1665 | 0 | { |
1666 | 0 | trGrpBestCost = MAX_DOUBLE; |
1667 | 0 | } |
1668 | 177k | for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++) |
1669 | 127k | { |
// A non-zero LFNST index is only tested together with modeId 0.
1670 | 127k | if (lfnstIdx && modeId) |
1671 | 0 | { |
1672 | 0 | continue; |
1673 | 0 | } |
1674 | 127k | if (mtsAllowed || tsAllowed) |
1675 | 24.2k | { |
1676 | 24.2k | if (m_pcEncCfg->m_TS && bestMTS == MTS_SKIP) |
1677 | 0 | { |
1678 | 0 | break; |
1679 | 0 | } |
1680 | 24.2k | if (!m_pcEncCfg->m_LFNST && !trModes[modeId].second && mtsAllowed) |
1681 | 0 | { |
1682 | 0 | continue; |
1683 | 0 | } |
1684 | | 
1685 | 24.2k | tu.mtsIdx[COMP_Y] = trModes[modeId].first; |
1686 | 24.2k | } |
1687 | | 
1688 | 127k | if (cu.ispMode && lfnstIdx) |
1689 | 10.6k | { |
1690 | 10.6k | if (m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] == 0) |
1691 | 0 | { |
1692 | 0 | if (lfnstIdx == 2) |
1693 | 0 | { |
1694 | 0 | endLfnstIdx = 1; |
1695 | 0 | } |
1696 | 0 | continue; |
1697 | 0 | } |
1698 | 10.6k | } |
1699 | | 
1700 | 127k | cu.lfnstIdx = lfnstIdx; |
1701 | 127k | cuCtx.lfnstLastScanPos = false; |
1702 | 127k | cuCtx.violatesLfnstConstrained[CH_L] = false; |
1703 | 127k | cuCtx.violatesLfnstConstrained[CH_C] = false; |
1704 | | 
// Every candidate after the first restarts from the CABAC context saved in ctxStart.
1705 | 127k | if ((lfnstIdx != startLfnstIdx) || (modeId)) |
1706 | 77.5k | { |
1707 | 77.5k | m_CABACEstimator->getCtx() = ctxStart; |
1708 | 77.5k | } |
1709 | | 
1710 | 127k | singleDistTmpLuma = 0; |
1711 | | 
// ISP path: the candidate cost comes from xTestISP run on the split area.
1712 | 127k | if (cu.ispMode) |
1713 | 15.9k | { |
1714 | 15.9k | splitCbfLuma = false; |
1715 | | 
1716 | 15.9k | partitioner.splitCurrArea(ispType, cs); |
1717 | | 
1718 | 15.9k | singleCostTmp = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLuma, singleTmpFracBits, singleDistTmpLuma, cuCtx); |
1719 | | 
1720 | 15.9k | partitioner.exitCurrSplit(); |
1721 | | 
1722 | 15.9k | if (modeId && (singleCostTmp == MAX_DOUBLE)) |
1723 | 0 | { |
1724 | 0 | m_ispTestedModes[lfnstIdx].numTotalParts[cu.ispMode - 1] = 0; |
1725 | 0 | } |
1726 | | 
// The ISP cost is recorded when numMode is exactly 1, or when m_ISP >= 2 and numMode <= 1.
1727 | 15.9k | bool storeCost = (numMode == 1) ? true : false; |
1728 | | 
1729 | 15.9k | if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1)) |
1730 | 15.9k | { |
1731 | 15.9k | storeCost = true; |
1732 | 15.9k | } |
1733 | | 
1734 | 15.9k | if (storeCost) |
1735 | 15.9k | { |
1736 | 15.9k | m_ispTestedModes[0].bestCost[cu.ispMode - 1] = singleCostTmp; |
1737 | 15.9k | } |
1738 | 15.9k | } |
1739 | 111k | else |
1740 | 111k | { |
1741 | 111k | bool TrLoad = (EndMTS && !m_pcEncCfg->m_LFNST) || (tsAllowed && !mtsAllowed && (lfnstIdx == 0)) ? true : false; |
1742 | | 
1743 | 111k | xIntraCodingTUBlock(tu, COMP_Y, false, singleDistTmpLuma, &numSig, predBuf, TrLoad); |
1744 | | 
1745 | 111k | cuCtx.mtsLastScanPos = false; |
1746 | | //----- determine rate and r-d cost ----- |
1747 | 111k | if ((sps.LFNST ? (modeId == EndMTS && modeId != 0 && checkTransformSkip) : (trModes[modeId].first != 0)) && !TU::getCbfAtDepth(tu, COMP_Y, currDepth)) |
1748 | 0 | { |
1749 | 0 | singleCostTmp = MAX_DOUBLE; |
1750 | 0 | } |
1751 | 111k | else |
1752 | 111k | { |
1753 | 111k | m_ispTestedModes[0].IspType = TU_NO_ISP; |
1754 | 111k | m_ispTestedModes[0].subTuCounter = -1; |
1755 | 111k | singleTmpFracBits = xGetIntraFracBitsQT(cs, partitioner, true, &cuCtx); |
1756 | | 
1757 | 111k | if (tu.mtsIdx[COMP_Y] > MTS_SKIP) |
1758 | 0 | { |
1759 | 0 | if (!cuCtx.mtsLastScanPos) |
1760 | 0 | { |
1761 | 0 | singleCostTmp = MAX_DOUBLE; |
1762 | 0 | } |
1763 | 0 | else |
1764 | 0 | { |
1765 | 0 | singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); |
1766 | 0 | } |
1767 | 0 | } |
1768 | 111k | else |
1769 | 111k | { |
1770 | 111k | singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); |
1771 | 111k | } |
1772 | 111k | } |
1773 | | 
// Early termination after the very first candidate: if it already costs more than
// bestCostSoFar * thresholdDCT, no further modes (and with rapidDCT no LFNST) are tested.
1774 | 111k | if (((EndMTS && (m_pcEncCfg->m_MTS == 2)) || rapidLFNST) && modeId == 0 && lfnstIdx == 0) |
1775 | 0 | { |
1776 | 0 | if (singleCostTmp > bestCostSoFar * thresholdDCT) |
1777 | 0 | { |
1778 | 0 | EndMTS = 0; |
1779 | 

1780 | 0 | if (rapidDCT) |
1781 | 0 | { |
1782 | 0 | endLfnstIdx = 0; // break the loop but do not cpy best |
1783 | 0 | } |
1784 | 0 | } |
1785 | 0 | } |
1786 | | 
// Non-zero lfnstIdx while cuCtx.lfnstLastScanPos is still false: the candidate is
// rejected (cost set to MAX_DOUBLE) when the cbf selected below is set.
1787 | 111k | if (lfnstIdx && !cuCtx.lfnstLastScanPos && !cu.ispMode) |
1788 | 56.1k | { |
1789 | 56.1k | bool rootCbfL = false; |
1790 | | 
1791 | 224k | for (uint32_t t = 0; t < getNumberValidTBlocks(*cu.cs->pcv); t++) |
1792 | 168k | { |
1793 | 168k | rootCbfL |= tu.cbf[t] != 0; |
1794 | 168k | } |
1795 | | 
1796 | 56.1k | if (rapidLFNST && !rootCbfL) |
1797 | 0 | { |
1798 | 0 | endLfnstIdx = lfnstIdx; // break the loop |
1799 | 0 | } |
1800 | 56.1k | bool cbfAtZeroDepth = CU::isSepTree(cu) |
1801 | 56.1k | ? rootCbfL |
1802 | 56.1k | : (cs.area.chromaFormat != CHROMA_400 && std::min(cu.firstTU->blocks[1].width, cu.firstTU->blocks[1].height) < 4) |
1803 | 1 | ? TU::getCbfAtDepth(tu, COMP_Y, currDepth) |
1804 | 1 | : rootCbfL; |
1805 | | 
1806 | 56.1k | if (cbfAtZeroDepth) |
1807 | 408 | { |
1808 | 408 | singleCostTmp = MAX_DOUBLE; |
1809 | 408 | } |
1810 | 56.1k | } |
1811 | 111k | } |
1812 | | 
// New best candidate: remember its cost, distortion, bits and indices.
1813 | 127k | if (singleCostTmp < dSingleCost) |
1814 | 45.9k | { |
1815 | 45.9k | trGrpBestCost = singleCostTmp; |
1816 | 45.9k | dSingleCost = singleCostTmp; |
1817 | 45.9k | singleDistLuma = singleDistTmpLuma; |
1818 | 45.9k | singleFracBits = singleTmpFracBits; |
1819 | 45.9k | bestLfnstIdx = lfnstIdx; |
1820 | 45.9k | bestMTS = modeId; |
1821 | | 
1822 | 45.9k | if (dSingleCost < bestCostForISP) |
1823 | 29.1k | { |
1824 | 29.1k | bestCostForISP = dSingleCost; |
1825 | 29.1k | } |
1826 | | 
1827 | 45.9k | splitCbfLumaSum = splitCbfLuma; |
1828 | | 
// Plain first candidate without luma cbf: skip the remaining MTS modes (and, with
// rapidLFNST, the remaining LFNST indices).
1829 | 45.9k | if (lfnstIdx == 0 && modeId == 0 && cu.ispMode == 0) |
1830 | 44.5k | { |
1831 | 44.5k | dct2Cost = singleCostTmp; |
1832 | | 
1833 | 44.5k | if (!TU::getCbfAtDepth(tu, COMP_Y, currDepth)) |
1834 | 37.7k | { |
1835 | 37.7k | if (rapidLFNST) |
1836 | 0 | { |
1837 | 0 | endLfnstIdx = 0; // break the loop but do not cpy best |
1838 | 0 | } |
1839 | | 
1840 | 37.7k | EndMTS = 0; |
1841 | 37.7k | } |
1842 | 44.5k | } |
1843 | | 
// Back up reconstruction, TU data and CABAC state unless the best candidate is the
// final (endLfnstIdx, EndMTS) pair; the same condition guards the restore after the loops.
1844 | 45.9k | if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS) |
1845 | 34.7k | { |
1846 | 34.7k | if (cu.ispMode) |
1847 | 1.09k | { |
1848 | 1.09k | saveCS.getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y())); |
1849 | | 
1850 | 5.46k | for (uint32_t j = 0; j < cs.tus.size(); j++) |
1851 | 4.36k | { |
1852 | 4.36k | saveCS.tus[j]->copyComponentFrom(*cs.tus[j], COMP_Y); |
1853 | 4.36k | } |
1854 | 1.09k | } |
1855 | 33.6k | else |
1856 | 33.6k | { |
1857 | 33.6k | saveCS.getPredBuf(tu.Y()).copyFrom(cs.getPredBuf(tu.Y())); |
1858 | 33.6k | saveCS.getRecoBuf(tu.Y()).copyFrom(cs.getRecoBuf(tu.Y())); |
1859 | | 
1860 | 33.6k | tmpTU->copyComponentFrom(tu, COMP_Y); |
1861 | 33.6k | } |
1862 | | 
1863 | 34.7k | ctxBest = m_CABACEstimator->getCtx(); |
1864 | 34.7k | } |
1865 | | 
1866 | 45.9k | } |
1867 | 81.4k | else |
1868 | 81.4k | { |
1869 | 81.4k | if( rapidLFNST ) |
1870 | 0 | { |
1871 | 0 | endLfnstIdx = lfnstIdx; // break the loop |
1872 | 0 | } |
1873 | 81.4k | } |
1874 | 127k | } |
// With LFNST and m_MTS == 2, NStopMTS decides after each intermediate mode whether the
// mode loop continues; the loop runs on only while NStopMTS stays true.
1875 | 49.8k | if (m_pcEncCfg->m_LFNST && m_pcEncCfg->m_MTS == 2 && modeId && modeId != EndMTS) |
1876 | 0 | { |
1877 | 0 | NStopMTS = false; |
1878 | 

1879 | 0 | if (bestMTS || bestLfnstIdx) |
1880 | 0 | { |
1881 | 0 | if ((modeId > 1 && bestMTS == modeId) || modeId == 1) |
1882 | 0 | { |
1883 | 0 | NStopMTS = (dct2Cost / trGrpBestCost) < trGrpStopThreshold; |
1884 | 0 | } |
1885 | 0 | } |
1886 | 0 | } |
1887 | 49.8k | } |
1888 | | 
1889 | 49.8k | cu.lfnstIdx = bestLfnstIdx; |
1890 | 49.8k | if (dSingleCost != MAX_DOUBLE) |
1891 | 45.4k | { |
// Restore the backed-up best candidate into cs when it is not the final
// (endLfnstIdx, EndMTS) pair.
1892 | 45.4k | if (bestLfnstIdx != endLfnstIdx || bestMTS != EndMTS) |
1893 | 34.2k | { |
1894 | 34.2k | if (cu.ispMode) |
1895 | 766 | { |
1896 | 766 | const UnitArea& currArea = partitioner.currArea(); |
1897 | 766 | cs.getRecoBuf(currArea.Y()).copyFrom(saveCS.getRecoBuf(currArea.Y())); |
1898 | | 
// cs may hold fewer TUs than saveCS here; add TUs until the counts match so the
// per-TU copy below has a destination for every saved TU.
1899 | 766 | if (saveCS.tus.size() != cs.tus.size()) |
1900 | 0 | { |
1901 | 0 | partitioner.splitCurrArea(ispType, cs); |
1902 | 

1903 | 0 | do |
1904 | 0 | { |
1905 | 0 | partitioner.nextPart(cs); |
1906 | 0 | cs.addTU(CS::getArea(cs, partitioner.currArea(), partitioner.chType, partitioner.treeType), |
1907 | 0 | partitioner.chType, cs.cus[0]); |
1908 | 0 | } while (saveCS.tus.size() != cs.tus.size()); |
1909 | 

1910 | 0 | partitioner.exitCurrSplit(); |
1911 | 0 | } |
1912 | | 
1913 | 3.83k | for (uint32_t j = 0; j < saveCS.tus.size(); j++) |
1914 | 3.06k | { |
1915 | 3.06k | cs.tus[j]->copyComponentFrom(*saveCS.tus[j], COMP_Y); |
1916 | 3.06k | } |
1917 | 766 | } |
1918 | 33.4k | else |
1919 | 33.4k | { |
1920 | 33.4k | cs.getRecoBuf(tu.Y()).copyFrom(saveCS.getRecoBuf(tu.Y())); |
1921 | | 
1922 | 33.4k | tu.copyComponentFrom(*tmpTU, COMP_Y); |
1923 | 33.4k | } |
1924 | | 
1925 | 34.2k | m_CABACEstimator->getCtx() = ctxBest; |
1926 | 34.2k | } |
1927 | | 
1928 | | // otherwise this would've happened in useSubStructure |
1929 | 45.4k | cs.picture->getRecoBuf(currArea.Y()).copyFrom(cs.getRecoBuf(currArea.Y())); |
1930 | 45.4k | } |
1931 | 49.8k | } |
// Only one candidate (no LFNST index above 0, no extra modes): code it directly, with
// no backup/restore through saveCS.
1932 | 74.6k | else |
1933 | 74.6k | { |
1934 | 74.6k | if (cu.ispMode) |
1935 | 302 | { |
1936 | 302 | const PartSplit ispType = CU::getISPType(cu, COMP_Y); |
1937 | 302 | partitioner.splitCurrArea(ispType, cs); |
1938 | | 
1939 | 302 | CUCtx cuCtx; |
1940 | 302 | dSingleCost = xTestISP(cs, partitioner, bestCostForISP, ispType, splitCbfLumaSum, singleFracBits, singleDistLuma, cuCtx); |
1941 | 302 | partitioner.exitCurrSplit(); |
1942 | 302 | bool storeCost = (numMode == 1) ? true : false; |
1943 | 302 | if ((m_pcEncCfg->m_ISP >= 2) && (numMode <= 1)) |
1944 | 302 | { |
1945 | 302 | storeCost = true; |
1946 | 302 | } |
1947 | 302 | if (storeCost) |
1948 | 302 | { |
1949 | 302 | m_ispTestedModes[0].bestCost[cu.ispMode - 1] = dSingleCost; |
1950 | 302 | } |
1951 | 302 | } |
1952 | 74.3k | else |
1953 | 74.3k | { |
1954 | 74.3k | TransformUnit& tu = |
1955 | 74.3k | cs.addTU(CS::getArea(cs, currArea, partitioner.chType, partitioner.treeType), partitioner.chType, cs.cus[0]); |
1956 | 74.3k | tu.depth = currDepth; |
1957 | | 
1958 | 74.3k | CHECK(!tu.Y().valid(), "Invalid TU"); |
1959 | 74.3k | xIntraCodingTUBlock(tu, COMP_Y, false, singleDistLuma, &numSig, predBuf); |
1960 | | //----- determine rate and r-d cost ----- |
1961 | 74.3k | m_ispTestedModes[0].IspType = TU_NO_ISP; |
1962 | 74.3k | m_ispTestedModes[0].subTuCounter = -1; |
1963 | 74.3k | singleFracBits = xGetIntraFracBitsQT(cs, partitioner, true); |
1964 | 74.3k | dSingleCost = m_pcRdCost->calcRdCost(singleFracBits, singleDistLuma); |
1965 | 74.3k | } |
1966 | 74.6k | } |
1967 | | 
// ISP: write the combined luma cbf of the chosen candidate to every TU whose luma block
// lies inside the current luma area.
1968 | 124k | if (cu.ispMode) |
1969 | 5.63k | { |
1970 | 5.63k | for (auto& ptu : cs.tus) |
1971 | 9.01k | { |
1972 | 9.01k | if (currArea.Y().contains(ptu->Y())) |
1973 | 9.01k | { |
1974 | 9.01k | TU::setCbfAtDepth(*ptu, COMP_Y, currDepth, splitCbfLumaSum ? 1 : 0); |
1975 | 9.01k | } |
1976 | 9.01k | } |
1977 | 5.63k | } |
// Publish the result: distortion and bits are accumulated, the cost is overwritten.
1978 | 124k | cs.dist += singleDistLuma; |
1979 | 124k | cs.fracBits += singleFracBits; |
1980 | 124k | cs.cost = dSingleCost; |
1981 | | 
1982 | 124k | STAT_COUNT_CU_MODES( partitioner.chType == CH_L, g_cuCounters1D[CU_RD_TESTS][0][!cs.slice->isIntra() + cs.slice->depth] ); |
1983 | 124k | STAT_COUNT_CU_MODES( partitioner.chType == CH_L && !cs.slice->isIntra(), g_cuCounters2D[CU_RD_TESTS][Log2( cs.area.lheight() )][Log2( cs.area.lwidth() )] ); |
1984 | 124k | } |
1985 | | |
1986 | | ChromaCbfs IntraSearch::xIntraChromaCodingQT(CodingStructure& cs, Partitioner& partitioner) |
1987 | 301k | { |
1988 | 301k | UnitArea currArea = partitioner.currArea(); |
1989 | | |
1990 | 301k | if( !currArea.Cb().valid() ) |
1991 | 0 | return ChromaCbfs(false); |
1992 | | |
1993 | 301k | TransformUnit& currTU = *cs.getTU( currArea.chromaPos(), CH_C ); |
1994 | 301k | const CodingUnit& cu = *cs.getCU( currArea.chromaPos(), CH_C, TREE_D ); |
1995 | 301k | ChromaCbfs cbfs(false); |
1996 | 301k | uint32_t currDepth = partitioner.currTrDepth; |
1997 | 301k | const bool useTS = cs.picture->useTS; |
1998 | 301k | if (currDepth == currTU.depth) |
1999 | 301k | { |
2000 | 301k | if (!currArea.Cb().valid() || !currArea.Cr().valid()) |
2001 | 0 | { |
2002 | 0 | return cbfs; |
2003 | 0 | } |
2004 | | |
2005 | 301k | CodingStructure& saveCS = *m_pSaveCS[1]; |
2006 | 301k | saveCS.pcv = cs.pcv; |
2007 | 301k | saveCS.picture = cs.picture; |
2008 | 301k | saveCS.area.repositionTo(cs.area); |
2009 | | |
2010 | 301k | TransformUnit& tmpTU = saveCS.tus.empty() ? saveCS.addTU(currArea, partitioner.chType, nullptr) : *saveCS.tus.front(); |
2011 | 301k | tmpTU.initData(); |
2012 | 301k | tmpTU.UnitArea::operator=(currArea); |
2013 | 301k | const unsigned numTBlocks = getNumberValidTBlocks(*cs.pcv); |
2014 | | |
2015 | 301k | CompArea& cbArea = currTU.blocks[COMP_Cb]; |
2016 | 301k | CompArea& crArea = currTU.blocks[COMP_Cr]; |
2017 | 301k | double bestCostCb = MAX_DOUBLE; |
2018 | 301k | double bestCostCr = MAX_DOUBLE; |
2019 | 301k | Distortion bestDistCb = 0; |
2020 | 301k | Distortion bestDistCr = 0; |
2021 | | |
2022 | 301k | TempCtx ctxStartTU(m_CtxCache); |
2023 | 301k | TempCtx ctxStart(m_CtxCache); |
2024 | 301k | TempCtx ctxBest(m_CtxCache); |
2025 | | |
2026 | 301k | ctxStartTU = m_CABACEstimator->getCtx(); |
2027 | 301k | ctxStart = m_CABACEstimator->getCtx(); |
2028 | 301k | currTU.jointCbCr = 0; |
2029 | | |
2030 | | // Do predictions here to avoid repeating the "default0Save1Load2" stuff |
2031 | 301k | int predMode = cu.bdpcmM[CH_C] ? BDPCM_IDX : CU::getFinalIntraMode(cu, CH_C); |
2032 | | |
2033 | 301k | PelBuf piPredCb = cs.getPredBuf(COMP_Cb); |
2034 | 301k | PelBuf piPredCr = cs.getPredBuf(COMP_Cr); |
2035 | | |
2036 | 301k | initIntraPatternChType(*currTU.cu, cbArea); |
2037 | 301k | initIntraPatternChType(*currTU.cu, crArea); |
2038 | | |
2039 | 301k | if (CU::isLMCMode(predMode)) |
2040 | 22.1k | { |
2041 | 22.1k | loadLMLumaRecPels(cu, cbArea); |
2042 | 22.1k | predIntraChromaLM(COMP_Cb, piPredCb, cu, cbArea, predMode); |
2043 | 22.1k | predIntraChromaLM(COMP_Cr, piPredCr, cu, crArea, predMode); |
2044 | 22.1k | } |
2045 | 279k | else |
2046 | 279k | { |
2047 | 279k | predIntraAng(COMP_Cb, piPredCb, cu); |
2048 | 279k | predIntraAng(COMP_Cr, piPredCr, cu); |
2049 | 279k | } |
2050 | | |
2051 | | // determination of chroma residuals including reshaping and cross-component prediction |
2052 | | //----- get chroma residuals ----- |
2053 | 301k | PelBuf resiCb = cs.getResiBuf(COMP_Cb); |
2054 | 301k | PelBuf resiCr = cs.getResiBuf(COMP_Cr); |
2055 | 301k | resiCb.subtract(cs.getOrgBuf(COMP_Cb), piPredCb); |
2056 | 301k | resiCr.subtract(cs.getOrgBuf(COMP_Cr), piPredCr); |
2057 | | |
2058 | | //----- get reshape parameter ---- |
2059 | 301k | ReshapeData& reshapeData = cs.picture->reshapeData; |
2060 | 301k | bool doReshaping = (cs.picHeader->lmcsEnabled && cs.picHeader->lmcsChromaResidualScale && (cs.slice->isIntra() || reshapeData.getCTUFlag()) && (cbArea.width * cbArea.height > 4)); |
2061 | 301k | if (doReshaping) |
2062 | 0 | { |
2063 | 0 | const Area area = currTU.Y().valid() ? currTU.Y() : Area(recalcPosition(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].pos()), recalcSize(currTU.chromaFormat, currTU.chType, CH_L, currTU.blocks[currTU.chType].size())); |
2064 | 0 | const CompArea& areaY = CompArea(COMP_Y, currTU.chromaFormat, area); |
2065 | 0 | currTU.chromaAdj = reshapeData.calculateChromaAdjVpduNei(currTU, areaY, currTU.cu->treeType); |
2066 | 0 | } |
2067 | | |
2068 | | //===== store original residual signals (std and crossCompPred) ===== |
2069 | 1.81M | for( int k = 0; k < 5; k++ ) |
2070 | 1.50M | { |
2071 | 1.50M | m_orgResiCb[k].compactResize( cbArea ); |
2072 | 1.50M | m_orgResiCr[k].compactResize( crArea ); |
2073 | 1.50M | } |
2074 | 603k | for (int k = 0; k < 1; k += 4) |
2075 | 301k | { |
2076 | 301k | m_orgResiCb[k].copyFrom(resiCb); |
2077 | 301k | m_orgResiCr[k].copyFrom(resiCr); |
2078 | | |
2079 | 301k | if (doReshaping) |
2080 | 0 | { |
2081 | 0 | int cResScaleInv = currTU.chromaAdj; |
2082 | 0 | m_orgResiCb[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cb]); |
2083 | 0 | m_orgResiCr[k].scaleSignal(cResScaleInv, 1, cs.slice->clpRngs[COMP_Cr]); |
2084 | 0 | } |
2085 | 301k | } |
2086 | | |
2087 | 301k | CUCtx cuCtx; |
2088 | 301k | cuCtx.isDQPCoded = true; |
2089 | 301k | cuCtx.isChromaQpAdjCoded = true; |
2090 | 301k | cuCtx.lfnstLastScanPos = false; |
2091 | | |
2092 | 301k | CodingStructure& saveCScur = *m_pSaveCS[2]; |
2093 | | |
2094 | 301k | saveCScur.pcv = cs.pcv; |
2095 | 301k | saveCScur.picture = cs.picture; |
2096 | 301k | saveCScur.area.repositionTo(cs.area); |
2097 | | |
2098 | 301k | TransformUnit& tmpTUcur = saveCScur.tus.empty() ? saveCScur.addTU(currArea, partitioner.chType, nullptr) : *saveCScur.tus.front(); |
2099 | 301k | tmpTUcur.initData(); |
2100 | 301k | tmpTUcur.UnitArea::operator=(currArea); |
2101 | | |
2102 | 301k | TempCtx ctxBestTUL(m_CtxCache); |
2103 | | |
2104 | 301k | const SPS& sps = *cs.sps; |
2105 | 301k | double bestCostCbcur = MAX_DOUBLE; |
2106 | 301k | double bestCostCrcur = MAX_DOUBLE; |
2107 | 301k | Distortion bestDistCbcur = 0; |
2108 | 301k | Distortion bestDistCrcur = 0; |
2109 | | |
2110 | 301k | int endLfnstIdx = (partitioner.isSepTree(cs) && partitioner.chType == CH_C && (partitioner.currArea().lwidth() < 8 || partitioner.currArea().lheight() < 8)) |
2111 | 288k | || (partitioner.currArea().lwidth() > sps.getMaxTbSize() || partitioner.currArea().lheight() > sps.getMaxTbSize()) || !sps.LFNST ? 0 : 2; |
2112 | 301k | int startLfnstIdx = 0; |
2113 | 301k | int bestLfnstIdx = 0; |
2114 | 301k | bool testLFNST = sps.LFNST; |
2115 | | |
2116 | | // speedUps LFNST |
2117 | 301k | bool rapidLFNST = false; |
2118 | 301k | if (m_pcEncCfg->m_LFNST > 1) |
2119 | 0 | { |
2120 | 0 | rapidLFNST = true; |
2121 | 0 | if (m_pcEncCfg->m_LFNST > 2) |
2122 | 0 | { |
2123 | 0 | endLfnstIdx = endLfnstIdx ? 1 : 0; |
2124 | 0 | } |
2125 | 0 | } |
2126 | 301k | int ts_used = 0; |
2127 | 301k | bool testTS = false; |
2128 | 301k | if (partitioner.chType != CH_C) |
2129 | 0 | { |
2130 | 0 | startLfnstIdx = currTU.cu->lfnstIdx; |
2131 | 0 | endLfnstIdx = currTU.cu->lfnstIdx; |
2132 | 0 | bestLfnstIdx = currTU.cu->lfnstIdx; |
2133 | 0 | testLFNST = false; |
2134 | 0 | rapidLFNST = false; |
2135 | 0 | ts_used = currTU.mtsIdx[COMP_Y]; |
2136 | 0 | } |
2137 | 301k | if (cu.bdpcmM[CH_C]) |
2138 | 40.3k | { |
2139 | 40.3k | endLfnstIdx = 0; |
2140 | 40.3k | testLFNST = false; |
2141 | 40.3k | } |
2142 | | |
2143 | 301k | double dSingleCostAll = MAX_DOUBLE; |
2144 | 301k | double singleCostTmpAll = 0; |
2145 | | |
2146 | 1.10M | for (int lfnstIdx = startLfnstIdx; lfnstIdx <= endLfnstIdx; lfnstIdx++) |
2147 | 802k | { |
2148 | 802k | if (rapidLFNST && lfnstIdx) |
2149 | 0 | { |
2150 | 0 | if ((lfnstIdx == 2) && (bestLfnstIdx == 0)) |
2151 | 0 | { |
2152 | 0 | continue; |
2153 | 0 | } |
2154 | 0 | } |
2155 | | |
2156 | 802k | currTU.cu->lfnstIdx = lfnstIdx; |
2157 | 802k | if (lfnstIdx) |
2158 | 500k | { |
2159 | 500k | m_CABACEstimator->getCtx() = ctxStartTU; |
2160 | 500k | } |
2161 | | |
2162 | 802k | cuCtx.lfnstLastScanPos = false; |
2163 | 802k | cuCtx.violatesLfnstConstrained[CH_L] = false; |
2164 | 802k | cuCtx.violatesLfnstConstrained[CH_C] = false; |
2165 | | |
2166 | 2.40M | for (uint32_t c = COMP_Cb; c < numTBlocks; c++) |
2167 | 1.60M | { |
2168 | 1.60M | const ComponentID compID = ComponentID(c); |
2169 | 1.60M | const CompArea& area = currTU.blocks[compID]; |
2170 | 1.60M | double dSingleCost = MAX_DOUBLE; |
2171 | 1.60M | Distortion singleDistCTmp = 0; |
2172 | 1.60M | double singleCostTmp = 0; |
2173 | 1.60M | bool tsAllowed = useTS && TU::isTSAllowed(currTU, compID) && m_pcEncCfg->m_useChromaTS && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C]; |
2174 | 1.60M | if ((partitioner.chType == CH_L) && (!ts_used)) |
2175 | 0 | { |
2176 | 0 | tsAllowed = false; |
2177 | 0 | } |
2178 | 1.60M | uint8_t nNumTransformCands = 1 + (tsAllowed ? 1 : 0); // DCT + TS = 2 tests |
2179 | 1.60M | std::vector<TrMode> trModes; |
2180 | 1.60M | if (nNumTransformCands > 1) |
2181 | 0 | { |
2182 | 0 | trModes.push_back(TrMode(0, true)); // DCT2 |
2183 | 0 | trModes.push_back(TrMode(1, true)); // TS |
2184 | 0 | testTS = true; |
2185 | 0 | } |
2186 | 1.60M | bool cbfDCT2 = true; |
2187 | 18.4E | const bool isLastMode = testLFNST || cs.sps->jointCbCr || tsAllowed ? false : true; |
2188 | 1.60M | int bestModeId = 0; |
2189 | 1.60M | ctxStart = m_CABACEstimator->getCtx(); |
2190 | 3.21M | for (int modeId = 0; modeId < nNumTransformCands; modeId++) |
2191 | 1.60M | { |
2192 | 1.60M | if (doReshaping || lfnstIdx || modeId) |
2193 | 1.00M | { |
2194 | 1.00M | resiCb.copyFrom(m_orgResiCb[0]); |
2195 | 1.00M | resiCr.copyFrom(m_orgResiCr[0]); |
2196 | 1.00M | } |
2197 | 1.60M | if (modeId == 0) |
2198 | 1.60M | { |
2199 | 1.60M | if ( tsAllowed) |
2200 | 0 | { |
2201 | 0 | xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, compID); |
2202 | 0 | } |
2203 | 1.60M | } |
2204 | | |
2205 | 1.60M | currTU.mtsIdx[compID] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : modeId; |
2206 | | |
2207 | 1.60M | if (modeId) |
2208 | 0 | { |
2209 | 0 | if (!cbfDCT2 && trModes[modeId].first == MTS_SKIP) |
2210 | 0 | { |
2211 | 0 | break; |
2212 | 0 | } |
2213 | 0 | m_CABACEstimator->getCtx() = ctxStart; |
2214 | 0 | } |
2215 | 1.60M | singleDistCTmp = 0; |
2216 | 1.60M | if (tsAllowed) |
2217 | 0 | { |
2218 | 0 | xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp, 0, 0, true); |
2219 | 0 | if ((modeId == 0) && (!trModes[modeId + 1].second)) |
2220 | 0 | { |
2221 | 0 | nNumTransformCands = 1; |
2222 | 0 | } |
2223 | 0 | } |
2224 | 1.60M | else |
2225 | 1.60M | { |
2226 | 1.60M | xIntraCodingTUBlock(currTU, compID, false, singleDistCTmp); |
2227 | 1.60M | } |
2228 | 1.60M | if (((currTU.mtsIdx[compID] == MTS_SKIP && !currTU.cu->bdpcmM[CH_C]) |
2229 | 0 | && !TU::getCbf(currTU, compID))) // In order not to code TS flag when cbf is zero, the case for TS with |
2230 | | // cbf being zero is forbidden. |
2231 | 0 | { |
2232 | 0 | singleCostTmp = MAX_DOUBLE; |
2233 | 0 | } |
2234 | 1.60M | else |
2235 | 1.60M | { |
2236 | 1.60M | uint64_t fracBitsTmp = xGetIntraFracBitsQTChroma(currTU, compID, &cuCtx); |
2237 | 1.60M | singleCostTmp = m_pcRdCost->calcRdCost(fracBitsTmp, singleDistCTmp); |
2238 | 1.60M | } |
2239 | | |
2240 | 1.60M | if (singleCostTmp < dSingleCost) |
2241 | 1.60M | { |
2242 | 1.60M | dSingleCost = singleCostTmp; |
2243 | | |
2244 | 1.60M | if (compID == COMP_Cb) |
2245 | 802k | { |
2246 | 802k | bestCostCb = singleCostTmp; |
2247 | 802k | bestDistCb = singleDistCTmp; |
2248 | 802k | } |
2249 | 802k | else |
2250 | 802k | { |
2251 | 802k | bestCostCr = singleCostTmp; |
2252 | 802k | bestDistCr = singleDistCTmp; |
2253 | 802k | } |
2254 | 1.60M | bestModeId = modeId; |
2255 | 1.60M | if (currTU.mtsIdx[compID] == MTS_DCT2_DCT2) |
2256 | 1.52M | { |
2257 | 1.52M | cbfDCT2 = TU::getCbfAtDepth(currTU, compID, currDepth); |
2258 | 1.52M | } |
2259 | 1.60M | if (!isLastMode) |
2260 | 1.60M | { |
2261 | 1.60M | saveCS.getRecoBuf(area).copyFrom(cs.getRecoBuf(area)); |
2262 | 1.60M | tmpTU.copyComponentFrom(currTU, compID); |
2263 | 1.60M | ctxBest = m_CABACEstimator->getCtx(); |
2264 | 1.60M | } |
2265 | 1.60M | } |
2266 | 1.60M | } |
2267 | 1.60M | if (testTS && ((c == COMP_Cb && bestModeId < (nNumTransformCands - 1)) )) |
2268 | 0 | { |
2269 | 0 | m_CABACEstimator->getCtx() = ctxBest; |
2270 | |
|
2271 | 0 | currTU.copyComponentFrom(tmpTU, COMP_Cb); // Cbf of Cb is needed to estimate cost for Cr Cbf |
2272 | 0 | } |
2273 | 1.60M | } |
2274 | | |
2275 | 802k | singleCostTmpAll = bestCostCb + bestCostCr; |
2276 | | |
2277 | 802k | bool rootCbfL = false; |
2278 | 802k | if (testLFNST) |
2279 | 762k | { |
2280 | 3.04M | for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++) |
2281 | 2.28M | { |
2282 | 2.28M | rootCbfL |= bool(tmpTU.cbf[t]); |
2283 | 2.28M | } |
2284 | 762k | if (rapidLFNST && !rootCbfL) |
2285 | 0 | { |
2286 | 0 | endLfnstIdx = lfnstIdx; // end this |
2287 | 0 | } |
2288 | 762k | } |
2289 | | |
2290 | 802k | if (testLFNST && lfnstIdx && !cuCtx.lfnstLastScanPos) |
2291 | 330k | { |
2292 | 330k | bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) |
2293 | 330k | ? rootCbfL : (cs.area.chromaFormat != CHROMA_400 |
2294 | 0 | && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4) |
2295 | 0 | ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL; |
2296 | 330k | if (cbfAtZeroDepth) |
2297 | 1.61k | { |
2298 | 1.61k | singleCostTmpAll = MAX_DOUBLE; |
2299 | 1.61k | } |
2300 | 330k | } |
2301 | 802k | if ((testLFNST || testTS) && (singleCostTmpAll < dSingleCostAll)) |
2302 | 261k | { |
2303 | 261k | bestLfnstIdx = lfnstIdx; |
2304 | 261k | if ((lfnstIdx != endLfnstIdx) || testTS) |
2305 | 250k | { |
2306 | 250k | dSingleCostAll = singleCostTmpAll; |
2307 | | |
2308 | 250k | bestCostCbcur = bestCostCb; |
2309 | 250k | bestCostCrcur = bestCostCr; |
2310 | 250k | bestDistCbcur = bestDistCb; |
2311 | 250k | bestDistCrcur = bestDistCr; |
2312 | | |
2313 | 250k | saveCScur.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea)); |
2314 | 250k | saveCScur.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea)); |
2315 | | |
2316 | 250k | tmpTUcur.copyComponentFrom(tmpTU, COMP_Cb); |
2317 | 250k | tmpTUcur.copyComponentFrom(tmpTU, COMP_Cr); |
2318 | 250k | } |
2319 | 261k | ctxBestTUL = m_CABACEstimator->getCtx(); |
2320 | 261k | } |
2321 | 802k | } |
2322 | 301k | if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS) |
2323 | 250k | { |
2324 | 250k | bestCostCb = bestCostCbcur; |
2325 | 250k | bestCostCr = bestCostCrcur; |
2326 | 250k | bestDistCb = bestDistCbcur; |
2327 | 250k | bestDistCr = bestDistCrcur; |
2328 | 250k | currTU.cu->lfnstIdx = bestLfnstIdx; |
2329 | 250k | if (!cs.sps->jointCbCr) |
2330 | 0 | { |
2331 | 0 | cs.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea)); |
2332 | 0 | cs.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea)); |
2333 | |
|
2334 | 0 | currTU.copyComponentFrom(tmpTUcur, COMP_Cb); |
2335 | 0 | currTU.copyComponentFrom(tmpTUcur, COMP_Cr); |
2336 | |
|
2337 | 0 | m_CABACEstimator->getCtx() = ctxBestTUL; |
2338 | 0 | } |
2339 | 250k | } |
2340 | | |
2341 | 301k | Distortion bestDistCbCr = bestDistCb + bestDistCr; |
2342 | | |
2343 | 301k | if (cs.sps->jointCbCr) |
2344 | 301k | { |
2345 | 301k | if ((testLFNST && (bestLfnstIdx != endLfnstIdx)) || testTS) |
2346 | 250k | { |
2347 | 250k | saveCS.getRecoBuf(cbArea).copyFrom(saveCScur.getRecoBuf(cbArea)); |
2348 | 250k | saveCS.getRecoBuf(crArea).copyFrom(saveCScur.getRecoBuf(crArea)); |
2349 | | |
2350 | 250k | tmpTU.copyComponentFrom(tmpTUcur, COMP_Cb); |
2351 | 250k | tmpTU.copyComponentFrom(tmpTUcur, COMP_Cr); |
2352 | 250k | m_CABACEstimator->getCtx() = ctxBestTUL; |
2353 | 250k | ctxBest = m_CABACEstimator->getCtx(); |
2354 | 250k | } |
2355 | | // Test using joint chroma residual coding |
2356 | 301k | double bestCostCbCr = bestCostCb + bestCostCr; |
2357 | 301k | int bestJointCbCr = 0; |
2358 | 301k | bool checkDCTOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cr)) || |
2359 | 0 | (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2 && !TU::getCbf(tmpTU, COMP_Cb)) || |
2360 | 0 | (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_DCT2_DCT2 && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_DCT2_DCT2)); |
2361 | 301k | bool checkTSOnly = m_pcEncCfg->m_useChromaTS && ((TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cr)) || |
2362 | 0 | (TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP && !TU::getCbf(tmpTU, COMP_Cb)) || |
2363 | 0 | (TU::getCbf(tmpTU, COMP_Cb) && tmpTU.mtsIdx[COMP_Cb] == MTS_SKIP && TU::getCbf(tmpTU, COMP_Cr) && tmpTU.mtsIdx[COMP_Cr] == MTS_SKIP)); |
2364 | 301k | bool lastIsBest = false; |
2365 | 301k | bool noLFNST1 = false; |
2366 | 301k | if (rapidLFNST && (startLfnstIdx != endLfnstIdx)) |
2367 | 0 | { |
2368 | 0 | if (bestLfnstIdx == 2) |
2369 | 0 | { |
2370 | 0 | noLFNST1 = true; |
2371 | 0 | } |
2372 | 0 | else |
2373 | 0 | { |
2374 | 0 | endLfnstIdx = 1; |
2375 | 0 | } |
2376 | 0 | } |
2377 | | |
2378 | 1.10M | for (int lfnstIdxj = startLfnstIdx; lfnstIdxj <= endLfnstIdx; lfnstIdxj++) |
2379 | 802k | { |
2380 | 802k | if (rapidLFNST && noLFNST1 && (lfnstIdxj == 1)) |
2381 | 0 | { |
2382 | 0 | continue; |
2383 | 0 | } |
2384 | 802k | currTU.cu->lfnstIdx = lfnstIdxj; |
2385 | 802k | std::vector<int> jointCbfMasksToTest; |
2386 | 802k | if (TU::getCbf(tmpTU, COMP_Cb) || TU::getCbf(tmpTU, COMP_Cr)) |
2387 | 283k | { |
2388 | 283k | jointCbfMasksToTest = m_pcTrQuant->selectICTCandidates(currTU, m_orgResiCb, m_orgResiCr); |
2389 | 283k | } |
2390 | 802k | for (int cbfMask : jointCbfMasksToTest) |
2391 | 283k | { |
2392 | 283k | currTU.jointCbCr = (uint8_t)cbfMask; |
2393 | 283k | ComponentID codeCompId = ((currTU.jointCbCr >> 1) ? COMP_Cb : COMP_Cr); |
2394 | 283k | ComponentID otherCompId = ((codeCompId == COMP_Cb) ? COMP_Cr : COMP_Cb); |
2395 | 283k | bool tsAllowed = useTS && TU::isTSAllowed(currTU, codeCompId) && (m_pcEncCfg->m_useChromaTS) && !currTU.cu->lfnstIdx && !cu.bdpcmM[CH_C]; |
2396 | 283k | if ((partitioner.chType == CH_L)&& tsAllowed && (currTU.mtsIdx[COMP_Y] != MTS_SKIP)) |
2397 | 0 | { |
2398 | 0 | tsAllowed = false; |
2399 | 0 | } |
2400 | 283k | if (!tsAllowed) |
2401 | 283k | { |
2402 | 283k | checkTSOnly = false; |
2403 | 283k | } |
2404 | 283k | uint8_t numTransformCands = 1 + (tsAllowed && !(checkDCTOnly || checkTSOnly)? 1 : 0); // DCT + TS = 2 tests |
2405 | 283k | std::vector<TrMode> trModes; |
2406 | 283k | if (numTransformCands > 1) |
2407 | 0 | { |
2408 | 0 | trModes.push_back(TrMode(0, true)); // DCT2 |
2409 | 0 | trModes.push_back(TrMode(1, true));//TS |
2410 | 0 | } |
2411 | 283k | else |
2412 | 283k | { |
2413 | 283k | currTU.mtsIdx[codeCompId] = checkTSOnly || currTU.cu->bdpcmM[CH_C] ? 1 : 0; |
2414 | 283k | } |
2415 | | |
2416 | 566k | for (int modeId = 0; modeId < numTransformCands; modeId++) |
2417 | 283k | { |
2418 | 283k | Distortion distTmp = 0; |
2419 | 283k | currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : MTS_DCT2_DCT2; |
2420 | 283k | if (numTransformCands > 1) |
2421 | 0 | { |
2422 | 0 | currTU.mtsIdx[codeCompId] = currTU.cu->bdpcmM[CH_C] ? MTS_SKIP : trModes[modeId].first; |
2423 | 0 | } |
2424 | 283k | currTU.mtsIdx[otherCompId] = MTS_DCT2_DCT2; |
2425 | | |
2426 | 283k | m_CABACEstimator->getCtx() = ctxStartTU; |
2427 | | |
2428 | 283k | resiCb.copyFrom(m_orgResiCb[cbfMask]); |
2429 | 283k | resiCr.copyFrom(m_orgResiCr[cbfMask]); |
2430 | 283k | if ((modeId == 0) && (numTransformCands > 1)) |
2431 | 0 | { |
2432 | 0 | xPreCheckMTS(currTU, &trModes, m_pcEncCfg->m_MTSIntraMaxCand, 0, COMP_Cb); |
2433 | 0 | currTU.mtsIdx[codeCompId] = trModes[modeId].first; |
2434 | 0 | currTU.mtsIdx[(codeCompId == COMP_Cr) ? COMP_Cb : COMP_Cr] = MTS_DCT2_DCT2; |
2435 | 0 | } |
2436 | 283k | cuCtx.lfnstLastScanPos = false; |
2437 | 283k | cuCtx.violatesLfnstConstrained[CH_L] = false; |
2438 | 283k | cuCtx.violatesLfnstConstrained[CH_C] = false; |
2439 | 283k | if (numTransformCands > 1) |
2440 | 0 | { |
2441 | 0 | xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0, 0, true); |
2442 | 0 | if ((modeId == 0) && !trModes[modeId + 1].second) |
2443 | 0 | { |
2444 | 0 | numTransformCands = 1; |
2445 | 0 | } |
2446 | 0 | } |
2447 | 283k | else |
2448 | 283k | { |
2449 | 283k | xIntraCodingTUBlock(currTU, COMP_Cb, false, distTmp, 0); |
2450 | 283k | } |
2451 | | |
2452 | 283k | double costTmp = std::numeric_limits<double>::max(); |
2453 | 283k | if (distTmp < MAX_DISTORTION) |
2454 | 279k | { |
2455 | 279k | uint64_t bits = xGetIntraFracBitsQTChroma(currTU, COMP_Cb, &cuCtx); |
2456 | 279k | costTmp = m_pcRdCost->calcRdCost(bits, distTmp); |
2457 | 279k | } |
2458 | 3.55k | else if (!currTU.mtsIdx[codeCompId]) |
2459 | 3.55k | { |
2460 | 3.55k | numTransformCands = 1; |
2461 | 3.55k | } |
2462 | 283k | bool rootCbfL = false; |
2463 | 1.13M | for (uint32_t t = 0; t < getNumberValidTBlocks(*cs.pcv); t++) |
2464 | 850k | { |
2465 | 850k | rootCbfL |= bool(tmpTU.cbf[t]); |
2466 | 850k | } |
2467 | 283k | if (rapidLFNST && !rootCbfL) |
2468 | 0 | { |
2469 | 0 | endLfnstIdx = lfnstIdxj; |
2470 | 0 | } |
2471 | 283k | if (testLFNST && currTU.cu->lfnstIdx && !cuCtx.lfnstLastScanPos) |
2472 | 3.48k | { |
2473 | 3.48k | bool cbfAtZeroDepth = CU::isSepTree(*currTU.cu) ? rootCbfL |
2474 | 3.48k | : (cs.area.chromaFormat != CHROMA_400 && std::min(tmpTU.blocks[1].width, tmpTU.blocks[1].height) < 4) |
2475 | 0 | ? TU::getCbfAtDepth(currTU, COMP_Y, currTU.depth) : rootCbfL; |
2476 | 3.48k | if (cbfAtZeroDepth) |
2477 | 3.48k | { |
2478 | 3.48k | costTmp = MAX_DOUBLE; |
2479 | 3.48k | } |
2480 | 3.48k | } |
2481 | 283k | if (costTmp < bestCostCbCr) |
2482 | 106k | { |
2483 | 106k | bestCostCbCr = costTmp; |
2484 | 106k | bestDistCbCr = distTmp; |
2485 | 106k | bestJointCbCr = currTU.jointCbCr; |
2486 | | |
2487 | | // store data |
2488 | 106k | bestLfnstIdx = lfnstIdxj; |
2489 | 106k | if ((cbfMask != jointCbfMasksToTest.back() || (lfnstIdxj != endLfnstIdx)) || (modeId != (numTransformCands - 1))) |
2490 | 86.9k | { |
2491 | 86.9k | saveCS.getRecoBuf(cbArea).copyFrom(cs.getRecoBuf(cbArea)); |
2492 | 86.9k | saveCS.getRecoBuf(crArea).copyFrom(cs.getRecoBuf(crArea)); |
2493 | | |
2494 | 86.9k | tmpTU.copyComponentFrom(currTU, COMP_Cb); |
2495 | 86.9k | tmpTU.copyComponentFrom(currTU, COMP_Cr); |
2496 | | |
2497 | 86.9k | ctxBest = m_CABACEstimator->getCtx(); |
2498 | 86.9k | } |
2499 | 19.8k | else |
2500 | 19.8k | { |
2501 | 19.8k | lastIsBest = true; |
2502 | 19.8k | cs.cus[0]->lfnstIdx = bestLfnstIdx; |
2503 | 19.8k | } |
2504 | 106k | } |
2505 | 283k | } |
2506 | 283k | } |
2507 | | |
2508 | | // Retrieve the best CU data (unless it was the very last one tested) |
2509 | 802k | } |
2510 | 301k | if (!lastIsBest) |
2511 | 281k | { |
2512 | 281k | cs.getRecoBuf(cbArea).copyFrom(saveCS.getRecoBuf(cbArea)); |
2513 | 281k | cs.getRecoBuf(crArea).copyFrom(saveCS.getRecoBuf(crArea)); |
2514 | | |
2515 | 281k | cs.cus[0]->lfnstIdx = bestLfnstIdx; |
2516 | 281k | currTU.copyComponentFrom(tmpTU, COMP_Cb); |
2517 | 281k | currTU.copyComponentFrom(tmpTU, COMP_Cr); |
2518 | 281k | m_CABACEstimator->getCtx() = ctxBest; |
2519 | 281k | } |
2520 | 301k | currTU.jointCbCr = (TU::getCbf(currTU, COMP_Cb) || TU::getCbf(currTU, COMP_Cr)) ? bestJointCbCr : 0; |
2521 | 301k | } // jointCbCr |
2522 | | |
2523 | 301k | cs.dist += bestDistCbCr; |
2524 | 301k | cuCtx.violatesLfnstConstrained[CH_L] = false; |
2525 | 301k | cuCtx.violatesLfnstConstrained[CH_C] = false; |
2526 | 301k | cuCtx.lfnstLastScanPos = false; |
2527 | 301k | cuCtx.violatesMtsCoeffConstraint = false; |
2528 | 301k | cuCtx.mtsLastScanPos = false; |
2529 | 301k | cbfs.cbf(COMP_Cb) = TU::getCbf(currTU, COMP_Cb); |
2530 | 301k | cbfs.cbf(COMP_Cr) = TU::getCbf(currTU, COMP_Cr); |
2531 | 301k | } |
2532 | 0 | else |
2533 | 0 | { |
2534 | 0 | unsigned numValidTBlocks = getNumberValidTBlocks(*cs.pcv); |
2535 | 0 | ChromaCbfs SplitCbfs(false); |
2536 | |
|
2537 | 0 | if (partitioner.canSplit(TU_MAX_TR_SPLIT, cs)) |
2538 | 0 | { |
2539 | 0 | partitioner.splitCurrArea(TU_MAX_TR_SPLIT, cs); |
2540 | 0 | } |
2541 | 0 | else if (currTU.cu->ispMode) |
2542 | 0 | { |
2543 | 0 | partitioner.splitCurrArea(m_ispTestedModes[0].IspType, cs); |
2544 | 0 | } |
2545 | 0 | else |
2546 | 0 | THROW("Implicit TU split not available"); |
2547 | | |
2548 | 0 | do |
2549 | 0 | { |
2550 | 0 | ChromaCbfs subCbfs = xIntraChromaCodingQT(cs, partitioner); |
2551 | |
|
2552 | 0 | for (uint32_t ch = COMP_Cb; ch < numValidTBlocks; ch++) |
2553 | 0 | { |
2554 | 0 | const ComponentID compID = ComponentID(ch); |
2555 | 0 | SplitCbfs.cbf(compID) |= subCbfs.cbf(compID); |
2556 | 0 | } |
2557 | 0 | } while (partitioner.nextPart(cs)); |
2558 | |
|
2559 | 0 | partitioner.exitCurrSplit(); |
2560 | | |
2561 | | /*if (lumaUsesISP && cs.dist == MAX_UINT) //ahenkel |
2562 | | { |
2563 | | return cbfs; |
2564 | | }*/ |
2565 | 0 | { |
2566 | 0 | cbfs.Cb |= SplitCbfs.Cb; |
2567 | 0 | cbfs.Cr |= SplitCbfs.Cr; |
2568 | |
|
2569 | 0 | if (1) //(!lumaUsesISP) |
2570 | 0 | { |
2571 | 0 | for (auto& ptu : cs.tus) |
2572 | 0 | { |
2573 | 0 | if (currArea.Cb().contains(ptu->Cb()) || (!ptu->Cb().valid() && currArea.Y().contains(ptu->Y()))) |
2574 | 0 | { |
2575 | 0 | TU::setCbfAtDepth(*ptu, COMP_Cb, currDepth, SplitCbfs.Cb); |
2576 | 0 | TU::setCbfAtDepth(*ptu, COMP_Cr, currDepth, SplitCbfs.Cr); |
2577 | 0 | } |
2578 | 0 | } |
2579 | 0 | } |
2580 | 0 | } |
2581 | 0 | } |
2582 | 301k | return cbfs; |
2583 | 301k | } |
2584 | | |
2585 | | uint64_t IntraSearch::xFracModeBitsIntraLuma(const CodingUnit& cu, const unsigned* mpmLst) |
2586 | 1.01M | { |
2587 | 1.01M | m_CABACEstimator->resetBits(); |
2588 | | |
2589 | 1.01M | if (!cu.ciip) |
2590 | 1.01M | { |
2591 | 1.01M | m_CABACEstimator->intra_luma_pred_mode(cu, mpmLst); |
2592 | 1.01M | } |
2593 | | |
2594 | 1.01M | return m_CABACEstimator->getEstFracBits(); |
2595 | 1.01M | } |
2596 | | |
2597 | | template<typename T, size_t N, int M> |
2598 | | void IntraSearch::xReduceHadCandList(static_vector<T, N>& candModeList, static_vector<double, N>& candCostList, SortedPelUnitBufs<M>& sortedPelBuffer, int& numModesForFullRD, const double thresholdHadCost, const double* mipHadCost, const CodingUnit& cu, const bool fastMip) |
2599 | 20.7k | { |
2600 | 20.7k | const int maxCandPerType = numModesForFullRD >> 1; |
2601 | 20.7k | static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM> tempRdModeList; |
2602 | 20.7k | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> tempCandCostList; |
2603 | 20.7k | const double minCost = candCostList[0]; |
2604 | 20.7k | bool keepOneMip = candModeList.size() > numModesForFullRD; |
2605 | 20.7k | const int maxNumConv = 3; |
2606 | | |
2607 | 20.7k | int numConv = 0; |
2608 | 20.7k | int numMip = 0; |
2609 | 93.8k | for (int idx = 0; idx < candModeList.size() - (keepOneMip?0:1); idx++) |
2610 | 73.1k | { |
2611 | 73.1k | bool addMode = false; |
2612 | 73.1k | const ModeInfo& orgMode = candModeList[idx]; |
2613 | | |
2614 | 73.1k | if (!orgMode.mipFlg) |
2615 | 52.3k | { |
2616 | 52.3k | addMode = (numConv < maxNumConv); |
2617 | 52.3k | numConv += addMode ? 1:0; |
2618 | 52.3k | } |
2619 | 20.7k | else |
2620 | 20.7k | { |
2621 | 20.7k | addMode = ( numMip < maxCandPerType || (candCostList[idx] < thresholdHadCost * minCost) || keepOneMip ); |
2622 | 20.7k | keepOneMip = false; |
2623 | 20.7k | numMip += addMode ? 1:0; |
2624 | 20.7k | } |
2625 | 73.1k | if( addMode ) |
2626 | 73.0k | { |
2627 | 73.0k | tempRdModeList.push_back(orgMode); |
2628 | 73.0k | tempCandCostList.push_back(candCostList[idx]); |
2629 | 73.0k | } |
2630 | 73.1k | } |
2631 | | |
2632 | | // sort Pel Buffer |
2633 | 20.7k | int i = -1; |
2634 | 20.7k | for( auto &m: tempRdModeList) |
2635 | 73.0k | { |
2636 | 73.0k | if( ! (m == candModeList.at( ++i )) ) |
2637 | 0 | { |
2638 | 0 | for( int j = i; j < (int)candModeList.size()-1; ) |
2639 | 0 | { |
2640 | 0 | if( m == candModeList.at( ++j ) ) |
2641 | 0 | { |
2642 | 0 | sortedPelBuffer.swap( i, j); |
2643 | 0 | break; |
2644 | 0 | } |
2645 | 0 | } |
2646 | 0 | } |
2647 | 73.0k | } |
2648 | 20.7k | sortedPelBuffer.reduceTo( (int)tempRdModeList.size() ); |
2649 | | |
2650 | 20.7k | if ((cu.lwidth() > 8 && cu.lheight() > 8)) |
2651 | 18.6k | { |
2652 | | // Sort MIP candidates by Hadamard cost |
2653 | 18.6k | const int transpOff = getNumModesMip(cu.Y()); |
2654 | 18.6k | static_vector<uint8_t, FAST_UDI_MAX_RDMODE_NUM> sortedMipModes(0); |
2655 | 18.6k | static_vector<double, FAST_UDI_MAX_RDMODE_NUM> sortedMipCost(0); |
2656 | 18.6k | for (uint8_t mode : { 0, 1, 2 }) |
2657 | 56.0k | { |
2658 | 56.0k | uint8_t candMode = mode + uint8_t((mipHadCost[mode + transpOff] < mipHadCost[mode]) ? transpOff : 0); |
2659 | 56.0k | updateCandList(candMode, mipHadCost[candMode], sortedMipModes, sortedMipCost, 3); |
2660 | 56.0k | } |
2661 | | |
2662 | | // Append MIP mode to RD mode list |
2663 | 18.6k | const int modeListSize = int(tempRdModeList.size()); |
2664 | 37.3k | for (int idx = 0; idx < 3; idx++) |
2665 | 37.3k | { |
2666 | 37.3k | const bool isTransposed = (sortedMipModes[idx] >= transpOff ? true : false); |
2667 | 37.3k | const uint32_t mipIdx = (isTransposed ? sortedMipModes[idx] - transpOff : sortedMipModes[idx]); |
2668 | 37.3k | const ModeInfo mipMode( true, isTransposed, 0, NOT_INTRA_SUBPARTITIONS, mipIdx ); |
2669 | 37.3k | bool alreadyIncluded = false; |
2670 | 149k | for (int modeListIdx = 0; modeListIdx < modeListSize; modeListIdx++) |
2671 | 130k | { |
2672 | 130k | if (tempRdModeList[modeListIdx] == mipMode) |
2673 | 18.6k | { |
2674 | 18.6k | alreadyIncluded = true; |
2675 | 18.6k | break; |
2676 | 18.6k | } |
2677 | 130k | } |
2678 | | |
2679 | 37.3k | if (!alreadyIncluded) |
2680 | 18.6k | { |
2681 | 18.6k | tempRdModeList.push_back(mipMode); |
2682 | 18.6k | tempCandCostList.push_back(0); |
2683 | 18.6k | if( fastMip ) break; |
2684 | 18.6k | } |
2685 | 37.3k | } |
2686 | 18.6k | } |
2687 | | |
2688 | 20.7k | candModeList = tempRdModeList; |
2689 | 20.7k | candCostList = tempCandCostList; |
2690 | 20.7k | numModesForFullRD = int(candModeList.size()); |
2691 | 20.7k | } |
2692 | | |
2693 | | void IntraSearch::xPreCheckMTS(TransformUnit &tu, std::vector<TrMode> *trModes, const int maxCand, PelUnitBuf *predBuf, const ComponentID& compID) |
2694 | 15.4k | { |
2695 | 15.4k | if (compID == COMP_Y) |
2696 | 15.4k | { |
2697 | 15.4k | CodingStructure& cs = *tu.cs; |
2698 | 15.4k | const CompArea& area = tu.blocks[compID]; |
2699 | 15.4k | const ReshapeData& reshapeData = cs.picture->reshapeData; |
2700 | 15.4k | const CodingUnit& cu = *cs.getCU(area.pos(), CH_L,TREE_D); |
2701 | 15.4k | PelBuf piPred = cs.getPredBuf(area); |
2702 | 15.4k | PelBuf piResi = cs.getResiBuf(area); |
2703 | | |
2704 | 15.4k | initIntraPatternChType(*tu.cu, area); |
2705 | 15.4k | if (predBuf) |
2706 | 13.8k | { |
2707 | 13.8k | piPred.copyFrom(predBuf->Y()); |
2708 | 13.8k | } |
2709 | 1.65k | else if (CU::isMIP(cu, CH_L)) |
2710 | 1.63k | { |
2711 | 1.63k | initIntraMip(cu); |
2712 | 1.63k | predIntraMip(piPred, cu); |
2713 | 1.63k | } |
2714 | 19 | else |
2715 | 19 | { |
2716 | 19 | predIntraAng(COMP_Y, piPred, cu); |
2717 | 19 | } |
2718 | | |
2719 | | //===== get residual signal ===== |
2720 | 15.4k | if (cs.picHeader->lmcsEnabled && reshapeData.getCTUFlag()) |
2721 | 0 | { |
2722 | 0 | piResi.subtract(cs.getRspOrgBuf(), piPred); |
2723 | 0 | } |
2724 | 15.4k | else |
2725 | 15.4k | { |
2726 | 15.4k | CPelBuf piOrg = cs.getOrgBuf(COMP_Y); |
2727 | 15.4k | piResi.subtract(piOrg, piPred); |
2728 | 15.4k | } |
2729 | 15.4k | m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, compID); |
2730 | 15.4k | } |
2731 | 0 | else |
2732 | 0 | { |
2733 | 0 | ComponentID codeCompId = (tu.jointCbCr ? (tu.jointCbCr >> 1 ? COMP_Cb : COMP_Cr) : compID); |
2734 | 0 | m_pcTrQuant->checktransformsNxN(tu, trModes, m_pcEncCfg->m_MTSIntraMaxCand, codeCompId); |
2735 | 0 | } |
2736 | 15.4k | } |
2737 | | |
2738 | | double IntraSearch::xTestISP(CodingStructure& cs, Partitioner& subTuPartitioner, double bestCostForISP, PartSplit ispType, bool& splitcbf, uint64_t& singleFracBits, Distortion& singleDistLuma, CUCtx& cuCtx) |
2739 | 16.2k | { |
2740 | 16.2k | int subTuCounter = 0; |
2741 | 16.2k | bool earlySkipISP = false; |
2742 | 16.2k | bool splitCbfLuma = false; |
2743 | 16.2k | CodingUnit& cu = *cs.cus[0]; |
2744 | | |
2745 | 16.2k | Distortion singleDistTmpLumaSUM = 0; |
2746 | 16.2k | uint64_t singleTmpFracBitsSUM = 0; |
2747 | 16.2k | double singleCostTmpSUM = 0; |
2748 | 16.2k | cuCtx.isDQPCoded = true; |
2749 | 16.2k | cuCtx.isChromaQpAdjCoded = true; |
2750 | | |
2751 | 16.2k | do |
2752 | 20.7k | { |
2753 | 20.7k | Distortion singleDistTmpLuma = 0; |
2754 | 20.7k | uint64_t singleTmpFracBits = 0; |
2755 | 20.7k | double singleCostTmp = 0; |
2756 | 20.7k | TransformUnit& tmpTUcur = ((cs.tus.size() < (subTuCounter + 1))) |
2757 | 20.7k | ? cs.addTU(CS::getArea(cs, subTuPartitioner.currArea(), subTuPartitioner.chType, |
2758 | 3.68k | subTuPartitioner.treeType), |
2759 | 3.68k | subTuPartitioner.chType, cs.cus[0]) |
2760 | 20.7k | : *cs.tus[subTuCounter]; |
2761 | 20.7k | tmpTUcur.depth = subTuPartitioner.currTrDepth; |
2762 | | |
2763 | | // Encode TU |
2764 | 20.7k | xIntraCodingTUBlock(tmpTUcur, COMP_Y, false, singleDistTmpLuma, 0); |
2765 | 20.7k | cuCtx.mtsLastScanPos = false; |
2766 | | |
2767 | 20.7k | if (singleDistTmpLuma == MAX_INT) // all zero CBF skip |
2768 | 0 | { |
2769 | 0 | earlySkipISP = true; |
2770 | 0 | singleCostTmpSUM = MAX_DOUBLE; |
2771 | 0 | break; |
2772 | 0 | } |
2773 | | |
2774 | 20.7k | if (m_pcRdCost->calcRdCost(singleTmpFracBitsSUM, singleDistTmpLumaSUM + singleDistTmpLuma) > bestCostForISP) |
2775 | 5.56k | { |
2776 | 5.56k | earlySkipISP = true; |
2777 | 5.56k | } |
2778 | 15.1k | else |
2779 | 15.1k | { |
2780 | 15.1k | m_ispTestedModes[0].IspType = ispType; |
2781 | 15.1k | m_ispTestedModes[0].subTuCounter = subTuCounter; |
2782 | 15.1k | singleTmpFracBits = xGetIntraFracBitsQT(cs, subTuPartitioner, true, &cuCtx); |
2783 | 15.1k | } |
2784 | 20.7k | singleCostTmp = m_pcRdCost->calcRdCost(singleTmpFracBits, singleDistTmpLuma); |
2785 | | |
2786 | 20.7k | singleCostTmpSUM += singleCostTmp; |
2787 | 20.7k | singleDistTmpLumaSUM += singleDistTmpLuma; |
2788 | 20.7k | singleTmpFracBitsSUM += singleTmpFracBits; |
2789 | | |
2790 | 20.7k | subTuCounter++; |
2791 | | |
2792 | 20.7k | splitCbfLuma |= TU::getCbfAtDepth( *cs.getTU(subTuPartitioner.currArea().lumaPos(), subTuPartitioner.chType, subTuCounter - 1), |
2793 | 20.7k | COMP_Y, subTuPartitioner.currTrDepth); |
2794 | 20.7k | int nSubPartitions = m_ispTestedModes[cu.lfnstIdx].numTotalParts[cu.ispMode - 1]; |
2795 | 20.7k | bool doStop = (m_pcEncCfg->m_ISP != 1) || (subTuCounter < nSubPartitions); |
2796 | 20.7k | if (doStop) |
2797 | 20.7k | { |
2798 | 20.7k | if (singleCostTmpSUM > bestCostForISP) |
2799 | 13.7k | { |
2800 | 13.7k | earlySkipISP = true; |
2801 | 13.7k | break; |
2802 | 13.7k | } |
2803 | 6.93k | if (subTuCounter < nSubPartitions) |
2804 | 5.51k | { |
2805 | 5.51k | double threshold = nSubPartitions == 2 ? 0.95 : subTuCounter == 1 ? 0.83 : 0.91; |
2806 | 5.51k | if (singleCostTmpSUM > bestCostForISP * threshold) |
2807 | 1.10k | { |
2808 | 1.10k | earlySkipISP = true; |
2809 | 1.10k | break; |
2810 | 1.10k | } |
2811 | 5.51k | } |
2812 | 6.93k | } |
2813 | 20.7k | } while (subTuPartitioner.nextPart(cs)); |
2814 | 16.2k | singleDistLuma = singleDistTmpLumaSUM; |
2815 | 16.2k | singleFracBits = singleTmpFracBitsSUM; |
2816 | | |
2817 | 16.2k | splitcbf = splitCbfLuma; |
2818 | 16.2k | return earlySkipISP ? MAX_DOUBLE : singleCostTmpSUM; |
2819 | 16.2k | } |
2820 | | |
2821 | | int IntraSearch::xSpeedUpISP(int speed, bool& testISP, int mode, int& noISP, int& endISP, CodingUnit& cu, static_vector<ModeInfo, FAST_UDI_MAX_RDMODE_NUM>& RdModeList, const ModeInfo& bestPUMode, int bestISP, int bestLfnstIdx) |
2822 | 14.6k | { |
2823 | 14.6k | if (speed) |
2824 | 5.95k | { |
2825 | 5.95k | if (mode >= 1) |
2826 | 3.12k | { |
2827 | 3.12k | if (m_ispTestedModes[0].splitIsFinished[1] && m_ispTestedModes[0].splitIsFinished[0]) |
2828 | 0 | { |
2829 | 0 | testISP = false; |
2830 | 0 | endISP = 0; |
2831 | 0 | } |
2832 | 3.12k | else |
2833 | 3.12k | { |
2834 | 3.12k | if (m_pcEncCfg->m_ISP >= 2) |
2835 | 3.12k | { |
2836 | 3.12k | if (mode == 1) //best Hor||Ver |
2837 | 2.82k | { |
2838 | 2.82k | int bestDir = 0; |
2839 | 8.47k | for (int d = 0; d < 2; d++) |
2840 | 5.65k | { |
2841 | 5.65k | int d2 = d ? 0 : 1; |
2842 | 5.65k | if ((m_ispTestedModes[0].bestCost[d] <= m_ispTestedModes[0].bestCost[d2]) |
2843 | 5.34k | && (m_ispTestedModes[0].bestCost[d] != MAX_DOUBLE)) |
2844 | 301 | { |
2845 | 301 | bestDir = d + 1; |
2846 | 301 | m_ispTestedModes[0].splitIsFinished[d2] = true; |
2847 | 301 | } |
2848 | 5.65k | } |
2849 | 2.82k | m_ispTestedModes[0].bestModeSoFar = bestDir; |
2850 | 2.82k | if (m_ispTestedModes[0].bestModeSoFar <= 0) |
2851 | 2.52k | { |
2852 | 2.52k | m_ispTestedModes[0].splitIsFinished[1] = true; |
2853 | 2.52k | m_ispTestedModes[0].splitIsFinished[0] = true; |
2854 | 2.52k | testISP = false; |
2855 | 2.52k | endISP = 0; |
2856 | 2.52k | } |
2857 | 2.82k | } |
2858 | 3.12k | if (m_ispTestedModes[0].bestModeSoFar == 2) |
2859 | 74 | { |
2860 | 74 | noISP = 1; |
2861 | 74 | } |
2862 | 3.05k | else |
2863 | 3.05k | { |
2864 | 3.05k | endISP = 1; |
2865 | 3.05k | } |
2866 | 3.12k | } |
2867 | 3.12k | } |
2868 | 3.12k | } |
2869 | 5.95k | if (testISP) |
2870 | 3.42k | { |
2871 | 3.42k | if (mode == 2) |
2872 | 301 | { |
2873 | 903 | for (int d = 0; d < 2; d++) |
2874 | 602 | { |
2875 | 602 | int d2 = d ? 0 : 1; |
2876 | 602 | if (m_ispTestedModes[0].bestCost[d] == MAX_DOUBLE) |
2877 | 278 | { |
2878 | 278 | m_ispTestedModes[0].splitIsFinished[d] = true; |
2879 | 278 | } |
2880 | 602 | if ((m_ispTestedModes[0].bestCost[d2] < 1.3 * m_ispTestedModes[0].bestCost[d]) |
2881 | 324 | && (int(m_ispTestedModes[0].bestSplitSoFar) != (d + 1))) |
2882 | 248 | { |
2883 | 248 | if (d) |
2884 | 211 | { |
2885 | 211 | endISP = 1; |
2886 | 211 | } |
2887 | 37 | else |
2888 | 37 | { |
2889 | 37 | noISP = 1; |
2890 | 37 | } |
2891 | 248 | m_ispTestedModes[0].splitIsFinished[d] = true; |
2892 | 248 | } |
2893 | 602 | } |
2894 | 301 | } |
2895 | 3.12k | else |
2896 | 3.12k | { |
2897 | 3.12k | if (m_ispTestedModes[0].splitIsFinished[0]) |
2898 | 37 | { |
2899 | 37 | noISP = 1; |
2900 | 37 | } |
2901 | 3.12k | if (m_ispTestedModes[0].splitIsFinished[1]) |
2902 | 264 | { |
2903 | 264 | endISP = 1; |
2904 | 264 | } |
2905 | 3.12k | } |
2906 | 3.42k | } |
2907 | 5.95k | if ((noISP == 1) && (endISP == 1)) |
2908 | 23 | { |
2909 | 23 | endISP = 0; |
2910 | 23 | } |
2911 | 5.95k | } |
2912 | 8.74k | else |
2913 | 8.74k | { |
2914 | 8.74k | bool stopFound = false; |
2915 | 8.74k | if (m_pcEncCfg->m_ISP >= 3) |
2916 | 8.74k | { |
2917 | 8.74k | if (mode) |
2918 | 3.10k | { |
2919 | 3.10k | if ((bestISP == 0) || ((bestPUMode.modeId != RdModeList[mode - 1].modeId) |
2920 | 97 | && (bestPUMode.modeId != RdModeList[mode].modeId))) |
2921 | 2.16k | { |
2922 | 2.16k | stopFound = true; |
2923 | 2.16k | } |
2924 | 3.10k | } |
2925 | 8.74k | } |
2926 | 8.74k | if (cu.mipFlag || cu.multiRefIdx) |
2927 | 182 | { |
2928 | 182 | cu.mipFlag = false; |
2929 | 182 | cu.multiRefIdx = 0; |
2930 | 182 | if (!stopFound) |
2931 | 0 | { |
2932 | 0 | for (int k = 0; k < mode; k++) |
2933 | 0 | { |
2934 | 0 | if (cu.intraDir[CH_L] == RdModeList[k].modeId) |
2935 | 0 | { |
2936 | 0 | stopFound = true; |
2937 | 0 | break; |
2938 | 0 | } |
2939 | 0 | } |
2940 | 0 | } |
2941 | 182 | } |
2942 | 8.74k | if (stopFound) |
2943 | 2.16k | { |
2944 | 2.16k | testISP = false; |
2945 | 2.16k | endISP = 0; |
2946 | 2.16k | return 1; |
2947 | 2.16k | } |
2948 | 6.58k | if (!stopFound && (m_pcEncCfg->m_ISP >= 2) && (cu.intraDir[CH_L] == DC_IDX)) |
2949 | 948 | { |
2950 | 948 | stopFound = true; |
2951 | 948 | endISP = 0; |
2952 | 948 | return 1; |
2953 | 948 | } |
2954 | 6.58k | } |
2955 | 11.5k | return 0; |
2956 | 14.6k | } |
2957 | | |
2958 | | void IntraSearch::xSpeedUpIntra(double bestcost, int& EndMode, int& speedIntra, CodingUnit& cu) |
2959 | 27.1k | { |
2960 | 27.1k | int bestIdxbefore = m_ispTestedModes[0].bestIntraMode; |
2961 | 27.1k | if (m_ispTestedModes[0].isIntra) |
2962 | 0 | { |
2963 | 0 | if (bestIdxbefore == 1)//ISP |
2964 | 0 | { |
2965 | 0 | speedIntra = 14; |
2966 | 0 | } |
2967 | 0 | if (bestIdxbefore == 4)//MTS |
2968 | 0 | { |
2969 | 0 | speedIntra = 3; |
2970 | 0 | } |
2971 | 0 | } |
2972 | 27.1k | else if (!cu.cs->slice->isIntra()) |
2973 | 0 | { |
2974 | 0 | if (bestcost != MAX_DOUBLE) |
2975 | 0 | { |
2976 | 0 | speedIntra = 10; |
2977 | 0 | } |
2978 | 0 | } |
2979 | 27.1k | if (m_ispTestedModes[0].bestBefore[0] == -1) |
2980 | 24.3k | { |
2981 | 24.3k | speedIntra |= 7; |
2982 | 24.3k | if (m_pcEncCfg->m_FastIntraTools == 2) |
2983 | 0 | { |
2984 | 0 | EndMode = 1; |
2985 | 0 | } |
2986 | 24.3k | } |
2987 | 27.1k | if (!cu.cs->slice->isIntra()) |
2988 | 0 | { |
2989 | 0 | if ((m_ispTestedModes[0].bestBefore[1] == 1) || (m_ispTestedModes[0].bestBefore[2] == 1)) |
2990 | 0 | { |
2991 | 0 | speedIntra |= 2; |
2992 | 0 | } |
2993 | 0 | if ((m_ispTestedModes[0].bestBefore[1] == 4) || (m_ispTestedModes[0].bestBefore[2] == 4)) |
2994 | 0 | { |
2995 | 0 | speedIntra |= 3; |
2996 | 0 | } |
2997 | 0 | if ((m_ispTestedModes[0].bestBefore[1] == 2) || (m_ispTestedModes[0].bestBefore[2] == 2)) |
2998 | 0 | { |
2999 | 0 | speedIntra |= 1; |
3000 | 0 | } |
3001 | 0 | } |
3002 | 27.1k | } |
3003 | | |
3004 | | } // namespace vvenc |
3005 | | |
3006 | | //! \} |
3007 | | |