/src/vvenc/source/Lib/CommonLib/QuantRDOQ2.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file QuantRDOQ2.cpp |
45 | | \brief transform and quantization class |
46 | | */ |
47 | | |
48 | | #include "QuantRDOQ2.h" |
49 | | #include "UnitTools.h" |
50 | | #include "ContextModelling.h" |
51 | | #include "CodingStructure.h" |
52 | | #include "dtrace_next.h" |
53 | | #include "dtrace_buffer.h" |
54 | | |
55 | | #include <stdlib.h> |
56 | | #include <memory.h> |
57 | | |
58 | | #if defined( TARGET_SIMD_X86 ) |
59 | | # include "CommonDefX86.h" |
60 | | # include <simde/x86/sse4.1.h> |
61 | | #endif |
62 | | |
63 | | //! \ingroup CommonLib |
64 | | //! \{ |
65 | | |
66 | | namespace vvenc { |
67 | | |
// Rate-distortion statistics record for one coefficient group.
// NOTE(review): no use of this struct is visible in this part of the file; the notes below are
// inferred from the field names only and should be confirmed against the code that fills it:
//   iNNZbeforePos0 - presumably the count of non-zero coefficients before scan position 0
//   d64SigCost     - presumably the accumulated significance-flag cost of the group
//   d64SigCost_0   - presumably the significance-flag cost of the coefficient at position 0
68 | | struct coeffGroupRDStats |
69 | | { |
70 | | int iNNZbeforePos0; |
71 | | double d64CodedLevelandDist; // distortion and level cost only |
72 | | double d64UncodedDist; // all zero coded block distortion |
73 | | double d64SigCost; |
74 | | double d64SigCost_0; |
75 | | }; |
76 | | |
77 | | |
78 | | //! \ingroup CommonLib |
79 | | //! \{ |
80 | | |
81 | | // ==================================================================================================================== |
82 | | // Constants |
83 | | // ==================================================================================================================== |
// Number of fractional bits of the fixed-point error-scale factors: the factors are stored as
// (int)( errScale * ( 1 << COEFF_ERR_SCALE_PRECISION_BITS ) ) and the same value is used as the
// right-shift (iErrScaleShift) when they are applied to a scaled coefficient error.
84 | 0 | #define COEFF_ERR_SCALE_PRECISION_BITS 20 |
85 | | |
86 | | //! \} |
87 | | |
88 | 0 | QuantRDOQ2::QuantRDOQ2( const Quant* other, bool useScalingLists ) : QuantRDOQ( other, useScalingLists ), m_isErrScaleListOwner( false ), m_iLambda( 0 ) |
89 | 0 | { |
90 | 0 | const QuantRDOQ2 *rdoq2 = dynamic_cast<const QuantRDOQ2*>( other ); |
91 | 0 | CHECK( other && !rdoq2, "The RDOQ cast must be successfull!" ); |
92 | 0 | xInitScalingList( rdoq2 ); |
93 | 0 | } |
94 | | |
95 | | QuantRDOQ2::~QuantRDOQ2() |
96 | 0 | { |
97 | 0 | xDestroyScalingList(); |
98 | 0 | } |
99 | | |
100 | | |
101 | | /** initialization process of scaling list array |
102 | | */ |
103 | | void QuantRDOQ2::xInitScalingList( const QuantRDOQ2* other ) |
104 | 0 | { |
105 | 0 | m_isErrScaleListOwner = other == nullptr; |
106 | |
|
107 | 0 | const bool useScalingLists = getScalingListEnabled(); |
108 | |
|
109 | 0 | for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) |
110 | 0 | { |
111 | 0 | for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) |
112 | 0 | { |
113 | 0 | for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) |
114 | 0 | { |
115 | 0 | for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) |
116 | 0 | { |
117 | 0 | if( m_isErrScaleListOwner ) |
118 | 0 | { |
119 | 0 | m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new int[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr; |
120 | 0 | } |
121 | 0 | else |
122 | 0 | { |
123 | 0 | m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp]; |
124 | 0 | } |
125 | 0 | } // listID loop |
126 | 0 | } |
127 | 0 | } |
128 | 0 | } |
129 | 0 | } |
130 | | |
131 | | /** destroy quantization matrix array |
132 | | */ |
133 | | void QuantRDOQ2::xDestroyScalingList() |
134 | 0 | { |
135 | 0 | if( !m_isErrScaleListOwner ) return; |
136 | | |
137 | 0 | for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) |
138 | 0 | { |
139 | 0 | for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) |
140 | 0 | { |
141 | 0 | for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) |
142 | 0 | { |
143 | 0 | for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) |
144 | 0 | { |
145 | 0 | if(m_errScale[sizeIdX][sizeIdY][listId][qp]) |
146 | 0 | { |
147 | 0 | delete [] m_errScale[sizeIdX][sizeIdY][listId][qp]; |
148 | 0 | } |
149 | 0 | } |
150 | 0 | } |
151 | 0 | } |
152 | 0 | } |
153 | | // Quant::destroyScalingList(); |
154 | 0 | } |
155 | | |
156 | | int QuantRDOQ2::xGetErrScaleCoeff( const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth ) |
157 | 0 | { |
158 | 0 | const int iTransformShift = getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange); |
159 | 0 | double dErrScale = (double)(1 << SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function |
160 | 0 | double dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0); |
161 | 0 | dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift)); // Compensate for scaling through forward transform |
162 | 0 | const int QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp]; |
163 | 0 | double finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1)); |
164 | 0 | return finalErrScale; |
165 | 0 | } |
166 | | |
167 | | /** set error scale coefficients |
168 | | * \param list list ID |
169 | | * \param size |
170 | | * \param qp quantization parameter |
171 | | * \param maxLog2TrDynamicRange |
172 | | * \param bitDepths reference to bit depth array for all channels |
173 | | */ |
174 | | void QuantRDOQ2::xSetErrScaleCoeff( unsigned list, unsigned sizeX, unsigned sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths ) |
175 | 0 | { |
176 | 0 | const int width = g_scalingListSizeX[sizeX]; |
177 | 0 | const int height = g_scalingListSizeX[sizeY]; |
178 | 0 | const ChannelType channelType = ((list == 0) || (list == MAX_NUM_COMP)) ? CH_L : CH_C; |
179 | 0 | const int channelBitDepth = bitDepths.recon[channelType]; |
180 | 0 | const int iTransformShift = getTransformShift( channelBitDepth, Size( width, height ), maxLog2TrDynamicRange[channelType] ); |
181 | 0 | const double dTransShift = (double)iTransformShift; |
182 | |
|
183 | 0 | double dErrScale = pow( 2.0, ( (double)SCALE_BITS / 2.0 ) ); // Compensate for scaling of bitcount in Lagrange cost function |
184 | 0 | dErrScale = dErrScale*pow( 2.0, ( -/*2.0**/( dTransShift ) ) ); // Compensate for scaling through forward transform |
185 | |
|
186 | 0 | if( getScalingListEnabled() ) |
187 | 0 | { |
188 | 0 | const unsigned uiMaxNumCoeff = g_scalingListSizeX[sizeX] * g_scalingListSizeX[sizeY]; |
189 | 0 | const int *piQuantCoeff = getQuantCoeff( list, qp, sizeX, sizeY ); |
190 | 0 | int *piErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp ); |
191 | |
|
192 | 0 | for( unsigned i = 0; i < uiMaxNumCoeff; i++ ) |
193 | 0 | { |
194 | 0 | int QStep = piQuantCoeff[i]; |
195 | 0 | double errScale = dErrScale / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT( channelBitDepth ) /*<< 1*/)); // (1 << ( /*2 **/ (bitDepths.recon[channelType] - 8))); |
196 | 0 | piErrScale[i] = ( int ) (errScale * ( double ) (1 << COEFF_ERR_SCALE_PRECISION_BITS)); |
197 | 0 | } |
198 | 0 | } |
199 | |
|
200 | 0 | xSetErrScaleCoeffNoScalingList( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths ); |
201 | 0 | } |
202 | | |
203 | | void QuantRDOQ2::xSetErrScaleCoeffNoScalingList( unsigned list, unsigned wIdx, unsigned hIdx, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths ) |
204 | 0 | { |
205 | 0 | const int width = g_scalingListSizeX[wIdx]; |
206 | 0 | const int height = g_scalingListSizeX[hIdx]; |
207 | 0 | const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C; |
208 | 0 | const int channelBitDepth = bitDepths.recon[channelType]; |
209 | 0 | const int iTransformShift = getTransformShift( channelBitDepth, Size( width, height ), maxLog2TrDynamicRange[channelType] ); |
210 | 0 | const bool needsSqrt2 = ((Log2(width*height)) & 1) == 1; |
211 | 0 | double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 ); |
212 | |
|
213 | 0 | double dErrScale = pow( 2.0, ( (double)SCALE_BITS / 2.0 ) ); // Compensate for scaling of bitcount in Lagrange cost function |
214 | 0 | dErrScale = dErrScale*pow( 2.0, ( -( dTransShift ) ) ); // Compensate for scaling through forward transform |
215 | 0 | int QStep = g_quantScales[needsSqrt2][qp]; |
216 | |
|
217 | 0 | double errScale = dErrScale / QStep /*/ QStep*/ / (1 << (DISTORTION_PRECISION_ADJUSTMENT( channelBitDepth ) /*<< 1*/)); |
218 | 0 | xGetErrScaleCoeffNoScalingList( list, wIdx, hIdx, qp ) = (int)( errScale * (double)( 1 << COEFF_ERR_SCALE_PRECISION_BITS ) ); |
219 | 0 | } |
220 | | |
221 | | |
222 | | /** set flat matrix value to quantized coefficient |
223 | | */ |
224 | | void QuantRDOQ2::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths) |
225 | 0 | { |
226 | 0 | QuantRDOQ::setFlatScalingList( maxLog2TrDynamicRange, bitDepths ); |
227 | |
|
228 | 0 | const int minimumQp = 0; |
229 | 0 | const int maximumQp = SCALING_LIST_REM_NUM; |
230 | |
|
231 | 0 | for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++) |
232 | 0 | { |
233 | 0 | for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++) |
234 | 0 | { |
235 | 0 | for(uint32_t list = 0; list < SCALING_LIST_NUM; list++) |
236 | 0 | { |
237 | 0 | for(int qp = minimumQp; qp < maximumQp; qp++) |
238 | 0 | { |
239 | 0 | xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths ); |
240 | 0 | } |
241 | 0 | } |
242 | 0 | } |
243 | 0 | } |
244 | 0 | } |
245 | | |
246 | | |
247 | | void QuantRDOQ2::quant( TransformUnit &tu, const ComponentID compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx ) |
248 | 0 | { |
249 | 0 | if( m_RDOQ == 1 ) |
250 | 0 | { |
251 | 0 | QuantRDOQ::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
252 | 0 | return; |
253 | 0 | } |
254 | | |
255 | 0 | const CompArea& rect = tu.blocks[compID]; |
256 | 0 | const uint32_t uiWidth = rect.width; |
257 | 0 | const uint32_t uiHeight = rect.height; |
258 | |
|
259 | 0 | const CCoeffBuf& piCoef = pSrc; |
260 | |
|
261 | 0 | const bool useTransformSkip = tu.mtsIdx[compID]==MTS_SKIP; |
262 | |
|
263 | 0 | bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_RDOQ > 0; |
264 | |
|
265 | 0 | if( !tu.cu->ispMode || !isLuma(compID) ) |
266 | 0 | { |
267 | 0 | useRDOQ &= uiWidth > 2; |
268 | 0 | useRDOQ &= uiHeight > 2; |
269 | 0 | } |
270 | |
|
271 | 0 | if( useRDOQ ) |
272 | 0 | { |
273 | 0 | if( !tu.cs->picture->useSelectiveRdoq || xNeedRDOQ( tu, compID, piCoef, cQP ) ) |
274 | 0 | { |
275 | 0 | if( useTransformSkip ) |
276 | 0 | { |
277 | 0 | if( tu.cu->bdpcmM[toChannelType( compID )] ) |
278 | 0 | { |
279 | 0 | forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
280 | 0 | } |
281 | 0 | else |
282 | 0 | { |
283 | 0 | rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
284 | 0 | } |
285 | 0 | } |
286 | 0 | else |
287 | 0 | { |
288 | 0 | xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx, getScalingListEnabled() ); |
289 | 0 | } |
290 | 0 | } |
291 | 0 | else |
292 | 0 | { |
293 | 0 | uiAbsSum = 0; |
294 | 0 | tu.lastPos[compID] = -1; |
295 | 0 | } |
296 | 0 | } |
297 | 0 | else |
298 | 0 | { |
299 | 0 | Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
300 | 0 | } |
301 | 0 | } |
302 | | |
303 | | inline cost_t QuantRDOQ2::xiGetICost(int iRate ) const |
304 | 0 | { |
305 | 0 | return (cost_t)(m_dLambda * iRate); |
306 | 0 | } |
307 | | |
308 | | inline cost_t QuantRDOQ2::xGetIEPRate() const |
309 | 0 | { |
310 | 0 | return 1 << SCALE_BITS; |
311 | 0 | } |
312 | | |
313 | | /** Calculates the cost for specific absolute transform level |
314 | | * \param uiAbsLevel scaled quantized level |
315 | | * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC) |
316 | | * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC) |
317 | | * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3 |
318 | | * \returns cost of given absolute transform level |
319 | | */ |
320 | | inline cost_t QuantRDOQ2::xiGetICRateCost( const uint32_t uiAbsLevel, |
321 | | const BinFracBits& fracBitsPar, |
322 | | const BinFracBits& fracBitsGt1, |
323 | | const BinFracBits& fracBitsGt2, |
324 | | const int remRegBins, |
325 | | unsigned goRiceZero, |
326 | | const uint16_t ui16AbsGoRice, |
327 | | const int maxLog2TrDynamicRange ) const |
328 | 0 | { |
329 | 0 | cost_t iRate = xGetIEPRate(); |
330 | 0 | if( remRegBins < 4 ) |
331 | 0 | { |
332 | 0 | uint32_t symbol = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel ); |
333 | 0 | uint32_t length; |
334 | 0 | const int threshold = COEF_REMAIN_BIN_REDUCTION; |
335 | 0 | if( symbol < ( threshold << ui16AbsGoRice ) ) |
336 | 0 | { |
337 | 0 | length = symbol >> ui16AbsGoRice; |
338 | 0 | iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS; |
339 | 0 | } |
340 | 0 | else |
341 | 0 | { |
342 | 0 | length = ui16AbsGoRice; |
343 | 0 | symbol = symbol - ( threshold << ui16AbsGoRice ); |
344 | 0 | while( symbol >= ( 1 << length ) ) |
345 | 0 | { |
346 | 0 | symbol -= ( 1 << ( length++ ) ); |
347 | 0 | } |
348 | 0 | iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; |
349 | 0 | } |
350 | 0 | } |
351 | 0 | else |
352 | 0 | { |
353 | 0 | const uint32_t cthres = 4; |
354 | 0 | if( uiAbsLevel >= cthres ) |
355 | 0 | { |
356 | 0 | uint32_t symbol = ( uiAbsLevel - cthres ) >> 1; |
357 | 0 | uint32_t length; |
358 | 0 | const int threshold = COEF_REMAIN_BIN_REDUCTION; |
359 | 0 | if( symbol < ( threshold << ui16AbsGoRice ) ) |
360 | 0 | { |
361 | 0 | length = symbol >> ui16AbsGoRice; |
362 | 0 | iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS; |
363 | 0 | } |
364 | 0 | else |
365 | 0 | { |
366 | 0 | length = ui16AbsGoRice; |
367 | 0 | symbol = symbol - ( threshold << ui16AbsGoRice ); |
368 | 0 | while( symbol >= ( 1 << length ) ) |
369 | 0 | { |
370 | 0 | symbol -= ( 1 << ( length++ ) ); |
371 | 0 | } |
372 | 0 | iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; |
373 | 0 | } |
374 | |
|
375 | 0 | iRate += fracBitsGt1.intBits[1]; |
376 | 0 | iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1]; |
377 | 0 | iRate += fracBitsGt2.intBits[1]; |
378 | 0 | } |
379 | 0 | else if( uiAbsLevel == 1 ) |
380 | 0 | { |
381 | 0 | iRate += fracBitsGt1.intBits[0]; |
382 | 0 | } |
383 | 0 | else if( uiAbsLevel == 2 ) |
384 | 0 | { |
385 | 0 | iRate += fracBitsGt1.intBits[1]; |
386 | 0 | iRate += fracBitsPar.intBits[0]; |
387 | 0 | iRate += fracBitsGt2.intBits[0]; |
388 | 0 | } |
389 | 0 | else if( uiAbsLevel == 3 ) |
390 | 0 | { |
391 | 0 | iRate += fracBitsGt1.intBits[1]; |
392 | 0 | iRate += fracBitsPar.intBits[1]; |
393 | 0 | iRate += fracBitsGt2.intBits[0]; |
394 | 0 | } |
395 | 0 | else |
396 | 0 | { |
397 | 0 | iRate = 0; |
398 | 0 | } |
399 | 0 | } |
400 | 0 | return xiGetICost( (int)iRate ); |
401 | 0 | } |
402 | | |
403 | | inline cost_t QuantRDOQ2::xiGetCostSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const |
404 | 0 | { |
405 | 0 | return xiGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] ); |
406 | 0 | } |
407 | | |
408 | | void QuantRDOQ2::xInitLastPosBitsTab( const CoeffCodingContext& cctx, const uint32_t uiWidth, const uint32_t uiHeight, const ChannelType chType, const FracBitsAccess& fracBits ) |
409 | 0 | { |
410 | 0 | int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth); |
411 | 0 | int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight); |
412 | |
|
413 | 0 | int bitsX = 0; |
414 | 0 | int bitsY = 0; |
415 | 0 | int ctxId; |
416 | | |
417 | | //X-coordinate |
418 | 0 | for( ctxId = 0; ctxId < g_uiGroupIdx[dim1 - 1]; ctxId++ ) |
419 | 0 | { |
420 | 0 | const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId( ctxId ) ); |
421 | 0 | m_lastBitsX[chType][ctxId] = bitsX + fB.intBits[0]; |
422 | 0 | bitsX += fB.intBits[1]; |
423 | 0 | } |
424 | 0 | m_lastBitsX[chType][ctxId] = bitsX; |
425 | | |
426 | | //Y-coordinate |
427 | 0 | for( ctxId = 0; ctxId < g_uiGroupIdx[dim2 - 1]; ctxId++ ) |
428 | 0 | { |
429 | 0 | const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId( ctxId ) ); |
430 | 0 | m_lastBitsY[chType][ctxId] = bitsY + fB.intBits[0]; |
431 | 0 | bitsY += fB.intBits[1]; |
432 | 0 | } |
433 | 0 | m_lastBitsY[chType][ctxId] = bitsY; |
434 | 0 | } |
435 | | |
436 | | |
437 | | /** Calculates the cost of signaling the last significant coefficient in the block |
438 | | * \param uiPosX X coordinate of the last significant coefficient |
439 | | * \param uiPosY Y coordinate of the last significant coefficient |
440 | | * \returns cost of last significant coefficient |
441 | | */ |
442 | | /* |
443 | | * \param uiWidth width of the transform unit (TU) |
444 | | */ |
445 | | inline cost_t QuantRDOQ2::xiGetCostLast( const uint32_t uiPosX, const uint32_t uiPosY, const ChannelType chType ) const |
446 | 0 | { |
447 | 0 | uint32_t uiCtxX = g_uiGroupIdx[uiPosX]; |
448 | 0 | uint32_t uiCtxY = g_uiGroupIdx[uiPosY]; |
449 | |
|
450 | 0 | uint32_t uiCost = m_lastBitsX[chType][uiCtxX] + m_lastBitsY[chType][uiCtxY]; |
451 | |
|
452 | 0 | if( uiCtxX > 3 ) |
453 | 0 | { |
454 | 0 | uiCost += xGetIEPRate() * ( ( uiCtxX - 2 ) >> 1 ); |
455 | 0 | } |
456 | 0 | if( uiCtxY > 3 ) |
457 | 0 | { |
458 | 0 | uiCost += xGetIEPRate() * ( ( uiCtxY - 2 ) >> 1 ); |
459 | 0 | } |
460 | 0 | return xiGetICost( (int)uiCost ); |
461 | 0 | } |
462 | | |
463 | | inline cost_t QuantRDOQ2::xiGetCostSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const |
464 | 0 | { |
465 | 0 | return xiGetICost( fracBitsSig.intBits[uiSignificance] ); |
466 | 0 | } |
467 | | |
468 | | static inline cost_t _dist( cost_t iErr, cost_t iErrScale, int64_t iErrScaleShift ) |
469 | 0 | { |
470 | 0 | int64_t iSqrtErrCost = ( iErr*iErrScale ) >> iErrScaleShift; |
471 | 0 | int64_t iDist = iSqrtErrCost*iSqrtErrCost; |
472 | 0 | return iDist; |
473 | 0 | } |
474 | | |
475 | | template< bool bSBH, bool bUseScalingList > |
476 | | int QuantRDOQ2::xRateDistOptQuantFast( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx ) |
477 | 0 | { |
478 | 0 | CoeffCodingContext cctx( tu, compID, bSBH, false, m_tplBuf ); |
479 | 0 | const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); |
480 | |
|
481 | 0 | const SPS &sps = *tu.cs->sps; |
482 | 0 | const CompArea &rect = tu.blocks[compID]; |
483 | 0 | const uint32_t uiWidth = rect.width; |
484 | 0 | const uint32_t uiHeight = rect.height; |
485 | 0 | const ChannelType chType = toChannelType( compID ); |
486 | 0 | const int channelBitDepth = sps.bitDepths[ chType ]; |
487 | |
|
488 | 0 | const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(); |
489 | |
|
490 | 0 | if( compID != COMP_Cr || !tu.cbf[COMP_Cb] ) |
491 | 0 | xInitLastPosBitsTab( cctx, uiWidth, uiHeight, chType, fracBits ); |
492 | | |
493 | | /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be |
494 | | * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the |
495 | | * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) |
496 | | * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result |
497 | | */ |
498 | | |
499 | | // Represents scaling through forward transform |
500 | 0 | const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); |
501 | |
|
502 | 0 | const uint32_t uiLog2BlockWidth = Log2(uiWidth); |
503 | 0 | const uint32_t uiLog2BlockHeight = Log2(uiHeight); |
504 | 0 | const uint32_t uiMaxNumCoeff = uiWidth * uiHeight; |
505 | 0 | const uint32_t log2CGSize = cctx.log2CGSize(); |
506 | |
|
507 | 0 | int scalingListType = getScalingListType( tu.cu->predMode, compID ); |
508 | 0 | CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); |
509 | |
|
510 | 0 | const TCoeff *plSrcCoeff = pSrc.buf; |
511 | 0 | TCoeffSig *piDstCoeff = tu.getCoeffs( compID ).buf; |
512 | |
|
513 | 0 | memset( piDstCoeff, 0, sizeof( *piDstCoeff ) * uiMaxNumCoeff ); |
514 | |
|
515 | 0 | const bool needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID ); |
516 | 0 | const bool isTransformSkip = tu.mtsIdx[compID] == MTS_SKIP; |
517 | 0 | const int *quantScaleList = getQuantCoeff( scalingListType, cQP.rem( isTransformSkip ), uiLog2BlockWidth, uiLog2BlockHeight ); |
518 | 0 | const int defaultQuantScale = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem( isTransformSkip )]; |
519 | 0 | const int defaultErrScale = xGetErrScaleCoeffNoScalingList( scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem( isTransformSkip ) ); |
520 | 0 | const int *piErrScale = xGetErrScaleCoeffSL ( scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem( isTransformSkip ) ); |
521 | 0 | const int iErrScaleShift = COEFF_ERR_SCALE_PRECISION_BITS; |
522 | 0 | int iQBits = QUANT_SHIFT + cQP.per( isTransformSkip ) + iTransformShift + (needSqrtAdjustment?-1:0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits |
523 | 0 | int iQOffset = 1 << ( iQBits - 1 ); |
524 | |
|
525 | 0 | cost_t piCostCoeff [16]; |
526 | 0 | cost_t piCostSig [16]; |
527 | 0 | cost_t piCostCoeff0 [16]; |
528 | 0 | cost_t piCostDeltaSBH[16]; |
529 | 0 | int piAddSBH [16]; |
530 | |
|
531 | 0 | cost_t iCodedCostBlock = 0; |
532 | 0 | cost_t iUncodedCostBlock = 0; |
533 | 0 | int iLastScanPos = -1; |
534 | 0 | int lastSubSetId = -1; |
535 | 0 | bool lastOptFinished = false; |
536 | 0 | cost_t bestTotalCost = std::numeric_limits<cost_t>::max() / 2; |
537 | |
|
538 | 0 | int ctxBinSampleRatio = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT; |
539 | 0 | int remRegBins = ( tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio ) >> 4; |
540 | 0 | uint32_t goRiceParam = 0; |
541 | |
|
542 | | #if ENABLE_TRACING |
543 | | bool bFirstNZSeen = false; |
544 | | DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID ); |
545 | | #endif |
546 | |
|
547 | 0 | uiAbsSum = 0; |
548 | |
|
549 | 0 | const int iCGSize = 1 << log2CGSize; |
550 | 0 | const int iCGSizeM1 = iCGSize - 1; |
551 | |
|
552 | 0 | const uint32_t lfnstIdx = tu.cu->lfnstIdx; |
553 | 0 | const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize(); |
554 | 0 | int iScanPos = ( iCGNum << log2CGSize ) - 1; |
555 | |
|
556 | 0 | if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 ) ) ) |
557 | 0 | { |
558 | 0 | iScanPos = 7; |
559 | 0 | } |
560 | | |
561 | | // Find first non-zero coeff |
562 | 0 | for( ; iScanPos > 0; iScanPos-- ) |
563 | 0 | { |
564 | 0 | uint32_t uiBlkPos = cctx.blockPos( iScanPos ); |
565 | 0 | if( plSrcCoeff[uiBlkPos] ) |
566 | 0 | break; |
567 | 0 | } |
568 | | |
569 | | ////////////////////////////////////////////////////////////////////////// |
570 | | // Loop over sub-sets (coefficient groups) |
571 | | ////////////////////////////////////////////////////////////////////////// |
572 | | |
573 | 0 | TCoeff thres = 0, useThres = 0; |
574 | | |
575 | 0 | if( iQBits ) |
576 | 0 | thres = TCoeff( ( int64_t( m_thrVal ) << ( iQBits - 1 ) ) ); |
577 | 0 | else |
578 | 0 | thres = TCoeff( ( int64_t( m_thrVal >> 1 ) << iQBits ) ); |
579 | |
|
580 | 0 | if( !bUseScalingList ) |
581 | 0 | { |
582 | 0 | useThres = thres / ( defaultQuantScale << 2 ); |
583 | 0 | } |
584 | |
|
585 | 0 | const bool scanFirstBlk = !bUseScalingList && log2CGSize == 4 && cctx.log2CGWidth() == 2; |
586 | 0 | #if ENABLE_SIMD_OPT_QUANT && defined( TARGET_SIMD_X86 ) |
587 | 0 | const bool isSimd = read_x86_extension_flags() > x86_simd::SCALAR; |
588 | 0 | #endif |
589 | |
|
590 | 0 | int subSetId = iScanPos >> log2CGSize; |
591 | 0 | for( ; subSetId >= 0; subSetId-- ) |
592 | 0 | { |
593 | 0 | int iNZbeforePos0 = 0; |
594 | 0 | int uiAbsSumCG = 0; |
595 | 0 | cost_t iCodedCostCG = 0; |
596 | 0 | cost_t iUncodedCostCG = 0; |
597 | |
|
598 | 0 | int iScanPosinCG = iScanPos & ( iCGSize - 1 ); |
599 | 0 | if( iLastScanPos < 0 ) |
600 | 0 | { |
601 | 0 | #if ENABLE_SIMD_OPT_QUANT && defined( TARGET_SIMD_X86 ) |
602 | | // if more than one 4x4 coding subblock is available, use SIMD to find first subblock with coefficient larger than threshold |
603 | 0 | if( scanFirstBlk && iScanPos >= 16 && isSimd ) |
604 | 0 | { |
605 | | // move the pointer to the beginning of the current subblock |
606 | 0 | const int firstTestPos = iScanPos - iScanPosinCG; |
607 | 0 | uint32_t uiBlkPos = cctx.blockPos( firstTestPos ); |
608 | |
|
609 | 0 | const __m128i xdfTh = _mm_set1_epi32( useThres ); |
610 | | |
611 | | // read first line of the subblock and check for coefficients larger than the threshold |
612 | | // assumming the subblocks are dense 4x4 blocks in raster scan order with the stride of tuPars.m_width |
613 | 0 | __m128i xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) ); |
614 | 0 | __m128i xdf = _mm_cmpgt_epi32( xl0, xdfTh ); |
615 | | |
616 | | // same for the next line in the subblock |
617 | 0 | uiBlkPos += uiWidth; |
618 | 0 | xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) ); |
619 | 0 | xdf = _mm_or_si128( xdf, _mm_cmpgt_epi32( xl0, xdfTh ) ); |
620 | | |
621 | | // and the third line |
622 | 0 | uiBlkPos += uiWidth; |
623 | 0 | xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) ); |
624 | 0 | xdf = _mm_or_si128( xdf, _mm_cmpgt_epi32( xl0, xdfTh ) ); |
625 | | |
626 | | // and the last line |
627 | 0 | uiBlkPos += uiWidth; |
628 | 0 | xl0 = _mm_abs_epi32( _mm_loadu_si128( ( const __m128i* ) &plSrcCoeff[uiBlkPos] ) ); |
629 | 0 | xdf = _mm_or_si128( xdf, _mm_cmpgt_epi32( xl0, xdfTh ) ); |
630 | |
|
631 | 0 | if( _mm_testz_si128( xdf, xdf ) ) |
632 | 0 | { |
633 | 0 | iScanPos -= iScanPosinCG + 1; |
634 | 0 | iScanPosinCG = -1; |
635 | 0 | continue; |
636 | 0 | } |
637 | 0 | } |
638 | 0 | else |
639 | 0 | #endif |
640 | 0 | if( scanFirstBlk && iScanPos >= 16 ) |
641 | 0 | { |
642 | 0 | bool allSmaller = true; |
643 | |
|
644 | 0 | for( int xScanPosinCG = iScanPosinCG, xScanPos = iScanPos; allSmaller && xScanPosinCG >= 0; xScanPosinCG--, xScanPos-- ) |
645 | 0 | { |
646 | 0 | const uint32_t uiBlkPos = cctx.blockPos( xScanPos ); |
647 | 0 | allSmaller &= std::abs( plSrcCoeff[uiBlkPos] ) <= useThres; |
648 | 0 | } |
649 | |
|
650 | 0 | if( allSmaller ) |
651 | 0 | { |
652 | 0 | iScanPos -= iScanPosinCG + 1; |
653 | 0 | iScanPosinCG = -1; |
654 | 0 | continue; |
655 | 0 | } |
656 | 0 | } |
657 | | |
658 | 0 | findlast2: |
659 | | // Fast loop to find last-pos. |
660 | | // No need to add distortion to cost as it would be added to both the coded and uncoded cost |
661 | 0 | for( ; iScanPosinCG >= 0; iScanPosinCG--, iScanPos-- ) |
662 | 0 | { |
663 | 0 | const uint32_t uiBlkPos = cctx.blockPos( iScanPos ); |
664 | | |
665 | | //===== quantization ===== |
666 | 0 | int quantScale; |
667 | 0 | if( bUseScalingList ){ quantScale = quantScaleList[uiBlkPos]; } |
668 | 0 | else{ quantScale = defaultQuantScale; } |
669 | | |
670 | 0 | const uint32_t uiMaxAbsLevel = ( std::abs( plSrcCoeff[uiBlkPos] ) * quantScale + iQOffset ) >> iQBits; |
671 | |
|
672 | 0 | if( uiMaxAbsLevel ) |
673 | 0 | { |
674 | 0 | iLastScanPos = iScanPos; |
675 | 0 | lastSubSetId = subSetId; |
676 | 0 | break; |
677 | 0 | } |
678 | | #if ENABLE_TRACING |
679 | | if( bFirstNZSeen ) |
680 | | { |
681 | | DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, cctx.blockPos( iScanPos ), cctx.cgPosX(), cctx.cgPosY(), cctx.posX( iScanPos ), cctx.posY( iScanPos ) ); |
682 | | DTRACE( g_trace_ctx, D_RDOQ, " remRegBins=%d \n", remRegBins ); |
683 | | DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", 0 ); |
684 | | } |
685 | | #endif |
686 | 0 | } |
687 | 0 | } |
688 | | |
689 | | ////////////////////////////////////////////////////////////////////////// |
690 | | // Loop over coefficients |
691 | | ////////////////////////////////////////////////////////////////////////// |
692 | | |
693 | 0 | cctx.initSubblock( subSetId ); |
694 | |
|
695 | 0 | const int remRegBinsStartCG = remRegBins; |
696 | |
|
697 | 0 | for( ; iScanPosinCG >= 0; iScanPosinCG--, iScanPos-- ) |
698 | 0 | { |
699 | 0 | const uint32_t uiBlkPos = cctx.blockPos( iScanPos ); |
700 | 0 | int quantScale; |
701 | 0 | int iErrScale; |
702 | | //===== quantization ===== |
703 | 0 | if( bUseScalingList ){ |
704 | 0 | quantScale = quantScaleList[uiBlkPos]; |
705 | 0 | iErrScale = piErrScale[uiBlkPos]; |
706 | 0 | } |
707 | 0 | else{ |
708 | 0 | quantScale = defaultQuantScale; |
709 | 0 | iErrScale = defaultErrScale; |
710 | 0 | } |
711 | 0 | const int iScaledLevel = std::abs( plSrcCoeff[uiBlkPos] ) * quantScale; |
712 | 0 | const int iAbsLevel = ( iScaledLevel + iQOffset ) >> iQBits; |
713 | | |
714 | | //============ Set context models =============== |
715 | 0 | unsigned ctxIdSig = 0; |
716 | |
|
717 | 0 | if( iScanPos != iLastScanPos ) |
718 | 0 | { |
719 | 0 | ctxIdSig = cctx.sigCtxIdAbsWithAcc( iScanPos, 0 ); |
720 | 0 | } |
721 | 0 | uint8_t ctxOffset = cctx.ctxOffsetAbs(); |
722 | 0 | uint32_t uiParCtx = cctx.parityCtxIdAbs ( ctxOffset ); |
723 | 0 | uint32_t uiGt1Ctx = cctx.greater1CtxIdAbs ( ctxOffset ); |
724 | 0 | uint32_t uiGt2Ctx = cctx.greater2CtxIdAbs ( ctxOffset ); |
725 | 0 | uint32_t goRiceZero = 0; |
726 | |
|
727 | 0 | const BinFracBits& fracBitsPar = fracBits.getFracBitsArray( uiParCtx ); |
728 | 0 | const BinFracBits& fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx ); |
729 | 0 | const BinFracBits& fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx ); |
730 | |
|
731 | 0 | if( remRegBins < 4 ) |
732 | 0 | { |
733 | 0 | unsigned sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 ); |
734 | 0 | goRiceParam = g_auiGoRiceParsCoeff [ sumAbs ]; |
735 | 0 | goRiceZero = g_auiGoRicePosCoeff0(0, goRiceParam); |
736 | 0 | } |
737 | |
|
738 | | #if ENABLE_TRACING |
739 | | DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, cctx.blockPos( iScanPos ), cctx.cgPosX(), cctx.cgPosY(), cctx.posX( iScanPos ), cctx.posY( iScanPos ) ); |
740 | | DTRACE( g_trace_ctx, D_RDOQ, " remRegBins=%d \n", remRegBins ); |
741 | | bFirstNZSeen = true; |
742 | | #endif |
743 | | |
744 | | // Cost for zero coeff |
745 | 0 | piCostCoeff0[iScanPosinCG] = _dist( iScaledLevel, iErrScale, iErrScaleShift ); |
746 | |
|
747 | 0 | uint32_t uiLevel = 0; |
748 | 0 | if( iAbsLevel == 0 ) |
749 | 0 | { |
750 | | // ----------------- ABS LEVEL 0 ---------------- |
751 | 0 | const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); |
752 | 0 | piCostSig [iScanPosinCG] = xiGetCostSigCoef( fracBitsSig, 0 ); |
753 | 0 | piCostCoeff[iScanPosinCG] = piCostCoeff0[iScanPosinCG] + piCostSig[iScanPosinCG]; |
754 | |
|
755 | 0 | if( bSBH ) |
756 | 0 | { |
757 | 0 | cost_t iErr1 = iScaledLevel - ( (int64_t)1 << iQBits ); |
758 | 0 | cost_t iDist1 = _dist( iErr1, iErrScale, iErrScaleShift ); |
759 | 0 | cost_t iRate1 = remRegBins < 4 ? |
760 | 0 | xiGetICRateCost( 1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - |
761 | 0 | xiGetICRateCost( 0, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ): |
762 | 0 | fracBitsGt1.intBits[ 0 ]; |
763 | |
|
764 | 0 | cost_t iCost1 = iDist1 + iRate1 + xiGetCostSigCoef( fracBitsSig, 1 ); |
765 | |
|
766 | 0 | piCostDeltaSBH[iScanPosinCG] = iCost1 - piCostCoeff[iScanPosinCG]; |
767 | 0 | piAddSBH [iScanPosinCG] = 1; |
768 | 0 | } |
769 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", 0 ); |
770 | 0 | } |
771 | 0 | else |
772 | 0 | { |
773 | | //===== coefficient level estimation ===== |
774 | 0 | const int iFloor = (int)( iScaledLevel >> iQBits ); |
775 | 0 | const int iCeil = iFloor + 1; |
776 | |
|
777 | 0 | if( remRegBins >= 4 && iScanPos != iLastScanPos && iCeil >= 4 ) |
778 | 0 | { |
779 | 0 | int sumAll = cctx.templateAbsSum( iScanPos, piDstCoeff, 4 ); |
780 | 0 | goRiceParam = g_auiGoRiceParsCoeff[ sumAll ]; |
781 | 0 | } |
782 | |
|
783 | 0 | if( iScanPos == iLastScanPos ) |
784 | 0 | { |
785 | | // ======================= ======================= |
786 | | // ======================= LAST LEVEL ======================= |
787 | | // ======================= ======================= |
788 | 0 | piCostSig[ iScanPosinCG ] = 0; |
789 | | // Floor = 0, Uncoded |
790 | 0 | cost_t iCurrCostF = piCostCoeff0[ iScanPosinCG ]; |
791 | |
|
792 | 0 | if( iFloor ) |
793 | 0 | { |
794 | | // ----------------- LEVEL > 0 ---------------- |
795 | 0 | cost_t iErrF = iScaledLevel - (iFloor << iQBits); |
796 | 0 | cost_t iDistF = _dist( iErrF, iErrScale, iErrScaleShift ); //(iErrF*iErrScale) >> iErrScaleShift; |
797 | 0 | iCurrCostF = iDistF + xiGetICRateCost( iFloor, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
798 | 0 | } |
799 | | |
800 | | // ----------------- LEVEL + 1 ---------------- |
801 | 0 | cost_t iErrC = iScaledLevel - (iCeil << iQBits); |
802 | 0 | cost_t iDistC = _dist( iErrC, iErrScale, iErrScaleShift ); //(iErrC*iErrScale) >> iErrScaleShift; |
803 | 0 | cost_t iCurrCostC = iDistC + xiGetICRateCost( iCeil, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
804 | |
|
805 | 0 | if( iCurrCostC < iCurrCostF ) |
806 | 0 | { |
807 | 0 | uiLevel = iCeil; |
808 | 0 | piCostCoeff[iScanPosinCG] = iCurrCostC; |
809 | 0 | if( bSBH ){ |
810 | 0 | piCostDeltaSBH[iScanPosinCG] = iCurrCostF - iCurrCostC; |
811 | 0 | piAddSBH [iScanPosinCG] = -1; |
812 | 0 | } |
813 | 0 | } |
814 | 0 | else |
815 | 0 | { |
816 | 0 | if( iFloor == 0 ) |
817 | 0 | { |
818 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", 0 ); |
819 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " CostC0=%lld\n", (int64_t)piCostCoeff0[iScanPosinCG] ); |
820 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " CostC =%lld\n", (int64_t)iCurrCostC ); |
821 | |
|
822 | 0 | iLastScanPos = -1; |
823 | 0 | lastSubSetId = -1; |
824 | 0 | iScanPos--; |
825 | 0 | iScanPosinCG--; |
826 | 0 | goto findlast2; |
827 | 0 | } |
828 | 0 | uiLevel = iFloor; |
829 | 0 | piCostCoeff[iScanPosinCG] = iCurrCostF; |
830 | 0 | if( bSBH ){ |
831 | 0 | piCostDeltaSBH[iScanPosinCG] = iCurrCostC - iCurrCostF; |
832 | 0 | piAddSBH [iScanPosinCG] = 1; |
833 | 0 | } |
834 | 0 | } |
835 | 0 | } |
836 | 0 | else |
837 | 0 | { |
838 | 0 | const BinFracBits& fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); |
839 | 0 | cost_t iCostSig1 = xiGetCostSigCoef( fracBitsSig, 1 ); |
840 | 0 | if( iCeil < 3 ) |
841 | 0 | { |
842 | | // ======================= ======================= |
843 | | // ======================= LEVELS 0, 1, 2 ======================= |
844 | | // ======================= ======================= |
845 | | |
846 | | // ----------------- BEST LEVEL = 0 ---------------- |
847 | 0 | cost_t iCostSig0 = xiGetCostSigCoef( fracBitsSig, 0 ); |
848 | 0 | cost_t iBestCost = piCostCoeff0[iScanPosinCG] + iCostSig0; |
849 | 0 | cost_t iBestCostSig = iCostSig0; |
850 | 0 | cost_t iCostF = iBestCost; |
851 | 0 | uiLevel = 0; |
852 | |
|
853 | 0 | if( iFloor == 1 ) |
854 | 0 | { |
855 | | // ----------------- LEVEL = 1 ---------------- |
856 | 0 | cost_t iErrF = iScaledLevel - ( iFloor << iQBits ); |
857 | 0 | cost_t iDistF = _dist( iErrF, iErrScale, iErrScaleShift ); //( iErrF*iErrScale ) >> iErrScaleShift; |
858 | 0 | iCostF = iDistF + iCostSig1 + xiGetICRateCost( iFloor, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
859 | |
|
860 | 0 | if( iCostF < iBestCost ) |
861 | 0 | { |
862 | 0 | uiLevel = iFloor; |
863 | 0 | iBestCost = iCostF; |
864 | 0 | iBestCostSig = iCostSig1; |
865 | 0 | if( bSBH ) |
866 | 0 | { |
867 | 0 | piCostDeltaSBH[iScanPosinCG] = iBestCost - iCostF; |
868 | 0 | piAddSBH [iScanPosinCG] = -1; |
869 | 0 | } |
870 | 0 | } |
871 | 0 | else |
872 | 0 | { |
873 | 0 | if( bSBH ) |
874 | 0 | { |
875 | 0 | piCostDeltaSBH[iScanPosinCG] = iCostF - iBestCost; |
876 | 0 | piAddSBH [iScanPosinCG] = 1; |
877 | 0 | } |
878 | 0 | } |
879 | 0 | } |
880 | | |
881 | | // ----------------- LEVELS = 1, 2 ---------------- |
882 | 0 | cost_t iErrC = iScaledLevel - ( iCeil << iQBits ); |
883 | 0 | cost_t iDistC = _dist( iErrC, iErrScale, iErrScaleShift ); //( iErrC*iErrScale ) >> iErrScaleShift; |
884 | 0 | cost_t iCostC = iDistC + iCostSig1 + xiGetICRateCost( iCeil, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
885 | |
|
886 | 0 | if( iCostC < iBestCost ) |
887 | 0 | { |
888 | 0 | uiLevel = iCeil; |
889 | 0 | piCostCoeff[iScanPosinCG] = iCostC; |
890 | 0 | piCostSig[iScanPosinCG] = iCostSig1; |
891 | 0 | if( bSBH ) |
892 | 0 | { |
893 | 0 | piCostDeltaSBH[iScanPosinCG] = iCostF - iCostC; |
894 | 0 | piAddSBH[iScanPosinCG] = -1; |
895 | 0 | } |
896 | 0 | } |
897 | 0 | else |
898 | 0 | { |
899 | 0 | piCostCoeff[iScanPosinCG] = iBestCost; |
900 | 0 | piCostSig[iScanPosinCG] = iBestCostSig; |
901 | 0 | if( bSBH ) |
902 | 0 | { |
903 | 0 | piCostDeltaSBH[iScanPosinCG] = iCostC - iCostF; |
904 | 0 | piAddSBH [iScanPosinCG] = 1; |
905 | 0 | } |
906 | 0 | } |
907 | 0 | } |
908 | 0 | else |
909 | 0 | { |
910 | | // ----------------- LEVEL X, X+1 ---------------- |
911 | 0 | cost_t iErrF = iScaledLevel - (iFloor << iQBits); |
912 | 0 | cost_t iDistF = _dist( iErrF, iErrScale, iErrScaleShift ); //(iErrF*iErrScale) >> iErrScaleShift; |
913 | 0 | cost_t iCostF = iDistF + iCostSig1 + xiGetICRateCost( iFloor, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
914 | |
|
915 | 0 | cost_t iErrC = iScaledLevel - ( iCeil << iQBits ); |
916 | 0 | cost_t iDistC = _dist( iErrC, iErrScale, iErrScaleShift ); //( iErrC*iErrScale ) >> iErrScaleShift; |
917 | 0 | cost_t iCostC = iDistC + iCostSig1 + xiGetICRateCost( iCeil, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
918 | |
|
919 | 0 | piCostSig[iScanPosinCG] = iCostSig1; |
920 | 0 | if( iCostC < iCostF ) |
921 | 0 | { |
922 | 0 | uiLevel = iCeil; |
923 | 0 | piCostCoeff[iScanPosinCG] = iCostC; |
924 | 0 | if( bSBH ) |
925 | 0 | { |
926 | 0 | piCostDeltaSBH[iScanPosinCG] = iCostF - iCostC; |
927 | 0 | piAddSBH[iScanPosinCG] = -1; |
928 | 0 | } |
929 | 0 | } |
930 | 0 | else |
931 | 0 | { |
932 | 0 | uiLevel = iFloor; |
933 | 0 | piCostCoeff[iScanPosinCG] = iCostF; |
934 | 0 | if( bSBH ) |
935 | 0 | { |
936 | 0 | piCostDeltaSBH[iScanPosinCG] = iCostC - iCostF; |
937 | 0 | piAddSBH[iScanPosinCG] = 1; |
938 | 0 | } |
939 | 0 | } |
940 | 0 | } |
941 | 0 | } |
942 | 0 | piDstCoeff[uiBlkPos] = uiLevel; |
943 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel ); |
944 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " CostC0=%lld\n", (int64_t)piCostCoeff0[iScanPosinCG] ); |
945 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " CostC =%lld\n", (int64_t)piCostCoeff [iScanPosinCG] ); |
946 | 0 | if( uiLevel ) |
947 | 0 | { |
948 | 0 | uiAbsSumCG += uiLevel; |
949 | 0 | iNZbeforePos0 += iScanPosinCG; // hack-> just add instead of checking iScanPosinCG >0 and increment |
950 | 0 | cctx.absVal1stPass( iScanPos, std::min<TCoeff>( 4 + ( uiLevel & 1 ), uiLevel ) ); |
951 | 0 | cctx.setSigGroup(); |
952 | 0 | } |
953 | 0 | } |
954 | | |
955 | | |
956 | 0 | if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) ) |
957 | 0 | { |
958 | 0 | goRiceParam = 0; |
959 | 0 | } |
960 | 0 | else if( remRegBins >= 4 ) |
961 | 0 | { |
962 | 0 | remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos); |
963 | 0 | } |
964 | |
|
965 | 0 | iUncodedCostCG += piCostCoeff0[iScanPosinCG]; |
966 | 0 | iCodedCostCG += piCostCoeff[iScanPosinCG]; |
967 | 0 | DTRACE( g_trace_ctx, D_RDOQ_MORE, "Uncoded=%lld\n", (long long)( iUncodedCostBlock + iUncodedCostCG ) ); |
968 | 0 | DTRACE( g_trace_ctx, D_RDOQ_MORE, "Coded =%lld\n", (long long)( iCodedCostBlock + iCodedCostCG ) ); |
969 | 0 | } // for (iScanPosinCG) |
970 | | |
971 | | //================== Group sig. flag =================== |
972 | 0 | cost_t iCostCoeffGroupSig = 0; |
973 | 0 | if( lastSubSetId >= 0 ) |
974 | 0 | { |
975 | 0 | if( subSetId ) |
976 | 0 | { |
977 | 0 | const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() ); |
978 | 0 | cost_t iCostCoeffGroupSig0 = xiGetCostSigCoeffGroup( fracBitsSigGroup, 0 ); |
979 | | |
980 | | // if no coeff in CG |
981 | 0 | if( !cctx.isSigGroup() ) |
982 | 0 | { |
983 | 0 | iCodedCostCG = iUncodedCostCG + iCostCoeffGroupSig0; |
984 | 0 | iCostCoeffGroupSig = iCostCoeffGroupSig0; |
985 | 0 | } |
986 | 0 | else |
987 | 0 | { |
988 | | // if not topleft CG |
989 | 0 | if( subSetId < lastSubSetId ) |
990 | 0 | { |
991 | 0 | cost_t iCostCoeffGroupSig1 = xiGetCostSigCoeffGroup( fracBitsSigGroup, 1 ); |
992 | 0 | iCostCoeffGroupSig = iCostCoeffGroupSig1; |
993 | | |
994 | | // if only one coeff in CG |
995 | 0 | if( !iNZbeforePos0 ) { |
996 | 0 | iCodedCostCG -= piCostSig[0]; |
997 | 0 | } |
998 | 0 | cost_t iUncodedCostCGTmp = iUncodedCostCG + iCostCoeffGroupSig0; |
999 | 0 | iCodedCostCG += iCostCoeffGroupSig1; |
1000 | | |
1001 | | // if we can save cost, change this block to all-zero block |
1002 | 0 | if( iUncodedCostCGTmp < iCodedCostCG ) |
1003 | 0 | { |
1004 | 0 | cctx.resetSigGroup(); |
1005 | 0 | iCodedCostCG = iUncodedCostCGTmp; |
1006 | 0 | iCostCoeffGroupSig = iCostCoeffGroupSig0; |
1007 | 0 | remRegBins = remRegBinsStartCG; |
1008 | | |
1009 | | // reset coeffs to 0 in this block |
1010 | 0 | for( iScanPosinCG = iCGSize - 1; iScanPosinCG >= 0; iScanPosinCG-- ) |
1011 | 0 | { |
1012 | 0 | int iScanPosTmp = subSetId * iCGSize + iScanPosinCG; |
1013 | 0 | uint32_t uiBlkPos = cctx.blockPos( iScanPosTmp ); |
1014 | 0 | if( piDstCoeff[uiBlkPos] ) |
1015 | 0 | { |
1016 | 0 | int absLevel = std::abs( piDstCoeff[uiBlkPos] ); |
1017 | 0 | cctx.remAbsVal1stPass( iScanPosTmp, std::min( absLevel, 4 + ( absLevel & 1 ) ) ); |
1018 | 0 | piDstCoeff[uiBlkPos] = 0; |
1019 | 0 | } |
1020 | 0 | } |
1021 | 0 | uiAbsSumCG = 0; |
1022 | 0 | if( lastSubSetId == subSetId ) { |
1023 | 0 | iCodedCostCG = 0; |
1024 | 0 | iUncodedCostCG = 0; |
1025 | 0 | iLastScanPos = -1; |
1026 | 0 | lastSubSetId = -1; |
1027 | 0 | } |
1028 | 0 | } |
1029 | 0 | } |
1030 | 0 | else |
1031 | 0 | { |
1032 | 0 | cctx.setSigGroup(); |
1033 | 0 | } |
1034 | 0 | } |
1035 | 0 | } |
1036 | 0 | } |
1037 | | |
1038 | | //===== estimate last position cost ===== |
1039 | 0 | bestTotalCost += iCodedCostCG; |
1040 | 0 | if( !lastOptFinished ) |
1041 | 0 | { |
1042 | 0 | if( cctx.isSigGroup( subSetId ) ) |
1043 | 0 | { |
1044 | 0 | cost_t codedCostBlockTmp = iUncodedCostBlock + iCodedCostCG - iCostCoeffGroupSig; |
1045 | 0 | int startPosInCG = subSetId == lastSubSetId ? iLastScanPos % iCGSize: iCGSizeM1; |
1046 | 0 | int newAbsSumCG = uiAbsSumCG; |
1047 | 0 | int bestLastIdxP1 = iLastScanPos + 1; |
1048 | 0 | for( int iScanPosinCGTmp = startPosInCG; iScanPosinCGTmp >= 0; iScanPosinCGTmp-- ) |
1049 | 0 | { |
1050 | 0 | uint32_t iScanPosTmp = ( subSetId << log2CGSize ) + iScanPosinCGTmp; |
1051 | 0 | uint32_t uiBlkPos = cctx.blockPos( iScanPosTmp ); |
1052 | |
|
1053 | 0 | if( piDstCoeff[uiBlkPos] ) |
1054 | 0 | { |
1055 | 0 | uint32_t uiPosY = uiBlkPos >> uiLog2BlockWidth; |
1056 | 0 | uint32_t uiPosX = uiBlkPos - (uiPosY << uiLog2BlockWidth); |
1057 | 0 | const cost_t iCostLast = xiGetCostLast( uiPosX, uiPosY, chType ); |
1058 | 0 | const cost_t totalCost = codedCostBlockTmp + iCostLast - piCostSig[iScanPosinCGTmp]; |
1059 | |
|
1060 | 0 | if( totalCost < bestTotalCost ) |
1061 | 0 | { |
1062 | 0 | bestLastIdxP1 = iScanPosTmp + 1; |
1063 | 0 | bestTotalCost = totalCost; |
1064 | 0 | lastSubSetId = subSetId; |
1065 | 0 | uiAbsSumCG = newAbsSumCG; |
1066 | 0 | uiAbsSum = 0; |
1067 | 0 | } |
1068 | |
|
1069 | 0 | if( piDstCoeff[uiBlkPos] > 1 ) |
1070 | 0 | { |
1071 | 0 | lastOptFinished = true; |
1072 | 0 | break; |
1073 | 0 | } |
1074 | 0 | newAbsSumCG -= 1; |
1075 | 0 | codedCostBlockTmp -= piCostCoeff [ iScanPosinCGTmp ]; |
1076 | 0 | codedCostBlockTmp += piCostCoeff0[ iScanPosinCGTmp ]; |
1077 | 0 | } |
1078 | 0 | else |
1079 | 0 | { |
1080 | 0 | codedCostBlockTmp -= piCostSig[ iScanPosinCGTmp ]; |
1081 | 0 | } |
1082 | 0 | } //end for |
1083 | 0 | for( int iScanPosTmp = bestLastIdxP1; iScanPosTmp <= iLastScanPos; iScanPosTmp++ ) |
1084 | 0 | { |
1085 | 0 | const int uiBlkPos = cctx.blockPos( iScanPosTmp ); |
1086 | 0 | if( piDstCoeff[uiBlkPos] ) |
1087 | 0 | { |
1088 | 0 | int absLevel = std::abs( piDstCoeff[uiBlkPos] ); |
1089 | 0 | cctx.remAbsVal1stPass( iScanPosTmp, std::min( absLevel, 4 + ( absLevel & 1 ) ) ); |
1090 | 0 | piDstCoeff[uiBlkPos] = 0; |
1091 | 0 | } |
1092 | 0 | } |
1093 | 0 | iLastScanPos = bestLastIdxP1 - 1; |
1094 | 0 | } |
1095 | 0 | } |
1096 | | |
1097 | | //=============== estimate Sign Bit Hiding ================ |
1098 | 0 | if( bSBH ) |
1099 | 0 | { |
1100 | 0 | if( uiAbsSumCG >= 2 /*&& cctx.isSigGroup()*/ ) |
1101 | 0 | { |
1102 | 0 | int iSubPos = subSetId*iCGSize; |
1103 | 0 | int iLastNZPosInCG = -1; |
1104 | 0 | int iFirstNZPosInCG = iCGSize; |
1105 | |
|
1106 | 0 | for( int n = 0; n <iCGSize; n++ ) { |
1107 | 0 | if( piDstCoeff[ cctx.blockPos( n + iSubPos ) ] ) { |
1108 | 0 | iFirstNZPosInCG = n; |
1109 | 0 | break; |
1110 | 0 | } |
1111 | 0 | } |
1112 | 0 | if( lastSubSetId == subSetId ){ |
1113 | 0 | iLastNZPosInCG = ( iLastScanPos )%iCGSize; |
1114 | 0 | if( piDstCoeff[ cctx.blockPos( iLastScanPos ) ] == 1 && ( piAddSBH[iLastNZPosInCG] == -1 ) ) |
1115 | 0 | { |
1116 | 0 | piCostDeltaSBH[iLastNZPosInCG] -= (4<<SCALE_BITS); |
1117 | 0 | } |
1118 | 0 | } |
1119 | 0 | else{ |
1120 | 0 | for( int n = iCGSize - 1; n >= 0; n-- ) { |
1121 | 0 | if( piDstCoeff[ cctx.blockPos( n + iSubPos ) ] ) { |
1122 | 0 | iLastNZPosInCG = n; |
1123 | 0 | break; |
1124 | 0 | } |
1125 | 0 | } |
1126 | 0 | } |
1127 | 0 | if( iLastNZPosInCG - iFirstNZPosInCG >= SBH_THRESHOLD ) |
1128 | 0 | { |
1129 | 0 | iCodedCostCG -= xiGetICost( (int)xGetIEPRate() ); //subtract cost for one sign bin |
1130 | 0 | bool bSign = plSrcCoeff[ cctx.blockPos( iSubPos + iFirstNZPosInCG) ] < 0; |
1131 | |
|
1132 | 0 | if( bSign != ( uiAbsSumCG & 0x1 ) ) { |
1133 | 0 | int iLastPosInCG = ( lastSubSetId == subSetId ) ? iLastNZPosInCG : iCGSize - 1; |
1134 | 0 | int64_t iMinCostDelta = std::numeric_limits<int64_t>::max(); |
1135 | 0 | int iMinCostPos = -1; |
1136 | |
|
1137 | 0 | if( piDstCoeff[ cctx.blockPos( iFirstNZPosInCG + iSubPos ) ] >1 ){ |
1138 | 0 | iMinCostDelta = piCostDeltaSBH[iFirstNZPosInCG]; |
1139 | 0 | iMinCostPos = iFirstNZPosInCG; |
1140 | 0 | } |
1141 | |
|
1142 | 0 | for( int n = 0; n<iFirstNZPosInCG; n++ ){ |
1143 | 0 | if( ( plSrcCoeff[ cctx.blockPos( iSubPos + n ) ] < 0 ) == bSign ){ |
1144 | 0 | if( piCostDeltaSBH[n] < iMinCostDelta ){ |
1145 | 0 | iMinCostDelta = piCostDeltaSBH[n]; |
1146 | 0 | iMinCostPos = n; |
1147 | 0 | } |
1148 | 0 | } |
1149 | 0 | } |
1150 | |
|
1151 | 0 | for( int n = iFirstNZPosInCG + 1; n <= iLastPosInCG; n++ ){ |
1152 | 0 | if( piCostDeltaSBH[n] < iMinCostDelta ){ |
1153 | 0 | iMinCostDelta = piCostDeltaSBH[n]; |
1154 | 0 | iMinCostPos = n; |
1155 | 0 | } |
1156 | 0 | } |
1157 | 0 | const int oldAbsVal = std::abs( piDstCoeff[cctx.blockPos( iMinCostPos + iSubPos )] ); |
1158 | 0 | if( oldAbsVal ) cctx.remAbsVal1stPass( iMinCostPos + iSubPos, std::min( oldAbsVal, 4 + ( oldAbsVal & 1 ) ) ); |
1159 | 0 | piDstCoeff[ cctx.blockPos( iMinCostPos + iSubPos ) ] += piAddSBH[iMinCostPos]; |
1160 | 0 | const int absVal = std::abs( piDstCoeff[cctx.blockPos( iMinCostPos + iSubPos )] ); |
1161 | 0 | if( absVal ) cctx.absVal1stPass( iMinCostPos + iSubPos, std::min( absVal, 4 + ( absVal & 1 ) ) ); |
1162 | 0 | uiAbsSumCG += piAddSBH[iMinCostPos]; |
1163 | 0 | iCodedCostCG += iMinCostDelta; |
1164 | 0 | } |
1165 | 0 | } |
1166 | 0 | } |
1167 | 0 | } |
1168 | |
|
1169 | 0 | iCodedCostBlock += iCodedCostCG; |
1170 | 0 | iUncodedCostBlock += iUncodedCostCG; |
1171 | 0 | uiAbsSum += uiAbsSumCG; |
1172 | 0 | DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: [%2d:%2d]\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), cctx.cgPosX(), cctx.cgPosY() ); |
1173 | 0 | DTRACE( g_trace_ctx, D_RDOQ_MORE, "Uncoded=%lld\n", (long long)( iUncodedCostBlock ) ); |
1174 | 0 | DTRACE( g_trace_ctx, D_RDOQ_MORE, "Coded =%lld\n", (long long)( iCodedCostBlock ) ); |
1175 | 0 | } //end for (iCGScanPos) |
1176 | | |
1177 | 0 | iCodedCostBlock = bestTotalCost; |
1178 | |
|
1179 | 0 | if( iLastScanPos < 0 ) |
1180 | 0 | { |
1181 | 0 | CHECK( uiAbsSum != 0, "Illegal" ); |
1182 | 0 | return 0; |
1183 | 0 | } |
1184 | | |
1185 | 0 | if( !CU::isIntra( *tu.cu ) && isLuma( compID ) ) |
1186 | 0 | { |
1187 | 0 | const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() ); |
1188 | 0 | iUncodedCostBlock += xiGetICost( fracBitsQtRootCbf.intBits[0] ); |
1189 | 0 | iCodedCostBlock += xiGetICost( fracBitsQtRootCbf.intBits[1] ); |
1190 | 0 | } |
1191 | 0 | else |
1192 | 0 | { |
1193 | 0 | bool previousCbf = tu.cbf[COMP_Cb]; |
1194 | 0 | bool lastCbfIsInferred = false; |
1195 | 0 | const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID); |
1196 | 0 | if( useIntraSubPartitions ) |
1197 | 0 | { |
1198 | 0 | bool rootCbfSoFar = false; |
1199 | 0 | bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID); |
1200 | 0 | uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth()); |
1201 | 0 | if( isLastSubPartition ) |
1202 | 0 | { |
1203 | 0 | TransformUnit* tuPointer = tu.cu->firstTU; |
1204 | 0 | for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ ) |
1205 | 0 | { |
1206 | 0 | rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth); |
1207 | 0 | tuPointer = tuPointer->next; |
1208 | 0 | } |
1209 | 0 | if( !rootCbfSoFar ) |
1210 | 0 | { |
1211 | 0 | lastCbfIsInferred = true; |
1212 | 0 | } |
1213 | 0 | } |
1214 | 0 | if( !lastCbfIsInferred ) |
1215 | 0 | { |
1216 | 0 | previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth); |
1217 | 0 | } |
1218 | 0 | } |
1219 | 0 | BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) ); |
1220 | |
|
1221 | 0 | if( !lastCbfIsInferred ) |
1222 | 0 | { |
1223 | 0 | iUncodedCostBlock += xiGetICost(fracBitsQtCbf.intBits[0]); |
1224 | 0 | iCodedCostBlock += xiGetICost(fracBitsQtCbf.intBits[1]); |
1225 | 0 | } |
1226 | 0 | } |
1227 | |
|
1228 | 0 | if( iUncodedCostBlock <= iCodedCostBlock ) |
1229 | 0 | { |
1230 | 0 | iCodedCostBlock = iUncodedCostBlock; |
1231 | 0 | uiAbsSum = 0; |
1232 | 0 | ::memset( piDstCoeff, 0, uiMaxNumCoeff*sizeof( TCoeffSig ) ); |
1233 | 0 | } |
1234 | 0 | else |
1235 | 0 | { |
1236 | | // Check due to saving of last pos. Sign data hiding can change the position of last coef. |
1237 | 0 | if( bSBH ) |
1238 | 0 | { |
1239 | 0 | if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 ) |
1240 | 0 | { |
1241 | 0 | int scanPos = iLastScanPos - 1; |
1242 | 0 | for( ; scanPos >= 0; scanPos-- ) |
1243 | 0 | { |
1244 | 0 | if( piDstCoeff[cctx.blockPos( scanPos )] ) |
1245 | 0 | break; |
1246 | 0 | } |
1247 | 0 | iLastScanPos = scanPos; |
1248 | 0 | } |
1249 | 0 | } |
1250 | |
|
1251 | 0 | for ( int scanPos = 0; scanPos <= iLastScanPos; scanPos++ ) |
1252 | 0 | { |
1253 | 0 | int blkPos = cctx.blockPos( scanPos ); |
1254 | 0 | TCoeff level = piDstCoeff[ blkPos ]; |
1255 | 0 | int iSign = plSrcCoeff[blkPos] >> ( sizeof(TCoeff)*8 - 1 ); |
1256 | 0 | piDstCoeff[blkPos] = ( iSign^level ) - iSign; |
1257 | 0 | } |
1258 | 0 | tu.lastPos[compID] = iLastScanPos; |
1259 | 0 | } |
1260 | |
|
1261 | | #if ENABLE_TRACING |
1262 | | for ( int scanPos = iCGNum * iCGSize-1; scanPos >= 0; scanPos-- ) |
1263 | | { |
1264 | | if(( scanPos & iCGSizeM1) == iCGSizeM1 ) |
1265 | | { |
1266 | | DTRACE(g_trace_ctx, D_RDOQ, "%d:", scanPos >> cctx.log2CGSize() ); |
1267 | | } |
1268 | | int blkPos = cctx.blockPos( scanPos ); |
1269 | | DTRACE( g_trace_ctx, D_RDOQ, "%3d ", piDstCoeff[blkPos] ); |
1270 | | if( scanPos % iCGSize == 0 ) |
1271 | | { |
1272 | | DTRACE(g_trace_ctx, D_RDOQ, "\n"); |
1273 | | } |
1274 | | } |
1275 | | #endif |
1276 | |
|
1277 | 0 | DTRACE( g_trace_ctx, D_RDOQ_MORE, "Uncoded=%lld\n", (long long)( iUncodedCostBlock ) ); |
1278 | 0 | DTRACE( g_trace_ctx, D_RDOQ_MORE, "Coded =%lld\n", (long long)( iCodedCostBlock ) ); |
1279 | 0 | DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d, lastScanPos=%d, absSum=%d, cost=%lld \n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID, iLastScanPos, uiAbsSum, (long long)iCodedCostBlock ); |
1280 | 0 | return 0; |
1281 | 0 | } Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<true, true>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&) Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<true, false>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&) Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<false, true>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&) Unexecuted instantiation: int vvenc::QuantRDOQ2::xRateDistOptQuantFast<false, false>(vvenc::TransformUnit&, vvenc::ComponentID const&, vvenc::AreaBuf<int const> const&, int&, vvenc::QpParam const&, vvenc::Ctx const&) |
1282 | | |
1283 | | int QuantRDOQ2::xRateDistOptQuant( TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx &ctx, bool bUseScalingList ) |
1284 | 0 | { |
1285 | 0 | if( tu.cs->slice->signDataHidingEnabled/*m_bSBH*/ ) |
1286 | 0 | { |
1287 | 0 | if( bUseScalingList ) return xRateDistOptQuantFast<true, true >( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
1288 | 0 | else return xRateDistOptQuantFast<true, false>( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
1289 | 0 | } |
1290 | 0 | else |
1291 | 0 | { |
1292 | 0 | if( bUseScalingList ) return xRateDistOptQuantFast<false, true >( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
1293 | 0 | else return xRateDistOptQuantFast<false, false>( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
1294 | 0 | } |
1295 | |
|
1296 | 0 | } |
1297 | | |
1298 | | |
1299 | | } // namespace vvenc |
1300 | | |
1301 | | //! \} |