/src/vvenc/source/Lib/CommonLib/QuantRDOQ.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | ------------------------------------------------------------------------------------------- */ |
41 | | |
42 | | |
43 | | /** \file QuantRDOQ.cpp |
44 | | \brief transform and quantization class |
45 | | */ |
46 | | |
47 | | #include "QuantRDOQ.h" |
48 | | #include "UnitTools.h" |
49 | | #include "ContextModelling.h" |
50 | | #include "CodingStructure.h" |
51 | | #include "dtrace_next.h" |
52 | | #include "dtrace_buffer.h" |
53 | | |
54 | | #include <stdlib.h> |
55 | | #include <memory.h> |
56 | | |
57 | | //! \ingroup CommonLib |
58 | | //! \{ |
59 | | |
60 | | namespace vvenc { |
61 | | |
/** Per-coefficient-group accumulators used by the RDOQ scan.
 *  The instance is zeroed with memset before each coefficient group is processed,
 *  so it has to remain a plain aggregate.
 */
struct coeffGroupRDStats
{
  int    iNNZbeforePos0;        ///< number of non-zero levels at in-group scan positions other than 0
  double d64CodedLevelandDist;  ///< distortion and level cost only (significance cost excluded)
  double d64UncodedDist;        ///< all zero coded block distortion, summed over the non-zero levels
  double d64SigCost;            ///< sum of the significance costs of every position in the group
  double d64SigCost_0;          ///< significance cost of in-group scan position 0
  int    iNumSbbCtxBins;        ///< NOTE(review): not written in the visible part of this file -- confirm usage
};
71 | | |
72 | | |
73 | | //! \ingroup CommonLib |
74 | | //! \{ |
75 | | |
76 | | // ==================================================================================================================== |
77 | | // Constants |
78 | | // ==================================================================================================================== |
79 | | |
80 | | |
81 | | // ==================================================================================================================== |
82 | | // Static functions |
83 | | // ==================================================================================================================== |
84 | | |
85 | | // ==================================================================================================================== |
86 | | // QuantRDOQ class member functions |
87 | | // ==================================================================================================================== |
88 | | |
89 | | |
90 | 0 | QuantRDOQ::QuantRDOQ( const Quant* other, bool useScalingLists ) : Quant( other, useScalingLists ) |
91 | 0 | { |
92 | |
|
93 | 0 | const QuantRDOQ *rdoq = dynamic_cast<const QuantRDOQ*>( other ); |
94 | 0 | CHECK( other && !rdoq, "The RDOQ cast must be successfull!" ); |
95 | 0 | xInitScalingList( rdoq ); |
96 | 0 | } |
97 | | |
98 | | QuantRDOQ::~QuantRDOQ() |
99 | 0 | { |
100 | 0 | xDestroyScalingList(); |
101 | 0 | } |
102 | | |
103 | | |
104 | | |
105 | | |
106 | | /** Get the best level in RD sense |
107 | | * |
108 | | * \returns best quantized transform level for given scan position |
109 | | * |
110 | | * This method calculates the best quantized transform level for a given scan position. |
111 | | */ |
112 | | inline uint32_t QuantRDOQ::xGetCodedLevel( double& rd64CodedCost, |
113 | | double& rd64CodedCost0, |
114 | | double& rd64CodedCostSig, |
115 | | Intermediate_Int lLevelDouble, |
116 | | uint32_t uiMaxAbsLevel, |
117 | | const BinFracBits* fracBitsSig, |
118 | | const BinFracBits& fracBitsPar, |
119 | | const BinFracBits& fracBitsGt1, |
120 | | const BinFracBits& fracBitsGt2, |
121 | | const int remRegBins, |
122 | | unsigned goRiceZero, |
123 | | uint16_t ui16AbsGoRice, |
124 | | int iQBits, |
125 | | double errorScale, |
126 | | bool bLast, |
127 | | const int maxLog2TrDynamicRange |
128 | | ) const |
129 | 0 | { |
130 | 0 | double dCurrCostSig = 0; |
131 | 0 | uint32_t uiBestAbsLevel = 0; |
132 | |
|
133 | 0 | if( !bLast && uiMaxAbsLevel < 3 ) |
134 | 0 | { |
135 | 0 | rd64CodedCostSig = xGetRateSigCoef( *fracBitsSig, 0 ); |
136 | 0 | rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig; |
137 | 0 | if( uiMaxAbsLevel == 0 ) |
138 | 0 | { |
139 | 0 | return uiBestAbsLevel; |
140 | 0 | } |
141 | 0 | } |
142 | 0 | else |
143 | 0 | { |
144 | 0 | rd64CodedCost = MAX_DOUBLE; |
145 | 0 | } |
146 | | |
147 | 0 | if( !bLast ) |
148 | 0 | { |
149 | 0 | dCurrCostSig = xGetRateSigCoef( *fracBitsSig, 1 ); |
150 | 0 | } |
151 | |
|
152 | 0 | uint32_t uiMinAbsLevel = ( uiMaxAbsLevel > 1 ? uiMaxAbsLevel - 1 : 1 ); |
153 | 0 | for( int uiAbsLevel = uiMaxAbsLevel; uiAbsLevel >= uiMinAbsLevel ; uiAbsLevel-- ) |
154 | 0 | { |
155 | 0 | double dErr = double( lLevelDouble - ( Intermediate_Int(uiAbsLevel) << iQBits ) ); |
156 | |
|
157 | 0 | double dCurrCost = dErr * dErr * errorScale + xGetICost( xGetICRate( uiAbsLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, ui16AbsGoRice, maxLog2TrDynamicRange ) ); |
158 | 0 | dCurrCost += dCurrCostSig; |
159 | |
|
160 | 0 | if( dCurrCost < rd64CodedCost ) |
161 | 0 | { |
162 | 0 | uiBestAbsLevel = uiAbsLevel; |
163 | 0 | rd64CodedCost = dCurrCost; |
164 | 0 | rd64CodedCostSig = dCurrCostSig; |
165 | 0 | } |
166 | 0 | } |
167 | |
|
168 | 0 | return uiBestAbsLevel; |
169 | 0 | } |
170 | | |
171 | | /** Calculates the cost for specific absolute transform level |
172 | | * \param uiAbsLevel scaled quantized level |
173 | | * \param ui16CtxNumOne current ctxInc for coeff_abs_level_greater1 (1st bin of coeff_abs_level_minus1 in AVC) |
174 | | * \param ui16CtxNumAbs current ctxInc for coeff_abs_level_greater2 (remaining bins of coeff_abs_level_minus1 in AVC) |
175 | | * \param ui16AbsGoRice Rice parameter for coeff_abs_level_minus3 |
176 | | * \param c1Idx |
177 | | * \param c2Idx |
178 | | * \param useLimitedPrefixLength |
179 | | * \param maxLog2TrDynamicRange |
180 | | * \returns cost of given absolute transform level |
181 | | */ |
182 | | inline int QuantRDOQ::xGetICRate( const uint32_t uiAbsLevel, |
183 | | const BinFracBits& fracBitsPar, |
184 | | const BinFracBits& fracBitsGt1, |
185 | | const BinFracBits& fracBitsGt2, |
186 | | const int remRegBins, |
187 | | unsigned goRiceZero, |
188 | | const uint16_t ui16AbsGoRice, |
189 | | const int maxLog2TrDynamicRange ) const |
190 | 0 | { |
191 | 0 | if( remRegBins < 4 ) |
192 | 0 | { |
193 | 0 | int iRate = int( xGetIEPRate() ); // cost of sign bit |
194 | 0 | uint32_t symbol = ( uiAbsLevel == 0 ? goRiceZero : uiAbsLevel <= goRiceZero ? uiAbsLevel-1 : uiAbsLevel ); |
195 | 0 | uint32_t length; |
196 | 0 | const int threshold = COEF_REMAIN_BIN_REDUCTION; |
197 | 0 | if( symbol < ( threshold << ui16AbsGoRice ) ) |
198 | 0 | { |
199 | 0 | length = symbol >> ui16AbsGoRice; |
200 | 0 | iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS; |
201 | 0 | } |
202 | 0 | else |
203 | 0 | { |
204 | 0 | length = ui16AbsGoRice; |
205 | 0 | symbol = symbol - ( threshold << ui16AbsGoRice ); |
206 | 0 | while( symbol >= ( 1 << length ) ) |
207 | 0 | { |
208 | 0 | symbol -= ( 1 << ( length++ ) ); |
209 | 0 | } |
210 | 0 | iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; |
211 | 0 | } |
212 | 0 | return iRate; |
213 | 0 | } |
214 | | |
215 | 0 | int iRate = int( xGetIEPRate() ); // cost of sign bit |
216 | 0 | const uint32_t cthres = 4; |
217 | 0 | if( uiAbsLevel >= cthres ) |
218 | 0 | { |
219 | 0 | uint32_t symbol = ( uiAbsLevel - cthres ) >> 1; |
220 | 0 | uint32_t length; |
221 | 0 | const int threshold = COEF_REMAIN_BIN_REDUCTION; |
222 | 0 | if( symbol < ( threshold << ui16AbsGoRice ) ) |
223 | 0 | { |
224 | 0 | length = symbol >> ui16AbsGoRice; |
225 | 0 | iRate += ( length + 1 + ui16AbsGoRice ) << SCALE_BITS; |
226 | 0 | } |
227 | 0 | else |
228 | 0 | { |
229 | 0 | length = ui16AbsGoRice; |
230 | 0 | symbol = symbol - ( threshold << ui16AbsGoRice ); |
231 | 0 | while( symbol >= ( 1 << length ) ) |
232 | 0 | { |
233 | 0 | symbol -= ( 1 << ( length++ ) ); |
234 | 0 | } |
235 | 0 | iRate += ( threshold + length + 1 - ui16AbsGoRice + length ) << SCALE_BITS; |
236 | 0 | } |
237 | |
|
238 | 0 | iRate += fracBitsGt1.intBits[1]; |
239 | 0 | iRate += fracBitsPar.intBits[( uiAbsLevel - 2 ) & 1]; |
240 | 0 | iRate += fracBitsGt2.intBits[1]; |
241 | 0 | } |
242 | 0 | else if( uiAbsLevel == 1 ) |
243 | 0 | { |
244 | 0 | iRate += fracBitsGt1.intBits[0]; |
245 | 0 | } |
246 | 0 | else if( uiAbsLevel == 2 ) |
247 | 0 | { |
248 | 0 | iRate += fracBitsGt1.intBits[1]; |
249 | 0 | iRate += fracBitsPar.intBits[0]; |
250 | 0 | iRate += fracBitsGt2.intBits[0]; |
251 | 0 | } |
252 | 0 | else if( uiAbsLevel == 3 ) |
253 | 0 | { |
254 | 0 | iRate += fracBitsGt1.intBits[1]; |
255 | 0 | iRate += fracBitsPar.intBits[1]; |
256 | 0 | iRate += fracBitsGt2.intBits[0]; |
257 | 0 | } |
258 | 0 | else |
259 | 0 | { |
260 | 0 | iRate = 0; |
261 | 0 | } |
262 | 0 | return iRate; |
263 | 0 | } |
264 | | |
265 | | inline double QuantRDOQ::xGetRateSigCoeffGroup( const BinFracBits& fracBitsSigCG, unsigned uiSignificanceCoeffGroup ) const |
266 | 0 | { |
267 | 0 | return xGetICost( fracBitsSigCG.intBits[uiSignificanceCoeffGroup] ); |
268 | 0 | } |
269 | | |
270 | | /** Calculates the cost of signaling the last significant coefficient in the block |
271 | | * \param uiPosX X coordinate of the last significant coefficient |
272 | | * \param uiPosY Y coordinate of the last significant coefficient |
273 | | * \param component colour component ID |
274 | | * \returns cost of last significant coefficient |
275 | | */ |
276 | | /* |
277 | | * \param uiWidth width of the transform unit (TU) |
278 | | */ |
279 | | inline double QuantRDOQ::xGetRateLast( const int* lastBitsX, const int* lastBitsY, unsigned PosX, unsigned PosY ) const |
280 | 0 | { |
281 | 0 | uint32_t CtxX = g_uiGroupIdx[PosX]; |
282 | 0 | uint32_t CtxY = g_uiGroupIdx[PosY]; |
283 | 0 | double Cost = lastBitsX[ CtxX ] + lastBitsY[ CtxY ]; |
284 | 0 | if( CtxX > 3 ) |
285 | 0 | { |
286 | 0 | Cost += xGetIEPRate() * ((CtxX-2)>>1); |
287 | 0 | } |
288 | 0 | if( CtxY > 3 ) |
289 | 0 | { |
290 | 0 | Cost += xGetIEPRate() * ((CtxY-2)>>1); |
291 | 0 | } |
292 | 0 | return xGetICost( Cost ); |
293 | 0 | } |
294 | | |
295 | | |
296 | | inline double QuantRDOQ::xGetRateSigCoef( const BinFracBits& fracBitsSig, unsigned uiSignificance ) const |
297 | 0 | { |
298 | 0 | return xGetICost( fracBitsSig.intBits[uiSignificance] ); |
299 | 0 | } |
300 | | |
301 | | /** Get the cost for a specific rate |
302 | | * \param dRate rate of a bit |
303 | | * \returns cost at the specific rate |
304 | | */ |
305 | | inline double QuantRDOQ::xGetICost ( double dRate ) const |
306 | 0 | { |
307 | 0 | return m_dLambda * dRate; |
308 | 0 | } |
309 | | |
310 | | /** Get the cost of an equal probable bit |
311 | | * \returns cost of equal probable bit |
312 | | */ |
313 | | inline double QuantRDOQ::xGetIEPRate() const |
314 | 0 | { |
315 | 0 | return 32768; |
316 | 0 | } |
317 | | |
318 | | |
319 | | double QuantRDOQ::xGetErrScaleCoeff(const bool needsSqrt2, SizeType width, SizeType height, int qp, const int maxLog2TrDynamicRange, const int channelBitDepth, bool bTransformSkip=false) |
320 | 0 | { |
321 | 0 | const int iTransformShift = bTransformSkip ? 0 : getTransformShift(channelBitDepth, Size(width, height), maxLog2TrDynamicRange); |
322 | 0 | double dErrScale = (double)(1 << SCALE_BITS); // Compensate for scaling of bitcount in Lagrange cost function |
323 | 0 | double dTransShift = (double)iTransformShift + (needsSqrt2 ? -0.5 : 0.0); |
324 | 0 | dErrScale = dErrScale * pow(2.0, (-2.0*dTransShift)); // Compensate for scaling through forward transform |
325 | 0 | const int QStep = g_quantScales[needsSqrt2 ? 1 : 0][qp]; |
326 | 0 | double finalErrScale = dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth) << 1)); |
327 | 0 | return finalErrScale; |
328 | 0 | } |
329 | | |
330 | | |
331 | | |
332 | | /** set error scale coefficients |
333 | | * \param list list ID |
334 | | * \param size |
335 | | * \param qp quantization parameter |
336 | | * \param maxLog2TrDynamicRange |
337 | | * \param bitDepths reference to bit depth array for all channels |
338 | | */ |
339 | | void QuantRDOQ::xSetErrScaleCoeff( uint32_t list, uint32_t sizeX, uint32_t sizeY, int qp, const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths ) |
340 | 0 | { |
341 | 0 | const int width = g_scalingListSizeX[sizeX]; |
342 | 0 | const int height = g_scalingListSizeX[sizeY]; |
343 | 0 | const ChannelType channelType = ( ( list == 0 ) || ( list == MAX_NUM_COMP ) ) ? CH_L : CH_C; |
344 | 0 | const int channelBitDepth = bitDepths[channelType]; |
345 | 0 | const int iTransformShift = getTransformShift( channelBitDepth, Size( g_scalingListSizeX[sizeX], g_scalingListSizeX[sizeY] ), maxLog2TrDynamicRange[channelType] ); // Represents scaling through forward transform |
346 | |
|
347 | 0 | double dErrScale = (double)( 1 << SCALE_BITS ); // Compensate for scaling of bitcount in Lagrange cost function |
348 | |
|
349 | 0 | const bool needsSqrt2 = ((Log2(width*height)) & 1) == 1; |
350 | 0 | double dTransShift = (double)iTransformShift + ( needsSqrt2 ? -0.5 : 0.0 ); |
351 | 0 | dErrScale = dErrScale*pow( 2.0, ( -2.0*dTransShift ) ); // Compensate for scaling through forward transform |
352 | |
|
353 | 0 | if( getScalingListEnabled() ) |
354 | 0 | { |
355 | 0 | uint32_t i, uiMaxNumCoeff = width * height; |
356 | |
|
357 | 0 | int* piQuantcoeff = getQuantCoeff( list, qp, sizeX, sizeY ); |
358 | 0 | double* pdErrScale = xGetErrScaleCoeffSL( list, sizeX, sizeY, qp ); |
359 | |
|
360 | 0 | for( i = 0; i < uiMaxNumCoeff; i++ ) |
361 | 0 | { |
362 | 0 | pdErrScale[i] = dErrScale / piQuantcoeff[i] / piQuantcoeff[i] / (1 << (DISTORTION_PRECISION_ADJUSTMENT( bitDepths[channelType] ) << 1)); |
363 | 0 | } |
364 | 0 | } |
365 | |
|
366 | 0 | int QStep = g_quantScales[needsSqrt2][qp]; |
367 | |
|
368 | 0 | xGetErrScaleCoeffNoScalingList(list, sizeX, sizeY, qp) = |
369 | 0 | dErrScale / QStep / QStep / (1 << (DISTORTION_PRECISION_ADJUSTMENT(bitDepths[channelType]) << 1)); |
370 | 0 | } |
371 | | |
372 | | /** set flat matrix value to quantized coefficient |
373 | | */ |
374 | | void QuantRDOQ::setFlatScalingList(const int maxLog2TrDynamicRange[MAX_NUM_CH], const BitDepths &bitDepths) |
375 | 0 | { |
376 | 0 | Quant::setFlatScalingList( maxLog2TrDynamicRange, bitDepths ); |
377 | |
|
378 | 0 | const int minimumQp = 0; |
379 | 0 | const int maximumQp = SCALING_LIST_REM_NUM; |
380 | |
|
381 | 0 | for(uint32_t sizeX = 0; sizeX < SCALING_LIST_SIZE_NUM; sizeX++) |
382 | 0 | { |
383 | 0 | for(uint32_t sizeY = 0; sizeY < SCALING_LIST_SIZE_NUM; sizeY++) |
384 | 0 | { |
385 | 0 | for(uint32_t list = 0; list < SCALING_LIST_NUM; list++) |
386 | 0 | { |
387 | 0 | for(int qp = minimumQp; qp < maximumQp; qp++) |
388 | 0 | { |
389 | 0 | xSetErrScaleCoeff( list, sizeX, sizeY, qp, maxLog2TrDynamicRange, bitDepths ); |
390 | 0 | } |
391 | 0 | } |
392 | 0 | } |
393 | 0 | } |
394 | 0 | } |
395 | | |
396 | | /** initialization process of scaling list array |
397 | | */ |
398 | | void QuantRDOQ::xInitScalingList( const QuantRDOQ* other ) |
399 | 0 | { |
400 | 0 | m_isErrScaleListOwner = other == nullptr; |
401 | |
|
402 | 0 | bool useScalingLists = getScalingListEnabled(); |
403 | |
|
404 | 0 | for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) |
405 | 0 | { |
406 | 0 | for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) |
407 | 0 | { |
408 | 0 | for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) |
409 | 0 | { |
410 | 0 | for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) |
411 | 0 | { |
412 | 0 | if( m_isErrScaleListOwner ) |
413 | 0 | { |
414 | 0 | m_errScale[sizeIdX][sizeIdY][listId][qp] = useScalingLists ? new double[g_scalingListSizeX[sizeIdX] * g_scalingListSizeX[sizeIdY]] : nullptr; |
415 | 0 | } |
416 | 0 | else |
417 | 0 | { |
418 | 0 | m_errScale[sizeIdX][sizeIdY][listId][qp] = other->m_errScale[sizeIdX][sizeIdY][listId][qp]; |
419 | 0 | } |
420 | 0 | } // listID loop |
421 | 0 | } |
422 | 0 | } |
423 | 0 | } |
424 | 0 | } |
425 | | |
426 | | /** destroy quantization matrix array |
427 | | */ |
428 | | void QuantRDOQ::xDestroyScalingList() |
429 | 0 | { |
430 | 0 | if( !m_isErrScaleListOwner ) return; |
431 | | |
432 | 0 | for(uint32_t sizeIdX = 0; sizeIdX < SCALING_LIST_SIZE_NUM; sizeIdX++) |
433 | 0 | { |
434 | 0 | for(uint32_t sizeIdY = 0; sizeIdY < SCALING_LIST_SIZE_NUM; sizeIdY++) |
435 | 0 | { |
436 | 0 | for(uint32_t listId = 0; listId < SCALING_LIST_NUM; listId++) |
437 | 0 | { |
438 | 0 | for(uint32_t qp = 0; qp < SCALING_LIST_REM_NUM; qp++) |
439 | 0 | { |
440 | 0 | if(m_errScale[sizeIdX][sizeIdY][listId][qp]) |
441 | 0 | { |
442 | 0 | delete [] m_errScale[sizeIdX][sizeIdY][listId][qp]; |
443 | 0 | } |
444 | 0 | } |
445 | 0 | } |
446 | 0 | } |
447 | 0 | } |
448 | | // Quant::destroyScalingList(); |
449 | 0 | } |
450 | | |
451 | | |
452 | | void QuantRDOQ::quant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx& ctx) |
453 | 0 | { |
454 | 0 | const CompArea& rect = tu.blocks[compID]; |
455 | 0 | const uint32_t uiWidth = rect.width; |
456 | 0 | const uint32_t uiHeight = rect.height; |
457 | |
|
458 | 0 | const CCoeffBuf& piCoef = pSrc; |
459 | 0 | CoeffSigBuf piQCoef = tu.getCoeffs(compID); |
460 | |
|
461 | 0 | const bool useTransformSkip = tu.mtsIdx[compID]==MTS_SKIP; |
462 | |
|
463 | 0 | bool useRDOQ = useTransformSkip ? m_useRDOQTS : m_RDOQ > 0; |
464 | |
|
465 | 0 | if( !tu.cu->ispMode || !isLuma(compID) ) |
466 | 0 | { |
467 | 0 | useRDOQ &= uiWidth > 2; |
468 | 0 | useRDOQ &= uiHeight > 2; |
469 | 0 | } |
470 | |
|
471 | 0 | if( useRDOQ ) |
472 | 0 | { |
473 | 0 | if (!tu.cs->picture->useSelectiveRdoq || xNeedRDOQ(tu, compID, piCoef, cQP)) |
474 | 0 | { |
475 | 0 | if( useTransformSkip ) |
476 | 0 | { |
477 | 0 | if(tu.cu->bdpcmM[toChannelType(compID)]) |
478 | 0 | { |
479 | 0 | forwardRDPCM( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
480 | 0 | } |
481 | 0 | else |
482 | 0 | { |
483 | 0 | rateDistOptQuantTS( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
484 | 0 | } |
485 | 0 | } |
486 | 0 | else |
487 | 0 | { |
488 | 0 | xRateDistOptQuant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
489 | 0 | } |
490 | 0 | } |
491 | 0 | else |
492 | 0 | { |
493 | 0 | piQCoef.fill(0); |
494 | 0 | uiAbsSum = 0; |
495 | 0 | tu.lastPos[compID] = -1; |
496 | 0 | } |
497 | 0 | } |
498 | 0 | else |
499 | 0 | { |
500 | 0 | Quant::quant( tu, compID, pSrc, uiAbsSum, cQP, ctx ); |
501 | 0 | } |
502 | 0 | } |
503 | | |
504 | | |
505 | | |
506 | | void QuantRDOQ::xRateDistOptQuant(TransformUnit& tu, const ComponentID compID, const CCoeffBuf& pSrc, TCoeff &uiAbsSum, const QpParam& cQP, const Ctx &ctx) |
507 | 0 | { |
508 | 0 | const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); |
509 | |
|
510 | 0 | const SPS &sps = *tu.cs->sps; |
511 | 0 | const CompArea& rect = tu.blocks[compID]; |
512 | 0 | const uint32_t uiWidth = rect.width; |
513 | 0 | const uint32_t uiHeight = rect.height; |
514 | 0 | const ChannelType chType = toChannelType(compID); |
515 | 0 | const int channelBitDepth = sps.bitDepths[ chType ]; |
516 | |
|
517 | 0 | const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(); |
518 | |
|
519 | 0 | const bool useIntraSubPartitions = tu.cu->ispMode && isLuma(compID); |
520 | | /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be |
521 | | * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the |
522 | | * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller) |
523 | | * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result |
524 | | */ |
525 | | |
526 | | // Represents scaling through forward transform |
527 | 0 | const int iTransformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); |
528 | |
|
529 | 0 | double d64BlockUncodedCost = 0; |
530 | 0 | const uint32_t uiLog2BlockWidth = Log2(uiWidth); |
531 | 0 | const uint32_t uiLog2BlockHeight = Log2(uiHeight); |
532 | 0 | const uint32_t uiMaxNumCoeff = rect.area(); |
533 | |
|
534 | 0 | CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID"); |
535 | |
|
536 | 0 | int scalingListType = getScalingListType(tu.cu->predMode, compID); |
537 | |
|
538 | 0 | CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); |
539 | |
|
540 | 0 | const TCoeff *plSrcCoeff = pSrc.buf; |
541 | 0 | TCoeffSig *piDstCoeff = tu.getCoeffs(compID).buf; |
542 | |
|
543 | 0 | double *pdCostCoeff = m_pdCostCoeff; |
544 | 0 | double *pdCostSig = m_pdCostSig; |
545 | 0 | double *pdCostCoeff0 = m_pdCostCoeff0; |
546 | 0 | int *rateIncUp = m_rateIncUp; |
547 | 0 | int *rateIncDown = m_rateIncDown; |
548 | 0 | int *sigRateDelta = m_sigRateDelta; |
549 | 0 | TCoeff *deltaU = m_deltaU; |
550 | |
|
551 | 0 | memset( piDstCoeff, 0, sizeof( TCoeffSig ) * uiMaxNumCoeff ); |
552 | 0 | memset( m_pdCostCoeff, 0, sizeof( double ) * uiMaxNumCoeff ); |
553 | 0 | memset( m_pdCostSig, 0, sizeof( double ) * uiMaxNumCoeff ); |
554 | 0 | memset( m_rateIncUp, 0, sizeof( int ) * uiMaxNumCoeff ); |
555 | 0 | memset( m_rateIncDown, 0, sizeof( int ) * uiMaxNumCoeff ); |
556 | 0 | memset( m_sigRateDelta, 0, sizeof( int ) * uiMaxNumCoeff ); |
557 | 0 | memset( m_deltaU, 0, sizeof( TCoeff ) * uiMaxNumCoeff ); |
558 | | |
559 | |
|
560 | 0 | const bool needSqrtAdjustment = TU::needsSqrt2Scale( tu, compID ); |
561 | 0 | const bool isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP; |
562 | 0 | const double *const pdErrScale = xGetErrScaleCoeffSL(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip)); |
563 | 0 | const int *const piQCoef = getQuantCoeff(scalingListType, cQP.rem(isTransformSkip), uiLog2BlockWidth, uiLog2BlockHeight); |
564 | 0 | const bool isLfnstApplied = tu.cu->lfnstIdx > 0 && (CU::isSepTree(*tu.cu) ? true : isLuma(compID)); |
565 | 0 | const bool enableScalingLists = getUseScalingList(uiWidth, uiHeight, isTransformSkip, isLfnstApplied); |
566 | 0 | const int defaultQuantisationCoefficient = g_quantScales[ needSqrtAdjustment ?1:0][cQP.rem(isTransformSkip)]; |
567 | 0 | const double defaultErrorScale = xGetErrScaleCoeffNoScalingList(scalingListType, uiLog2BlockWidth, uiLog2BlockHeight, cQP.rem(isTransformSkip)); |
568 | 0 | const int iQBits = QUANT_SHIFT + cQP.per(isTransformSkip) + iTransformShift + (needSqrtAdjustment?-1:0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits |
569 | | |
570 | |
|
571 | 0 | const TCoeff entropyCodingMinimum = -(1 << maxLog2TrDynamicRange); |
572 | 0 | const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; |
573 | |
|
574 | 0 | CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled); |
575 | 0 | const int iCGSizeM1 = (1 << cctx.log2CGSize()) - 1; |
576 | |
|
577 | 0 | int iCGLastScanPos = -1; |
578 | 0 | double d64BaseCost = 0; |
579 | 0 | int iLastScanPos = -1; |
580 | |
|
581 | 0 | int ctxBinSampleRatio = MAX_TU_LEVEL_CTX_CODED_BIN_CONSTRAINT; |
582 | 0 | int remRegBins = (tu.getTbAreaAfterCoefZeroOut( compID ) * ctxBinSampleRatio) >> 4; |
583 | 0 | uint32_t goRiceParam = 0; |
584 | |
|
585 | 0 | double *pdCostCoeffGroupSig = m_pdCostCoeffGroupSig; |
586 | 0 | memset( pdCostCoeffGroupSig, 0, ( uiMaxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) ); |
587 | 0 | int iScanPos; |
588 | 0 | coeffGroupRDStats rdStats; |
589 | |
|
590 | | #if ENABLE_TRACING |
591 | | DTRACE( g_trace_ctx, D_RDOQ, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), rect.x, rect.y, rect.width, rect.height, compID ); |
592 | | #endif |
593 | |
|
594 | 0 | const uint32_t lfnstIdx = tu.cu->lfnstIdx; |
595 | |
|
596 | 0 | const int iCGNum = lfnstIdx > 0 ? 1 : std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth) * std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight) >> cctx.log2CGSize(); |
597 | |
|
598 | 0 | for (int subSetId = iCGNum - 1; subSetId >= 0; subSetId--) |
599 | 0 | { |
600 | 0 | cctx.initSubblock( subSetId ); |
601 | |
|
602 | 0 | int remRegBinsStartCG = remRegBins; |
603 | |
|
604 | 0 | uint32_t maxNonZeroPosInCG = iCGSizeM1; |
605 | 0 | if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) |
606 | 0 | { |
607 | 0 | maxNonZeroPosInCG = 7; |
608 | 0 | } |
609 | |
|
610 | 0 | memset( &rdStats, 0, sizeof (coeffGroupRDStats)); |
611 | |
|
612 | 0 | for( int iScanPosinCG = iCGSizeM1; iScanPosinCG > maxNonZeroPosInCG; iScanPosinCG-- ) |
613 | 0 | { |
614 | 0 | iScanPos = cctx.minSubPos() + iScanPosinCG; |
615 | 0 | uint32_t blkPos = cctx.blockPos( iScanPos ); |
616 | 0 | piDstCoeff[ blkPos ] = 0; |
617 | 0 | } |
618 | 0 | for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) |
619 | 0 | { |
620 | 0 | iScanPos = cctx.minSubPos() + iScanPosinCG; |
621 | | //===== quantization ===== |
622 | 0 | uint32_t uiBlkPos = cctx.blockPos(iScanPos); |
623 | | |
624 | | // set coeff |
625 | 0 | const int quantisationCoefficient = (enableScalingLists) ? piQCoef [uiBlkPos] : defaultQuantisationCoefficient; |
626 | 0 | const double errorScale = (enableScalingLists) ? pdErrScale[uiBlkPos] : defaultErrorScale; |
627 | 0 | const int64_t tmpLevel = int64_t(abs(plSrcCoeff[ uiBlkPos ])) * quantisationCoefficient; |
628 | |
|
629 | 0 | const Intermediate_Int lLevelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (iQBits - 1))); |
630 | |
|
631 | 0 | uint32_t uiMaxAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((lLevelDouble + (Intermediate_Int(1) << (iQBits - 1))) >> iQBits)); |
632 | |
|
633 | 0 | const double dErr = double( lLevelDouble ); |
634 | 0 | pdCostCoeff0[ iScanPos ] = dErr * dErr * errorScale; |
635 | 0 | d64BlockUncodedCost += pdCostCoeff0[ iScanPos ]; |
636 | 0 | piDstCoeff[ uiBlkPos ] = uiMaxAbsLevel; |
637 | |
|
638 | 0 | if ( uiMaxAbsLevel > 0 && iLastScanPos < 0 ) |
639 | 0 | { |
640 | 0 | iLastScanPos = iScanPos; |
641 | 0 | iCGLastScanPos = cctx.subSetId(); |
642 | 0 | } |
643 | |
|
644 | 0 | if ( iLastScanPos >= 0 ) |
645 | 0 | { |
646 | |
|
647 | | #if ENABLE_TRACING |
648 | | uint32_t uiCGPosY = cctx.cgPosY(); |
649 | | uint32_t uiCGPosX = cctx.cgPosX(); |
650 | | uint32_t uiPosY = cctx.posY( iScanPos ); |
651 | | uint32_t uiPosX = cctx.posX( iScanPos ); |
652 | | DTRACE( g_trace_ctx, D_RDOQ, "%d [%d][%d][%2d:%2d][%2d:%2d]", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ ), iScanPos, uiBlkPos, uiCGPosX, uiCGPosY, uiPosX, uiPosY ); |
653 | | #endif |
654 | | //===== coefficient level estimation ===== |
655 | 0 | unsigned ctxIdSig = 0; |
656 | 0 | if( iScanPos != iLastScanPos ) |
657 | 0 | { |
658 | 0 | ctxIdSig = cctx.sigCtxIdAbs( iScanPos, piDstCoeff, 0 ); |
659 | 0 | } |
660 | 0 | uint32_t uiLevel; |
661 | 0 | uint8_t ctxOffset = cctx.ctxOffsetAbs (); |
662 | 0 | uint32_t uiParCtx = cctx.parityCtxIdAbs ( ctxOffset ); |
663 | 0 | uint32_t uiGt1Ctx = cctx.greater1CtxIdAbs ( ctxOffset ); |
664 | 0 | uint32_t uiGt2Ctx = cctx.greater2CtxIdAbs ( ctxOffset ); |
665 | 0 | uint32_t goRiceZero = 0; |
666 | 0 | if( remRegBins < 4 ) |
667 | 0 | { |
668 | 0 | unsigned sumAbs = cctx.templateAbsSum( iScanPos, piDstCoeff, 0 ); |
669 | 0 | goRiceParam = g_auiGoRiceParsCoeff [ sumAbs ]; |
670 | 0 | goRiceZero = g_auiGoRicePosCoeff0(0, goRiceParam); |
671 | 0 | } |
672 | |
|
673 | 0 | const BinFracBits fracBitsPar = fracBits.getFracBitsArray( uiParCtx ); |
674 | 0 | const BinFracBits fracBitsGt1 = fracBits.getFracBitsArray( uiGt1Ctx ); |
675 | 0 | const BinFracBits fracBitsGt2 = fracBits.getFracBitsArray( uiGt2Ctx ); |
676 | |
|
677 | 0 | if( iScanPos == iLastScanPos ) |
678 | 0 | { |
679 | 0 | uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], |
680 | 0 | lLevelDouble, uiMaxAbsLevel, nullptr, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 1, maxLog2TrDynamicRange ); |
681 | 0 | } |
682 | 0 | else |
683 | 0 | { |
684 | 0 | DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig ); |
685 | |
|
686 | 0 | const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); |
687 | 0 | uiLevel = xGetCodedLevel( pdCostCoeff[ iScanPos ], pdCostCoeff0[ iScanPos ], pdCostSig[ iScanPos ], |
688 | 0 | lLevelDouble, uiMaxAbsLevel, &fracBitsSig, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, iQBits, errorScale, 0, maxLog2TrDynamicRange ); |
689 | 0 | sigRateDelta[ uiBlkPos ] = ( remRegBins < 4 ? 0 : fracBitsSig.intBits[1] - fracBitsSig.intBits[0] ); |
690 | 0 | } |
691 | |
|
692 | 0 | DTRACE( g_trace_ctx, D_RDOQ, " Lev=%d \n", uiLevel ); |
693 | 0 | DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC0=%d\n", (int64_t)( pdCostCoeff0[iScanPos] ) ); |
694 | 0 | DTRACE_COND( ( uiMaxAbsLevel != 0 ), g_trace_ctx, D_RDOQ, " CostC =%d\n", (int64_t)( pdCostCoeff[iScanPos] ) ); |
695 | |
|
696 | 0 | deltaU[ uiBlkPos ] = TCoeff((lLevelDouble - (Intermediate_Int(uiLevel) << iQBits)) >> (iQBits-8)); |
697 | |
|
698 | 0 | if( uiLevel > 0 ) |
699 | 0 | { |
700 | 0 | int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
701 | 0 | rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow; |
702 | 0 | rateIncDown [ uiBlkPos ] = xGetICRate( uiLevel-1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow; |
703 | 0 | } |
704 | 0 | else // uiLevel == 0 |
705 | 0 | { |
706 | 0 | if( remRegBins < 4 ) |
707 | 0 | { |
708 | 0 | int rateNow = xGetICRate( uiLevel, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ); |
709 | 0 | rateIncUp [ uiBlkPos ] = xGetICRate( uiLevel+1, fracBitsPar, fracBitsGt1, fracBitsGt2, remRegBins, goRiceZero, goRiceParam, maxLog2TrDynamicRange ) - rateNow; |
710 | 0 | } |
711 | 0 | else |
712 | 0 | { |
713 | 0 | rateIncUp [ uiBlkPos ] = fracBitsGt1.intBits[ 0 ]; |
714 | 0 | } |
715 | 0 | } |
716 | 0 | piDstCoeff[ uiBlkPos ] = uiLevel; |
717 | 0 | d64BaseCost += pdCostCoeff [ iScanPos ]; |
718 | |
|
719 | 0 | if( ( (iScanPos & iCGSizeM1) == 0 ) && ( iScanPos > 0 ) ) |
720 | 0 | { |
721 | 0 | goRiceParam = 0; |
722 | 0 | } |
723 | 0 | else if( remRegBins >= 4 ) |
724 | 0 | { |
725 | 0 | int sumAll = cctx.templateAbsSum(iScanPos, piDstCoeff, 4); |
726 | 0 | goRiceParam = g_auiGoRiceParsCoeff[sumAll]; |
727 | 0 | remRegBins -= (uiLevel < 2 ? uiLevel : 3) + (iScanPos != iLastScanPos); |
728 | 0 | } |
729 | 0 | } |
730 | 0 | else |
731 | 0 | { |
732 | 0 | d64BaseCost += pdCostCoeff0[ iScanPos ]; |
733 | 0 | } |
734 | 0 | rdStats.d64SigCost += pdCostSig[ iScanPos ]; |
735 | 0 | if (iScanPosinCG == 0 ) |
736 | 0 | { |
737 | 0 | rdStats.d64SigCost_0 = pdCostSig[ iScanPos ]; |
738 | 0 | } |
739 | 0 | if (piDstCoeff[ uiBlkPos ] ) |
740 | 0 | { |
741 | 0 | cctx.setSigGroup(); |
742 | 0 | rdStats.d64CodedLevelandDist += pdCostCoeff[ iScanPos ] - pdCostSig[ iScanPos ]; |
743 | 0 | rdStats.d64UncodedDist += pdCostCoeff0[ iScanPos ]; |
744 | 0 | if ( iScanPosinCG != 0 ) |
745 | 0 | { |
746 | 0 | rdStats.iNNZbeforePos0++; |
747 | 0 | } |
748 | 0 | } |
749 | 0 | } //end for (iScanPosinCG) |
750 | |
|
751 | 0 | if (iCGLastScanPos >= 0) |
752 | 0 | { |
753 | 0 | if( cctx.subSetId() ) |
754 | 0 | { |
755 | 0 | if( !cctx.isSigGroup() ) |
756 | 0 | { |
757 | 0 | const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() ); |
758 | 0 | d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost; |
759 | 0 | pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); |
760 | 0 | } |
761 | 0 | else |
762 | 0 | { |
763 | 0 | if (cctx.subSetId() < iCGLastScanPos) //skip the last coefficient group, which will be handled together with last position below. |
764 | 0 | { |
765 | 0 | if ( rdStats.iNNZbeforePos0 == 0 ) |
766 | 0 | { |
767 | 0 | d64BaseCost -= rdStats.d64SigCost_0; |
768 | 0 | rdStats.d64SigCost -= rdStats.d64SigCost_0; |
769 | 0 | } |
770 | | // rd-cost if SigCoeffGroupFlag = 0, initialization |
771 | 0 | double d64CostZeroCG = d64BaseCost; |
772 | |
|
773 | 0 | const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId() ); |
774 | |
|
775 | 0 | if (cctx.subSetId() < iCGLastScanPos) |
776 | 0 | { |
777 | 0 | d64BaseCost += xGetRateSigCoeffGroup(fracBitsSigGroup,1); |
778 | 0 | d64CostZeroCG += xGetRateSigCoeffGroup(fracBitsSigGroup,0); |
779 | 0 | pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,1); |
780 | 0 | } |
781 | | |
782 | | // try to convert the current coeff group from non-zero to all-zero |
783 | 0 | d64CostZeroCG += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels |
784 | 0 | d64CostZeroCG -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels |
785 | 0 | d64CostZeroCG -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels |
786 | | |
787 | | // if we can save cost, change this block to all-zero block |
788 | 0 | if ( d64CostZeroCG < d64BaseCost ) |
789 | 0 | { |
790 | 0 | cctx.resetSigGroup(); |
791 | 0 | d64BaseCost = d64CostZeroCG; |
792 | 0 | remRegBins = remRegBinsStartCG; |
793 | 0 | if (cctx.subSetId() < iCGLastScanPos) |
794 | 0 | { |
795 | 0 | pdCostCoeffGroupSig[ cctx.subSetId() ] = xGetRateSigCoeffGroup(fracBitsSigGroup,0); |
796 | 0 | } |
797 | | // reset coeffs to 0 in this block |
798 | 0 | for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) |
799 | 0 | { |
800 | 0 | iScanPos = cctx.minSubPos() + iScanPosinCG; |
801 | 0 | uint32_t uiBlkPos = cctx.blockPos( iScanPos ); |
802 | |
|
803 | 0 | if (piDstCoeff[ uiBlkPos ]) |
804 | 0 | { |
805 | 0 | piDstCoeff [ uiBlkPos ] = 0; |
806 | 0 | pdCostCoeff[ iScanPos ] = pdCostCoeff0[ iScanPos ]; |
807 | 0 | pdCostSig [ iScanPos ] = 0; |
808 | 0 | } |
809 | 0 | } |
810 | 0 | } // end if ( d64CostAllZeros < d64BaseCost ) |
811 | 0 | } |
812 | 0 | } // end if if (uiSigCoeffGroupFlag[ uiCGBlkPos ] == 0) |
813 | 0 | } |
814 | 0 | else |
815 | 0 | { |
816 | 0 | cctx.setSigGroup(); |
817 | 0 | } |
818 | 0 | } |
819 | 0 | } //end for (cctx.subSetId) |
820 | | |
821 | | |
822 | | //===== estimate last position ===== |
823 | 0 | if ( iLastScanPos < 0 ) |
824 | 0 | { |
825 | 0 | return; |
826 | 0 | } |
827 | | |
828 | 0 | double d64BestCost = 0; |
829 | 0 | int iBestLastIdxP1 = 0; |
830 | | |
831 | |
|
832 | 0 | if( !CU::isIntra( *tu.cu ) && isLuma( compID ) && tu.depth == 0 ) |
833 | 0 | { |
834 | 0 | const BinFracBits fracBitsQtRootCbf = fracBits.getFracBitsArray( Ctx::QtRootCbf() ); |
835 | 0 | d64BestCost = d64BlockUncodedCost + xGetICost( fracBitsQtRootCbf.intBits[ 0 ] ); |
836 | 0 | d64BaseCost += xGetICost( fracBitsQtRootCbf.intBits[ 1 ] ); |
837 | 0 | } |
838 | 0 | else |
839 | 0 | { |
840 | 0 | bool previousCbf = tu.cbf[COMP_Cb]; |
841 | 0 | bool lastCbfIsInferred = false; |
842 | 0 | if( useIntraSubPartitions ) |
843 | 0 | { |
844 | 0 | bool rootCbfSoFar = false; |
845 | 0 | bool isLastSubPartition = CU::isISPLast(*tu.cu, tu.Y(), compID); |
846 | 0 | uint32_t nTus = tu.cu->ispMode == HOR_INTRA_SUBPARTITIONS ? tu.cu->lheight() >> Log2(tu.lheight()) : tu.cu->lwidth() >> Log2(tu.lwidth()); |
847 | 0 | if( isLastSubPartition ) |
848 | 0 | { |
849 | 0 | TransformUnit* tuPointer = tu.cu->firstTU; |
850 | 0 | for( int tuIdx = 0; tuIdx < nTus - 1; tuIdx++ ) |
851 | 0 | { |
852 | 0 | rootCbfSoFar |= TU::getCbfAtDepth(*tuPointer, COMP_Y, tu.depth); |
853 | 0 | tuPointer = tuPointer->next; |
854 | 0 | } |
855 | 0 | if( !rootCbfSoFar ) |
856 | 0 | { |
857 | 0 | lastCbfIsInferred = true; |
858 | 0 | } |
859 | 0 | } |
860 | 0 | if( !lastCbfIsInferred ) |
861 | 0 | { |
862 | 0 | previousCbf = TU::getPrevTuCbfAtDepth(tu, compID, tu.depth); |
863 | 0 | } |
864 | 0 | } |
865 | 0 | BinFracBits fracBitsQtCbf = fracBits.getFracBitsArray( Ctx::QtCbf[compID]( DeriveCtx::CtxQtCbf( rect.compID, previousCbf, useIntraSubPartitions ) ) ); |
866 | |
|
867 | 0 | if( !lastCbfIsInferred ) |
868 | 0 | { |
869 | 0 | d64BestCost = d64BlockUncodedCost + xGetICost(fracBitsQtCbf.intBits[0]); |
870 | 0 | d64BaseCost += xGetICost(fracBitsQtCbf.intBits[1]); |
871 | 0 | } |
872 | 0 | else |
873 | 0 | { |
874 | 0 | d64BestCost = d64BlockUncodedCost; |
875 | 0 | } |
876 | 0 | } |
877 | |
|
878 | 0 | int lastBitsX[LAST_SIGNIFICANT_GROUPS] = { 0 }; |
879 | 0 | int lastBitsY[LAST_SIGNIFICANT_GROUPS] = { 0 }; |
880 | 0 | { |
881 | 0 | int dim1 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiWidth); |
882 | 0 | int dim2 = std::min<int>(JVET_C0024_ZERO_OUT_TH, uiHeight); |
883 | 0 | int bitsX = 0; |
884 | 0 | int bitsY = 0; |
885 | 0 | int ctxId; |
886 | | //X-coordinate |
887 | 0 | for ( ctxId = 0; ctxId < g_uiGroupIdx[dim1-1]; ctxId++) |
888 | 0 | { |
889 | 0 | const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastXCtxId(ctxId) ); |
890 | 0 | lastBitsX[ ctxId ] = bitsX + fB.intBits[ 0 ]; |
891 | 0 | bitsX += fB.intBits[ 1 ]; |
892 | 0 | } |
893 | 0 | lastBitsX[ctxId] = bitsX; |
894 | | //Y-coordinate |
895 | 0 | for ( ctxId = 0; ctxId < g_uiGroupIdx[dim2-1]; ctxId++) |
896 | 0 | { |
897 | 0 | const BinFracBits fB = fracBits.getFracBitsArray( cctx.lastYCtxId(ctxId) ); |
898 | 0 | lastBitsY[ ctxId ] = bitsY + fB.intBits[ 0 ]; |
899 | 0 | bitsY += fB.intBits[ 1 ]; |
900 | 0 | } |
901 | 0 | lastBitsY[ctxId] = bitsY; |
902 | 0 | } |
903 | | |
904 | |
|
905 | 0 | bool bFoundLast = false; |
906 | 0 | for (int iCGScanPos = iCGLastScanPos; iCGScanPos >= 0; iCGScanPos--) |
907 | 0 | { |
908 | 0 | d64BaseCost -= pdCostCoeffGroupSig [ iCGScanPos ]; |
909 | 0 | if (cctx.isSigGroup( iCGScanPos ) ) |
910 | 0 | { |
911 | 0 | uint32_t maxNonZeroPosInCG = iCGSizeM1; |
912 | 0 | if( lfnstIdx > 0 && ( ( uiWidth == 4 && uiHeight == 4 ) || ( uiWidth == 8 && uiHeight == 8 && cctx.cgPosX() == 0 && cctx.cgPosY() == 0 ) ) ) |
913 | 0 | { |
914 | 0 | maxNonZeroPosInCG = 7; |
915 | 0 | } |
916 | 0 | for( int iScanPosinCG = maxNonZeroPosInCG; iScanPosinCG >= 0; iScanPosinCG-- ) |
917 | 0 | { |
918 | 0 | iScanPos = iCGScanPos * (iCGSizeM1 + 1) + iScanPosinCG; |
919 | |
|
920 | 0 | if (iScanPos > iLastScanPos) |
921 | 0 | { |
922 | 0 | continue; |
923 | 0 | } |
924 | 0 | uint32_t uiBlkPos = cctx.blockPos( iScanPos ); |
925 | |
|
926 | 0 | if( piDstCoeff[ uiBlkPos ] ) |
927 | 0 | { |
928 | 0 | uint32_t uiPosY = uiBlkPos >> uiLog2BlockWidth; |
929 | 0 | uint32_t uiPosX = uiBlkPos - ( uiPosY << uiLog2BlockWidth ); |
930 | 0 | double d64CostLast = xGetRateLast( lastBitsX, lastBitsY, uiPosX, uiPosY ); |
931 | |
|
932 | 0 | double totalCost = d64BaseCost + d64CostLast - pdCostSig[ iScanPos ]; |
933 | |
|
934 | 0 | if( totalCost < d64BestCost ) |
935 | 0 | { |
936 | 0 | iBestLastIdxP1 = iScanPos + 1; |
937 | 0 | d64BestCost = totalCost; |
938 | 0 | } |
939 | 0 | if( piDstCoeff[ uiBlkPos ] > 1 ) |
940 | 0 | { |
941 | 0 | bFoundLast = true; |
942 | 0 | break; |
943 | 0 | } |
944 | 0 | d64BaseCost -= pdCostCoeff[ iScanPos ]; |
945 | 0 | d64BaseCost += pdCostCoeff0[ iScanPos ]; |
946 | 0 | } |
947 | 0 | else |
948 | 0 | { |
949 | 0 | d64BaseCost -= pdCostSig[ iScanPos ]; |
950 | 0 | } |
951 | 0 | } //end for |
952 | 0 | if (bFoundLast) |
953 | 0 | { |
954 | 0 | break; |
955 | 0 | } |
956 | 0 | } // end if (uiSigCoeffGroupFlag[ uiCGBlkPos ]) |
957 | 0 | DTRACE( g_trace_ctx, D_RDOQ_COST, "%d: %3d, %3d, %dx%d, comp=%d\n", DTRACE_GET_COUNTER( g_trace_ctx, D_RDOQ_COST ), rect.x, rect.y, rect.width, rect.height, compID ); |
958 | 0 | DTRACE( g_trace_ctx, D_RDOQ_COST, "Uncoded=%d\n", (int64_t)( d64BlockUncodedCost ) ); |
959 | 0 | DTRACE( g_trace_ctx, D_RDOQ_COST, "Coded =%d\n", (int64_t)( d64BaseCost ) ); |
960 | |
|
961 | 0 | } // end for |
962 | | |
963 | |
|
964 | 0 | for ( int scanPos = 0; scanPos < iBestLastIdxP1; scanPos++ ) |
965 | 0 | { |
966 | 0 | int blkPos = cctx.blockPos( scanPos ); |
967 | 0 | TCoeff level = piDstCoeff[ blkPos ]; |
968 | 0 | uiAbsSum += level; |
969 | 0 | piDstCoeff[ blkPos ] = ( plSrcCoeff[ blkPos ] < 0 ) ? -level : level; |
970 | 0 | } |
971 | | |
972 | | //===== clean uncoded coefficients ===== |
973 | 0 | for ( int scanPos = iBestLastIdxP1; scanPos <= iLastScanPos; scanPos++ ) |
974 | 0 | { |
975 | 0 | piDstCoeff[ cctx.blockPos( scanPos ) ] = 0; |
976 | 0 | } |
977 | 0 | iLastScanPos = iBestLastIdxP1 - 1; |
978 | |
|
979 | 0 | if( cctx.signHiding() && uiAbsSum>=2) |
980 | 0 | { |
981 | 0 | const double inverseQuantScale = double(g_invQuantScales[0][cQP.rem(isTransformSkip)]); |
982 | 0 | int64_t rdFactor = (int64_t)(inverseQuantScale * inverseQuantScale * (1 << (2 * cQP.per(isTransformSkip))) / m_dLambda / 16 |
983 | 0 | / (1 << (2 * DISTORTION_PRECISION_ADJUSTMENT(channelBitDepth))) |
984 | 0 | + 0.5); |
985 | |
|
986 | 0 | int lastCG = -1; |
987 | 0 | int absSum = 0 ; |
988 | 0 | int n ; |
989 | 0 | for (int subSet = iCGNum - 1; subSet >= 0; subSet--) |
990 | 0 | { |
991 | 0 | int subPos = subSet << cctx.log2CGSize(); |
992 | 0 | int firstNZPosInCG = iCGSizeM1 + 1, lastNZPosInCG = -1; |
993 | 0 | absSum = 0 ; |
994 | |
|
995 | 0 | for( n = iCGSizeM1; n >= 0; --n ) |
996 | 0 | { |
997 | 0 | if( piDstCoeff[ cctx.blockPos( n + subPos )] ) |
998 | 0 | { |
999 | 0 | lastNZPosInCG = n; |
1000 | 0 | break; |
1001 | 0 | } |
1002 | 0 | } |
1003 | |
|
1004 | 0 | for( n = 0; n <= iCGSizeM1; n++ ) |
1005 | 0 | { |
1006 | 0 | if( piDstCoeff[ cctx.blockPos( n + subPos )] ) |
1007 | 0 | { |
1008 | 0 | firstNZPosInCG = n; |
1009 | 0 | break; |
1010 | 0 | } |
1011 | 0 | } |
1012 | |
|
1013 | 0 | for( n = firstNZPosInCG; n <= lastNZPosInCG; n++ ) |
1014 | 0 | { |
1015 | 0 | absSum += int(piDstCoeff[ cctx.blockPos( n + subPos )]); |
1016 | 0 | } |
1017 | |
|
1018 | 0 | if(lastNZPosInCG>=0 && lastCG==-1) |
1019 | 0 | { |
1020 | 0 | lastCG = 1; |
1021 | 0 | } |
1022 | |
|
1023 | 0 | if( lastNZPosInCG-firstNZPosInCG>=SBH_THRESHOLD ) |
1024 | 0 | { |
1025 | 0 | uint32_t signbit = (piDstCoeff[cctx.blockPos(subPos+firstNZPosInCG)]>0?0:1); |
1026 | 0 | if( signbit!=(absSum&0x1) ) // hide but need tune |
1027 | 0 | { |
1028 | | // calculate the cost |
1029 | 0 | int64_t minCostInc = std::numeric_limits<int64_t>::max(), curCost = std::numeric_limits<int64_t>::max(); |
1030 | 0 | int minPos = -1, finalChange = 0, curChange = 0; |
1031 | |
|
1032 | 0 | for( n = (lastCG == 1 ? lastNZPosInCG : iCGSizeM1); n >= 0; --n ) |
1033 | 0 | { |
1034 | 0 | uint32_t uiBlkPos = cctx.blockPos( n + subPos ); |
1035 | 0 | if(piDstCoeff[ uiBlkPos ] != 0 ) |
1036 | 0 | { |
1037 | 0 | int64_t costUp = rdFactor * ( - deltaU[uiBlkPos] ) + rateIncUp[uiBlkPos]; |
1038 | 0 | int64_t costDown = rdFactor * ( deltaU[uiBlkPos] ) + rateIncDown[uiBlkPos] |
1039 | 0 | - ((abs(piDstCoeff[uiBlkPos]) == 1) ? sigRateDelta[uiBlkPos] : 0); |
1040 | |
|
1041 | 0 | if(lastCG==1 && lastNZPosInCG==n && abs(piDstCoeff[uiBlkPos])==1) |
1042 | 0 | { |
1043 | 0 | costDown -= (4<<SCALE_BITS); |
1044 | 0 | } |
1045 | |
|
1046 | 0 | if(costUp<costDown) |
1047 | 0 | { |
1048 | 0 | curCost = costUp; |
1049 | 0 | curChange = 1; |
1050 | 0 | } |
1051 | 0 | else |
1052 | 0 | { |
1053 | 0 | curChange = -1; |
1054 | 0 | if(n==firstNZPosInCG && abs(piDstCoeff[uiBlkPos])==1) |
1055 | 0 | { |
1056 | 0 | curCost = std::numeric_limits<int64_t>::max(); |
1057 | 0 | } |
1058 | 0 | else |
1059 | 0 | { |
1060 | 0 | curCost = costDown; |
1061 | 0 | } |
1062 | 0 | } |
1063 | 0 | } |
1064 | 0 | else |
1065 | 0 | { |
1066 | 0 | curCost = rdFactor * ( - (abs(deltaU[uiBlkPos])) ) + (1<<SCALE_BITS) + rateIncUp[uiBlkPos] + sigRateDelta[uiBlkPos] ; |
1067 | 0 | curChange = 1 ; |
1068 | |
|
1069 | 0 | if(n<firstNZPosInCG) |
1070 | 0 | { |
1071 | 0 | uint32_t thissignbit = (plSrcCoeff[uiBlkPos]>=0?0:1); |
1072 | 0 | if(thissignbit != signbit ) |
1073 | 0 | { |
1074 | 0 | curCost = std::numeric_limits<int64_t>::max(); |
1075 | 0 | } |
1076 | 0 | } |
1077 | 0 | } |
1078 | |
|
1079 | 0 | if( curCost<minCostInc) |
1080 | 0 | { |
1081 | 0 | minCostInc = curCost; |
1082 | 0 | finalChange = curChange; |
1083 | 0 | minPos = uiBlkPos; |
1084 | 0 | } |
1085 | 0 | } |
1086 | |
|
1087 | 0 | if(piDstCoeff[minPos] == entropyCodingMaximum || piDstCoeff[minPos] == entropyCodingMinimum) |
1088 | 0 | { |
1089 | 0 | finalChange = -1; |
1090 | 0 | } |
1091 | |
|
1092 | 0 | if(plSrcCoeff[minPos]>=0) |
1093 | 0 | { |
1094 | 0 | piDstCoeff[minPos] += finalChange ; |
1095 | 0 | } |
1096 | 0 | else |
1097 | 0 | { |
1098 | 0 | piDstCoeff[minPos] -= finalChange ; |
1099 | 0 | } |
1100 | 0 | } |
1101 | 0 | } |
1102 | |
|
1103 | 0 | if(lastCG==1) |
1104 | 0 | { |
1105 | 0 | lastCG=0 ; |
1106 | 0 | } |
1107 | 0 | } |
1108 | | |
1109 | | // Check due to saving of last pos. Sign data hiding can change the position of last coef. |
1110 | 0 | if( piDstCoeff[cctx.blockPos( iLastScanPos )] == 0 ) |
1111 | 0 | { |
1112 | 0 | int scanPos = iLastScanPos - 1; |
1113 | 0 | for( ; scanPos >= 0; scanPos-- ) |
1114 | 0 | { |
1115 | 0 | if( piDstCoeff[cctx.blockPos( scanPos )] ) |
1116 | 0 | break; |
1117 | 0 | } |
1118 | 0 | iLastScanPos = scanPos; |
1119 | 0 | } |
1120 | 0 | } |
1121 | 0 | tu.lastPos[compID] = iLastScanPos; |
1122 | 0 | } |
1123 | | |
// Rate-distortion optimised quantisation for the "TS" residual path (presumably
// transform skip, going by the needsSqrt2Scale comment below and the ...TS
// context helpers used throughout).
//
// The block is processed sub-block by sub-block in increasing scan order. For
// every coefficient up to three candidate levels are collected and
// xGetCodedLevelTSPred() selects the one to keep; afterwards a whole sub-block
// may be reset to zero if that yields a lower estimated cost.
//
//   tu      transform unit; signed levels are written to tu.getCoeffs( compID ).buf
//   compID  component to quantise, must be < MAX_NUM_TBLOCKS (CHECKed)
//   coeffs  input coefficients, only coeffs.buf is read
//   absSum  in/out: the sum of |level| over the block is ADDED to it; it is not
//           reset here, so the caller presumably passes it in as 0 - TODO confirm
//   qp      supplies per()/rem() for the quantiser shift and scale
//   ctx     supplies the fractional-bit tables used for rate estimation
//
// Side effects on members: m_pdCostCoeff, m_pdCostSig, m_pdCostCoeff0 and
// m_pdCostCoeffGroupSig are overwritten, m_bdpcm is set to 0 and m_testedLevels
// is rewritten for every coefficient.
1124 | | void QuantRDOQ::rateDistOptQuantTS( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx ) |
1125 | 0 | { |
1126 | 0 | const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); |
1127 | |
|
1128 | 0 | const SPS &sps = *tu.cs->sps; |
1129 | 0 | const CompArea& rect = tu.blocks[compID]; |
1130 | 0 | const uint32_t width = rect.width; |
1131 | 0 | const uint32_t height = rect.height; |
1132 | 0 | const ChannelType chType = toChannelType(compID); |
1133 | 0 | const int channelBitDepth = sps.bitDepths[ chType ]; |
1134 | |
|
1135 | 0 | const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(); |
1136 | |
|
1137 | 0 | const int transformShift = getTransformShift( channelBitDepth, rect.size(), maxLog2TrDynamicRange ); |
1138 | |
|
1139 | 0 | const uint32_t maxNumCoeff = rect.area(); |
1140 | |
|
1141 | 0 | CHECK( compID >= MAX_NUM_TBLOCKS, "Invalid component ID" ); |
1142 | |
|
// scalingListType is only validated here; it is not used further in this function.
1143 | 0 | int scalingListType = getScalingListType( tu.cu->predMode, compID ); |
1144 | 0 | CHECK( scalingListType >= SCALING_LIST_NUM, "Invalid scaling list" ); |
1145 | |
|
1146 | 0 | const TCoeff *srcCoeff = coeffs.buf; |
1147 | 0 | TCoeffSig *dstCoeff = tu.getCoeffs( compID ).buf; |
1148 | |
|
// Per-scan-position cost arrays: total cost of the chosen level, its significance
// cost, and the cost of coding the coefficient as zero.
1149 | 0 | double *costCoeff = m_pdCostCoeff; |
1150 | 0 | double *costSig = m_pdCostSig; |
1151 | 0 | double *costCoeff0 = m_pdCostCoeff0; |
1152 | |
|
// m_pdCostCoeff0 is not cleared: costCoeff0[scanPos] is assigned for every position
// visited by the quantisation loop below.
1153 | 0 | memset( m_pdCostCoeff, 0, sizeof( double ) * maxNumCoeff ); |
1154 | 0 | memset( m_pdCostSig, 0, sizeof( double ) * maxNumCoeff ); |
1155 | |
|
// No BDPCM direction on this path (forwardRDPCM stores its dirMode in m_bdpcm instead).
1156 | 0 | m_bdpcm = 0; |
1157 | |
|
// NOTE(review): errorScale below calls TU::needsSqrt2Scale( tu, compID ) a second time
// instead of reusing the local needsSqrt2Scale.
1158 | 0 | const bool needsSqrt2Scale = TU::needsSqrt2Scale( tu, compID ); // should always be false - transform-skipped blocks don't require sqrt(2) compensation. |
1159 | 0 | const bool isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP; |
1160 | 0 | const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip ? 0 : transformShift) + (needsSqrt2Scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits |
1161 | 0 | const int quantisationCoefficient = g_quantScales[needsSqrt2Scale?1:0][qp.rem(isTransformSkip)]; |
1162 | 0 | const double errorScale = xGetErrScaleCoeff( TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); |
1163 | |
|
// Upper clamp applied to every candidate absolute level.
1164 | 0 | const TCoeff entropyCodingMaximum = ( 1 << maxLog2TrDynamicRange ) - 1; |
1165 | |
|
// coeffLevels holds at most three candidates (rounded, "min" and "up" level);
// m_testedLevels counts how many are valid. Only coeffLevelError[0] is written in
// this function; the remaining entries are presumably filled by xGetCodedLevelTSPred.
1166 | 0 | uint32_t coeffLevels[3]; |
1167 | 0 | double coeffLevelError[4]; |
1168 | |
|
1169 | 0 | CoeffCodingContext cctx( tu, compID, tu.cs->slice->signDataHidingEnabled ); |
1170 | 0 | const int sbSizeM1 = ( 1 << cctx.log2CGSize() ) - 1; |
1171 | 0 | double baseCost = 0; |
1172 | 0 | uint32_t goRiceParam = 0; |
1173 | |
|
// One significance-flag cost per sub-block; written below but not read again in
// this function.
1174 | 0 | double *costSigSubBlock = m_pdCostCoeffGroupSig; |
1175 | 0 | memset( costSigSubBlock, 0, ( maxNumCoeff >> cctx.log2CGSize() ) * sizeof( double ) ); |
1176 | |
|
1177 | 0 | const int sbNum = width * height >> cctx.log2CGSize(); |
1178 | 0 | int scanPos; |
1179 | 0 | coeffGroupRDStats rdStats; |
1180 | |
|
// Becomes true once a sub-block has gone through the zero-out test and was kept.
1181 | 0 | bool anySigCG = false; |
1182 | |
|
// Context-coded bin budget for the block: 7/4 (1.75) bins per coefficient.
1183 | 0 | int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; |
1184 | 0 | cctx.remRegBins = maxCtxBins; |
1185 | |
|
1186 | 0 | for( int sbId = 0; sbId < sbNum; sbId++ ) |
1187 | 0 | { |
1188 | 0 | cctx.initSubblock( sbId ); |
1189 | |
|
// baseCost and rdStats are restarted for every sub-block, i.e. they describe the
// current sub-block only, not the whole block.
1190 | 0 | int noCoeffCoded = 0; |
1191 | 0 | baseCost = 0.0; |
1192 | 0 | memset( &rdStats, 0, sizeof (coeffGroupRDStats)); |
1193 | |
|
// Redundant after the memset above; kept as in the original.
1194 | 0 | rdStats.iNumSbbCtxBins = 0; |
1195 | |
|
1196 | 0 | for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ ) |
1197 | 0 | { |
// lastPosCoded is loop-invariant (always the last scan index of the sub-block).
1198 | 0 | int lastPosCoded = sbSizeM1; |
1199 | 0 | scanPos = cctx.minSubPos() + scanPosInSB; |
1200 | | //===== quantization ===== |
1201 | 0 | uint32_t blkPos = cctx.blockPos( scanPos ); |
1202 | | |
1203 | | // set coeff |
// Scaled magnitude in 64 bit, then clamped so that adding the rounding offset
// 1 << ( qBits - 1 ) below cannot overflow Intermediate_Int.
1204 | 0 | const int64_t tmpLevel = int64_t( abs( srcCoeff[blkPos] ) ) * quantisationCoefficient; |
1205 | 0 | const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>( tmpLevel, std::numeric_limits<Intermediate_Int>::max() - ( Intermediate_Int( 1 ) << ( qBits - 1 ) ) ); |
1206 | |
|
// roundAbsLevel: round-to-nearest level. minAbsLevel: one below it, but never below 1.
1207 | 0 | uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits)); |
1208 | 0 | uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1); |
1209 | |
|
// downAbsLevel: truncated level. upAbsLevel: one above it (clamped).
1210 | 0 | uint32_t downAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t(levelDouble >> qBits)); |
1211 | 0 | uint32_t upAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), downAbsLevel + 1); |
1212 | |
|
// Candidate list: the rounded level always comes first.
1213 | 0 | m_testedLevels = 0; |
1214 | 0 | coeffLevels[m_testedLevels++] = roundAbsLevel; |
1215 | |
|
1216 | 0 | if (minAbsLevel != roundAbsLevel) |
1217 | 0 | coeffLevels[m_testedLevels++] = minAbsLevel; |
1218 | |
|
1219 | 0 | int rightPixel, belowPixel, predPixel; |
1220 | |
|
// neighTS() fills rightPixel/belowPixel from the levels already stored in dstCoeff.
1221 | 0 | cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff); |
1222 | 0 | predPixel = cctx.deriveModCoeff(rightPixel, belowPixel, upAbsLevel, 0); |
1223 | |
|
// upAbsLevel is tested as a third candidate only if it is new and
// deriveModCoeff() returned 1 for it.
1224 | 0 | if (upAbsLevel != roundAbsLevel && upAbsLevel != minAbsLevel && predPixel == 1) |
1225 | 0 | coeffLevels[m_testedLevels++] = upAbsLevel; |
1226 | |
|
// Scaled squared magnitude: used as the cost of coding this coefficient as zero.
1227 | 0 | double dErr = double(levelDouble); |
1228 | 0 | coeffLevelError[0] = dErr * dErr * errorScale; |
1229 | |
|
// The rounded level is stored provisionally; the context derivations below are
// handed dstCoeff, and the final level overwrites this entry further down.
1230 | 0 | costCoeff0[scanPos] = coeffLevelError[0]; |
1231 | 0 | dstCoeff[blkPos] = coeffLevels[0]; |
1232 | | |
1233 | | //===== coefficient level estimation ===== |
1234 | 0 | unsigned ctxIdSig = cctx.sigCtxIdAbsTS( scanPos, dstCoeff ); |
1235 | 0 | uint32_t cLevel; |
1236 | 0 | const BinFracBits fracBitsPar = fracBits.getFracBitsArray( cctx.parityCtxIdAbsTS() ); |
1237 | | |
// The Rice parameter is fixed to 1 for every coefficient on this path.
1238 | | //goRiceParam = cctx.templateAbsSumTS( scanPos, dstCoeff ); |
1239 | 0 | goRiceParam = 1; |
1240 | 0 | unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, 0); |
1241 | 0 | const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign); |
1242 | 0 | const uint8_t sign = srcCoeff[ blkPos ] < 0 ? 1 : 0; |
1243 | |
|
1244 | 0 | DTRACE_COND( ( coeffLevels[0] != 0 ), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig ); |
1245 | |
|
1246 | 0 | unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, 0); |
1247 | 0 | const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId); |
1248 | |
|
1249 | 0 | const BinFracBits fracBitsSig = fracBits.getFracBitsArray( ctxIdSig ); |
// lastCoeff: this is the final scan position of the sub-block and no non-zero level
// has been chosen in it so far.
1250 | 0 | bool lastCoeff = false; // |
1251 | 0 | if (scanPosInSB == lastPosCoded && noCoeffCoded == 0) |
1252 | 0 | { |
1253 | 0 | lastCoeff = true; |
1254 | 0 | } |
// xGetCodedLevelTSPred() returns the selected absolute level, fills the three cost
// outputs for this position and reports the context bins it used via numUsedCtxBins.
1255 | 0 | int numUsedCtxBins = 0; |
1256 | 0 | cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError, |
1257 | 0 | &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins); |
1258 | |
|
// Charge the bins to the block budget and remember them per sub-block so they can
// be handed back if the sub-block ends up all-zero.
1259 | 0 | cctx.remRegBins -= numUsedCtxBins; |
1260 | 0 | rdStats.iNumSbbCtxBins += numUsedCtxBins; |
1261 | |
|
1262 | 0 | if (cLevel > 0) |
1263 | 0 | { |
1264 | 0 | noCoeffCoded++; |
1265 | 0 | } |
1266 | |
|
// Store the final level with the sign of the source coefficient.
1267 | 0 | TCoeff level = cLevel; |
1268 | 0 | dstCoeff[blkPos] = (level != 0 && srcCoeff[blkPos] < 0) ? -level : level; |
1269 | 0 | baseCost += costCoeff[ scanPos ]; |
1270 | 0 | rdStats.d64SigCost += costSig[ scanPos ]; |
1271 | |
|
// Statistics over the non-zero levels, needed for the sub-block zero-out test below.
1272 | 0 | if( dstCoeff[ blkPos ] ) |
1273 | 0 | { |
1274 | 0 | cctx.setSigGroup(); |
1275 | 0 | rdStats.d64CodedLevelandDist += costCoeff [ scanPos ] - costSig[ scanPos ]; |
1276 | 0 | rdStats.d64UncodedDist += costCoeff0[ scanPos ]; |
1277 | 0 | } |
1278 | 0 | } //end for (scanPosInSB) |
1279 | |
|
// Sub-block without any non-zero level: the per-coefficient significance costs are
// replaced by the cost of the group flag being 0, and the used bins are refunded.
1280 | 0 | if( !cctx.isSigGroup() ) |
1281 | 0 | { |
1282 | 0 | const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) ); |
1283 | 0 | baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ) - rdStats.d64SigCost; |
1284 | 0 | costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); |
1285 | 0 | cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block |
1286 | 0 | } |
// The zero-out test is skipped only for the last sub-block when no earlier
// sub-block was kept significant by this test.
1287 | 0 | else if( sbId != sbNum - 1 || anySigCG ) |
1288 | 0 | { |
1289 | | // rd-cost if SigCoeffGroupFlag = 0, initialization |
1290 | 0 | double costZeroSB = baseCost; |
1291 | |
|
1292 | 0 | const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray( cctx.sigGroupCtxId( true ) ); |
1293 | |
|
1294 | 0 | baseCost += xGetRateSigCoeffGroup( fracBitsSigGroup, 1 ); |
1295 | 0 | costZeroSB += xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); |
1296 | 0 | costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 1 ); |
1297 | |
|
1298 | 0 | costZeroSB += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels |
1299 | 0 | costZeroSB -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels |
1300 | 0 | costZeroSB -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zero levels |
1301 | |
|
// Zeroing the whole sub-block is cheaper: clear its levels and costs and refund
// the context bins it consumed.
1302 | 0 | if( costZeroSB < baseCost ) |
1303 | 0 | { |
1304 | 0 | cctx.resetSigGroup(); |
1305 | 0 | baseCost = costZeroSB; |
1306 | 0 | costSigSubBlock[ cctx.subSetId() ] = xGetRateSigCoeffGroup( fracBitsSigGroup, 0 ); |
1307 | 0 | cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block |
1308 | |
|
1309 | 0 | for( int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++ ) |
1310 | 0 | { |
1311 | 0 | scanPos = cctx.minSubPos() + scanPosInSB; |
1312 | 0 | uint32_t blkPos = cctx.blockPos( scanPos ); |
1313 | |
|
1314 | 0 | if( dstCoeff[ blkPos ] ) |
1315 | 0 | { |
1316 | 0 | dstCoeff[ blkPos ] = 0; |
1317 | 0 | costCoeff[ scanPos ] = costCoeff0[ scanPos ]; |
1318 | 0 | costSig[ scanPos] = 0; |
1319 | 0 | } |
1320 | 0 | } |
1321 | 0 | } |
1322 | 0 | else |
1323 | 0 | { |
1324 | 0 | anySigCG = true; |
1325 | 0 | } |
1326 | 0 | } |
1327 | 0 | } |
1328 | | |
// NOTE(review): this loop declares its own scanPos, shadowing the outer one, and
// compares that int against the uint32_t maxNumCoeff.
1329 | | //===== accumulate the absolute sum of all levels (no last-position search on this path) ===== |
1330 | 0 | for( int scanPos = 0; scanPos < maxNumCoeff; scanPos++ ) |
1331 | 0 | { |
1332 | 0 | int blkPos = cctx.blockPos( scanPos ); |
1333 | 0 | TCoeff level = dstCoeff[ blkPos ]; |
1334 | 0 | absSum += abs(level); |
1335 | 0 | } |
1336 | 0 | } |
1337 | | |
1338 | | void QuantRDOQ::forwardRDPCM( TransformUnit& tu, const ComponentID compID, const CCoeffBuf& coeffs, TCoeff &absSum, const QpParam& qp, const Ctx &ctx ) |
1339 | 0 | { |
1340 | 0 | const FracBitsAccess& fracBits = ctx.getFracBitsAcess(); |
1341 | |
|
1342 | 0 | const SPS &sps = *tu.cs->sps; |
1343 | 0 | const CompArea& rect = tu.blocks[compID]; |
1344 | 0 | const uint32_t width = rect.width; |
1345 | 0 | const uint32_t height = rect.height; |
1346 | 0 | const ChannelType chType = toChannelType(compID); |
1347 | 0 | const int channelBitDepth = sps.bitDepths[chType]; |
1348 | |
|
1349 | 0 | const int maxLog2TrDynamicRange = sps.getMaxLog2TrDynamicRange(); |
1350 | 0 | const int dirMode = tu.cu->bdpcmM[toChannelType(compID)]; |
1351 | |
|
1352 | 0 | const int transformShift = getTransformShift(channelBitDepth, rect.size(), maxLog2TrDynamicRange); |
1353 | |
|
1354 | 0 | const uint32_t maxNumCoeff = rect.area(); |
1355 | |
|
1356 | 0 | CHECK(compID >= MAX_NUM_TBLOCKS, "Invalid component ID"); |
1357 | |
|
1358 | 0 | int scalingListType = getScalingListType(tu.cu->predMode, compID); |
1359 | 0 | CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list"); |
1360 | |
|
1361 | 0 | const TCoeff *srcCoeff = coeffs.buf; |
1362 | 0 | TCoeffSig *dstCoeff = tu.getCoeffs(compID).buf; |
1363 | |
|
1364 | 0 | double *costCoeff = m_pdCostCoeff; |
1365 | 0 | double *costSig = m_pdCostSig; |
1366 | 0 | double *costCoeff0 = m_pdCostCoeff0; |
1367 | |
|
1368 | 0 | memset(m_pdCostCoeff, 0, sizeof(double) * maxNumCoeff); |
1369 | 0 | memset(m_pdCostSig, 0, sizeof(double) * maxNumCoeff); |
1370 | 0 | memset(m_fullCoeff, 0, sizeof(TCoeff) * maxNumCoeff); |
1371 | |
|
1372 | 0 | m_bdpcm = dirMode; |
1373 | |
|
1374 | 0 | const bool needsSqrt2Scale = TU::needsSqrt2Scale(tu, compID); // should always be false - transform-skipped blocks don't require sqrt(2) compensation. |
1375 | 0 | const bool isTransformSkip = tu.mtsIdx[compID]==MTS_SKIP; |
1376 | 0 | const int qBits = QUANT_SHIFT + qp.per(isTransformSkip) + (isTransformSkip? 0 : transformShift) + ( needsSqrt2Scale ? -1 : 0); // Right shift of non-RDOQ quantizer; level = (coeff*uiQ + offset)>>q_bits |
1377 | 0 | const int quantisationCoefficient = g_quantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; |
1378 | 0 | const double errorScale = xGetErrScaleCoeff(TU::needsSqrt2Scale(tu, compID), width, height, qp.rem(isTransformSkip), maxLog2TrDynamicRange, channelBitDepth, isTransformSkip); |
1379 | |
|
1380 | 0 | TrQuantParams trQuantParams; |
1381 | 0 | trQuantParams.rightShift = (IQUANT_SHIFT - ((isTransformSkip ? 0 : transformShift) + qp.per(isTransformSkip))); |
1382 | 0 | trQuantParams.qScale = g_invQuantScales[needsSqrt2Scale ? 1 : 0][qp.rem(isTransformSkip)]; |
1383 | |
|
1384 | 0 | const TCoeff entropyCodingMaximum = (1 << maxLog2TrDynamicRange) - 1; |
1385 | |
|
1386 | 0 | uint32_t coeffLevels[3]; |
1387 | 0 | double coeffLevelError[4]; |
1388 | |
|
1389 | 0 | CoeffCodingContext cctx(tu, compID, tu.cs->slice->signDataHidingEnabled); |
1390 | 0 | const int sbSizeM1 = (1 << cctx.log2CGSize()) - 1; |
1391 | 0 | double baseCost = 0; |
1392 | 0 | uint32_t goRiceParam = 0; |
1393 | |
|
1394 | 0 | double *costSigSubBlock = m_pdCostCoeffGroupSig; |
1395 | 0 | memset(costSigSubBlock, 0, (maxNumCoeff >> cctx.log2CGSize()) * sizeof(double)); |
1396 | |
|
1397 | 0 | const int sbNum = width * height >> cctx.log2CGSize(); |
1398 | 0 | int scanPos; |
1399 | 0 | coeffGroupRDStats rdStats; |
1400 | |
|
1401 | 0 | bool anySigCG = false; |
1402 | |
|
1403 | 0 | int maxCtxBins = (cctx.maxNumCoeff() * 7) >> 2; |
1404 | 0 | cctx.remRegBins = maxCtxBins; |
1405 | |
|
1406 | 0 | for (int sbId = 0; sbId < sbNum; sbId++) |
1407 | 0 | { |
1408 | 0 | cctx.initSubblock(sbId); |
1409 | |
|
1410 | 0 | int noCoeffCoded = 0; |
1411 | 0 | baseCost = 0.0; |
1412 | 0 | memset(&rdStats, 0, sizeof(coeffGroupRDStats)); |
1413 | 0 | rdStats.iNumSbbCtxBins = 0; |
1414 | |
|
1415 | 0 | for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++) |
1416 | 0 | { |
1417 | 0 | int lastPosCoded = sbSizeM1; |
1418 | 0 | scanPos = cctx.minSubPos() + scanPosInSB; |
1419 | | //===== quantization ===== |
1420 | 0 | uint32_t blkPos = cctx.blockPos(scanPos); |
1421 | |
|
1422 | 0 | const int posX = cctx.posX(scanPos); |
1423 | 0 | const int posY = cctx.posY(scanPos); |
1424 | 0 | const int posS = (1 == dirMode) ? posX : posY; |
1425 | 0 | const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride; |
1426 | 0 | TCoeff predCoeff = (0 != posS) ? m_fullCoeff[posNb] : 0; |
1427 | | |
1428 | | // set coeff |
1429 | 0 | const int64_t tmpLevel = int64_t(abs(srcCoeff[blkPos] - predCoeff)) * quantisationCoefficient; |
1430 | 0 | const Intermediate_Int levelDouble = (Intermediate_Int)std::min<int64_t>(tmpLevel, std::numeric_limits<Intermediate_Int>::max() - (Intermediate_Int(1) << (qBits - 1))); |
1431 | 0 | uint32_t roundAbsLevel = std::min<uint32_t>(uint32_t(entropyCodingMaximum), uint32_t((levelDouble + (Intermediate_Int(1) << (qBits - 1))) >> qBits)); |
1432 | 0 | uint32_t minAbsLevel = (roundAbsLevel > 1 ? roundAbsLevel - 1 : 1); |
1433 | |
|
1434 | 0 | m_testedLevels = 0; |
1435 | 0 | coeffLevels[m_testedLevels++] = roundAbsLevel; |
1436 | |
|
1437 | 0 | if (minAbsLevel != roundAbsLevel) |
1438 | 0 | coeffLevels[m_testedLevels++] = minAbsLevel; |
1439 | |
|
1440 | 0 | double dErr = double(levelDouble); |
1441 | 0 | coeffLevelError[0] = dErr * dErr * errorScale; |
1442 | |
|
1443 | 0 | costCoeff0[scanPos] = coeffLevelError[0]; |
1444 | 0 | dstCoeff[blkPos] = coeffLevels[0]; |
1445 | | |
1446 | | //===== coefficient level estimation ===== |
1447 | 0 | unsigned ctxIdSig = cctx.sigCtxIdAbsTS(scanPos, dstCoeff); |
1448 | 0 | uint32_t cLevel; |
1449 | 0 | const BinFracBits fracBitsPar = fracBits.getFracBitsArray(cctx.parityCtxIdAbsTS()); |
1450 | | |
1451 | | //goRiceParam = cctx.templateAbsSumTS(scanPos, dstCoeff); |
1452 | 0 | goRiceParam = 1; |
1453 | 0 | unsigned ctxIdSign = cctx.signCtxIdAbsTS(scanPos, dstCoeff, dirMode); |
1454 | 0 | const BinFracBits fracBitsSign = fracBits.getFracBitsArray(ctxIdSign); |
1455 | 0 | const uint8_t sign = srcCoeff[blkPos] - predCoeff < 0 ? 1 : 0; |
1456 | 0 | unsigned gt1CtxId = cctx.lrg1CtxIdAbsTS(scanPos, dstCoeff, dirMode); |
1457 | 0 | const BinFracBits fracBitsGr1 = fracBits.getFracBitsArray(gt1CtxId); |
1458 | |
|
1459 | 0 | DTRACE_COND((dstCoeff[blkPos] != 0), g_trace_ctx, D_RDOQ_MORE, " uiCtxSig=%d", ctxIdSig); |
1460 | |
|
1461 | 0 | const BinFracBits fracBitsSig = fracBits.getFracBitsArray(ctxIdSig); |
1462 | 0 | bool lastCoeff = false; // |
1463 | 0 | if (scanPosInSB == lastPosCoded && noCoeffCoded == 0) |
1464 | 0 | { |
1465 | 0 | lastCoeff = true; |
1466 | 0 | } |
1467 | 0 | int rightPixel, belowPixel; |
1468 | 0 | cctx.neighTS(rightPixel, belowPixel, scanPos, dstCoeff); |
1469 | 0 | int numUsedCtxBins = 0; |
1470 | 0 | cLevel = xGetCodedLevelTSPred(costCoeff[scanPos], costCoeff0[scanPos], costSig[scanPos], levelDouble, qBits, errorScale, coeffLevels, coeffLevelError, |
1471 | 0 | &fracBitsSig, fracBitsPar, cctx, fracBits, fracBitsSign, fracBitsGr1, sign, rightPixel, belowPixel, goRiceParam, lastCoeff, maxLog2TrDynamicRange, numUsedCtxBins); |
1472 | 0 | cctx.remRegBins -= numUsedCtxBins; |
1473 | 0 | rdStats.iNumSbbCtxBins += numUsedCtxBins; |
1474 | |
|
1475 | 0 | if (cLevel > 0) |
1476 | 0 | { |
1477 | 0 | noCoeffCoded++; |
1478 | 0 | } |
1479 | 0 | dstCoeff[blkPos] = cLevel; |
1480 | |
|
1481 | 0 | if (sign) |
1482 | 0 | { |
1483 | 0 | dstCoeff[blkPos] = -dstCoeff[blkPos]; |
1484 | 0 | } |
1485 | |
|
1486 | 0 | xDequantSample( m_fullCoeff[blkPos], dstCoeff[blkPos], trQuantParams ); |
1487 | 0 | m_fullCoeff[blkPos] += predCoeff; |
1488 | |
|
1489 | 0 | baseCost += costCoeff[scanPos]; |
1490 | 0 | rdStats.d64SigCost += costSig[scanPos]; |
1491 | |
|
1492 | 0 | if (dstCoeff[blkPos]) |
1493 | 0 | { |
1494 | 0 | cctx.setSigGroup(); |
1495 | 0 | rdStats.d64CodedLevelandDist += costCoeff[scanPos] - costSig[scanPos]; |
1496 | 0 | rdStats.d64UncodedDist += costCoeff0[scanPos]; |
1497 | 0 | } |
1498 | 0 | } //end for (iScanPosinCG) |
1499 | |
|
1500 | 0 | if (!cctx.isSigGroup()) |
1501 | 0 | { |
1502 | 0 | const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true)); |
1503 | 0 | baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 0) - rdStats.d64SigCost; |
1504 | 0 | costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); |
1505 | 0 | cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block |
1506 | 0 | } |
1507 | 0 | else if (sbId != sbNum - 1 || anySigCG) |
1508 | 0 | { |
1509 | | // rd-cost if SigCoeffGroupFlag = 0, initialization |
1510 | 0 | double costZeroSB = baseCost; |
1511 | |
|
1512 | 0 | const BinFracBits fracBitsSigGroup = fracBits.getFracBitsArray(cctx.sigGroupCtxId(true)); |
1513 | |
|
1514 | 0 | baseCost += xGetRateSigCoeffGroup(fracBitsSigGroup, 1); |
1515 | 0 | costZeroSB += xGetRateSigCoeffGroup(fracBitsSigGroup, 0); |
1516 | 0 | costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 1); |
1517 | |
|
1518 | 0 | costZeroSB += rdStats.d64UncodedDist; // distortion for resetting non-zero levels to zero levels |
1519 | 0 | costZeroSB -= rdStats.d64CodedLevelandDist; // distortion and level cost for keeping all non-zero levels |
1520 | 0 | costZeroSB -= rdStats.d64SigCost; // sig cost for all coeffs, including zero levels and non-zerl levels |
1521 | |
|
1522 | 0 | if (costZeroSB < baseCost) |
1523 | 0 | { |
1524 | 0 | cctx.resetSigGroup(); |
1525 | 0 | baseCost = costZeroSB; |
1526 | 0 | costSigSubBlock[cctx.subSetId()] = xGetRateSigCoeffGroup(fracBitsSigGroup, 0); |
1527 | 0 | cctx.remRegBins += rdStats.iNumSbbCtxBins; // skip sub-block |
1528 | |
|
1529 | 0 | for (int scanPosInSB = 0; scanPosInSB <= sbSizeM1; scanPosInSB++) |
1530 | 0 | { |
1531 | 0 | scanPos = cctx.minSubPos() + scanPosInSB; |
1532 | 0 | uint32_t blkPos = cctx.blockPos(scanPos); |
1533 | |
|
1534 | 0 | const int posX = cctx.posX(scanPos); |
1535 | 0 | const int posY = cctx.posY(scanPos); |
1536 | 0 | const int posS = (1 == dirMode) ? posX : posY; |
1537 | 0 | const int posNb = (1 == dirMode) ? (posX - 1) + posY * coeffs.stride : posX + (posY - 1) * coeffs.stride; |
1538 | 0 | m_fullCoeff[scanPos] = (0 != posS) ? m_fullCoeff[posNb] : 0; |
1539 | |
|
1540 | 0 | if (dstCoeff[blkPos]) |
1541 | 0 | { |
1542 | 0 | dstCoeff[blkPos] = 0; |
1543 | 0 | costCoeff[scanPos] = costCoeff0[scanPos]; |
1544 | 0 | costSig[scanPos] = 0; |
1545 | 0 | } |
1546 | 0 | } |
1547 | 0 | } |
1548 | 0 | else |
1549 | 0 | { |
1550 | 0 | anySigCG = true; |
1551 | 0 | } |
1552 | 0 | } |
1553 | 0 | } |
1554 | | |
1555 | | //===== estimate last position ===== |
1556 | 0 | for (int scanPos = 0; scanPos < maxNumCoeff; scanPos++) |
1557 | 0 | { |
1558 | 0 | int blkPos = cctx.blockPos(scanPos); |
1559 | 0 | TCoeff level = dstCoeff[blkPos]; |
1560 | 0 | absSum += abs(level); |
1561 | 0 | } |
1562 | 0 | } |
1563 | | |
1564 | | void QuantRDOQ::xDequantSample(TCoeff& pRes, TCoeffSig& coeff, const TrQuantParams& trQuantParams) |
1565 | 0 | { |
1566 | | // xDequant |
1567 | 0 | if (trQuantParams.rightShift > 0) |
1568 | 0 | { |
1569 | 0 | const Intermediate_Int qAdd = Intermediate_Int(1) << (trQuantParams.rightShift - 1); |
1570 | 0 | pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale + qAdd) >> trQuantParams.rightShift); |
1571 | 0 | } |
1572 | 0 | else |
1573 | 0 | { |
1574 | 0 | pRes = TCoeff((Intermediate_Int(coeff) * trQuantParams.qScale) *(1<< -trQuantParams.rightShift)); |
1575 | 0 | } |
1576 | 0 | } |
1577 | | |
1578 | | inline uint32_t QuantRDOQ::xGetCodedLevelTSPred(double& rd64CodedCost, |
1579 | | double& rd64CodedCost0, |
1580 | | double& rd64CodedCostSig, |
1581 | | Intermediate_Int levelDouble, |
1582 | | int qBits, |
1583 | | double errorScale, |
1584 | | uint32_t coeffLevels[], |
1585 | | double coeffLevelError[], |
1586 | | const BinFracBits* fracBitsSig, |
1587 | | const BinFracBits& fracBitsPar, |
1588 | | CoeffCodingContext& cctx, |
1589 | | const FracBitsAccess& fracBitsAccess, |
1590 | | const BinFracBits& fracBitsSign, |
1591 | | const BinFracBits& fracBitsGt1, |
1592 | | const uint8_t sign, |
1593 | | int rightPixel, |
1594 | | int belowPixel, |
1595 | | uint16_t ricePar, |
1596 | | bool isLast, |
1597 | | const int maxLog2TrDynamicRange, |
1598 | | int& numUsedCtxBins |
1599 | | ) const |
1600 | 0 | { |
1601 | 0 | double currCostSig = 0; |
1602 | 0 | uint32_t bestAbsLevel = 0; |
1603 | 0 | numUsedCtxBins = 0; |
1604 | 0 | int numBestCtxBin = 0; |
1605 | 0 | if (!isLast && coeffLevels[0] < 3) |
1606 | 0 | { |
1607 | 0 | if (cctx.remRegBins >= 4) |
1608 | 0 | rd64CodedCostSig = xGetRateSigCoef(*fracBitsSig, 0); |
1609 | 0 | else |
1610 | 0 | rd64CodedCostSig = xGetICost(1 << SCALE_BITS); |
1611 | 0 | rd64CodedCost = rd64CodedCost0 + rd64CodedCostSig; |
1612 | 0 | if (cctx.remRegBins >= 4) |
1613 | 0 | numUsedCtxBins++; |
1614 | 0 | if (coeffLevels[0] == 0) |
1615 | 0 | { |
1616 | 0 | return bestAbsLevel; |
1617 | 0 | } |
1618 | 0 | } |
1619 | 0 | else |
1620 | 0 | { |
1621 | 0 | rd64CodedCost = MAX_DOUBLE; |
1622 | 0 | } |
1623 | | |
1624 | 0 | if (!isLast) |
1625 | 0 | { |
1626 | 0 | if (cctx.remRegBins >= 4) |
1627 | 0 | currCostSig = xGetRateSigCoef(*fracBitsSig, 1); |
1628 | 0 | else |
1629 | 0 | currCostSig = xGetICost(1 << SCALE_BITS); |
1630 | 0 | if (coeffLevels[0] >= 3 && cctx.remRegBins >= 4) |
1631 | 0 | numUsedCtxBins++; |
1632 | 0 | } |
1633 | |
|
1634 | 0 | for (int errorInd = 1; errorInd <= m_testedLevels; errorInd++) |
1635 | 0 | { |
1636 | 0 | int absLevel = coeffLevels[errorInd - 1]; |
1637 | 0 | double dErr = 0.0; |
1638 | 0 | dErr = double(levelDouble - (Intermediate_Int(absLevel) << qBits)); |
1639 | 0 | coeffLevelError[errorInd] = dErr * dErr * errorScale; |
1640 | 0 | int modAbsLevel = absLevel; |
1641 | 0 | if (cctx.remRegBins >= 4) |
1642 | 0 | { |
1643 | 0 | modAbsLevel = cctx.deriveModCoeff(rightPixel, belowPixel, absLevel, m_bdpcm); |
1644 | 0 | } |
1645 | 0 | int numCtxBins = 0; |
1646 | 0 | double dCurrCost = coeffLevelError[errorInd] + xGetICost(xGetICRateTS(modAbsLevel, fracBitsPar, cctx, fracBitsAccess, fracBitsSign, fracBitsGt1, numCtxBins, sign, ricePar, maxLog2TrDynamicRange)); |
1647 | |
|
1648 | 0 | if (cctx.remRegBins >= 4) |
1649 | 0 | dCurrCost += currCostSig; // if cctx.numCtxBins < 4, xGetICRateTS return rate including sign cost. dont need to add any more |
1650 | |
|
1651 | 0 | if (dCurrCost < rd64CodedCost) |
1652 | 0 | { |
1653 | 0 | bestAbsLevel = absLevel; |
1654 | 0 | rd64CodedCost = dCurrCost; |
1655 | 0 | rd64CodedCostSig = currCostSig; |
1656 | 0 | numBestCtxBin = numCtxBins; |
1657 | 0 | } |
1658 | 0 | } |
1659 | 0 | numUsedCtxBins += numBestCtxBin; |
1660 | 0 | return bestAbsLevel; |
1661 | 0 | } |
1662 | | |
1663 | | inline int QuantRDOQ::xGetICRateTS( const uint32_t absLevel, |
1664 | | const BinFracBits& fracBitsPar, |
1665 | | const CoeffCodingContext& cctx, |
1666 | | const FracBitsAccess& fracBitsAccess, |
1667 | | const BinFracBits& fracBitsSign, |
1668 | | const BinFracBits& fracBitsGt1, |
1669 | | int& numCtxBins, |
1670 | | const uint8_t sign, |
1671 | | const uint16_t ricePar, |
1672 | | const int maxLog2TrDynamicRange ) const |
1673 | 0 | { |
1674 | | |
1675 | 0 | if (cctx.remRegBins < 4) // Full by-pass coding |
1676 | 0 | { |
1677 | 0 | int rate = absLevel ? (1 << SCALE_BITS) : 0; // 1 bit to signal sign of non-zero |
1678 | |
|
1679 | 0 | uint32_t symbol = absLevel; |
1680 | |
|
1681 | 0 | uint32_t length; |
1682 | 0 | const int threshold = COEF_REMAIN_BIN_REDUCTION; |
1683 | 0 | if (symbol < (threshold << ricePar)) |
1684 | 0 | { |
1685 | 0 | length = symbol >> ricePar; |
1686 | 0 | rate += (length + 1 + ricePar) << SCALE_BITS; |
1687 | 0 | } |
1688 | 0 | else |
1689 | 0 | { |
1690 | 0 | length = ricePar; |
1691 | 0 | symbol = symbol - (threshold << ricePar); |
1692 | 0 | while (symbol >= (1 << length)) |
1693 | 0 | { |
1694 | 0 | symbol -= (1 << (length++)); |
1695 | 0 | } |
1696 | 0 | rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS; |
1697 | 0 | } |
1698 | |
|
1699 | 0 | return rate; |
1700 | 0 | } |
1701 | | |
1702 | 0 | else if (cctx.remRegBins >= 4 && cctx.remRegBins < 8) // First pass context coding and all by-pass coding ( Sign flag is not counted here) |
1703 | 0 | { |
1704 | 0 | int rate = fracBitsSign.intBits[sign]; // sign bits |
1705 | 0 | if (absLevel) |
1706 | 0 | numCtxBins++; |
1707 | |
|
1708 | 0 | if (absLevel > 1) |
1709 | 0 | { |
1710 | 0 | rate += fracBitsGt1.intBits[1]; |
1711 | 0 | rate += fracBitsPar.intBits[(absLevel - 2) & 1]; |
1712 | |
|
1713 | 0 | numCtxBins += 2; |
1714 | |
|
1715 | 0 | int cutoffVal = 2; |
1716 | |
|
1717 | 0 | if (absLevel >= cutoffVal) |
1718 | 0 | { |
1719 | 0 | uint32_t symbol = (absLevel - cutoffVal) >> 1; |
1720 | 0 | uint32_t length; |
1721 | 0 | const int threshold = COEF_REMAIN_BIN_REDUCTION; |
1722 | 0 | if (symbol < (threshold << ricePar)) |
1723 | 0 | { |
1724 | 0 | length = symbol >> ricePar; |
1725 | 0 | rate += (length + 1 + ricePar) << SCALE_BITS; |
1726 | 0 | } |
1727 | 0 | else |
1728 | 0 | { |
1729 | 0 | length = ricePar; |
1730 | 0 | symbol = symbol - (threshold << ricePar); |
1731 | 0 | while (symbol >= (1 << length)) |
1732 | 0 | { |
1733 | 0 | symbol -= (1 << (length++)); |
1734 | 0 | } |
1735 | 0 | rate += (threshold + length + 1 - ricePar + length) << SCALE_BITS; |
1736 | 0 | } |
1737 | 0 | } |
1738 | 0 | } |
1739 | 0 | else if (absLevel == 1) |
1740 | 0 | { |
1741 | 0 | rate += fracBitsGt1.intBits[0]; |
1742 | 0 | numCtxBins++; |
1743 | 0 | } |
1744 | 0 | else |
1745 | 0 | { |
1746 | 0 | rate = 0; |
1747 | 0 | } |
1748 | 0 | return rate; |
1749 | 0 | } |
1750 | | |
1751 | 0 | int rate = fracBitsSign.intBits[sign]; |
1752 | |
|
1753 | 0 | if (absLevel) |
1754 | 0 | numCtxBins++; |
1755 | |
|
1756 | 0 | if( absLevel > 1 ) |
1757 | 0 | { |
1758 | 0 | rate += fracBitsGt1.intBits[1]; |
1759 | 0 | rate += fracBitsPar.intBits[( absLevel - 2 ) & 1]; |
1760 | 0 | numCtxBins += 2; |
1761 | |
|
1762 | 0 | int cutoffVal = 2; |
1763 | 0 | const int numGtBins = 4; |
1764 | 0 | for( int i = 0; i < numGtBins; i++ ) |
1765 | 0 | { |
1766 | 0 | if( absLevel >= cutoffVal ) |
1767 | 0 | { |
1768 | 0 | const uint16_t ctxGtX = cctx.greaterXCtxIdAbsTS( cutoffVal>>1 ); |
1769 | 0 | const BinFracBits &fracBitsGtX = fracBitsAccess.getFracBitsArray( ctxGtX ); |
1770 | 0 | unsigned gtX = ( absLevel >= ( cutoffVal + 2 ) ); |
1771 | 0 | rate += fracBitsGtX.intBits[gtX]; |
1772 | 0 | numCtxBins++; |
1773 | 0 | } |
1774 | 0 | cutoffVal += 2; |
1775 | 0 | } |
1776 | |
|
1777 | 0 | if( absLevel >= cutoffVal ) |
1778 | 0 | { |
1779 | 0 | uint32_t symbol = ( absLevel - cutoffVal ) >> 1; |
1780 | 0 | uint32_t length; |
1781 | 0 | const int threshold = COEF_REMAIN_BIN_REDUCTION; |
1782 | 0 | if( symbol < ( threshold << ricePar ) ) |
1783 | 0 | { |
1784 | 0 | length = symbol >> ricePar; |
1785 | 0 | rate += ( length + 1 + ricePar ) << SCALE_BITS; |
1786 | 0 | } |
1787 | 0 | else |
1788 | 0 | { |
1789 | 0 | length = ricePar; |
1790 | 0 | symbol = symbol - ( threshold << ricePar ); |
1791 | 0 | while( symbol >= ( 1 << length ) ) |
1792 | 0 | { |
1793 | 0 | symbol -= ( 1 << ( length++ ) ); |
1794 | 0 | } |
1795 | 0 | rate += ( threshold + length + 1 - ricePar + length ) << SCALE_BITS; |
1796 | 0 | } |
1797 | 0 | } |
1798 | 0 | } |
1799 | 0 | else if( absLevel == 1 ) |
1800 | 0 | { |
1801 | 0 | rate += fracBitsGt1.intBits[0]; |
1802 | 0 | numCtxBins++; |
1803 | 0 | } |
1804 | 0 | else |
1805 | 0 | { |
1806 | 0 | rate = 0; |
1807 | 0 | } |
1808 | 0 | return rate; |
1809 | 0 | } |
1810 | | |
1811 | | } // namespace vvenc |
1812 | | |
1813 | | //! \} |
1814 | | |