/src/vvenc/source/Lib/EncoderLib/BitAllocation.cpp
Line | Count | Source |
1 | | /* ----------------------------------------------------------------------------- |
2 | | The copyright in this software is being made available under the Clear BSD |
3 | | License, included below. No patent rights, trademark rights and/or |
4 | | other Intellectual Property Rights other than the copyrights concerning |
5 | | the Software are granted under this license. |
6 | | |
7 | | The Clear BSD License |
8 | | |
9 | | Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors. |
10 | | All rights reserved. |
11 | | |
12 | | Redistribution and use in source and binary forms, with or without modification, |
13 | | are permitted (subject to the limitations in the disclaimer below) provided that |
14 | | the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the copyright holder nor the names of its |
24 | | contributors may be used to endorse or promote products derived from this |
25 | | software without specific prior written permission. |
26 | | |
27 | | NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY |
28 | | THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND |
29 | | CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
30 | | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
31 | | PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR |
32 | | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
33 | | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
34 | | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR |
35 | | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER |
36 | | IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
37 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
38 | | POSSIBILITY OF SUCH DAMAGE. |
39 | | |
40 | | |
41 | | ------------------------------------------------------------------------------------------- */ |
42 | | |
43 | | |
44 | | /** \file BitAllocation.cpp |
45 | | \brief Bit allocation class for QP adaptation and, possibly, rate control |
46 | | */ |
47 | | |
48 | | #include "BitAllocation.h" |
49 | | #include "EncStage.h" |
50 | | #include "CommonLib/Picture.h" |
51 | | #include "CommonLib/UnitTools.h" |
52 | | #include <math.h> |
53 | | |
54 | | #include "vvenc/vvencCfg.h" |
55 | | |
56 | | |
57 | | //! \ingroup EncoderLib |
58 | | //! \{ |
59 | | |
60 | | namespace vvenc { |
61 | | |
62 | | // static functions |
63 | | |
// Rounded, weighted base-2 logarithm of d: 2*log2(d) when isSccStrong is set,
// 3*log2(d) otherwise. Inputs below a weight-specific threshold yield -128.
static inline int apprI3Log2 (const double d, const bool isSccStrong)
{
  double scale;
  double lowerBound;

  if (isSccStrong)
  {
    scale      = 2.0;
    lowerBound = 5.5e-20;
  }
  else
  {
    scale      = 3.0;
    lowerBound = 1.5e-13;
  }

  if (d < lowerBound)
  {
    return -128; // floor value for (near-)zero input
  }

  // adding 0.5 before floor() rounds to the nearest integer
  return int (floor (scale * log (d) / log (2.0) + 0.5));
}
71 | | |
// Maps a mean luma value to a delta-QP offset (mapping for a peak luminance of
// ca. 2*400 = 800 nits). Returns 0 when bitDepth exceeds 16 or when
// avgLumaValue does not fit into bitDepth bits.
static inline int lumaDQPOffset (const uint32_t avgLumaValue, const uint32_t bitDepth)
{
  const bool inRange = (bitDepth <= 16 && avgLumaValue < (1u << bitDepth));

  if (!inRange)
  {
    return 0;
  }

  const uint64_t lumaSquared = uint64_t (avgLumaValue * avgLumaValue);
  const uint64_t normShift   = uint64_t (2 * bitDepth); // normalizes the squared value by 2^(2*bitDepth)

  // disabled alternative, mapping for peak luminance of ca. 3*400 = 1200 nits:
  //   return (2 - int ((9 * lumaSquared) >> normShift));
  const int scaledSquare = int ((6 * lumaSquared) >> normShift);

  return 1 - scaledSquare;
}
83 | | |
84 | | void calcSpatialVisAct ( const Pel* pSrc, |
85 | | const int iSrcStride, |
86 | | const int height, |
87 | | const int width, |
88 | | const uint32_t bitDepth, |
89 | | const bool isUHD, |
90 | | VisAct& va ) |
91 | 0 | { |
92 | 0 | CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" ); |
93 | |
|
94 | 0 | uint64_t saAct; // spatial absolute activity sum |
95 | | |
96 | | // skip first row as there may be a black border frame |
97 | 0 | pSrc += iSrcStride; |
98 | | |
99 | | // center rows |
100 | 0 | if (isUHD) // high-pass with downsampling |
101 | 0 | { |
102 | 0 | pSrc += iSrcStride; |
103 | |
|
104 | 0 | saAct = g_pelBufOP.AvgHighPassWithDownsampling (width, height, pSrc, iSrcStride); |
105 | |
|
106 | 0 | va.hpSpatAct = double (saAct) / double ((width - 4) * (height - 4)); |
107 | 0 | } |
108 | 0 | else // HD high-pass without downsampling |
109 | 0 | { |
110 | 0 | saAct = g_pelBufOP.AvgHighPass (width, height, pSrc, iSrcStride); |
111 | |
|
112 | 0 | va.hpSpatAct = double (saAct) / double ((width - 2) * (height - 2)); |
113 | 0 | } |
114 | | |
115 | | // spatial in 12 bit |
116 | 0 | va.spatAct = unsigned (0.5 + va.hpSpatAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1)); |
117 | 0 | } |
118 | | |
119 | | void calcTemporalVisAct ( const Pel* pSrc, |
120 | | const int iSrcStride, |
121 | | const int height, |
122 | | const int width, |
123 | | const Pel* pSM1, |
124 | | const int iSM1Stride, |
125 | | const Pel* pSM2, |
126 | | const int iSM2Stride, |
127 | | uint32_t frameRate, |
128 | | const uint32_t bitDepth, |
129 | | const bool isUHD, |
130 | | VisAct& va ) |
131 | 0 | { |
132 | 0 | CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" ); |
133 | 0 | CHECK( pSM1 == nullptr, "no compare buffer given to calculate temporal visual activity" ); |
134 | |
|
135 | 0 | const Pel* pS0 = pSrc; |
136 | 0 | uint64_t taAct; // temporal absolute activity sum |
137 | | |
138 | | // force 1st-order delta if only prev. frame available |
139 | 0 | if (pSM2 == nullptr || iSM2Stride <= 0) frameRate = 24; |
140 | | |
141 | | // skip first row as there may be a black border frame |
142 | 0 | pSrc += iSrcStride; |
143 | | |
144 | | // center rows |
145 | 0 | if (pS0 == pSM1 && frameRate <= 31) |
146 | 0 | { |
147 | 0 | va.hpTempAct = 0; // bypass high-pass, result will be zero |
148 | 0 | } |
149 | 0 | else if (isUHD) // downsampled high-pass |
150 | 0 | { |
151 | 0 | const int i2M1Stride = iSM1Stride * 2; |
152 | |
|
153 | 0 | CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!"); |
154 | |
|
155 | 0 | pSrc += iSrcStride; |
156 | 0 | pSM1 += i2M1Stride; |
157 | 0 | if (frameRate <= 31) // 1st-order delta |
158 | 0 | { |
159 | 0 | taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff1st (width, height, pSrc, pSM1, iSrcStride, iSM1Stride); |
160 | 0 | } |
161 | 0 | else // 2nd-order delta (diff of diffs) |
162 | 0 | { |
163 | 0 | const int i2M2Stride = iSM2Stride * 2; |
164 | |
|
165 | 0 | CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!"); |
166 | |
|
167 | 0 | pSM2 += i2M2Stride; |
168 | 0 | taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff2nd (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride); |
169 | 0 | } |
170 | | |
171 | 0 | va.hpTempAct = double (taAct) / double ((width - 4) * (height - 4)); |
172 | 0 | } |
173 | 0 | else // HD high-pass without downsampling |
174 | 0 | { |
175 | 0 | CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!"); |
176 | |
|
177 | 0 | pSM1 += iSM1Stride; |
178 | 0 | if (frameRate <= 31) // 1st-order delta |
179 | 0 | { |
180 | 0 | taAct = g_pelBufOP.HDHighPass (width, height, pSrc, pSM1, iSrcStride, iSM1Stride); |
181 | 0 | } |
182 | 0 | else // 2nd-order delta (diff of diffs) |
183 | 0 | { |
184 | 0 | CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!"); |
185 | |
|
186 | 0 | pSM2 += iSM2Stride; |
187 | 0 | taAct = g_pelBufOP.HDHighPass2 (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride); |
188 | 0 | } |
189 | | |
190 | 0 | va.hpTempAct = double (taAct) / double ((width - 2) * (height - 2)); |
191 | 0 | } |
192 | | |
193 | | // temporal in 12 bit |
194 | 0 | va.tempAct = unsigned (0.5 + va.hpTempAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1) * (frameRate <= 31 ? 1.15625 : 1.0)); |
195 | 0 | } |
196 | | |
197 | | void updateVisAct ( VisAct& va, const uint32_t bitDepth ) |
198 | 0 | { |
199 | | // minimum part in 12 bit |
200 | 0 | va.minAct = std::min( va.tempAct, va.spatAct ); |
201 | | // lower limit, compensate for high-pass amplification |
202 | 0 | va.hpVisAct = std::max (double (1 << (bitDepth - 6)), va.hpSpatAct + 2.0 * va.hpTempAct); |
203 | 0 | va.visAct = ClipBD( uint16_t( 0.5 + va.hpVisAct ), bitDepth ); |
204 | 0 | } |
205 | | |
206 | | double filterAndCalculateAverageActivity ( const Pel* pSrc, |
207 | | const int iSrcStride, |
208 | | const int height, |
209 | | const int width, |
210 | | const Pel* pSM1, |
211 | | const int iSM1Stride, |
212 | | const Pel* pSM2, |
213 | | const int iSM2Stride, |
214 | | uint32_t frameRate, |
215 | | const uint32_t bitDepth, |
216 | | const bool isUHD, |
217 | | unsigned* minVisAct = nullptr, |
218 | | unsigned* spVisAct = nullptr ) |
219 | 0 | { |
220 | 0 | VisAct va; |
221 | | |
222 | | // spatial activity |
223 | 0 | calcSpatialVisAct( pSrc, iSrcStride, height, width, bitDepth, isUHD, va ); |
224 | | |
225 | | // temporal activity |
226 | 0 | calcTemporalVisAct( pSrc, iSrcStride, height, width, pSM1, iSM1Stride, pSM2, iSM2Stride, |
227 | 0 | frameRate, bitDepth, isUHD, va ); |
228 | | |
229 | | // minimum and visual activity |
230 | 0 | updateVisAct( va, bitDepth ); |
231 | |
|
232 | 0 | if( minVisAct ) |
233 | 0 | { |
234 | 0 | *minVisAct = va.minAct; |
235 | 0 | } |
236 | 0 | if( spVisAct ) |
237 | 0 | { |
238 | 0 | *spVisAct = va.spatAct; |
239 | 0 | } |
240 | |
|
241 | 0 | return va.hpVisAct; |
242 | 0 | } |
243 | | |
// Returns the average picture activity used as normalization reference.
// A positive scaledAverageGopActivity is simply rescaled by 2^(24 - bitDepth).
// Otherwise a default is derived from the bit depth, the temporal-filtering
// flag and the picture size relative to 3840x2160.
static double getAveragePictureActivity (const uint32_t picWidth, const uint32_t picHeight,
                                         const int scaledAverageGopActivity,
                                         const bool tempFiltering, const uint32_t bitDepth)
{
  if (scaledAverageGopActivity <= 0)
  {
    const double filterGain = (tempFiltering ? 32.0 : 16.0);
    const double depthScale = double (1 << (2 * bitDepth - 10));
    const double sizeFactor = sqrt ((3840.0 * 2160.0) / double (picWidth * picHeight));
    const double hpEnerPic  = filterGain * depthScale * sizeFactor;

    return sqrt (hpEnerPic); // square-root of a_pic value
  }

  return (double (scaledAverageGopActivity) / double (1 << (24 - bitDepth)));
}
256 | | |
257 | | static int getGlaringColorQPOffset (Picture* const pic, const int ctuAddr, const int bitDepth, uint32_t &avgLumaValue) |
258 | 0 | { |
259 | 0 | const PreCalcValues& pcv = *pic->cs->pcv; |
260 | 0 | const ChromaFormat chrFmt = pic->chromaFormat; |
261 | 0 | const SizeType chrWidth = pcv.maxCUSize >> getChannelTypeScaleX (CH_C, chrFmt); |
262 | 0 | const SizeType chrHeight = pcv.maxCUSize >> getChannelTypeScaleY (CH_C, chrFmt); |
263 | 0 | const unsigned w = pcv.widthInCtus; |
264 | 0 | const int midLevel = 1 << (bitDepth - 1); |
265 | 0 | int chrValue = MAX_INT; |
266 | |
|
267 | 0 | avgLumaValue = uint32_t ((ctuAddr >= 0) ? pic->ctuAdaptedQP[ctuAddr] : pic->getOrigBuf().Y().getAvg()); |
268 | |
|
269 | 0 | for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++) |
270 | 0 | { |
271 | 0 | const ComponentID compID = (ComponentID) comp; |
272 | 0 | int avgCompValue; |
273 | |
|
274 | 0 | if (ctuAddr >= 0) // chroma |
275 | 0 | { |
276 | 0 | const CompArea chrArea = clipArea (CompArea (compID, chrFmt, Area ((ctuAddr % w) * chrWidth, (ctuAddr / w) * chrHeight, chrWidth, chrHeight)), pic->block (compID)); |
277 | |
|
278 | 0 | avgCompValue = pic->getOrigBuf (chrArea).getAvg(); |
279 | 0 | } |
280 | 0 | else avgCompValue = pic->getOrigBuf (pic->block (compID)).getAvg(); |
281 | |
|
282 | 0 | if (chrValue > avgCompValue) chrValue = avgCompValue; // minimum of the DC offsets |
283 | 0 | } |
284 | 0 | CHECK (chrValue < 0, "mean chroma value cannot be negative!"); |
285 | |
|
286 | 0 | chrValue = (int) avgLumaValue - chrValue; |
287 | |
|
288 | 0 | if (chrValue > midLevel) return apprI3Log2 (double (chrValue * chrValue) / double (midLevel * midLevel), pic->isSccStrong); |
289 | | |
290 | 0 | return 0; |
291 | 0 | } |
292 | | |
293 | | static int getGlaringColorQPOffsetSubCtu (Picture* const pic, const CompArea& lumaArea, const int bitDepth, uint32_t &avgLumaValue) |
294 | 0 | { |
295 | 0 | const ChromaFormat chrFmt = pic->chromaFormat; |
296 | 0 | const SizeType chrWidth = lumaArea.width >> getChannelTypeScaleX (CH_C, chrFmt); |
297 | 0 | const SizeType chrHeight = lumaArea.height >> getChannelTypeScaleY (CH_C, chrFmt); |
298 | 0 | const PosType chrPosX = lumaArea.x >> getChannelTypeScaleX (CH_C, chrFmt); |
299 | 0 | const PosType chrPosY = lumaArea.y >> getChannelTypeScaleY (CH_C, chrFmt); |
300 | 0 | const int midLevel = 1 << (bitDepth - 1); |
301 | 0 | int chrValue = MAX_INT; |
302 | |
|
303 | 0 | avgLumaValue = pic->getOrigBuf (lumaArea).getAvg(); |
304 | |
|
305 | 0 | for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++) |
306 | 0 | { |
307 | 0 | const ComponentID compID = (ComponentID) comp; |
308 | 0 | const CompArea chrArea = clipArea (CompArea (compID, chrFmt, Area (chrPosX, chrPosY, chrWidth, chrHeight)), pic->block (compID)); |
309 | |
|
310 | 0 | int avgCompValue = pic->getOrigBuf (chrArea).getAvg(); |
311 | |
|
312 | 0 | if (chrValue > avgCompValue) chrValue = avgCompValue; // minimum of the DC offsets |
313 | 0 | } |
314 | 0 | CHECK (chrValue < 0, "mean chroma value cannot be negative!"); |
315 | |
|
316 | 0 | chrValue = (int) avgLumaValue - chrValue; |
317 | |
|
318 | 0 | if (chrValue > midLevel) return apprI3Log2 (double (chrValue * chrValue) / double (midLevel * midLevel), pic->isSccStrong); |
319 | | |
320 | 0 | return 0; |
321 | 0 | } |
322 | | |
323 | | static void updateMinNoiseLevelsPic (uint8_t* const minNoiseLevels, const int bitDepth, const unsigned avgValue, const unsigned noise) |
324 | 0 | { |
325 | 0 | const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions |
326 | |
|
327 | 0 | CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds"); |
328 | |
|
329 | 0 | if (noise < (unsigned) minNoiseLevels[avgIndex]) |
330 | 0 | { |
331 | 0 | minNoiseLevels[avgIndex] = (uint8_t) noise; |
332 | 0 | } |
333 | 0 | } |
334 | | |
335 | | static void clipQPValToEstimatedMinimStats (const uint8_t* minNoiseLevels, const int bitDepth, const unsigned avgValue, |
336 | | const double resFac, const int extraQPOffset, int& QP) // output QP |
337 | 0 | { |
338 | 0 | const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions |
339 | 0 | const unsigned x = (1 << 3) - 1; |
340 | 0 | const int32_t dQPOffset = -15; |
341 | |
|
342 | 0 | CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds"); |
343 | |
|
344 | 0 | int i = minNoiseLevels[avgIndex]; |
345 | | |
346 | | // try to "fill in the blanks" in luma range (also results in peak smoothing, as described in PCS 2022 paper) |
347 | 0 | if (avgIndex == 0 && i > minNoiseLevels[0 + 1]) i = minNoiseLevels[0 + 1]; |
348 | 0 | if (avgIndex == x && i > minNoiseLevels[x - 1]) i = minNoiseLevels[x - 1]; |
349 | |
|
350 | 0 | if (avgIndex > 0 && avgIndex < x) |
351 | 0 | { |
352 | 0 | const uint8_t maxNeighborNoiseLevel = std::max (minNoiseLevels[avgIndex - 1], minNoiseLevels[avgIndex + 1]); |
353 | |
|
354 | 0 | if (i > maxNeighborNoiseLevel) i = maxNeighborNoiseLevel; |
355 | 0 | } |
356 | 0 | if (i >= 255) |
357 | 0 | { |
358 | 0 | return; |
359 | 0 | } |
360 | | |
361 | 0 | i = std::max (0, apprI3Log2 (std::min (1.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // = 6*log2 |
362 | 0 | if (QP < i) |
363 | 0 | { |
364 | 0 | QP = i; |
365 | 0 | } |
366 | 0 | } |
367 | | |
368 | | static int applyDeltaQpPeakSmoothing (Picture* const pic, const VVEncCfg* encCfg, const uint32_t startAddr, const uint32_t endAddr) |
369 | 0 | { |
370 | 0 | const uint32_t ctuWdt = pic->cs->pcv->widthInCtus; |
371 | 0 | const uint32_t ctuEnd = endAddr - ctuWdt; |
372 | 0 | int deltaQpSum = 0, preSmoothQP = 0; |
373 | 0 | std::vector<int> prevQP; |
374 | |
|
375 | 0 | if (ctuWdt == 0 || endAddr <= startAddr + 3u * ctuWdt) return 0; |
376 | | |
377 | 0 | prevQP.resize (ctuWdt); |
378 | |
|
379 | 0 | for (uint32_t ctuRsAddr = startAddr; ctuRsAddr < ctuEnd; ctuRsAddr++) |
380 | 0 | { |
381 | 0 | const int32_t idx = ctuRsAddr % ctuWdt; |
382 | |
|
383 | 0 | if (ctuRsAddr < ctuWdt) prevQP[idx] = pic->ctuAdaptedQP[ctuRsAddr]; |
384 | 0 | else if (idx == 0) preSmoothQP = pic->ctuAdaptedQP[ctuRsAddr]; |
385 | 0 | else if (idx == ctuWdt - 1) |
386 | 0 | { |
387 | 0 | prevQP[idx - 1] = preSmoothQP; |
388 | 0 | prevQP[idx] = pic->ctuAdaptedQP[ctuRsAddr]; |
389 | 0 | } |
390 | 0 | else // no boundary CTU |
391 | 0 | { |
392 | 0 | const int32_t nextIdx = ctuRsAddr + ctuWdt; // max. and min. in 3x3 neighborhood |
393 | 0 | int qpMax = std::max (preSmoothQP, pic->ctuAdaptedQP[ctuRsAddr + 1]); |
394 | 0 | int qpMin = std::min (preSmoothQP, pic->ctuAdaptedQP[ctuRsAddr + 1]); |
395 | |
|
396 | 0 | for (int32_t i = -1; i <= 1; i++) |
397 | 0 | { |
398 | 0 | qpMax = std::max (qpMax, std::max (prevQP[idx + i], pic->ctuAdaptedQP[nextIdx + i])); |
399 | 0 | qpMin = std::min (qpMin, std::min (prevQP[idx + i], pic->ctuAdaptedQP[nextIdx + i])); |
400 | 0 | } |
401 | 0 | prevQP[idx - 1] = preSmoothQP; |
402 | 0 | preSmoothQP = pic->ctuAdaptedQP[ctuRsAddr]; |
403 | |
|
404 | 0 | if (preSmoothQP > qpMax) |
405 | 0 | { |
406 | 0 | pic->ctuQpaLambda[ctuRsAddr] *= 0.793701; // peak: decrease adapted lambda, QP |
407 | 0 | pic->ctuAdaptedQP[ctuRsAddr]--; |
408 | 0 | deltaQpSum--; |
409 | 0 | } |
410 | 0 | if (preSmoothQP < qpMin) |
411 | 0 | { |
412 | 0 | pic->ctuQpaLambda[ctuRsAddr] *= 1.259921; // hole: increase adapted lambda, QP |
413 | 0 | pic->ctuAdaptedQP[ctuRsAddr]++; |
414 | 0 | deltaQpSum++; |
415 | 0 | } |
416 | 0 | } |
417 | 0 | } |
418 | |
|
419 | 0 | prevQP.clear(); |
420 | |
|
421 | 0 | return deltaQpSum; |
422 | 0 | } |
423 | | |
424 | | static int refineDeltaQpDistribution (Picture* const pic, const VVEncCfg* encCfg, const int sliceQP, |
425 | | const double sliceLambda, const int rcQpDiff, const int bitDepth, |
426 | | const uint32_t startAddr, const uint32_t endAddr, const int qpSum, |
427 | | const uint32_t tempLayer, const bool isIntra, const bool isEncPass, |
428 | | const uint8_t* minNoiseLevels, std::vector<int>& ctuAvgLuma) |
429 | 0 | { |
430 | 0 | const double resRatio = (isEncPass ? sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0)) : 0.0); |
431 | 0 | const int ctusInSlice = int (endAddr - startAddr); |
432 | 0 | const int targetQpSum = (encCfg->m_RCTargetBitrate > 0 ? sliceQP * ctusInSlice : qpSum); |
433 | 0 | int blockQpSum = 0, tempLumaQP; |
434 | 0 | double blockLambda; |
435 | 0 | bool isLimited = false; |
436 | |
|
437 | 0 | for (uint32_t ctuRsAddr = startAddr; ctuRsAddr < endAddr; ctuRsAddr++) |
438 | 0 | { |
439 | 0 | int clippedLumaQP = std::max (0, pic->ctuAdaptedQP[ctuRsAddr] + rcQpDiff); |
440 | |
|
441 | 0 | if (isEncPass) |
442 | 0 | { |
443 | 0 | tempLumaQP = clippedLumaQP; // CTU QP before clipping for diff calculation below |
444 | |
|
445 | 0 | clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, ctuAvgLuma[ctuRsAddr - startAddr], resRatio, (isIntra ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) tempLayer)), clippedLumaQP); |
446 | 0 | if (clippedLumaQP > tempLumaQP) |
447 | 0 | { |
448 | 0 | ctuAvgLuma[ctuRsAddr - startAddr] = -1; // mark CTU as being processed already |
449 | 0 | isLimited = isEncPass; |
450 | 0 | } |
451 | 0 | } |
452 | |
|
453 | 0 | clippedLumaQP = std::min (MAX_QP, clippedLumaQP); |
454 | |
|
455 | 0 | blockLambda = sliceLambda * pow (2.0, double (clippedLumaQP - sliceQP) / 3.0); |
456 | 0 | blockQpSum += clippedLumaQP; |
457 | |
|
458 | 0 | pic->ctuQpaLambda[ctuRsAddr] = blockLambda; // store modified CTU lambdas and QPs |
459 | 0 | pic->ctuAdaptedQP[ctuRsAddr] = clippedLumaQP; |
460 | 0 | } |
461 | |
|
462 | 0 | if (blockQpSum > targetQpSum && isLimited) // CTU QPs limited, so distribute saved rate among nonlimited CTUs |
463 | 0 | { |
464 | 0 | int maxCtuQP = 0, minCtuQP = MAX_QP; |
465 | |
|
466 | 0 | for (uint32_t ctuRsAddr = startAddr; ctuRsAddr < endAddr; ctuRsAddr++) // find max |
467 | 0 | { |
468 | 0 | if (ctuAvgLuma[ctuRsAddr - startAddr] >= 0 && pic->ctuAdaptedQP[ctuRsAddr] > maxCtuQP) // nonlimited CTUs |
469 | 0 | { |
470 | 0 | maxCtuQP = pic->ctuAdaptedQP[ctuRsAddr]; |
471 | 0 | } |
472 | 0 | if (pic->ctuAdaptedQP[ctuRsAddr] < minCtuQP) |
473 | 0 | { |
474 | 0 | minCtuQP = pic->ctuAdaptedQP[ctuRsAddr]; |
475 | 0 | } |
476 | 0 | } |
477 | |
|
478 | 0 | minCtuQP = std::max (0, minCtuQP); |
479 | |
|
480 | 0 | while (maxCtuQP > minCtuQP && blockQpSum > targetQpSum) // spend rate starting at max QPs, then go downward |
481 | 0 | { |
482 | 0 | for (uint32_t ctuRsAddr = startAddr; ctuRsAddr < endAddr; ctuRsAddr++) // reduce |
483 | 0 | { |
484 | 0 | if (ctuAvgLuma[ctuRsAddr - startAddr] >= 0 && pic->ctuAdaptedQP[ctuRsAddr] == maxCtuQP) |
485 | 0 | { |
486 | 0 | tempLumaQP = std::max (0, pic->ctuAdaptedQP[ctuRsAddr] - 1); |
487 | |
|
488 | 0 | ctuAvgLuma[ctuRsAddr - startAddr] = -1; // mark CTU as being reduced already |
489 | 0 | blockLambda = sliceLambda * pow (2.0, double (tempLumaQP - sliceQP) / 3.0); |
490 | 0 | if (tempLumaQP < pic->ctuAdaptedQP[ctuRsAddr]) blockQpSum--; |
491 | |
|
492 | 0 | pic->ctuQpaLambda[ctuRsAddr] = blockLambda; // store reduced lambdas and QPs |
493 | 0 | pic->ctuAdaptedQP[ctuRsAddr] = tempLumaQP; |
494 | 0 | } |
495 | |
|
496 | 0 | if (blockQpSum <= targetQpSum) break; |
497 | 0 | } |
498 | |
|
499 | 0 | maxCtuQP--; |
500 | 0 | } |
501 | 0 | } |
502 | |
|
503 | 0 | return (blockQpSum + (ctusInSlice >> 1)) / ctusInSlice; |
504 | 0 | } |
505 | | |
506 | | // public functions |
507 | | |
508 | | int BitAllocation::applyQPAdaptationSlice (const Slice* slice, const VVEncCfg* encCfg, const int sliceQP, |
509 | | const double sliceLambda, uint16_t* const picVisActLuma, |
510 | | std::vector<int>& ctuPumpRedQP, std::vector<uint8_t>* ctuRCQPMemory, |
511 | | int* const optChromaQPOffsets, const uint8_t* minNoiseLevels, |
512 | | const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr) |
513 | 0 | { |
514 | 0 | Picture* const pic = (slice != nullptr ? slice->pic : nullptr); |
515 | 0 | double hpEner[MAX_NUM_COMP] = {0.0, 0.0, 0.0}; |
516 | 0 | double averageAdaptedLambda = 0.0; |
517 | 0 | int averageAdaptedLumaQP = -1; |
518 | 0 | uint32_t meanLuma = MAX_UINT; |
519 | 0 | std::vector<int> ctuAvgLuma; |
520 | |
|
521 | 0 | if (pic == nullptr || pic->cs == nullptr || encCfg == nullptr || ctuStartAddr >= ctuBoundingAddr) |
522 | 0 | { |
523 | 0 | return -1; |
524 | 0 | } |
525 | | |
526 | 0 | const bool isEncPass = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis); |
527 | 0 | const bool isHDR = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ); |
528 | 0 | const bool isBIM = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty()); |
529 | 0 | const bool isSccStrongRC = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong); |
530 | 0 | const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280); |
531 | 0 | const bool useFrameWiseQPA = (encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (slice->TLayer > 0); |
532 | 0 | const uint32_t hpFrameRate = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale); |
533 | 0 | const int bitDepth = slice->sps->bitDepths[CH_L]; |
534 | 0 | double hpEnerPicNorm = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, (encCfg->m_RCNumPasses == 2 ? 0 : ctuPumpRedQP.back()), |
535 | 0 | (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth); |
536 | 0 | const PreCalcValues& pcv = *pic->cs->pcv; |
537 | |
|
538 | 0 | pic->picInitialQP = sliceQP; // modified below and used in applyQPAdaptationSubCtu |
539 | 0 | if ((encCfg->m_RCTargetBitrate > 0) && useFrameWiseQPA) |
540 | 0 | { |
541 | 0 | averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP - 1); // one will be added again |
542 | 0 | } |
543 | |
|
544 | 0 | for (uint32_t comp = 0; comp < getNumberValidComponents (pic->chromaFormat); comp++) |
545 | 0 | { |
546 | 0 | const ComponentID compID = (ComponentID) comp; |
547 | |
|
548 | 0 | if (isLuma (compID)) // luma: CTU-wise QPA operation |
549 | 0 | { |
550 | 0 | const PosType guardSize = (isHighResolution ? 2 : 1); |
551 | 0 | unsigned zeroMinActCTUs = 0, picSpVisAct = 0; |
552 | |
|
553 | 0 | for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++) |
554 | 0 | { |
555 | 0 | const Position pos ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUSize, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUSize); |
556 | 0 | const CompArea ctuArea = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUSize, pcv.maxCUSize)), pic->Y()); |
557 | 0 | const SizeType fltWidth = pcv.maxCUSize + guardSize * (pos.x > 0 ? 2 : 1); |
558 | 0 | const SizeType fltHeight = pcv.maxCUSize + guardSize * (pos.y > 0 ? 2 : 1); |
559 | 0 | const CompArea fltArea = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y()); |
560 | 0 | const CPelBuf picOrig = pic->getOrigBuf (fltArea); |
561 | 0 | const CPelBuf picPrv1 = pic->getOrigBufPrev (fltArea, PREV_FRAME_1); |
562 | 0 | const CPelBuf picPrv2 = pic->getOrigBufPrev (fltArea, PREV_FRAME_2); |
563 | 0 | unsigned minActivityPart = 0, spVisActCTU = 0; |
564 | |
|
565 | 0 | hpEner[1] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width, |
566 | 0 | picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate, |
567 | 0 | bitDepth, isHighResolution, &minActivityPart, &spVisActCTU); |
568 | |
|
569 | 0 | if (minActivityPart == 0) zeroMinActCTUs++; |
570 | |
|
571 | 0 | hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height); |
572 | 0 | pic->ctuQpaLambda[ctuRsAddr] = hpEner[1]; // temporary backup of CTU mean visual activity |
573 | 0 | pic->ctuAdaptedQP[ctuRsAddr] = (int) pic->getOrigBuf (ctuArea).getAvg(); // and mean luma |
574 | |
|
575 | 0 | if (picOrig.buf == picPrv1.buf) // replace temporal visual activity with min motion error |
576 | 0 | { |
577 | 0 | hpEner[1] = pic->m_picShared->m_minNoiseLevels[pic->ctuAdaptedQP[ctuRsAddr] >> (bitDepth - 3)] * (bitDepth >= 10 ? 1.5 : 0.375); |
578 | |
|
579 | 0 | if (hpEner[1] < (bitDepth >= 10 ? 382.5 : 95.625)) // levels in first frame |
580 | 0 | { |
581 | 0 | hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height); |
582 | 0 | pic->ctuQpaLambda[ctuRsAddr] += hpEner[1]; // add noise level to mean visual activity |
583 | 0 | } |
584 | 0 | } |
585 | 0 | else if (!isEncPass && (encCfg->m_RCNumPasses == 2 || pic->gopEntry->m_mctfIndex < 0 || !pic->gopEntry->m_isStartOfGop)) |
586 | 0 | { |
587 | 0 | updateMinNoiseLevelsPic (pic->m_picShared->m_minNoiseLevels, bitDepth, pic->ctuAdaptedQP[ctuRsAddr], minActivityPart); |
588 | 0 | } |
589 | 0 | picSpVisAct += spVisActCTU; |
590 | 0 | } |
591 | |
|
592 | 0 | hpEner[comp] /= double (encCfg->m_SourceWidth * encCfg->m_SourceHeight); |
593 | 0 | if (picVisActLuma != nullptr) |
594 | 0 | { |
595 | 0 | *picVisActLuma = ClipBD (uint16_t (0.5 + hpEner[comp]), bitDepth); |
596 | 0 | } |
597 | 0 | if (ctuBoundingAddr > ctuStartAddr) |
598 | 0 | { |
599 | 0 | const uint32_t nCtu = ctuBoundingAddr - ctuStartAddr; |
600 | |
|
601 | 0 | pic->picVA.spatAct[ CH_L ] = ClipBD (uint16_t ((picSpVisAct + (nCtu >> 1)) / nCtu), 12); |
602 | 0 | } |
603 | 0 | if (encCfg->m_internalUsePerceptQPATempFiltISlice && slice->isIntra() && pic->getOrigBuf (compID).buf != pic->getOrigBufPrev (compID, PREV_FRAME_1).buf && zeroMinActCTUs * 2 > ctuBoundingAddr - ctuStartAddr) |
604 | 0 | { |
605 | 0 | hpEnerPicNorm *= sqrt (zeroMinActCTUs * 2.0 / float (ctuBoundingAddr - ctuStartAddr)); // frozen-image mode |
606 | 0 | } |
607 | 0 | } |
608 | 0 | else // chroma: only picture-wise operation required |
609 | 0 | { |
610 | 0 | const CPelBuf picOrig = pic->getOrigBuf (compID); |
611 | 0 | const CPelBuf picPrv1 = pic->getOrigBufPrev (compID, PREV_FRAME_1); |
612 | 0 | const CPelBuf picPrv2 = pic->getOrigBufPrev (compID, PREV_FRAME_2); |
613 | |
|
614 | 0 | hpEner[comp] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width, |
615 | 0 | picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate, |
616 | 0 | bitDepth, isHighResolution && (pic->chromaFormat == CHROMA_444)); |
617 | |
|
618 | 0 | const int adaptChromaQPOffset = 1.5 * hpEner[comp] <= hpEner[0] ? 0 : apprI3Log2 (1.5 * hpEner[comp] / hpEner[0], pic->isSccStrong); |
619 | |
|
620 | 0 | if (averageAdaptedLumaQP < 0) // YUV is not 4:0:0! |
621 | 0 | { |
622 | 0 | averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC)); |
623 | |
|
624 | 0 | if (isChromaEnabled (pic->chromaFormat) && (averageAdaptedLumaQP < MAX_QP)) |
625 | 0 | { |
626 | 0 | averageAdaptedLumaQP += getGlaringColorQPOffset (pic, -1 /*ctuAddr*/, slice->sps->bitDepths[CH_C], meanLuma); |
627 | |
|
628 | 0 | if ((averageAdaptedLumaQP > MAX_QP) && !isHDR) averageAdaptedLumaQP = MAX_QP; |
629 | 0 | } |
630 | | // change mean picture QP index based on picture's average luma value (Sharp) |
631 | 0 | if (isHDR) |
632 | 0 | { |
633 | 0 | if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg(); |
634 | |
|
635 | 0 | averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth)); |
636 | 0 | } |
637 | 0 | } |
638 | |
|
639 | 0 | if (optChromaQPOffsets != nullptr) // adapts sliceChromaQpOffsetIntraOrPeriodic |
640 | 0 | { |
641 | | GCC_WARNING_DISABLE_maybe_uninitialized // probably spurious warning, when building with -fsanitize=undefined: "error: 'encCfg.33' may be used uninitialized in this function" |
642 | 0 | const int lumaChromaMappingDQP = (averageAdaptedLumaQP - slice->sps->chromaQpMappingTable.getMappedChromaQpValue (compID, averageAdaptedLumaQP)) >> (encCfg->m_HdrMode == vvencHDRMode::VVENC_HDR_OFF ? 1 : 2); |
643 | 0 | GCC_WARNING_RESET |
644 | 0 | optChromaQPOffsets[comp - 1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP); |
645 | 0 | } |
646 | 0 | } // isLuma or isChroma |
647 | 0 | } |
648 | |
|
649 | 0 | if (averageAdaptedLumaQP < 0) // only if YUV is 4:0:0! |
650 | 0 | { |
651 | 0 | averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC)); |
652 | | |
653 | | // change mean picture QP index based on the picture's average luma value (Sharp) |
654 | 0 | if (isHDR) |
655 | 0 | { |
656 | 0 | if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg(); |
657 | |
|
658 | 0 | averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth)); |
659 | 0 | } |
660 | 0 | } |
661 | |
|
662 | 0 | if (encCfg->m_RCNumPasses == 2 && (encCfg->m_RCTargetBitrate > 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra()) |
663 | 0 | { |
664 | 0 | const int nCtu = int (ctuBoundingAddr - ctuStartAddr); |
665 | 0 | const int offs = (slice->poc / encCfg->m_IntraPeriod) * ((nCtu + 1) >> 1); |
666 | 0 | std::vector<uint8_t>& ctuQPMem = *ctuRCQPMemory; // unpack 1st-pass reduction QPs |
667 | |
|
668 | 0 | if ((ctuPumpRedQP.size() >= nCtu) && (ctuQPMem.size() >= offs + ((nCtu + 1) >> 1))) |
669 | 0 | { |
670 | 0 | for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++) |
671 | 0 | { |
672 | 0 | ctuPumpRedQP[ctuRsAddr] = int ((ctuRsAddr & 1) ? ctuQPMem[offs + (ctuRsAddr >> 1)] >> 4 : ctuQPMem[offs + (ctuRsAddr >> 1)] & 15) - 8; |
673 | 0 | } |
674 | 0 | } |
675 | 0 | } |
676 | |
|
677 | 0 | if (useFrameWiseQPA || (averageAdaptedLumaQP >= MAX_QP)) // store the CTU-wise QP/lambda values |
678 | 0 | { |
679 | 0 | averageAdaptedLumaQP = std::min (MAX_QP, averageAdaptedLumaQP + 1); |
680 | 0 | averageAdaptedLambda = sliceLambda * pow (2.0, double (averageAdaptedLumaQP - sliceQP) / 3.0); |
681 | |
|
682 | 0 | for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++) |
683 | 0 | { |
684 | 0 | pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP |
685 | 0 | pic->ctuAdaptedQP[ctuRsAddr] = averageAdaptedLumaQP; |
686 | 0 | } |
687 | 0 | } |
688 | 0 | else // use CTU-level QPA |
689 | 0 | { |
690 | 0 | const int nCtu = int (ctuBoundingAddr - ctuStartAddr); |
691 | 0 | const int dvsr = encCfg->m_IntraPeriod - encCfg->m_GOPSize; |
692 | 0 | const int aaQP = averageAdaptedLumaQP; // backup of initial average QP from above |
693 | 0 | const bool rcIsFirstPassOf2 = ((encCfg->m_RCTargetBitrate == 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (slice->poc > 0) ? encCfg->m_RCNumPasses == 2 : false); |
694 | |
|
695 | 0 | if (isEncPass) ctuAvgLuma.resize (nCtu); |
696 | |
|
697 | 0 | averageAdaptedLumaQP = 0; |
698 | 0 | for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++) |
699 | 0 | { |
700 | 0 | const double hpEnerCTU = pic->ctuQpaLambda[ctuRsAddr]; |
701 | 0 | int adaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEnerCTU * hpEnerPicNorm, isSccStrongRC)); |
702 | |
|
703 | 0 | if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra() && (ctuPumpRedQP.size() > ctuRsAddr)) |
704 | 0 | { |
705 | 0 | if (rcIsFirstPassOf2) // backup 1st-pass I-frame QP for 2nd rate control pass |
706 | 0 | { |
707 | 0 | if (ctuRsAddr & 1) ctuRCQPMemory->back() |= (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8) << 4; |
708 | 0 | else /*even addr*/ ctuRCQPMemory->push_back (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8); |
709 | 0 | if (adaptedLumaQP > 0) |
710 | 0 | { |
711 | 0 | adaptedLumaQP -= (aaQP >> 4); // some first-pass tuning for stabilization |
712 | 0 | } |
713 | 0 | } |
714 | 0 | if (ctuPumpRedQP[ctuRsAddr] < 0) adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize - (dvsr >> 1)) / dvsr); |
715 | 0 | else /*ctuPumpRedQP[addr] >= 0*/ adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize + (dvsr >> 1)) / dvsr); |
716 | |
|
717 | 0 | ctuPumpRedQP[ctuRsAddr] = 0; // reset QP memory for temporal pumping analysis |
718 | 0 | } |
719 | 0 | if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && !slice->isIntra() && (slice->TLayer == 0) && rcIsFirstPassOf2 && (adaptedLumaQP < MAX_QP)) |
720 | 0 | { |
721 | 0 | adaptedLumaQP++; // this is a first-pass tuning to stabilize the rate control |
722 | 0 | } |
723 | 0 | meanLuma = MAX_UINT; |
724 | 0 | if (isChromaEnabled (pic->chromaFormat) && (adaptedLumaQP < MAX_QP)) |
725 | 0 | { |
726 | 0 | adaptedLumaQP += getGlaringColorQPOffset (pic, (int) ctuRsAddr, slice->sps->bitDepths[CH_C], meanLuma); |
727 | |
|
728 | 0 | if ((adaptedLumaQP > MAX_QP) && !isHDR) adaptedLumaQP = MAX_QP; |
729 | 0 | } |
730 | | // change the CTU-level QP index based on CTU area's average luma value (Sharp) |
731 | 0 | if (isHDR) |
732 | 0 | { |
733 | 0 | if (meanLuma == MAX_UINT) meanLuma = pic->ctuAdaptedQP[ctuRsAddr]; |
734 | |
|
735 | 0 | adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth)); |
736 | 0 | } |
737 | | // add further delta-QP of block importance mapping (BIM) detector if available |
738 | 0 | if (isBIM) |
739 | 0 | { |
740 | 0 | adaptedLumaQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedLumaQP + pic->m_picShared->m_ctuBimQpOffset[ctuRsAddr]); |
741 | 0 | } |
742 | | // reduce delta-QP variance, avoid wasting precious bit budget at low bit-rates |
743 | 0 | if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1)) |
744 | 0 | { |
745 | 0 | const int retunedAdLumaQP = adaptedLumaQP + 1; |
746 | |
|
747 | 0 | adaptedLumaQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedLumaQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * aaQP + 2) >> 2; |
748 | 0 | if (adaptedLumaQP > retunedAdLumaQP) adaptedLumaQP = retunedAdLumaQP; |
749 | 0 | if (adaptedLumaQP < MAX_QP && encCfg->m_QP == MAX_QP_PERCEPT_QPA && slice->TLayer > 1) adaptedLumaQP++; // a fine-tuning |
750 | 0 | } |
751 | 0 | if (isEncPass) ctuAvgLuma[ctuRsAddr - ctuStartAddr] = pic->ctuAdaptedQP[ctuRsAddr]; |
752 | |
|
753 | 0 | averageAdaptedLambda = sliceLambda * pow (2.0, double (adaptedLumaQP - sliceQP) / 3.0); |
754 | 0 | averageAdaptedLumaQP += adaptedLumaQP; |
755 | |
|
756 | 0 | pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP |
757 | 0 | pic->ctuAdaptedQP[ctuRsAddr] = adaptedLumaQP; |
758 | 0 | } |
759 | |
|
760 | 0 | if (encCfg->m_cuQpDeltaSubdiv == 0 || !slice->isIntra()) averageAdaptedLumaQP += applyDeltaQpPeakSmoothing (pic, encCfg, ctuStartAddr, ctuBoundingAddr); |
761 | |
|
762 | 0 | meanLuma = std::max (0, averageAdaptedLumaQP); |
763 | 0 | averageAdaptedLumaQP = (meanLuma + (nCtu >> 1)) / nCtu; |
764 | |
|
765 | 0 | if ((encCfg->m_RCTargetBitrate > 0 && averageAdaptedLumaQP != sliceQP) || (isEncPass) || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ) // QP/rate control |
766 | 0 | { |
767 | 0 | const int rcQpDiff = (encCfg->m_RCTargetBitrate > 0 || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ? sliceQP - averageAdaptedLumaQP : 0); |
768 | |
|
769 | 0 | averageAdaptedLumaQP = refineDeltaQpDistribution (pic, encCfg, sliceQP, sliceLambda, rcQpDiff, bitDepth, ctuStartAddr, ctuBoundingAddr, |
770 | 0 | meanLuma, slice->TLayer, slice->isIntra(), isEncPass, minNoiseLevels, ctuAvgLuma); |
771 | |
|
772 | 0 | pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu |
773 | 0 | pic->isMeanQPLimited = (encCfg->m_RCTargetBitrate > 0) && isEncPass && (averageAdaptedLumaQP > sliceQP); |
774 | 0 | } |
775 | 0 | else if (encCfg->m_rateCap && (pic->gopAdaptedQP > 0) && (averageAdaptedLumaQP < aaQP)) // capped CQF |
776 | 0 | { |
777 | 0 | const int rcQpDiff = aaQP - averageAdaptedLumaQP; |
778 | |
|
779 | 0 | averageAdaptedLambda = pow (2.0, double (rcQpDiff) / 3.0); |
780 | 0 | for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++) |
781 | 0 | { |
782 | 0 | pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda |
783 | 0 | pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + rcQpDiff); |
784 | 0 | } |
785 | |
|
786 | 0 | pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu |
787 | 0 | averageAdaptedLumaQP = aaQP; |
788 | 0 | } |
789 | 0 | else if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (averageAdaptedLumaQP + 1 < aaQP)) |
790 | 0 | { |
791 | 0 | const int lrQpDiff = (aaQP - averageAdaptedLumaQP) >> (encCfg->m_QP <= MAX_QP_PERCEPT_QPA ? 2 : 1); // for monotonous rate change at low rates |
792 | |
|
793 | 0 | averageAdaptedLambda = pow (2.0, double (lrQpDiff) / 3.0); |
794 | 0 | for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++) |
795 | 0 | { |
796 | 0 | pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda |
797 | 0 | pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + lrQpDiff); |
798 | 0 | } |
799 | |
|
800 | 0 | pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + lrQpDiff); // used in applyQPAdaptationSubCtu |
801 | 0 | averageAdaptedLumaQP = aaQP; // TODO hlm: += lrQpDiff? |
802 | |
|
803 | 0 | pic->isMeanQPLimited = false; |
804 | 0 | } |
805 | |
|
806 | 0 | if (isEncPass) ctuAvgLuma.clear(); |
807 | 0 | } // CTU-/frame-level QPA |
808 | |
|
809 | 0 | return averageAdaptedLumaQP; |
810 | 0 | } |
811 | | |
812 | | int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const VVEncCfg* encCfg, const Area& lumaArea, const uint8_t* minNoiseLevels) |
813 | 0 | { |
814 | 0 | Picture* const pic = (slice != nullptr ? slice->pic : nullptr); |
815 | 0 | uint32_t meanLuma = MAX_UINT; |
816 | |
|
817 | 0 | if (pic == nullptr || encCfg == nullptr) |
818 | 0 | { |
819 | 0 | return -1; |
820 | 0 | } |
821 | | |
822 | 0 | const bool isEncPass = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis); |
823 | 0 | const bool isHDR = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ); |
824 | 0 | const bool isBIM = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty()); |
825 | 0 | const bool isSccStrongRC = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong); |
826 | 0 | const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280); |
827 | 0 | const uint32_t hpFrameRate = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale); |
828 | 0 | const int bitDepth = slice->sps->bitDepths[CH_L]; |
829 | 0 | const PosType guardSize = (isHighResolution ? 2 : 1); |
830 | 0 | const Position pos = lumaArea.pos(); |
831 | 0 | const CompArea subArea = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, lumaArea.width, lumaArea.height)), pic->Y()); |
832 | 0 | const SizeType fltWidth = lumaArea.width + guardSize * (pos.x > 0 ? 2 : 1); |
833 | 0 | const SizeType fltHeight = lumaArea.height + guardSize * (pos.y > 0 ? 2 : 1); |
834 | 0 | const CompArea fltArea = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y()); |
835 | 0 | const CPelBuf picOrig = pic->getOrigBuf (fltArea); |
836 | 0 | const CPelBuf picPrv1 = pic->getOrigBufPrev (fltArea, PREV_FRAME_1); |
837 | 0 | const CPelBuf picPrv2 = pic->getOrigBufPrev (fltArea, PREV_FRAME_2); |
838 | 0 | const double hpEnerSubCTU = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width, |
839 | 0 | picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate, |
840 | 0 | bitDepth, isHighResolution); |
841 | 0 | const double hpEnerPicNorm = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, 0, |
842 | 0 | (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth); |
843 | 0 | int adaptedSubCtuQP = Clip3 (0, MAX_QP, pic->picInitialQP + apprI3Log2 (hpEnerSubCTU * hpEnerPicNorm, isSccStrongRC)); |
844 | |
|
845 | 0 | if (isChromaEnabled (pic->chromaFormat) && (adaptedSubCtuQP < MAX_QP)) |
846 | 0 | { |
847 | 0 | adaptedSubCtuQP += getGlaringColorQPOffsetSubCtu (pic, subArea, slice->sps->bitDepths[CH_C], meanLuma); |
848 | |
|
849 | 0 | if ((adaptedSubCtuQP > MAX_QP) && !isHDR) adaptedSubCtuQP = MAX_QP; |
850 | 0 | } |
851 | | // change the sub-CTU-level QP index based on sub-area's average luma value (Sharp) |
852 | 0 | if (isHDR) |
853 | 0 | { |
854 | 0 | if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg(); |
855 | |
|
856 | 0 | adaptedSubCtuQP = Clip3 (0, MAX_QP, adaptedSubCtuQP + lumaDQPOffset (meanLuma, bitDepth)); |
857 | 0 | } |
858 | | // add additional delta-QP of block importance mapping (BIM) detection if available |
859 | 0 | if (isBIM) |
860 | 0 | { |
861 | 0 | adaptedSubCtuQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedSubCtuQP + pic->m_picShared->m_ctuBimQpOffset[getCtuAddr (pos, *pic->cs->pcv)]); |
862 | 0 | } |
863 | | // reduce the delta-QP variance, avoid wasting precious bit budget at low bit-rates |
864 | 0 | if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (slice->sliceQp >= 0) && (encCfg->m_framesToBeEncoded != 1)) |
865 | 0 | { |
866 | 0 | const int retunedAdLumaQP = adaptedSubCtuQP + 1; |
867 | |
|
868 | 0 | adaptedSubCtuQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedSubCtuQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * slice->sliceQp + 2) >> 2; |
869 | 0 | if (adaptedSubCtuQP > retunedAdLumaQP) adaptedSubCtuQP = retunedAdLumaQP; |
870 | 0 | if (adaptedSubCtuQP < MAX_QP && encCfg->m_QP >= MAX_QP_PERCEPT_QPA) adaptedSubCtuQP++; // for monotonous rate change, l. 507 |
871 | 0 | } |
872 | 0 | if (isEncPass) |
873 | 0 | { |
874 | 0 | const double resRatio = sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0)); |
875 | |
|
876 | 0 | if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg(); |
877 | 0 | clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, meanLuma, resRatio, (slice->isIntra() ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) slice->TLayer)), adaptedSubCtuQP); |
878 | 0 | } |
879 | |
|
880 | 0 | return adaptedSubCtuQP; |
881 | 0 | } |
882 | | |
883 | | int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA, |
884 | | std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP, |
885 | | const bool isBIM) |
886 | 0 | { |
887 | 0 | if (slice == nullptr || !slice->pps->useDQP || ctuPumpRedQP.size() <= ctuRsAddr) return 0; |
888 | | |
889 | 0 | const int32_t avgOrig = origY.getAvg(); |
890 | 0 | uint32_t sumAbsZmOrig = 0; // zero-mean |
891 | 0 | const Pel* src = origY.buf; |
892 | |
|
893 | 0 | for (SizeType y = 0; y < origY.height; y++) // sum up the zero-mean absolute values |
894 | 0 | { |
895 | 0 | for (SizeType x = 0; x < origY.width; x++) |
896 | 0 | { |
897 | 0 | sumAbsZmOrig += (uint32_t) abs (src[x] - avgOrig); |
898 | 0 | } |
899 | 0 | src += origY.stride; |
900 | 0 | } |
901 | |
|
902 | 0 | const double sumAbsRatio = double (uiSadBestForQPA * 3 /*TODO: or 4? fine-tune!*/) / double (sumAbsZmOrig == 0 ? 1 : sumAbsZmOrig); |
903 | 0 | const int pumpingReducQP = ((isBIM ? -1 : 0) + int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0); |
904 | |
|
905 | 0 | ctuPumpRedQP[ctuRsAddr] += pumpingReducQP; |
906 | |
|
907 | 0 | return pumpingReducQP; |
908 | 0 | } |
909 | | |
910 | | } // namespace vvenc |
911 | | |
912 | | //! \} |