Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/BitAllocation.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or
4
other Intellectual Property Rights other than the copyrights concerning
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     BitAllocation.cpp
45
\brief    Bit allocation class for QP adaptation and, possibly, rate control
46
*/
47
48
#include "BitAllocation.h"
49
#include "EncStage.h"
50
#include "CommonLib/Picture.h"
51
#include "CommonLib/UnitTools.h"
52
#include <math.h>
53
54
#include "vvenc/vvencCfg.h"
55
56
57
//! \ingroup EncoderLib
58
//! \{
59
60
namespace vvenc {
61
62
// static functions
63
64
// Returns the weighted base-2 logarithm of d, rounded to the nearest integer:
// round(3*log2(d)) by default, or round(2*log2(d)) when isSccStrong is set.
// Inputs below the weight-specific lower limit (which includes zero and all
// negative values) as well as NaN yield the floor value -128.
static inline int apprI3Log2 (const double d, const bool isSccStrong) // rounded 2*log2(d) or 3*log2(d)
{
  const double weight = (isSccStrong ? 2.0 : 3.0);
  const double dLimit = (isSccStrong ? 5.5e-20 : 1.5e-13);

  // The comparison is written in negated form so that NaN also takes the floor
  // path: every ordered comparison with NaN is false, and converting a NaN
  // result to int further below would be undefined behavior.
  if (!(d >= dLimit)) return -128;

  // log(d)/log(2.0) is kept as is (instead of log2) to leave the rounding of all finite inputs untouched
  return int (floor (weight * log (d) / log (2.0) + 0.5));
}
71
72
// Derives a QP offset from a mean luma value: the offset is 1 for dark content
// and decreases with the squared mean luma, normalized by the squared sample range.
// Returns 0 if bitDepth is above 16 or avgLumaValue lies outside the bitDepth range.
static inline int lumaDQPOffset (const uint32_t avgLumaValue, const uint32_t bitDepth)
{
  const bool inputInRange = (bitDepth <= 16 && avgLumaValue < (1u << bitDepth));

  if (!inputInRange)
  {
    return 0;
  }

  // the range check above limits avgLumaValue to 16 bits, so the square fits into 32 bits
  const uint64_t lumaSquared = uint64_t (avgLumaValue * avgLumaValue);
  const uint64_t normShift   = uint64_t (2 * bitDepth);
#if 0
  // mapping for peak luminance of ca. 3*400 = 1200 nits
  return (2 - int ((9 * lumaSquared) >> normShift));
#else
  // mapping for peak luminance of ca. 2*400 =  800 nits
  return (1 - int ((6 * lumaSquared) >> normShift));
#endif
}
83
84
void calcSpatialVisAct ( const Pel* pSrc,
85
                         const int iSrcStride,
86
                         const int height,
87
                         const int width,
88
                         const uint32_t bitDepth,
89
                         const bool isUHD,
90
                         VisAct& va )
91
9.87k
{
92
9.87k
  CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" );
93
94
9.87k
  uint64_t saAct;  // spatial absolute activity sum
95
96
  // skip first row as there may be a black border frame
97
9.87k
  pSrc += iSrcStride;
98
99
  // center rows
100
9.87k
  if (isUHD) // high-pass with downsampling
101
0
  {
102
0
    pSrc += iSrcStride;
103
104
0
    saAct = g_pelBufOP.AvgHighPassWithDownsampling (width, height, pSrc, iSrcStride);
105
106
0
    va.hpSpatAct = double (saAct) / double ((width - 4) * (height - 4));
107
0
  }
108
9.87k
  else // HD high-pass without downsampling
109
9.87k
  {
110
9.87k
    saAct = g_pelBufOP.AvgHighPass (width, height, pSrc, iSrcStride);
111
112
9.87k
    va.hpSpatAct = double (saAct) / double ((width - 2) * (height - 2));
113
9.87k
  }
114
115
  // spatial in 12 bit
116
9.87k
  va.spatAct = unsigned (0.5 + va.hpSpatAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1));
117
9.87k
}
118
119
void calcTemporalVisAct ( const Pel* pSrc,
120
                          const int iSrcStride,
121
                          const int height,
122
                          const int width,
123
                          const Pel* pSM1,
124
                          const int iSM1Stride,
125
                          const Pel* pSM2,
126
                          const int iSM2Stride,
127
                          uint32_t frameRate,
128
                          const uint32_t bitDepth,
129
                          const bool isUHD,
130
                          VisAct& va )
131
8.78k
{
132
8.78k
  CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" );
133
8.78k
  CHECK( pSM1 == nullptr, "no compare buffer given to calculate temporal visual activity" );
134
135
8.78k
  const Pel* pS0 = pSrc;
136
8.78k
  uint64_t taAct;  // temporal absolute activity sum
137
138
  // force 1st-order delta if only prev. frame available
139
8.78k
  if (pSM2 == nullptr || iSM2Stride <= 0) frameRate = 24;
140
141
  // skip first row as there may be a black border frame
142
8.78k
  pSrc += iSrcStride;
143
144
  // center rows
145
8.78k
  if (pS0 == pSM1 && frameRate <= 31)
146
8.78k
  {
147
8.78k
    va.hpTempAct = 0; // bypass high-pass, result will be zero
148
8.78k
  }
149
0
  else if (isUHD)  // downsampled high-pass
150
0
  {
151
0
    const int i2M1Stride = iSM1Stride * 2;
152
153
0
    CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!");
154
155
0
    pSrc += iSrcStride;
156
0
    pSM1 += i2M1Stride;
157
0
    if (frameRate <= 31) // 1st-order delta
158
0
    {
159
0
      taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff1st (width, height, pSrc, pSM1, iSrcStride, iSM1Stride);
160
0
    }
161
0
    else // 2nd-order delta (diff of diffs)
162
0
    {
163
0
      const int i2M2Stride = iSM2Stride * 2;
164
165
0
      CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!");
166
167
0
      pSM2 += i2M2Stride;
168
0
      taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff2nd (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride);
169
0
    }
170
171
0
    va.hpTempAct = double (taAct) / double ((width - 4) * (height - 4));
172
0
  }
173
0
  else // HD high-pass without downsampling
174
0
  {
175
0
    CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!");
176
177
0
    pSM1 += iSM1Stride;
178
0
    if (frameRate <= 31) // 1st-order delta
179
0
    {
180
0
      taAct = g_pelBufOP.HDHighPass (width, height, pSrc, pSM1, iSrcStride, iSM1Stride);
181
0
    }
182
0
    else // 2nd-order delta (diff of diffs)
183
0
    {
184
0
      CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!");
185
186
0
      pSM2 += iSM2Stride;
187
0
      taAct = g_pelBufOP.HDHighPass2 (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride);
188
0
    }
189
190
0
    va.hpTempAct = double (taAct) / double ((width - 2) * (height - 2));
191
0
  }
192
193
  // temporal in 12 bit
194
8.78k
  va.tempAct = unsigned (0.5 + va.hpTempAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1) * (frameRate <= 31 ? 1.15625 : 1.0));
195
8.78k
}
196
197
// Derives the combined activity values of va from its spatial and temporal parts.
//   va       : reads tempAct, spatAct, hpSpatAct, hpTempAct; writes minAct, hpVisAct, visAct
//   bitDepth : sample bit depth, defines the lower activity limit and the clipping range
void updateVisAct ( VisAct& va, const uint32_t bitDepth )
{
  // minimum part in 12 bit
  va.minAct = std::min( va.tempAct, va.spatAct );
  // lower limit, compensate for high-pass amplification
  va.hpVisAct = std::max (double (1 << (bitDepth - 6)), va.hpSpatAct + 2.0 * va.hpTempAct);

  // Limit the rounded value while it is still a double: converting a double above 65535
  // directly to uint16_t is undefined behavior. Values within range are not affected, as
  // ClipBD applies the same bit-depth limit afterwards.
  const double maxVisAct = std::min (65535.0, double ((1 << bitDepth) - 1));

  va.visAct   = ClipBD( uint16_t( std::min( 0.5 + va.hpVisAct, maxVisAct ) ), bitDepth );
}
205
206
double filterAndCalculateAverageActivity ( const Pel* pSrc,
207
                                           const int iSrcStride,
208
                                           const int height,
209
                                           const int width,
210
                                           const Pel* pSM1,
211
                                           const int iSM1Stride,
212
                                           const Pel* pSM2,
213
                                           const int iSM2Stride,
214
                                           uint32_t frameRate,
215
                                           const uint32_t bitDepth,
216
                                           const bool isUHD,
217
                                           unsigned* minVisAct = nullptr,
218
                                           unsigned* spVisAct  = nullptr )
219
8.78k
{
220
8.78k
  VisAct va;
221
222
  // spatial activity
223
8.78k
  calcSpatialVisAct( pSrc, iSrcStride, height, width, bitDepth, isUHD, va );
224
225
  // temporal activity
226
8.78k
  calcTemporalVisAct( pSrc, iSrcStride, height, width, pSM1, iSM1Stride, pSM2, iSM2Stride,
227
8.78k
                     frameRate, bitDepth, isUHD, va );
228
229
  // minimum and visual activity
230
8.78k
  updateVisAct( va, bitDepth );
231
232
8.78k
  if( minVisAct )
233
3.33k
  {
234
3.33k
    *minVisAct = va.minAct;
235
3.33k
  }
236
8.78k
  if( spVisAct )
237
3.33k
  {
238
3.33k
    *spVisAct = va.spatAct;
239
3.33k
  }
240
241
8.78k
  return va.hpVisAct;
242
8.78k
}
243
244
// Returns the average activity used to normalize the measured picture activity.
//   picWidth, picHeight      : picture dimensions in samples
//   scaledAverageGopActivity : if positive, the result is this value divided by 2^(24 - bitDepth)
//   tempFiltering            : selects the base factor 32 instead of 16
//   bitDepth                 : sample bit depth
// Without a positive scaledAverageGopActivity, the result is the square root of a value
// that scales with the bit depth and with the square root of the ratio between the
// 3840x2160 area and the picture area.
static double getAveragePictureActivity (const uint32_t picWidth,  const uint32_t picHeight,
                                         const int scaledAverageGopActivity,
                                         const bool tempFiltering, const uint32_t bitDepth)
{
  if (scaledAverageGopActivity > 0)
  {
    return (double (scaledAverageGopActivity) / double (1 << (24 - bitDepth)));
  }

  // Multiply in double: the 32-bit product of width and height wraps for very large
  // pictures (down to zero for 65536x65536), which would distort or blow up the result.
  const double picSamples = double (picWidth) * double (picHeight);
  const double hpEnerPic  = (tempFiltering ? 32.0 : 16.0) * double (1 << (2 * bitDepth - 10)) * sqrt ((3840.0 * 2160.0) / picSamples);

  return sqrt (hpEnerPic); // square-root of a_pic value
}
256
257
// Returns a QP offset for a CTU (ctuAddr >= 0) or the whole picture (ctuAddr < 0)
// whose mean luma exceeds its lowest mean chroma by more than half the sample range.
//   pic          : picture providing the original sample buffers
//   ctuAddr      : raster-scan CTU address, or a negative value for picture-wise operation
//   bitDepth     : bit depth defining the mid level 2^(bitDepth - 1)
//   avgLumaValue : output, the mean luma value used for the decision
// Returns 0 if the luma-chroma difference does not exceed the mid level.
static int getGlaringColorQPOffset (Picture* const pic, const int ctuAddr, const int bitDepth, uint32_t &avgLumaValue)
{
  const PreCalcValues& pcv  = *pic->cs->pcv;
  const ChromaFormat chrFmt = pic->chromaFormat;
  const SizeType chrWidth   = pcv.maxCUSize >> getChannelTypeScaleX (CH_C, chrFmt);
  const SizeType chrHeight  = pcv.maxCUSize >> getChannelTypeScaleY (CH_C, chrFmt);
  const unsigned w          = pcv.widthInCtus;
  const int      midLevel   = 1 << (bitDepth - 1);
  int chrValue = MAX_INT;

  // CTU-wise: the mean luma is read from ctuAdaptedQP, where applyQPAdaptationSlice
  // temporarily stores it; picture-wise: it is measured on the luma plane
  avgLumaValue = uint32_t ((ctuAddr >= 0) ? pic->ctuAdaptedQP[ctuAddr] : pic->getOrigBuf().Y().getAvg());

  for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++)
  {
    const ComponentID compID = (ComponentID) comp;
    int avgCompValue;

    if (ctuAddr >= 0) // chroma
    {
      // chroma area of the CTU, clipped to the picture
      const CompArea chrArea = clipArea (CompArea (compID, chrFmt, Area ((ctuAddr % w) * chrWidth, (ctuAddr / w) * chrHeight, chrWidth, chrHeight)), pic->block (compID));

      avgCompValue = pic->getOrigBuf (chrArea).getAvg();
    }
    else avgCompValue = pic->getOrigBuf (pic->block (compID)).getAvg();

    if (chrValue > avgCompValue) chrValue = avgCompValue; // minimum of the DC offsets
  }
  CHECK (chrValue < 0, "mean chroma value cannot be negative!");

  chrValue = (int) avgLumaValue - chrValue;

  if (chrValue > midLevel)
  {
    // Square in double: the int product chrValue * chrValue overflows once the difference
    // exceeds 46340, which is within reach of 16-bit samples. Both squares are exactly
    // representable in double, so smaller values give the same result as before.
    const double chrSquared = double (chrValue) * double (chrValue);
    const double midSquared = double (midLevel) * double (midLevel);

    return apprI3Log2 (chrSquared / midSquared, pic->isSccStrong);
  }

  return 0;
}
292
293
// Returns a QP offset for a luma area whose mean luma exceeds the lowest mean of the
// co-located chroma areas by more than half the sample range.
//   pic          : picture providing the original sample buffers
//   lumaArea     : luma area to analyze; the chroma areas are derived from it
//   bitDepth     : bit depth defining the mid level 2^(bitDepth - 1)
//   avgLumaValue : output, the mean luma value of lumaArea
// Returns 0 if the luma-chroma difference does not exceed the mid level.
static int getGlaringColorQPOffsetSubCtu (Picture* const pic, const CompArea& lumaArea, const int bitDepth, uint32_t &avgLumaValue)
{
  const ChromaFormat chrFmt = pic->chromaFormat;
  const SizeType chrWidth   = lumaArea.width  >> getChannelTypeScaleX (CH_C, chrFmt);
  const SizeType chrHeight  = lumaArea.height >> getChannelTypeScaleY (CH_C, chrFmt);
  const PosType  chrPosX    = lumaArea.x >> getChannelTypeScaleX (CH_C, chrFmt);
  const PosType  chrPosY    = lumaArea.y >> getChannelTypeScaleY (CH_C, chrFmt);
  const int      midLevel   = 1 << (bitDepth - 1);
  int chrValue = MAX_INT;

  avgLumaValue = pic->getOrigBuf (lumaArea).getAvg();

  for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++)
  {
    const ComponentID compID = (ComponentID) comp;
    // co-located chroma area, clipped to the picture
    const CompArea   chrArea = clipArea (CompArea (compID, chrFmt, Area (chrPosX, chrPosY, chrWidth, chrHeight)), pic->block (compID));

    int avgCompValue = pic->getOrigBuf (chrArea).getAvg();

    if (chrValue > avgCompValue) chrValue = avgCompValue; // minimum of the DC offsets
  }
  CHECK (chrValue < 0, "mean chroma value cannot be negative!");

  chrValue = (int) avgLumaValue - chrValue;

  if (chrValue > midLevel)
  {
    // Square in double: the int product chrValue * chrValue overflows once the difference
    // exceeds 46340, which is within reach of 16-bit samples. Both squares are exactly
    // representable in double, so smaller values give the same result as before.
    const double chrSquared = double (chrValue) * double (chrValue);
    const double midSquared = double (midLevel) * double (midLevel);

    return apprI3Log2 (chrSquared / midSquared, pic->isSccStrong);
  }

  return 0;
}
322
323
static void updateMinNoiseLevelsPic (uint8_t* const minNoiseLevels, const int bitDepth, const unsigned avgValue, const unsigned noise)
324
0
{
325
0
  const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions
326
327
0
  CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds");
328
329
0
  if (noise < (unsigned) minNoiseLevels[avgIndex])
330
0
  {
331
0
    minNoiseLevels[avgIndex] = (uint8_t) noise;
332
0
  }
333
0
}
334
335
static void clipQPValToEstimatedMinimStats (const uint8_t* minNoiseLevels, const int bitDepth, const unsigned avgValue,
336
                                            const double resFac, const int extraQPOffset, int& QP) // output QP
337
0
{
338
0
  const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions
339
0
  const unsigned x = (1 << 3) - 1;
340
0
  const int32_t dQPOffset = -15;
341
342
0
  CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds");
343
344
0
  int i = minNoiseLevels[avgIndex];
345
346
  // try to "fill in the blanks" in luma range (also results in peak smoothing, as described in PCS 2022 paper)
347
0
  if (avgIndex == 0 && i > minNoiseLevels[0 + 1]) i = minNoiseLevels[0 + 1];
348
0
  if (avgIndex == x && i > minNoiseLevels[x - 1]) i = minNoiseLevels[x - 1];
349
350
0
  if (avgIndex > 0 && avgIndex < x)
351
0
  {
352
0
    const uint8_t maxNeighborNoiseLevel = std::max (minNoiseLevels[avgIndex - 1], minNoiseLevels[avgIndex + 1]);
353
354
0
    if (i > maxNeighborNoiseLevel) i = maxNeighborNoiseLevel;
355
0
  }
356
0
  if (i >= 255)
357
0
  {
358
0
    return;
359
0
  }
360
361
0
  i = std::max (0, apprI3Log2 (std::min (1.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // = 6*log2
362
0
  if (QP < i)
363
0
  {
364
0
    QP = i;
365
0
  }
366
0
}
367
368
// Smooths isolated peaks and holes in the CTU-wise adapted QPs of a picture.
// A non-boundary CTU whose QP lies above (below) all eight neighbors in its 3x3
// neighborhood gets its QP decremented (incremented) by one, with its lambda scaled
// accordingly. The comparison uses the QPs as they were before smoothing for the
// row above and the left neighbor.
//   pic       : picture whose ctuAdaptedQP and ctuQpaLambda arrays are modified
//   encCfg    : not used here
//   startAddr : first CTU address to process
//   endAddr   : end of the CTU address range (exclusive)
// Returns the sum of all applied QP changes, or 0 if the range is too small.
static int applyDeltaQpPeakSmoothing (Picture* const pic, const VVEncCfg* encCfg, const uint32_t startAddr, const uint32_t endAddr)
{
  const uint32_t ctusPerRow = pic->cs->pcv->widthInCtus;

  // the range must span more than three CTU rows
  if (ctusPerRow == 0 || endAddr <= startAddr + 3u * ctusPerRow) return 0;

  const uint32_t stopAddr = endAddr - ctusPerRow; // the last CTU row only serves as lower neighbor
  std::vector<int> aboveQP (ctusPerRow);          // pre-smoothing QPs of the row above, starts at zero
  int leftQP      = 0;                            // pre-smoothing QP of the left neighbor
  int totalChange = 0;

  for (uint32_t addr = startAddr; addr < stopAddr; addr++)
  {
    const uint32_t col = addr % ctusPerRow;

    if (addr < ctusPerRow) // first picture row: only remember the QP
    {
      aboveQP[col] = pic->ctuAdaptedQP[addr];
      continue;
    }
    if (col == 0) // left picture boundary: only remember the QP
    {
      leftQP = pic->ctuAdaptedQP[addr];
      continue;
    }
    if (col == ctusPerRow - 1) // right picture boundary: complete the row memory
    {
      aboveQP[col - 1] = leftQP;
      aboveQP[col]     = pic->ctuAdaptedQP[addr];
      continue;
    }

    // no boundary CTU: max. and min. in 3x3 neighborhood, starting with left and right neighbor
    const uint32_t belowAddr = addr + ctusPerRow;
    int neighborMax = std::max (leftQP, pic->ctuAdaptedQP[addr + 1]);
    int neighborMin = std::min (leftQP, pic->ctuAdaptedQP[addr + 1]);

    for (uint32_t k = 0; k < 3; k++) // three neighbors above and three below
    {
      const int upperQP = aboveQP[col - 1 + k];
      const int lowerQP = pic->ctuAdaptedQP[belowAddr - 1 + k];

      neighborMax = std::max (neighborMax, std::max (upperQP, lowerQP));
      neighborMin = std::min (neighborMin, std::min (upperQP, lowerQP));
    }

    // the entry left of this column is not needed anymore in this row, so store the
    // left neighbor's QP there for the next row, then take over this CTU's QP
    aboveQP[col - 1] = leftQP;
    leftQP = pic->ctuAdaptedQP[addr];

    if (leftQP > neighborMax)
    {
      pic->ctuQpaLambda[addr] *= 0.793701; // peak: decrease adapted lambda, QP
      pic->ctuAdaptedQP[addr]--;
      totalChange--;
    }
    if (leftQP < neighborMin)
    {
      pic->ctuQpaLambda[addr] *= 1.259921; // hole: increase adapted lambda, QP
      pic->ctuAdaptedQP[addr]++;
      totalChange++;
    }
  }

  return totalChange;
}
423
424
// Finalizes the CTU-wise QPs and lambdas of a slice: applies the rate control QP
// difference, in the encoding pass raises QPs to the noise-level based lower bound,
// and, if that raised the QP sum above its target, lowers the QPs of CTUs not
// raised before by one, starting at the highest QP.
//   pic            : picture whose ctuAdaptedQP and ctuQpaLambda arrays are rewritten
//   encCfg         : encoder configuration (rate control target, source size, intra QP offset)
//   sliceQP        : slice QP, reference point for the lambda scaling
//   sliceLambda    : slice lambda, scaled by 2^((QP - sliceQP) / 3) per CTU
//   rcQpDiff       : QP difference added to every CTU QP
//   bitDepth       : sample bit depth, forwarded to the noise-level clipping
//   startAddr      : first CTU address of the slice
//   endAddr        : end of the CTU address range (exclusive)
//   qpSum          : target QP sum if no target bitrate is configured
//   tempLayer      : temporal layer, limited to 4 as extra offset for non-intra pictures
//   isIntra        : selects the intra QP offset as extra offset
//   isEncPass      : enables the noise-level clipping and the redistribution
//   minNoiseLevels : per-region minimum noise levels
//   ctuAvgLuma     : mean luma per CTU, indexed relative to startAddr; entries of CTUs
//                    that were raised or lowered are overwritten with -1
// Returns the rounded average CTU QP of the slice.
static int refineDeltaQpDistribution (Picture* const pic, const VVEncCfg* encCfg,   const int sliceQP,
                                      const double sliceLambda, const int rcQpDiff, const int bitDepth,
                                      const uint32_t startAddr, const uint32_t endAddr, const int qpSum,
                                      const uint32_t tempLayer, const bool isIntra, const bool isEncPass,
                                      const uint8_t* minNoiseLevels, std::vector<int>& ctuAvgLuma)
{
  const int    numCtus     = int (endAddr - startAddr);
  const int    qpSumTarget = (encCfg->m_RCTargetBitrate > 0 ? sliceQP * numCtus : qpSum);
  const double resFactor   = (isEncPass ? sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0)) : 0.0);
  int  qpSumNow  = 0;
  bool anyRaised = false;

  // first pass: apply the QP difference and limits, store the resulting QPs and lambdas
  for (uint32_t addr = startAddr; addr < endAddr; addr++)
  {
    int ctuQP = std::max (0, pic->ctuAdaptedQP[addr] + rcQpDiff);

    if (isEncPass)
    {
      const int qpBeforeClip = ctuQP; // CTU QP before clipping for diff calculation below
      const int extraOffset  = (isIntra ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) tempLayer));

      clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, ctuAvgLuma[addr - startAddr], resFactor, extraOffset, ctuQP);

      if (ctuQP > qpBeforeClip)
      {
        ctuAvgLuma[addr - startAddr] = -1; // mark CTU as being processed already
        anyRaised = true;
      }
    }

    ctuQP = std::min (MAX_QP, ctuQP);
    qpSumNow += ctuQP;

    // store modified CTU lambdas and QPs
    pic->ctuQpaLambda[addr] = sliceLambda * pow (2.0, double (ctuQP - sliceQP) / 3.0);
    pic->ctuAdaptedQP[addr] = ctuQP;
  }

  // second pass: CTU QPs limited, so distribute saved rate among nonlimited CTUs
  if (anyRaised && qpSumNow > qpSumTarget)
  {
    int highestQP = 0, lowestQP = MAX_QP;

    for (uint32_t addr = startAddr; addr < endAddr; addr++)
    {
      const int ctuQP = pic->ctuAdaptedQP[addr];

      // the maximum considers nonlimited CTUs only, the minimum all CTUs
      if (ctuAvgLuma[addr - startAddr] >= 0 && ctuQP > highestQP) highestQP = ctuQP;
      if (ctuQP < lowestQP) lowestQP = ctuQP;
    }

    lowestQP = std::max (0, lowestQP);

    // spend rate starting at max QPs, then go downward
    for (; highestQP > lowestQP && qpSumNow > qpSumTarget; highestQP--)
    {
      for (uint32_t addr = startAddr; addr < endAddr; addr++)
      {
        if (ctuAvgLuma[addr - startAddr] >= 0 && pic->ctuAdaptedQP[addr] == highestQP)
        {
          const int reducedQP = std::max (0, pic->ctuAdaptedQP[addr] - 1);

          ctuAvgLuma[addr - startAddr] = -1; // mark CTU as being reduced already

          if (reducedQP < pic->ctuAdaptedQP[addr]) qpSumNow--;

          // store reduced lambdas and QPs
          pic->ctuQpaLambda[addr] = sliceLambda * pow (2.0, double (reducedQP - sliceQP) / 3.0);
          pic->ctuAdaptedQP[addr] = reducedQP;
        }

        if (qpSumNow <= qpSumTarget) break; // target reached, keep the remaining CTUs as they are
      }
    }
  }

  return (qpSumNow + (numCtus >> 1)) / numCtus;
}
505
506
// public functions
507
508
int BitAllocation::applyQPAdaptationSlice (const Slice* slice, const VVEncCfg* encCfg, const int sliceQP,
509
                                           const double sliceLambda, uint16_t* const picVisActLuma,
510
                                           std::vector<int>& ctuPumpRedQP, std::vector<uint8_t>* ctuRCQPMemory,
511
                                           int* const optChromaQPOffsets, const uint8_t* minNoiseLevels,
512
                                           const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr)
513
1.08k
{
514
1.08k
  Picture* const pic          = (slice != nullptr ? slice->pic : nullptr);
515
1.08k
  double hpEner[MAX_NUM_COMP] = {0.0, 0.0, 0.0};
516
1.08k
  double averageAdaptedLambda = 0.0;
517
1.08k
  int    averageAdaptedLumaQP = -1;
518
1.08k
  uint32_t meanLuma           = MAX_UINT;
519
1.08k
  std::vector<int> ctuAvgLuma;
520
521
1.08k
  if (pic == nullptr || pic->cs == nullptr || encCfg == nullptr || ctuStartAddr >= ctuBoundingAddr)
522
0
  {
523
0
    return -1;
524
0
  }
525
526
1.08k
  const bool isEncPass        = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis);
527
1.08k
  const bool isHDR            = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ);
528
1.08k
  const bool isBIM            = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty());
529
1.08k
  const bool isSccStrongRC    = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong);
530
1.08k
  const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280);
531
1.08k
  const bool useFrameWiseQPA  = (encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (slice->TLayer > 0);
532
1.08k
  const uint32_t hpFrameRate  = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale);
533
1.08k
  const int  bitDepth         = slice->sps->bitDepths[CH_L];
534
1.08k
  double hpEnerPicNorm        = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, (encCfg->m_RCNumPasses == 2 ? 0 : ctuPumpRedQP.back()),
535
1.08k
                                                                 (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth);
536
1.08k
  const PreCalcValues& pcv    = *pic->cs->pcv;
537
538
1.08k
  pic->picInitialQP = sliceQP;  // modified below and used in applyQPAdaptationSubCtu
539
1.08k
  if ((encCfg->m_RCTargetBitrate > 0) && useFrameWiseQPA)
540
0
  {
541
0
    averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP - 1); // one will be added again
542
0
  }
543
544
4.34k
  for (uint32_t comp = 0; comp < getNumberValidComponents (pic->chromaFormat); comp++)
545
3.25k
  {
546
3.25k
    const ComponentID compID  = (ComponentID) comp;
547
548
3.25k
    if (isLuma (compID)) // luma: CTU-wise QPA operation
549
1.08k
    {
550
1.08k
      const PosType guardSize = (isHighResolution ? 2 : 1);
551
1.08k
      unsigned zeroMinActCTUs = 0, picSpVisAct = 0;
552
553
4.41k
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
554
3.33k
      {
555
3.33k
        const Position pos ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUSize, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUSize);
556
3.33k
        const CompArea ctuArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUSize, pcv.maxCUSize)), pic->Y());
557
3.33k
        const SizeType fltWidth  = pcv.maxCUSize + guardSize * (pos.x > 0 ? 2 : 1);
558
3.33k
        const SizeType fltHeight = pcv.maxCUSize + guardSize * (pos.y > 0 ? 2 : 1);
559
3.33k
        const CompArea fltArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y());
560
3.33k
        const CPelBuf  picOrig   = pic->getOrigBuf (fltArea);
561
3.33k
        const CPelBuf  picPrv1   = pic->getOrigBufPrev (fltArea, PREV_FRAME_1);
562
3.33k
        const CPelBuf  picPrv2   = pic->getOrigBufPrev (fltArea, PREV_FRAME_2);
563
3.33k
        unsigned minActivityPart = 0, spVisActCTU = 0;
564
565
3.33k
        hpEner[1] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
566
3.33k
                                                       picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
567
3.33k
                                                       bitDepth, isHighResolution, &minActivityPart, &spVisActCTU);
568
569
3.33k
        if (minActivityPart == 0) zeroMinActCTUs++;
570
571
3.33k
        hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height);
572
3.33k
        pic->ctuQpaLambda[ctuRsAddr] = hpEner[1]; // temporary backup of CTU mean visual activity
573
3.33k
        pic->ctuAdaptedQP[ctuRsAddr] = (int) pic->getOrigBuf (ctuArea).getAvg(); // and mean luma
574
575
3.33k
        if (picOrig.buf == picPrv1.buf) // replace temporal visual activity with min motion error
576
3.33k
        {
577
3.33k
          hpEner[1] = pic->m_picShared->m_minNoiseLevels[pic->ctuAdaptedQP[ctuRsAddr] >> (bitDepth - 3)] * (bitDepth >= 10 ? 1.5 : 0.375);
578
579
3.33k
          if (hpEner[1] < (bitDepth >= 10 ? 382.5 : 95.625)) // levels in first frame
580
0
          {
581
0
            hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height);
582
0
            pic->ctuQpaLambda[ctuRsAddr] += hpEner[1]; // add noise level to mean visual activity
583
0
          }
584
3.33k
        }
585
0
        else if (!isEncPass && (encCfg->m_RCNumPasses == 2 || pic->gopEntry->m_mctfIndex < 0 || !pic->gopEntry->m_isStartOfGop))
586
0
        {
587
0
          updateMinNoiseLevelsPic (pic->m_picShared->m_minNoiseLevels, bitDepth, pic->ctuAdaptedQP[ctuRsAddr], minActivityPart);
588
0
        }
589
3.33k
        picSpVisAct += spVisActCTU;
590
3.33k
      }
591
592
1.08k
      hpEner[comp] /= double (encCfg->m_SourceWidth * encCfg->m_SourceHeight);
593
1.08k
      if (picVisActLuma != nullptr)
594
1.08k
      {
595
1.08k
        *picVisActLuma = ClipBD (uint16_t (0.5 + hpEner[comp]), bitDepth);
596
1.08k
      }
597
1.08k
      if (ctuBoundingAddr > ctuStartAddr)
598
1.08k
      {
599
1.08k
        const uint32_t nCtu = ctuBoundingAddr - ctuStartAddr;
600
601
1.08k
        pic->picVA.spatAct[ CH_L ] = ClipBD (uint16_t ((picSpVisAct + (nCtu >> 1)) / nCtu), 12);
602
1.08k
      }
603
1.08k
      if (encCfg->m_internalUsePerceptQPATempFiltISlice && slice->isIntra() && pic->getOrigBuf (compID).buf != pic->getOrigBufPrev (compID, PREV_FRAME_1).buf && zeroMinActCTUs * 2 > ctuBoundingAddr - ctuStartAddr)
604
0
      {
605
0
        hpEnerPicNorm *= sqrt (zeroMinActCTUs * 2.0 / float (ctuBoundingAddr - ctuStartAddr)); // frozen-image mode
606
0
      }
607
1.08k
    }
608
2.17k
    else // chroma: only picture-wise operation required
609
2.17k
    {
610
2.17k
      const CPelBuf picOrig = pic->getOrigBuf (compID);
611
2.17k
      const CPelBuf picPrv1 = pic->getOrigBufPrev (compID, PREV_FRAME_1);
612
2.17k
      const CPelBuf picPrv2 = pic->getOrigBufPrev (compID, PREV_FRAME_2);
613
614
2.17k
      hpEner[comp] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
615
2.17k
                                                        picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
616
2.17k
                                                        bitDepth, isHighResolution && (pic->chromaFormat == CHROMA_444));
617
618
2.17k
      const int adaptChromaQPOffset = 1.5 * hpEner[comp] <= hpEner[0] ? 0 : apprI3Log2 (1.5 * hpEner[comp] / hpEner[0], pic->isSccStrong);
619
620
2.17k
      if (averageAdaptedLumaQP < 0) // YUV is not 4:0:0!
621
1.08k
      {
622
1.08k
        averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC));
623
624
1.08k
        if (isChromaEnabled (pic->chromaFormat) && (averageAdaptedLumaQP < MAX_QP))
625
1.08k
        {
626
1.08k
          averageAdaptedLumaQP += getGlaringColorQPOffset (pic, -1 /*ctuAddr*/, slice->sps->bitDepths[CH_C], meanLuma);
627
628
1.08k
          if ((averageAdaptedLumaQP > MAX_QP) && !isHDR) averageAdaptedLumaQP = MAX_QP;
629
1.08k
        }
630
        // change mean picture QP index based on picture's average luma value (Sharp)
631
1.08k
        if (isHDR)
632
0
        {
633
0
          if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg();
634
635
0
          averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
636
0
        }
637
1.08k
      }
638
639
2.17k
      if (optChromaQPOffsets != nullptr) // adapts sliceChromaQpOffsetIntraOrPeriodic
640
2.17k
      {
641
        GCC_WARNING_DISABLE_maybe_uninitialized // probably spurious warning, when building with -fsanitize=undefined: "error: 'encCfg.33' may be used uninitialized in this function"
642
2.17k
        const int lumaChromaMappingDQP = (averageAdaptedLumaQP - slice->sps->chromaQpMappingTable.getMappedChromaQpValue (compID, averageAdaptedLumaQP)) >> (encCfg->m_HdrMode == vvencHDRMode::VVENC_HDR_OFF ? 1 : 2);
643
2.17k
        GCC_WARNING_RESET
644
2.17k
        optChromaQPOffsets[comp - 1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP);
645
2.17k
      }
646
2.17k
    } // isLuma or isChroma
647
3.25k
  }
648
649
1.08k
  if (averageAdaptedLumaQP < 0) // only if YUV is 4:0:0!
650
0
  {
651
0
    averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC));
652
653
    // change mean picture QP index based on the picture's average luma value (Sharp)
654
0
    if (isHDR)
655
0
    {
656
0
      if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg();
657
658
0
      averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
659
0
    }
660
0
  }
661
662
1.08k
  if (encCfg->m_RCNumPasses == 2 && (encCfg->m_RCTargetBitrate > 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra())
663
0
  {
664
0
    const int nCtu = int (ctuBoundingAddr - ctuStartAddr);
665
0
    const int offs = (slice->poc / encCfg->m_IntraPeriod) * ((nCtu + 1) >> 1);
666
0
    std::vector<uint8_t>& ctuQPMem = *ctuRCQPMemory; // unpack 1st-pass reduction QPs
667
668
0
    if ((ctuPumpRedQP.size() >= nCtu) && (ctuQPMem.size() >= offs + ((nCtu + 1) >> 1)))
669
0
    {
670
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
671
0
      {
672
0
        ctuPumpRedQP[ctuRsAddr] = int ((ctuRsAddr & 1) ? ctuQPMem[offs + (ctuRsAddr >> 1)] >> 4 : ctuQPMem[offs + (ctuRsAddr >> 1)] & 15) - 8;
673
0
      }
674
0
    }
675
0
  }
676
677
1.08k
  if (useFrameWiseQPA || (averageAdaptedLumaQP >= MAX_QP)) // store the CTU-wise QP/lambda values
678
0
  {
679
0
    averageAdaptedLumaQP = std::min (MAX_QP, averageAdaptedLumaQP + 1);
680
0
    averageAdaptedLambda = sliceLambda * pow (2.0, double (averageAdaptedLumaQP - sliceQP) / 3.0);
681
682
0
    for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
683
0
    {
684
0
      pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP
685
0
      pic->ctuAdaptedQP[ctuRsAddr] = averageAdaptedLumaQP;
686
0
    }
687
0
  }
688
1.08k
  else // use CTU-level QPA
689
1.08k
  {
690
1.08k
    const int nCtu = int (ctuBoundingAddr - ctuStartAddr);
691
1.08k
    const int dvsr = encCfg->m_IntraPeriod - encCfg->m_GOPSize;
692
1.08k
    const int aaQP = averageAdaptedLumaQP; // backup of initial average QP from above
693
1.08k
    const bool rcIsFirstPassOf2 = ((encCfg->m_RCTargetBitrate == 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (slice->poc > 0) ? encCfg->m_RCNumPasses == 2 : false);
694
695
1.08k
    if (isEncPass) ctuAvgLuma.resize (nCtu);
696
697
1.08k
    averageAdaptedLumaQP = 0;
698
4.41k
    for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
699
3.33k
    {
700
3.33k
      const double hpEnerCTU = pic->ctuQpaLambda[ctuRsAddr];
701
3.33k
      int adaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEnerCTU * hpEnerPicNorm, isSccStrongRC));
702
703
3.33k
      if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra() && (ctuPumpRedQP.size() > ctuRsAddr))
704
0
      {
705
0
        if (rcIsFirstPassOf2) // backup 1st-pass I-frame QP for 2nd rate control pass
706
0
        {
707
0
          if (ctuRsAddr & 1) ctuRCQPMemory->back() |= (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8) << 4;
708
0
          else /*even addr*/ ctuRCQPMemory->push_back (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8);
709
0
          if (adaptedLumaQP > 0)
710
0
          {
711
0
            adaptedLumaQP -= (aaQP >> 4); // some first-pass tuning for stabilization
712
0
          }
713
0
        }
714
0
        if (ctuPumpRedQP[ctuRsAddr] < 0) adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize - (dvsr >> 1)) / dvsr);
715
0
        else /*ctuPumpRedQP[addr] >= 0*/ adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize + (dvsr >> 1)) / dvsr);
716
717
0
        ctuPumpRedQP[ctuRsAddr] = 0; // reset QP memory for temporal pumping analysis
718
0
      }
719
3.33k
      if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && !slice->isIntra() && (slice->TLayer == 0) && rcIsFirstPassOf2 && (adaptedLumaQP < MAX_QP))
720
0
      {
721
0
        adaptedLumaQP++; // this is a first-pass tuning to stabilize the rate control
722
0
      }
723
3.33k
      meanLuma = MAX_UINT;
724
3.33k
      if (isChromaEnabled (pic->chromaFormat) && (adaptedLumaQP < MAX_QP))
725
3.33k
      {
726
3.33k
        adaptedLumaQP += getGlaringColorQPOffset (pic, (int) ctuRsAddr, slice->sps->bitDepths[CH_C], meanLuma);
727
728
3.33k
        if ((adaptedLumaQP > MAX_QP) && !isHDR) adaptedLumaQP = MAX_QP;
729
3.33k
      }
730
      // change the CTU-level QP index based on CTU area's average luma value (Sharp)
731
3.33k
      if (isHDR)
732
0
      {
733
0
        if (meanLuma == MAX_UINT) meanLuma = pic->ctuAdaptedQP[ctuRsAddr];
734
735
0
        adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
736
0
      }
737
      // add further delta-QP of block importance mapping (BIM) detector if available
738
3.33k
      if (isBIM)
739
0
      {
740
0
        adaptedLumaQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedLumaQP + pic->m_picShared->m_ctuBimQpOffset[ctuRsAddr]);
741
0
      }
742
      // reduce delta-QP variance, avoid wasting precious bit budget at low bit-rates
743
3.33k
      if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1))
744
539
      {
745
539
        const int retunedAdLumaQP = adaptedLumaQP + 1;
746
747
539
        adaptedLumaQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedLumaQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * aaQP + 2) >> 2;
748
539
        if (adaptedLumaQP > retunedAdLumaQP) adaptedLumaQP = retunedAdLumaQP;
749
539
        if (adaptedLumaQP < MAX_QP && encCfg->m_QP == MAX_QP_PERCEPT_QPA && slice->TLayer > 1) adaptedLumaQP++; // a fine-tuning
750
539
      }
751
3.33k
      if (isEncPass) ctuAvgLuma[ctuRsAddr - ctuStartAddr] = pic->ctuAdaptedQP[ctuRsAddr];
752
753
3.33k
      averageAdaptedLambda = sliceLambda * pow (2.0, double (adaptedLumaQP - sliceQP) / 3.0);
754
3.33k
      averageAdaptedLumaQP += adaptedLumaQP;
755
756
3.33k
      pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP
757
3.33k
      pic->ctuAdaptedQP[ctuRsAddr] = adaptedLumaQP;
758
3.33k
    }
759
760
1.08k
    if (encCfg->m_cuQpDeltaSubdiv == 0 || !slice->isIntra()) averageAdaptedLumaQP += applyDeltaQpPeakSmoothing (pic, encCfg, ctuStartAddr, ctuBoundingAddr);
761
762
1.08k
    meanLuma = std::max (0, averageAdaptedLumaQP);
763
1.08k
    averageAdaptedLumaQP = (meanLuma + (nCtu >> 1)) / nCtu;
764
765
1.08k
    if ((encCfg->m_RCTargetBitrate > 0 && averageAdaptedLumaQP != sliceQP) || (isEncPass) || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ) // QP/rate control
766
0
    {
767
0
      const int rcQpDiff = (encCfg->m_RCTargetBitrate > 0 || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ? sliceQP - averageAdaptedLumaQP : 0);
768
769
0
      averageAdaptedLumaQP = refineDeltaQpDistribution (pic, encCfg, sliceQP, sliceLambda, rcQpDiff, bitDepth, ctuStartAddr, ctuBoundingAddr,
770
0
                                                        meanLuma, slice->TLayer, slice->isIntra(), isEncPass, minNoiseLevels, ctuAvgLuma);
771
772
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu
773
0
      pic->isMeanQPLimited = (encCfg->m_RCTargetBitrate > 0) && isEncPass && (averageAdaptedLumaQP > sliceQP);
774
0
    }
775
1.08k
    else if (encCfg->m_rateCap && (pic->gopAdaptedQP > 0) && (averageAdaptedLumaQP < aaQP)) // capped CQF
776
0
    {
777
0
      const int rcQpDiff = aaQP - averageAdaptedLumaQP;
778
779
0
      averageAdaptedLambda = pow (2.0, double (rcQpDiff) / 3.0);
780
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
781
0
      {
782
0
        pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda
783
0
        pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + rcQpDiff);
784
0
      }
785
786
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu
787
0
      averageAdaptedLumaQP = aaQP;
788
0
    }
789
1.08k
    else if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (averageAdaptedLumaQP + 1 < aaQP))
790
0
    {
791
0
      const int lrQpDiff = (aaQP - averageAdaptedLumaQP) >> (encCfg->m_QP <= MAX_QP_PERCEPT_QPA ? 2 : 1); // for monotonous rate change at low rates
792
793
0
      averageAdaptedLambda = pow (2.0, double (lrQpDiff) / 3.0);
794
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
795
0
      {
796
0
        pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda
797
0
        pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + lrQpDiff);
798
0
      }
799
800
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + lrQpDiff); // used in applyQPAdaptationSubCtu
801
0
      averageAdaptedLumaQP = aaQP; // TODO hlm: += lrQpDiff?
802
803
0
      pic->isMeanQPLimited = false;
804
0
    }
805
806
1.08k
    if (isEncPass) ctuAvgLuma.clear();
807
1.08k
  } // CTU-/frame-level QPA
808
809
1.08k
  return averageAdaptedLumaQP;
810
1.08k
}
811
812
int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const VVEncCfg* encCfg, const Area& lumaArea, const uint8_t* minNoiseLevels)
813
3.28k
{
814
3.28k
  Picture* const pic          = (slice != nullptr ? slice->pic : nullptr);
815
3.28k
  uint32_t meanLuma           = MAX_UINT;
816
817
3.28k
  if (pic == nullptr || encCfg == nullptr)
818
0
  {
819
0
    return -1;
820
0
  }
821
822
3.28k
  const bool isEncPass        = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis);
823
3.28k
  const bool isHDR            = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ);
824
3.28k
  const bool isBIM            = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty());
825
3.28k
  const bool isSccStrongRC    = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong);
826
3.28k
  const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280);
827
3.28k
  const uint32_t hpFrameRate  = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale);
828
3.28k
  const int  bitDepth         = slice->sps->bitDepths[CH_L];
829
3.28k
  const PosType     guardSize = (isHighResolution ? 2 : 1);
830
3.28k
  const Position    pos       = lumaArea.pos();
831
3.28k
  const CompArea    subArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, lumaArea.width, lumaArea.height)), pic->Y());
832
3.28k
  const SizeType    fltWidth  = lumaArea.width  + guardSize * (pos.x > 0 ? 2 : 1);
833
3.28k
  const SizeType    fltHeight = lumaArea.height + guardSize * (pos.y > 0 ? 2 : 1);
834
3.28k
  const CompArea    fltArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y());
835
3.28k
  const CPelBuf     picOrig   = pic->getOrigBuf (fltArea);
836
3.28k
  const CPelBuf     picPrv1   = pic->getOrigBufPrev (fltArea, PREV_FRAME_1);
837
3.28k
  const CPelBuf     picPrv2   = pic->getOrigBufPrev (fltArea, PREV_FRAME_2);
838
3.28k
  const double hpEnerSubCTU   = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
839
3.28k
                                                                   picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
840
3.28k
                                                                   bitDepth, isHighResolution);
841
3.28k
  const double hpEnerPicNorm  = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, 0,
842
3.28k
                                                                 (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth);
843
3.28k
  int adaptedSubCtuQP = Clip3 (0, MAX_QP, pic->picInitialQP + apprI3Log2 (hpEnerSubCTU * hpEnerPicNorm, isSccStrongRC));
844
845
3.28k
  if (isChromaEnabled (pic->chromaFormat) && (adaptedSubCtuQP < MAX_QP))
846
3.28k
  {
847
3.28k
    adaptedSubCtuQP += getGlaringColorQPOffsetSubCtu (pic, subArea, slice->sps->bitDepths[CH_C], meanLuma);
848
849
3.28k
    if ((adaptedSubCtuQP > MAX_QP) && !isHDR) adaptedSubCtuQP = MAX_QP;
850
3.28k
  }
851
  // change the sub-CTU-level QP index based on sub-area's average luma value (Sharp)
852
3.28k
  if (isHDR)
853
0
  {
854
0
    if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg();
855
856
0
    adaptedSubCtuQP = Clip3 (0, MAX_QP, adaptedSubCtuQP + lumaDQPOffset (meanLuma, bitDepth));
857
0
  }
858
  // add additional delta-QP of block importance mapping (BIM) detection if available
859
3.28k
  if (isBIM)
860
0
  {
861
0
    adaptedSubCtuQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedSubCtuQP + pic->m_picShared->m_ctuBimQpOffset[getCtuAddr (pos, *pic->cs->pcv)]);
862
0
  }
863
  // reduce the delta-QP variance, avoid wasting precious bit budget at low bit-rates
864
3.28k
  if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (slice->sliceQp >= 0) && (encCfg->m_framesToBeEncoded != 1))
865
0
  {
866
0
    const int retunedAdLumaQP = adaptedSubCtuQP + 1;
867
868
0
    adaptedSubCtuQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedSubCtuQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * slice->sliceQp + 2) >> 2;
869
0
    if (adaptedSubCtuQP > retunedAdLumaQP) adaptedSubCtuQP = retunedAdLumaQP;
870
0
    if (adaptedSubCtuQP < MAX_QP && encCfg->m_QP >= MAX_QP_PERCEPT_QPA) adaptedSubCtuQP++; // for monotonous rate change, l. 507
871
0
  }
872
3.28k
  if (isEncPass)
873
0
  {
874
0
    const double resRatio = sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0));
875
876
0
    if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg();
877
0
    clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, meanLuma, resRatio, (slice->isIntra() ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) slice->TLayer)), adaptedSubCtuQP);
878
0
  }
879
880
3.28k
  return adaptedSubCtuQP;
881
3.28k
}
882
883
int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA,
884
                                            std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP,
885
                                            const bool isBIM)
886
0
{
887
0
  if (slice == nullptr || !slice->pps->useDQP || ctuPumpRedQP.size() <= ctuRsAddr) return 0;
888
889
0
  const int32_t avgOrig = origY.getAvg();
890
0
  uint32_t sumAbsZmOrig = 0; // zero-mean
891
0
  const Pel* src = origY.buf;
892
893
0
  for (SizeType y = 0; y < origY.height; y++) // sum up the zero-mean absolute values
894
0
  {
895
0
    for (SizeType x = 0; x < origY.width; x++)
896
0
    {
897
0
      sumAbsZmOrig += (uint32_t) abs (src[x] - avgOrig);
898
0
    }
899
0
    src += origY.stride;
900
0
  }
901
902
0
  const double sumAbsRatio = double (uiSadBestForQPA * 3 /*TODO: or 4? fine-tune!*/) / double (sumAbsZmOrig == 0 ? 1 : sumAbsZmOrig);
903
0
  const int pumpingReducQP = ((isBIM ? -1 : 0) + int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0);
904
905
0
  ctuPumpRedQP[ctuRsAddr] += pumpingReducQP;
906
907
0
  return pumpingReducQP;
908
0
}
909
910
} // namespace vvenc
911
912
//! \}