Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/BitAllocation.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or
4
other Intellectual Property Rights other than the copyrights concerning
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     BitAllocation.cpp
45
\brief    Bit allocation class for QP adaptation and, possibly, rate control
46
*/
47
48
#include "BitAllocation.h"
49
#include "EncStage.h"
50
#include "CommonLib/Picture.h"
51
#include "CommonLib/UnitTools.h"
52
#include <math.h>
53
54
#include "vvenc/vvencCfg.h"
55
56
57
//! \ingroup EncoderLib
58
//! \{
59
60
namespace vvenc {
61
62
// static functions
63
64
// Integer approximation of a weighted base-2 logarithm: returns weight * log2(d)
// rounded to the nearest integer, with weight = 2 when isSccStrong is set and
// weight = 3 otherwise.
// Inputs below a small positive limit (this includes zero and negative values)
// saturate at -128; the limits are chosen near the point where the weighted
// logarithm itself reaches -128. NaN saturates to -128 as well, since converting
// NaN to int would otherwise be undefined behavior.
static inline int apprI3Log2 (const double d, const bool isSccStrong) // rounded 2*log2(d) or 3*log2(d)
{
  const double weight = (isSccStrong ? 2.0 : 3.0);
  const double dLimit = (isSccStrong ? 5.5e-20 : 1.5e-13);

  // negated comparison on purpose: it is also true for NaN, which "d < dLimit" is not
  if (!(d >= dLimit))
  {
    return -128;
  }

  return int (floor (weight * log (d) / log (2.0) + 0.5));
}
71
72
// Delta-QP offset derived from the squared average luma value, normalized by
// the squared full-scale range (2^(2*bitDepth)). Returns 0 when bitDepth exceeds
// 16 or when avgLumaValue does not fit into bitDepth bits.
static inline int lumaDQPOffset (const uint32_t avgLumaValue, const uint32_t bitDepth)
{
  // the shift in the range test is only evaluated for bit depths of at most 16
  const bool inRange = (bitDepth <= 16) && (avgLumaValue < (1u << bitDepth));

  if (!inRange)
  {
    return 0;
  }

  // with avgLumaValue < 2^16 the 32-bit product below cannot wrap around
  const uint64_t lumaSquared = uint64_t (avgLumaValue * avgLumaValue);
  const uint64_t normShift   = uint64_t (2 * bitDepth);

#if 0
  // mapping for peak luminance of ca. 3*400 = 1200 nits
  return (2 - int ((9 * lumaSquared) >> normShift));
#else
  // mapping for peak luminance of ca. 2*400 =  800 nits
  return (1 - int ((6 * lumaSquared) >> normShift));
#endif
}
83
84
void calcSpatialVisAct ( const Pel* pSrc,
85
                         const int iSrcStride,
86
                         const int height,
87
                         const int width,
88
                         const uint32_t bitDepth,
89
                         const bool isUHD,
90
                         VisAct& va )
91
10.1k
{
92
10.1k
  CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" );
93
94
10.1k
  uint64_t saAct;  // spatial absolute activity sum
95
96
  // skip first row as there may be a black border frame
97
10.1k
  pSrc += iSrcStride;
98
99
  // center rows
100
10.1k
  if (isUHD) // high-pass with downsampling
101
0
  {
102
0
    pSrc += iSrcStride;
103
104
0
    saAct = g_pelBufOP.AvgHighPassWithDownsampling (width, height, pSrc, iSrcStride);
105
106
0
    va.hpSpatAct = double (saAct) / double ((width - 4) * (height - 4));
107
0
  }
108
10.1k
  else // HD high-pass without downsampling
109
10.1k
  {
110
10.1k
    saAct = g_pelBufOP.AvgHighPass (width, height, pSrc, iSrcStride);
111
112
10.1k
    va.hpSpatAct = double (saAct) / double ((width - 2) * (height - 2));
113
10.1k
  }
114
115
  // spatial in 12 bit
116
10.1k
  va.spatAct = unsigned (0.5 + va.hpSpatAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1));
117
10.1k
}
118
119
void calcTemporalVisAct ( const Pel* pSrc,
120
                          const int iSrcStride,
121
                          const int height,
122
                          const int width,
123
                          const Pel* pSM1,
124
                          const int iSM1Stride,
125
                          const Pel* pSM2,
126
                          const int iSM2Stride,
127
                          uint32_t frameRate,
128
                          const uint32_t bitDepth,
129
                          const bool isUHD,
130
                          VisAct& va )
131
9.05k
{
132
9.05k
  CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" );
133
9.05k
  CHECK( pSM1 == nullptr, "no compare buffer given to calculate temporal visual activity" );
134
135
9.05k
  const Pel* pS0 = pSrc;
136
9.05k
  uint64_t taAct;  // temporal absolute activity sum
137
138
  // force 1st-order delta if only prev. frame available
139
9.05k
  if (pSM2 == nullptr || iSM2Stride <= 0) frameRate = 24;
140
141
  // skip first row as there may be a black border frame
142
9.05k
  pSrc += iSrcStride;
143
144
  // center rows
145
9.05k
  if (pS0 == pSM1 && frameRate <= 31)
146
9.05k
  {
147
9.05k
    va.hpTempAct = 0; // bypass high-pass, result will be zero
148
9.05k
  }
149
0
  else if (isUHD)  // downsampled high-pass
150
0
  {
151
0
    const int i2M1Stride = iSM1Stride * 2;
152
153
0
    CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!");
154
155
0
    pSrc += iSrcStride;
156
0
    pSM1 += i2M1Stride;
157
0
    if (frameRate <= 31) // 1st-order delta
158
0
    {
159
0
      taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff1st (width, height, pSrc, pSM1, iSrcStride, iSM1Stride);
160
0
    }
161
0
    else // 2nd-order delta (diff of diffs)
162
0
    {
163
0
      const int i2M2Stride = iSM2Stride * 2;
164
165
0
      CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!");
166
167
0
      pSM2 += i2M2Stride;
168
0
      taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff2nd (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride);
169
0
    }
170
171
0
    va.hpTempAct = double (taAct) / double ((width - 4) * (height - 4));
172
0
  }
173
0
  else // HD high-pass without downsampling
174
0
  {
175
0
    CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!");
176
177
0
    pSM1 += iSM1Stride;
178
0
    if (frameRate <= 31) // 1st-order delta
179
0
    {
180
0
      taAct = g_pelBufOP.HDHighPass (width, height, pSrc, pSM1, iSrcStride, iSM1Stride);
181
0
    }
182
0
    else // 2nd-order delta (diff of diffs)
183
0
    {
184
0
      CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!");
185
186
0
      pSM2 += iSM2Stride;
187
0
      taAct = g_pelBufOP.HDHighPass2 (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride);
188
0
    }
189
190
0
    va.hpTempAct = double (taAct) / double ((width - 2) * (height - 2));
191
0
  }
192
193
  // temporal in 12 bit
194
9.05k
  va.tempAct = unsigned (0.5 + va.hpTempAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1) * (frameRate <= 31 ? 1.15625 : 1.0));
195
9.05k
}
196
197
// Derives the combined activity values of va from its spatial and temporal parts:
// minAct is the smaller of tempAct and spatAct, hpVisAct is hpSpatAct plus twice
// hpTempAct with a lower bound of 2^(bitDepth - 6), and visAct is hpVisAct rounded
// and clipped to bitDepth.
void updateVisAct ( VisAct& va, const uint32_t bitDepth )
{
  // minimum part in 12 bit
  va.minAct = std::min( va.tempAct, va.spatAct );

  // lower limit, compensate for high-pass amplification
  const double lowerLimit  = double (1 << (bitDepth - 6));
  const double weightedSum = va.hpSpatAct + 2.0 * va.hpTempAct;

  va.hpVisAct = std::max (lowerLimit, weightedSum);
  va.visAct   = ClipBD( uint16_t( 0.5 + va.hpVisAct ), bitDepth );
}
205
206
double filterAndCalculateAverageActivity ( const Pel* pSrc,
207
                                           const int iSrcStride,
208
                                           const int height,
209
                                           const int width,
210
                                           const Pel* pSM1,
211
                                           const int iSM1Stride,
212
                                           const Pel* pSM2,
213
                                           const int iSM2Stride,
214
                                           uint32_t frameRate,
215
                                           const uint32_t bitDepth,
216
                                           const bool isUHD,
217
                                           unsigned* minVisAct = nullptr,
218
                                           unsigned* spVisAct  = nullptr )
219
9.05k
{
220
9.05k
  VisAct va;
221
222
  // spatial activity
223
9.05k
  calcSpatialVisAct( pSrc, iSrcStride, height, width, bitDepth, isUHD, va );
224
225
  // temporal activity
226
9.05k
  calcTemporalVisAct( pSrc, iSrcStride, height, width, pSM1, iSM1Stride, pSM2, iSM2Stride,
227
9.05k
                     frameRate, bitDepth, isUHD, va );
228
229
  // minimum and visual activity
230
9.05k
  updateVisAct( va, bitDepth );
231
232
9.05k
  if( minVisAct )
233
3.46k
  {
234
3.46k
    *minVisAct = va.minAct;
235
3.46k
  }
236
9.05k
  if( spVisAct )
237
3.46k
  {
238
3.46k
    *spVisAct = va.spatAct;
239
3.46k
  }
240
241
9.05k
  return va.hpVisAct;
242
9.05k
}
243
244
// Returns the average picture activity used for normalization.
// A positive scaledAverageGopActivity is returned scaled down by 2^(24 - bitDepth).
// Otherwise a default is derived from the bit depth, the picture size relative
// to 3840x2160 and whether temporal filtering is used (base energy 32 instead of 16).
static double getAveragePictureActivity (const uint32_t picWidth,  const uint32_t picHeight,
                                         const int scaledAverageGopActivity,
                                         const bool tempFiltering, const uint32_t bitDepth)
{
  const bool useDefault = !(scaledAverageGopActivity > 0);

  if (useDefault)
  {
    const double baseEnergy = (tempFiltering ? 32.0 : 16.0);
    const double depthScale = double (1 << (2 * bitDepth - 10));
    const double sizeRatio  = (3840.0 * 2160.0) / double (picWidth * picHeight);
    const double hpEnerPic  = baseEnergy * depthScale * sqrt (sizeRatio);

    return sqrt (hpEnerPic); // square-root of a_pic value
  }

  return (double (scaledAverageGopActivity) / double (1 << (24 - bitDepth)));
}
256
257
// Returns a QP offset for a CTU (ctuAddr >= 0) or for the whole picture
// (ctuAddr < 0) based on how far the average luma exceeds the smaller of the
// two chroma averages. The offset is nonzero only when that difference is larger
// than the mid level 2^(bitDepth - 1).
//   avgLumaValue  output: for a CTU the value currently stored in
//                 pic->ctuAdaptedQP[ctuAddr], otherwise the average of the
//                 picture's original luma buffer
static int getGlaringColorQPOffset (Picture* const pic, const int ctuAddr, const int bitDepth, uint32_t &avgLumaValue)
{
  const PreCalcValues& pcv     = *pic->cs->pcv;
  const ChromaFormat chrFmt    = pic->chromaFormat;
  const unsigned  ctusPerRow   = pcv.widthInCtus;
  const SizeType  chrWidth     = pcv.maxCUSize >> getChannelTypeScaleX (CH_C, chrFmt);
  const SizeType  chrHeight    = pcv.maxCUSize >> getChannelTypeScaleY (CH_C, chrFmt);
  const int       midLevel     = 1 << (bitDepth - 1);
  const bool      ctuWise      = (ctuAddr >= 0);
  int             minChrAvg    = MAX_INT;

  avgLumaValue = uint32_t ((ctuAddr >= 0) ? pic->ctuAdaptedQP[ctuAddr] : pic->getOrigBuf().Y().getAvg());

  for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++)
  {
    const ComponentID compID = (ComponentID) comp;
    int avgCompValue;

    if (!ctuWise) // whole chroma plane
    {
      avgCompValue = pic->getOrigBuf (pic->block (compID)).getAvg();
    }
    else // chroma area of the CTU, clipped to the picture
    {
      const CompArea chrArea = clipArea (CompArea (compID, chrFmt, Area ((ctuAddr % ctusPerRow) * chrWidth, (ctuAddr / ctusPerRow) * chrHeight, chrWidth, chrHeight)), pic->block (compID));

      avgCompValue = pic->getOrigBuf (chrArea).getAvg();
    }

    minChrAvg = std::min (minChrAvg, avgCompValue); // minimum of the DC offsets
  }
  CHECK (minChrAvg < 0, "mean chroma value cannot be negative!");

  const int lumaExcess = (int) avgLumaValue - minChrAvg;

  if (lumaExcess <= midLevel)
  {
    return 0;
  }

  return apprI3Log2 (double (lumaExcess * lumaExcess) / double (midLevel * midLevel), pic->isSccStrong);
}
292
293
// Sub-CTU variant of the glaring-color QP offset: uses the chroma area co-located
// with lumaArea (clipped to the picture) and returns a nonzero offset only when
// the average luma exceeds the smaller chroma average by more than 2^(bitDepth - 1).
//   avgLumaValue  output: average of the original luma samples in lumaArea
static int getGlaringColorQPOffsetSubCtu (Picture* const pic, const CompArea& lumaArea, const int bitDepth, uint32_t &avgLumaValue)
{
  const ChromaFormat chrFmt = pic->chromaFormat;
  const int      midLevel   = 1 << (bitDepth - 1);

  // luma area mapped to chroma sample units
  const PosType  chrPosX    = lumaArea.x >> getChannelTypeScaleX (CH_C, chrFmt);
  const PosType  chrPosY    = lumaArea.y >> getChannelTypeScaleY (CH_C, chrFmt);
  const SizeType chrWidth   = lumaArea.width  >> getChannelTypeScaleX (CH_C, chrFmt);
  const SizeType chrHeight  = lumaArea.height >> getChannelTypeScaleY (CH_C, chrFmt);

  int minChrAvg = MAX_INT;

  avgLumaValue = pic->getOrigBuf (lumaArea).getAvg();

  for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++)
  {
    const ComponentID compID = (ComponentID) comp;
    const CompArea   chrArea = clipArea (CompArea (compID, chrFmt, Area (chrPosX, chrPosY, chrWidth, chrHeight)), pic->block (compID));
    const int   avgCompValue = pic->getOrigBuf (chrArea).getAvg();

    minChrAvg = std::min (minChrAvg, avgCompValue); // minimum of the DC offsets
  }
  CHECK (minChrAvg < 0, "mean chroma value cannot be negative!");

  const int lumaExcess = (int) avgLumaValue - minChrAvg;

  if (lumaExcess <= midLevel)
  {
    return 0;
  }

  return apprI3Log2 (double (lumaExcess * lumaExcess) / double (midLevel * midLevel), pic->isSccStrong);
}
322
323
static void updateMinNoiseLevelsPic (uint8_t* const minNoiseLevels, const int bitDepth, const unsigned avgValue, const unsigned noise)
324
0
{
325
0
  const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions
326
327
0
  CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds");
328
329
0
  if (noise < (unsigned) minNoiseLevels[avgIndex])
330
0
  {
331
0
    minNoiseLevels[avgIndex] = (uint8_t) noise;
332
0
  }
333
0
}
334
335
static void clipQPValToEstimatedMinimStats (const uint8_t* minNoiseLevels, const int bitDepth, const unsigned avgValue,
336
                                            const double resFac, const int extraQPOffset, int& QP) // output QP
337
0
{
338
0
  const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions
339
0
  const unsigned x = (1 << 3) - 1;
340
0
  const int32_t dQPOffset = -15;
341
342
0
  CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds");
343
344
0
  int i = minNoiseLevels[avgIndex];
345
346
  // try to "fill in the blanks" in luma range (also results in peak smoothing, as described in PCS 2022 paper)
347
0
  if (avgIndex == 0 && i > minNoiseLevels[0 + 1]) i = minNoiseLevels[0 + 1];
348
0
  if (avgIndex == x && i > minNoiseLevels[x - 1]) i = minNoiseLevels[x - 1];
349
350
0
  if (avgIndex > 0 && avgIndex < x)
351
0
  {
352
0
    const uint8_t maxNeighborNoiseLevel = std::max (minNoiseLevels[avgIndex - 1], minNoiseLevels[avgIndex + 1]);
353
354
0
    if (i > maxNeighborNoiseLevel) i = maxNeighborNoiseLevel;
355
0
  }
356
0
  if (i >= 255)
357
0
  {
358
0
    return;
359
0
  }
360
361
0
  i = std::max (0, apprI3Log2 (std::min (1.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // = 6*log2
362
0
  if (QP < i)
363
0
  {
364
0
    QP = i;
365
0
  }
366
0
}
367
368
// Smooths isolated peaks and holes in the CTU-wise adapted QPs: a non-boundary CTU
// whose QP lies above the maximum (below the minimum) of its neighborhood, taken
// from the QPs before smoothing, gets its QP decreased (increased) by one and its
// lambda scaled accordingly. The last CTU row before endAddr is not visited.
// Returns the sum of all applied QP changes, or 0 when the CTU row width is zero
// or the range does not extend more than three CTU rows beyond startAddr.
// encCfg is not used by this function.
static int applyDeltaQpPeakSmoothing (Picture* const pic, const VVEncCfg* encCfg, const uint32_t startAddr, const uint32_t endAddr)
{
  const uint32_t ctuWdt = pic->cs->pcv->widthInCtus;

  if (ctuWdt == 0 || endAddr <= startAddr + 3u * ctuWdt)
  {
    return 0;
  }

  const uint32_t   lastRowStart = endAddr - ctuWdt;
  std::vector<int> rowAboveQP (ctuWdt); // unsmoothed QPs of the row above, zero-initialized
  int              leftRawQP    = 0;    // unsmoothed QP of the CTU to the left
  int              qpChangeSum  = 0;

  for (uint32_t addr = startAddr; addr < lastRowStart; addr++)
  {
    const int32_t col = addr % ctuWdt;

    if (addr < ctuWdt) // first picture row: only record the QPs
    {
      rowAboveQP[col] = pic->ctuAdaptedQP[addr];
      continue;
    }
    if (col == 0) // left boundary CTU
    {
      leftRawQP = pic->ctuAdaptedQP[addr];
      continue;
    }
    if (col == ctuWdt - 1) // right boundary CTU
    {
      rowAboveQP[col - 1] = leftRawQP;
      rowAboveQP[col]     = pic->ctuAdaptedQP[addr];
      continue;
    }

    // no boundary CTU: max. and min. in 3x3 neighborhood
    const int32_t belowAddr = addr + ctuWdt;
    const int     rightQP   = pic->ctuAdaptedQP[addr + 1];
    int qpMax = std::max (leftRawQP, rightQP);
    int qpMin = std::min (leftRawQP, rightQP);

    for (int32_t d = -1; d <= 1; d++)
    {
      const int aboveQP = rowAboveQP[col + d];
      const int belowQP = pic->ctuAdaptedQP[belowAddr + d];

      qpMax = std::max (qpMax, std::max (aboveQP, belowQP));
      qpMin = std::min (qpMin, std::min (aboveQP, belowQP));
    }

    // the left neighbor's raw QP moves into the row buffer before the current
    // CTU's raw QP is taken; the order of these two statements matters
    rowAboveQP[col - 1] = leftRawQP;
    leftRawQP = pic->ctuAdaptedQP[addr];

    if (leftRawQP > qpMax)
    {
      pic->ctuQpaLambda[addr] *= 0.793701; // peak: decrease adapted lambda, QP
      pic->ctuAdaptedQP[addr]--;
      qpChangeSum--;
    }
    if (leftRawQP < qpMin)
    {
      pic->ctuQpaLambda[addr] *= 1.259921; // hole: increase adapted lambda, QP
      pic->ctuAdaptedQP[addr]++;
      qpChangeSum++;
    }
  }

  return qpChangeSum;
}
423
424
// Applies rcQpDiff to the adapted QP of every CTU in [startAddr, endAddr), clips the
// result to [0, MAX_QP] and stores the QPs with their matching lambdas in pic.
// In an encoding pass (isEncPass) each CTU QP is additionally raised to the bound
// given by clipQPValToEstimatedMinimStats(); CTUs raised this way are marked with -1
// in ctuAvgLuma. If such raising pushed the QP sum above the target, the QPs of
// unmarked CTUs are lowered by one, starting with the highest QP value, until the
// target sum is reached or no candidates remain.
// Returns the rounded mean of the final CTU QPs.
// ctuAvgLuma is indexed relative to startAddr and is only accessed when isEncPass is set.
static int refineDeltaQpDistribution (Picture* const pic, const VVEncCfg* encCfg,   const int sliceQP,
                                      const double sliceLambda, const int rcQpDiff, const int bitDepth,
                                      const uint32_t startAddr, const uint32_t endAddr, const int qpSum,
                                      const uint32_t tempLayer, const bool isIntra, const bool isEncPass,
                                      const uint8_t* minNoiseLevels, std::vector<int>& ctuAvgLuma)
{
  const double resRatio = (isEncPass ? sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0)) : 0.0);
  const int ctusInSlice = int (endAddr - startAddr);
  const int targetQpSum = (encCfg->m_RCTargetBitrate > 0 ? sliceQP * ctusInSlice : qpSum);
  int  blockQpSum = 0;
  bool anyLimited = false;

  // first pass: offset, limit and store all CTU QPs
  for (uint32_t addr = startAddr; addr < endAddr; addr++)
  {
    int ctuQP = std::max (0, pic->ctuAdaptedQP[addr] + rcQpDiff);

    if (isEncPass)
    {
      const int qpBeforeLimit = ctuQP; // CTU QP before clipping for diff calculation below
      const int extraOffset   = (isIntra ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) tempLayer));

      clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, ctuAvgLuma[addr - startAddr], resRatio, extraOffset, ctuQP);

      if (ctuQP > qpBeforeLimit)
      {
        ctuAvgLuma[addr - startAddr] = -1; // mark CTU as being processed already
        anyLimited = true;
      }
    }

    ctuQP = std::min (MAX_QP, ctuQP);

    const double ctuLambda = sliceLambda * pow (2.0, double (ctuQP - sliceQP) / 3.0);

    blockQpSum += ctuQP;

    pic->ctuQpaLambda[addr] = ctuLambda;  // store modified CTU lambdas and QPs
    pic->ctuAdaptedQP[addr] = ctuQP;
  }

  if (anyLimited && blockQpSum > targetQpSum) // CTU QPs limited, so distribute saved rate among nonlimited CTUs
  {
    int maxCtuQP = 0, minCtuQP = MAX_QP;

    // maximum over the nonlimited CTUs, minimum over all CTUs
    for (uint32_t addr = startAddr; addr < endAddr; addr++)
    {
      const int  storedQP   = pic->ctuAdaptedQP[addr];
      const bool nonlimited = (ctuAvgLuma[addr - startAddr] >= 0);

      if (nonlimited && storedQP > maxCtuQP)
      {
        maxCtuQP = storedQP;
      }
      if (storedQP < minCtuQP)
      {
        minCtuQP = storedQP;
      }
    }

    minCtuQP = std::max (0, minCtuQP);

    // spend rate starting at max QPs, then go downward
    for (; maxCtuQP > minCtuQP && blockQpSum > targetQpSum; maxCtuQP--)
    {
      for (uint32_t addr = startAddr; addr < endAddr; addr++)
      {
        if (ctuAvgLuma[addr - startAddr] >= 0 && pic->ctuAdaptedQP[addr] == maxCtuQP)
        {
          const int    reducedQP     = std::max (0, pic->ctuAdaptedQP[addr] - 1);
          const double reducedLambda = sliceLambda * pow (2.0, double (reducedQP - sliceQP) / 3.0);

          ctuAvgLuma[addr - startAddr] = -1; // mark CTU as being reduced already

          if (reducedQP < pic->ctuAdaptedQP[addr])
          {
            blockQpSum--;
          }

          pic->ctuQpaLambda[addr] = reducedLambda; // store reduced lambdas and QPs
          pic->ctuAdaptedQP[addr] = reducedQP;
        }

        if (blockQpSum <= targetQpSum)
        {
          break;
        }
      }
    }
  }

  return (blockQpSum + (ctusInSlice >> 1)) / ctusInSlice;
}
505
506
// public functions
507
508
int BitAllocation::applyQPAdaptationSlice (const Slice* slice, const VVEncCfg* encCfg, const int sliceQP,
509
                                           const double sliceLambda, uint16_t* const picVisActLuma,
510
                                           std::vector<int>& ctuPumpRedQP, std::vector<uint8_t>* ctuRCQPMemory,
511
                                           int* const optChromaQPOffsets, const uint8_t* minNoiseLevels,
512
                                           const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr)
513
1.11k
{
514
1.11k
  Picture* const pic          = (slice != nullptr ? slice->pic : nullptr);
515
1.11k
  double hpEner[MAX_NUM_COMP] = {0.0, 0.0, 0.0};
516
1.11k
  double averageAdaptedLambda = 0.0;
517
1.11k
  int    averageAdaptedLumaQP = -1;
518
1.11k
  uint32_t meanLuma           = MAX_UINT;
519
1.11k
  std::vector<int> ctuAvgLuma;
520
521
1.11k
  if (pic == nullptr || pic->cs == nullptr || encCfg == nullptr || ctuStartAddr >= ctuBoundingAddr)
522
0
  {
523
0
    return -1;
524
0
  }
525
526
1.11k
  const bool isEncPass        = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis);
527
1.11k
  const bool isHDR            = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ);
528
1.11k
  const bool isBIM            = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty());
529
1.11k
  const bool isSccStrongRC    = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong);
530
1.11k
  const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280);
531
1.11k
  const bool useFrameWiseQPA  = (encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (slice->TLayer > 0);
532
1.11k
  const uint32_t hpFrameRate  = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale);
533
1.11k
  const int  bitDepth         = slice->sps->bitDepths[CH_L];
534
1.11k
  double hpEnerPicNorm        = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, (encCfg->m_RCNumPasses == 2 ? 0 : ctuPumpRedQP.back()),
535
1.11k
                                                                 (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth);
536
1.11k
  const PreCalcValues& pcv    = *pic->cs->pcv;
537
538
1.11k
  pic->picInitialQP = sliceQP;  // modified below and used in applyQPAdaptationSubCtu
539
1.11k
  if ((encCfg->m_RCTargetBitrate > 0) && useFrameWiseQPA)
540
0
  {
541
0
    averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP - 1); // one will be added again
542
0
  }
543
544
4.44k
  for (uint32_t comp = 0; comp < getNumberValidComponents (pic->chromaFormat); comp++)
545
3.33k
  {
546
3.33k
    const ComponentID compID  = (ComponentID) comp;
547
548
3.33k
    if (isLuma (compID)) // luma: CTU-wise QPA operation
549
1.11k
    {
550
1.11k
      const PosType guardSize = (isHighResolution ? 2 : 1);
551
1.11k
      unsigned zeroMinActCTUs = 0, picSpVisAct = 0;
552
553
4.57k
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
554
3.46k
      {
555
3.46k
        const Position pos ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUSize, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUSize);
556
3.46k
        const CompArea ctuArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUSize, pcv.maxCUSize)), pic->Y());
557
3.46k
        const SizeType fltWidth  = pcv.maxCUSize + guardSize * (pos.x > 0 ? 2 : 1);
558
3.46k
        const SizeType fltHeight = pcv.maxCUSize + guardSize * (pos.y > 0 ? 2 : 1);
559
3.46k
        const CompArea fltArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y());
560
3.46k
        const CPelBuf  picOrig   = pic->getOrigBuf (fltArea);
561
3.46k
        const CPelBuf  picPrv1   = pic->getOrigBufPrev (fltArea, PREV_FRAME_1);
562
3.46k
        const CPelBuf  picPrv2   = pic->getOrigBufPrev (fltArea, PREV_FRAME_2);
563
3.46k
        unsigned minActivityPart = 0, spVisActCTU = 0;
564
565
3.46k
        hpEner[1] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
566
3.46k
                                                       picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
567
3.46k
                                                       bitDepth, isHighResolution, &minActivityPart, &spVisActCTU);
568
569
3.46k
        if (minActivityPart == 0) zeroMinActCTUs++;
570
571
3.46k
        hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height);
572
3.46k
        pic->ctuQpaLambda[ctuRsAddr] = hpEner[1]; // temporary backup of CTU mean visual activity
573
3.46k
        pic->ctuAdaptedQP[ctuRsAddr] = (int) pic->getOrigBuf (ctuArea).getAvg(); // and mean luma
574
575
3.46k
        if (picOrig.buf == picPrv1.buf) // replace temporal visual activity with min motion error
576
3.46k
        {
577
3.46k
          hpEner[1] = pic->m_picShared->m_minNoiseLevels[pic->ctuAdaptedQP[ctuRsAddr] >> (bitDepth - 3)] * (bitDepth >= 10 ? 1.5 : 0.375);
578
579
3.46k
          if (hpEner[1] < (bitDepth >= 10 ? 382.5 : 95.625)) // levels in first frame
580
0
          {
581
0
            hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height);
582
0
            pic->ctuQpaLambda[ctuRsAddr] += hpEner[1]; // add noise level to mean visual activity
583
0
          }
584
3.46k
        }
585
0
        else if (!isEncPass && (encCfg->m_RCNumPasses == 2 || pic->gopEntry->m_mctfIndex < 0 || !pic->gopEntry->m_isStartOfGop))
586
0
        {
587
0
          updateMinNoiseLevelsPic (pic->m_picShared->m_minNoiseLevels, bitDepth, pic->ctuAdaptedQP[ctuRsAddr], minActivityPart);
588
0
        }
589
3.46k
        picSpVisAct += spVisActCTU;
590
3.46k
      }
591
592
1.11k
      hpEner[comp] /= double (encCfg->m_SourceWidth * encCfg->m_SourceHeight);
593
1.11k
      if (picVisActLuma != nullptr)
594
1.11k
      {
595
1.11k
        *picVisActLuma = ClipBD (uint16_t (0.5 + hpEner[comp]), bitDepth);
596
1.11k
      }
597
1.11k
      if (ctuBoundingAddr > ctuStartAddr)
598
1.11k
      {
599
1.11k
        const uint32_t nCtu = ctuBoundingAddr - ctuStartAddr;
600
601
1.11k
        pic->picVA.spatAct[ CH_L ] = ClipBD (uint16_t ((picSpVisAct + (nCtu >> 1)) / nCtu), 12);
602
1.11k
      }
603
1.11k
      if (encCfg->m_internalUsePerceptQPATempFiltISlice && slice->isIntra() && pic->getOrigBuf (compID).buf != pic->getOrigBufPrev (compID, PREV_FRAME_1).buf && zeroMinActCTUs * 2 > ctuBoundingAddr - ctuStartAddr)
604
0
      {
605
0
        hpEnerPicNorm *= sqrt (zeroMinActCTUs * 2.0 / float (ctuBoundingAddr - ctuStartAddr)); // frozen-image mode
606
0
      }
607
1.11k
    }
608
2.22k
    else // chroma: only picture-wise operation required
609
2.22k
    {
610
2.22k
      const CPelBuf picOrig = pic->getOrigBuf (compID);
611
2.22k
      const CPelBuf picPrv1 = pic->getOrigBufPrev (compID, PREV_FRAME_1);
612
2.22k
      const CPelBuf picPrv2 = pic->getOrigBufPrev (compID, PREV_FRAME_2);
613
614
2.22k
      hpEner[comp] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
615
2.22k
                                                        picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
616
2.22k
                                                        bitDepth, isHighResolution && (pic->chromaFormat == CHROMA_444));
617
618
2.22k
      const int adaptChromaQPOffset = 1.5 * hpEner[comp] <= hpEner[0] ? 0 : apprI3Log2 (1.5 * hpEner[comp] / hpEner[0], pic->isSccStrong);
619
620
2.22k
      if (averageAdaptedLumaQP < 0) // YUV is not 4:0:0!
621
1.11k
      {
622
1.11k
        averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC));
623
624
1.11k
        if (isChromaEnabled (pic->chromaFormat) && (averageAdaptedLumaQP < MAX_QP))
625
1.11k
        {
626
1.11k
          averageAdaptedLumaQP += getGlaringColorQPOffset (pic, -1 /*ctuAddr*/, slice->sps->bitDepths[CH_C], meanLuma);
627
628
1.11k
          if ((averageAdaptedLumaQP > MAX_QP) && !isHDR) averageAdaptedLumaQP = MAX_QP;
629
1.11k
        }
630
        // change mean picture QP index based on picture's average luma value (Sharp)
631
1.11k
        if (isHDR)
632
0
        {
633
0
          if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg();
634
635
0
          averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
636
0
        }
637
1.11k
      }
638
639
2.22k
      if (optChromaQPOffsets != nullptr) // adapts sliceChromaQpOffsetIntraOrPeriodic
640
2.22k
      {
641
        GCC_WARNING_DISABLE_maybe_uninitialized // probably spurious warning, when building with -fsanitize=undefined: "error: 'encCfg.33' may be used uninitialized in this function"
642
2.22k
        const int lumaChromaMappingDQP = (averageAdaptedLumaQP - slice->sps->chromaQpMappingTable.getMappedChromaQpValue (compID, averageAdaptedLumaQP)) >> (encCfg->m_HdrMode == vvencHDRMode::VVENC_HDR_OFF ? 1 : 2);
643
2.22k
        GCC_WARNING_RESET
644
2.22k
        optChromaQPOffsets[comp - 1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP);
645
2.22k
      }
646
2.22k
    } // isLuma or isChroma
647
3.33k
  }
648
649
1.11k
  if (averageAdaptedLumaQP < 0) // only if YUV is 4:0:0!
650
0
  {
651
0
    averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC));
652
653
    // change mean picture QP index based on the picture's average luma value (Sharp)
654
0
    if (isHDR)
655
0
    {
656
0
      if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg();
657
658
0
      averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
659
0
    }
660
0
  }
661
662
1.11k
  if (encCfg->m_RCNumPasses == 2 && (encCfg->m_RCTargetBitrate > 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra())
663
0
  {
664
0
    const int nCtu = int (ctuBoundingAddr - ctuStartAddr);
665
0
    const int offs = (slice->poc / encCfg->m_IntraPeriod) * ((nCtu + 1) >> 1);
666
0
    std::vector<uint8_t>& ctuQPMem = *ctuRCQPMemory; // unpack 1st-pass reduction QPs
667
668
0
    if ((ctuPumpRedQP.size() >= nCtu) && (ctuQPMem.size() >= offs + ((nCtu + 1) >> 1)))
669
0
    {
670
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
671
0
      {
672
0
        ctuPumpRedQP[ctuRsAddr] = int ((ctuRsAddr & 1) ? ctuQPMem[offs + (ctuRsAddr >> 1)] >> 4 : ctuQPMem[offs + (ctuRsAddr >> 1)] & 15) - 8;
673
0
      }
674
0
    }
675
0
  }
676
677
1.11k
  if (useFrameWiseQPA || (averageAdaptedLumaQP >= MAX_QP)) // store the CTU-wise QP/lambda values
678
0
  {
679
0
    averageAdaptedLumaQP = std::min (MAX_QP, averageAdaptedLumaQP + 1);
680
0
    averageAdaptedLambda = sliceLambda * pow (2.0, double (averageAdaptedLumaQP - sliceQP) / 3.0);
681
682
0
    for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
683
0
    {
684
0
      pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP
685
0
      pic->ctuAdaptedQP[ctuRsAddr] = averageAdaptedLumaQP;
686
0
    }
687
0
  }
688
1.11k
  else // use CTU-level QPA
689
1.11k
  {
690
1.11k
    const int nCtu = int (ctuBoundingAddr - ctuStartAddr);
691
1.11k
    const int dvsr = encCfg->m_IntraPeriod - encCfg->m_GOPSize;
692
1.11k
    const int aaQP = averageAdaptedLumaQP; // backup of initial average QP from above
693
1.11k
    const bool rcIsFirstPassOf2 = ((encCfg->m_RCTargetBitrate == 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (slice->poc > 0) ? encCfg->m_RCNumPasses == 2 : false);
694
695
1.11k
    if (isEncPass) ctuAvgLuma.resize (nCtu);
696
697
1.11k
    averageAdaptedLumaQP = 0;
698
4.57k
    for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
699
3.46k
    {
700
3.46k
      const double hpEnerCTU = pic->ctuQpaLambda[ctuRsAddr];
701
3.46k
      int adaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEnerCTU * hpEnerPicNorm, isSccStrongRC));
702
703
3.46k
      if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra() && (ctuPumpRedQP.size() > ctuRsAddr))
704
0
      {
705
0
        if (rcIsFirstPassOf2) // backup 1st-pass I-frame QP for 2nd rate control pass
706
0
        {
707
0
          if (ctuRsAddr & 1) ctuRCQPMemory->back() |= (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8) << 4;
708
0
          else /*even addr*/ ctuRCQPMemory->push_back (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8);
709
0
          if (adaptedLumaQP > 0)
710
0
          {
711
0
            adaptedLumaQP -= (aaQP >> 4); // some first-pass tuning for stabilization
712
0
          }
713
0
        }
714
0
        if (ctuPumpRedQP[ctuRsAddr] < 0) adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize - (dvsr >> 1)) / dvsr);
715
0
        else /*ctuPumpRedQP[addr] >= 0*/ adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize + (dvsr >> 1)) / dvsr);
716
717
0
        ctuPumpRedQP[ctuRsAddr] = 0; // reset QP memory for temporal pumping analysis
718
0
      }
719
3.46k
      if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && !slice->isIntra() && (slice->TLayer == 0) && rcIsFirstPassOf2 && (adaptedLumaQP < MAX_QP))
720
0
      {
721
0
        adaptedLumaQP++; // this is a first-pass tuning to stabilize the rate control
722
0
      }
723
3.46k
      meanLuma = MAX_UINT;
724
3.46k
      if (isChromaEnabled (pic->chromaFormat) && (adaptedLumaQP < MAX_QP))
725
3.46k
      {
726
3.46k
        adaptedLumaQP += getGlaringColorQPOffset (pic, (int) ctuRsAddr, slice->sps->bitDepths[CH_C], meanLuma);
727
728
3.46k
        if ((adaptedLumaQP > MAX_QP) && !isHDR) adaptedLumaQP = MAX_QP;
729
3.46k
      }
730
      // change the CTU-level QP index based on CTU area's average luma value (Sharp)
731
3.46k
      if (isHDR)
732
0
      {
733
0
        if (meanLuma == MAX_UINT) meanLuma = pic->ctuAdaptedQP[ctuRsAddr];
734
735
0
        adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
736
0
      }
737
      // add further delta-QP of block importance mapping (BIM) detector if available
738
3.46k
      if (isBIM)
739
0
      {
740
0
        adaptedLumaQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedLumaQP + pic->m_picShared->m_ctuBimQpOffset[ctuRsAddr]);
741
0
      }
742
      // reduce delta-QP variance, avoid wasting precious bit budget at low bit-rates
743
3.46k
      if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1))
744
607
      {
745
607
        const int retunedAdLumaQP = adaptedLumaQP + 1;
746
747
607
        adaptedLumaQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedLumaQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * aaQP + 2) >> 2;
748
607
        if (adaptedLumaQP > retunedAdLumaQP) adaptedLumaQP = retunedAdLumaQP;
749
607
        if (adaptedLumaQP < MAX_QP && encCfg->m_QP == MAX_QP_PERCEPT_QPA && slice->TLayer > 1) adaptedLumaQP++; // a fine-tuning
750
607
      }
751
3.46k
      if (isEncPass) ctuAvgLuma[ctuRsAddr - ctuStartAddr] = pic->ctuAdaptedQP[ctuRsAddr];
752
753
3.46k
      averageAdaptedLambda = sliceLambda * pow (2.0, double (adaptedLumaQP - sliceQP) / 3.0);
754
3.46k
      averageAdaptedLumaQP += adaptedLumaQP;
755
756
3.46k
      pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP
757
3.46k
      pic->ctuAdaptedQP[ctuRsAddr] = adaptedLumaQP;
758
3.46k
    }
759
760
1.11k
    if (encCfg->m_cuQpDeltaSubdiv == 0 || !slice->isIntra()) averageAdaptedLumaQP += applyDeltaQpPeakSmoothing (pic, encCfg, ctuStartAddr, ctuBoundingAddr);
761
762
1.11k
    meanLuma = std::max (0, averageAdaptedLumaQP);
763
1.11k
    averageAdaptedLumaQP = (meanLuma + (nCtu >> 1)) / nCtu;
764
765
1.11k
    if ((encCfg->m_RCTargetBitrate > 0 && averageAdaptedLumaQP != sliceQP) || (isEncPass) || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ) // QP/rate control
766
0
    {
767
0
      const int rcQpDiff = (encCfg->m_RCTargetBitrate > 0 || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ? sliceQP - averageAdaptedLumaQP : 0);
768
769
0
      averageAdaptedLumaQP = refineDeltaQpDistribution (pic, encCfg, sliceQP, sliceLambda, rcQpDiff, bitDepth, ctuStartAddr, ctuBoundingAddr,
770
0
                                                        meanLuma, slice->TLayer, slice->isIntra(), isEncPass, minNoiseLevels, ctuAvgLuma);
771
772
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu
773
0
      pic->isMeanQPLimited = (encCfg->m_RCTargetBitrate > 0) && isEncPass && (averageAdaptedLumaQP > sliceQP);
774
0
    }
775
1.11k
    else if (encCfg->m_rateCap && (pic->gopAdaptedQP > 0) && (averageAdaptedLumaQP < aaQP)) // capped CQF
776
0
    {
777
0
      const int rcQpDiff = aaQP - averageAdaptedLumaQP;
778
779
0
      averageAdaptedLambda = pow (2.0, double (rcQpDiff) / 3.0);
780
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
781
0
      {
782
0
        pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda
783
0
        pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + rcQpDiff);
784
0
      }
785
786
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu
787
0
      averageAdaptedLumaQP = aaQP;
788
0
    }
789
1.11k
    else if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (averageAdaptedLumaQP + 1 < aaQP))
790
0
    {
791
0
      const int lrQpDiff = (aaQP - averageAdaptedLumaQP) >> (encCfg->m_QP <= MAX_QP_PERCEPT_QPA ? 2 : 1); // for monotonous rate change at low rates
792
793
0
      averageAdaptedLambda = pow (2.0, double (lrQpDiff) / 3.0);
794
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
795
0
      {
796
0
        pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda
797
0
        pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + lrQpDiff);
798
0
      }
799
800
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + lrQpDiff); // used in applyQPAdaptationSubCtu
801
0
      averageAdaptedLumaQP = aaQP; // TODO hlm: += lrQpDiff?
802
803
0
      pic->isMeanQPLimited = false;
804
0
    }
805
806
1.11k
    if (isEncPass) ctuAvgLuma.clear();
807
1.11k
  } // CTU-/frame-level QPA
808
809
1.11k
  return averageAdaptedLumaQP;
810
1.11k
}
811
812
int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const VVEncCfg* encCfg, const Area& lumaArea, const uint8_t* minNoiseLevels)
813
3.36k
{
814
3.36k
  Picture* const pic          = (slice != nullptr ? slice->pic : nullptr);
815
3.36k
  uint32_t meanLuma           = MAX_UINT;
816
817
3.36k
  if (pic == nullptr || encCfg == nullptr)
818
0
  {
819
0
    return -1;
820
0
  }
821
822
3.36k
  const bool isEncPass        = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis);
823
3.36k
  const bool isHDR            = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ);
824
3.36k
  const bool isBIM            = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty());
825
3.36k
  const bool isSccStrongRC    = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong);
826
3.36k
  const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280);
827
3.36k
  const uint32_t hpFrameRate  = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale);
828
3.36k
  const int  bitDepth         = slice->sps->bitDepths[CH_L];
829
3.36k
  const PosType     guardSize = (isHighResolution ? 2 : 1);
830
3.36k
  const Position    pos       = lumaArea.pos();
831
3.36k
  const CompArea    subArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, lumaArea.width, lumaArea.height)), pic->Y());
832
3.36k
  const SizeType    fltWidth  = lumaArea.width  + guardSize * (pos.x > 0 ? 2 : 1);
833
3.36k
  const SizeType    fltHeight = lumaArea.height + guardSize * (pos.y > 0 ? 2 : 1);
834
3.36k
  const CompArea    fltArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y());
835
3.36k
  const CPelBuf     picOrig   = pic->getOrigBuf (fltArea);
836
3.36k
  const CPelBuf     picPrv1   = pic->getOrigBufPrev (fltArea, PREV_FRAME_1);
837
3.36k
  const CPelBuf     picPrv2   = pic->getOrigBufPrev (fltArea, PREV_FRAME_2);
838
3.36k
  const double hpEnerSubCTU   = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
839
3.36k
                                                                   picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
840
3.36k
                                                                   bitDepth, isHighResolution);
841
3.36k
  const double hpEnerPicNorm  = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, 0,
842
3.36k
                                                                 (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth);
843
3.36k
  int adaptedSubCtuQP = Clip3 (0, MAX_QP, pic->picInitialQP + apprI3Log2 (hpEnerSubCTU * hpEnerPicNorm, isSccStrongRC));
844
845
3.36k
  if (isChromaEnabled (pic->chromaFormat) && (adaptedSubCtuQP < MAX_QP))
846
3.36k
  {
847
3.36k
    adaptedSubCtuQP += getGlaringColorQPOffsetSubCtu (pic, subArea, slice->sps->bitDepths[CH_C], meanLuma);
848
849
3.36k
    if ((adaptedSubCtuQP > MAX_QP) && !isHDR) adaptedSubCtuQP = MAX_QP;
850
3.36k
  }
851
  // change the sub-CTU-level QP index based on sub-area's average luma value (Sharp)
852
3.36k
  if (isHDR)
853
0
  {
854
0
    if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg();
855
856
0
    adaptedSubCtuQP = Clip3 (0, MAX_QP, adaptedSubCtuQP + lumaDQPOffset (meanLuma, bitDepth));
857
0
  }
858
  // add additional delta-QP of block importance mapping (BIM) detection if available
859
3.36k
  if (isBIM)
860
0
  {
861
0
    adaptedSubCtuQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedSubCtuQP + pic->m_picShared->m_ctuBimQpOffset[getCtuAddr (pos, *pic->cs->pcv)]);
862
0
  }
863
  // reduce the delta-QP variance, avoid wasting precious bit budget at low bit-rates
864
3.36k
  if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (slice->sliceQp >= 0) && (encCfg->m_framesToBeEncoded != 1))
865
0
  {
866
0
    const int retunedAdLumaQP = adaptedSubCtuQP + 1;
867
868
0
    adaptedSubCtuQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedSubCtuQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * slice->sliceQp + 2) >> 2;
869
0
    if (adaptedSubCtuQP > retunedAdLumaQP) adaptedSubCtuQP = retunedAdLumaQP;
870
0
    if (adaptedSubCtuQP < MAX_QP && encCfg->m_QP >= MAX_QP_PERCEPT_QPA) adaptedSubCtuQP++; // for monotonous rate change, l. 507
871
0
  }
872
3.36k
  if (isEncPass)
873
0
  {
874
0
    const double resRatio = sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0));
875
876
0
    if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg();
877
0
    clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, meanLuma, resRatio, (slice->isIntra() ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) slice->TLayer)), adaptedSubCtuQP);
878
0
  }
879
880
3.36k
  return adaptedSubCtuQP;
881
3.36k
}
882
883
int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA,
884
                                            std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP,
885
                                            const bool isBIM)
886
0
{
887
0
  if (slice == nullptr || !slice->pps->useDQP || ctuPumpRedQP.size() <= ctuRsAddr) return 0;
888
889
0
  const int32_t avgOrig = origY.getAvg();
890
0
  uint32_t sumAbsZmOrig = 0; // zero-mean
891
0
  const Pel* src = origY.buf;
892
893
0
  for (SizeType y = 0; y < origY.height; y++) // sum up the zero-mean absolute values
894
0
  {
895
0
    for (SizeType x = 0; x < origY.width; x++)
896
0
    {
897
0
      sumAbsZmOrig += (uint32_t) abs (src[x] - avgOrig);
898
0
    }
899
0
    src += origY.stride;
900
0
  }
901
902
0
  const double sumAbsRatio = double (uiSadBestForQPA * 3 /*TODO: or 4? fine-tune!*/) / double (sumAbsZmOrig == 0 ? 1 : sumAbsZmOrig);
903
0
  const int pumpingReducQP = ((isBIM ? -1 : 0) + int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0);
904
905
0
  ctuPumpRedQP[ctuRsAddr] += pumpingReducQP;
906
907
0
  return pumpingReducQP;
908
0
}
909
910
} // namespace vvenc
911
912
//! \}