Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/EncoderLib/BitAllocation.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or
4
other Intellectual Property Rights other than the copyrights concerning
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     BitAllocation.cpp
45
\brief    Bit allocation class for QP adaptation and, possibly, rate control
46
*/
47
48
#include "BitAllocation.h"
49
#include "EncStage.h"
50
#include "CommonLib/Picture.h"
51
#include "CommonLib/UnitTools.h"
52
#include <math.h>
53
54
#include "vvenc/vvencCfg.h"
55
56
57
//! \ingroup EncoderLib
58
//! \{
59
60
namespace vvenc {
61
62
// static functions
63
64
// Returns the rounded value of 2*log2(d) (strong screen content) or 3*log2(d) (otherwise).
// Inputs below a small positive limit, including zero and negative values, yield -128.
static inline int apprI3Log2 (const double d, const bool isSccStrong)
{
  double scale;
  double lowerBound;

  if (isSccStrong)
  {
    scale      = 2.0;
    lowerBound = 5.5e-20;
  }
  else
  {
    scale      = 3.0;
    lowerBound = 1.5e-13;
  }

  if (d < lowerBound)
  {
    return -128; // the scaled logarithm would fall to about -128 or below
  }

  // log2 via natural logarithms, then round to nearest by adding 0.5 and flooring
  return int (floor (scale * log (d) / log (2.0) + 0.5));
}
71
72
// Maps an average luma value to a QP offset that decreases quadratically with brightness.
// Returns 0 when the bit depth exceeds 16 or the average lies outside the bit-depth range.
static inline int lumaDQPOffset (const uint32_t avgLumaValue, const uint32_t bitDepth)
{
  const bool depthUnsupported = (bitDepth > 16);

  if (depthUnsupported || avgLumaValue >= (1u << bitDepth))
  {
    return 0;
  }

  // squared average luma, normalized further below by 2^(2*bitDepth)
  const uint64_t lumaSquared = uint64_t (avgLumaValue * avgLumaValue);
  const uint64_t normShift   = uint64_t (2 * bitDepth);

#if 0
  // mapping for peak luminance of ca. 3*400 = 1200 nits
  return (2 - int ((9 * lumaSquared) >> normShift));
#else
  // mapping for peak luminance of ca. 2*400 =  800 nits
  return (1 - int ((6 * lumaSquared) >> normShift));
#endif
}
83
84
void calcSpatialVisAct ( const Pel* pSrc,
85
                         const int iSrcStride,
86
                         const int height,
87
                         const int width,
88
                         const uint32_t bitDepth,
89
                         const bool isUHD,
90
                         VisAct& va )
91
11.6k
{
92
11.6k
  CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" );
93
94
11.6k
  uint64_t saAct;  // spatial absolute activity sum
95
96
  // skip first row as there may be a black border frame
97
11.6k
  pSrc += iSrcStride;
98
99
  // center rows
100
11.6k
  if (isUHD) // high-pass with downsampling
101
0
  {
102
0
    pSrc += iSrcStride;
103
104
0
    saAct = g_pelBufOP.AvgHighPassWithDownsampling (width, height, pSrc, iSrcStride);
105
106
0
    va.hpSpatAct = double (saAct) / double ((width - 4) * (height - 4));
107
0
  }
108
11.6k
  else // HD high-pass without downsampling
109
11.6k
  {
110
11.6k
    saAct = g_pelBufOP.AvgHighPass (width, height, pSrc, iSrcStride);
111
112
11.6k
    va.hpSpatAct = double (saAct) / double ((width - 2) * (height - 2));
113
11.6k
  }
114
115
  // spatial in 12 bit
116
11.6k
  va.spatAct = unsigned (0.5 + va.hpSpatAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1));
117
11.6k
}
118
119
void calcTemporalVisAct ( const Pel* pSrc,
120
                          const int iSrcStride,
121
                          const int height,
122
                          const int width,
123
                          const Pel* pSM1,
124
                          const int iSM1Stride,
125
                          const Pel* pSM2,
126
                          const int iSM2Stride,
127
                          uint32_t frameRate,
128
                          const uint32_t bitDepth,
129
                          const bool isUHD,
130
                          VisAct& va )
131
10.3k
{
132
10.3k
  CHECK( pSrc == nullptr, "no source buffer given to calculate temporal visual activity" );
133
10.3k
  CHECK( pSM1 == nullptr, "no compare buffer given to calculate temporal visual activity" );
134
135
10.3k
  const Pel* pS0 = pSrc;
136
10.3k
  uint64_t taAct;  // temporal absolute activity sum
137
138
  // force 1st-order delta if only prev. frame available
139
10.3k
  if (pSM2 == nullptr || iSM2Stride <= 0) frameRate = 24;
140
141
  // skip first row as there may be a black border frame
142
10.3k
  pSrc += iSrcStride;
143
144
  // center rows
145
10.3k
  if (pS0 == pSM1 && frameRate <= 31)
146
10.3k
  {
147
10.3k
    va.hpTempAct = 0; // bypass high-pass, result will be zero
148
10.3k
  }
149
0
  else if (isUHD)  // downsampled high-pass
150
0
  {
151
0
    const int i2M1Stride = iSM1Stride * 2;
152
153
0
    CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!");
154
155
0
    pSrc += iSrcStride;
156
0
    pSM1 += i2M1Stride;
157
0
    if (frameRate <= 31) // 1st-order delta
158
0
    {
159
0
      taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff1st (width, height, pSrc, pSM1, iSrcStride, iSM1Stride);
160
0
    }
161
0
    else // 2nd-order delta (diff of diffs)
162
0
    {
163
0
      const int i2M2Stride = iSM2Stride * 2;
164
165
0
      CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!");
166
167
0
      pSM2 += i2M2Stride;
168
0
      taAct = g_pelBufOP.AvgHighPassWithDownsamplingDiff2nd (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride);
169
0
    }
170
171
0
    va.hpTempAct = double (taAct) / double ((width - 4) * (height - 4));
172
0
  }
173
0
  else // HD high-pass without downsampling
174
0
  {
175
0
    CHECK (pSM1 == nullptr || iSM1Stride <= 0 || iSM1Stride < width, "Pel buffer pointer pSM1 must not be null!");
176
177
0
    pSM1 += iSM1Stride;
178
0
    if (frameRate <= 31) // 1st-order delta
179
0
    {
180
0
      taAct = g_pelBufOP.HDHighPass (width, height, pSrc, pSM1, iSrcStride, iSM1Stride);
181
0
    }
182
0
    else // 2nd-order delta (diff of diffs)
183
0
    {
184
0
      CHECK (pSM2 == nullptr || iSM2Stride <= 0 || iSM2Stride < width, "Pel buffer pointer pSM2 must not be null!");
185
186
0
      pSM2 += iSM2Stride;
187
0
      taAct = g_pelBufOP.HDHighPass2 (width, height, pSrc, pSM1, pSM2, iSrcStride, iSM1Stride, iSM2Stride);
188
0
    }
189
190
0
    va.hpTempAct = double (taAct) / double ((width - 2) * (height - 2));
191
0
  }
192
193
  // temporal in 12 bit
194
10.3k
  va.tempAct = unsigned (0.5 + va.hpTempAct * double (bitDepth < 12 ? 1 << (12 - bitDepth) : 1) * (frameRate <= 31 ? 1.15625 : 1.0));
195
10.3k
}
196
197
// Derives the combined activity values of va from its spatial and temporal parts:
// minAct is the smaller of the two 12-bit values, hpVisAct is the spatial part plus twice the
// temporal part with a lower limit of 2^(bitDepth-6), and visAct is hpVisAct rounded and
// clipped to the bit depth.
void updateVisAct ( VisAct& va, const uint32_t bitDepth )
{
  // lower limit, compensates for high-pass amplification
  const double lowerLimit = double (1 << (bitDepth - 6));
  const double weighted   = va.hpSpatAct + 2.0 * va.hpTempAct;

  // minimum part in 12 bit
  va.minAct = std::min( va.tempAct, va.spatAct );

  va.hpVisAct = (lowerLimit < weighted ? weighted : lowerLimit);
  va.visAct   = ClipBD( uint16_t( 0.5 + va.hpVisAct ), bitDepth );
}
205
206
double filterAndCalculateAverageActivity ( const Pel* pSrc,
207
                                           const int iSrcStride,
208
                                           const int height,
209
                                           const int width,
210
                                           const Pel* pSM1,
211
                                           const int iSM1Stride,
212
                                           const Pel* pSM2,
213
                                           const int iSM2Stride,
214
                                           uint32_t frameRate,
215
                                           const uint32_t bitDepth,
216
                                           const bool isUHD,
217
                                           unsigned* minVisAct = nullptr,
218
                                           unsigned* spVisAct  = nullptr )
219
10.3k
{
220
10.3k
  VisAct va;
221
222
  // spatial activity
223
10.3k
  calcSpatialVisAct( pSrc, iSrcStride, height, width, bitDepth, isUHD, va );
224
225
  // temporal activity
226
10.3k
  calcTemporalVisAct( pSrc, iSrcStride, height, width, pSM1, iSM1Stride, pSM2, iSM2Stride,
227
10.3k
                     frameRate, bitDepth, isUHD, va );
228
229
  // minimum and visual activity
230
10.3k
  updateVisAct( va, bitDepth );
231
232
10.3k
  if( minVisAct )
233
4.05k
  {
234
4.05k
    *minVisAct = va.minAct;
235
4.05k
  }
236
10.3k
  if( spVisAct )
237
4.05k
  {
238
4.05k
    *spVisAct = va.spatAct;
239
4.05k
  }
240
241
10.3k
  return va.hpVisAct;
242
10.3k
}
243
244
// Returns the average picture activity used to normalize the measured activity.
// A positive scaledAverageGopActivity takes precedence and is scaled down by 2^(24-bitDepth).
// Otherwise a default is derived from the bit depth, the picture size relative to 3840x2160
// and whether temporal filtering is used.
static double getAveragePictureActivity (const uint32_t picWidth,  const uint32_t picHeight,
                                         const int scaledAverageGopActivity,
                                         const bool tempFiltering, const uint32_t bitDepth)
{
  if (scaledAverageGopActivity <= 0)
  {
    const double baseEnergy = (tempFiltering ? 32.0 : 16.0);
    const double depthScale = double (1 << (2 * bitDepth - 10));
    const double sizeScale  = sqrt ((3840.0 * 2160.0) / double (picWidth * picHeight));
    const double hpEnerPic  = baseEnergy * depthScale * sizeScale;

    return sqrt (hpEnerPic); // square-root of a_pic value
  }

  return (double (scaledAverageGopActivity) / double (1 << (24 - bitDepth)));
}
256
257
// Returns a QP offset for areas whose mean luma exceeds the smaller of the two mean chroma
// values by more than the mid level (1 << (bitDepth - 1)); returns 0 otherwise.
//   ctuAddr >= 0 : operate on the chroma area of that CTU; avgLumaValue is read from
//                  pic->ctuAdaptedQP[ctuAddr] (NOTE(review): the caller visible in this file
//                  temporarily stores the CTU mean luma there - confirm for other callers)
//   ctuAddr <  0 : operate on the whole picture; avgLumaValue is the mean of the luma plane
//   avgLumaValue : output, the luma average that was used
static int getGlaringColorQPOffset (Picture* const pic, const int ctuAddr, const int bitDepth, uint32_t &avgLumaValue)
{
  const PreCalcValues& pcv  = *pic->cs->pcv;
  const ChromaFormat chrFmt = pic->chromaFormat;
  const SizeType chrWidth   = pcv.maxCUSize >> getChannelTypeScaleX (CH_C, chrFmt);
  const SizeType chrHeight  = pcv.maxCUSize >> getChannelTypeScaleY (CH_C, chrFmt);
  const unsigned w          = pcv.widthInCtus;
  const int      midLevel   = 1 << (bitDepth - 1);
  int chrValue = MAX_INT;

  avgLumaValue = uint32_t ((ctuAddr >= 0) ? pic->ctuAdaptedQP[ctuAddr] : pic->getOrigBuf().Y().getAvg());

  for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++)
  {
    const ComponentID compID = (ComponentID) comp;
    int avgCompValue;

    if (ctuAddr >= 0) // chroma
    {
      // chroma area of the CTU, clipped to the picture boundaries
      const CompArea chrArea = clipArea (CompArea (compID, chrFmt, Area ((ctuAddr % w) * chrWidth, (ctuAddr / w) * chrHeight, chrWidth, chrHeight)), pic->block (compID));

      avgCompValue = pic->getOrigBuf (chrArea).getAvg();
    }
    else avgCompValue = pic->getOrigBuf (pic->block (compID)).getAvg();

    if (chrValue > avgCompValue) chrValue = avgCompValue; // minimum of the DC offsets
  }
  CHECK (chrValue < 0, "mean chroma value cannot be negative!");

  chrValue = (int) avgLumaValue - chrValue;

  if (chrValue > midLevel)
  {
    // Square in floating point: chrValue * chrValue evaluated in int can exceed INT_MAX at a
    // bit depth of 16. For all values that did not overflow the result is unchanged.
    const double chrSquared = double (chrValue) * double (chrValue);
    const double midSquared = double (midLevel) * double (midLevel);

    return apprI3Log2 (chrSquared / midSquared, pic->isSccStrong);
  }

  return 0;
}
292
293
// Returns a QP offset for a luma area whose mean luma exceeds the smaller of the two mean
// chroma values of the co-located chroma area by more than the mid level
// (1 << (bitDepth - 1)); returns 0 otherwise.
//   lumaArea     : luma area to analyze; the chroma area is derived via the chroma format scaling
//   avgLumaValue : output, mean of the original luma samples in lumaArea
static int getGlaringColorQPOffsetSubCtu (Picture* const pic, const CompArea& lumaArea, const int bitDepth, uint32_t &avgLumaValue)
{
  const ChromaFormat chrFmt = pic->chromaFormat;
  const SizeType chrWidth   = lumaArea.width  >> getChannelTypeScaleX (CH_C, chrFmt);
  const SizeType chrHeight  = lumaArea.height >> getChannelTypeScaleY (CH_C, chrFmt);
  const PosType  chrPosX    = lumaArea.x >> getChannelTypeScaleX (CH_C, chrFmt);
  const PosType  chrPosY    = lumaArea.y >> getChannelTypeScaleY (CH_C, chrFmt);
  const int      midLevel   = 1 << (bitDepth - 1);
  int chrValue = MAX_INT;

  avgLumaValue = pic->getOrigBuf (lumaArea).getAvg();

  for (uint32_t comp = COMP_Cb; comp < MAX_NUM_COMP; comp++)
  {
    const ComponentID compID = (ComponentID) comp;
    // co-located chroma area, clipped to the picture boundaries
    const CompArea   chrArea = clipArea (CompArea (compID, chrFmt, Area (chrPosX, chrPosY, chrWidth, chrHeight)), pic->block (compID));

    int avgCompValue = pic->getOrigBuf (chrArea).getAvg();

    if (chrValue > avgCompValue) chrValue = avgCompValue; // minimum of the DC offsets
  }
  CHECK (chrValue < 0, "mean chroma value cannot be negative!");

  chrValue = (int) avgLumaValue - chrValue;

  if (chrValue > midLevel)
  {
    // Square in floating point: chrValue * chrValue evaluated in int can exceed INT_MAX at a
    // bit depth of 16. For all values that did not overflow the result is unchanged.
    const double chrSquared = double (chrValue) * double (chrValue);
    const double midSquared = double (midLevel) * double (midLevel);

    return apprI3Log2 (chrSquared / midSquared, pic->isSccStrong);
  }

  return 0;
}
322
323
static void updateMinNoiseLevelsPic (uint8_t* const minNoiseLevels, const int bitDepth, const unsigned avgValue, const unsigned noise)
324
0
{
325
0
  const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions
326
327
0
  CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds");
328
329
0
  if (noise < (unsigned) minNoiseLevels[avgIndex])
330
0
  {
331
0
    minNoiseLevels[avgIndex] = (uint8_t) noise;
332
0
  }
333
0
}
334
335
static void clipQPValToEstimatedMinimStats (const uint8_t* minNoiseLevels, const int bitDepth, const unsigned avgValue,
336
                                            const double resFac, const int extraQPOffset, int& QP) // output QP
337
0
{
338
0
  const unsigned avgIndex = avgValue >> (bitDepth - 3); // one of 8 mean level regions
339
0
  const unsigned x = (1 << 3) - 1;
340
0
  const int32_t dQPOffset = -15;
341
342
0
  CHECK (avgIndex >= QPA_MAX_NOISE_LEVELS, "array index out of bounds");
343
344
0
  int i = minNoiseLevels[avgIndex];
345
346
  // try to "fill in the blanks" in luma range (also results in peak smoothing, as described in PCS 2022 paper)
347
0
  if (avgIndex == 0 && i > minNoiseLevels[0 + 1]) i = minNoiseLevels[0 + 1];
348
0
  if (avgIndex == x && i > minNoiseLevels[x - 1]) i = minNoiseLevels[x - 1];
349
350
0
  if (avgIndex > 0 && avgIndex < x)
351
0
  {
352
0
    const uint8_t maxNeighborNoiseLevel = std::max (minNoiseLevels[avgIndex - 1], minNoiseLevels[avgIndex + 1]);
353
354
0
    if (i > maxNeighborNoiseLevel) i = maxNeighborNoiseLevel;
355
0
  }
356
0
  if (i >= 255)
357
0
  {
358
0
    return;
359
0
  }
360
361
0
  i = std::max (0, apprI3Log2 (std::min (1.0, resFac) * i * i, false) + dQPOffset + extraQPOffset); // = 6*log2
362
0
  if (QP < i)
363
0
  {
364
0
    QP = i;
365
0
  }
366
0
}
367
368
// Smooths isolated peaks and holes in the CTU-wise adapted QPs of a picture.
// For every CTU that is not in the first row (by address), the first/last column or the last
// CTU row of the range, the QP is compared with the pre-smoothing QPs of its 3x3 neighborhood:
// a QP above all neighbors is decremented, one below all neighbors incremented, and the
// CTU lambda is scaled accordingly. Returns the sum of all QP changes; nothing is done (0 is
// returned) unless the range covers more than three CTU rows. encCfg is not used here.
static int applyDeltaQpPeakSmoothing (Picture* const pic, const VVEncCfg* encCfg, const uint32_t startAddr, const uint32_t endAddr)
{
  const uint32_t rowLen = pic->cs->pcv->widthInCtus;

  if (rowLen == 0 || endAddr <= startAddr + 3u * rowLen)
  {
    return 0;
  }

  const uint32_t lastAddr = endAddr - rowLen; // the last CTU row is not processed
  std::vector<int> rowAboveQP (rowLen);       // pre-smoothing QPs of the row above, zero-initialized
  int leftQP   = 0;                           // pre-smoothing QP of the left neighbor
  int qpChange = 0;

  for (uint32_t addr = startAddr; addr < lastAddr; addr++)
  {
    const int32_t col = addr % rowLen;
    auto& ctuQP = pic->ctuAdaptedQP[addr];

    if (addr < rowLen) // first row: only remember the QPs
    {
      rowAboveQP[col] = ctuQP;
      continue;
    }
    if (col == 0) // first column: only remember the QP
    {
      leftQP = ctuQP;
      continue;
    }
    if (col == rowLen - 1) // last column: finish the history of this row
    {
      rowAboveQP[col - 1] = leftQP;
      rowAboveQP[col]     = ctuQP;
      continue;
    }

    // no boundary CTU: max. and min. in 3x3 neighborhood
    const int32_t below = addr + rowLen;
    const int rightQP   = pic->ctuAdaptedQP[addr + 1];
    int hiQP = std::max (leftQP, rightQP);
    int loQP = std::min (leftQP, rightQP);

    for (int32_t dx = -1; dx <= 1; dx++)
    {
      const int aboveQP = rowAboveQP[col + dx];
      const int belowQP = pic->ctuAdaptedQP[below + dx];

      hiQP = std::max (hiQP, std::max (aboveQP, belowQP));
      loQP = std::min (loQP, std::min (aboveQP, belowQP));
    }

    // the left neighbor's slot in the history is no longer needed for this row
    rowAboveQP[col - 1] = leftQP;
    leftQP = ctuQP;

    if (leftQP > hiQP)
    {
      pic->ctuQpaLambda[addr] *= 0.793701; // peak: decrease adapted lambda, QP
      ctuQP--;
      qpChange--;
    }
    else if (leftQP < loQP) // cannot coincide with the peak case since loQP <= hiQP
    {
      pic->ctuQpaLambda[addr] *= 1.259921; // hole: increase adapted lambda, QP
      ctuQP++;
      qpChange++;
    }
  }

  return qpChange;
}
423
424
// Applies rcQpDiff to the adapted QP of every CTU in [startAddr, endAddr), clips the result
// to [0, MAX_QP] and stores the QP together with the matching lambda. In the encoding pass
// each QP is additionally raised to the bound estimated from the minimum noise levels; CTUs
// raised that way are marked with -1 in ctuAvgLuma. If the QP sum then exceeds the target and
// at least one CTU was raised, QPs of unmarked CTUs are lowered by one, highest QPs first,
// until the target is met. Returns the rounded mean CTU QP.
static int refineDeltaQpDistribution (Picture* const pic, const VVEncCfg* encCfg,   const int sliceQP,
                                      const double sliceLambda, const int rcQpDiff, const int bitDepth,
                                      const uint32_t startAddr, const uint32_t endAddr, const int qpSum,
                                      const uint32_t tempLayer, const bool isIntra, const bool isEncPass,
                                      const uint8_t* minNoiseLevels, std::vector<int>& ctuAvgLuma)
{
  // lambda belonging to a CTU QP, relative to the slice lambda and QP
  const auto lambdaForQP = [sliceLambda, sliceQP] (const int qp)
  {
    return sliceLambda * pow (2.0, double (qp - sliceQP) / 3.0);
  };

  const double resRatio = (isEncPass ? sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0)) : 0.0);
  const int numCtus     = int (endAddr - startAddr);
  const int targetQpSum = (encCfg->m_RCTargetBitrate > 0 ? sliceQP * numCtus : qpSum);
  int  sumQP      = 0;
  bool anyLimited = false;

  // first pass: offset, noise-based lower bound (encoding pass only), clip, store
  for (uint32_t addr = startAddr; addr < endAddr; addr++)
  {
    int& avgLuma = ctuAvgLuma[addr - startAddr];
    int ctuQP    = std::max (0, pic->ctuAdaptedQP[addr] + rcQpDiff);

    if (isEncPass)
    {
      const int unclippedQP = ctuQP; // CTU QP before clipping for diff calculation below

      clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, avgLuma, resRatio, (isIntra ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) tempLayer)), ctuQP);
      if (ctuQP > unclippedQP)
      {
        avgLuma    = -1; // mark CTU as being processed already
        anyLimited = isEncPass;
      }
    }

    ctuQP  = std::min (MAX_QP, ctuQP);
    sumQP += ctuQP;

    pic->ctuQpaLambda[addr] = lambdaForQP (ctuQP);  // store modified CTU lambdas and QPs
    pic->ctuAdaptedQP[addr] = ctuQP;
  }

  if (anyLimited && sumQP > targetQpSum) // CTU QPs limited, so distribute saved rate among nonlimited CTUs
  {
    int highestQP = 0, lowestQP = MAX_QP;

    for (uint32_t addr = startAddr; addr < endAddr; addr++) // find max
    {
      const int ctuQP = pic->ctuAdaptedQP[addr];

      if (ctuAvgLuma[addr - startAddr] >= 0 && ctuQP > highestQP) // nonlimited CTUs
      {
        highestQP = ctuQP;
      }
      if (ctuQP < lowestQP)
      {
        lowestQP = ctuQP;
      }
    }

    lowestQP = std::max (0, lowestQP);

    // spend rate starting at max QPs, then go downward
    for (; highestQP > lowestQP && sumQP > targetQpSum; highestQP--)
    {
      for (uint32_t addr = startAddr; addr < endAddr; addr++) // reduce
      {
        int& avgLuma = ctuAvgLuma[addr - startAddr];

        if (avgLuma >= 0 && pic->ctuAdaptedQP[addr] == highestQP)
        {
          const int reducedQP = std::max (0, pic->ctuAdaptedQP[addr] - 1);

          avgLuma = -1; // mark CTU as being reduced already
          if (reducedQP < pic->ctuAdaptedQP[addr]) sumQP--;

          pic->ctuQpaLambda[addr] = lambdaForQP (reducedQP); // store reduced lambdas and QPs
          pic->ctuAdaptedQP[addr] = reducedQP;
        }

        if (sumQP <= targetQpSum) break;
      }
    }
  }

  return (sumQP + (numCtus >> 1)) / numCtus;
}
505
506
// public functions
507
508
int BitAllocation::applyQPAdaptationSlice (const Slice* slice, const VVEncCfg* encCfg, const int sliceQP,
509
                                           const double sliceLambda, uint16_t* const picVisActLuma,
510
                                           std::vector<int>& ctuPumpRedQP, std::vector<uint8_t>* ctuRCQPMemory,
511
                                           int* const optChromaQPOffsets, const uint8_t* minNoiseLevels,
512
                                           const uint32_t ctuStartAddr, const uint32_t ctuBoundingAddr)
513
1.29k
{
514
1.29k
  Picture* const pic          = (slice != nullptr ? slice->pic : nullptr);
515
1.29k
  double hpEner[MAX_NUM_COMP] = {0.0, 0.0, 0.0};
516
1.29k
  double averageAdaptedLambda = 0.0;
517
1.29k
  int    averageAdaptedLumaQP = -1;
518
1.29k
  uint32_t meanLuma           = MAX_UINT;
519
1.29k
  std::vector<int> ctuAvgLuma;
520
521
1.29k
  if (pic == nullptr || pic->cs == nullptr || encCfg == nullptr || ctuStartAddr >= ctuBoundingAddr)
522
0
  {
523
0
    return -1;
524
0
  }
525
526
1.29k
  const bool isEncPass        = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis);
527
1.29k
  const bool isHDR            = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ);
528
1.29k
  const bool isBIM            = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty());
529
1.29k
  const bool isSccStrongRC    = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong);
530
1.29k
  const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280);
531
1.29k
  const bool useFrameWiseQPA  = (encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (slice->TLayer > 0);
532
1.29k
  const uint32_t hpFrameRate  = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale);
533
1.29k
  const int  bitDepth         = slice->sps->bitDepths[CH_L];
534
1.29k
  double hpEnerPicNorm        = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, (encCfg->m_RCNumPasses == 2 ? 0 : ctuPumpRedQP.back()),
535
1.29k
                                                                 (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth);
536
1.29k
  const PreCalcValues& pcv    = *pic->cs->pcv;
537
538
1.29k
  pic->picInitialQP = sliceQP;  // modified below and used in applyQPAdaptationSubCtu
539
1.29k
  if ((encCfg->m_RCTargetBitrate > 0) && useFrameWiseQPA)
540
0
  {
541
0
    averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP - 1); // one will be added again
542
0
  }
543
544
5.19k
  for (uint32_t comp = 0; comp < getNumberValidComponents (pic->chromaFormat); comp++)
545
3.89k
  {
546
3.89k
    const ComponentID compID  = (ComponentID) comp;
547
548
3.89k
    if (isLuma (compID)) // luma: CTU-wise QPA operation
549
1.29k
    {
550
1.29k
      const PosType guardSize = (isHighResolution ? 2 : 1);
551
1.29k
      unsigned zeroMinActCTUs = 0, picSpVisAct = 0;
552
553
5.34k
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
554
4.05k
      {
555
4.05k
        const Position pos ((ctuRsAddr % pcv.widthInCtus) * pcv.maxCUSize, (ctuRsAddr / pcv.widthInCtus) * pcv.maxCUSize);
556
4.05k
        const CompArea ctuArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, pcv.maxCUSize, pcv.maxCUSize)), pic->Y());
557
4.05k
        const SizeType fltWidth  = pcv.maxCUSize + guardSize * (pos.x > 0 ? 2 : 1);
558
4.05k
        const SizeType fltHeight = pcv.maxCUSize + guardSize * (pos.y > 0 ? 2 : 1);
559
4.05k
        const CompArea fltArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y());
560
4.05k
        const CPelBuf  picOrig   = pic->getOrigBuf (fltArea);
561
4.05k
        const CPelBuf  picPrv1   = pic->getOrigBufPrev (fltArea, PREV_FRAME_1);
562
4.05k
        const CPelBuf  picPrv2   = pic->getOrigBufPrev (fltArea, PREV_FRAME_2);
563
4.05k
        unsigned minActivityPart = 0, spVisActCTU = 0;
564
565
4.05k
        hpEner[1] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
566
4.05k
                                                       picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
567
4.05k
                                                       bitDepth, isHighResolution, &minActivityPart, &spVisActCTU);
568
569
4.05k
        if (minActivityPart == 0) zeroMinActCTUs++;
570
571
4.05k
        hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height);
572
4.05k
        pic->ctuQpaLambda[ctuRsAddr] = hpEner[1]; // temporary backup of CTU mean visual activity
573
4.05k
        pic->ctuAdaptedQP[ctuRsAddr] = (int) pic->getOrigBuf (ctuArea).getAvg(); // and mean luma
574
575
4.05k
        if (picOrig.buf == picPrv1.buf) // replace temporal visual activity with min motion error
576
4.05k
        {
577
4.05k
          hpEner[1] = pic->m_picShared->m_minNoiseLevels[pic->ctuAdaptedQP[ctuRsAddr] >> (bitDepth - 3)] * (bitDepth >= 10 ? 1.5 : 0.375);
578
579
4.05k
          if (hpEner[1] < (bitDepth >= 10 ? 382.5 : 95.625)) // levels in first frame
580
0
          {
581
0
            hpEner[comp] += hpEner[1] * double (ctuArea.width * ctuArea.height);
582
0
            pic->ctuQpaLambda[ctuRsAddr] += hpEner[1]; // add noise level to mean visual activity
583
0
          }
584
4.05k
        }
585
0
        else if (!isEncPass && (encCfg->m_RCNumPasses == 2 || pic->gopEntry->m_mctfIndex < 0 || !pic->gopEntry->m_isStartOfGop))
586
0
        {
587
0
          updateMinNoiseLevelsPic (pic->m_picShared->m_minNoiseLevels, bitDepth, pic->ctuAdaptedQP[ctuRsAddr], minActivityPart);
588
0
        }
589
4.05k
        picSpVisAct += spVisActCTU;
590
4.05k
      }
591
592
1.29k
      hpEner[comp] /= double (encCfg->m_SourceWidth * encCfg->m_SourceHeight);
593
1.29k
      if (picVisActLuma != nullptr)
594
1.29k
      {
595
1.29k
        *picVisActLuma = ClipBD (uint16_t (0.5 + hpEner[comp]), bitDepth);
596
1.29k
      }
597
1.29k
      if (ctuBoundingAddr > ctuStartAddr)
598
1.29k
      {
599
1.29k
        const uint32_t nCtu = ctuBoundingAddr - ctuStartAddr;
600
601
1.29k
        pic->picVA.spatAct[ CH_L ] = ClipBD (uint16_t ((picSpVisAct + (nCtu >> 1)) / nCtu), 12);
602
1.29k
      }
603
1.29k
      if (encCfg->m_internalUsePerceptQPATempFiltISlice && slice->isIntra() && pic->getOrigBuf (compID).buf != pic->getOrigBufPrev (compID, PREV_FRAME_1).buf && zeroMinActCTUs * 2 > ctuBoundingAddr - ctuStartAddr)
604
0
      {
605
0
        hpEnerPicNorm *= sqrt (zeroMinActCTUs * 2.0 / float (ctuBoundingAddr - ctuStartAddr)); // frozen-image mode
606
0
      }
607
1.29k
    }
608
2.59k
    else // chroma: only picture-wise operation required
609
2.59k
    {
610
2.59k
      const CPelBuf picOrig = pic->getOrigBuf (compID);
611
2.59k
      const CPelBuf picPrv1 = pic->getOrigBufPrev (compID, PREV_FRAME_1);
612
2.59k
      const CPelBuf picPrv2 = pic->getOrigBufPrev (compID, PREV_FRAME_2);
613
614
2.59k
      hpEner[comp] = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
615
2.59k
                                                        picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
616
2.59k
                                                        bitDepth, isHighResolution && (pic->chromaFormat == CHROMA_444));
617
618
2.59k
      const int adaptChromaQPOffset = 1.5 * hpEner[comp] <= hpEner[0] ? 0 : apprI3Log2 (1.5 * hpEner[comp] / hpEner[0], pic->isSccStrong);
619
620
2.59k
      if (averageAdaptedLumaQP < 0) // YUV is not 4:0:0!
621
1.29k
      {
622
1.29k
        averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC));
623
624
1.29k
        if (isChromaEnabled (pic->chromaFormat) && (averageAdaptedLumaQP < MAX_QP))
625
1.29k
        {
626
1.29k
          averageAdaptedLumaQP += getGlaringColorQPOffset (pic, -1 /*ctuAddr*/, slice->sps->bitDepths[CH_C], meanLuma);
627
628
1.29k
          if ((averageAdaptedLumaQP > MAX_QP) && !isHDR) averageAdaptedLumaQP = MAX_QP;
629
1.29k
        }
630
        // change mean picture QP index based on picture's average luma value (Sharp)
631
1.29k
        if (isHDR)
632
0
        {
633
0
          if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg();
634
635
0
          averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
636
0
        }
637
1.29k
      }
638
639
2.59k
      if (optChromaQPOffsets != nullptr) // adapts sliceChromaQpOffsetIntraOrPeriodic
640
2.59k
      {
641
        GCC_WARNING_DISABLE_maybe_uninitialized // probably spurious warning, when building with -fsanitize=undefined: "error: 'encCfg.33' may be used uninitialized in this function"
642
2.59k
        const int lumaChromaMappingDQP = (averageAdaptedLumaQP - slice->sps->chromaQpMappingTable.getMappedChromaQpValue (compID, averageAdaptedLumaQP)) >> (encCfg->m_HdrMode == vvencHDRMode::VVENC_HDR_OFF ? 1 : 2);
643
2.59k
        GCC_WARNING_RESET
644
2.59k
        optChromaQPOffsets[comp - 1] = std::min (3 + lumaChromaMappingDQP, adaptChromaQPOffset + lumaChromaMappingDQP);
645
2.59k
      }
646
2.59k
    } // isLuma or isChroma
647
3.89k
  }
648
649
1.29k
  if (averageAdaptedLumaQP < 0) // only if YUV is 4:0:0!
650
0
  {
651
0
    averageAdaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEner[0] * hpEnerPicNorm, isSccStrongRC));
652
653
    // change mean picture QP index based on the picture's average luma value (Sharp)
654
0
    if (isHDR)
655
0
    {
656
0
      if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf().Y().getAvg();
657
658
0
      averageAdaptedLumaQP = Clip3 (0, MAX_QP, averageAdaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
659
0
    }
660
0
  }
661
662
1.29k
  if (encCfg->m_RCNumPasses == 2 && (encCfg->m_RCTargetBitrate > 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra())
663
0
  {
664
0
    const int nCtu = int (ctuBoundingAddr - ctuStartAddr);
665
0
    const int offs = (slice->poc / encCfg->m_IntraPeriod) * ((nCtu + 1) >> 1);
666
0
    std::vector<uint8_t>& ctuQPMem = *ctuRCQPMemory; // unpack 1st-pass reduction QPs
667
668
0
    if ((ctuPumpRedQP.size() >= nCtu) && (ctuQPMem.size() >= offs + ((nCtu + 1) >> 1)))
669
0
    {
670
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
671
0
      {
672
0
        ctuPumpRedQP[ctuRsAddr] = int ((ctuRsAddr & 1) ? ctuQPMem[offs + (ctuRsAddr >> 1)] >> 4 : ctuQPMem[offs + (ctuRsAddr >> 1)] & 15) - 8;
673
0
      }
674
0
    }
675
0
  }
676
677
1.29k
  if (useFrameWiseQPA || (averageAdaptedLumaQP >= MAX_QP)) // store the CTU-wise QP/lambda values
678
0
  {
679
0
    averageAdaptedLumaQP = std::min (MAX_QP, averageAdaptedLumaQP + 1);
680
0
    averageAdaptedLambda = sliceLambda * pow (2.0, double (averageAdaptedLumaQP - sliceQP) / 3.0);
681
682
0
    for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
683
0
    {
684
0
      pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP
685
0
      pic->ctuAdaptedQP[ctuRsAddr] = averageAdaptedLumaQP;
686
0
    }
687
0
  }
688
1.29k
  else // use CTU-level QPA
689
1.29k
  {
690
1.29k
    const int nCtu = int (ctuBoundingAddr - ctuStartAddr);
691
1.29k
    const int dvsr = encCfg->m_IntraPeriod - encCfg->m_GOPSize;
692
1.29k
    const int aaQP = averageAdaptedLumaQP; // backup of initial average QP from above
693
1.29k
    const bool rcIsFirstPassOf2 = ((encCfg->m_RCTargetBitrate == 0) && (ctuRCQPMemory != nullptr) && slice->pps->useDQP && (slice->poc > 0) ? encCfg->m_RCNumPasses == 2 : false);
694
695
1.29k
    if (isEncPass) ctuAvgLuma.resize (nCtu);
696
697
1.29k
    averageAdaptedLumaQP = 0;
698
5.34k
    for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
699
4.05k
    {
700
4.05k
      const double hpEnerCTU = pic->ctuQpaLambda[ctuRsAddr];
701
4.05k
      int adaptedLumaQP = Clip3 (0, MAX_QP, sliceQP + apprI3Log2 (hpEnerCTU * hpEnerPicNorm, isSccStrongRC));
702
703
4.05k
      if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && slice->isIntra() && (ctuPumpRedQP.size() > ctuRsAddr))
704
0
      {
705
0
        if (rcIsFirstPassOf2) // backup 1st-pass I-frame QP for 2nd rate control pass
706
0
        {
707
0
          if (ctuRsAddr & 1) ctuRCQPMemory->back() |= (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8) << 4;
708
0
          else /*even addr*/ ctuRCQPMemory->push_back (Clip3 (-8, 7, ctuPumpRedQP[ctuRsAddr]) + 8);
709
0
          if (adaptedLumaQP > 0)
710
0
          {
711
0
            adaptedLumaQP -= (aaQP >> 4); // some first-pass tuning for stabilization
712
0
          }
713
0
        }
714
0
        if (ctuPumpRedQP[ctuRsAddr] < 0) adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize - (dvsr >> 1)) / dvsr);
715
0
        else /*ctuPumpRedQP[addr] >= 0*/ adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + (ctuPumpRedQP[ctuRsAddr] * encCfg->m_GOPSize + (dvsr >> 1)) / dvsr);
716
717
0
        ctuPumpRedQP[ctuRsAddr] = 0; // reset QP memory for temporal pumping analysis
718
0
      }
719
4.05k
      if ((encCfg->m_internalUsePerceptQPATempFiltISlice == 2) && !slice->isIntra() && (slice->TLayer == 0) && rcIsFirstPassOf2 && (adaptedLumaQP < MAX_QP))
720
0
      {
721
0
        adaptedLumaQP++; // this is a first-pass tuning to stabilize the rate control
722
0
      }
723
4.05k
      meanLuma = MAX_UINT;
724
4.05k
      if (isChromaEnabled (pic->chromaFormat) && (adaptedLumaQP < MAX_QP))
725
4.05k
      {
726
4.05k
        adaptedLumaQP += getGlaringColorQPOffset (pic, (int) ctuRsAddr, slice->sps->bitDepths[CH_C], meanLuma);
727
728
4.05k
        if ((adaptedLumaQP > MAX_QP) && !isHDR) adaptedLumaQP = MAX_QP;
729
4.05k
      }
730
      // change the CTU-level QP index based on CTU area's average luma value (Sharp)
731
4.05k
      if (isHDR)
732
0
      {
733
0
        if (meanLuma == MAX_UINT) meanLuma = pic->ctuAdaptedQP[ctuRsAddr];
734
735
0
        adaptedLumaQP = Clip3 (0, MAX_QP, adaptedLumaQP + lumaDQPOffset (meanLuma, bitDepth));
736
0
      }
737
      // add further delta-QP of block importance mapping (BIM) detector if available
738
4.05k
      if (isBIM)
739
0
      {
740
0
        adaptedLumaQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedLumaQP + pic->m_picShared->m_ctuBimQpOffset[ctuRsAddr]);
741
0
      }
742
      // reduce delta-QP variance, avoid wasting precious bit budget at low bit-rates
743
4.05k
      if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1))
744
697
      {
745
697
        const int retunedAdLumaQP = adaptedLumaQP + 1;
746
747
697
        adaptedLumaQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedLumaQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * aaQP + 2) >> 2;
748
697
        if (adaptedLumaQP > retunedAdLumaQP) adaptedLumaQP = retunedAdLumaQP;
749
697
        if (adaptedLumaQP < MAX_QP && encCfg->m_QP == MAX_QP_PERCEPT_QPA && slice->TLayer > 1) adaptedLumaQP++; // a fine-tuning
750
697
      }
751
4.05k
      if (isEncPass) ctuAvgLuma[ctuRsAddr - ctuStartAddr] = pic->ctuAdaptedQP[ctuRsAddr];
752
753
4.05k
      averageAdaptedLambda = sliceLambda * pow (2.0, double (adaptedLumaQP - sliceQP) / 3.0);
754
4.05k
      averageAdaptedLumaQP += adaptedLumaQP;
755
756
4.05k
      pic->ctuQpaLambda[ctuRsAddr] = averageAdaptedLambda; // save adapted lambda, QP
757
4.05k
      pic->ctuAdaptedQP[ctuRsAddr] = adaptedLumaQP;
758
4.05k
    }
759
760
1.29k
    if (encCfg->m_cuQpDeltaSubdiv == 0 || !slice->isIntra()) averageAdaptedLumaQP += applyDeltaQpPeakSmoothing (pic, encCfg, ctuStartAddr, ctuBoundingAddr);
761
762
1.29k
    meanLuma = std::max (0, averageAdaptedLumaQP);
763
1.29k
    averageAdaptedLumaQP = (meanLuma + (nCtu >> 1)) / nCtu;
764
765
1.29k
    if ((encCfg->m_RCTargetBitrate > 0 && averageAdaptedLumaQP != sliceQP) || (isEncPass) || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ) // QP/rate control
766
0
    {
767
0
      const int rcQpDiff = (encCfg->m_RCTargetBitrate > 0 || (encCfg->m_LookAhead > 0 && pic->isPreAnalysis) ? sliceQP - averageAdaptedLumaQP : 0);
768
769
0
      averageAdaptedLumaQP = refineDeltaQpDistribution (pic, encCfg, sliceQP, sliceLambda, rcQpDiff, bitDepth, ctuStartAddr, ctuBoundingAddr,
770
0
                                                        meanLuma, slice->TLayer, slice->isIntra(), isEncPass, minNoiseLevels, ctuAvgLuma);
771
772
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu
773
0
      pic->isMeanQPLimited = (encCfg->m_RCTargetBitrate > 0) && isEncPass && (averageAdaptedLumaQP > sliceQP);
774
0
    }
775
1.29k
    else if (encCfg->m_rateCap && (pic->gopAdaptedQP > 0) && (averageAdaptedLumaQP < aaQP)) // capped CQF
776
0
    {
777
0
      const int rcQpDiff = aaQP - averageAdaptedLumaQP;
778
779
0
      averageAdaptedLambda = pow (2.0, double (rcQpDiff) / 3.0);
780
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
781
0
      {
782
0
        pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda
783
0
        pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + rcQpDiff);
784
0
      }
785
786
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + rcQpDiff); // used in applyQPAdaptationSubCtu
787
0
      averageAdaptedLumaQP = aaQP;
788
0
    }
789
1.29k
    else if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (encCfg->m_framesToBeEncoded != 1) && (averageAdaptedLumaQP + 1 < aaQP))
790
0
    {
791
0
      const int lrQpDiff = (aaQP - averageAdaptedLumaQP) >> (encCfg->m_QP <= MAX_QP_PERCEPT_QPA ? 2 : 1); // for monotonous rate change at low rates
792
793
0
      averageAdaptedLambda = pow (2.0, double (lrQpDiff) / 3.0);
794
0
      for (uint32_t ctuRsAddr = ctuStartAddr; ctuRsAddr < ctuBoundingAddr; ctuRsAddr++)
795
0
      {
796
0
        pic->ctuQpaLambda[ctuRsAddr] *= averageAdaptedLambda; // scale adapted lambda
797
0
        pic->ctuAdaptedQP[ctuRsAddr] = std::min (MAX_QP, pic->ctuAdaptedQP[ctuRsAddr] + lrQpDiff);
798
0
      }
799
800
0
      pic->picInitialQP = Clip3 (0, MAX_QP, pic->picInitialQP + lrQpDiff); // used in applyQPAdaptationSubCtu
801
0
      averageAdaptedLumaQP = aaQP; // TODO hlm: += lrQpDiff?
802
803
0
      pic->isMeanQPLimited = false;
804
0
    }
805
806
1.29k
    if (isEncPass) ctuAvgLuma.clear();
807
1.29k
  } // CTU-/frame-level QPA
808
809
1.29k
  return averageAdaptedLumaQP;
810
1.29k
}
811
812
int BitAllocation::applyQPAdaptationSubCtu (const Slice* slice, const VVEncCfg* encCfg, const Area& lumaArea, const uint8_t* minNoiseLevels)
813
3.70k
{
814
3.70k
  Picture* const pic          = (slice != nullptr ? slice->pic : nullptr);
815
3.70k
  uint32_t meanLuma           = MAX_UINT;
816
817
3.70k
  if (pic == nullptr || encCfg == nullptr)
818
0
  {
819
0
    return -1;
820
0
  }
821
822
3.70k
  const bool isEncPass        = (encCfg->m_LookAhead > 0 && !pic->isPreAnalysis);
823
3.70k
  const bool isHDR            = (encCfg->m_HdrMode != vvencHDRMode::VVENC_HDR_OFF) && !(encCfg->m_lumaReshapeEnable != 0 && encCfg->m_reshapeSignalType == RESHAPE_SIGNAL_PQ);
824
3.70k
  const bool isBIM            = (encCfg->m_blockImportanceMapping && !pic->m_picShared->m_ctuBimQpOffset.empty());
825
3.70k
  const bool isSccStrongRC    = ((encCfg->m_LookAhead > 0 || encCfg->m_RCNumPasses == 2) && pic->isSccStrong);
826
3.70k
  const bool isHighResolution = (std::min (encCfg->m_SourceWidth, encCfg->m_SourceHeight) > 1280);
827
3.70k
  const uint32_t hpFrameRate  = (pic->force2ndOrder ? 32 : encCfg->m_FrameRate / encCfg->m_FrameScale);
828
3.70k
  const int  bitDepth         = slice->sps->bitDepths[CH_L];
829
3.70k
  const PosType     guardSize = (isHighResolution ? 2 : 1);
830
3.70k
  const Position    pos       = lumaArea.pos();
831
3.70k
  const CompArea    subArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, lumaArea.width, lumaArea.height)), pic->Y());
832
3.70k
  const SizeType    fltWidth  = lumaArea.width  + guardSize * (pos.x > 0 ? 2 : 1);
833
3.70k
  const SizeType    fltHeight = lumaArea.height + guardSize * (pos.y > 0 ? 2 : 1);
834
3.70k
  const CompArea    fltArea   = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x > 0 ? pos.x - guardSize : 0, pos.y > 0 ? pos.y - guardSize : 0, fltWidth, fltHeight)), pic->Y());
835
3.70k
  const CPelBuf     picOrig   = pic->getOrigBuf (fltArea);
836
3.70k
  const CPelBuf     picPrv1   = pic->getOrigBufPrev (fltArea, PREV_FRAME_1);
837
3.70k
  const CPelBuf     picPrv2   = pic->getOrigBufPrev (fltArea, PREV_FRAME_2);
838
3.70k
  const double hpEnerSubCTU   = filterAndCalculateAverageActivity (picOrig.buf, picOrig.stride, picOrig.height, picOrig.width,
839
3.70k
                                                                   picPrv1.buf, picPrv1.stride, picPrv2.buf, picPrv2.stride, hpFrameRate,
840
3.70k
                                                                   bitDepth, isHighResolution);
841
3.70k
  const double hpEnerPicNorm  = 1.0 / getAveragePictureActivity (encCfg->m_SourceWidth, encCfg->m_SourceHeight, 0,
842
3.70k
                                                                 (encCfg->m_internalUsePerceptQPATempFiltISlice || !slice->isIntra()), bitDepth);
843
3.70k
  int adaptedSubCtuQP = Clip3 (0, MAX_QP, pic->picInitialQP + apprI3Log2 (hpEnerSubCTU * hpEnerPicNorm, isSccStrongRC));
844
845
3.70k
  if (isChromaEnabled (pic->chromaFormat) && (adaptedSubCtuQP < MAX_QP))
846
3.70k
  {
847
3.70k
    adaptedSubCtuQP += getGlaringColorQPOffsetSubCtu (pic, subArea, slice->sps->bitDepths[CH_C], meanLuma);
848
849
3.70k
    if ((adaptedSubCtuQP > MAX_QP) && !isHDR) adaptedSubCtuQP = MAX_QP;
850
3.70k
  }
851
  // change the sub-CTU-level QP index based on sub-area's average luma value (Sharp)
852
3.70k
  if (isHDR)
853
0
  {
854
0
    if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg();
855
856
0
    adaptedSubCtuQP = Clip3 (0, MAX_QP, adaptedSubCtuQP + lumaDQPOffset (meanLuma, bitDepth));
857
0
  }
858
  // add additional delta-QP of block importance mapping (BIM) detection if available
859
3.70k
  if (isBIM)
860
0
  {
861
0
    adaptedSubCtuQP = Clip3 (-slice->sps->qpBDOffset[CH_L], MAX_QP, adaptedSubCtuQP + pic->m_picShared->m_ctuBimQpOffset[getCtuAddr (pos, *pic->cs->pcv)]);
862
0
  }
863
  // reduce the delta-QP variance, avoid wasting precious bit budget at low bit-rates
864
3.70k
  if ((encCfg->m_RCTargetBitrate == 0) && (3 + encCfg->m_QP > MAX_QP_PERCEPT_QPA) && (slice->sliceQp >= 0) && (encCfg->m_framesToBeEncoded != 1))
865
0
  {
866
0
    const int retunedAdLumaQP = adaptedSubCtuQP + 1;
867
868
0
    adaptedSubCtuQP = (std::max (0, 1 + MAX_QP_PERCEPT_QPA - encCfg->m_QP) * adaptedSubCtuQP + std::min (4, 3 + encCfg->m_QP - MAX_QP_PERCEPT_QPA) * slice->sliceQp + 2) >> 2;
869
0
    if (adaptedSubCtuQP > retunedAdLumaQP) adaptedSubCtuQP = retunedAdLumaQP;
870
0
    if (adaptedSubCtuQP < MAX_QP && encCfg->m_QP >= MAX_QP_PERCEPT_QPA) adaptedSubCtuQP++; // for monotonous rate change, l. 507
871
0
  }
872
3.70k
  if (isEncPass)
873
0
  {
874
0
    const double resRatio = sqrt (double (encCfg->m_SourceWidth * encCfg->m_SourceHeight) / (3840.0 * 2160.0));
875
876
0
    if (meanLuma == MAX_UINT) meanLuma = pic->getOrigBuf (subArea).getAvg();
877
0
    clipQPValToEstimatedMinimStats (minNoiseLevels, bitDepth, meanLuma, resRatio, (slice->isIntra() ? encCfg->m_intraQPOffset >> 1 : std::min (4, (int) slice->TLayer)), adaptedSubCtuQP);
878
0
  }
879
880
3.70k
  return adaptedSubCtuQP;
881
3.70k
}
882
883
int BitAllocation::getCtuPumpingReducingQP (const Slice* slice, const CPelBuf& origY, const Distortion uiSadBestForQPA,
884
                                            std::vector<int>& ctuPumpRedQP, const uint32_t ctuRsAddr, const int baseQP,
885
                                            const bool isBIM)
886
0
{
887
0
  if (slice == nullptr || !slice->pps->useDQP || ctuPumpRedQP.size() <= ctuRsAddr) return 0;
888
889
0
  const int32_t avgOrig = origY.getAvg();
890
0
  uint32_t sumAbsZmOrig = 0; // zero-mean
891
0
  const Pel* src = origY.buf;
892
893
0
  for (SizeType y = 0; y < origY.height; y++) // sum up the zero-mean absolute values
894
0
  {
895
0
    for (SizeType x = 0; x < origY.width; x++)
896
0
    {
897
0
      sumAbsZmOrig += (uint32_t) abs (src[x] - avgOrig);
898
0
    }
899
0
    src += origY.stride;
900
0
  }
901
902
0
  const double sumAbsRatio = double (uiSadBestForQPA * 3 /*TODO: or 4? fine-tune!*/) / double (sumAbsZmOrig == 0 ? 1 : sumAbsZmOrig);
903
0
  const int pumpingReducQP = ((isBIM ? -1 : 0) + int (log (Clip3 (0.25, 4.0, sumAbsRatio)) / log (2.0) + (sumAbsRatio < 1.0 ? -0.5 : 0.5))) >> (baseQP >= 38/*MAX_QP_PERCEPT_QPA*/ ? 1 : 0);
904
905
0
  ctuPumpRedQP[ctuRsAddr] += pumpingReducQP;
906
907
0
  return pumpingReducQP;
908
0
}
909
910
} // namespace vvenc
911
912
//! \}