Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/MCTF.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     MCTF.cpp
45
\brief    MCTF class
46
*/
47
48
#include "MCTF.h"
49
#include <math.h>
50
#include "CommonLib/Picture.h"
51
#include "CommonLib/dtrace_buffer.h"
52
#include "Utilities/NoMallocThreadPool.h"
53
54
namespace vvenc {
55
56
#ifdef TRACE_ENABLE_ITT
// ITT instrumentation objects, only compiled in when TRACE_ENABLE_ITT is defined.
// One string handle / domain pair is created for "MCTF_est" / "MCTFEst" and one for "MCTF_flt" / "MCTFFlt".
static __itt_string_handle* itt_handle_est      = __itt_string_handle_create( "MCTF_est" );
static __itt_domain*        itt_domain_MCTF_est = __itt_domain_create( "MCTFEst" );
static __itt_string_handle* itt_handle_flt      = __itt_string_handle_create( "MCTF_flt" );
static __itt_domain*        itt_domain_MCTF_flt = __itt_domain_create( "MCTFFlt" );
#endif
62
63
// ====================================================================================================================
64
// Constructor / destructor / initialization / destroy
65
// ====================================================================================================================
66
67
const double MCTF::m_chromaFactor     =  0.55;
68
const double MCTF::m_sigmaMultiplier  =  9.0;
69
const int MCTF::m_range               = VVENC_MCTF_RANGE-2;
70
const int MCTF::m_motionVectorFactor  = 16;
71
const int MCTF::m_padding             = MCTF_PADDING;
72
// 16 interpolation filters with 8 coefficient slots each; the row index selects the filter.
// Slots 0 and 7 are zero in every row, so only taps 1..6 carry weight, and each row sums to 64,
// matching the ( sum + 32 ) >> 6 normalisation used by the 6-tap filter cores in this file.
// NOTE(review): the row index is presumably the fractional sample position in 1/16 units
// (cf. m_motionVectorFactor == 16) - confirm against the callers.
// The arrow markers on the rows are carried over unchanged from the original table.
const int16_t MCTF::m_interpolationFilter8[16][8] =
{
  { 0, 0,   0, 64,  0,   0, 0, 0 },   // row  0
  { 0, 1,  -3, 64,  4,  -2, 0, 0 },   // row  1 -->-->
  { 0, 1,  -6, 62,  9,  -3, 1, 0 },   // row  2 -->
  { 0, 2,  -8, 60, 14,  -5, 1, 0 },   // row  3 -->-->
  { 0, 2,  -9, 57, 19,  -7, 2, 0 },   // row  4
  { 0, 3, -10, 53, 24,  -8, 2, 0 },   // row  5 -->-->
  { 0, 3, -11, 50, 29,  -9, 2, 0 },   // row  6 -->
  { 0, 3, -11, 44, 35, -10, 3, 0 },   // row  7 -->-->
  { 0, 1,  -7, 38, 38,  -7, 1, 0 },   // row  8
  { 0, 3, -10, 35, 44, -11, 3, 0 },   // row  9 -->-->
  { 0, 2,  -9, 29, 50, -11, 3, 0 },   // row 10 -->
  { 0, 2,  -8, 24, 53, -10, 3, 0 },   // row 11 -->-->
  { 0, 2,  -7, 19, 57,  -9, 2, 0 },   // row 12
  { 0, 1,  -5, 14, 60,  -8, 2, 0 },   // row 13 -->-->
  { 0, 1,  -3,  9, 62,  -6, 1, 0 },   // row 14 -->
  { 0, 0,  -2,  4, 64,  -3, 1, 0 }    // row 15 -->-->
};
91
92
// 16 interpolation filters with 4 taps each; the row index selects the filter.
// Each row sums to 64, matching the ( sum + 32 ) >> 6 normalisation of the 4-tap filter cores in this file.
// The arrow markers on the rows are carried over unchanged from the original table.
const int16_t MCTF::m_interpolationFilter4[16][4] =
{
  {  0, 64,  0,  0 },   // row  0
  { -2, 62,  4,  0 },   // row  1 -->-->
  { -2, 58, 10, -2 },   // row  2 -->
  { -4, 56, 14, -2 },   // row  3 -->-->
  { -4, 54, 16, -2 },   // row  4
  { -6, 52, 20, -2 },   // row  5 -->-->
  { -6, 46, 28, -4 },   // row  6 -->
  { -4, 42, 30, -4 },   // row  7 -->-->
  { -4, 36, 36, -4 },   // row  8
  { -4, 30, 42, -4 },   // row  9 -->-->
  { -4, 28, 46, -6 },   // row 10 -->
  { -2, 20, 52, -6 },   // row 11 -->-->
  { -2, 16, 54, -4 },   // row 12
  { -2, 14, 56, -4 },   // row 13 -->-->
  { -2, 10, 58, -2 },   // row 14 -->
  {  0,  4, 62, -2 },   // row 15 -->-->
};
111
112
// Per-reference filter strengths: min(..., {3 or 5} / (1 + 2 * |POC offset|)).
// First index: 0 = RA row, 1 = LD row; second index: abs(POC offset) - 1.
const double MCTF::m_refStrengths[2][6] =
{
  // |POC offset| =  1        2       3       4       5       6
  /* RA */        { 0.84375, 0.6,    0.4286, 0.3333, 0.2727, 0.2308 },
  /* LD */        { 1.12500, 1.0,    0.7143, 0.5556, 0.4545, 0.3846 }
};
118
119
// Threshold table and centre value used by the cu-tree related logic (consumers are outside this section).
const int    MCTF::m_cuTreeThresh[4] = { 75, 60, 30, 15 };
const double MCTF::m_cuTreeCenter    = 45;
121
122
int motionErrorLumaInt( const Pel* org, const ptrdiff_t origStride, const Pel* buf, const ptrdiff_t buffStride, const int w, const int h, const int besterror )
123
0
{
124
0
  int error = 0;
125
126
0
  for( int y1 = 0; y1 < h; y1++ )
127
0
  {
128
0
    const Pel* origRowStart   = org + y1 * origStride;
129
0
    const Pel* bufferRowStart = buf + y1 * buffStride;
130
131
0
    for( int x1 = 0; x1 < w; x1 += 2 )
132
0
    {
133
0
      int diff = origRowStart[x1] - bufferRowStart[x1];
134
0
      error += diff * diff;
135
0
      diff = origRowStart[x1 + 1] - bufferRowStart[x1 + 1];
136
0
      error += diff * diff;
137
0
    }
138
0
    if( error > besterror )
139
0
    {
140
0
      return error;
141
0
    }
142
0
  }
143
144
0
  return error;
145
0
}
146
147
int motionErrorLumaFrac6( const Pel *org, const ptrdiff_t origStride, const Pel *buf, const ptrdiff_t buffStride, const int w, const int h, const int16_t *xFilter, const int16_t *yFilter, const int bitDepth, const int besterror )
148
0
{
149
0
  int error = 0;
150
0
  Pel tempArray[64 + 8][64];
151
0
  int sum, base;
152
0
  const Pel maxSampleValue = ( 1 << bitDepth ) - 1;
153
154
0
  for( int y1 = 1; y1 < h + 7; y1++ )
155
0
  {
156
0
    const int yOffset = y1 - 3;
157
0
    const Pel *sourceRow = buf + yOffset * buffStride;
158
0
    for( int x1 = 0; x1 < w; x1++ )
159
0
    {
160
0
      sum = 0;
161
0
      base = x1 - 3;
162
0
      const Pel *rowStart = sourceRow + base;
163
164
0
      sum += xFilter[1] * rowStart[1];
165
0
      sum += xFilter[2] * rowStart[2];
166
0
      sum += xFilter[3] * rowStart[3];
167
0
      sum += xFilter[4] * rowStart[4];
168
0
      sum += xFilter[5] * rowStart[5];
169
0
      sum += xFilter[6] * rowStart[6];
170
171
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
172
0
      sum = sum < 0 ? 0 : ( sum > maxSampleValue ? maxSampleValue : sum );
173
174
0
      tempArray[y1][x1] = sum;
175
0
    }
176
0
  }
177
178
0
  for( int y1 = 0; y1 < h; y1++ )
179
0
  {
180
0
    const Pel *origRow = org + y1 * origStride;
181
0
    for( int x1 = 0; x1 < w; x1++ )
182
0
    {
183
0
      sum = 0;
184
0
      sum += yFilter[1] * tempArray[y1 + 1][x1];
185
0
      sum += yFilter[2] * tempArray[y1 + 2][x1];
186
0
      sum += yFilter[3] * tempArray[y1 + 3][x1];
187
0
      sum += yFilter[4] * tempArray[y1 + 4][x1];
188
0
      sum += yFilter[5] * tempArray[y1 + 5][x1];
189
0
      sum += yFilter[6] * tempArray[y1 + 6][x1];
190
191
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
192
0
      sum = sum < 0 ? 0 : ( sum > maxSampleValue ? maxSampleValue : sum );
193
194
0
      error += ( sum - origRow[x1] ) * ( sum - origRow[x1] );
195
0
    }
196
0
    if( error > besterror )
197
0
    {
198
0
      return error;
199
0
    }
200
0
  }
201
202
0
  return error;
203
0
}
204
205
int motionErrorLumaFrac4( const Pel* org, const ptrdiff_t origStride, const Pel* buf, const ptrdiff_t buffStride, const int w, const int h, const int16_t* xFilter, const int16_t* yFilter, const int bitDepth, const int besterror )
206
0
{
207
0
  int error = 0;
208
0
  Pel tempArray[64 + 4][64];
209
0
  int sum, base;
210
0
  const Pel maxSampleValue = ( 1 << bitDepth ) - 1;
211
212
0
  for( int y1 = 0; y1 < h + 3; y1++ )
213
0
  {
214
0
    const int yOffset = y1 - 1;
215
0
    const Pel* sourceRow = buf + yOffset * buffStride;
216
0
    for( int x1 = 0; x1 < w; x1++ )
217
0
    {
218
0
      sum = 0;
219
0
      base = x1 - 1;
220
0
      const Pel* rowStart = sourceRow + base;
221
222
0
      sum += xFilter[0] * rowStart[0];
223
0
      sum += xFilter[1] * rowStart[1];
224
0
      sum += xFilter[2] * rowStart[2];
225
0
      sum += xFilter[3] * rowStart[3];
226
227
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
228
0
      sum = sum < 0 ? 0 : ( sum > maxSampleValue ? maxSampleValue : sum );
229
230
0
      tempArray[y1][x1] = sum;
231
0
    }
232
0
  }
233
234
0
  for( int y1 = 0; y1 < h; y1++ )
235
0
  {
236
0
    const Pel* origRow = org + y1 * origStride;
237
0
    for( int x1 = 0; x1 < w; x1++ )
238
0
    {
239
0
      sum = 0;
240
0
      sum += yFilter[0] * tempArray[y1 + 0][x1];
241
0
      sum += yFilter[1] * tempArray[y1 + 1][x1];
242
0
      sum += yFilter[2] * tempArray[y1 + 2][x1];
243
0
      sum += yFilter[3] * tempArray[y1 + 3][x1];
244
245
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
246
0
      sum = sum < 0 ? 0 : ( sum > maxSampleValue ? maxSampleValue : sum );
247
248
0
      error += ( sum - origRow[x1] ) * ( sum - origRow[x1] );
249
0
    }
250
0
    if( error > besterror )
251
0
    {
252
0
      return error;
253
0
    }
254
0
  }
255
256
0
  return error;
257
0
}
258
259
void applyFrac8Core_6Tap( const Pel* org, const ptrdiff_t origStride, Pel* dst, const ptrdiff_t dstStride, const int w, const int h, const int16_t* xFilter, const int16_t* yFilter, const int bitDepth )
260
0
{
261
0
  const int numFilterTaps   = 7;
262
0
  const int centreTapOffset = 3;
263
0
  const int maxValue        = ( 1 << bitDepth ) - 1;
264
265
0
  Pel tempArray[64 + numFilterTaps][64];
266
267
0
  for( int by = 1; by < h + numFilterTaps - 1; by++ )
268
0
  {
269
0
    const int yOffset = by - centreTapOffset;
270
0
    const Pel *sourceRow = org + yOffset * origStride;
271
0
    for( int bx = 0; bx < w; bx++ )
272
0
    {
273
0
      int base = bx - centreTapOffset;
274
0
      const Pel *rowStart = sourceRow + base;
275
276
0
      int sum = 0;
277
0
      sum += xFilter[1] * rowStart[1];
278
0
      sum += xFilter[2] * rowStart[2];
279
0
      sum += xFilter[3] * rowStart[3];
280
0
      sum += xFilter[4] * rowStart[4];
281
0
      sum += xFilter[5] * rowStart[5];
282
0
      sum += xFilter[6] * rowStart[6];
283
284
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
285
0
      tempArray[by][bx] = sum;
286
0
    }
287
0
  }
288
289
0
  Pel *dstRow = dst;
290
0
  for( int by = 0; by < h; by++, dstRow += dstStride )
291
0
  {
292
0
    Pel *dstPel = dstRow;
293
0
    for( int bx = 0; bx < w; bx++, dstPel++ )
294
0
    {
295
0
      int sum = 0;
296
297
0
      sum += yFilter[1] * tempArray[by + 1][bx];
298
0
      sum += yFilter[2] * tempArray[by + 2][bx];
299
0
      sum += yFilter[3] * tempArray[by + 3][bx];
300
0
      sum += yFilter[4] * tempArray[by + 4][bx];
301
0
      sum += yFilter[5] * tempArray[by + 5][bx];
302
0
      sum += yFilter[6] * tempArray[by + 6][bx];
303
304
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
305
0
      sum = sum < 0 ? 0 : ( sum > maxValue ? maxValue : sum );
306
0
      *dstPel = sum;
307
0
    }
308
0
  }
309
0
}
310
311
void applyFrac8Core_4Tap( const Pel* org, const ptrdiff_t origStride, Pel* dst, const ptrdiff_t dstStride, const int w, const int h, const int16_t* xFilter, const int16_t* yFilter, const int bitDepth )
312
0
{
313
0
  const int numFilterTaps   = 3;
314
0
  const int centreTapOffset = 1;
315
0
  const int maxValue        = ( 1 << bitDepth ) - 1;
316
317
0
  Pel tempArray[64 + numFilterTaps][64];
318
319
0
  for( int by = 0; by < h + numFilterTaps; by++ )
320
0
  {
321
0
    const int yOffset    = by - centreTapOffset;
322
0
    const Pel* sourceRow = org + yOffset * origStride;
323
324
0
    for( int bx = 0; bx < w; bx++ )
325
0
    {
326
0
      int base = bx - centreTapOffset;
327
0
      const Pel* rowStart = sourceRow + base;
328
329
0
      int sum = 0;
330
0
      sum += xFilter[0] * rowStart[0];
331
0
      sum += xFilter[1] * rowStart[1];
332
0
      sum += xFilter[2] * rowStart[2];
333
0
      sum += xFilter[3] * rowStart[3];
334
335
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
336
0
      tempArray[by][bx] = sum;
337
0
    }
338
0
  }
339
340
0
  Pel* dstRow = dst;
341
0
  for( int by = 0; by < h; by++, dstRow += dstStride )
342
0
  {
343
0
    Pel* dstPel = dstRow;
344
0
    for( int bx = 0; bx < w; bx++, dstPel++ )
345
0
    {
346
0
      int sum = 0;
347
0
      sum += yFilter[0] * tempArray[by + 0][bx];
348
0
      sum += yFilter[1] * tempArray[by + 1][bx];
349
0
      sum += yFilter[2] * tempArray[by + 2][bx];
350
0
      sum += yFilter[3] * tempArray[by + 3][bx];
351
352
0
      sum = ( sum + ( 1 << 5 ) ) >> 6;
353
0
      sum = sum < 0 ? 0 : ( sum > maxValue ? maxValue : sum );
354
0
      *dstPel = sum;
355
0
    }
356
0
  }
357
0
}
358
359
// Approximates exp( n / d ) via e^x ~= ( 1 + x / N )^N with N = 1024:
// the base is squared ten times in a row, which raises it to the power 2^10.
inline static float fastExp( float n, float d )
{
  float base = 1.0f + n / ( d * 1024 );

  for( int squaring = 0; squaring < 10; squaring++ )
  {
    base *= base;
  }

  return base;
}
368
369
static const int32_t xSzm[6] = {0, 1, 20, 336, 5440, 87296};
370
371
// works for bit depths up to incl. 12 and power-of-2 block dimensions in both directions
372
void applyPlanarCorrectionCore( const Pel* refPel, const ptrdiff_t refStride, Pel* dstPel, const ptrdiff_t dstStride, const int32_t w, const int32_t h, const ClpRng& clpRng, const uint16_t motionError )
373
0
{
374
0
  const int32_t blockSize = w * h;
375
0
  const int32_t log2Width = floorLog2 (w);
376
0
  const int32_t maxPelVal = clpRng.max();
377
0
  const int32_t mWeight   = std::min (512u, (uint32_t) motionError * (uint32_t) motionError);
378
0
  const int32_t xSum      = (blockSize * (w - 1)) >> 1;
379
0
  int32_t x1yzm = 0,  x2yzm = 0,  ySum = 0;
380
0
  int32_t b0, b1, b2;
381
0
  int64_t numer, denom;
382
383
0
  for (int32_t y = 0; y < h; y++) // sum up dot-products between indices and sample diffs
384
0
  {
385
0
    for (int32_t x = 0; x < w; x++)
386
0
    {
387
0
      const Pel* pDst = dstPel + y * dstStride + x;
388
0
      const Pel* pRef = refPel + y * refStride + x;
389
0
      const int32_t z = *pDst - *pRef;
390
391
0
      x1yzm += x * z;  x2yzm += y * z;  ySum += z;
392
0
    }
393
0
  }
394
395
0
  denom = blockSize * xSzm[log2Width]; // plane-fit parameters, in fixed-point arithmetic
396
0
  numer = (int64_t) mWeight * ((int64_t) x1yzm * blockSize - xSum * ySum);
397
0
  b1 = int32_t ((numer < 0 ? numer - (denom >> 1) : numer + (denom >> 1)) / denom);
398
0
  b1 = (b1 < INT16_MIN ? INT16_MIN : (b1 > INT16_MAX ? INT16_MAX : b1));
399
0
  numer = (int64_t) mWeight * ((int64_t) x2yzm * blockSize - xSum * ySum);
400
0
  b2 = int32_t ((numer < 0 ? numer - (denom >> 1) : numer + (denom >> 1)) / denom);
401
0
  b2 = (b2 > INT16_MAX ? INT16_MAX : (b2 < INT16_MIN ? INT16_MIN : b2));
402
0
  b0 = (mWeight * ySum - (b1 + b2) * xSum + (blockSize >> 1)) >> (log2Width << 1);
403
404
0
  if (b0 == 0 && b1 == 0 && b2 == 0) return;
405
406
0
  for (int32_t y = 0; y < h; y++) // perform deblocking by adding fitted correction plane
407
0
  {
408
0
    for (int32_t x = 0; x < w; x++)
409
0
    {
410
0
      Pel* const pDst = dstPel + y * dstStride + x;
411
0
      const int32_t p = (b0 + b1 * x + b2 * y + 256) >> 9; // fixed-point plane corrector
412
0
      const int32_t z = *pDst - p;
413
414
0
      *pDst = Pel (z < 0 ? 0 : (z > maxPelVal ? maxPelVal : z));
415
0
    }
416
0
  }
417
0
}
418
419
void applyBlockCore( const CPelBuf& src, PelBuf& dst, const CompArea& blk, const ClpRng& clpRng, const Pel** correctedPics, int numRefs, const int* verror, const double* refStrenghts, double weightScaling, double sigmaSq )
420
0
{
421
0
  const int         w = blk.width;
422
0
  const int         h = blk.height;
423
0
  const int        bx = blk.x;
424
0
  const int        by = blk.y;
425
426
0
  const ptrdiff_t srcStride = src.stride;
427
0
  const ptrdiff_t dstStride = dst.stride;
428
429
0
  const Pel *srcPel = src.bufAt( bx, by );
430
0
        Pel *dstPel = dst.bufAt( bx, by );
431
432
0
  const Pel maxSampleValue = clpRng.max();
433
434
0
  int vnoise[2 * VVENC_MCTF_RANGE] = { 0, };
435
0
  float vsw [2 * VVENC_MCTF_RANGE] = { 0.0f, };
436
0
  float vww [2 * VVENC_MCTF_RANGE] = { 0.0f, };
437
438
0
  int minError = INT32_MAX;
439
440
0
  for( int i = 0; i < numRefs; i++ )
441
0
  {
442
0
    int64_t variance = 0, diffsum = 0;
443
0
    const ptrdiff_t refStride = w;
444
0
    const Pel *     refPel    = correctedPics[i];
445
0
    for( int y1 = 0; y1 < h; y1++ )
446
0
    {
447
0
      for( int x1 = 0; x1 < w; x1++ )
448
0
      {
449
0
        const Pel pix = *( srcPel + srcStride * y1 + x1 );
450
0
        const Pel ref = *( refPel + refStride * y1 + x1 );
451
452
0
        const int diff = pix - ref;
453
0
        variance += diff * diff;
454
0
        if( x1 != w - 1 )
455
0
        {
456
0
          const Pel pixR = *( srcPel + srcStride * y1 + x1 + 1 );
457
0
          const Pel refR = *( refPel + refStride * y1 + x1 + 1 );
458
0
          const int diffR = pixR - refR;
459
0
          diffsum += ( diffR - diff ) * ( diffR - diff );
460
0
        }
461
0
        if( y1 != h - 1 )
462
0
        {
463
0
          const Pel pixD = *( srcPel + srcStride * y1 + x1 + srcStride );
464
0
          const Pel refD = *( refPel + refStride * y1 + x1 + refStride );
465
0
          const int diffD = pixD - refD;
466
0
          diffsum += ( diffD - diff ) * ( diffD - diff );
467
0
        }
468
0
      }
469
0
    }
470
0
    variance *= (int64_t) 1 << (2*(10-clpRng.bd));
471
0
    diffsum  *= (int64_t) 1 << (2*(10-clpRng.bd));
472
0
    const int cntV = w * h;
473
0
    const int cntD = 2 * cntV - w - h;
474
0
    vnoise[i] = ( int ) round( ( 15.0 * cntD / cntV * variance + 5.0 ) / ( diffsum + 5.0 ) );
475
0
    minError = std::min( minError, verror[i] );
476
0
  }
477
478
0
  for( int i = 0; i < numRefs; i++ )
479
0
  {
480
0
    const int error = verror[i];
481
0
    const int noise = vnoise[i];
482
0
    float ww = 1, sw = 1;
483
0
    ww *= ( noise < 25 ) ? 1.0 : 0.6;
484
0
    sw *= ( noise < 25 ) ? 1.0 : 0.8;
485
0
    ww *= ( error < 50 ) ? 1.2 : ( ( error > 100 ) ? 0.6 : 1.0 );
486
0
    sw *= ( error < 50 ) ? 1.0 : 0.8;
487
0
    ww *= ( ( minError + 1.0 ) / ( error + 1.0 ) );
488
489
0
    vww[i] = ww * weightScaling * refStrenghts[i];
490
0
    vsw[i] = sw * 2 * sigmaSq;
491
0
  }
492
493
0
  for( int y = 0; y < h; y++ )
494
0
  {
495
0
    for( int x = 0; x < w; x++ )
496
0
    {
497
0
      const Pel orgVal  = *( srcPel + srcStride * y + x );
498
0
      float temporalWeightSum = 1.0;
499
0
      float newVal = ( float ) orgVal;
500
501
0
      for( int i = 0; i < numRefs; i++ )
502
0
      {
503
0
        const Pel* pCorrectedPelPtr = correctedPics[i] + y * w + x;
504
0
        const int    refVal = *pCorrectedPelPtr;
505
0
        const int    diff   = refVal - orgVal;
506
0
        const float  diffSq = diff * diff;
507
508
0
        float weight = vww[i] * fastExp( -diffSq, vsw[i] );
509
0
        newVal += weight * refVal;
510
0
        temporalWeightSum += weight;
511
0
      }
512
0
      newVal /= temporalWeightSum;
513
0
      Pel sampleVal = ( Pel ) ( newVal + 0.5 );
514
0
      sampleVal = ( sampleVal < 0 ? 0 : ( sampleVal > maxSampleValue ? maxSampleValue : sampleVal ) );
515
0
      *( dstPel + dstStride * y + x ) = sampleVal;
516
0
    }
517
0
  }
518
0
}
519
520
double calcVarCore( const Pel* org, const ptrdiff_t origStride, const int w, const int h )
521
0
{
522
  // calculate average
523
0
  int avg = 0;
524
0
  for( int y1 = 0; y1 < h; y1++ )
525
0
  {
526
0
    for( int x1 = 0; x1 < w; x1++ )
527
0
    {
528
0
      avg = avg + *( org + x1 + y1 * origStride );
529
0
    }
530
0
  }
531
0
  avg <<= 4;
532
0
  avg = avg / ( w * h );
533
534
  // calculate variance
535
0
  int64_t variance = 0;
536
0
  for( int y1 = 0; y1 < h; y1++ )
537
0
  {
538
0
    for( int x1 = 0; x1 < w; x1++ )
539
0
    {
540
0
      int pix = *( org + x1 + y1 * origStride ) << 4;
541
0
      variance = variance + ( pix - avg ) * ( pix - avg );
542
0
    }
543
0
  }
544
545
0
  return variance / 256.0;
546
0
}
547
548
// Constructs the filter with the scalar (non-SIMD) kernels installed.
// When enableOpt is set and the build provides SIMD support for MCTF, the platform init routine is
// called afterwards (it is expected to replace kernels with optimised versions).
MCTF::MCTF( bool enableOpt )
  : m_encCfg     ( nullptr )
  , m_threadPool ( nullptr )
  , m_isFinalPass( true )
  , m_filterPoc  ( 0 )
  , m_lastPicIn  ( nullptr )
{
  // integer-position motion error: one kernel serves both slots
  m_motionErrorLumaIntX = motionErrorLumaInt;
  m_motionErrorLumaInt8 = motionErrorLumaInt;

  // fractional-position motion error: index 0 = 6-tap, index 1 = 4-tap
  m_motionErrorLumaFracX[0] = motionErrorLumaFrac6;
  m_motionErrorLumaFracX[1] = motionErrorLumaFrac4;
  m_motionErrorLumaFrac8[0] = motionErrorLumaFrac6;
  m_motionErrorLumaFrac8[1] = motionErrorLumaFrac4;

  // interpolation kernels: second index 0 = 6-tap, 1 = 4-tap; both first indices share the kernels
  for( int i = 0; i < 2; i++ )
  {
    m_applyFrac[i][0] = applyFrac8Core_6Tap;
    m_applyFrac[i][1] = applyFrac8Core_4Tap;
  }

  m_applyPlanarCorrection = applyPlanarCorrectionCore;
  m_applyBlock            = applyBlockCore;
  m_calcVar               = calcVarCore;

  if( enableOpt )
  {
#if defined( TARGET_SIMD_X86 ) && ENABLE_SIMD_OPT_MCTF
    initMCTF_X86();
#endif
#if defined( TARGET_SIMD_ARM ) && ENABLE_SIMD_OPT_MCTF
    initMCTF_ARM();
#endif
  }
}
579
580
// Nothing to release explicitly: the destructor body is empty.
MCTF::~MCTF()
{
}
583
584
void MCTF::init( const VVEncCfg& encCfg, bool isFinalPass, NoMallocThreadPool* threadPool )
585
0
{
586
0
  CHECK( encCfg.m_vvencMCTF.numFrames != encCfg.m_vvencMCTF.numStrength, "should have been checked before" );
587
588
0
  m_encCfg      = &encCfg;
589
0
  m_threadPool  = threadPool;
590
0
  m_isFinalPass = isFinalPass;
591
0
  m_filterPoc   = 0;
592
0
  m_area        = Area( 0, 0, m_encCfg->m_PadSourceWidth, m_encCfg->m_PadSourceHeight );
593
594
  // TLayer (TL) dependent definition of drop frames: TL = 4,  TL = 3,  TL = 2,  TL = 1,  TL = 0
595
0
  const static int sMCTFSpeed[5] { 0, 0, ((3<<12) + (2<<9) + (2<<6) + (0<<3) + 0),   ((3<<12) + (2<<9) + (2<<6) + (0<<3) + 0),   ((3<<12) + (3<<9) + (3<<6) + (2<<3) + 2) };
596
597
0
  m_MCTFSpeedVal     = sMCTFSpeed[ m_encCfg->m_vvencMCTF.MCTFSpeed ];
598
0
  m_lowResFltSearch  = m_encCfg->m_vvencMCTF.MCTFSpeed > 0;
599
0
  m_searchPttrn      = m_encCfg->m_vvencMCTF.MCTFSpeed > 0 ? ( m_encCfg->m_vvencMCTF.MCTFSpeed >= 3 ? 2 : 1 ) : 0;
600
0
  m_mctfUnitSize     = m_encCfg->m_vvencMCTF.MCTFUnitSize;
601
0
}
602
603
// ====================================================================================================================
604
// Public member functions
605
// ====================================================================================================================
606
607
608
// Prepares a newly arrived picture: extends the border of its original buffer by MCTF_PADDING
// samples in both directions and lets the picture set its SCC flags from the encoder configuration.
void MCTF::initPicture( Picture* pic )
{
  const int borderSize = MCTF_PADDING;

  pic->getOrigBuf().extendBorderPel( borderSize, borderSize );
  pic->setSccFlags( m_encCfg );
}
613
614
void MCTF::processPictures( const PicList& picList, AccessUnitList& auList, PicList& doneList, PicList& freeList )
615
0
{
616
  // ensure this is only processed if necessary 
617
0
  if( picList.empty() || ( m_lastPicIn == picList.back() && ! picList.back()->isFlush ))
618
0
  {
619
0
    return;
620
0
  }
621
0
  m_lastPicIn = picList.back();
622
623
  // filter one picture (either all or up to frames to be encoded)
624
0
  if( picList.size()
625
0
      && m_filterPoc <= picList.back()->poc
626
0
      && ( m_encCfg->m_framesToBeEncoded <= 0 || m_filterPoc < m_encCfg->m_framesToBeEncoded ) )
627
0
  {
628
    // setup fifo of pictures to be filtered
629
0
    std::deque<Picture*> picFifo;
630
0
    int filterIdx = 0;
631
0
    for( auto pic : picList )
632
0
    {
633
0
      const int minPoc = m_filterPoc - VVENC_MCTF_RANGE;
634
0
      const int maxPoc = m_encCfg->m_vvencMCTF.MCTFFutureReference ? m_filterPoc + VVENC_MCTF_RANGE : m_filterPoc;
635
0
      if( pic->poc >= minPoc && pic->poc <= maxPoc )
636
0
      {
637
0
        picFifo.push_back( pic );
638
0
        if( pic->poc < m_filterPoc )
639
0
        {
640
0
          filterIdx += 1;
641
0
        }
642
0
      }
643
0
    }
644
0
    CHECK( picFifo.empty(), "MCTF: no pictures to be filtered found" );
645
0
    CHECK( filterIdx >= (int)picFifo.size(), "MCTF: picture filter error" );
646
0
    CHECK( picFifo[ filterIdx ]->poc != m_filterPoc, "MCTF: picture filter error" );
647
    // filter picture (when more than 1 picture is available for processing)
648
0
    if( picFifo.size() > 1 )
649
0
    {
650
0
      filter( picFifo, filterIdx );
651
0
    }
652
    // set picture done
653
0
    doneList.push_back( picFifo[ filterIdx ] );
654
0
  }
655
656
  // mark pictures not needed anymore
657
0
  for( auto pic : picList )
658
0
  {
659
0
    if( pic->poc > m_filterPoc - VVENC_MCTF_RANGE )
660
0
      break;
661
0
    freeList.push_back( pic );
662
0
  }
663
0
  m_filterPoc += 1;
664
0
}
665
666
// Appends an entry for curPic to srcFrameInfo and runs a coarse-to-fine luma motion search of curPic
// against the picture being filtered.
//   curPic             : reference picture; its original buffer becomes the entry's picBuffer
//   srcFrameInfo       : receives the new entry (picture buffer, motion vectors, strength index)
//   origBuf            : original buffer of the picture being filtered
//   origSubsampled2/4/8: luma of origBuf subsampled once, twice and three times
//   mvErr, minError    : when calcErr is set, the mean block error is appended to mvErr and
//                        minError is lowered to it if it is smaller
//   addLevel           : adds a fourth, coarsest search level that uses origSubsampled8
//   calcErr            : enables the error statistics described above
void MCTF::motionEstimationMCTF(Picture* curPic, std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, const PelStorage& origBuf, PelStorage& origSubsampled2, PelStorage& origSubsampled4, PelStorage& origSubsampled8, std::vector<double> &mvErr, double &minError, bool addLevel, bool calcErr)
{
  srcFrameInfo.push_back(TemporalFilterSourcePicInfo());
  TemporalFilterSourcePicInfo& refInfo = srcFrameInfo.back();

  // number of filter units per row / column, rounded up
  const int wInBlks = (m_area.width + m_mctfUnitSize - 1) / m_mctfUnitSize;
  const int hInBlks = (m_area.height + m_mctfUnitSize - 1) / m_mctfUnitSize;

  refInfo.picBuffer.createFromBuf(curPic->getOrigBuf());
  refInfo.mvs.allocate(wInBlks, hInBlks);
  // POC distance minus one, capped at 5
  refInfo.index = std::min(5, std::abs(curPic->poc - m_filterPoc) - 1);

  const int width  = m_area.width;
  const int height = m_area.height;

  // motion fields of the intermediate search levels, coarsest first
  Array2D<MotionVector> mvEighth (width / (m_mctfUnitSize * 8) + 1, height / (m_mctfUnitSize * 8) + 1);
  Array2D<MotionVector> mvQuarter(width / (m_mctfUnitSize * 4) + 1, height / (m_mctfUnitSize * 4) + 1);
  Array2D<MotionVector> mvHalf   (width / (m_mctfUnitSize * 2) + 1, height / (m_mctfUnitSize * 2) + 1);

  // subsampled versions of the reference picture
  PelStorage refSub2;
  PelStorage refSub4;

  subsampleLuma(refInfo.picBuffer, refSub2);
  subsampleLuma(refSub2, refSub4);

  if (!addLevel)
  {
    motionEstimationLuma(mvEighth, origSubsampled4, refSub4, 2 * m_mctfUnitSize);
  }
  else
  {
    // extra coarsest level; its result seeds the search on the 4x subsampled pictures
    Array2D<MotionVector> mvSixteenth(width / (m_mctfUnitSize * 16) + 1, height / (m_mctfUnitSize * 16) + 1);
    PelStorage refSub8;
    subsampleLuma(refSub4, refSub8);
    motionEstimationLuma(mvSixteenth, origSubsampled8, refSub8, 2 * m_mctfUnitSize);
    motionEstimationLuma(mvEighth, origSubsampled4, refSub4, 2 * m_mctfUnitSize, &mvSixteenth, 2);
  }

  // each level is seeded with the motion field of the previous, coarser one
  motionEstimationLuma(mvQuarter, origSubsampled2, refSub2, 2 * m_mctfUnitSize, &mvEighth, 2);
  motionEstimationLuma(mvHalf, origBuf, refInfo.picBuffer, 2 * m_mctfUnitSize, &mvQuarter, 2);

  // final level at the filter unit size, result stored in the new entry
  motionEstimationLuma(refInfo.mvs, origBuf, refInfo.picBuffer, m_mctfUnitSize, &mvHalf, 1, true);

  if (!calcErr)
  {
    return;
  }

  // mean error over all blocks of the final motion field
  double sumErr = 0.0;
  for (int blkY = 0; blkY < refInfo.mvs.h(); blkY++) // going over ref pic in block steps
  {
    for (int blkX = 0; blkX < refInfo.mvs.w(); blkX++)
    {
      sumErr += refInfo.mvs.get(blkX, blkY).error;
    }
  }
  const double invNumBlks = 1.0 / (refInfo.mvs.w() * refInfo.mvs.h());
  const double meanErr    = sumErr * invNumBlks;

  mvErr.push_back(meanErr);
  minError = std::min(minError, meanErr);
}
725
726
// Runs the MCTF stage for picture picFifo[filterIdx].
//
// Steps, in order:
//   1. derive the per-picture strength and the number of neighbouring pictures to use,
//   2. estimate motion between the picture and each selected neighbour in picFifo,
//   3. optionally extend / prune the reference list based on the mean motion errors,
//   4. if pic->useMCTF is set, write the bilaterally filtered picture to the filtered-orig buffer,
//   5. derive noise levels, the motion estimation error, the SCC hint and the per-CTU BIM QP
//      offsets, all stored in pic->m_picShared.
//
// picFifo   : window of pictures around the one to be filtered
// filterIdx : position of the picture to be filtered inside picFifo
void MCTF::filter( const std::deque<Picture*>& picFifo, int filterIdx )
{
  PROFILER_SCOPE_AND_STAGE( 1, g_timeProfiler, P_MCTF );

  Picture* pic = picFifo[ filterIdx ];

  // first-pass temporal downsampling
  // gopEntry is treated as optional below, so guard it here as well
  if( ! m_isFinalPass && pic->gopEntry && pic->gopEntry->m_skipFirstPass )
  {
    return;
  }

  const int mctfIdx            = pic->gopEntry ? pic->gopEntry->m_mctfIndex : -1;
  const double overallStrength = mctfIdx >= 0 ? m_encCfg->m_vvencMCTF.MCTFStrengths[ mctfIdx ] : -1.0;
  double   meanRmsAcrossPic    = 0.0;
  uint64_t sumSRmsAcrossPic    = 0;
  uint16_t nMax = 0, maxRmsCTU = 0;
  bool  isFilterThisFrame      = mctfIdx >= 0;

  int dropFrames = ( m_encCfg->m_usePerceptQPA ? VVENC_MCTF_RANGE >> 1 : 0 );
  if( mctfIdx >= 0 )
  {
    // m_MCTFSpeedVal packs one 3-bit threshold per MCTF index
    const int idxTLayer = m_encCfg->m_vvencMCTF.numFrames - (mctfIdx + 1);
    const int threshold = (m_MCTFSpeedVal >> (idxTLayer * 3)) & 7;

    dropFrames          = std::min(VVENC_MCTF_RANGE, threshold);
    isFilterThisFrame   = threshold < VVENC_MCTF_RANGE;
  }

  const int filterFrames = VVENC_MCTF_RANGE - 2 - dropFrames;
  const int numPics      = static_cast<int>( picFifo.size() );

  // number of FIFO entries to ignore at the front / back of the window
  const int dropFramesFront = std::min( std::max(               filterIdx - filterFrames, 0 ), dropFrames + 2 );
  const int dropFramesBack  = std::min( std::max( numPics - 1 - filterIdx - filterFrames, 0 ), dropFrames + 2 );

  if( !pic->useMCTF && !( pic->gopEntry && pic->gopEntry->m_isStartOfGop ) )
  {
    isFilterThisFrame = false;
  }

  if ( isFilterThisFrame )
  {
    // isFilterThisFrame implies mctfIdx >= 0, hence pic->gopEntry is non-null in this branch
    bool  useMCTFadaptation = true;
    const bool condAddLevel = useMCTFadaptation && m_area.width >= 1920;
    std::vector<double> mvErr;
    double minError = MAX_DOUBLE;

    const PelStorage& origBuf = pic->getOrigBuffer();
          PelStorage& fltrBuf = pic->getFilteredOrigBuffer();

    // subsample original picture so it only needs to be done once
    PelStorage origSubsampled2;
    PelStorage origSubsampled4;
    PelStorage origSubsampled8;
    subsampleLuma( origBuf,         origSubsampled2 );
    subsampleLuma( origSubsampled2, origSubsampled4 );
    if (condAddLevel)
    {
      subsampleLuma(origSubsampled4, origSubsampled8);
    }

    // determine motion vectors
    std::deque<TemporalFilterSourcePicInfo> srcFrameInfo;
    const int endRefIdx = numPics - dropFramesBack;
    for ( int i = dropFramesFront; i < endRefIdx; i++ )
    {
      Picture* curPic = picFifo[ i ];
      if ( curPic->poc == m_filterPoc )
      {
        continue;
      }
      motionEstimationMCTF(curPic, srcFrameInfo, origBuf, origSubsampled2, origSubsampled4, origSubsampled8 ,mvErr, minError, condAddLevel, useMCTFadaptation);
    }

    // adapt the set of reference pictures to the observed motion errors
    int lastIndexRefFr = -1;
    if ((m_encCfg->m_vvencMCTF.MCTFSpeed < 4) && (minError > 80))
    {
      useMCTFadaptation = false;
    }
    if (useMCTFadaptation && minError)
    {
      const double errThr = 0.75 * minError * srcFrameInfo.size();
      int avgErrCond = 0;
      int minErrCond = 0;
      double factErr = m_encCfg->m_vvencMCTF.MCTFSpeed < 4 ? 1.0 : 2.0 ;
      double SizeThi = m_encCfg->m_vvencMCTF.MCTFSpeed < 4  ? filterFrames + 1 : 3.0;

      for (const double& framMvErr : mvErr)
      {
        if (factErr * framMvErr > errThr)
        {
          avgErrCond++;
        }
        if (framMvErr > SizeThi * minError)
        {
          minErrCond++;
        }
      }
      int newFilterFrames = minErrCond ? filterFrames : (filterFrames + 2 - avgErrCond);
      if (filterFrames <= 2 && newFilterFrames > 3)   newFilterFrames = 3;

      // try to add pictures at POC distance curIdx until one is rejected (speed 4 only)
      for (int curIdx = filterFrames + 1; (curIdx < newFilterFrames + 1) && (lastIndexRefFr == -1); curIdx++)
      {
        for (Picture* curPic : picFifo)
        {
          if (curIdx == std::abs(curPic->poc - m_filterPoc))
          {
            motionEstimationMCTF(curPic, srcFrameInfo, origBuf, origSubsampled2, origSubsampled4, origSubsampled8, mvErr, minError, condAddLevel, m_encCfg->m_vvencMCTF.MCTFSpeed == 4);
            if (m_encCfg->m_vvencMCTF.MCTFSpeed == 4)
            {
              // reference count rounded up to the next even number
              int nSize = (int(srcFrameInfo.size()) & 1) + int(srcFrameInfo.size());
              const double errThrcur = 0.75 * minError * nSize;
              if (mvErr.back() > errThrcur)
              {
                lastIndexRefFr = curIdx;
                break;
              }
            }
          }
        }
      }
      if (lastIndexRefFr != -1)
      {
        // drop every reference at or beyond the rejected distance (index is distance - 1)
        for (auto it = srcFrameInfo.begin(); it != srcFrameInfo.end(); )
        {
          if ((it->index + 1) >= lastIndexRefFr)
          {
            it = srcFrameInfo.erase(it);
          }
          else
          {
            ++it;
          }
        }
      }
    }

    // filter
    if( pic->useMCTF )
    {
      fltrBuf.create( m_encCfg->m_internChromaFormat, m_area, 0, m_padding );
      bilateralFilter( origBuf, srcFrameInfo, fltrBuf, overallStrength );
    }

    if( m_encCfg->m_blockImportanceMapping || m_encCfg->m_usePerceptQPA || pic->gopEntry->m_isStartOfGop )
    {
      const int ctuSize        = m_encCfg->m_bimCtuSize;
      const int widthInCtus    = ( m_area.width  + ctuSize - 1 ) / ctuSize;
      const int heightInCtus   = ( m_area.height + ctuSize - 1 ) / ctuSize;
      const int numCtu         = widthInCtus * heightInCtus;
      const int ctuBlocks      = ctuSize / m_mctfUnitSize;

      // per-CTU accumulators; entries [0, numCtu) hold index 0, [numCtu, 2 * numCtu) hold index 1
      std::vector<double> sumError( numCtu * 2, 0 );
      std::vector<uint32_t> sumRMS( numCtu * 2, 0 ); // RMS of motion estimation error
      std::vector<uint16_t> maxRMS( numCtu * 2, 0 ); // maximum block estimation error
      std::vector<double> blkCount( numCtu * 2, 0 );

      // starts at 3 and is decremented once per reference found at that index
      int distFactor[2] = { 3,3 };

      // true if at least one noise level entry is neither 0 nor saturated (255)
      const auto hasValidNoiseLevel = [pic]()
      {
        for( int i = 0; i < QPA_MAX_NOISE_LEVELS; i++ )
        {
          if( pic->m_picShared->m_minNoiseLevels[i] && pic->m_picShared->m_minNoiseLevels[i] < 255 )
          {
            return true;
          }
        }
        return false;
      };

      for( auto& srcPic : srcFrameInfo )
      {
        if( srcPic.index >= 2 )
        {
          continue;
        }

        int dist = srcPic.index;
        distFactor[dist]--;

        for( int y = 0; y < srcPic.mvs.h(); y++ ) // going over ref pic in block steps
        {
          for( int x = 0; x < srcPic.mvs.w(); x++ )
          {
            const int ctuX    = x / ctuBlocks;
            const int ctuY    = y / ctuBlocks;
            const int ctuId   = ctuY * widthInCtus + ctuX;
            const auto& mvBlk = srcPic.mvs.get( x, y );
            sumError[dist * numCtu + ctuId] += mvBlk.error;
            sumRMS  [dist * numCtu + ctuId] += mvBlk.rmsme;
            maxRMS  [dist * numCtu + ctuId] = std::max( maxRMS[dist * numCtu + ctuId], mvBlk.rmsme );
            blkCount[dist * numCtu + ctuId] += mvBlk.overlap;
          }
        }
      }

      if( distFactor[0] < 3 && distFactor[1] < 3 && ( m_encCfg->m_usePerceptQPA || pic->gopEntry->m_isStartOfGop ) )
      {
        const double bd12bScale = double (m_encCfg->m_internalBitDepth[CH_L] < 12 ? 4 : 1);

        for( int i = 0; i < numCtu; i++ ) // start noise estimation with motion errors
        {
          const Position pos ((i % widthInCtus) * ctuSize, (i / widthInCtus) * ctuSize);
          const CompArea ctuArea  = clipArea (CompArea (COMP_Y, pic->chromaFormat, Area (pos.x, pos.y, ctuSize, ctuSize)), pic->Y());
          const unsigned avgIndex = pic->getOrigBuf (ctuArea).getAvg() >> (m_encCfg->m_internalBitDepth[CH_L] - 3); // one of 8 mean level regions
          double meanInCTU;

          sumRMS[i] = std::min (sumRMS[i], sumRMS[i + numCtu]);
          meanInCTU = bd12bScale * sumRMS[i] / blkCount[i];
          meanRmsAcrossPic += meanInCTU;
          if (meanInCTU < pic->m_picShared->m_minNoiseLevels[avgIndex])
          {
            pic->m_picShared->m_minNoiseLevels[avgIndex] = uint8_t (0.5 + meanInCTU); // scaled to 12 bit, see filterAndCalculateAverageActivity()
          }

          maxRMS[i] = std::min (maxRMS[i], maxRMS[i + numCtu]);
          maxRmsCTU = std::max (maxRmsCTU, maxRMS[i]);
          sumSRmsAcrossPic += (uint64_t) maxRMS[i] * maxRMS[i];
          if (maxRMS[i] > 0)
          {
            nMax++; // count all CTUs with non-zero motion error (excludes e.g. black borders). CTU with the motion error peak is subtracted below
          }
        }
        pic->m_picShared->m_picMotEstError = uint16_t (0.5 + meanRmsAcrossPic / numCtu);

        if( pic->gopEntry->m_isStartOfGop && !pic->useMCTF && m_encCfg->m_vvencMCTF.MCTF > 0 && meanRmsAcrossPic > numCtu * 27.0 )
        {
          // check application (re-enabling) of MCTF filter for key pictures, in case MCTF has been disabled based on SCC detection
          const bool allNoiseZero = !hasValidNoiseLevel();
          int numZeroRMSCtus = 0;
          if( allNoiseZero )
          {
            for( int i = 0; i < numCtu; i++ )
            {
              if( sumRMS[i] == 0 )
              {
                numZeroRMSCtus += 1;
              }
            }
          }
          // filter unless more than 6% of the CTUs have zero motion error
          const bool doFilter = ( numZeroRMSCtus * 100 <= numCtu * 6 );
          if( doFilter )
          {
            fltrBuf.create( m_encCfg->m_internChromaFormat, m_area, 0, m_padding );
            bilateralFilter( origBuf, srcFrameInfo, fltrBuf, overallStrength );
          }
        }
      }
      if (m_encCfg->m_forceScc <= 0)
      {
        // SCC is only forced on GOP start pictures without any valid noise level
        // and without any reference whose mean motion error truncates to zero
        bool forceSCC = false;
        if (pic->gopEntry->m_isStartOfGop)
        {
          forceSCC = !hasValidNoiseLevel();
          if (forceSCC)
          {
            for (const double frameErr : mvErr)
            {
              if (int(frameErr) == 0)
              {
                forceSCC = false;
                break;
              }
            }
          }
        }
        pic->m_picShared->m_forceSCC = forceSCC;
      }

      if( !m_encCfg->m_blockImportanceMapping || !pic->useMCTF )
      {
        CHECKD( !pic->m_picShared->m_ctuBimQpOffset.empty(), "BIM disabled, but offset vector not empty!" );
        return;
      }

      pic->m_picShared->m_ctuBimQpOffset.resize( numCtu, 0 );

      if( distFactor[0] < 3 && distFactor[1] < 3 )
      {
        const double weight = std::min( 1.0, overallStrength );
        const double factor = std::min( 1.0, sqrt((1920.0 * 1080.0) / double (m_encCfg->m_SourceWidth * m_encCfg->m_SourceHeight)) ) * ( (double) m_encCfg->m_QP / (MAX_QP + 1.0) );
        int sumCtuQpOffsets = 0;

        // from here on meanRmsAcrossPic is reused as the RMS of the per-CTU maxima, excluding the peak CTU
        meanRmsAcrossPic = (!m_encCfg->m_usePerceptQPA || !m_encCfg->m_salienceBasedOpt || maxRmsCTU == 0 || nMax < 2 ? 65535.0 : sqrt (double (sumSRmsAcrossPic - (uint64_t) maxRmsCTU * maxRmsCTU) / (nMax - 1.0)));

        for( int i = 0; i < numCtu; i++ )
        {
          const int avgErrD1 = ( int ) ( ( sumError[i         ] / blkCount[i         ] ) * distFactor[0] );
          const int avgErrD2 = ( int ) ( ( sumError[i + numCtu] / blkCount[i + numCtu] ) * distFactor[1] );
          int weightedErr = std::max( avgErrD1, avgErrD2 ) + abs( avgErrD2 - avgErrD1 ) * 3;
          weightedErr     = ( int ) ( weightedErr * weight + ( 1 - weight ) * m_cuTreeCenter );

          // map the weighted error onto a QP offset in [-2, 2] using the four thresholds
          int qpOffset = 0;

          if( weightedErr > m_cuTreeThresh[0] )
          {
            qpOffset = 2;
          }
          else if( weightedErr > m_cuTreeThresh[1] )
          {
            qpOffset = 1;
          }
          else if( weightedErr < m_cuTreeThresh[3] )
          {
            qpOffset = -2;
          }
          else if( weightedErr < m_cuTreeThresh[2] )
          {
            qpOffset = -1;
          }

          if (meanRmsAcrossPic < maxRMS[i] * factor)
          {
            qpOffset += int (6.0 * log (std::max ((ctuSize > 64 ? 0.625 : 0.5) * maxRMS[i] * factor, meanRmsAcrossPic) / (maxRMS[i] * factor)) / (sqrt (weight) * log (2.0)) - 0.5);
          }

          pic->m_picShared->m_ctuBimQpOffset[i] = qpOffset;
          sumCtuQpOffsets += qpOffset;
        }

        pic->m_picShared->m_picAuxQpOffset = ( sumCtuQpOffsets + ( sumCtuQpOffsets < 0 ? -(numCtu >> 1) : numCtu >> 1 ) ) / numCtu; // pic average
        for( int i = 0; i < numCtu; i++ )
        {
          pic->m_picShared->m_ctuBimQpOffset[i] -= pic->m_picShared->m_picAuxQpOffset; // delta-QP relative to above average, see xGetQPForPicture
        }
      }
      else
      {
        std::fill( pic->m_picShared->m_ctuBimQpOffset.begin(), pic->m_picShared->m_ctuBimQpOffset.end(), 0 );
      }
    }
  }
  else
  {
    pic->m_picShared->m_ctuBimQpOffset.resize( 0 );
  }
}
1067
1068
// ====================================================================================================================
1069
// Private member functions
1070
// ====================================================================================================================
1071
1072
void MCTF::subsampleLuma(const PelStorage &input, PelStorage &output, const int factor) const
1073
0
{
1074
0
  const int newWidth = input.Y().width / factor;
1075
0
  const int newHeight = input.Y().height / factor;
1076
0
  output.create(CHROMA_400, Area(0, 0, newWidth, newHeight), 0, m_padding);
1077
1078
0
  const Pel* srcRow = input.Y().buf;
1079
0
  const int srcStride = input.Y().stride;
1080
0
  Pel* dstRow = output.Y().buf;
1081
0
  const int dstStride = output.Y().stride;
1082
1083
0
  for (int y = 0; y < newHeight; y++, srcRow+=factor*srcStride, dstRow+=dstStride)
1084
0
  {
1085
0
    const Pel* inRow      = srcRow;
1086
0
    const Pel* inRowBelow = srcRow+srcStride;
1087
0
    Pel* target     = dstRow;
1088
1089
0
    for (int x = 0; x < newWidth; x++)
1090
0
    {
1091
0
      target[x] = (inRow[0] + inRowBelow[0] + inRow[1] + inRowBelow[1] + 2) >> 2;
1092
0
      inRow += 2;
1093
0
      inRowBelow += 2;
1094
0
    }
1095
0
  }
1096
0
  output.extendBorderPel(m_padding, m_padding);
1097
0
}
1098
1099
int MCTF::motionErrorLuma(const PelStorage &orig,
1100
  const PelStorage &buffer,
1101
  const int x,
1102
  const int y,
1103
  int dx,
1104
  int dy,
1105
  const int bs,
1106
  const int besterror = MAX_INT) const
1107
0
{
1108
0
  int fx = dx & 0xf;
1109
0
  int fy = dy & 0xf;
1110
1111
0
  int error = 0;// dx * 10 + dy * 10;
1112
1113
0
  CHECKD( bs & 7, "Blocksize has to be a multiple of 8!" );
1114
1115
0
  const int w = std::min<int>( bs, orig.Y().width  - x ) & ~7;
1116
0
  const int h = std::min<int>( bs, orig.Y().height - y ) & ~7;
1117
1118
0
  CHECK( !w || !h, "Incompatible sizes!" );
1119
1120
0
  if( ( fx | fy ) == 0 )
1121
0
  {
1122
0
    dx /= m_motionVectorFactor;
1123
0
    dy /= m_motionVectorFactor;
1124
1125
0
    const int  origStride = orig.Y().stride;
1126
0
    const Pel* org        = orig.Y().buf + x + y * origStride;
1127
0
    const int  buffStride = buffer.Y().stride;
1128
0
    const Pel* buf        = buffer.Y().buf + x + dx + ( y + dy ) * buffStride;
1129
1130
0
    return m_motionErrorLumaInt8( org, origStride, buf, buffStride, w, h, besterror );
1131
0
  }
1132
0
  else if( m_lowResFltSearch )
1133
0
  {
1134
0
    dx >>= 4;
1135
0
    dy >>= 4;
1136
1137
0
    const int  origStride = orig.Y().stride;
1138
0
    const Pel* org        = orig.Y().buf + x + y * origStride;
1139
0
    const int  buffStride = buffer.Y().stride;
1140
0
    const Pel* buf        = buffer.Y().buf + x + dx + ( y + dy ) * buffStride;
1141
1142
0
    const int16_t *xFilter = m_interpolationFilter4[fx];
1143
0
    const int16_t *yFilter = m_interpolationFilter4[fy];
1144
1145
0
    return m_motionErrorLumaFrac8[1]( org, origStride, buf, buffStride, w, h, xFilter, yFilter, m_encCfg->m_internalBitDepth[CH_L], besterror );
1146
0
  }
1147
0
  else
1148
0
  {
1149
0
    dx >>= 4;
1150
0
    dy >>= 4;
1151
1152
0
    const int  origStride = orig.Y().stride;
1153
0
    const Pel* org        = orig.Y().buf + x + y * origStride;
1154
0
    const int  buffStride = buffer.Y().stride;
1155
0
    const Pel* buf        = buffer.Y().buf + x + dx + ( y + dy ) * buffStride;
1156
1157
0
    const int16_t *xFilter = m_interpolationFilter8[fx];
1158
0
    const int16_t *yFilter = m_interpolationFilter8[fy];
1159
1160
0
    return m_motionErrorLumaFrac8[0]( org, origStride, buf, buffStride, w,h, xFilter, yFilter, m_encCfg->m_internalBitDepth[CH_L], besterror );
1161
0
  }
1162
1163
0
  return error;
1164
0
}
1165
1166
bool MCTF::estimateLumaLn( std::atomic_int& blockX_, std::atomic_int* prevLineX, Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int blockSize,
1167
  const Array2D<MotionVector> *previous, const int factor, const bool doubleRes, int blockY, int bitDepth ) const
1168
0
{
1169
0
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_MCTF_SEARCH );
1170
1171
0
  const int stepSize = blockSize;
1172
0
  const int origWidth  = orig.Y().width;
1173
1174
0
  for( int blockX = blockX_.load(); blockX + 8 <= origWidth; blockX += stepSize, blockX_.store( blockX) )
1175
0
  {
1176
0
    if( prevLineX && blockX >= prevLineX->load() ) return false;
1177
1178
0
    int range = doubleRes ? 0 : ( m_searchPttrn == 2 ? 3 : 5 );
1179
0
    const int stepSize = blockSize;
1180
1181
0
    MotionVector best;
1182
1183
0
    if (previous == NULL)
1184
0
    {
1185
0
      range = 8;
1186
0
    }
1187
0
    else
1188
0
    {
1189
0
      for( int py = -1; py <= 1; py++ )
1190
0
      {
1191
0
        int testy = blockY / (2 * blockSize) + py;
1192
0
        if( (testy >= 0) && (testy < previous->h()) )
1193
0
        {
1194
0
          for (int px = -1; px <= 1; px++)
1195
0
          {
1196
0
            int testx = blockX / (2 * blockSize) + px;
1197
0
            if ((testx >= 0) && (testx < previous->w()) )
1198
0
            {
1199
0
              const MotionVector& old = previous->get(testx, testy);
1200
0
              int error = motionErrorLuma(orig, buffer, blockX, blockY, old.x * factor, old.y * factor, blockSize, best.error);
1201
0
              if (error < best.error)
1202
0
              {
1203
0
                best.set(old.x * factor, old.y * factor, error);
1204
0
              }
1205
0
            }
1206
0
          }
1207
0
        }
1208
0
      }
1209
1210
0
      int error = motionErrorLuma( orig, buffer, blockX, blockY, 0, 0, blockSize, best.error );
1211
0
      if( error < best.error )
1212
0
      {
1213
0
        best.set( 0, 0, error );
1214
0
      }
1215
0
    }
1216
0
    MotionVector prevBest = best;
1217
0
    const int d = previous == NULL && m_searchPttrn == 2 ? 2 : 1;
1218
0
    for( int y2 = prevBest.y / m_motionVectorFactor - range; y2 <= prevBest.y / m_motionVectorFactor + range; y2 += d )
1219
0
    {
1220
0
      for( int x2 = prevBest.x / m_motionVectorFactor - range; x2 <= prevBest.x / m_motionVectorFactor + range; x2 += d )
1221
0
      {
1222
0
        int error = motionErrorLuma( orig, buffer, blockX, blockY, x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, blockSize, best.error );
1223
0
        if( error < best.error )
1224
0
        {
1225
0
          best.set( x2 * m_motionVectorFactor, y2 * m_motionVectorFactor, error );
1226
0
        }
1227
0
      }
1228
0
    }
1229
0
    if (doubleRes)
1230
0
    { // merge into one loop, probably with precision array (here [12, 3] or maybe [4, 1]) with setable number of iterations
1231
0
      PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_MCTF_SEARCH_SUBPEL );
1232
1233
0
      prevBest = best;
1234
0
      int doubleRange = m_searchPttrn ? 6 : 12;
1235
0
      const int d1 = m_searchPttrn == 2 ? 6 : 4;
1236
1237
      // first iteration, 49 - 1 or 16 checks or 9 - 1 checks
1238
0
      for( int y2 = -doubleRange; y2 <= doubleRange; y2 += d1 )
1239
0
      {
1240
0
        for( int x2 = -doubleRange; x2 <= doubleRange; x2 += d1 )
1241
0
        {
1242
0
          if( x2 || y2 )
1243
0
          {
1244
0
            int error = motionErrorLuma( orig, buffer, blockX, blockY, prevBest.x + x2, prevBest.y + y2, blockSize, best.error );
1245
0
            if( error < best.error )
1246
0
            {
1247
0
              best.set( prevBest.x + x2, prevBest.y + y2, error );
1248
0
            }
1249
0
          }
1250
0
        }
1251
0
      }
1252
1253
0
      prevBest = best;
1254
0
      doubleRange = 2;
1255
      // second iteration, 9 - 1 checks
1256
0
      for( int y2 = -doubleRange; y2 <= doubleRange; y2 += 2 )
1257
0
      {
1258
0
        for( int x2 = -doubleRange; x2 <= doubleRange; x2 += 2 )
1259
0
        {
1260
0
          if( x2 || y2 )
1261
0
          {
1262
0
            int error = motionErrorLuma( orig, buffer, blockX, blockY, prevBest.x + x2, prevBest.y + y2, blockSize, best.error );
1263
0
            if( error < best.error )
1264
0
            {
1265
0
              best.set( prevBest.x + x2, prevBest.y + y2, error );
1266
0
            }
1267
0
          }
1268
0
        }
1269
0
      }
1270
1271
0
      prevBest = best;
1272
0
      doubleRange = 1;
1273
      // third iteration, 9 - 1 checks
1274
0
      for (int y2 = -doubleRange; y2 <= doubleRange; y2++)
1275
0
      {
1276
0
        for (int x2 = -doubleRange; x2 <= doubleRange; x2++)
1277
0
        {
1278
0
          if( x2 || y2 )
1279
0
          {
1280
0
            int error = motionErrorLuma( orig, buffer, blockX, blockY, prevBest.x + x2, prevBest.y + y2, blockSize, best.error );
1281
0
            if( error < best.error )
1282
0
            {
1283
0
              best.set( prevBest.x + x2, prevBest.y + y2, error );
1284
0
            }
1285
0
          }
1286
0
        }
1287
0
      }
1288
0
    } 
1289
0
    if( blockY > 0 )
1290
0
    {
1291
0
      MotionVector aboveMV = mvs.get( blockX / stepSize, ( blockY - stepSize ) / stepSize );
1292
0
      int error = motionErrorLuma( orig, buffer, blockX, blockY, aboveMV.x, aboveMV.y, blockSize, best.error );
1293
0
      if( error < best.error )
1294
0
      {
1295
0
        best.set( aboveMV.x, aboveMV.y, error );
1296
0
      }
1297
0
    }
1298
0
    if( blockX > 0 )
1299
0
    {
1300
0
      MotionVector leftMV = mvs.get( ( blockX - stepSize ) / stepSize, blockY / stepSize );
1301
0
      int error = motionErrorLuma( orig, buffer, blockX, blockY, leftMV.x, leftMV.y, blockSize, best.error );
1302
0
      if( error < best.error )
1303
0
      {
1304
0
        best.set( leftMV.x, leftMV.y, error );
1305
0
      }
1306
0
    }
1307
1308
0
    if( doubleRes )
1309
0
    {
1310
0
      const int w = std::min<int>( blockSize, orig.Y().width  - blockX ) & ~7;
1311
0
      const int h = std::min<int>( blockSize, orig.Y().height - blockY ) & ~7;
1312
1313
0
      CHECKD(bitDepth>10, "unsupported internal bit depth (also in calcVar)" );
1314
0
      const double bdScale = double(1<<(2*(10-bitDepth)));
1315
0
      const double dvar = m_calcVar( orig.Y().bufAt( blockX, blockY ), orig.Y().stride, w, h ) * bdScale;
1316
0
      const double mse  = best.error * bdScale / double( w * h );
1317
1318
0
      best.error   = ( int ) ( 20 * ( ( best.error*bdScale + 5.0 ) / ( dvar + 5.0 ) ) + mse / 50.0 );
1319
0
      best.rmsme   = uint16_t( 0.5 + sqrt( mse ) );
1320
0
      best.overlap = ( ( double ) w * h ) / ( m_mctfUnitSize * m_mctfUnitSize );
1321
0
    }
1322
1323
0
    mvs.get(blockX / stepSize, blockY / stepSize) = best;
1324
0
  }
1325
1326
0
  return true;
1327
0
}
1328
1329
// Estimates one motion vector per blockSize x blockSize block of orig against buffer and
// stores the result in mvs. The picture is processed in block rows by estimateLumaLn();
// with a thread pool each row becomes one barrier task, otherwise the rows run in sequence.
//
// previous  : optional motion field passed through to estimateLumaLn() as start candidates
// factor    : scale applied to the vectors of previous
// doubleRes : passed through to estimateLumaLn()
void MCTF::motionEstimationLuma(Array2D<MotionVector> &mvs, const PelStorage &orig, const PelStorage &buffer, const int blockSize, const Array2D<MotionVector> *previous, const int factor, const bool doubleRes) const
{
  const int rowStep   = blockSize;
  const int picHeight = orig.Y().height;
  const int lumaDepth = m_encCfg->m_internalBitDepth[CH_L];

  if( !m_threadPool )
  {
    for( int blockY = 0; blockY + 8 <= picHeight; blockY += rowStep )
    {
      // prevBlockX lies beyond the picture width, so a row never has to wait for the one above
      std::atomic_int blockX( 0 ), prevBlockX( orig.Y().width + rowStep );
      estimateLumaLn( blockX, blockY ? &prevBlockX : nullptr, mvs, orig, buffer, blockSize, previous, factor, doubleRes, blockY, lumaDepth );
    }
    return;
  }

  // arguments of one row task
  struct EstParams
  {
    std::atomic_int blockX;
    std::atomic_int* prevLineX;
    Array2D<MotionVector> *mvs;
    const PelStorage* orig;
    const PelStorage* buffer;
    const Array2D<MotionVector> *previous;
    int   blockSize;
    int   factor;
    bool  doubleRes;
    int   blockY;
    int   bitDepth;
    const MCTF* mctf;
  };

  // returns the result of estimateLumaLn(), i.e. false while the row above is not far enough
  static auto task = []( int tId, EstParams* params)
  {
    ITT_TASKSTART( itt_domain_MCTF_est, itt_handle_est );

    bool ret = params->mctf->estimateLumaLn( params->blockX, params->prevLineX, *params->mvs, *params->orig, *params->buffer, params->blockSize, params->previous, params->factor, params->doubleRes, params->blockY, params->bitDepth );

    ITT_TASKEND( itt_domain_MCTF_est, itt_handle_est );
    return ret;
  };

  std::vector<EstParams> rowParams( picHeight / rowStep + 1 );

  WaitCounter taskCounter;

  int row = 0;
  for( int blockY = 0; blockY + 8 <= picHeight; blockY += rowStep, row++ )
  {
    EstParams& p = rowParams[row];
    p.blockX     = 0;
    // each row observes the progress counter of the row above
    p.prevLineX  = row == 0 ? nullptr : &rowParams[row - 1].blockX;
    p.mctf       = this;
    p.mvs        = &mvs;
    p.orig       = &orig;
    p.buffer     = &buffer;
    p.previous   = previous;
    p.blockSize  = blockSize;
    p.factor     = factor;
    p.doubleRes  = doubleRes;
    p.blockY     = blockY;
    p.bitDepth   = lumaDepth;

    m_threadPool->addBarrierTask<EstParams>( task, &p, &taskCounter);
  }
  taskCounter.wait();
}
1397
1398
// Filters one row of MCTF blocks (the row starting at luma line yStart) for all components.
// For every block the motion-compensated prediction from each entry of srcFrameInfo is
// generated into a scratch buffer, then m_applyBlock() combines them with orgPic and writes
// the result to newOrgPic.
//
// sigmaSqCh       : sigma-squared value per channel type
// overallStrength : filter strength, scaled by m_chromaFactor for chroma and by 0.4 otherwise
void MCTF::xFinalizeBlkLine( const PelStorage &orgPic, std::deque<TemporalFilterSourcePicInfo> &srcFrameInfo, PelStorage &newOrgPic, int yStart, const double sigmaSqCh[MAX_NUM_CH], double overallStrength ) const
{
  PROFILER_SCOPE_AND_STAGE( 1, _TPROF, P_MCTF_APPLY );

  const int refCount    = int(srcFrameInfo.size());
  const int strengthRow = m_encCfg->m_picReordering ? 0 : 1;

  // scratch space for one predicted block per reference, plus one extra line to allow for overread
  Pel* const scratch = ( Pel* ) alloca( sizeof( Pel ) * ( refCount * m_mctfUnitSize * m_mctfUnitSize + m_mctfUnitSize ) );

  for( int c = 0; c < getNumberValidComponents( m_encCfg->m_internChromaFormat ); c++ )
  {
    const ComponentID compID = ( ComponentID ) c;
    const auto chType        = toChannelType( compID );
    const int  picHeight     = orgPic.bufs[c].height;
    const int  picWidth      = orgPic.bufs[c].width;
    const int  csx           = getComponentScaleX( compID, m_encCfg->m_internChromaFormat );
    const int  csy           = getComponentScaleY( compID, m_encCfg->m_internChromaFormat );
    const int  compBitDepth  = m_encCfg->m_internalBitDepth[chType];

    const double sigmaSq       = sigmaSqCh[ chType ];
    const double weightScaling = overallStrength * ( isChroma( compID ) ? m_chromaFactor : 0.4 );
    const ClpRng clpRng{ compBitDepth };

    // block size and row start in units of this component
    const int blkSizeY = m_mctfUnitSize >> csy;
    const int blkSizeX = m_mctfUnitSize >> csx;
    const int yOut     = yStart         >> csy;
    const int yEnd     = std::min( yOut + blkSizeY, picHeight );

    for( int by = yOut, yBlkAddr = yStart / m_mctfUnitSize; by < yEnd; by += blkSizeY, yBlkAddr++ )
    {
      const int h = std::min( blkSizeY, picHeight - by );

      for( int bx = 0, xBlkAddr = 0; bx < picWidth; bx += blkSizeX, xBlkAddr++ )
      {
        const int w = std::min( blkSizeX, picWidth - bx );

        const Pel* correctedPics[2 * VVENC_MCTF_RANGE] = { nullptr, };
        int        verror       [2 * VVENC_MCTF_RANGE] = { 0,   };
        double     refStr       [2 * VVENC_MCTF_RANGE] = { 0.0, };

        Pel* predBlk = scratch;

        for( int i = 0; i < refCount; i++, predBlk += w * h )
        {
          const auto& refInfo   = srcFrameInfo[i];
          const Pel*  refPlane  = refInfo.picBuffer.bufs[compID].buf;
          const int   refStride = refInfo.picBuffer.bufs[compID].stride;
          const int   dstStride = w;

          correctedPics[i] = predBlk;

          // vector scaled to this component; the low 4 bits select the interpolation filter phase
          const MotionVector& mv = refInfo.mvs.get( xBlkAddr, yBlkAddr );
          const int dx   = mv.x >> csx;
          const int dy   = mv.y >> csy;
          const int xInt = mv.x >> ( 4 + csx );
          const int yInt = mv.y >> ( 4 + csy );

          const Pel* src = refPlane + ( by + yInt ) * refStride + ( bx + xInt );

          if( m_lowResFltApply ) // || isChroma( compID )
          {
            const int16_t* xFilter = m_interpolationFilter4[dx & 0xf];
            const int16_t* yFilter = m_interpolationFilter4[dy & 0xf]; // will add 6 bit.

            m_applyFrac[chType][1]( src, refStride, predBlk, dstStride, w, h, xFilter, yFilter, compBitDepth );
          }
          else
          {
            const int16_t* xFilter = m_interpolationFilter8[dx & 0xf];
            const int16_t* yFilter = m_interpolationFilter8[dy & 0xf]; // will add 6 bit.

            m_applyFrac[chType][0]( src, refStride, predBlk, dstStride, w, h, xFilter, yFilter, compBitDepth );
          }

          const bool doPlanarCorrection = mv.rmsme > 0 && m_encCfg->m_QP <= 32 && w == h && w <= 32;
          if( doPlanarCorrection ) // "deblocking"
          {
            m_applyPlanarCorrection( orgPic.bufs[c].bufAt( bx, by ), orgPic.bufs[c].stride, predBlk, dstStride, w, h, clpRng, mv.rmsme );
          }

          verror[i] = mv.error;
          refStr[i] = m_refStrengths[strengthRow][refInfo.index];
        }

        m_applyBlock( orgPic.bufs[c], newOrgPic.bufs[c], CompArea( compID, orgPic.chromaFormat, Area( bx, by, w, h ) ), clpRng, correctedPics, refCount, verror, refStr, weightScaling, sigmaSq );
      }
    }
  }
}
1487
1488
void MCTF::bilateralFilter(const PelStorage& orgPic, std::deque<TemporalFilterSourcePicInfo>& srcFrameInfo, PelStorage& newOrgPic, double overallStrength) const
1489
0
{
1490
0
  const double lumaSigmaSq = m_sigmaMultiplier * ( 128.0 + 3.0 / 256.0 * m_encCfg->m_QP * m_encCfg->m_QP * m_encCfg->m_QP );
1491
0
  const double chromaSigmaSq = 30 * 30;
1492
1493
0
  double sigmaSqCh[MAX_NUM_CH];
1494
0
  for(int c=0; c< getNumberValidChannels(m_encCfg->m_internChromaFormat); c++)
1495
0
  {
1496
0
    const ChannelType ch=(ChannelType)c;
1497
0
    const Pel maxSampleValue = (1<<m_encCfg->m_internalBitDepth[ch])-1;
1498
0
    const double bitDepthDiffWeighting=1024.0 / (maxSampleValue+1);
1499
0
    sigmaSqCh[ch] = ( isChroma( ch ) ? chromaSigmaSq : lumaSigmaSq ) / ( bitDepthDiffWeighting * bitDepthDiffWeighting );
1500
0
  }
1501
1502
0
  if( m_threadPool )
1503
0
  {
1504
0
    struct FltParams
1505
0
    {
1506
0
      const PelStorage *orgPic; 
1507
0
      std::deque<TemporalFilterSourcePicInfo> *srcFrameInfo; 
1508
0
      PelStorage *newOrgPic;
1509
0
      const double *sigmaSqCh;
1510
0
      double overallStrength;
1511
0
      const MCTF* mctf;
1512
0
      int yStart; 
1513
0
    };
1514
1515
0
    std::vector<FltParams> FltParamsArray( orgPic.Y().height/ m_mctfUnitSize + 1 );
1516
1517
0
    WaitCounter taskCounter;
1518
1519
0
    for (int n = 0, yStart = 0; yStart < orgPic.Y().height; yStart += m_mctfUnitSize, n++)
1520
0
    {
1521
0
      static auto task = []( int tId, FltParams* params)
1522
0
      {
1523
0
        ITT_TASKSTART( itt_domain_MCTF_flt, itt_handle_flt );
1524
1525
0
        params->mctf->xFinalizeBlkLine( *params->orgPic, *params->srcFrameInfo, *params->newOrgPic, params->yStart, params->sigmaSqCh, params->overallStrength );
1526
1527
0
        ITT_TASKEND( itt_domain_MCTF_flt, itt_handle_flt );
1528
0
        return true;
1529
0
      };
1530
1531
0
      FltParams& cFltParams = FltParamsArray[n];
1532
0
      cFltParams.orgPic = &orgPic; 
1533
0
      cFltParams.srcFrameInfo = &srcFrameInfo; 
1534
0
      cFltParams.newOrgPic = &newOrgPic;
1535
0
      cFltParams.sigmaSqCh = sigmaSqCh;
1536
0
      cFltParams.overallStrength = overallStrength;
1537
0
      cFltParams.mctf = this;
1538
0
      cFltParams.yStart = yStart;
1539
1540
0
      m_threadPool->addBarrierTask<FltParams>( task, &cFltParams, &taskCounter);
1541
0
    }
1542
0
    taskCounter.wait();
1543
0
  }
1544
0
  else
1545
0
  {
1546
0
    for (int yStart = 0; yStart < orgPic.Y().height; yStart += m_mctfUnitSize )
1547
0
    {
1548
0
      xFinalizeBlkLine( orgPic, srcFrameInfo, newOrgPic, yStart, sigmaSqCh, overallStrength );
1549
0
    }
1550
0
  }
1551
0
}
1552
1553
} // namespace vvenc
1554
1555
//! \}