Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvenc/source/Lib/CommonLib/Buffer.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     Buffer.cpp
45
 *  \brief    Low-overhead class describing 2D memory layout
46
 */
47
48
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
49
50
// unit needs to come first due to a forward declaration
51
#include "Unit.h"
52
#include "Slice.h"
53
#include "InterpolationFilter.h"
54
55
//! \ingroup CommonLib
56
//! \{
57
58
namespace vvenc {
59
60
void weightCiipCore( Pel* res, const Pel* src, const int numSamples, int numIntra )
61
0
{
62
0
  if( numIntra == 1 )
63
0
  {
64
0
    for (int n = 0; n < numSamples; n+=2)
65
0
    {
66
0
      res[n  ] = (res[n  ] + src[n  ] + 1) >> 1;
67
0
      res[n+1] = (res[n+1] + src[n+1] + 1) >> 1;
68
0
    }
69
0
  }
70
0
  else
71
0
  {
72
0
    const Pel* scale   = numIntra ? src : res;
73
0
    const Pel* unscale = numIntra ? res : src;
74
75
0
    for (int n = 0; n < numSamples; n+=2)
76
0
    {
77
0
      res[n  ] = (unscale[n  ] + 3*scale[n  ] + 2) >> 2;
78
0
      res[n+1] = (unscale[n+1] + 3*scale[n+1] + 2) >> 2;
79
0
    }
80
0
  }
81
0
}
82
83
template< unsigned inputSize, unsigned outputSize >
84
void mipMatrixMulCore( Pel* res, const Pel* input, const uint8_t* weight, const int maxVal, const int inputOffset, bool transpose )
85
0
{
86
0
  Pel buffer[ outputSize*outputSize];
87
88
0
  int sum = 0;
89
0
  for( int i = 0; i < inputSize; i++ )
90
0
  {
91
0
    sum += input[i];
92
0
  }
93
0
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
0
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
0
  Pel* mat = transpose ? buffer : res;
97
0
  unsigned posRes = 0;
98
0
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
0
  {
100
0
    int tmp0 = input[0] * weight[0];
101
0
    int tmp1 = input[1] * weight[1];
102
0
    int tmp2 = input[2] * weight[2];
103
0
    int tmp3 = input[3] * weight[3];
104
0
    if( 8 == inputSize )
105
0
    {
106
0
      tmp0 += input[4] * weight[4];
107
0
      tmp1 += input[5] * weight[5];
108
0
      tmp2 += input[6] * weight[6];
109
0
      tmp3 += input[7] * weight[7];
110
0
    }
111
0
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
0
    weight += inputSize;
114
0
  }
115
116
0
  if( transpose )
117
0
  {
118
0
    for( int j = 0; j < outputSize; j++ )
119
0
    {
120
0
      for( int i = 0; i < outputSize; i++ )
121
0
      {
122
0
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
0
      }
124
0
    }
125
0
  }
126
0
}
Unexecuted instantiation: void vvenc::mipMatrixMulCore<4u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
Unexecuted instantiation: void vvenc::mipMatrixMulCore<8u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
Unexecuted instantiation: void vvenc::mipMatrixMulCore<8u, 8u>(short*, short const*, unsigned char const*, int, int, bool)
127
128
template< typename T >
129
void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, unsigned rshift, int offset, const ClpRng& clpRng )
130
0
{
131
0
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
132
0
#define ADD_AVG_CORE_INC    \
133
0
  src1 += src1Stride;       \
134
0
  src2 += src2Stride;       \
135
0
  dest +=  dstStride;       \
136
0
137
0
  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );
138
139
0
#undef ADD_AVG_CORE_OP
140
0
#undef ADD_AVG_CORE_INC
141
0
}
142
143
template<typename T>
144
void addWeightedAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int destStride, int width, int height, unsigned rshift, int offset, int w0, int w1, const ClpRng& clpRng )
145
0
{
146
0
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
147
0
#define ADD_WGHT_AVG_INC     \
148
0
    src1 += src1Stride; \
149
0
    src2 += src2Stride; \
150
0
    dest += destStride; \
151
0
152
0
  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );
153
154
0
#undef ADD_WGHT_AVG_OP
155
0
#undef ADD_WGHT_AVG_INC
156
0
}
157
158
template<typename T>
159
void subsCore( const T* src0, int src0Stride, const T* src1, int src1Stride, T* dest, int destStride, int width, int height )
160
0
{
161
0
#define SUBS_INC                \
162
0
  dest += destStride;  \
163
0
  src0 += src0Stride;  \
164
0
  src1 += src1Stride;  \
165
0
166
0
#define SUBS_OP( ADDR ) dest[ADDR] = src0[ADDR] - src1[ADDR]
167
168
0
  SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
169
170
0
#undef SUBS_OP
171
0
#undef SUBS_INC
172
0
}
173
174
void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height)
175
0
{
176
0
#define REM_HF_INC  \
177
0
 src += srcStride; \
178
0
 dst += dstStride; \
179
0
180
0
#define REM_HF_OP( ADDR )      dst[ADDR] =             2 * dst[ADDR] - src[ADDR]
181
182
0
 SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);
183
184
0
#undef REM_HF_INC
185
0
#undef REM_HF_OP
186
0
#undef REM_HF_OP_CLIP
187
0
}
188
189
template<typename T>
190
void reconstructCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, const ClpRng& clpRng )
191
0
{
192
0
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
193
0
#define RECO_CORE_INC     \
194
0
  src1 += src1Stride;     \
195
0
  src2 += src2Stride;     \
196
0
  dest +=  dstStride;     \
197
0
198
0
  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );
199
200
0
#undef RECO_CORE_OP
201
0
#undef RECO_CORE_INC
202
0
}
203
204
template<typename T>
205
void recoCore( const T* src1, const T* src2, T* dest, int numSamples, const ClpRng& clpRng )
206
0
{
207
0
  for( int n = 0; n < numSamples; n+=2)
208
0
  {
209
0
    dest[n]   = ClipPel( src1[n]   + src2[n], clpRng );
210
0
    dest[n+1] = ClipPel( src1[n+1] + src2[n+1], clpRng );
211
0
  }
212
0
}
213
214
template<typename T>
215
void copyClipCore( const T* src, Pel* dst, int numSamples, const ClpRng& clpRng )
216
0
{
217
0
  for( int n = 0; n < numSamples; n+=2)
218
0
  {
219
0
    dst[n]   = ClipPel( src[n]   , clpRng );
220
0
    dst[n+1] = ClipPel( src[n+1] , clpRng );
221
0
  }
222
0
}
223
224
template< typename T >
225
void addAvgCore( const T* src1, const T* src2, T* dest, int numSamples, unsigned rshift, int offset, const ClpRng& clpRng )
226
0
{
227
0
  for( int n = 0; n < numSamples; n+=2)
228
0
  {
229
0
    dest[n]   = ClipPel( rightShiftU( ( src1[n]   + src2[n]   + offset ), rshift ), clpRng );
230
0
    dest[n+1] = ClipPel( rightShiftU( ( src1[n+1] + src2[n+1] + offset ), rshift ), clpRng );
231
0
  }
232
0
}
233
234
template< typename T >
235
void roundGeoCore( const T* src, T* dest, const int numSamples, unsigned rshift, int offset, const ClpRng &clpRng)
236
0
{
237
0
  for( int i = 0; i < numSamples; i+=2)
238
0
  {
239
0
    dest[i]   = ClipPel(rightShiftU(src[i  ] + offset, rshift), clpRng);
240
0
    dest[i+1] = ClipPel(rightShiftU(src[i+1] + offset, rshift), clpRng);
241
0
  }
242
0
}
243
244
template<typename T>
245
void linTfCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, int scale, unsigned shift, int offset, const ClpRng& clpRng, bool bClip )
246
0
{
247
0
#define LINTF_CORE_INC  \
248
0
  src += srcStride;     \
249
0
  dst += dstStride;     \
250
0
251
0
  if( bClip )
252
0
  {
253
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ClipPel( rightShiftU( scale * src[ADDR], shift ) + offset, clpRng )
254
255
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
256
257
0
#undef LINTF_CORE_OP
258
0
  }
259
0
  else
260
0
  {
261
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ( rightShiftU( scale * src[ADDR], shift ) + offset )
262
263
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
264
265
0
#undef LINTF_CORE_OP
266
0
  }
267
0
#undef LINTF_CORE_INC
268
0
}
269
270
template<typename T, int N>
271
void transposeNxNCore( const Pel* src, int srcStride, Pel* dst, int dstStride )
272
0
{
273
0
  for( int i = 0; i < N; i++ )
274
0
  {
275
0
    for( int j = 0; j < N; j++ )
276
0
    {
277
0
      dst[j * dstStride] = src[j];
278
0
    }
279
280
0
    dst++;
281
0
    src += srcStride;
282
0
  }
283
0
}
Unexecuted instantiation: void vvenc::transposeNxNCore<short, 4>(short const*, int, short*, int)
Unexecuted instantiation: void vvenc::transposeNxNCore<short, 8>(short const*, int, short*, int)
284
285
template<typename T>
286
void copyClipCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, const ClpRng& clpRng )
287
0
{
288
0
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
289
0
#define RECO_INC      \
290
0
    src += srcStride; \
291
0
    dst += dstStride; \
292
0
293
0
  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
294
295
0
#undef RECO_OP
296
0
#undef RECO_INC
297
0
}
298
299
void copyBufferCore( const char* src, int srcStride, char* dst, int dstStride, int numBytes, int height)
300
0
{
301
0
  for( int i = 0; i < height; i++, src += srcStride, dst += dstStride )
302
0
  {
303
0
    memcpy( dst, src, numBytes );
304
0
  }
305
0
}
306
307
void applyLutCore( const Pel* src, const ptrdiff_t srcStride, Pel* dst, const ptrdiff_t dstStride, int width, int height, const Pel* lut )
308
0
{
309
0
#define RSP_SGNL_OP( ADDR ) dst[ADDR] = lut[src[ADDR]]
310
0
#define RSP_SGNL_INC        src      += srcStride; dst += dstStride;
311
312
0
  SIZE_AWARE_PER_EL_OP( RSP_SGNL_OP, RSP_SGNL_INC )
313
314
0
#undef RSP_SGNL_OP
315
0
#undef RSP_SGNL_INC
316
0
}
317
318
void fillMapPtr_Core( void** ptrMap, const ptrdiff_t mapStride, int width, int height, void* val )
319
0
{
320
0
  if( width == mapStride )
321
0
  {
322
0
    std::fill_n( ptrMap, width * height, val );
323
0
  }
324
0
  else
325
0
  {
326
0
    while( height-- )
327
0
    {
328
0
      std::fill_n( ptrMap, width, val );
329
0
      ptrMap += mapStride;
330
0
    }
331
0
  }
332
0
}
333
334
uint64_t AvgHighPassCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
335
0
{
336
0
  uint64_t saAct = 0;
337
0
  for (int y = 1; y < height - 1; y++)
338
0
  {
339
0
    for (int x = 1; x < width - 1; x++) // center cols
340
0
    {
341
0
      const int s = 12 * (int) pSrc[x  ] - 2 * ((int) pSrc[x-1] + (int) pSrc[x+1] + (int) pSrc[x  -iSrcStride] + (int) pSrc[x  +iSrcStride])
342
0
                             - ((int) pSrc[x-1-iSrcStride] + (int) pSrc[x+1-iSrcStride] + (int) pSrc[x-1+iSrcStride] + (int) pSrc[x+1+iSrcStride]);
343
0
      saAct += abs (s);
344
0
    }
345
0
    pSrc += iSrcStride;
346
0
  }
347
0
  return saAct;
348
0
}
349
350
uint64_t HDHighPassCore  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const int iSrcStride,const int iSM1Stride)
351
0
{
352
0
  uint64_t taAct = 0;
353
0
  for (int y = 1; y < height - 1; y++)
354
0
  {
355
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
356
0
    {
357
0
      const int t = (int) pSrc[x] - (int) pSM1[x];
358
0
      taAct += (1 + 3 * abs (t)) >> 1;
359
0
    }
360
0
    pSrc += iSrcStride;
361
0
    pSM1 += iSM1Stride;
362
0
  }
363
0
  return taAct;
364
0
}
365
366
uint64_t  HDHighPass2Core  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const Pel* pSM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
367
0
{
368
0
  uint64_t taAct = 0;
369
0
  for (int y = 1; y < height - 1; y++)
370
0
  {
371
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
372
0
    {
373
0
      const int t = (int) pSrc[x] - 2 * (int) pSM1[x] + (int) pSM2[x];
374
0
      taAct += abs (t);
375
0
    }
376
0
    pSrc += iSrcStride;
377
0
    pSM1 += iSM1Stride;
378
0
    pSM2 += iSM2Stride;
379
0
  }
380
0
  return taAct;
381
0
}
382
uint64_t AvgHighPassWithDownsamplingCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
383
0
{
384
0
  uint64_t saAct = 0;
385
0
  pSrc -= iSrcStride;
386
0
  pSrc -= iSrcStride;
387
0
 for (int y = 2; y < height - 2; y += 2)
388
0
 {
389
0
   for (int x = 2; x < width - 2; x += 2)
390
0
   {
391
0
     const int f = 12 * ((int)pSrc[ y   *iSrcStride + x  ] + (int)pSrc[ y   *iSrcStride + x+1] + (int)pSrc[(y+1)*iSrcStride + x  ] + (int)pSrc[(y+1)*iSrcStride + x+1])
392
0
                  - 3 * ((int)pSrc[(y-1)*iSrcStride + x  ] + (int)pSrc[(y-1)*iSrcStride + x+1] + (int)pSrc[(y+2)*iSrcStride + x  ] + (int)pSrc[(y+2)*iSrcStride + x+1])
393
0
                  - 3 * ((int)pSrc[ y   *iSrcStride + x-1] + (int)pSrc[ y   *iSrcStride + x+2] + (int)pSrc[(y+1)*iSrcStride + x-1] + (int)pSrc[(y+1)*iSrcStride + x+2])
394
0
                  - 2 * ((int)pSrc[(y-1)*iSrcStride + x-1] + (int)pSrc[(y-1)*iSrcStride + x+2] + (int)pSrc[(y+2)*iSrcStride + x-1] + (int)pSrc[(y+2)*iSrcStride + x+2])
395
0
                      - ((int)pSrc[(y-2)*iSrcStride + x-1] + (int)pSrc[(y-2)*iSrcStride + x  ] + (int)pSrc[(y-2)*iSrcStride + x+1] + (int)pSrc[(y-2)*iSrcStride + x+2]
396
0
                       + (int)pSrc[(y+3)*iSrcStride + x-1] + (int)pSrc[(y+3)*iSrcStride + x  ] + (int)pSrc[(y+3)*iSrcStride + x+1] + (int)pSrc[(y+3)*iSrcStride + x+2]
397
0
                       + (int)pSrc[(y-1)*iSrcStride + x-2] + (int)pSrc[ y   *iSrcStride + x-2] + (int)pSrc[(y+1)*iSrcStride + x-2] + (int)pSrc[(y+2)*iSrcStride + x-2]
398
0
                       + (int)pSrc[(y-1)*iSrcStride + x+3] + (int)pSrc[ y   *iSrcStride + x+3] + (int)pSrc[(y+1)*iSrcStride + x+3] + (int)pSrc[(y+2)*iSrcStride + x+3]);
399
0
     saAct += (uint64_t) abs(f);
400
0
   }
401
0
 }
402
0
 return saAct;
403
0
}
404
uint64_t AvgHighPassWithDownsamplingDiff1stCore (const int width, const int  height, const Pel* pSrc,const Pel* pSrcM1, const int iSrcStride, const int iSrcM1Stride)
405
0
{
406
0
  uint64_t taAct = 0;
407
0
  pSrc -= iSrcStride;
408
0
  pSrc -= iSrcStride;
409
0
  pSrcM1-=iSrcM1Stride;
410
0
  pSrcM1-=iSrcM1Stride;
411
412
0
  for (uint32_t y = 2; y < height-2; y += 2)
413
0
  {
414
0
    for (uint32_t x = 2; x < width-2; x += 2)
415
0
    {
416
0
      const int t = (int)pSrc  [y*iSrcStride + x] + (int)pSrc  [y*iSrcStride + x+1] + (int)pSrc  [(y+1)*iSrcStride + x] + (int)pSrc  [(y+1)*iSrcStride + x+1]
417
0
                 - ((int)pSrcM1[y*iSrcM1Stride + x] + (int)pSrcM1[y*iSrcM1Stride + x+1] + (int)pSrcM1[(y+1)*iSrcM1Stride + x] + (int)pSrcM1[(y+1)*iSrcM1Stride + x+1]);
418
0
      taAct += (1 + 3 * abs (t)) >> 1;
419
0
    }
420
0
  }
421
0
  return (taAct );
422
0
}
423
424
uint64_t AvgHighPassWithDownsamplingDiff2ndCore (const int width,const int height,const Pel* pSrc,const Pel* pSrcM1,const Pel* pSrcM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
425
0
{
426
0
  uint64_t taAct = 0;
427
428
0
  pSrc -= iSrcStride;
429
0
  pSrc -= iSrcStride;
430
0
  pSrcM1-=iSM1Stride;
431
0
  pSrcM1-=iSM1Stride;
432
0
  pSrcM2-=iSM2Stride;
433
0
  pSrcM2-=iSM2Stride;
434
435
0
  for (uint32_t y = 2; y < height-2; y += 2)
436
0
  {
437
0
    for (uint32_t x = 2; x < width-2; x += 2)
438
0
    {
439
0
      const int t = (int)pSrc  [y*iSrcStride + x] + (int)pSrc  [y*iSrcStride + x+1] + (int)pSrc  [(y+1)*iSrcStride + x] + (int)pSrc  [(y+1)*iSrcStride + x+1]
440
0
                            - 2 * ((int)pSrcM1[y*iSM1Stride + x] + (int)pSrcM1[y*iSM1Stride + x+1] + (int)pSrcM1[(y+1)*iSM1Stride + x] + (int)pSrcM1[(y+1)*iSM1Stride + x+1])
441
0
                            + (int)pSrcM2[y*iSM2Stride + x] + (int)pSrcM2[y*iSM2Stride + x+1] + (int)pSrcM2[(y+1)*iSM2Stride + x] + (int)pSrcM2[(y+1)*iSM2Stride + x+1];
442
0
      taAct += (uint64_t) abs(t);
443
0
    }
444
0
  }
445
0
  return (taAct);
446
0
}
447
448
PelBufferOps::PelBufferOps()
449
256
{
450
256
  isInitX86Done = false;
451
452
256
  addAvg            = addAvgCore<Pel>;
453
256
  reco              = recoCore<Pel>;
454
256
  copyClip          = copyClipCore<Pel>;
455
256
  roundGeo          = roundGeoCore<Pel>;
456
457
256
  addAvg4           = addAvgCore<Pel>;
458
256
  addAvg8           = addAvgCore<Pel>;
459
256
  addAvg16          = addAvgCore<Pel>;
460
461
256
  sub4              = subsCore<Pel>;
462
256
  sub8              = subsCore<Pel>;
463
464
256
  wghtAvg4          = addWeightedAvgCore<Pel>;
465
256
  wghtAvg8          = addWeightedAvgCore<Pel>;
466
467
256
  copyClip4         = copyClipCore<Pel>;
468
256
  copyClip8         = copyClipCore<Pel>;
469
470
256
  reco4             = reconstructCore<Pel>;
471
256
  reco8             = reconstructCore<Pel>;
472
473
256
  linTf4            = linTfCore<Pel>;
474
256
  linTf8            = linTfCore<Pel>;
475
476
256
  copyBuffer        = copyBufferCore;
477
478
256
  removeHighFreq8   = removeHighFreq;
479
256
  removeHighFreq4   = removeHighFreq;
480
481
256
  transpose4x4      = transposeNxNCore<Pel,4>;
482
256
  transpose8x8      = transposeNxNCore<Pel,8>;
483
256
  mipMatrixMul_4_4  = mipMatrixMulCore<4,4>;
484
256
  mipMatrixMul_8_4  = mipMatrixMulCore<8,4>;
485
256
  mipMatrixMul_8_8  = mipMatrixMulCore<8,8>;
486
256
  weightCiip        = weightCiipCore;
487
256
  roundIntVector    = nullptr;
488
489
256
  applyLut          = applyLutCore;
490
491
256
  fillPtrMap        = fillMapPtr_Core;
492
256
  AvgHighPassWithDownsampling = AvgHighPassWithDownsamplingCore;
493
256
  AvgHighPass = AvgHighPassCore;
494
256
  AvgHighPassWithDownsamplingDiff1st = AvgHighPassWithDownsamplingDiff1stCore;
495
256
  AvgHighPassWithDownsamplingDiff2nd = AvgHighPassWithDownsamplingDiff2ndCore;
496
256
  HDHighPass = HDHighPassCore;
497
256
  HDHighPass2 = HDHighPass2Core;
498
256
}
499
500
PelBufferOps g_pelBufOP = PelBufferOps();
501
502
template<>
503
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng, const int8_t BcwIdx)
504
0
{
505
0
  const int8_t w0 = getBcwWeight( BcwIdx, REF_PIC_LIST_0 );
506
0
  const int8_t w1 = getBcwWeight( BcwIdx, REF_PIC_LIST_1 );
507
0
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
508
0
  const Pel* src0 = other1.buf;
509
0
  const Pel* src2 = other2.buf;
510
0
        Pel* dest =        buf;
511
512
0
  const int src1Stride = other1.stride;
513
0
  const int src2Stride = other2.stride;
514
0
  const int destStride =        stride;
515
0
  const int clipbd     = clpRng.bd;
516
0
  const int shiftNum   = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
517
0
  const int offset     = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );
518
519
0
  if( ( width & 7 ) == 0 )
520
0
  {
521
0
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
522
0
  }
523
0
  else if( ( width & 3 ) == 0 )
524
0
  {
525
0
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
526
0
  }
527
0
  else
528
0
  {
529
0
#define WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
530
0
#define WGHT_AVG_INC    \
531
0
    src0 += src1Stride; \
532
0
    src2 += src2Stride; \
533
0
    dest += destStride; \
534
0
535
0
    SIZE_AWARE_PER_EL_OP( WGHT_AVG_OP, WGHT_AVG_INC );
536
537
0
#undef WGHT_AVG_OP
538
0
#undef WGHT_AVG_INC
539
0
  }
540
0
}
541
542
template<>
543
void AreaBuf<Pel>::rspSignal( const Pel* pLUT)
544
0
{
545
0
  g_pelBufOP.applyLut( buf, stride, buf, stride, width, height, pLUT );
546
0
}
547
548
549
template<>
550
void AreaBuf<Pel>::rspSignal( const AreaBuf<const Pel>& other, const Pel* pLUT)
551
0
{
552
0
  g_pelBufOP.applyLut( other.buf, other.stride, buf, stride, width, height, pLUT );
553
0
}
554
555
template<>
556
void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& clpRng)
557
0
{
558
0
        Pel* dst = buf;
559
0
  const Pel* src = buf;
560
0
  const int maxAbsclipBD = (1<<clpRng.bd) - 1;
561
562
0
  if (dir) // forward
563
0
  {
564
0
    if (width == 1)
565
0
    {
566
0
      THROW("Blocks of width = 1 not supported");
567
0
    }
568
0
    else
569
0
    {
570
0
      for (unsigned y = 0; y < height; y++)
571
0
      {
572
0
        for (unsigned x = 0; x < width; x++)
573
0
        {
574
0
          int sign = src[x] >= 0 ? 1 : -1;
575
0
          int absval = sign * src[x];
576
0
          dst[x] = (Pel)Clip3(-maxAbsclipBD, maxAbsclipBD, sign * (((absval << CSCALE_FP_PREC) + (scale >> 1)) / scale));
577
0
        }
578
0
        dst += stride;
579
0
        src += stride;
580
0
      }
581
0
    }
582
0
  }
583
0
  else // inverse
584
0
  {
585
0
    for (unsigned y = 0; y < height; y++)
586
0
    {
587
0
      for (unsigned x = 0; x < width; x++)
588
0
      {
589
0
        int val    = Clip3<int>((-maxAbsclipBD - 1), maxAbsclipBD, (int)src[x]);
590
0
        int sign   = src[x] >= 0 ? 1 : -1;
591
0
        int absval = sign * val;
592
0
               val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
593
0
        if (sizeof(Pel) == 2) // avoid overflow when storing data
594
0
        {
595
0
          val = Clip3<int>(-32768, 32767, val);
596
0
        }
597
0
        dst[x] = (Pel)val;
598
0
      }
599
0
      dst += stride;
600
0
      src += stride;
601
0
    }
602
0
  }
603
0
}
604
605
template<>
606
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng)
607
0
{
608
0
  const Pel* src0 = other1.buf;
609
0
  const Pel* src2 = other2.buf;
610
0
        Pel* dest =        buf;
611
612
0
  const unsigned src1Stride = other1.stride;
613
0
  const unsigned src2Stride = other2.stride;
614
0
  const unsigned destStride =        stride;
615
0
  const int      clipbd     = clpRng.bd;
616
0
  const unsigned shiftNum   = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
617
0
  const int      offset     = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
618
619
0
#if ENABLE_SIMD_OPT_BUFFER
620
0
  if( destStride == width )
621
0
  {
622
0
    g_pelBufOP.addAvg(src0, src2, dest, width * height, shiftNum, offset, clpRng);
623
0
  }
624
0
  else if ((width & 15) == 0)
625
0
  {
626
0
    g_pelBufOP.addAvg16(src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng);
627
0
  }
628
0
  else if( ( width & 7 ) == 0 )
629
0
  {
630
0
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
631
0
  }
632
0
  else if( ( width & 3 ) == 0 )
633
0
  {
634
0
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
635
0
  }
636
0
  else
637
0
#endif
638
0
  {
639
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
640
0
#define ADD_AVG_INC     \
641
0
    src0 += src1Stride; \
642
0
    src2 += src2Stride; \
643
0
    dest += destStride; \
644
0
645
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
646
647
0
#undef ADD_AVG_OP
648
0
#undef ADD_AVG_INC
649
0
  }
650
0
}
651
652
template<>
653
void AreaBuf<Pel>::subtract( const AreaBuf<const Pel>& minuend, const AreaBuf<const Pel>& subtrahend )
654
0
{
655
0
  CHECKD( width  != minuend.width,     "Incompatible size" );
656
0
  CHECKD( height != minuend.height,    "Incompatible size" );
657
0
  CHECKD( width  != subtrahend.width,  "Incompatible size");
658
0
  CHECKD( height != subtrahend.height, "Incompatible size");
659
  
660
0
        Pel* dest =            buf;
661
0
  const Pel* mins = minuend   .buf;
662
0
  const Pel* subs = subtrahend.buf;
663
664
665
0
#if ENABLE_SIMD_OPT_BUFFER
666
0
  const unsigned destStride =            stride;
667
0
  const unsigned minsStride = minuend.   stride;
668
0
  const unsigned subsStride = subtrahend.stride;
669
670
0
  if( ( width & 7 ) == 0 )
671
0
  {
672
0
    g_pelBufOP.sub8( mins, minsStride, subs, subsStride, dest, destStride, width, height );
673
0
  }
674
0
  else if( ( width & 3 ) == 0 )
675
0
  {
676
0
    g_pelBufOP.sub4( mins, minsStride, subs, subsStride, dest, destStride, width, height );
677
0
  }
678
0
  else
679
0
#endif
680
0
  {
681
0
#define SUBS_INC                \
682
0
    dest +=            stride;  \
683
0
    mins += minuend   .stride;  \
684
0
    subs += subtrahend.stride;  \
685
0
686
0
#define SUBS_OP( ADDR ) dest[ADDR] = mins[ADDR] - subs[ADDR]
687
688
0
    SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
689
690
0
#undef SUBS_OP
691
0
#undef SUBS_INC
692
0
  }
693
0
}
694
695
template<>
696
void AreaBuf<const Pel>::calcVarianceSplit( const AreaBuf<const Pel>& Org, const uint32_t  size, int& varh,int& varv) const
697
0
{
698
0
  CHECK( Org.width != Org.height, "Incompatible size!" );
699
0
  int stride = Org.stride;
700
0
  const Pel* src;
701
0
  Pel data;
702
0
  double variance=0;
703
0
  double mean=0;
704
0
  int64_t sum[4]={0,0,0,0};
705
0
  int64_t sum_sqr[4]={0,0,0,0};
706
0
  uint32_t halfsize =size>>1;
707
0
  uint32_t off[4]={0,halfsize,size*halfsize,size*halfsize+halfsize};
708
0
  int n,x,y;
709
710
0
  for( n = 0; n < 4; n++)
711
0
  {
712
0
    src = Org.buf+off[n];
713
0
    for( y = 0; y < halfsize; y++)
714
0
    {
715
0
      for(x = 0; x < halfsize; x++)
716
0
      {
717
0
        data=src[y*stride+x];
718
0
        sum[n]+=data;
719
0
        sum_sqr[n]+= data*data;
720
0
      }
721
0
    }
722
0
  }
723
0
  int num=size*(size>>1);
724
  // varhu
725
0
  mean=(double)(sum[0]+sum[1])/(num);
726
0
  variance =  (double)(sum_sqr[0]+sum_sqr[1])/(num) - (mean*mean);
727
0
  varh =(int)(variance+0.5);
728
  // varhl
729
0
  mean=(double)(sum[2]+sum[3])/(num);
730
0
  variance =  (double)(sum_sqr[2]+sum_sqr[3])/(num) - (mean*mean);
731
0
  varh +=(int)(variance+0.5);
732
  // varvl
733
0
  mean=(double)(sum[0]+sum[2])/(num);
734
0
  variance =  (double)(sum_sqr[0]+sum_sqr[2])/(num) - (mean*mean);
735
0
  varv =(int)(variance+0.5);
736
  // varvr
737
0
  mean=(double)(sum[1]+sum[3])/(num);
738
0
  variance =  (double)(sum_sqr[1]+sum_sqr[3])/(num) - (mean*mean);
739
0
  varv +=(int)(variance+0.5);
740
0
}
741
742
template<>
743
void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel>& src, const ClpRng& clpRng )
744
0
{
745
0
  const Pel* srcp = src.buf;
746
0
        Pel* dest =     buf;
747
748
0
  const unsigned srcStride  = src.stride;
749
0
  const unsigned destStride = stride;
750
751
0
  if( destStride == width)
752
0
  {
753
0
    g_pelBufOP.copyClip(srcp, dest, width * height, clpRng);
754
0
  }
755
0
  else if ((width & 7) == 0)
756
0
  {
757
0
    g_pelBufOP.copyClip8(srcp, srcStride, dest, destStride, width, height, clpRng);
758
0
  }
759
0
  else if ((width & 3) == 0)
760
0
  {
761
0
    g_pelBufOP.copyClip4(srcp, srcStride, dest, destStride, width, height, clpRng);
762
0
  }
763
0
  else
764
0
  {
765
0
    for( int y = 0; y < height; y++ )
766
0
    {
767
0
      dest[0] = ClipPel( srcp[0], clpRng);
768
0
      dest[1] = ClipPel( srcp[1], clpRng);
769
0
      srcp += srcStride;
770
0
      dest += destStride;
771
0
    }                                                         \
772
0
  }
773
0
}
774
775
template<>
776
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel>& pred, const AreaBuf<const Pel>& resi, const ClpRng& clpRng )
777
0
{
778
0
  const Pel* src1 = pred.buf;
779
0
  const Pel* src2 = resi.buf;
780
0
        Pel* dest =      buf;
781
782
0
  const unsigned src1Stride = pred.stride;
783
0
  const unsigned src2Stride = resi.stride;
784
0
  const unsigned destStride =      stride;
785
0
  if( src2Stride == width )
786
0
  {
787
0
    g_pelBufOP.reco( pred.buf, resi.buf, buf, width * height, clpRng );
788
0
  }
789
0
  else if( ( width & 7 ) == 0 )
790
0
  {
791
0
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
792
0
  }
793
0
  else if( ( width & 3 ) == 0 )
794
0
  {
795
0
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
796
0
  }
797
0
  else if( ( width & 1 ) == 0 )
798
0
  {
799
0
    for( int y = 0; y < height; y++ )
800
0
    {
801
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng);
802
0
      dest[1] = ClipPel( src1[1] + src2[1], clpRng);
803
0
      src1 += src1Stride;
804
0
      src2 += src2Stride;
805
0
      dest += destStride;
806
0
    }                        
807
0
  }
808
0
  else
809
0
  {
810
0
    CHECKD( width != 1, "Expecting width to be '1'!" );
811
812
0
    for( int y = 0; y < height; y++ )
813
0
    {
814
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng );
815
816
0
      src1 += src1Stride;
817
0
      src2 += src2Stride;
818
0
      dest += destStride;
819
0
    }
820
0
  }
821
0
}
822
823
template<>
824
void AreaBuf<Pel>::linearTransform( const int scale, const unsigned shift, const int offset, bool bClip, const ClpRng& clpRng )
825
0
{
826
0
  const Pel* src = buf;
827
0
        Pel* dst = buf;
828
829
0
  if( stride == width)
830
0
  {
831
0
    if( width > 2 && height > 2 )
832
0
    {
833
0
      g_pelBufOP.linTf8( src, stride<<2, dst, stride<<2, width<<2, height>>2, scale, shift, offset, clpRng, bClip );
834
0
    }
835
0
    else
836
0
    {
837
0
      g_pelBufOP.linTf4( src, stride<<1, dst, stride<<1, width<<1, height>>1, scale, shift, offset, clpRng, bClip );
838
0
    }
839
0
  }
840
0
  else if( ( width & 7 ) == 0 )
841
0
  {
842
0
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
843
0
  }
844
0
  else if( ( width & 3 ) == 0 )
845
0
  {
846
0
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
847
0
  }
848
0
  else
849
0
  {
850
0
    if( bClip )
851
0
    {
852
0
      for( int y = 0; y < height; y++ )
853
0
      {
854
0
        dst[0] = ( Pel ) ClipPel( rightShiftU( scale * src[0], shift ) + offset, clpRng );
855
0
        dst[1] = ( Pel ) ClipPel( rightShiftU( scale * src[1], shift ) + offset, clpRng );
856
0
        src += stride;
857
0
        dst += stride;
858
0
      }
859
0
    }
860
0
    else
861
0
    {
862
0
      for( int y = 0; y < height; y++ )
863
0
      {
864
0
        dst[0] = ( Pel ) ( rightShiftU( scale * src[0], shift ) + offset );
865
0
        dst[1] = ( Pel ) ( rightShiftU( scale * src[1], shift ) + offset );
866
0
        src += stride;
867
0
        dst += stride;
868
0
      }
869
0
    }
870
0
  }
871
0
}
872
873
#if ENABLE_SIMD_OPT_BUFFER
874
875
template<>
876
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel>& other )
877
0
{
878
0
  CHECK( width != other.height || height != other.width, "Incompatible size" );
879
880
0
  if( ( ( width | height ) & 7 ) == 0 )
881
0
  {
882
0
    const Pel* src = other.buf;
883
884
0
    for( unsigned y = 0; y < other.height; y += 8 )
885
0
    {
886
0
      Pel* dst = buf + y;
887
888
0
      for( unsigned x = 0; x < other.width; x += 8 )
889
0
      {
890
0
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
891
892
0
        dst += 8 * stride;
893
0
      }
894
895
0
      src += 8 * other.stride;
896
0
    }
897
0
  }
898
0
  else if( ( ( width | height ) & 3 ) == 0 )
899
0
  {
900
0
    const Pel* src = other.buf;
901
902
0
    for( unsigned y = 0; y < other.height; y += 4 )
903
0
    {
904
0
      Pel* dst = buf + y;
905
906
0
      for( unsigned x = 0; x < other.width; x += 4 )
907
0
      {
908
0
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );
909
910
0
        dst += 4 * stride;
911
0
      }
912
913
0
      src += 4 * other.stride;
914
0
    }
915
0
  }
916
0
  else
917
0
  {
918
0
          Pel* dst =       buf;
919
0
    const Pel* src = other.buf;
920
0
    width          = other.height;
921
0
    height         = other.width;
922
0
    stride         = stride < width ? width : stride;
923
924
0
    for( unsigned y = 0; y < other.height; y++ )
925
0
    {
926
0
      for( unsigned x = 0; x < other.width; x++ )
927
0
      {
928
0
        dst[y + x*stride] = src[x + y * other.stride];
929
0
      }
930
0
    }
931
0
  }
932
0
}
933
#endif
934
935
template<>
936
void AreaBuf<Pel>::weightCiip( const AreaBuf<const Pel>& intra, const int numIntra )
937
0
{
938
0
  CHECK(width == 2, "Width of 2 is not supported");
939
0
  g_pelBufOP.weightCiip( buf, intra.buf, width * height, numIntra );
940
0
}
941
942
template<>
943
void AreaBuf<MotionInfo>::fill( const MotionInfo& val )
944
0
{
945
0
  if( width == stride )
946
0
  {
947
0
    std::fill_n( buf, width * height, val );
948
0
  }
949
0
  else
950
0
  {
951
0
    MotionInfo* dst = buf;
952
953
0
    for( int y = 0; y < height; y++, dst += stride )
954
0
    {
955
0
      std::fill_n( dst, width, val );
956
0
    }
957
0
  }
958
0
}
959
960
// Starts with no memory attached: every per-component origin pointer is null.
PelStorage::PelStorage()
{
  for( uint32_t comp = 0; comp < MAX_NUM_COMP; ++comp )
  {
    m_origin[comp] = nullptr;
  }
}
967
968
// Releases whatever memory is still attached via destroy().
PelStorage::~PelStorage()
{
  this->destroy();
}
972
973
void PelStorage::create( const UnitArea& _UnitArea )
974
0
{
975
0
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
976
0
  m_maxArea = _UnitArea;
977
0
}
978
979
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area )
980
0
{
981
0
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
982
983
0
  chromaFormat = _chromaFormat;
984
985
0
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
986
987
0
  uint32_t bufSize = 0;
988
0
  for( uint32_t i = 0; i < numComp; i++ )
989
0
  {
990
0
    const ComponentID compID = ComponentID( i );
991
0
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
992
0
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
993
994
0
    const uint32_t area = totalWidth * totalHeight;
995
0
    CHECK( !area, "Trying to create a buffer with zero area" );
996
0
    bufSize += area;
997
0
  }
998
999
0
  bufSize += 1; // for SIMD DMVR on the bottom right corner, which overreads the lines by 1 sample
1000
1001
  //allocate one buffer
1002
0
  m_origin[0] = ( Pel* ) xMalloc( Pel, bufSize );
1003
1004
0
  Pel* topLeft = m_origin[0];
1005
0
  for( uint32_t i = 0; i < numComp; i++ )
1006
0
  {
1007
0
    const ComponentID compID = ComponentID( i );
1008
0
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
1009
0
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
1010
0
    const uint32_t area = totalWidth * totalHeight;
1011
1012
0
    bufs.push_back( PelBuf( topLeft, totalWidth, totalWidth, totalHeight ) );
1013
0
    topLeft += area;
1014
0
  }
1015
1016
0
  m_maxArea = UnitArea( _chromaFormat, _area );
1017
0
}
1018
1019
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignment, const bool _scaleChromaMargin )
1020
0
{
1021
0
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
1022
1023
0
  chromaFormat = _chromaFormat;
1024
1025
0
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
1026
1027
0
  unsigned extHeight = _area.height;
1028
0
  unsigned extWidth  = _area.width;
1029
1030
0
  if( _maxCUSize )
1031
0
  {
1032
0
    extHeight = ( ( _area.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1033
0
    extWidth  = ( ( _area.width  + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1034
0
  }
1035
1036
0
  for( uint32_t i = 0; i < numComp; i++ )
1037
0
  {
1038
0
    const ComponentID compID = ComponentID( i );
1039
0
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
1040
0
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );
1041
1042
0
    unsigned scaledHeight = extHeight >> scaleY;
1043
0
    unsigned scaledWidth  = extWidth  >> scaleX;
1044
0
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
1045
0
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);
1046
0
    unsigned totalWidth   = scaledWidth + 2*xmargin;
1047
0
    unsigned totalHeight  = scaledHeight +2*ymargin;
1048
1049
0
    if( _alignment )
1050
0
    {
1051
      // make sure buffer lines are align
1052
0
      CHECK( _alignment != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
1053
0
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
1054
0
    }
1055
0
    uint32_t area = totalWidth * totalHeight;
1056
0
    CHECK( !area, "Trying to create a buffer with zero area" );
1057
1058
0
    m_origin[i] = ( Pel* ) xMalloc( Pel, area );
1059
0
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
1060
0
    bufs.push_back( PelBuf( topLeft, totalWidth, _area.width >> scaleX, _area.height >> scaleY ) );
1061
0
  }
1062
1063
0
  m_maxArea = UnitArea( _chromaFormat, _area );
1064
0
}
1065
1066
// Makes this storage refer to the memory of 'buf': chroma format and per-component
// pointer, stride and size are copied. No memory is allocated and m_origin is not touched.
void PelStorage::createFromBuf( PelUnitBuf buf )
{
  chromaFormat = buf.chromaFormat;

  const uint32_t compCount = getNumberValidComponents( chromaFormat );

  bufs.resize(compCount);

  for( uint32_t c = 0; c < compCount; c++ )
  {
    PelBuf plane = buf.get( ComponentID( c ) );
    bufs[c] = PelBuf( plane.bufAt( 0, 0 ), plane.stride, plane.width, plane.height );
  }
}
1080
1081
void PelStorage::compactResize( const UnitArea& area )
1082
0
{
1083
0
  CHECK( bufs.size() < area.blocks.size(), "Cannot increase buffer size when compacting!" );
1084
1085
0
  for( uint32_t i = 0; i < area.blocks.size(); i++ )
1086
0
  {
1087
0
    CHECK( m_maxArea.blocks[i].area() < area.blocks[i].area(), "Cannot increase buffer size when compacting!" );
1088
1089
0
    bufs[i].Size::operator=( area.blocks[i].size() );
1090
0
    bufs[i].stride = bufs[i].width;
1091
0
  }
1092
0
}
1093
1094
// Takes over the buffers of 'other': the component views are copied, the origin pointers
// are swapped, and 'other' is destroyed afterwards (which frees the origins this object
// held before the swap).
void PelStorage::takeOwnership( PelStorage& other )
{
  chromaFormat = other.chromaFormat;

  const uint32_t compCount = getNumberValidComponents( chromaFormat );

  bufs.resize(compCount);

  for( uint32_t c = 0; c < compCount; c++ )
  {
    PelBuf donor = other.get( ComponentID( c ) );
    bufs[c] = PelBuf( donor.bufAt( 0, 0 ), donor.stride, donor.width, donor.height );
    std::swap( m_origin[c], other.m_origin[c]);
  }

  m_maxArea = other.m_maxArea;

  other.destroy();
}
1113
1114
1115
// Exchanges the sample memory of two storages component by component: buffer pointer,
// stride and origin pointer are swapped. Both storages must pass the format, buffer and
// stride checks below.
void PelStorage::swap( PelStorage& other )
{
  const uint32_t compCount = getNumberValidComponents( chromaFormat );

  for( uint32_t i = 0; i < compCount; i++ )
  {
    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat                   != other.chromaFormat                  , "Incompatible formats" );
    CHECK( get( ComponentID( i ) )        != other.get( ComponentID( i ) )       , "Incompatible formats" );
    CHECK( get( ComponentID( i ) ).stride != other.get( ComponentID( i ) ).stride, "Incompatible formats" );

    PelBuf& mine   = bufs[i];
    PelBuf& theirs = other.bufs[i];

    std::swap( mine.buf,    theirs.buf );
    std::swap( mine.stride, theirs.stride );
    std::swap( m_origin[i], other.m_origin[i] );
  }
}
1131
1132
void PelStorage::destroy()
1133
0
{
1134
0
  chromaFormat = NUM_CHROMA_FORMAT;
1135
0
  for( uint32_t i = 0; i < MAX_NUM_COMP; i++ )
1136
0
  {
1137
0
    if( m_origin[i] )
1138
0
    {
1139
0
      xFree( m_origin[i] );
1140
0
      m_origin[i] = nullptr;
1141
0
    }
1142
0
  }
1143
0
  bufs.clear();
1144
0
}
1145
1146
// Returns a copy of the stored view of component 'CompID'.
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  PelBuf& plane = bufs[CompID];
  return plane;
}
1150
1151
// Read-only counterpart: returns the stored view of component 'CompID' as a CPelBuf.
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  const PelBuf& plane = bufs[CompID];
  return plane;
}
1155
1156
// Returns a view on the sub-block 'blk' of its component: the start pointer is offset by
// rsAddr( blk, stride ) and the stored stride is kept.
PelBuf PelStorage::getBuf( const CompArea& blk )
{
  const PelBuf& plane = bufs[blk.compID];

  Pel* const blkOrigin = plane.buf + rsAddr( blk, plane.stride );

  return PelBuf( blkOrigin, plane.stride, blk );
}
1161
1162
// Read-only view on the sub-block 'blk' of its component: the start pointer is offset by
// rsAddr( blk, stride ) and the stored stride is kept.
const CPelBuf PelStorage::getBuf( const CompArea& blk ) const
{
  const PelBuf& plane = bufs[blk.compID];

  const Pel* const blkOrigin = plane.buf + rsAddr( blk, plane.stride );

  return CPelBuf( blkOrigin, plane.stride, blk );
}
1167
1168
// Returns views on the blocks of 'unit': luma only for CHROMA_400, otherwise Y, Cb and Cr.
PelUnitBuf PelStorage::getBuf( const UnitArea& unit )
{
  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1172
1173
// Read-only views on the blocks of 'unit': luma only for CHROMA_400, otherwise Y, Cb and Cr.
const CPelUnitBuf PelStorage::getBuf( const UnitArea& unit ) const
{
  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1177
1178
// Returns views that start at the beginning of each component buffer, have the sizes of
// the blocks in 'unit', and use the caller-supplied strides strY / strCb / strCr instead
// of the stored ones. The debug checks reject strides larger than the stored strides.
PelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );
  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, PelBuf( bufs[COMP_Y].buf, strY, unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat,
                     PelBuf( bufs[COMP_Y ].buf, strY,  unit.Y () ),
                     PelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb() ),
                     PelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr() ) );
}
1186
1187
// Read-only counterpart: views that start at the beginning of each component buffer, have
// the sizes of the blocks in 'unit', and use the caller-supplied strides. The debug checks
// reject strides larger than the stored strides.
const CPelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit) const
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );
  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, CPelBuf( bufs[COMP_Y].buf, strY, unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat,
                      CPelBuf( bufs[COMP_Y ].buf, strY,  unit.Y () ),
                      CPelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb() ),
                      CPelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr() ) );
}
1195
1196
// Returns views that start at the beginning of each component buffer, keep the stored
// strides and have the sizes of the blocks in 'unit'.
PelUnitBuf PelStorage::getBufPart(const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, PelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat,
                     PelBuf( bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y () ),
                     PelBuf( bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb() ),
                     PelBuf( bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr() ) );
}
1201
1202
// Read-only counterpart of getBufPart(): views that start at the beginning of each
// component buffer, keep the stored strides and have the sizes of the blocks in 'unit'.
// The stored stride is used (not unit.*.width) so that both overloads address the same
// samples; a view with stride == width is what getCompactBuf() provides.
const CPelUnitBuf PelStorage::getBufPart(const UnitArea& unit) const
{
  CHECKD(unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request");

  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, CPelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat,
                      CPelBuf( bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y () ),
                      CPelBuf( bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb() ),
                      CPelBuf( bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr() ) );
}
1207
1208
// Read-only compact views: each component starts at the beginning of its buffer, takes the
// size of the matching block in 'unit', and uses that block's width as its stride.
const CPelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit) const
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = ( chromaFormat == CHROMA_400 );

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  auto& lumaOut  = ret.Y();
  lumaOut.buf    = bufs[COMP_Y].buf;
  lumaOut.stride = unit.Y().width;
  lumaOut.width  = lumaOut.stride;
  lumaOut.height = unit.Y().height;

  if( !lumaOnly )
  {
    auto& cbOut  = ret.Cb();
    cbOut.buf    = bufs[COMP_Cb].buf;
    cbOut.stride = unit.Cb().width;
    cbOut.width  = cbOut.stride;
    cbOut.height = unit.Cb().height;

    auto& crOut  = ret.Cr();
    crOut.buf    = bufs[COMP_Cr].buf;
    crOut.stride = unit.Cr().width;
    crOut.width  = crOut.stride;
    crOut.height = unit.Cr().height;
  }

  return ret;
}
1225
1226
// Compact views: each component starts at the beginning of its buffer, takes the size of
// the matching block in 'unit', and uses that block's width as its stride.
PelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = ( chromaFormat == CHROMA_400 );

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  auto& lumaOut  = ret.Y();
  lumaOut.buf    = bufs[COMP_Y].buf;
  lumaOut.stride = unit.Y().width;
  lumaOut.width  = lumaOut.stride;
  lumaOut.height = unit.Y().height;

  if( !lumaOnly )
  {
    auto& cbOut  = ret.Cb();
    cbOut.buf    = bufs[COMP_Cb].buf;
    cbOut.stride = unit.Cb().width;
    cbOut.width  = cbOut.stride;
    cbOut.height = unit.Cb().height;

    auto& crOut  = ret.Cr();
    crOut.buf    = bufs[COMP_Cr].buf;
    crOut.stride = unit.Cr().width;
    crOut.width  = crOut.stride;
    crOut.height = unit.Cr().height;
  }

  return ret;
}
1243
1244
// Read-only compact view of one component: starts at the beginning of the component
// buffer, has the size of 'carea' and uses carea.width as stride.
const CPelBuf PelStorage::getCompactBuf(const CompArea& carea) const
{
  const PelBuf& plane = bufs[carea.compID];
  return CPelBuf( plane.buf, carea.width, carea );
}
1248
1249
// Compact view of one component: starts at the beginning of the component buffer, has the
// size of 'carea' and uses carea.width as stride.
PelBuf PelStorage::getCompactBuf(const CompArea& carea)
{
  PelBuf& plane = bufs[carea.compID];
  return PelBuf( plane.buf, carea.width, carea );
}
1253
1254
// Box-filter downsampling of one input plane into 'dest'.
//   dest           - destination buffer; its width/height define how many output samples are produced
//   yuvPlaneIn     - source plane (ptr, stride)
//   downsampleStep - edge length of the square source block averaged into one output sample
// Each output sample is the rounded mean of a downsampleStep x downsampleStep source block.
// Source rows are addressed through the plane stride, so planes with row padding are read
// correctly (addressing them by the plane width only works when stride == width).
void downsampleYuv(PelBuf& dest, const vvencYUVPlane& yuvPlaneIn, int downsampleStep)
{
  CHECK( downsampleStep <= 0, "Invalid downsample step" );

  const int widthd   = dest.width;
  const int heightd  = dest.height;
  const int instride = yuvPlaneIn.stride;

  // number of samples per block and the matching round-to-nearest term
  // (4 and 2 for the step of 2 used by copyPadToPelUnitBuf)
  const long int blockSamples = ( long int ) downsampleStep * downsampleStep;
  const long int rounding     = blockSamples >> 1;

  const int16_t* srcRow = yuvPlaneIn.ptr;
  Pel*           dstRow = dest.buf;

  for( int j = 0; j < heightd; j++ )
  {
    const int16_t* blk = srcRow;

    for( int i = 0; i < widthd; i++ )
    {
      long int sum = 0;
      for( int r = 0; r < downsampleStep; r++ )
      {
        const int16_t* line = blk + r * instride;
        for( int c = 0; c < downsampleStep; c++ )
        {
          sum += line[c];
        }
      }

      dstRow[i] = (int16_t)( ( sum + rounding ) / blockSamples );
      blk += downsampleStep;
    }

    // next output row: skip one block height in the source, one stride in the destination
    srcRow += instride * downsampleStep;
    dstRow += dest.stride;
  }
}
1288
1289
// Copies every valid component of 'yuvBuffer' into the matching buffer of 'pelUnitBuf'.
// A destination narrower than the source is filled by 2x downsampling; otherwise the rows
// are copied and the destination is padded to the right by repeating the last copied
// sample of each row and to the bottom by repeating the last copied row.
void copyPadToPelUnitBuf( PelUnitBuf pelUnitBuf, const vvencYUVBuffer& yuvBuffer, const ChromaFormat& chFmt )
{
  CHECK( pelUnitBuf.bufs.size() == 0, "pelUnitBuf not initialized" );
  pelUnitBuf.chromaFormat = chFmt;

  const int numComp = getNumberValidComponents( chFmt );
  for ( int i = 0; i < numComp; i++ )
  {
    const vvencYUVPlane& src = yuvBuffer.planes[ i ];
    CHECK( src.ptr == nullptr, "yuvBuffer not setup" );
    PelBuf& dest = pelUnitBuf.bufs[i];
    CHECK( dest.buf == nullptr, "yuvBuffer not setup" );

    if (dest.width < src.width)
    {
      downsampleYuv(dest, src, 2);
      continue;
    }

    for (int y = 0; y < src.height; y++)
    {
      Pel* dstRow = dest.buf + y * dest.stride;

      ::memcpy(dstRow, src.ptr + y * src.stride, src.width * sizeof(int16_t));

      // pad right if required
      for (int x = src.width; x < dest.width; x++)
      {
        dstRow[x] = dstRow[src.width - 1];
      }
    }

    // pad bottom if required
    for (int y = src.height; y < dest.height; y++)
    {
      Pel*       padRow  = dest.buf + y * dest.stride;
      const Pel* lastRow = dest.buf + (src.height - 1) * dest.stride;

      ::memcpy(padRow, lastRow, dest.width * sizeof(int16_t));
    }
  }
}
1326
1327
/*
1328
void setupPelUnitBuf( const YUVBuffer& yuvBuffer, PelUnitBuf& pelUnitBuf, const ChromaFormat& chFmt )
1329
{
1330
  CHECK( pelUnitBuf.bufs.size() != 0, "pelUnitBuf already in use" );
1331
  pelUnitBuf.chromaFormat = chFmt;
1332
  const int numComp = getNumberValidComponents( chFmt );
1333
  for ( int i = 0; i < numComp; i++ )
1334
  {
1335
    const YUVBuffer::Plane& yuvPlane = yuvBuffer.planes[ i ];
1336
    CHECK( yuvPlane.ptr == nullptr, "yuvBuffer not setup" );
1337
    PelBuf area( yuvPlane.ptr, yuvPlane.stride, yuvPlane.width, yuvPlane.height );
1338
    pelUnitBuf.bufs.push_back( area );
1339
  }
1340
}
1341
*/
1342
void setupYuvBuffer ( const PelUnitBuf& pelUnitBuf, vvencYUVBuffer& yuvBuffer, const Window* confWindow )
1343
0
{
1344
0
  const ChromaFormat chFmt = pelUnitBuf.chromaFormat;
1345
0
  const int numComp        = getNumberValidComponents( chFmt );
1346
0
  for ( int i = 0; i < numComp; i++ )
1347
0
  {
1348
0
    const ComponentID compId = ComponentID( i );
1349
0
          PelBuf area        = pelUnitBuf.get( compId );
1350
0
    const int sx             = getComponentScaleX( compId, chFmt );
1351
0
    const int sy             = getComponentScaleY( compId, chFmt );
1352
0
    vvencYUVPlane& yuvPlane = yuvBuffer.planes[ i ];
1353
0
    CHECK( yuvPlane.ptr != nullptr, "yuvBuffer already in use" );
1354
0
    yuvPlane.ptr             = area.bufAt( confWindow->winLeftOffset >> sx, confWindow->winTopOffset >> sy );
1355
0
    yuvPlane.width           = ( ( area.width  << sx ) - ( confWindow->winLeftOffset + confWindow->winRightOffset  ) ) >> sx;
1356
0
    yuvPlane.height          = ( ( area.height << sy ) - ( confWindow->winTopOffset  + confWindow->winBottomOffset ) ) >> sy;
1357
0
    yuvPlane.stride          = area.stride;
1358
0
  }
1359
0
}
1360
1361
} // namespace vvenc
1362
1363
//! \}
1364