Coverage Report

Created: 2026-06-10 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/Buffer.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     Buffer.cpp
45
 *  \brief    Low-overhead class describing 2D memory layout
46
 */
47
48
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
49
50
// unit needs to come first due to a forward declaration
51
#include "Unit.h"
52
#include "Slice.h"
53
#include "InterpolationFilter.h"
54
55
//! \ingroup CommonLib
56
//! \{
57
58
namespace vvenc {
59
60
void weightCiipCore( Pel* res, const Pel* src, const int numSamples, int numIntra )
61
0
{
62
0
  if( numIntra == 1 )
63
0
  {
64
0
    for (int n = 0; n < numSamples; n+=2)
65
0
    {
66
0
      res[n  ] = (res[n  ] + src[n  ] + 1) >> 1;
67
0
      res[n+1] = (res[n+1] + src[n+1] + 1) >> 1;
68
0
    }
69
0
  }
70
0
  else
71
0
  {
72
0
    const Pel* scale   = numIntra ? src : res;
73
0
    const Pel* unscale = numIntra ? res : src;
74
75
0
    for (int n = 0; n < numSamples; n+=2)
76
0
    {
77
0
      res[n  ] = (unscale[n  ] + 3*scale[n  ] + 2) >> 2;
78
0
      res[n+1] = (unscale[n+1] + 3*scale[n+1] + 2) >> 2;
79
0
    }
80
0
  }
81
0
}
82
83
template< unsigned inputSize, unsigned outputSize >
84
void mipMatrixMulCore( Pel* res, const Pel* input, const uint8_t* weight, const int maxVal, const int inputOffset, bool transpose )
85
235k
{
86
235k
  Pel buffer[ outputSize*outputSize];
87
88
235k
  int sum = 0;
89
2.12M
  for( int i = 0; i < inputSize; i++ )
90
1.88M
  {
91
1.88M
    sum += input[i];
92
1.88M
  }
93
235k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
235k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
235k
  Pel* mat = transpose ? buffer : res;
97
235k
  unsigned posRes = 0;
98
15.0M
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
14.8M
  {
100
14.8M
    int tmp0 = input[0] * weight[0];
101
14.8M
    int tmp1 = input[1] * weight[1];
102
14.8M
    int tmp2 = input[2] * weight[2];
103
14.8M
    int tmp3 = input[3] * weight[3];
104
14.8M
    if( 8 == inputSize )
105
14.8M
    {
106
14.8M
      tmp0 += input[4] * weight[4];
107
14.8M
      tmp1 += input[5] * weight[5];
108
14.8M
      tmp2 += input[6] * weight[6];
109
14.8M
      tmp3 += input[7] * weight[7];
110
14.8M
    }
111
14.8M
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
14.8M
    weight += inputSize;
114
14.8M
  }
115
116
235k
  if( transpose )
117
107k
  {
118
953k
    for( int j = 0; j < outputSize; j++ )
119
845k
    {
120
7.56M
      for( int i = 0; i < outputSize; i++ )
121
6.72M
      {
122
6.72M
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
6.72M
      }
124
845k
    }
125
107k
  }
126
235k
}
Unexecuted instantiation: void vvenc::mipMatrixMulCore<4u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
void vvenc::mipMatrixMulCore<8u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
Line
Count
Source
85
5.61k
{
86
5.61k
  Pel buffer[ outputSize*outputSize];
87
88
5.61k
  int sum = 0;
89
50.5k
  for( int i = 0; i < inputSize; i++ )
90
44.9k
  {
91
44.9k
    sum += input[i];
92
44.9k
  }
93
5.61k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
5.61k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
5.61k
  Pel* mat = transpose ? buffer : res;
97
5.61k
  unsigned posRes = 0;
98
95.4k
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
89.8k
  {
100
89.8k
    int tmp0 = input[0] * weight[0];
101
89.8k
    int tmp1 = input[1] * weight[1];
102
89.8k
    int tmp2 = input[2] * weight[2];
103
89.8k
    int tmp3 = input[3] * weight[3];
104
89.8k
    if( 8 == inputSize )
105
89.8k
    {
106
89.8k
      tmp0 += input[4] * weight[4];
107
89.8k
      tmp1 += input[5] * weight[5];
108
89.8k
      tmp2 += input[6] * weight[6];
109
89.8k
      tmp3 += input[7] * weight[7];
110
89.8k
    }
111
89.8k
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
89.8k
    weight += inputSize;
114
89.8k
  }
115
116
5.61k
  if( transpose )
117
2.80k
  {
118
14.0k
    for( int j = 0; j < outputSize; j++ )
119
11.2k
    {
120
56.1k
      for( int i = 0; i < outputSize; i++ )
121
44.9k
      {
122
44.9k
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
44.9k
      }
124
11.2k
    }
125
2.80k
  }
126
5.61k
}
void vvenc::mipMatrixMulCore<8u, 8u>(short*, short const*, unsigned char const*, int, int, bool)
Line
Count
Source
85
230k
{
86
230k
  Pel buffer[ outputSize*outputSize];
87
88
230k
  int sum = 0;
89
2.07M
  for( int i = 0; i < inputSize; i++ )
90
1.84M
  {
91
1.84M
    sum += input[i];
92
1.84M
  }
93
230k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
230k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
230k
  Pel* mat = transpose ? buffer : res;
97
230k
  unsigned posRes = 0;
98
14.9M
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
14.7M
  {
100
14.7M
    int tmp0 = input[0] * weight[0];
101
14.7M
    int tmp1 = input[1] * weight[1];
102
14.7M
    int tmp2 = input[2] * weight[2];
103
14.7M
    int tmp3 = input[3] * weight[3];
104
14.7M
    if( 8 == inputSize )
105
14.7M
    {
106
14.7M
      tmp0 += input[4] * weight[4];
107
14.7M
      tmp1 += input[5] * weight[5];
108
14.7M
      tmp2 += input[6] * weight[6];
109
14.7M
      tmp3 += input[7] * weight[7];
110
14.7M
    }
111
14.7M
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
14.7M
    weight += inputSize;
114
14.7M
  }
115
116
230k
  if( transpose )
117
104k
  {
118
939k
    for( int j = 0; j < outputSize; j++ )
119
834k
    {
120
7.51M
      for( int i = 0; i < outputSize; i++ )
121
6.67M
      {
122
6.67M
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
6.67M
      }
124
834k
    }
125
104k
  }
126
230k
}
127
128
template< typename T >
129
void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, unsigned rshift, int offset, const ClpRng& clpRng )
130
0
{
131
0
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
132
0
#define ADD_AVG_CORE_INC    \
133
0
  src1 += src1Stride;       \
134
0
  src2 += src2Stride;       \
135
0
  dest +=  dstStride;       \
136
0
137
0
  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );
138
139
0
#undef ADD_AVG_CORE_OP
140
0
#undef ADD_AVG_CORE_INC
141
0
}
142
143
template<typename T>
144
void addWeightedAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int destStride, int width, int height, unsigned rshift, int offset, int w0, int w1, const ClpRng& clpRng )
145
0
{
146
0
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
147
0
#define ADD_WGHT_AVG_INC     \
148
0
    src1 += src1Stride; \
149
0
    src2 += src2Stride; \
150
0
    dest += destStride; \
151
0
152
0
  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );
153
154
0
#undef ADD_WGHT_AVG_OP
155
0
#undef ADD_WGHT_AVG_INC
156
0
}
157
158
template<typename T>
159
void subsCore( const T* src0, int src0Stride, const T* src1, int src1Stride, T* dest, int destStride, int width, int height )
160
729k
{
161
729k
#define SUBS_INC                \
162
729k
  dest += destStride;  \
163
729k
  src0 += src0Stride;  \
164
729k
  src1 += src1Stride;  \
165
729k
166
335M
#define SUBS_OP( ADDR ) dest[ADDR] = src0[ADDR] - src1[ADDR]
167
168
335M
  SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
169
170
729k
#undef SUBS_OP
171
729k
#undef SUBS_INC
172
729k
}
173
174
void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height)
175
0
{
176
0
#define REM_HF_INC  \
177
0
 src += srcStride; \
178
0
 dst += dstStride; \
179
0
180
0
#define REM_HF_OP( ADDR )      dst[ADDR] =             2 * dst[ADDR] - src[ADDR]
181
182
0
 SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);
183
184
0
#undef REM_HF_INC
185
0
#undef REM_HF_OP
186
0
#undef REM_HF_OP_CLIP
187
0
}
188
189
template<typename T>
190
void reconstructCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, const ClpRng& clpRng )
191
8.43k
{
192
3.44M
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
193
8.43k
#define RECO_CORE_INC     \
194
8.43k
  src1 += src1Stride;     \
195
8.43k
  src2 += src2Stride;     \
196
8.43k
  dest +=  dstStride;     \
197
8.43k
198
3.44M
  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );
199
200
8.43k
#undef RECO_CORE_OP
201
8.43k
#undef RECO_CORE_INC
202
8.43k
}
203
204
template<typename T>
205
void recoCore( const T* src1, const T* src2, T* dest, int numSamples, const ClpRng& clpRng )
206
2.04M
{
207
304M
  for( int n = 0; n < numSamples; n+=2)
208
302M
  {
209
302M
    dest[n]   = ClipPel( src1[n]   + src2[n], clpRng );
210
302M
    dest[n+1] = ClipPel( src1[n+1] + src2[n+1], clpRng );
211
302M
  }
212
2.04M
}
213
214
template<typename T>
215
void copyClipCore( const T* src, Pel* dst, int numSamples, const ClpRng& clpRng )
216
0
{
217
0
  for( int n = 0; n < numSamples; n+=2)
218
0
  {
219
0
    dst[n]   = ClipPel( src[n]   , clpRng );
220
0
    dst[n+1] = ClipPel( src[n+1] , clpRng );
221
0
  }
222
0
}
223
224
template< typename T >
225
void addAvgCore( const T* src1, const T* src2, T* dest, int numSamples, unsigned rshift, int offset, const ClpRng& clpRng )
226
0
{
227
0
  for( int n = 0; n < numSamples; n+=2)
228
0
  {
229
0
    dest[n]   = ClipPel( rightShiftU( ( src1[n]   + src2[n]   + offset ), rshift ), clpRng );
230
0
    dest[n+1] = ClipPel( rightShiftU( ( src1[n+1] + src2[n+1] + offset ), rshift ), clpRng );
231
0
  }
232
0
}
233
234
template< typename T >
235
void roundGeoCore( const T* src, T* dest, const int numSamples, unsigned rshift, int offset, const ClpRng &clpRng)
236
0
{
237
0
  for( int i = 0; i < numSamples; i+=2)
238
0
  {
239
0
    dest[i]   = ClipPel(rightShiftU(src[i  ] + offset, rshift), clpRng);
240
0
    dest[i+1] = ClipPel(rightShiftU(src[i+1] + offset, rshift), clpRng);
241
0
  }
242
0
}
243
244
template<typename T>
245
void linTfCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, int scale, unsigned shift, int offset, const ClpRng& clpRng, bool bClip )
246
186k
{
247
186k
#define LINTF_CORE_INC  \
248
186k
  src += srcStride;     \
249
186k
  dst += dstStride;     \
250
186k
251
186k
  if( bClip )
252
186k
  {
253
37.0M
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ClipPel( rightShiftU( scale * src[ADDR], shift ) + offset, clpRng )
254
255
37.0M
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
256
257
186k
#undef LINTF_CORE_OP
258
186k
  }
259
0
  else
260
0
  {
261
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ( rightShiftU( scale * src[ADDR], shift ) + offset )
262
263
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
264
265
0
#undef LINTF_CORE_OP
266
0
  }
267
186k
#undef LINTF_CORE_INC
268
186k
}
269
270
template<typename T, int N>
271
void transposeNxNCore( const Pel* src, int srcStride, Pel* dst, int dstStride )
272
6.47M
{
273
56.8M
  for( int i = 0; i < N; i++ )
274
50.3M
  {
275
447M
    for( int j = 0; j < N; j++ )
276
397M
    {
277
397M
      dst[j * dstStride] = src[j];
278
397M
    }
279
280
50.3M
    dst++;
281
50.3M
    src += srcStride;
282
50.3M
  }
283
6.47M
}
void vvenc::transposeNxNCore<short, 4>(short const*, int, short*, int)
Line
Count
Source
272
361k
{
273
1.80M
  for( int i = 0; i < N; i++ )
274
1.44M
  {
275
7.23M
    for( int j = 0; j < N; j++ )
276
5.78M
    {
277
5.78M
      dst[j * dstStride] = src[j];
278
5.78M
    }
279
280
1.44M
    dst++;
281
1.44M
    src += srcStride;
282
1.44M
  }
283
361k
}
void vvenc::transposeNxNCore<short, 8>(short const*, int, short*, int)
Line
Count
Source
272
6.11M
{
273
55.0M
  for( int i = 0; i < N; i++ )
274
48.9M
  {
275
440M
    for( int j = 0; j < N; j++ )
276
391M
    {
277
391M
      dst[j * dstStride] = src[j];
278
391M
    }
279
280
48.9M
    dst++;
281
48.9M
    src += srcStride;
282
48.9M
  }
283
6.11M
}
284
285
template<typename T>
286
void copyClipCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, const ClpRng& clpRng )
287
0
{
288
0
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
289
0
#define RECO_INC      \
290
0
    src += srcStride; \
291
0
    dst += dstStride; \
292
0
293
0
  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
294
295
0
#undef RECO_OP
296
0
#undef RECO_INC
297
0
}
298
299
void copyBufferCore( const char* src, int srcStride, char* dst, int dstStride, int numBytes, int height)
300
7.77M
{
301
129M
  for( int i = 0; i < height; i++, src += srcStride, dst += dstStride )
302
121M
  {
303
121M
    memcpy( dst, src, numBytes );
304
121M
  }
305
7.77M
}
306
307
void applyLutCore( const Pel* src, const ptrdiff_t srcStride, Pel* dst, const ptrdiff_t dstStride, int width, int height, const Pel* lut )
308
0
{
309
0
#define RSP_SGNL_OP( ADDR ) dst[ADDR] = lut[src[ADDR]]
310
0
#define RSP_SGNL_INC        src      += srcStride; dst += dstStride;
311
312
0
  SIZE_AWARE_PER_EL_OP( RSP_SGNL_OP, RSP_SGNL_INC )
313
314
0
#undef RSP_SGNL_OP
315
0
#undef RSP_SGNL_INC
316
0
}
317
318
void fillMapPtr_Core( void** ptrMap, const ptrdiff_t mapStride, int width, int height, void* val )
319
403k
{
320
403k
  if( width == mapStride )
321
265k
  {
322
265k
    std::fill_n( ptrMap, width * height, val );
323
265k
  }
324
137k
  else
325
137k
  {
326
1.26M
    while( height-- )
327
1.13M
    {
328
1.13M
      std::fill_n( ptrMap, width, val );
329
1.13M
      ptrMap += mapStride;
330
1.13M
    }
331
137k
  }
332
403k
}
333
334
uint64_t AvgHighPassCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
335
10.1k
{
336
10.1k
  uint64_t saAct = 0;
337
816k
  for (int y = 1; y < height - 1; y++)
338
805k
  {
339
74.9M
    for (int x = 1; x < width - 1; x++) // center cols
340
74.1M
    {
341
74.1M
      const int s = 12 * (int) pSrc[x  ] - 2 * ((int) pSrc[x-1] + (int) pSrc[x+1] + (int) pSrc[x  -iSrcStride] + (int) pSrc[x  +iSrcStride])
342
74.1M
                             - ((int) pSrc[x-1-iSrcStride] + (int) pSrc[x+1-iSrcStride] + (int) pSrc[x-1+iSrcStride] + (int) pSrc[x+1+iSrcStride]);
343
74.1M
      saAct += abs (s);
344
74.1M
    }
345
805k
    pSrc += iSrcStride;
346
805k
  }
347
10.1k
  return saAct;
348
10.1k
}
349
350
uint64_t HDHighPassCore  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const int iSrcStride,const int iSM1Stride)
351
0
{
352
0
  uint64_t taAct = 0;
353
0
  for (int y = 1; y < height - 1; y++)
354
0
  {
355
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
356
0
    {
357
0
      const int t = (int) pSrc[x] - (int) pSM1[x];
358
0
      taAct += (1 + 3 * abs (t)) >> 1;
359
0
    }
360
0
    pSrc += iSrcStride;
361
0
    pSM1 += iSM1Stride;
362
0
  }
363
0
  return taAct;
364
0
}
365
366
uint64_t  HDHighPass2Core  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const Pel* pSM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
367
0
{
368
0
  uint64_t taAct = 0;
369
0
  for (int y = 1; y < height - 1; y++)
370
0
  {
371
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
372
0
    {
373
0
      const int t = (int) pSrc[x] - 2 * (int) pSM1[x] + (int) pSM2[x];
374
0
      taAct += abs (t);
375
0
    }
376
0
    pSrc += iSrcStride;
377
0
    pSM1 += iSM1Stride;
378
0
    pSM2 += iSM2Stride;
379
0
  }
380
0
  return taAct;
381
0
}
382
uint64_t AvgHighPassWithDownsamplingCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
383
0
{
384
0
  uint64_t saAct = 0;
385
0
  pSrc -= iSrcStride;
386
0
  pSrc -= iSrcStride;
387
0
 for (int y = 2; y < height - 2; y += 2)
388
0
 {
389
0
   for (int x = 2; x < width - 2; x += 2)
390
0
   {
391
0
     const int f = 12 * ((int)pSrc[ y   *iSrcStride + x  ] + (int)pSrc[ y   *iSrcStride + x+1] + (int)pSrc[(y+1)*iSrcStride + x  ] + (int)pSrc[(y+1)*iSrcStride + x+1])
392
0
                  - 3 * ((int)pSrc[(y-1)*iSrcStride + x  ] + (int)pSrc[(y-1)*iSrcStride + x+1] + (int)pSrc[(y+2)*iSrcStride + x  ] + (int)pSrc[(y+2)*iSrcStride + x+1])
393
0
                  - 3 * ((int)pSrc[ y   *iSrcStride + x-1] + (int)pSrc[ y   *iSrcStride + x+2] + (int)pSrc[(y+1)*iSrcStride + x-1] + (int)pSrc[(y+1)*iSrcStride + x+2])
394
0
                  - 2 * ((int)pSrc[(y-1)*iSrcStride + x-1] + (int)pSrc[(y-1)*iSrcStride + x+2] + (int)pSrc[(y+2)*iSrcStride + x-1] + (int)pSrc[(y+2)*iSrcStride + x+2])
395
0
                      - ((int)pSrc[(y-2)*iSrcStride + x-1] + (int)pSrc[(y-2)*iSrcStride + x  ] + (int)pSrc[(y-2)*iSrcStride + x+1] + (int)pSrc[(y-2)*iSrcStride + x+2]
396
0
                       + (int)pSrc[(y+3)*iSrcStride + x-1] + (int)pSrc[(y+3)*iSrcStride + x  ] + (int)pSrc[(y+3)*iSrcStride + x+1] + (int)pSrc[(y+3)*iSrcStride + x+2]
397
0
                       + (int)pSrc[(y-1)*iSrcStride + x-2] + (int)pSrc[ y   *iSrcStride + x-2] + (int)pSrc[(y+1)*iSrcStride + x-2] + (int)pSrc[(y+2)*iSrcStride + x-2]
398
0
                       + (int)pSrc[(y-1)*iSrcStride + x+3] + (int)pSrc[ y   *iSrcStride + x+3] + (int)pSrc[(y+1)*iSrcStride + x+3] + (int)pSrc[(y+2)*iSrcStride + x+3]);
399
0
     saAct += (uint64_t) abs(f);
400
0
   }
401
0
 }
402
0
 return saAct;
403
0
}
404
uint64_t AvgHighPassWithDownsamplingDiff1stCore (const int width, const int  height, const Pel* pSrc,const Pel* pSrcM1, const int iSrcStride, const int iSrcM1Stride)
405
0
{
406
0
  uint64_t taAct = 0;
407
0
  pSrc -= iSrcStride;
408
0
  pSrc -= iSrcStride;
409
0
  pSrcM1-=iSrcM1Stride;
410
0
  pSrcM1-=iSrcM1Stride;
411
412
0
  for (uint32_t y = 2; y < height-2; y += 2)
413
0
  {
414
0
    for (uint32_t x = 2; x < width-2; x += 2)
415
0
    {
416
0
      const int t = (int)pSrc  [y*iSrcStride + x] + (int)pSrc  [y*iSrcStride + x+1] + (int)pSrc  [(y+1)*iSrcStride + x] + (int)pSrc  [(y+1)*iSrcStride + x+1]
417
0
                 - ((int)pSrcM1[y*iSrcM1Stride + x] + (int)pSrcM1[y*iSrcM1Stride + x+1] + (int)pSrcM1[(y+1)*iSrcM1Stride + x] + (int)pSrcM1[(y+1)*iSrcM1Stride + x+1]);
418
0
      taAct += (1 + 3 * abs (t)) >> 1;
419
0
    }
420
0
  }
421
0
  return (taAct );
422
0
}
423
424
uint64_t AvgHighPassWithDownsamplingDiff2ndCore (const int width,const int height,const Pel* pSrc,const Pel* pSrcM1,const Pel* pSrcM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
425
0
{
426
0
  uint64_t taAct = 0;
427
428
0
  pSrc -= iSrcStride;
429
0
  pSrc -= iSrcStride;
430
0
  pSrcM1-=iSM1Stride;
431
0
  pSrcM1-=iSM1Stride;
432
0
  pSrcM2-=iSM2Stride;
433
0
  pSrcM2-=iSM2Stride;
434
435
0
  for (uint32_t y = 2; y < height-2; y += 2)
436
0
  {
437
0
    for (uint32_t x = 2; x < width-2; x += 2)
438
0
    {
439
0
      const int t = (int)pSrc  [y*iSrcStride + x] + (int)pSrc  [y*iSrcStride + x+1] + (int)pSrc  [(y+1)*iSrcStride + x] + (int)pSrc  [(y+1)*iSrcStride + x+1]
440
0
                            - 2 * ((int)pSrcM1[y*iSM1Stride + x] + (int)pSrcM1[y*iSM1Stride + x+1] + (int)pSrcM1[(y+1)*iSM1Stride + x] + (int)pSrcM1[(y+1)*iSM1Stride + x+1])
441
0
                            + (int)pSrcM2[y*iSM2Stride + x] + (int)pSrcM2[y*iSM2Stride + x+1] + (int)pSrcM2[(y+1)*iSM2Stride + x] + (int)pSrcM2[(y+1)*iSM2Stride + x+1];
442
0
      taAct += (uint64_t) abs(t);
443
0
    }
444
0
  }
445
0
  return (taAct);
446
0
}
447
448
PelBufferOps::PelBufferOps()
449
11
{
450
11
  isInitX86Done = false;
451
452
11
  addAvg            = addAvgCore<Pel>;
453
11
  reco              = recoCore<Pel>;
454
11
  copyClip          = copyClipCore<Pel>;
455
11
  roundGeo          = roundGeoCore<Pel>;
456
457
11
  addAvg4           = addAvgCore<Pel>;
458
11
  addAvg8           = addAvgCore<Pel>;
459
11
  addAvg16          = addAvgCore<Pel>;
460
461
11
  sub4              = subsCore<Pel>;
462
11
  sub8              = subsCore<Pel>;
463
464
11
  wghtAvg4          = addWeightedAvgCore<Pel>;
465
11
  wghtAvg8          = addWeightedAvgCore<Pel>;
466
467
11
  copyClip4         = copyClipCore<Pel>;
468
11
  copyClip8         = copyClipCore<Pel>;
469
470
11
  reco4             = reconstructCore<Pel>;
471
11
  reco8             = reconstructCore<Pel>;
472
473
11
  linTf4            = linTfCore<Pel>;
474
11
  linTf8            = linTfCore<Pel>;
475
476
11
  copyBuffer        = copyBufferCore;
477
478
11
  removeHighFreq8   = removeHighFreq;
479
11
  removeHighFreq4   = removeHighFreq;
480
481
11
  transpose4x4      = transposeNxNCore<Pel,4>;
482
11
  transpose8x8      = transposeNxNCore<Pel,8>;
483
11
  mipMatrixMul_4_4  = mipMatrixMulCore<4,4>;
484
11
  mipMatrixMul_8_4  = mipMatrixMulCore<8,4>;
485
11
  mipMatrixMul_8_8  = mipMatrixMulCore<8,8>;
486
11
  weightCiip        = weightCiipCore;
487
11
  roundIntVector    = nullptr;
488
489
11
  applyLut          = applyLutCore;
490
491
11
  fillPtrMap        = fillMapPtr_Core;
492
11
  AvgHighPassWithDownsampling = AvgHighPassWithDownsamplingCore;
493
11
  AvgHighPass = AvgHighPassCore;
494
11
  AvgHighPassWithDownsamplingDiff1st = AvgHighPassWithDownsamplingDiff1stCore;
495
11
  AvgHighPassWithDownsamplingDiff2nd = AvgHighPassWithDownsamplingDiff2ndCore;
496
11
  HDHighPass = HDHighPassCore;
497
11
  HDHighPass2 = HDHighPass2Core;
498
11
}
499
500
PelBufferOps g_pelBufOP = PelBufferOps();
501
502
template<>
503
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng, const int8_t BcwIdx)
504
0
{
505
0
  const int8_t w0 = getBcwWeight( BcwIdx, REF_PIC_LIST_0 );
506
0
  const int8_t w1 = getBcwWeight( BcwIdx, REF_PIC_LIST_1 );
507
0
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
508
0
  const Pel* src0 = other1.buf;
509
0
  const Pel* src2 = other2.buf;
510
0
        Pel* dest =        buf;
511
512
0
  const int src1Stride = other1.stride;
513
0
  const int src2Stride = other2.stride;
514
0
  const int destStride =        stride;
515
0
  const int clipbd     = clpRng.bd;
516
0
  const int shiftNum   = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
517
0
  const int offset     = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );
518
519
0
  if( ( width & 7 ) == 0 )
520
0
  {
521
0
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
522
0
  }
523
0
  else if( ( width & 3 ) == 0 )
524
0
  {
525
0
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
526
0
  }
527
0
  else
528
0
  {
529
0
#define WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
530
0
#define WGHT_AVG_INC    \
531
0
    src0 += src1Stride; \
532
0
    src2 += src2Stride; \
533
0
    dest += destStride; \
534
0
535
0
    SIZE_AWARE_PER_EL_OP( WGHT_AVG_OP, WGHT_AVG_INC );
536
537
0
#undef WGHT_AVG_OP
538
0
#undef WGHT_AVG_INC
539
0
  }
540
0
}
541
542
template<>
543
void AreaBuf<Pel>::rspSignal( const Pel* pLUT)
544
0
{
545
0
  g_pelBufOP.applyLut( buf, stride, buf, stride, width, height, pLUT );
546
0
}
547
548
549
template<>
550
void AreaBuf<Pel>::rspSignal( const AreaBuf<const Pel>& other, const Pel* pLUT)
551
0
{
552
0
  g_pelBufOP.applyLut( other.buf, other.stride, buf, stride, width, height, pLUT );
553
0
}
554
555
template<>
556
void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& clpRng)
557
0
{
558
0
        Pel* dst = buf;
559
0
  const Pel* src = buf;
560
0
  const int maxAbsclipBD = (1<<clpRng.bd) - 1;
561
562
0
  if (dir) // forward
563
0
  {
564
0
    if (width == 1)
565
0
    {
566
0
      THROW("Blocks of width = 1 not supported");
567
0
    }
568
0
    else
569
0
    {
570
0
      for (unsigned y = 0; y < height; y++)
571
0
      {
572
0
        for (unsigned x = 0; x < width; x++)
573
0
        {
574
0
          int sign = src[x] >= 0 ? 1 : -1;
575
0
          int absval = sign * src[x];
576
0
          dst[x] = (Pel)Clip3(-maxAbsclipBD, maxAbsclipBD, sign * (((absval << CSCALE_FP_PREC) + (scale >> 1)) / scale));
577
0
        }
578
0
        dst += stride;
579
0
        src += stride;
580
0
      }
581
0
    }
582
0
  }
583
0
  else // inverse
584
0
  {
585
0
    for (unsigned y = 0; y < height; y++)
586
0
    {
587
0
      for (unsigned x = 0; x < width; x++)
588
0
      {
589
0
        int val    = Clip3<int>((-maxAbsclipBD - 1), maxAbsclipBD, (int)src[x]);
590
0
        int sign   = src[x] >= 0 ? 1 : -1;
591
0
        int absval = sign * val;
592
0
               val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
593
0
        if (sizeof(Pel) == 2) // avoid overflow when storing data
594
0
        {
595
0
          val = Clip3<int>(-32768, 32767, val);
596
0
        }
597
0
        dst[x] = (Pel)val;
598
0
      }
599
0
      dst += stride;
600
0
      src += stride;
601
0
    }
602
0
  }
603
0
}
604
605
template<>
606
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng)
607
0
{
608
0
  const Pel* src0 = other1.buf;
609
0
  const Pel* src2 = other2.buf;
610
0
        Pel* dest =        buf;
611
612
0
  const unsigned src1Stride = other1.stride;
613
0
  const unsigned src2Stride = other2.stride;
614
0
  const unsigned destStride =        stride;
615
0
  const int      clipbd     = clpRng.bd;
616
0
  const unsigned shiftNum   = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
617
0
  const int      offset     = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
618
619
0
#if ENABLE_SIMD_OPT_BUFFER
620
0
  if( destStride == width )
621
0
  {
622
0
    g_pelBufOP.addAvg(src0, src2, dest, width * height, shiftNum, offset, clpRng);
623
0
  }
624
0
  else if ((width & 15) == 0)
625
0
  {
626
0
    g_pelBufOP.addAvg16(src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng);
627
0
  }
628
0
  else if( ( width & 7 ) == 0 )
629
0
  {
630
0
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
631
0
  }
632
0
  else if( ( width & 3 ) == 0 )
633
0
  {
634
0
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
635
0
  }
636
0
  else
637
0
#endif
638
0
  {
639
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
640
0
#define ADD_AVG_INC     \
641
0
    src0 += src1Stride; \
642
0
    src2 += src2Stride; \
643
0
    dest += destStride; \
644
0
645
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
646
647
0
#undef ADD_AVG_OP
648
0
#undef ADD_AVG_INC
649
0
  }
650
0
}
651
652
template<>
653
void AreaBuf<Pel>::subtract( const AreaBuf<const Pel>& minuend, const AreaBuf<const Pel>& subtrahend )
654
729k
{
655
729k
  CHECKD( width  != minuend.width,     "Incompatible size" );
656
729k
  CHECKD( height != minuend.height,    "Incompatible size" );
657
729k
  CHECKD( width  != subtrahend.width,  "Incompatible size");
658
729k
  CHECKD( height != subtrahend.height, "Incompatible size");
659
  
660
729k
        Pel* dest =            buf;
661
729k
  const Pel* mins = minuend   .buf;
662
729k
  const Pel* subs = subtrahend.buf;
663
664
665
729k
#if ENABLE_SIMD_OPT_BUFFER
666
729k
  const unsigned destStride =            stride;
667
729k
  const unsigned minsStride = minuend.   stride;
668
729k
  const unsigned subsStride = subtrahend.stride;
669
670
729k
  if( ( width & 7 ) == 0 )
671
615k
  {
672
615k
    g_pelBufOP.sub8( mins, minsStride, subs, subsStride, dest, destStride, width, height );
673
615k
  }
674
113k
  else if( ( width & 3 ) == 0 )
675
113k
  {
676
113k
    g_pelBufOP.sub4( mins, minsStride, subs, subsStride, dest, destStride, width, height );
677
113k
  }
678
18.4E
  else
679
18.4E
#endif
680
18.4E
  {
681
18.4E
#define SUBS_INC                \
682
18.4E
    dest +=            stride;  \
683
18.4E
    mins += minuend   .stride;  \
684
18.4E
    subs += subtrahend.stride;  \
685
18.4E
686
18.4E
#define SUBS_OP( ADDR ) dest[ADDR] = mins[ADDR] - subs[ADDR]
687
688
18.4E
    SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
689
690
18.4E
#undef SUBS_OP
691
18.4E
#undef SUBS_INC
692
18.4E
  }
693
729k
}
694
695
template<>
696
void AreaBuf<const Pel>::calcVarianceSplit( const AreaBuf<const Pel>& Org, const uint32_t  size, int& varh,int& varv) const
697
0
{
698
0
  CHECK( Org.width != Org.height, "Incompatible size!" );
699
0
  int stride = Org.stride;
700
0
  const Pel* src;
701
0
  Pel data;
702
0
  double variance=0;
703
0
  double mean=0;
704
0
  int64_t sum[4]={0,0,0,0};
705
0
  int64_t sum_sqr[4]={0,0,0,0};
706
0
  uint32_t halfsize =size>>1;
707
0
  uint32_t off[4]={0,halfsize,size*halfsize,size*halfsize+halfsize};
708
0
  int n,x,y;
709
710
0
  for( n = 0; n < 4; n++)
711
0
  {
712
0
    src = Org.buf+off[n];
713
0
    for( y = 0; y < halfsize; y++)
714
0
    {
715
0
      for(x = 0; x < halfsize; x++)
716
0
      {
717
0
        data=src[y*stride+x];
718
0
        sum[n]+=data;
719
0
        sum_sqr[n]+= data*data;
720
0
      }
721
0
    }
722
0
  }
723
0
  int num=size*(size>>1);
724
  // varhu
725
0
  mean=(double)(sum[0]+sum[1])/(num);
726
0
  variance =  (double)(sum_sqr[0]+sum_sqr[1])/(num) - (mean*mean);
727
0
  varh =(int)(variance+0.5);
728
  // varhl
729
0
  mean=(double)(sum[2]+sum[3])/(num);
730
0
  variance =  (double)(sum_sqr[2]+sum_sqr[3])/(num) - (mean*mean);
731
0
  varh +=(int)(variance+0.5);
732
  // varvl
733
0
  mean=(double)(sum[0]+sum[2])/(num);
734
0
  variance =  (double)(sum_sqr[0]+sum_sqr[2])/(num) - (mean*mean);
735
0
  varv =(int)(variance+0.5);
736
  // varvr
737
0
  mean=(double)(sum[1]+sum[3])/(num);
738
0
  variance =  (double)(sum_sqr[1]+sum_sqr[3])/(num) - (mean*mean);
739
0
  varv +=(int)(variance+0.5);
740
0
}
741
742
template<>
743
void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel>& src, const ClpRng& clpRng )
744
0
{
745
0
  const Pel* srcp = src.buf;
746
0
        Pel* dest =     buf;
747
748
0
  const unsigned srcStride  = src.stride;
749
0
  const unsigned destStride = stride;
750
751
0
  if( destStride == width)
752
0
  {
753
0
    g_pelBufOP.copyClip(srcp, dest, width * height, clpRng);
754
0
  }
755
0
  else if ((width & 7) == 0)
756
0
  {
757
0
    g_pelBufOP.copyClip8(srcp, srcStride, dest, destStride, width, height, clpRng);
758
0
  }
759
0
  else if ((width & 3) == 0)
760
0
  {
761
0
    g_pelBufOP.copyClip4(srcp, srcStride, dest, destStride, width, height, clpRng);
762
0
  }
763
0
  else
764
0
  {
765
0
    for( int y = 0; y < height; y++ )
766
0
    {
767
0
      dest[0] = ClipPel( srcp[0], clpRng);
768
0
      dest[1] = ClipPel( srcp[1], clpRng);
769
0
      srcp += srcStride;
770
0
      dest += destStride;
771
0
    }                                                         \
772
0
  }
773
0
}
774
775
template<>
776
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel>& pred, const AreaBuf<const Pel>& resi, const ClpRng& clpRng )
777
2.05M
{
778
2.05M
  const Pel* src1 = pred.buf;
779
2.05M
  const Pel* src2 = resi.buf;
780
2.05M
        Pel* dest =      buf;
781
782
2.05M
  const unsigned src1Stride = pred.stride;
783
2.05M
  const unsigned src2Stride = resi.stride;
784
2.05M
  const unsigned destStride =      stride;
785
2.05M
  if( src2Stride == width )
786
2.04M
  {
787
2.04M
    g_pelBufOP.reco( pred.buf, resi.buf, buf, width * height, clpRng );
788
2.04M
  }
789
8.43k
  else if( ( width & 7 ) == 0 )
790
5.06k
  {
791
5.06k
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
792
5.06k
  }
793
3.37k
  else if( ( width & 3 ) == 0 )
794
3.37k
  {
795
3.37k
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
796
3.37k
  }
797
0
  else if( ( width & 1 ) == 0 )
798
0
  {
799
0
    for( int y = 0; y < height; y++ )
800
0
    {
801
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng);
802
0
      dest[1] = ClipPel( src1[1] + src2[1], clpRng);
803
0
      src1 += src1Stride;
804
0
      src2 += src2Stride;
805
0
      dest += destStride;
806
0
    }                        
807
0
  }
808
0
  else
809
0
  {
810
0
    CHECKD( width != 1, "Expecting width to be '1'!" );
811
812
0
    for( int y = 0; y < height; y++ )
813
0
    {
814
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng );
815
816
0
      src1 += src1Stride;
817
0
      src2 += src2Stride;
818
0
      dest += destStride;
819
0
    }
820
0
  }
821
2.05M
}
822
823
template<>
824
void AreaBuf<Pel>::linearTransform( const int scale, const unsigned shift, const int offset, bool bClip, const ClpRng& clpRng )
825
186k
{
826
186k
  const Pel* src = buf;
827
186k
        Pel* dst = buf;
828
829
186k
  if( stride == width)
830
186k
  {
831
186k
    if( width > 2 && height > 2 )
832
175k
    {
833
175k
      g_pelBufOP.linTf8( src, stride<<2, dst, stride<<2, width<<2, height>>2, scale, shift, offset, clpRng, bClip );
834
175k
    }
835
11.1k
    else
836
11.1k
    {
837
11.1k
      g_pelBufOP.linTf4( src, stride<<1, dst, stride<<1, width<<1, height>>1, scale, shift, offset, clpRng, bClip );
838
11.1k
    }
839
186k
  }
840
0
  else if( ( width & 7 ) == 0 )
841
0
  {
842
0
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
843
0
  }
844
0
  else if( ( width & 3 ) == 0 )
845
0
  {
846
0
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
847
0
  }
848
0
  else
849
0
  {
850
0
    if( bClip )
851
0
    {
852
0
      for( int y = 0; y < height; y++ )
853
0
      {
854
0
        dst[0] = ( Pel ) ClipPel( rightShiftU( scale * src[0], shift ) + offset, clpRng );
855
0
        dst[1] = ( Pel ) ClipPel( rightShiftU( scale * src[1], shift ) + offset, clpRng );
856
0
        src += stride;
857
0
        dst += stride;
858
0
      }
859
0
    }
860
0
    else
861
0
    {
862
0
      for( int y = 0; y < height; y++ )
863
0
      {
864
0
        dst[0] = ( Pel ) ( rightShiftU( scale * src[0], shift ) + offset );
865
0
        dst[1] = ( Pel ) ( rightShiftU( scale * src[1], shift ) + offset );
866
0
        src += stride;
867
0
        dst += stride;
868
0
      }
869
0
    }
870
0
  }
871
186k
}
872
873
#if ENABLE_SIMD_OPT_BUFFER
874
875
template<>
876
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel>& other )
877
544k
{
878
544k
  CHECK( width != other.height || height != other.width, "Incompatible size" );
879
880
544k
  if( ( ( width | height ) & 7 ) == 0 )
881
438k
  {
882
438k
    const Pel* src = other.buf;
883
884
1.78M
    for( unsigned y = 0; y < other.height; y += 8 )
885
1.35M
    {
886
1.35M
      Pel* dst = buf + y;
887
888
7.46M
      for( unsigned x = 0; x < other.width; x += 8 )
889
6.11M
      {
890
6.11M
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
891
892
6.11M
        dst += 8 * stride;
893
6.11M
      }
894
895
1.35M
      src += 8 * other.stride;
896
1.35M
    }
897
438k
  }
898
105k
  else if( ( ( width | height ) & 3 ) == 0 )
899
96.8k
  {
900
96.8k
    const Pel* src = other.buf;
901
902
288k
    for( unsigned y = 0; y < other.height; y += 4 )
903
191k
    {
904
191k
      Pel* dst = buf + y;
905
906
553k
      for( unsigned x = 0; x < other.width; x += 4 )
907
361k
      {
908
361k
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );
909
910
361k
        dst += 4 * stride;
911
361k
      }
912
913
191k
      src += 4 * other.stride;
914
191k
    }
915
96.8k
  }
916
9.09k
  else
917
9.09k
  {
918
9.09k
          Pel* dst =       buf;
919
9.09k
    const Pel* src = other.buf;
920
9.09k
    width          = other.height;
921
9.09k
    height         = other.width;
922
9.09k
    stride         = stride < width ? width : stride;
923
924
115k
    for( unsigned y = 0; y < other.height; y++ )
925
105k
    {
926
317k
      for( unsigned x = 0; x < other.width; x++ )
927
211k
      {
928
211k
        dst[y + x*stride] = src[x + y * other.stride];
929
211k
      }
930
105k
    }
931
9.09k
  }
932
544k
}
933
#endif
934
935
template<>
936
void AreaBuf<Pel>::weightCiip( const AreaBuf<const Pel>& intra, const int numIntra )
937
0
{
938
0
  CHECK(width == 2, "Width of 2 is not supported");
939
0
  g_pelBufOP.weightCiip( buf, intra.buf, width * height, numIntra );
940
0
}
941
942
template<>
943
void AreaBuf<MotionInfo>::fill( const MotionInfo& val )
944
20.6k
{
945
20.6k
  if( width == stride )
946
20.6k
  {
947
20.6k
    std::fill_n( buf, width * height, val );
948
20.6k
  }
949
0
  else
950
0
  {
951
0
    MotionInfo* dst = buf;
952
953
0
    for( int y = 0; y < height; y++, dst += stride )
954
0
    {
955
0
      std::fill_n( dst, width, val );
956
0
    }
957
0
  }
958
20.6k
}
959
960
// Starts out without any allocation: all MAX_NUM_COMP origin pointers are null.
PelStorage::PelStorage()
{
  uint32_t comp = MAX_NUM_COMP;
  while( comp > 0 )
  {
    comp--;
    m_origin[comp] = nullptr;
  }
}
967
968
// Releases whatever the storage still holds by delegating to destroy().
PelStorage::~PelStorage()
{
  this->destroy();
}
972
973
void PelStorage::create( const UnitArea& _UnitArea )
974
1.67M
{
975
1.67M
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
976
1.67M
  m_maxArea = _UnitArea;
977
1.67M
}
978
979
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area )
980
3.32M
{
981
3.32M
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
982
983
3.32M
  chromaFormat = _chromaFormat;
984
985
3.32M
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
986
987
3.32M
  uint32_t bufSize = 0;
988
11.8M
  for( uint32_t i = 0; i < numComp; i++ )
989
8.55M
  {
990
8.55M
    const ComponentID compID = ComponentID( i );
991
8.55M
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
992
8.55M
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
993
994
8.55M
    const uint32_t area = totalWidth * totalHeight;
995
8.55M
    CHECK( !area, "Trying to create a buffer with zero area" );
996
8.55M
    bufSize += area;
997
8.55M
  }
998
999
3.32M
  bufSize += 1; // for SIMD DMVR on the bottom right corner, which overreads the lines by 1 sample
1000
1001
  //allocate one buffer
1002
3.32M
  m_origin[0] = ( Pel* ) xMalloc( Pel, bufSize );
1003
1004
3.32M
  Pel* topLeft = m_origin[0];
1005
11.8M
  for( uint32_t i = 0; i < numComp; i++ )
1006
8.55M
  {
1007
8.55M
    const ComponentID compID = ComponentID( i );
1008
8.55M
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
1009
8.55M
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
1010
8.55M
    const uint32_t area = totalWidth * totalHeight;
1011
1012
8.55M
    bufs.push_back( PelBuf( topLeft, totalWidth, totalWidth, totalHeight ) );
1013
8.55M
    topLeft += area;
1014
8.55M
  }
1015
1016
3.32M
  m_maxArea = UnitArea( _chromaFormat, _area );
1017
3.32M
}
1018
1019
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignment, const bool _scaleChromaMargin )
1020
154k
{
1021
154k
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
1022
1023
154k
  chromaFormat = _chromaFormat;
1024
1025
154k
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
1026
1027
154k
  unsigned extHeight = _area.height;
1028
154k
  unsigned extWidth  = _area.width;
1029
1030
154k
  if( _maxCUSize )
1031
28.8k
  {
1032
28.8k
    extHeight = ( ( _area.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1033
28.8k
    extWidth  = ( ( _area.width  + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1034
28.8k
  }
1035
1036
546k
  for( uint32_t i = 0; i < numComp; i++ )
1037
392k
  {
1038
392k
    const ComponentID compID = ComponentID( i );
1039
392k
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
1040
392k
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );
1041
1042
392k
    unsigned scaledHeight = extHeight >> scaleY;
1043
392k
    unsigned scaledWidth  = extWidth  >> scaleX;
1044
392k
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
1045
392k
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);
1046
392k
    unsigned totalWidth   = scaledWidth + 2*xmargin;
1047
392k
    unsigned totalHeight  = scaledHeight +2*ymargin;
1048
1049
392k
    if( _alignment )
1050
219k
    {
1051
      // make sure buffer lines are align
1052
219k
      CHECK( _alignment != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
1053
219k
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
1054
219k
    }
1055
392k
    uint32_t area = totalWidth * totalHeight;
1056
392k
    CHECK( !area, "Trying to create a buffer with zero area" );
1057
1058
392k
    m_origin[i] = ( Pel* ) xMalloc( Pel, area );
1059
392k
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
1060
392k
    bufs.push_back( PelBuf( topLeft, totalWidth, _area.width >> scaleX, _area.height >> scaleY ) );
1061
392k
  }
1062
1063
154k
  m_maxArea = UnitArea( _chromaFormat, _area );
1064
154k
}
1065
1066
// Makes this storage reference the sample memory of 'buf' without allocating anything:
// only the component views are set up, m_origin is not touched.
void PelStorage::createFromBuf( PelUnitBuf buf )
{
  chromaFormat = buf.chromaFormat;

  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  bufs.resize(numComp);

  uint32_t c = 0;
  while( c < numComp )
  {
    PelBuf view = buf.get( ComponentID( c ) );
    bufs[c] = PelBuf( view.bufAt( 0, 0 ), view.stride, view.width, view.height );
    c++;
  }
}
1080
1081
void PelStorage::compactResize( const UnitArea& area )
1082
2.04M
{
1083
2.04M
  CHECK( bufs.size() < area.blocks.size(), "Cannot increase buffer size when compacting!" );
1084
1085
6.90M
  for( uint32_t i = 0; i < area.blocks.size(); i++ )
1086
4.86M
  {
1087
4.86M
    CHECK( m_maxArea.blocks[i].area() < area.blocks[i].area(), "Cannot increase buffer size when compacting!" );
1088
1089
4.86M
    bufs[i].Size::operator=( area.blocks[i].size() );
1090
4.86M
    bufs[i].stride = bufs[i].width;
1091
4.86M
  }
1092
2.04M
}
1093
1094
// Takes over the component views and allocations of 'other'. The origin pointers are
// swapped, so anything this storage owned before is handed to 'other' and released by
// the final other.destroy().
void PelStorage::takeOwnership( PelStorage& other )
{
  chromaFormat = other.chromaFormat;

  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  bufs.resize(numComp);

  for( uint32_t c = 0; c < numComp; c++ )
  {
    PelBuf view = other.get( ComponentID( c ) );

    bufs[c] = PelBuf( view.bufAt( 0, 0 ), view.stride, view.width, view.height );
    std::swap( m_origin[c], other.m_origin[c]);
  }

  m_maxArea = other.m_maxArea;

  other.destroy();
}
1113
1114
1115
// Exchanges sample memory (view pointer, stride and owning origin pointer) with 'other',
// component by component. Both storages must have the same chroma format and
// matching component views and strides.
void PelStorage::swap( PelStorage& other )
{
  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  uint32_t i = 0;
  while( i < numComp )
  {
    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat                   != other.chromaFormat                  , "Incompatible formats" );
    CHECK( get( ComponentID( i ) )        != other.get( ComponentID( i ) )       , "Incompatible formats" );
    CHECK( get( ComponentID( i ) ).stride != other.get( ComponentID( i ) ).stride, "Incompatible formats" );

    auto& mine   = bufs[i];
    auto& theirs = other.bufs[i];

    std::swap( mine.buf,    theirs.buf );
    std::swap( mine.stride, theirs.stride );
    std::swap( m_origin[i], other.m_origin[i] );

    i++;
  }
}
1131
1132
void PelStorage::destroy()
1133
7.24M
{
1134
7.24M
  chromaFormat = NUM_CHROMA_FORMAT;
1135
28.9M
  for( uint32_t i = 0; i < MAX_NUM_COMP; i++ )
1136
21.7M
  {
1137
21.7M
    if( m_origin[i] )
1138
3.71M
    {
1139
3.71M
      xFree( m_origin[i] );
1140
3.71M
      m_origin[i] = nullptr;
1141
3.71M
    }
1142
21.7M
  }
1143
7.24M
  bufs.clear();
1144
7.24M
}
1145
1146
// Returns a copy of the view stored for component 'CompID'.
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  PelBuf& compView = bufs[CompID];
  return compView;
}
1150
1151
// Returns a read-only view of the buffer stored for component 'CompID'.
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  const PelBuf& compView = bufs[CompID];
  return compView;
}
1155
1156
// Returns a view of the sub-area 'blk' within the component buffer blk.compID.
// The start address is the component buffer plus rsAddr( blk, stride ); the stored stride is kept.
PelBuf PelStorage::getBuf( const CompArea& blk )
{
  const PelBuf& compView = bufs[blk.compID];

  Pel* const blkStart = compView.buf + rsAddr( blk, compView.stride );
  return PelBuf( blkStart, compView.stride, blk );
}
1161
1162
// Returns a read-only view of the sub-area 'blk' within the component buffer blk.compID.
// The start address is the component buffer plus rsAddr( blk, stride ); the stored stride is kept.
const CPelBuf PelStorage::getBuf( const CompArea& blk ) const
{
  const PelBuf& compView = bufs[blk.compID];

  const Pel* const blkStart = compView.buf + rsAddr( blk, compView.stride );
  return CPelBuf( blkStart, compView.stride, blk );
}
1167
1168
// Returns views of all components of 'unit': luma only for CHROMA_400, otherwise Y, Cb and Cr.
PelUnitBuf PelStorage::getBuf( const UnitArea& unit )
{
  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1172
1173
// Returns read-only views of all components of 'unit': luma only for CHROMA_400,
// otherwise Y, Cb and Cr.
const CPelUnitBuf PelStorage::getBuf( const UnitArea& unit ) const
{
  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1177
1178
// Returns views that start at the beginning of each component buffer, have the block sizes
// of 'unit' and use the caller supplied strides (strY, strCb, strCr) instead of the stored ones.
// In debug builds the requested strides must not exceed the stored strides.
// For CHROMA_400 only luma is returned and the chroma strides are ignored.
PelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    // no chroma buffers exist for this format, so they must not be inspected
    return PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, strY, unit.Y()));
  }

  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  return PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, strY, unit.Y()), PelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb()), PelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr()));
}
1186
1187
// Returns read-only views that start at the beginning of each component buffer, have the
// block sizes of 'unit' and use the caller supplied strides (strY, strCb, strCr) instead of
// the stored ones. In debug builds the requested strides must not exceed the stored strides.
// For CHROMA_400 only luma is returned and the chroma strides are ignored.
const CPelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit) const
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    // no chroma buffers exist for this format, so they must not be inspected
    return CPelUnitBuf(chromaFormat, CPelBuf( bufs[COMP_Y].buf, strY, unit.Y()));
  }

  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  return CPelUnitBuf(chromaFormat, CPelBuf( bufs[COMP_Y].buf, strY, unit.Y()), CPelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb()), CPelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr()));
}
1195
1196
// Returns views that start at the beginning of each component buffer, keep the stored
// stride and have the block sizes of 'unit'. Luma only for CHROMA_400.
PelUnitBuf PelStorage::getBufPart(const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y()));
  }

  return PelUnitBuf(chromaFormat,
                    PelBuf( bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y ()),
                    PelBuf( bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb()),
                    PelBuf( bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr()));
}
1201
1202
// Returns read-only views that start at the beginning of each component buffer, keep the
// stored stride and have the block sizes of 'unit'. Luma only for CHROMA_400.
// The stored stride is used (as in the non-const overload), so rows are addressed the way
// the samples are actually laid out; use getCompactBuf() for a stride equal to the unit width.
const CPelUnitBuf PelStorage::getBufPart(const UnitArea& unit) const
{
  CHECKD(unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request");

  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf(chromaFormat, CPelBuf(bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y()));
  }

  return CPelUnitBuf(chromaFormat,
                     CPelBuf(bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y ()),
                     CPelBuf(bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb()),
                     CPelBuf(bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr()));
}
1207
1208
// Returns read-only compact views (stride equal to width) with the block sizes of 'unit',
// each starting at the beginning of the corresponding component buffer.
// Luma only for CHROMA_400, otherwise Y, Cb and Cr.
const CPelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit) const
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = chromaFormat == CHROMA_400;

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  ret.Y().buf    = bufs[COMP_Y].buf;
  ret.Y().width  = ret.Y().stride = unit.Y().width;
  ret.Y().height = unit.Y().height;

  if( !lumaOnly )
  {
    ret.Cb().buf    = bufs[COMP_Cb].buf;
    ret.Cb().width  = ret.Cb().stride = unit.Cb().width;
    ret.Cb().height = unit.Cb().height;

    ret.Cr().buf    = bufs[COMP_Cr].buf;
    ret.Cr().width  = ret.Cr().stride = unit.Cr().width;
    ret.Cr().height = unit.Cr().height;
  }

  return ret;
}
1225
1226
// Returns compact views (stride equal to width) with the block sizes of 'unit', each
// starting at the beginning of the corresponding component buffer.
// Luma only for CHROMA_400, otherwise Y, Cb and Cr.
PelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = chromaFormat == CHROMA_400;

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  ret.Y().buf    = bufs[COMP_Y].buf;
  ret.Y().width  = ret.Y().stride = unit.Y().width;
  ret.Y().height = unit.Y().height;

  if( !lumaOnly )
  {
    ret.Cb().buf    = bufs[COMP_Cb].buf;
    ret.Cb().width  = ret.Cb().stride = unit.Cb().width;
    ret.Cb().height = unit.Cb().height;

    ret.Cr().buf    = bufs[COMP_Cr].buf;
    ret.Cr().width  = ret.Cr().stride = unit.Cr().width;
    ret.Cr().height = unit.Cr().height;
  }

  return ret;
}
1243
1244
// Returns a read-only compact view (stride equal to carea.width) of size 'carea' that
// starts at the beginning of the component buffer carea.compID.
const CPelBuf PelStorage::getCompactBuf(const CompArea& carea) const
{
  const PelBuf& compView = bufs[carea.compID];
  return CPelBuf( compView.buf, carea.width, carea);
}
1248
1249
// Returns a compact view (stride equal to carea.width) of size 'carea' that starts at the
// beginning of the component buffer carea.compID.
PelBuf PelStorage::getCompactBuf(const CompArea& carea)
{
  const PelBuf& compView = bufs[carea.compID];
  return PelBuf( compView.buf, carea.width, carea);
}
1253
1254
// Downsamples 'yuvPlaneIn' into 'dest' by averaging blocks of downsampleStep x downsampleStep
// input samples into one output sample (rounded division of the block sum).
// The output size is taken from 'dest'; the caller has to make sure that the input plane
// covers dest.width * downsampleStep by dest.height * downsampleStep samples.
void downsampleYuv(PelBuf& dest, const vvencYUVPlane& yuvPlaneIn, int downsampleStep)
{
  CHECK( downsampleStep <= 0, "Invalid downsampling step" );

  const int widthd    = dest.width;
  const int heightd   = dest.height;
  const int instride  = yuvPlaneIn.stride;

  // number of input samples per output sample and the matching rounding offset
  // (for a step of 2 this is the former ( b + 2 ) / 4)
  const int numSamples = downsampleStep * downsampleStep;
  const int rndOffset  = numSamples >> 1;

  const int16_t* srcRow = yuvPlaneIn.ptr;
        Pel*     dstRow = dest.buf;

  for (int j = 0; j < heightd; j++)
  {
    const int16_t* blk = srcRow;

    for (int i = 0; i < widthd; i++)
    {
      long int b = 0;
      for (int r = 0; r < downsampleStep; r++)
      {
        // rows of the input plane are 'instride' samples apart
        const int16_t* blkRow = blk + r * instride;
        for (int c = 0; c < downsampleStep; c++)
        {
          b += blkRow[c];
        }
      }
      dstRow[i] = (int16_t)((b + rndOffset) / numSamples);
      blk += downsampleStep;
    }

    srcRow += instride * downsampleStep;
    dstRow += dest.stride;
  }
}
1288
1289
// Copies the planes of 'yuvBuffer' into the component buffers of 'pelUnitBuf'.
//  - destination narrower than the source plane: the plane is downsampled with a step of 2
//  - otherwise: the plane is copied line by line; if the destination is wider/higher, the
//    last column/line is replicated to the right/bottom.
// 'pelUnitBuf' is passed by value, so the chroma format assignment only affects the local copy.
void copyPadToPelUnitBuf( PelUnitBuf pelUnitBuf, const vvencYUVBuffer& yuvBuffer, const ChromaFormat& chFmt )
{
  CHECK( pelUnitBuf.bufs.size() == 0, "pelUnitBuf not initialized" );
  pelUnitBuf.chromaFormat = chFmt;

  const int numComp = getNumberValidComponents( chFmt );

  for ( int comp = 0; comp < numComp; comp++ )
  {
    const vvencYUVPlane& src = yuvBuffer.planes[ comp ];
    CHECK( src.ptr == nullptr, "yuvBuffer not setup" );
    PelBuf& dest = pelUnitBuf.bufs[comp];
    CHECK( dest.buf == nullptr, "yuvBuffer not setup" );

    if (dest.width < src.width)
    {
      downsampleYuv(dest, src, 2);
      continue;
    }

    for (int y = 0; y < src.height; y++)
    {
      ::memcpy(dest.buf + y * dest.stride, src.ptr + y * src.stride, src.width * sizeof(int16_t));

      // pad right if required: repeat the last copied sample of this line
      int x = src.width;
      while (x < dest.width)
      {
        dest.buf[x + y * dest.stride] = dest.buf[src.width - 1 + y * dest.stride];
        x++;
      }
    }

    // pad bottom if required: repeat the last copied line
    int y = src.height;
    while (y < dest.height)
    {
      ::memcpy(dest.buf + y * dest.stride, dest.buf + (src.height - 1) * dest.stride, dest.width * sizeof(int16_t));
      y++;
    }
  }
}
1326
1327
/*
1328
void setupPelUnitBuf( const YUVBuffer& yuvBuffer, PelUnitBuf& pelUnitBuf, const ChromaFormat& chFmt )
1329
{
1330
  CHECK( pelUnitBuf.bufs.size() != 0, "pelUnitBuf already in use" );
1331
  pelUnitBuf.chromaFormat = chFmt;
1332
  const int numComp = getNumberValidComponents( chFmt );
1333
  for ( int i = 0; i < numComp; i++ )
1334
  {
1335
    const YUVBuffer::Plane& yuvPlane = yuvBuffer.planes[ i ];
1336
    CHECK( yuvPlane.ptr == nullptr, "yuvBuffer not setup" );
1337
    PelBuf area( yuvPlane.ptr, yuvPlane.stride, yuvPlane.width, yuvPlane.height );
1338
    pelUnitBuf.bufs.push_back( area );
1339
  }
1340
}
1341
*/
1342
void setupYuvBuffer ( const PelUnitBuf& pelUnitBuf, vvencYUVBuffer& yuvBuffer, const Window* confWindow )
1343
0
{
1344
0
  const ChromaFormat chFmt = pelUnitBuf.chromaFormat;
1345
0
  const int numComp        = getNumberValidComponents( chFmt );
1346
0
  for ( int i = 0; i < numComp; i++ )
1347
0
  {
1348
0
    const ComponentID compId = ComponentID( i );
1349
0
          PelBuf area        = pelUnitBuf.get( compId );
1350
0
    const int sx             = getComponentScaleX( compId, chFmt );
1351
0
    const int sy             = getComponentScaleY( compId, chFmt );
1352
0
    vvencYUVPlane& yuvPlane = yuvBuffer.planes[ i ];
1353
0
    CHECK( yuvPlane.ptr != nullptr, "yuvBuffer already in use" );
1354
0
    yuvPlane.ptr             = area.bufAt( confWindow->winLeftOffset >> sx, confWindow->winTopOffset >> sy );
1355
0
    yuvPlane.width           = ( ( area.width  << sx ) - ( confWindow->winLeftOffset + confWindow->winRightOffset  ) ) >> sx;
1356
0
    yuvPlane.height          = ( ( area.height << sy ) - ( confWindow->winTopOffset  + confWindow->winBottomOffset ) ) >> sy;
1357
0
    yuvPlane.stride          = area.stride;
1358
0
  }
1359
0
}
1360
1361
} // namespace vvenc
1362
1363
//! \}
1364