Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/Buffer.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     Buffer.cpp
45
 *  \brief    Low-overhead class describing 2D memory layout
46
 */
47
48
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
49
50
// unit needs to come first due to a forward declaration
51
#include "Unit.h"
52
#include "Slice.h"
53
#include "InterpolationFilter.h"
54
55
//! \ingroup CommonLib
56
//! \{
57
58
namespace vvenc {
59
60
// Blends src into res, in place, with fixed weights chosen by numIntra:
//   numIntra == 1 : res = ( res + src + 1 ) >> 1
//   numIntra == 0 : res = ( 3 * res + src + 2 ) >> 2
//   otherwise     : res = ( res + 3 * src + 2 ) >> 2
// Samples are processed in pairs, so an odd numSamples also touches index numSamples.
void weightCiipCore( Pel* res, const Pel* src, const int numSamples, int numIntra )
{
  if( numIntra != 1 )
  {
    // the buffer that gets the weight of three depends on whether numIntra is zero
    const Pel* heavy = ( numIntra != 0 ) ? src : res;
    const Pel* light = ( numIntra != 0 ) ? res : src;

    for( int pos = 0; pos < numSamples; pos += 2 )
    {
      res[pos]     = ( light[pos]     + 3 * heavy[pos]     + 2 ) >> 2;
      res[pos + 1] = ( light[pos + 1] + 3 * heavy[pos + 1] + 2 ) >> 2;
    }
    return;
  }

  // equal weights with rounding
  for( int pos = 0; pos < numSamples; pos += 2 )
  {
    res[pos]     = ( res[pos]     + src[pos]     + 1 ) >> 1;
    res[pos + 1] = ( res[pos + 1] + src[pos + 1] + 1 ) >> 1;
  }
}
82
83
template< unsigned inputSize, unsigned outputSize >
84
void mipMatrixMulCore( Pel* res, const Pel* input, const uint8_t* weight, const int maxVal, const int inputOffset, bool transpose )
85
276k
{
86
276k
  Pel buffer[ outputSize*outputSize];
87
88
276k
  int sum = 0;
89
2.48M
  for( int i = 0; i < inputSize; i++ )
90
2.20M
  {
91
2.20M
    sum += input[i];
92
2.20M
  }
93
276k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
276k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
276k
  Pel* mat = transpose ? buffer : res;
97
276k
  unsigned posRes = 0;
98
17.6M
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
17.3M
  {
100
17.3M
    int tmp0 = input[0] * weight[0];
101
17.3M
    int tmp1 = input[1] * weight[1];
102
17.3M
    int tmp2 = input[2] * weight[2];
103
17.3M
    int tmp3 = input[3] * weight[3];
104
17.3M
    if( 8 == inputSize )
105
17.3M
    {
106
17.3M
      tmp0 += input[4] * weight[4];
107
17.3M
      tmp1 += input[5] * weight[5];
108
17.3M
      tmp2 += input[6] * weight[6];
109
17.3M
      tmp3 += input[7] * weight[7];
110
17.3M
    }
111
17.3M
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
17.3M
    weight += inputSize;
114
17.3M
  }
115
116
276k
  if( transpose )
117
125k
  {
118
1.11M
    for( int j = 0; j < outputSize; j++ )
119
991k
    {
120
8.86M
      for( int i = 0; i < outputSize; i++ )
121
7.87M
      {
122
7.87M
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
7.87M
      }
124
991k
    }
125
125k
  }
126
276k
}
Unexecuted instantiation: void vvenc::mipMatrixMulCore<4u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
void vvenc::mipMatrixMulCore<8u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
Line
Count
Source
85
6.43k
{
86
6.43k
  Pel buffer[ outputSize*outputSize];
87
88
6.43k
  int sum = 0;
89
57.8k
  for( int i = 0; i < inputSize; i++ )
90
51.4k
  {
91
51.4k
    sum += input[i];
92
51.4k
  }
93
6.43k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
6.43k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
6.43k
  Pel* mat = transpose ? buffer : res;
97
6.43k
  unsigned posRes = 0;
98
109k
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
102k
  {
100
102k
    int tmp0 = input[0] * weight[0];
101
102k
    int tmp1 = input[1] * weight[1];
102
102k
    int tmp2 = input[2] * weight[2];
103
102k
    int tmp3 = input[3] * weight[3];
104
102k
    if( 8 == inputSize )
105
102k
    {
106
102k
      tmp0 += input[4] * weight[4];
107
102k
      tmp1 += input[5] * weight[5];
108
102k
      tmp2 += input[6] * weight[6];
109
102k
      tmp3 += input[7] * weight[7];
110
102k
    }
111
102k
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
102k
    weight += inputSize;
114
102k
  }
115
116
6.43k
  if( transpose )
117
3.21k
  {
118
16.0k
    for( int j = 0; j < outputSize; j++ )
119
12.8k
    {
120
64.3k
      for( int i = 0; i < outputSize; i++ )
121
51.4k
      {
122
51.4k
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
51.4k
      }
124
12.8k
    }
125
3.21k
  }
126
6.43k
}
void vvenc::mipMatrixMulCore<8u, 8u>(short*, short const*, unsigned char const*, int, int, bool)
Line
Count
Source
85
269k
{
86
269k
  Pel buffer[ outputSize*outputSize];
87
88
269k
  int sum = 0;
89
2.42M
  for( int i = 0; i < inputSize; i++ )
90
2.15M
  {
91
2.15M
    sum += input[i];
92
2.15M
  }
93
269k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
269k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
269k
  Pel* mat = transpose ? buffer : res;
97
269k
  unsigned posRes = 0;
98
17.5M
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
17.2M
  {
100
17.2M
    int tmp0 = input[0] * weight[0];
101
17.2M
    int tmp1 = input[1] * weight[1];
102
17.2M
    int tmp2 = input[2] * weight[2];
103
17.2M
    int tmp3 = input[3] * weight[3];
104
17.2M
    if( 8 == inputSize )
105
17.2M
    {
106
17.2M
      tmp0 += input[4] * weight[4];
107
17.2M
      tmp1 += input[5] * weight[5];
108
17.2M
      tmp2 += input[6] * weight[6];
109
17.2M
      tmp3 += input[7] * weight[7];
110
17.2M
    }
111
17.2M
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
17.2M
    weight += inputSize;
114
17.2M
  }
115
116
269k
  if( transpose )
117
122k
  {
118
1.10M
    for( int j = 0; j < outputSize; j++ )
119
978k
    {
120
8.80M
      for( int i = 0; i < outputSize; i++ )
121
7.82M
      {
122
7.82M
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
7.82M
      }
124
978k
    }
125
122k
  }
126
269k
}
127
128
template< typename T >
129
void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, unsigned rshift, int offset, const ClpRng& clpRng )
130
0
{
131
0
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
132
0
#define ADD_AVG_CORE_INC    \
133
0
  src1 += src1Stride;       \
134
0
  src2 += src2Stride;       \
135
0
  dest +=  dstStride;       \
136
0
137
0
  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );
138
139
0
#undef ADD_AVG_CORE_OP
140
0
#undef ADD_AVG_CORE_INC
141
0
}
142
143
template<typename T>
144
void addWeightedAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int destStride, int width, int height, unsigned rshift, int offset, int w0, int w1, const ClpRng& clpRng )
145
0
{
146
0
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
147
0
#define ADD_WGHT_AVG_INC     \
148
0
    src1 += src1Stride; \
149
0
    src2 += src2Stride; \
150
0
    dest += destStride; \
151
0
152
0
  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );
153
154
0
#undef ADD_WGHT_AVG_OP
155
0
#undef ADD_WGHT_AVG_INC
156
0
}
157
158
template<typename T>
159
void subsCore( const T* src0, int src0Stride, const T* src1, int src1Stride, T* dest, int destStride, int width, int height )
160
849k
{
161
849k
#define SUBS_INC                \
162
849k
  dest += destStride;  \
163
849k
  src0 += src0Stride;  \
164
849k
  src1 += src1Stride;  \
165
849k
166
400M
#define SUBS_OP( ADDR ) dest[ADDR] = src0[ADDR] - src1[ADDR]
167
168
400M
  SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
169
170
849k
#undef SUBS_OP
171
849k
#undef SUBS_INC
172
849k
}
173
174
void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height)
175
0
{
176
0
#define REM_HF_INC  \
177
0
 src += srcStride; \
178
0
 dst += dstStride; \
179
0
180
0
#define REM_HF_OP( ADDR )      dst[ADDR] =             2 * dst[ADDR] - src[ADDR]
181
182
0
 SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);
183
184
0
#undef REM_HF_INC
185
0
#undef REM_HF_OP
186
0
#undef REM_HF_OP_CLIP
187
0
}
188
189
template<typename T>
190
void reconstructCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, const ClpRng& clpRng )
191
9.46k
{
192
4.09M
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
193
9.46k
#define RECO_CORE_INC     \
194
9.46k
  src1 += src1Stride;     \
195
9.46k
  src2 += src2Stride;     \
196
9.46k
  dest +=  dstStride;     \
197
9.46k
198
4.09M
  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );
199
200
9.46k
#undef RECO_CORE_OP
201
9.46k
#undef RECO_CORE_INC
202
9.46k
}
203
204
template<typename T>
205
void recoCore( const T* src1, const T* src2, T* dest, int numSamples, const ClpRng& clpRng )
206
2.38M
{
207
364M
  for( int n = 0; n < numSamples; n+=2)
208
362M
  {
209
362M
    dest[n]   = ClipPel( src1[n]   + src2[n], clpRng );
210
362M
    dest[n+1] = ClipPel( src1[n+1] + src2[n+1], clpRng );
211
362M
  }
212
2.38M
}
213
214
template<typename T>
215
void copyClipCore( const T* src, Pel* dst, int numSamples, const ClpRng& clpRng )
216
0
{
217
0
  for( int n = 0; n < numSamples; n+=2)
218
0
  {
219
0
    dst[n]   = ClipPel( src[n]   , clpRng );
220
0
    dst[n+1] = ClipPel( src[n+1] , clpRng );
221
0
  }
222
0
}
223
224
template< typename T >
225
void addAvgCore( const T* src1, const T* src2, T* dest, int numSamples, unsigned rshift, int offset, const ClpRng& clpRng )
226
0
{
227
0
  for( int n = 0; n < numSamples; n+=2)
228
0
  {
229
0
    dest[n]   = ClipPel( rightShiftU( ( src1[n]   + src2[n]   + offset ), rshift ), clpRng );
230
0
    dest[n+1] = ClipPel( rightShiftU( ( src1[n+1] + src2[n+1] + offset ), rshift ), clpRng );
231
0
  }
232
0
}
233
234
template< typename T >
235
void roundGeoCore( const T* src, T* dest, const int numSamples, unsigned rshift, int offset, const ClpRng &clpRng)
236
0
{
237
0
  for( int i = 0; i < numSamples; i+=2)
238
0
  {
239
0
    dest[i]   = ClipPel(rightShiftU(src[i  ] + offset, rshift), clpRng);
240
0
    dest[i+1] = ClipPel(rightShiftU(src[i+1] + offset, rshift), clpRng);
241
0
  }
242
0
}
243
244
template<typename T>
245
void linTfCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, int scale, unsigned shift, int offset, const ClpRng& clpRng, bool bClip )
246
217k
{
247
217k
#define LINTF_CORE_INC  \
248
217k
  src += srcStride;     \
249
217k
  dst += dstStride;     \
250
217k
251
217k
  if( bClip )
252
217k
  {
253
44.3M
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ClipPel( rightShiftU( scale * src[ADDR], shift ) + offset, clpRng )
254
255
44.3M
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
256
257
217k
#undef LINTF_CORE_OP
258
217k
  }
259
0
  else
260
0
  {
261
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ( rightShiftU( scale * src[ADDR], shift ) + offset )
262
263
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
264
265
0
#undef LINTF_CORE_OP
266
0
  }
267
217k
#undef LINTF_CORE_INC
268
217k
}
269
270
template<typename T, int N>
271
void transposeNxNCore( const Pel* src, int srcStride, Pel* dst, int dstStride )
272
7.73M
{
273
67.9M
  for( int i = 0; i < N; i++ )
274
60.2M
  {
275
535M
    for( int j = 0; j < N; j++ )
276
475M
    {
277
475M
      dst[j * dstStride] = src[j];
278
475M
    }
279
280
60.2M
    dst++;
281
60.2M
    src += srcStride;
282
60.2M
  }
283
7.73M
}
void vvenc::transposeNxNCore<short, 4>(short const*, int, short*, int)
Line
Count
Source
272
416k
{
273
2.08M
  for( int i = 0; i < N; i++ )
274
1.66M
  {
275
8.33M
    for( int j = 0; j < N; j++ )
276
6.66M
    {
277
6.66M
      dst[j * dstStride] = src[j];
278
6.66M
    }
279
280
1.66M
    dst++;
281
1.66M
    src += srcStride;
282
1.66M
  }
283
416k
}
void vvenc::transposeNxNCore<short, 8>(short const*, int, short*, int)
Line
Count
Source
272
7.31M
{
273
65.8M
  for( int i = 0; i < N; i++ )
274
58.5M
  {
275
526M
    for( int j = 0; j < N; j++ )
276
468M
    {
277
468M
      dst[j * dstStride] = src[j];
278
468M
    }
279
280
58.5M
    dst++;
281
58.5M
    src += srcStride;
282
58.5M
  }
283
7.31M
}
284
285
template<typename T>
286
void copyClipCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, const ClpRng& clpRng )
287
0
{
288
0
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
289
0
#define RECO_INC      \
290
0
    src += srcStride; \
291
0
    dst += dstStride; \
292
0
293
0
  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
294
295
0
#undef RECO_OP
296
0
#undef RECO_INC
297
0
}
298
299
// Copies `height` rows of `numBytes` bytes each from src to dst.
// srcStride / dstStride are the distances, in bytes, between the starts of consecutive rows.
// Nothing is copied when height is zero or negative.
void copyBufferCore( const char* src, int srcStride, char* dst, int dstStride, int numBytes, int height)
{
  int rowsLeft = height;
  while( rowsLeft > 0 )
  {
    memcpy( dst, src, numBytes );
    src += srcStride;
    dst += dstStride;
    --rowsLeft;
  }
}
306
307
void applyLutCore( const Pel* src, const ptrdiff_t srcStride, Pel* dst, const ptrdiff_t dstStride, int width, int height, const Pel* lut )
308
0
{
309
0
#define RSP_SGNL_OP( ADDR ) dst[ADDR] = lut[src[ADDR]]
310
0
#define RSP_SGNL_INC        src      += srcStride; dst += dstStride;
311
312
0
  SIZE_AWARE_PER_EL_OP( RSP_SGNL_OP, RSP_SGNL_INC )
313
314
0
#undef RSP_SGNL_OP
315
0
#undef RSP_SGNL_INC
316
0
}
317
318
// Writes `val` into every entry of a width x height window of a pointer map.
//
//   ptrMap     first entry of the window
//   mapStride  distance, in entries, between the starts of consecutive rows
//   width      entries per row to fill
//   height     number of rows to fill
//
// When the window spans full rows (width == mapStride) it is filled in one pass; the
// element count is formed in ptrdiff_t so it cannot overflow int. Otherwise rows are
// filled one by one. A non-positive width or height fills nothing (a negative height
// previously made the row loop run away).
void fillMapPtr_Core( void** ptrMap, const ptrdiff_t mapStride, int width, int height, void* val )
{
  if( width <= 0 || height <= 0 )
  {
    return;
  }

  if( width == mapStride )
  {
    std::fill_n( ptrMap, static_cast<ptrdiff_t>( width ) * height, val );
  }
  else
  {
    for( int row = 0; row < height; row++, ptrMap += mapStride )
    {
      std::fill_n( ptrMap, width, val );
    }
  }
}
333
334
uint64_t AvgHighPassCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
335
11.6k
{
336
11.6k
  uint64_t saAct = 0;
337
950k
  for (int y = 1; y < height - 1; y++)
338
938k
  {
339
88.7M
    for (int x = 1; x < width - 1; x++) // center cols
340
87.8M
    {
341
87.8M
      const int s = 12 * (int) pSrc[x  ] - 2 * ((int) pSrc[x-1] + (int) pSrc[x+1] + (int) pSrc[x  -iSrcStride] + (int) pSrc[x  +iSrcStride])
342
87.8M
                             - ((int) pSrc[x-1-iSrcStride] + (int) pSrc[x+1-iSrcStride] + (int) pSrc[x-1+iSrcStride] + (int) pSrc[x+1+iSrcStride]);
343
87.8M
      saAct += abs (s);
344
87.8M
    }
345
938k
    pSrc += iSrcStride;
346
938k
  }
347
11.6k
  return saAct;
348
11.6k
}
349
350
uint64_t HDHighPassCore  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const int iSrcStride,const int iSM1Stride)
351
0
{
352
0
  uint64_t taAct = 0;
353
0
  for (int y = 1; y < height - 1; y++)
354
0
  {
355
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
356
0
    {
357
0
      const int t = (int) pSrc[x] - (int) pSM1[x];
358
0
      taAct += (1 + 3 * abs (t)) >> 1;
359
0
    }
360
0
    pSrc += iSrcStride;
361
0
    pSM1 += iSM1Stride;
362
0
  }
363
0
  return taAct;
364
0
}
365
366
uint64_t  HDHighPass2Core  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const Pel* pSM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
367
0
{
368
0
  uint64_t taAct = 0;
369
0
  for (int y = 1; y < height - 1; y++)
370
0
  {
371
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
372
0
    {
373
0
      const int t = (int) pSrc[x] - 2 * (int) pSM1[x] + (int) pSM2[x];
374
0
      taAct += abs (t);
375
0
    }
376
0
    pSrc += iSrcStride;
377
0
    pSM1 += iSM1Stride;
378
0
    pSM2 += iSM2Stride;
379
0
  }
380
0
  return taAct;
381
0
}
382
uint64_t AvgHighPassWithDownsamplingCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
383
0
{
384
0
  uint64_t saAct = 0;
385
0
  pSrc -= iSrcStride;
386
0
  pSrc -= iSrcStride;
387
0
 for (int y = 2; y < height - 2; y += 2)
388
0
 {
389
0
   for (int x = 2; x < width - 2; x += 2)
390
0
   {
391
0
     const int f = 12 * ((int)pSrc[ y   *iSrcStride + x  ] + (int)pSrc[ y   *iSrcStride + x+1] + (int)pSrc[(y+1)*iSrcStride + x  ] + (int)pSrc[(y+1)*iSrcStride + x+1])
392
0
                  - 3 * ((int)pSrc[(y-1)*iSrcStride + x  ] + (int)pSrc[(y-1)*iSrcStride + x+1] + (int)pSrc[(y+2)*iSrcStride + x  ] + (int)pSrc[(y+2)*iSrcStride + x+1])
393
0
                  - 3 * ((int)pSrc[ y   *iSrcStride + x-1] + (int)pSrc[ y   *iSrcStride + x+2] + (int)pSrc[(y+1)*iSrcStride + x-1] + (int)pSrc[(y+1)*iSrcStride + x+2])
394
0
                  - 2 * ((int)pSrc[(y-1)*iSrcStride + x-1] + (int)pSrc[(y-1)*iSrcStride + x+2] + (int)pSrc[(y+2)*iSrcStride + x-1] + (int)pSrc[(y+2)*iSrcStride + x+2])
395
0
                      - ((int)pSrc[(y-2)*iSrcStride + x-1] + (int)pSrc[(y-2)*iSrcStride + x  ] + (int)pSrc[(y-2)*iSrcStride + x+1] + (int)pSrc[(y-2)*iSrcStride + x+2]
396
0
                       + (int)pSrc[(y+3)*iSrcStride + x-1] + (int)pSrc[(y+3)*iSrcStride + x  ] + (int)pSrc[(y+3)*iSrcStride + x+1] + (int)pSrc[(y+3)*iSrcStride + x+2]
397
0
                       + (int)pSrc[(y-1)*iSrcStride + x-2] + (int)pSrc[ y   *iSrcStride + x-2] + (int)pSrc[(y+1)*iSrcStride + x-2] + (int)pSrc[(y+2)*iSrcStride + x-2]
398
0
                       + (int)pSrc[(y-1)*iSrcStride + x+3] + (int)pSrc[ y   *iSrcStride + x+3] + (int)pSrc[(y+1)*iSrcStride + x+3] + (int)pSrc[(y+2)*iSrcStride + x+3]);
399
0
     saAct += (uint64_t) abs(f);
400
0
   }
401
0
 }
402
0
 return saAct;
403
0
}
404
// Accumulates ( 1 + 3 * |t| ) >> 1, where t is the difference between the 2x2 sample sums
// of pSrc and pSrcM1, evaluated on every second row and column (y, x = 2, 4, ... while
// below height-2 / width-2). Both pointers are first moved up by two rows.
//
// The loop counters are signed so that the bounds height-2 / width-2 are compared as
// signed values (a size below 2 yields no iterations instead of wrapping to a huge
// unsigned bound) and so that row offsets are not computed in unsigned arithmetic.
uint64_t AvgHighPassWithDownsamplingDiff1stCore (const int width, const int  height, const Pel* pSrc,const Pel* pSrcM1, const int iSrcStride, const int iSrcM1Stride)
{
  uint64_t taAct = 0;
  pSrc -= iSrcStride;
  pSrc -= iSrcStride;
  pSrcM1-=iSrcM1Stride;
  pSrcM1-=iSrcM1Stride;

  for (int y = 2; y < height-2; y += 2)
  {
    for (int x = 2; x < width-2; x += 2)
    {
      const int t = (int)pSrc  [y*iSrcStride + x] + (int)pSrc  [y*iSrcStride + x+1] + (int)pSrc  [(y+1)*iSrcStride + x] + (int)pSrc  [(y+1)*iSrcStride + x+1]
                 - ((int)pSrcM1[y*iSrcM1Stride + x] + (int)pSrcM1[y*iSrcM1Stride + x+1] + (int)pSrcM1[(y+1)*iSrcM1Stride + x] + (int)pSrcM1[(y+1)*iSrcM1Stride + x+1]);
      taAct += (1 + 3 * abs (t)) >> 1;
    }
  }
  return (taAct );
}
423
424
// Accumulates |t|, where t = S(pSrc) - 2 * S(pSrcM1) + S(pSrcM2) and S() is the sum of a
// 2x2 group of samples, evaluated on every second row and column (y, x = 2, 4, ... while
// below height-2 / width-2). All three pointers are first moved up by two rows.
//
// The loop counters are signed so that the bounds height-2 / width-2 are compared as
// signed values (a size below 2 yields no iterations instead of wrapping to a huge
// unsigned bound) and so that row offsets are not computed in unsigned arithmetic.
uint64_t AvgHighPassWithDownsamplingDiff2ndCore (const int width,const int height,const Pel* pSrc,const Pel* pSrcM1,const Pel* pSrcM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
{
  uint64_t taAct = 0;

  pSrc -= iSrcStride;
  pSrc -= iSrcStride;
  pSrcM1-=iSM1Stride;
  pSrcM1-=iSM1Stride;
  pSrcM2-=iSM2Stride;
  pSrcM2-=iSM2Stride;

  for (int y = 2; y < height-2; y += 2)
  {
    for (int x = 2; x < width-2; x += 2)
    {
      const int t = (int)pSrc  [y*iSrcStride + x] + (int)pSrc  [y*iSrcStride + x+1] + (int)pSrc  [(y+1)*iSrcStride + x] + (int)pSrc  [(y+1)*iSrcStride + x+1]
                            - 2 * ((int)pSrcM1[y*iSM1Stride + x] + (int)pSrcM1[y*iSM1Stride + x+1] + (int)pSrcM1[(y+1)*iSM1Stride + x] + (int)pSrcM1[(y+1)*iSM1Stride + x+1])
                            + (int)pSrcM2[y*iSM2Stride + x] + (int)pSrcM2[y*iSM2Stride + x+1] + (int)pSrcM2[(y+1)*iSM2Stride + x] + (int)pSrcM2[(y+1)*iSM2Stride + x+1];
      taAct += (uint64_t) abs(t);
    }
  }
  return (taAct);
}
447
448
// Points every operation of the dispatch table at the plain C++ implementation defined
// in this file. roundIntVector is the one entry that gets no implementation here.
PelBufferOps::PelBufferOps()
{
  isInitX86Done = false;

  // contiguous (stride-free) kernels
  addAvg            = addAvgCore<Pel>;
  reco              = recoCore<Pel>;
  copyClip          = copyClipCore<Pel>;
  roundGeo          = roundGeoCore<Pel>;
  weightCiip        = weightCiipCore;
  roundIntVector    = nullptr;

  // strided kernels, registered once per width class with the same implementation
  addAvg4           = addAvgCore<Pel>;
  addAvg8           = addAvgCore<Pel>;
  addAvg16          = addAvgCore<Pel>;

  wghtAvg4          = addWeightedAvgCore<Pel>;
  wghtAvg8          = addWeightedAvgCore<Pel>;

  sub4              = subsCore<Pel>;
  sub8              = subsCore<Pel>;

  reco4             = reconstructCore<Pel>;
  reco8             = reconstructCore<Pel>;

  copyClip4         = copyClipCore<Pel>;
  copyClip8         = copyClipCore<Pel>;

  linTf4            = linTfCore<Pel>;
  linTf8            = linTfCore<Pel>;

  removeHighFreq4   = removeHighFreq;
  removeHighFreq8   = removeHighFreq;

  // buffer copy, lookup table and pointer-map fill
  copyBuffer        = copyBufferCore;
  applyLut          = applyLutCore;
  fillPtrMap        = fillMapPtr_Core;

  // fixed-size transposes and matrix multiplications
  transpose4x4      = transposeNxNCore<Pel,4>;
  transpose8x8      = transposeNxNCore<Pel,8>;
  mipMatrixMul_4_4  = mipMatrixMulCore<4,4>;
  mipMatrixMul_8_4  = mipMatrixMulCore<8,4>;
  mipMatrixMul_8_8  = mipMatrixMulCore<8,8>;

  // high-pass activity measures
  AvgHighPass                        = AvgHighPassCore;
  AvgHighPassWithDownsampling        = AvgHighPassWithDownsamplingCore;
  AvgHighPassWithDownsamplingDiff1st = AvgHighPassWithDownsamplingDiff1stCore;
  AvgHighPassWithDownsamplingDiff2nd = AvgHighPassWithDownsamplingDiff2ndCore;
  HDHighPass                         = HDHighPassCore;
  HDHighPass2                        = HDHighPass2Core;
}

// dispatch table instance used by the AreaBuf<Pel> specialisations in this file
PelBufferOps g_pelBufOP = PelBufferOps();
501
502
template<>
503
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng, const int8_t BcwIdx)
504
0
{
505
0
  const int8_t w0 = getBcwWeight( BcwIdx, REF_PIC_LIST_0 );
506
0
  const int8_t w1 = getBcwWeight( BcwIdx, REF_PIC_LIST_1 );
507
0
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
508
0
  const Pel* src0 = other1.buf;
509
0
  const Pel* src2 = other2.buf;
510
0
        Pel* dest =        buf;
511
512
0
  const int src1Stride = other1.stride;
513
0
  const int src2Stride = other2.stride;
514
0
  const int destStride =        stride;
515
0
  const int clipbd     = clpRng.bd;
516
0
  const int shiftNum   = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
517
0
  const int offset     = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );
518
519
0
  if( ( width & 7 ) == 0 )
520
0
  {
521
0
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
522
0
  }
523
0
  else if( ( width & 3 ) == 0 )
524
0
  {
525
0
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
526
0
  }
527
0
  else
528
0
  {
529
0
#define WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
530
0
#define WGHT_AVG_INC    \
531
0
    src0 += src1Stride; \
532
0
    src2 += src2Stride; \
533
0
    dest += destStride; \
534
0
535
0
    SIZE_AWARE_PER_EL_OP( WGHT_AVG_OP, WGHT_AVG_INC );
536
537
0
#undef WGHT_AVG_OP
538
0
#undef WGHT_AVG_INC
539
0
  }
540
0
}
541
542
template<>
543
void AreaBuf<Pel>::rspSignal( const Pel* pLUT)
544
0
{
545
0
  g_pelBufOP.applyLut( buf, stride, buf, stride, width, height, pLUT );
546
0
}
547
548
549
template<>
550
void AreaBuf<Pel>::rspSignal( const AreaBuf<const Pel>& other, const Pel* pLUT)
551
0
{
552
0
  g_pelBufOP.applyLut( other.buf, other.stride, buf, stride, width, height, pLUT );
553
0
}
554
555
template<>
556
void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& clpRng)
557
0
{
558
0
        Pel* dst = buf;
559
0
  const Pel* src = buf;
560
0
  const int maxAbsclipBD = (1<<clpRng.bd) - 1;
561
562
0
  if (dir) // forward
563
0
  {
564
0
    if (width == 1)
565
0
    {
566
0
      THROW("Blocks of width = 1 not supported");
567
0
    }
568
0
    else
569
0
    {
570
0
      for (unsigned y = 0; y < height; y++)
571
0
      {
572
0
        for (unsigned x = 0; x < width; x++)
573
0
        {
574
0
          int sign = src[x] >= 0 ? 1 : -1;
575
0
          int absval = sign * src[x];
576
0
          dst[x] = (Pel)Clip3(-maxAbsclipBD, maxAbsclipBD, sign * (((absval << CSCALE_FP_PREC) + (scale >> 1)) / scale));
577
0
        }
578
0
        dst += stride;
579
0
        src += stride;
580
0
      }
581
0
    }
582
0
  }
583
0
  else // inverse
584
0
  {
585
0
    for (unsigned y = 0; y < height; y++)
586
0
    {
587
0
      for (unsigned x = 0; x < width; x++)
588
0
      {
589
0
        int val    = Clip3<int>((-maxAbsclipBD - 1), maxAbsclipBD, (int)src[x]);
590
0
        int sign   = src[x] >= 0 ? 1 : -1;
591
0
        int absval = sign * val;
592
0
               val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
593
0
        if (sizeof(Pel) == 2) // avoid overflow when storing data
594
0
        {
595
0
          val = Clip3<int>(-32768, 32767, val);
596
0
        }
597
0
        dst[x] = (Pel)val;
598
0
      }
599
0
      dst += stride;
600
0
      src += stride;
601
0
    }
602
0
  }
603
0
}
604
605
template<>
606
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng)
607
0
{
608
0
  const Pel* src0 = other1.buf;
609
0
  const Pel* src2 = other2.buf;
610
0
        Pel* dest =        buf;
611
612
0
  const unsigned src1Stride = other1.stride;
613
0
  const unsigned src2Stride = other2.stride;
614
0
  const unsigned destStride =        stride;
615
0
  const int      clipbd     = clpRng.bd;
616
0
  const unsigned shiftNum   = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
617
0
  const int      offset     = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
618
619
0
#if ENABLE_SIMD_OPT_BUFFER
620
0
  if( destStride == width )
621
0
  {
622
0
    g_pelBufOP.addAvg(src0, src2, dest, width * height, shiftNum, offset, clpRng);
623
0
  }
624
0
  else if ((width & 15) == 0)
625
0
  {
626
0
    g_pelBufOP.addAvg16(src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng);
627
0
  }
628
0
  else if( ( width & 7 ) == 0 )
629
0
  {
630
0
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
631
0
  }
632
0
  else if( ( width & 3 ) == 0 )
633
0
  {
634
0
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
635
0
  }
636
0
  else
637
0
#endif
638
0
  {
639
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
640
0
#define ADD_AVG_INC     \
641
0
    src0 += src1Stride; \
642
0
    src2 += src2Stride; \
643
0
    dest += destStride; \
644
0
645
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
646
647
0
#undef ADD_AVG_OP
648
0
#undef ADD_AVG_INC
649
0
  }
650
0
}
651
652
template<>
653
void AreaBuf<Pel>::subtract( const AreaBuf<const Pel>& minuend, const AreaBuf<const Pel>& subtrahend )
654
849k
{
655
849k
  CHECKD( width  != minuend.width,     "Incompatible size" );
656
849k
  CHECKD( height != minuend.height,    "Incompatible size" );
657
849k
  CHECKD( width  != subtrahend.width,  "Incompatible size");
658
849k
  CHECKD( height != subtrahend.height, "Incompatible size");
659
  
660
849k
        Pel* dest =            buf;
661
849k
  const Pel* mins = minuend   .buf;
662
849k
  const Pel* subs = subtrahend.buf;
663
664
665
849k
#if ENABLE_SIMD_OPT_BUFFER
666
849k
  const unsigned destStride =            stride;
667
849k
  const unsigned minsStride = minuend.   stride;
668
849k
  const unsigned subsStride = subtrahend.stride;
669
670
849k
  if( ( width & 7 ) == 0 )
671
720k
  {
672
720k
    g_pelBufOP.sub8( mins, minsStride, subs, subsStride, dest, destStride, width, height );
673
720k
  }
674
129k
  else if( ( width & 3 ) == 0 )
675
129k
  {
676
129k
    g_pelBufOP.sub4( mins, minsStride, subs, subsStride, dest, destStride, width, height );
677
129k
  }
678
18.4E
  else
679
18.4E
#endif
680
18.4E
  {
681
18.4E
#define SUBS_INC                \
682
18.4E
    dest +=            stride;  \
683
18.4E
    mins += minuend   .stride;  \
684
18.4E
    subs += subtrahend.stride;  \
685
18.4E
686
18.4E
#define SUBS_OP( ADDR ) dest[ADDR] = mins[ADDR] - subs[ADDR]
687
688
18.4E
    SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
689
690
18.4E
#undef SUBS_OP
691
18.4E
#undef SUBS_INC
692
18.4E
  }
693
849k
}
694
695
template<>
696
void AreaBuf<const Pel>::calcVarianceSplit( const AreaBuf<const Pel>& Org, const uint32_t  size, int& varh,int& varv) const
697
0
{
698
0
  CHECK( Org.width != Org.height, "Incompatible size!" );
699
0
  int stride = Org.stride;
700
0
  const Pel* src;
701
0
  Pel data;
702
0
  double variance=0;
703
0
  double mean=0;
704
0
  int64_t sum[4]={0,0,0,0};
705
0
  int64_t sum_sqr[4]={0,0,0,0};
706
0
  uint32_t halfsize =size>>1;
707
0
  uint32_t off[4]={0,halfsize,size*halfsize,size*halfsize+halfsize};
708
0
  int n,x,y;
709
710
0
  for( n = 0; n < 4; n++)
711
0
  {
712
0
    src = Org.buf+off[n];
713
0
    for( y = 0; y < halfsize; y++)
714
0
    {
715
0
      for(x = 0; x < halfsize; x++)
716
0
      {
717
0
        data=src[y*stride+x];
718
0
        sum[n]+=data;
719
0
        sum_sqr[n]+= data*data;
720
0
      }
721
0
    }
722
0
  }
723
0
  int num=size*(size>>1);
724
  // varhu
725
0
  mean=(double)(sum[0]+sum[1])/(num);
726
0
  variance =  (double)(sum_sqr[0]+sum_sqr[1])/(num) - (mean*mean);
727
0
  varh =(int)(variance+0.5);
728
  // varhl
729
0
  mean=(double)(sum[2]+sum[3])/(num);
730
0
  variance =  (double)(sum_sqr[2]+sum_sqr[3])/(num) - (mean*mean);
731
0
  varh +=(int)(variance+0.5);
732
  // varvl
733
0
  mean=(double)(sum[0]+sum[2])/(num);
734
0
  variance =  (double)(sum_sqr[0]+sum_sqr[2])/(num) - (mean*mean);
735
0
  varv =(int)(variance+0.5);
736
  // varvr
737
0
  mean=(double)(sum[1]+sum[3])/(num);
738
0
  variance =  (double)(sum_sqr[1]+sum_sqr[3])/(num) - (mean*mean);
739
0
  varv +=(int)(variance+0.5);
740
0
}
741
742
template<>
743
void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel>& src, const ClpRng& clpRng )
744
0
{
745
0
  const Pel* srcp = src.buf;
746
0
        Pel* dest =     buf;
747
748
0
  const unsigned srcStride  = src.stride;
749
0
  const unsigned destStride = stride;
750
751
0
  if( destStride == width)
752
0
  {
753
0
    g_pelBufOP.copyClip(srcp, dest, width * height, clpRng);
754
0
  }
755
0
  else if ((width & 7) == 0)
756
0
  {
757
0
    g_pelBufOP.copyClip8(srcp, srcStride, dest, destStride, width, height, clpRng);
758
0
  }
759
0
  else if ((width & 3) == 0)
760
0
  {
761
0
    g_pelBufOP.copyClip4(srcp, srcStride, dest, destStride, width, height, clpRng);
762
0
  }
763
0
  else
764
0
  {
765
0
    for( int y = 0; y < height; y++ )
766
0
    {
767
0
      dest[0] = ClipPel( srcp[0], clpRng);
768
0
      dest[1] = ClipPel( srcp[1], clpRng);
769
0
      srcp += srcStride;
770
0
      dest += destStride;
771
0
    }                                                         \
772
0
  }
773
0
}
774
775
template<>
776
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel>& pred, const AreaBuf<const Pel>& resi, const ClpRng& clpRng )
777
2.39M
{
778
2.39M
  const Pel* src1 = pred.buf;
779
2.39M
  const Pel* src2 = resi.buf;
780
2.39M
        Pel* dest =      buf;
781
782
2.39M
  const unsigned src1Stride = pred.stride;
783
2.39M
  const unsigned src2Stride = resi.stride;
784
2.39M
  const unsigned destStride =      stride;
785
2.39M
  if( src2Stride == width )
786
2.38M
  {
787
2.38M
    g_pelBufOP.reco( pred.buf, resi.buf, buf, width * height, clpRng );
788
2.38M
  }
789
9.46k
  else if( ( width & 7 ) == 0 )
790
5.95k
  {
791
5.95k
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
792
5.95k
  }
793
3.51k
  else if( ( width & 3 ) == 0 )
794
3.51k
  {
795
3.51k
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
796
3.51k
  }
797
0
  else if( ( width & 1 ) == 0 )
798
0
  {
799
0
    for( int y = 0; y < height; y++ )
800
0
    {
801
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng);
802
0
      dest[1] = ClipPel( src1[1] + src2[1], clpRng);
803
0
      src1 += src1Stride;
804
0
      src2 += src2Stride;
805
0
      dest += destStride;
806
0
    }                        
807
0
  }
808
0
  else
809
0
  {
810
0
    CHECKD( width != 1, "Expecting width to be '1'!" );
811
812
0
    for( int y = 0; y < height; y++ )
813
0
    {
814
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng );
815
816
0
      src1 += src1Stride;
817
0
      src2 += src2Stride;
818
0
      dest += destStride;
819
0
    }
820
0
  }
821
2.39M
}
822
823
template<>
824
void AreaBuf<Pel>::linearTransform( const int scale, const unsigned shift, const int offset, bool bClip, const ClpRng& clpRng )
825
217k
{
826
217k
  const Pel* src = buf;
827
217k
        Pel* dst = buf;
828
829
217k
  if( stride == width)
830
217k
  {
831
217k
    if( width > 2 && height > 2 )
832
204k
    {
833
204k
      g_pelBufOP.linTf8( src, stride<<2, dst, stride<<2, width<<2, height>>2, scale, shift, offset, clpRng, bClip );
834
204k
    }
835
12.9k
    else
836
12.9k
    {
837
12.9k
      g_pelBufOP.linTf4( src, stride<<1, dst, stride<<1, width<<1, height>>1, scale, shift, offset, clpRng, bClip );
838
12.9k
    }
839
217k
  }
840
0
  else if( ( width & 7 ) == 0 )
841
0
  {
842
0
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
843
0
  }
844
0
  else if( ( width & 3 ) == 0 )
845
0
  {
846
0
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
847
0
  }
848
0
  else
849
0
  {
850
0
    if( bClip )
851
0
    {
852
0
      for( int y = 0; y < height; y++ )
853
0
      {
854
0
        dst[0] = ( Pel ) ClipPel( rightShiftU( scale * src[0], shift ) + offset, clpRng );
855
0
        dst[1] = ( Pel ) ClipPel( rightShiftU( scale * src[1], shift ) + offset, clpRng );
856
0
        src += stride;
857
0
        dst += stride;
858
0
      }
859
0
    }
860
0
    else
861
0
    {
862
0
      for( int y = 0; y < height; y++ )
863
0
      {
864
0
        dst[0] = ( Pel ) ( rightShiftU( scale * src[0], shift ) + offset );
865
0
        dst[1] = ( Pel ) ( rightShiftU( scale * src[1], shift ) + offset );
866
0
        src += stride;
867
0
        dst += stride;
868
0
      }
869
0
    }
870
0
  }
871
217k
}
872
873
#if ENABLE_SIMD_OPT_BUFFER
874
875
template<>
876
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel>& other )
877
639k
{
878
639k
  CHECK( width != other.height || height != other.width, "Incompatible size" );
879
880
639k
  if( ( ( width | height ) & 7 ) == 0 )
881
518k
  {
882
518k
    const Pel* src = other.buf;
883
884
2.12M
    for( unsigned y = 0; y < other.height; y += 8 )
885
1.60M
    {
886
1.60M
      Pel* dst = buf + y;
887
888
8.92M
      for( unsigned x = 0; x < other.width; x += 8 )
889
7.31M
      {
890
7.31M
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
891
892
7.31M
        dst += 8 * stride;
893
7.31M
      }
894
895
1.60M
      src += 8 * other.stride;
896
1.60M
    }
897
518k
  }
898
121k
  else if( ( ( width | height ) & 3 ) == 0 )
899
110k
  {
900
110k
    const Pel* src = other.buf;
901
902
327k
    for( unsigned y = 0; y < other.height; y += 4 )
903
216k
    {
904
216k
      Pel* dst = buf + y;
905
906
633k
      for( unsigned x = 0; x < other.width; x += 4 )
907
416k
      {
908
416k
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );
909
910
416k
        dst += 4 * stride;
911
416k
      }
912
913
216k
      src += 4 * other.stride;
914
216k
    }
915
110k
  }
916
10.3k
  else
917
10.3k
  {
918
10.3k
          Pel* dst =       buf;
919
10.3k
    const Pel* src = other.buf;
920
10.3k
    width          = other.height;
921
10.3k
    height         = other.width;
922
10.3k
    stride         = stride < width ? width : stride;
923
924
129k
    for( unsigned y = 0; y < other.height; y++ )
925
118k
    {
926
356k
      for( unsigned x = 0; x < other.width; x++ )
927
237k
      {
928
237k
        dst[y + x*stride] = src[x + y * other.stride];
929
237k
      }
930
118k
    }
931
10.3k
  }
932
639k
}
933
#endif
934
935
template<>
936
void AreaBuf<Pel>::weightCiip( const AreaBuf<const Pel>& intra, const int numIntra )
937
0
{
938
0
  CHECK(width == 2, "Width of 2 is not supported");
939
0
  g_pelBufOP.weightCiip( buf, intra.buf, width * height, numIntra );
940
0
}
941
942
template<>
943
void AreaBuf<MotionInfo>::fill( const MotionInfo& val )
944
24.3k
{
945
24.3k
  if( width == stride )
946
24.3k
  {
947
24.3k
    std::fill_n( buf, width * height, val );
948
24.3k
  }
949
0
  else
950
0
  {
951
0
    MotionInfo* dst = buf;
952
953
0
    for( int y = 0; y < height; y++, dst += stride )
954
0
    {
955
0
      std::fill_n( dst, width, val );
956
0
    }
957
0
  }
958
24.3k
}
959
960
// Creates an empty storage: no component owns memory yet.
PelStorage::PelStorage()
{
  uint32_t comp = 0;
  while( comp < MAX_NUM_COMP )
  {
    m_origin[comp++] = nullptr;
  }
}
967
968
// Frees all memory still owned by this storage.
PelStorage::~PelStorage()
{
  this->destroy();
}
972
973
void PelStorage::create( const UnitArea& _UnitArea )
974
1.95M
{
975
1.95M
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
976
1.95M
  m_maxArea = _UnitArea;
977
1.95M
}
978
979
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area )
980
3.88M
{
981
3.88M
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
982
983
3.88M
  chromaFormat = _chromaFormat;
984
985
3.88M
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
986
987
3.88M
  uint32_t bufSize = 0;
988
13.8M
  for( uint32_t i = 0; i < numComp; i++ )
989
9.98M
  {
990
9.98M
    const ComponentID compID = ComponentID( i );
991
9.98M
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
992
9.98M
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
993
994
9.98M
    const uint32_t area = totalWidth * totalHeight;
995
9.98M
    CHECK( !area, "Trying to create a buffer with zero area" );
996
9.98M
    bufSize += area;
997
9.98M
  }
998
999
3.88M
  bufSize += 1; // for SIMD DMVR on the bottom right corner, which overreads the lines by 1 sample
1000
1001
  //allocate one buffer
1002
3.88M
  m_origin[0] = ( Pel* ) xMalloc( Pel, bufSize );
1003
1004
3.88M
  Pel* topLeft = m_origin[0];
1005
13.8M
  for( uint32_t i = 0; i < numComp; i++ )
1006
9.98M
  {
1007
9.98M
    const ComponentID compID = ComponentID( i );
1008
9.98M
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
1009
9.98M
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
1010
9.98M
    const uint32_t area = totalWidth * totalHeight;
1011
1012
9.98M
    bufs.push_back( PelBuf( topLeft, totalWidth, totalWidth, totalHeight ) );
1013
9.98M
    topLeft += area;
1014
9.98M
  }
1015
1016
3.88M
  m_maxArea = UnitArea( _chromaFormat, _area );
1017
3.88M
}
1018
1019
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignment, const bool _scaleChromaMargin )
1020
180k
{
1021
180k
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
1022
1023
180k
  chromaFormat = _chromaFormat;
1024
1025
180k
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
1026
1027
180k
  unsigned extHeight = _area.height;
1028
180k
  unsigned extWidth  = _area.width;
1029
1030
180k
  if( _maxCUSize )
1031
33.7k
  {
1032
33.7k
    extHeight = ( ( _area.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1033
33.7k
    extWidth  = ( ( _area.width  + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1034
33.7k
  }
1035
1036
638k
  for( uint32_t i = 0; i < numComp; i++ )
1037
458k
  {
1038
458k
    const ComponentID compID = ComponentID( i );
1039
458k
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
1040
458k
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );
1041
1042
458k
    unsigned scaledHeight = extHeight >> scaleY;
1043
458k
    unsigned scaledWidth  = extWidth  >> scaleX;
1044
458k
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
1045
458k
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);
1046
458k
    unsigned totalWidth   = scaledWidth + 2*xmargin;
1047
458k
    unsigned totalHeight  = scaledHeight +2*ymargin;
1048
1049
458k
    if( _alignment )
1050
257k
    {
1051
      // make sure buffer lines are align
1052
257k
      CHECK( _alignment != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
1053
257k
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
1054
257k
    }
1055
458k
    uint32_t area = totalWidth * totalHeight;
1056
458k
    CHECK( !area, "Trying to create a buffer with zero area" );
1057
1058
458k
    m_origin[i] = ( Pel* ) xMalloc( Pel, area );
1059
458k
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
1060
458k
    bufs.push_back( PelBuf( topLeft, totalWidth, _area.width >> scaleX, _area.height >> scaleY ) );
1061
458k
  }
1062
1063
180k
  m_maxArea = UnitArea( _chromaFormat, _area );
1064
180k
}
1065
1066
// Turns this storage into a view onto the memory of buf: the component buffers reference the
// samples of buf, m_origin is not touched here.
void PelStorage::createFromBuf( PelUnitBuf buf )
{
  chromaFormat = buf.chromaFormat;

  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  bufs.resize(numCh);

  uint32_t ch = 0;
  while( ch < numCh )
  {
    PelBuf compBuf = buf.get( ComponentID( ch ) );
    bufs[ch] = PelBuf( compBuf.bufAt( 0, 0 ), compBuf.stride, compBuf.width, compBuf.height );
    ++ch;
  }
}
1080
1081
void PelStorage::compactResize( const UnitArea& area )
1082
2.38M
{
1083
2.38M
  CHECK( bufs.size() < area.blocks.size(), "Cannot increase buffer size when compacting!" );
1084
1085
8.07M
  for( uint32_t i = 0; i < area.blocks.size(); i++ )
1086
5.68M
  {
1087
5.68M
    CHECK( m_maxArea.blocks[i].area() < area.blocks[i].area(), "Cannot increase buffer size when compacting!" );
1088
1089
5.68M
    bufs[i].Size::operator=( area.blocks[i].size() );
1090
5.68M
    bufs[i].stride = bufs[i].width;
1091
5.68M
  }
1092
2.38M
}
1093
1094
// Takes over format, component views, allocations and maximum area of other; other is destroyed
// afterwards. The allocation pointers are exchanged, so whatever this storage owned before ends up
// in other and is released by its destroy().
void PelStorage::takeOwnership( PelStorage& other )
{
  chromaFormat = other.chromaFormat;

  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  bufs.resize(numCh);

  uint32_t ch = 0;
  while( ch < numCh )
  {
    PelBuf compBuf = other.get( ComponentID( ch ) );
    bufs[ch] = PelBuf( compBuf.bufAt( 0, 0 ), compBuf.stride, compBuf.width, compBuf.height );
    std::swap( m_origin[ch], other.m_origin[ch]);
    ++ch;
  }

  m_maxArea = other.m_maxArea;

  other.destroy();
}
1113
1114
1115
// Exchanges the sample memory of this storage and other: per component the buffer pointer, the
// stride and the allocation pointer are swapped. Both storages must have the same chroma format
// and, per component, equal buffers (as compared by operator!=) and strides.
void PelStorage::swap( PelStorage& other )
{
  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  for( uint32_t i = 0; i < numCh; i++ )
  {
    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat                   != other.chromaFormat                  , "Incompatible formats" );
    CHECK( get( ComponentID( i ) )        != other.get( ComponentID( i ) )       , "Incompatible formats" );
    CHECK( get( ComponentID( i ) ).stride != other.get( ComponentID( i ) ).stride, "Incompatible formats" );

    auto& mine   = bufs[i];
    auto& theirs = other.bufs[i];

    std::swap( mine.buf,    theirs.buf );
    std::swap( mine.stride, theirs.stride );
    std::swap( m_origin[i], other.m_origin[i] );
  }
}
1131
1132
void PelStorage::destroy()
1133
8.46M
{
1134
8.46M
  chromaFormat = NUM_CHROMA_FORMAT;
1135
33.8M
  for( uint32_t i = 0; i < MAX_NUM_COMP; i++ )
1136
25.4M
  {
1137
25.4M
    if( m_origin[i] )
1138
4.34M
    {
1139
4.34M
      xFree( m_origin[i] );
1140
4.34M
      m_origin[i] = nullptr;
1141
4.34M
    }
1142
25.4M
  }
1143
8.46M
  bufs.clear();
1144
8.46M
}
1145
1146
// Returns the complete buffer of the given component.
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  PelBuf whole = bufs[CompID];
  return whole;
}
1150
1151
// Returns a read-only view of the complete buffer of the given component.
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  const CPelBuf whole = bufs[CompID];
  return whole;
}
1155
1156
// Returns the sub-buffer covering blk inside the component blk.compID. The view keeps the stride
// of the stored buffer; its start is the raster-scan address of blk for that stride.
PelBuf PelStorage::getBuf( const CompArea& blk )
{
  const PelBuf& stored  = bufs[blk.compID];
  Pel* const    topLeft = stored.buf + rsAddr( blk, stored.stride );

  return PelBuf( topLeft, stored.stride, blk );
}
1161
1162
// Read-only variant: returns the sub-buffer covering blk inside the component blk.compID, using
// the stride of the stored buffer.
const CPelBuf PelStorage::getBuf( const CompArea& blk ) const
{
  const PelBuf&    stored  = bufs[blk.compID];
  const Pel* const topLeft = stored.buf + rsAddr( blk, stored.stride );

  return CPelBuf( topLeft, stored.stride, blk );
}
1167
1168
// Returns the sub-buffers of all components covered by unit (luma only for CHROMA_400).
PelUnitBuf PelStorage::getBuf( const UnitArea& unit )
{
  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1172
1173
// Read-only variant: returns the sub-buffers of all components covered by unit
// (luma only for CHROMA_400).
const CPelUnitBuf PelStorage::getBuf( const UnitArea& unit ) const
{
  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1177
1178
// Returns views that start at the beginning of each component buffer, have the size of the
// corresponding block of unit and use the caller supplied strides strY / strCb / strCr instead
// of the stored ones. For CHROMA_400 only the luma view is created and the chroma strides are
// ignored.
// NOTE(review): the size check only fires if width AND height exceed the stored buffer -
// confirm whether that is intended.
PelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    // luma-only storage has no chroma components, so bufs[COMP_Cb] / bufs[COMP_Cr] must not be
    // accessed - not even for validation
    return PelUnitBuf( chromaFormat, PelBuf( bufs[COMP_Y].buf, strY, unit.Y() ) );
  }

  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  return PelUnitBuf( chromaFormat,
                     PelBuf( bufs[COMP_Y ].buf, strY,  unit.Y () ),
                     PelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb() ),
                     PelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr() ) );
}
1186
1187
// Read-only variant: returns views that start at the beginning of each component buffer, have the
// size of the corresponding block of unit and use the caller supplied strides strY / strCb /
// strCr instead of the stored ones. For CHROMA_400 only the luma view is created and the chroma
// strides are ignored.
// NOTE(review): the size check only fires if width AND height exceed the stored buffer -
// confirm whether that is intended.
const CPelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit) const
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    // luma-only storage has no chroma components, so bufs[COMP_Cb] / bufs[COMP_Cr] must not be
    // accessed - not even for validation
    return CPelUnitBuf( chromaFormat, CPelBuf( bufs[COMP_Y].buf, strY, unit.Y() ) );
  }

  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  return CPelUnitBuf( chromaFormat,
                      CPelBuf( bufs[COMP_Y ].buf, strY,  unit.Y () ),
                      CPelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb() ),
                      CPelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr() ) );
}
1195
1196
// Returns views that start at the beginning of each component buffer, keep the stored stride and
// have the size of the corresponding block of unit (luma only for CHROMA_400).
PelUnitBuf PelStorage::getBufPart(const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, PelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat,
                     PelBuf( bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y () ),
                     PelBuf( bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb() ),
                     PelBuf( bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr() ) );
}
1201
1202
// Read-only variant of getBufPart: returns views that start at the beginning of each component
// buffer, keep the STORED stride and have the size of the corresponding block of unit (luma only
// for CHROMA_400). Using the stored stride makes this overload address the same samples as the
// non-const getBufPart; views with stride == width are what getCompactBuf provides.
const CPelUnitBuf PelStorage::getBufPart(const UnitArea& unit) const
{
  CHECKD(unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request");

  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, CPelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat,
                      CPelBuf( bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y () ),
                      CPelBuf( bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb() ),
                      CPelBuf( bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr() ) );
}
1207
1208
// Read-only variant: returns compact views (stride == width) that start at the beginning of each
// component buffer and have the size of the corresponding block of unit
// (luma only for CHROMA_400).
const CPelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit) const
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = chromaFormat == CHROMA_400;

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  // the entries are uninitialized after resize_noinit, so every field is set explicitly
  auto& luma  = ret.Y();
  luma.buf    = bufs[COMP_Y].buf;
  luma.width  = luma.stride = unit.Y().width;
  luma.height = unit.Y().height;

  if( !lumaOnly )
  {
    auto& cb  = ret.Cb();
    cb.buf    = bufs[COMP_Cb].buf;
    cb.width  = cb.stride = unit.Cb().width;
    cb.height = unit.Cb().height;

    auto& cr  = ret.Cr();
    cr.buf    = bufs[COMP_Cr].buf;
    cr.width  = cr.stride = unit.Cr().width;
    cr.height = unit.Cr().height;
  }

  return ret;
}
1225
1226
// Returns compact views (stride == width) that start at the beginning of each component buffer
// and have the size of the corresponding block of unit (luma only for CHROMA_400).
PelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit)
{
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = chromaFormat == CHROMA_400;

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  // the entries are uninitialized after resize_noinit, so every field is set explicitly
  auto& luma  = ret.Y();
  luma.buf    = bufs[COMP_Y].buf;
  luma.width  = luma.stride = unit.Y().width;
  luma.height = unit.Y().height;

  if( !lumaOnly )
  {
    auto& cb  = ret.Cb();
    cb.buf    = bufs[COMP_Cb].buf;
    cb.width  = cb.stride = unit.Cb().width;
    cb.height = unit.Cb().height;

    auto& cr  = ret.Cr();
    cr.buf    = bufs[COMP_Cr].buf;
    cr.width  = cr.stride = unit.Cr().width;
    cr.height = unit.Cr().height;
  }

  return ret;
}
1243
1244
// Read-only variant: returns a compact view (stride == carea.width) of the size of carea that
// starts at the beginning of the buffer of component carea.compID.
const CPelBuf PelStorage::getCompactBuf(const CompArea& carea) const
{
  const Pel* const start = bufs[carea.compID].buf;

  return CPelBuf( start, carea.width, carea );
}
1248
1249
// Returns a compact view (stride == carea.width) of the size of carea that starts at the
// beginning of the buffer of component carea.compID.
PelBuf PelStorage::getCompactBuf(const CompArea& carea)
{
  Pel* const start = bufs[carea.compID].buf;

  return PelBuf( start, carea.width, carea );
}
1253
1254
// Downsamples the plane yuvPlaneIn into dest by averaging blocks of
// downsampleStep x downsampleStep input samples (rounded to nearest) per output sample.
// dest.width x dest.height output samples are produced.
// NOTE(review): the input plane is assumed to provide at least
// dest.width * downsampleStep x dest.height * downsampleStep samples - this is not checked here.
void downsampleYuv(PelBuf& dest, const vvencYUVPlane& yuvPlaneIn, int downsampleStep)
{
  CHECK( downsampleStep < 1, "Invalid downsampling step" );

  const int widthd   = dest.width;
  const int heightd  = dest.height;
  const int instride = yuvPlaneIn.stride;

  // number of input samples per output sample and the matching rounding offset
  // (4 and 2 for the 2x2 case)
  const int numSamples = downsampleStep * downsampleStep;
  const int rounding   = numSamples >> 1;

  const int16_t* srcRow = yuvPlaneIn.ptr;
  Pel*           dstRow = dest.buf;

  for( int j = 0; j < heightd; j++ )
  {
    const int16_t* blk = srcRow;

    for( int i = 0; i < widthd; i++ )
    {
      long int b = 0;
      for( int r = 0; r < downsampleStep; r++ )
      {
        // consecutive input rows are instride samples apart (the stride, not the plane width)
        const int16_t* line = blk + r * instride;
        for( int c = 0; c < downsampleStep; c++ )
        {
          b += line[c];
        }
      }
      dstRow[i] = (int16_t)( ( b + rounding ) / numSamples );
      blk += downsampleStep;
    }

    srcRow += instride * downsampleStep;
    dstRow += dest.stride;
  }
}
1288
1289
// Copies the planes of yuvBuffer into the component buffers of pelUnitBuf.
// Per component:
//   - destination narrower than the source plane: the plane is downsampled by a factor of 2
//   - otherwise: the plane is copied row by row; columns right of the source width are filled
//     with the last copied sample of the row, rows below the source height with a copy of the
//     last source row
// pelUnitBuf is passed by value; the sample memory it references is written.
void copyPadToPelUnitBuf( PelUnitBuf pelUnitBuf, const vvencYUVBuffer& yuvBuffer, const ChromaFormat& chFmt )
{
  CHECK( pelUnitBuf.bufs.size() == 0, "pelUnitBuf not initialized" );
  pelUnitBuf.chromaFormat = chFmt;

  const int numComp = getNumberValidComponents( chFmt );

  for( int comp = 0; comp < numComp; comp++ )
  {
    const vvencYUVPlane& src = yuvBuffer.planes[ comp ];
    CHECK( src.ptr == nullptr, "yuvBuffer not setup" );
    PelBuf& dest = pelUnitBuf.bufs[comp];
    CHECK( dest.buf == nullptr, "yuvBuffer not setup" );

    if( dest.width < src.width )
    {
      downsampleYuv( dest, src, 2 );
      continue;
    }

    const size_t srcRowBytes = src.width * sizeof(int16_t);

    for( int y = 0; y < src.height; y++ )
    {
      ::memcpy( dest.buf + y * dest.stride, src.ptr + y * src.stride, srcRowBytes );

      // pad right if required
      for( int x = src.width; x < dest.width; x++ )
      {
        dest.buf[x + y * dest.stride] = dest.buf[src.width - 1 + y * dest.stride];
      }
    }

    // pad bottom if required
    const size_t dstRowBytes = dest.width * sizeof(int16_t);

    for( int y = src.height; y < dest.height; y++ )
    {
      ::memcpy( dest.buf + y * dest.stride, dest.buf + (src.height - 1) * dest.stride, dstRowBytes );
    }
  }
}
1326
1327
/*
1328
void setupPelUnitBuf( const YUVBuffer& yuvBuffer, PelUnitBuf& pelUnitBuf, const ChromaFormat& chFmt )
1329
{
1330
  CHECK( pelUnitBuf.bufs.size() != 0, "pelUnitBuf already in use" );
1331
  pelUnitBuf.chromaFormat = chFmt;
1332
  const int numComp = getNumberValidComponents( chFmt );
1333
  for ( int i = 0; i < numComp; i++ )
1334
  {
1335
    const YUVBuffer::Plane& yuvPlane = yuvBuffer.planes[ i ];
1336
    CHECK( yuvPlane.ptr == nullptr, "yuvBuffer not setup" );
1337
    PelBuf area( yuvPlane.ptr, yuvPlane.stride, yuvPlane.width, yuvPlane.height );
1338
    pelUnitBuf.bufs.push_back( area );
1339
  }
1340
}
1341
*/
1342
void setupYuvBuffer ( const PelUnitBuf& pelUnitBuf, vvencYUVBuffer& yuvBuffer, const Window* confWindow )
1343
0
{
1344
0
  const ChromaFormat chFmt = pelUnitBuf.chromaFormat;
1345
0
  const int numComp        = getNumberValidComponents( chFmt );
1346
0
  for ( int i = 0; i < numComp; i++ )
1347
0
  {
1348
0
    const ComponentID compId = ComponentID( i );
1349
0
          PelBuf area        = pelUnitBuf.get( compId );
1350
0
    const int sx             = getComponentScaleX( compId, chFmt );
1351
0
    const int sy             = getComponentScaleY( compId, chFmt );
1352
0
    vvencYUVPlane& yuvPlane = yuvBuffer.planes[ i ];
1353
0
    CHECK( yuvPlane.ptr != nullptr, "yuvBuffer already in use" );
1354
0
    yuvPlane.ptr             = area.bufAt( confWindow->winLeftOffset >> sx, confWindow->winTopOffset >> sy );
1355
0
    yuvPlane.width           = ( ( area.width  << sx ) - ( confWindow->winLeftOffset + confWindow->winRightOffset  ) ) >> sx;
1356
0
    yuvPlane.height          = ( ( area.height << sy ) - ( confWindow->winTopOffset  + confWindow->winBottomOffset ) ) >> sy;
1357
0
    yuvPlane.stride          = area.stride;
1358
0
  }
1359
0
}
1360
1361
} // namespace vvenc
1362
1363
//! \}
1364