Coverage Report

Created: 2026-06-15 06:25

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvenc/source/Lib/CommonLib/Buffer.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2019-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVenC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
44
/** \file     Buffer.cpp
45
 *  \brief    Low-overhead class describing 2D memory layout
46
 */
47
48
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
49
50
// unit needs to come first due to a forward declaration
51
#include "Unit.h"
52
#include "Slice.h"
53
#include "InterpolationFilter.h"
54
55
//! \ingroup CommonLib
56
//! \{
57
58
namespace vvenc {
59
60
/** In-place weighted blend of \p res with \p src.
 *
 *  - numIntra == 1 : both inputs weigh equally, res = (res + src + 1) >> 1
 *  - numIntra == 0 : res keeps weight 3, src gets weight 1 (rounded, >> 2)
 *  - otherwise     : src gets weight 3, res gets weight 1 (rounded, >> 2)
 *
 *  Samples are handled two at a time, so index numSamples is touched when
 *  numSamples is odd.
 */
void weightCiipCore( Pel* res, const Pel* src, const int numSamples, int numIntra )
{
  if( numIntra == 1 )
  {
    // equal weights: rounded mean of the two inputs
    for( int pos = 0; pos < numSamples; pos += 2 )
    {
      res[pos]     = ( res[pos]     + src[pos]     + 1 ) >> 1;
      res[pos + 1] = ( res[pos + 1] + src[pos + 1] + 1 ) >> 1;
    }
    return;
  }

  // 3:1 blend; a non-zero numIntra puts the heavy weight on src
  const bool srcIsHeavy = numIntra != 0;
  const Pel* heavy      = srcIsHeavy ? src : res;
  const Pel* light      = srcIsHeavy ? res : src;

  for( int pos = 0; pos < numSamples; pos += 2 )
  {
    res[pos]     = ( light[pos]     + 3 * heavy[pos]     + 2 ) >> 2;
    res[pos + 1] = ( light[pos + 1] + 3 * heavy[pos + 1] + 2 ) >> 2;
  }
}
82
83
template< unsigned inputSize, unsigned outputSize >
84
void mipMatrixMulCore( Pel* res, const Pel* input, const uint8_t* weight, const int maxVal, const int inputOffset, bool transpose )
85
231k
{
86
231k
  Pel buffer[ outputSize*outputSize];
87
88
231k
  int sum = 0;
89
2.08M
  for( int i = 0; i < inputSize; i++ )
90
1.85M
  {
91
1.85M
    sum += input[i];
92
1.85M
  }
93
231k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
231k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
231k
  Pel* mat = transpose ? buffer : res;
97
231k
  unsigned posRes = 0;
98
14.7M
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
14.5M
  {
100
14.5M
    int tmp0 = input[0] * weight[0];
101
14.5M
    int tmp1 = input[1] * weight[1];
102
14.5M
    int tmp2 = input[2] * weight[2];
103
14.5M
    int tmp3 = input[3] * weight[3];
104
14.5M
    if( 8 == inputSize )
105
14.5M
    {
106
14.5M
      tmp0 += input[4] * weight[4];
107
14.5M
      tmp1 += input[5] * weight[5];
108
14.5M
      tmp2 += input[6] * weight[6];
109
14.5M
      tmp3 += input[7] * weight[7];
110
14.5M
    }
111
14.5M
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
14.5M
    weight += inputSize;
114
14.5M
  }
115
116
231k
  if( transpose )
117
105k
  {
118
935k
    for( int j = 0; j < outputSize; j++ )
119
830k
    {
120
7.42M
      for( int i = 0; i < outputSize; i++ )
121
6.59M
      {
122
6.59M
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
6.59M
      }
124
830k
    }
125
105k
  }
126
231k
}
Unexecuted instantiation: void vvenc::mipMatrixMulCore<4u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
void vvenc::mipMatrixMulCore<8u, 4u>(short*, short const*, unsigned char const*, int, int, bool)
Line
Count
Source
85
5.66k
{
86
5.66k
  Pel buffer[ outputSize*outputSize];
87
88
5.66k
  int sum = 0;
89
50.9k
  for( int i = 0; i < inputSize; i++ )
90
45.3k
  {
91
45.3k
    sum += input[i];
92
45.3k
  }
93
5.66k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
5.66k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
5.66k
  Pel* mat = transpose ? buffer : res;
97
5.66k
  unsigned posRes = 0;
98
96.2k
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
90.6k
  {
100
90.6k
    int tmp0 = input[0] * weight[0];
101
90.6k
    int tmp1 = input[1] * weight[1];
102
90.6k
    int tmp2 = input[2] * weight[2];
103
90.6k
    int tmp3 = input[3] * weight[3];
104
90.6k
    if( 8 == inputSize )
105
90.6k
    {
106
90.6k
      tmp0 += input[4] * weight[4];
107
90.6k
      tmp1 += input[5] * weight[5];
108
90.6k
      tmp2 += input[6] * weight[6];
109
90.6k
      tmp3 += input[7] * weight[7];
110
90.6k
    }
111
90.6k
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
90.6k
    weight += inputSize;
114
90.6k
  }
115
116
5.66k
  if( transpose )
117
2.83k
  {
118
14.1k
    for( int j = 0; j < outputSize; j++ )
119
11.3k
    {
120
56.6k
      for( int i = 0; i < outputSize; i++ )
121
45.3k
      {
122
45.3k
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
45.3k
      }
124
11.3k
    }
125
2.83k
  }
126
5.66k
}
void vvenc::mipMatrixMulCore<8u, 8u>(short*, short const*, unsigned char const*, int, int, bool)
Line
Count
Source
85
225k
{
86
225k
  Pel buffer[ outputSize*outputSize];
87
88
225k
  int sum = 0;
89
2.03M
  for( int i = 0; i < inputSize; i++ )
90
1.80M
  {
91
1.80M
    sum += input[i];
92
1.80M
  }
93
225k
  const int offset = (1 << (MIP_SHIFT_MATRIX - 1)) - MIP_OFFSET_MATRIX * sum + (inputOffset << MIP_SHIFT_MATRIX);
94
225k
  CHECK( inputSize != 4 * (inputSize >> 2), "Error, input size not divisible by four" );
95
96
225k
  Pel* mat = transpose ? buffer : res;
97
225k
  unsigned posRes = 0;
98
14.6M
  for( unsigned n = 0; n < outputSize*outputSize; n++ )
99
14.4M
  {
100
14.4M
    int tmp0 = input[0] * weight[0];
101
14.4M
    int tmp1 = input[1] * weight[1];
102
14.4M
    int tmp2 = input[2] * weight[2];
103
14.4M
    int tmp3 = input[3] * weight[3];
104
14.4M
    if( 8 == inputSize )
105
14.4M
    {
106
14.4M
      tmp0 += input[4] * weight[4];
107
14.4M
      tmp1 += input[5] * weight[5];
108
14.4M
      tmp2 += input[6] * weight[6];
109
14.4M
      tmp3 += input[7] * weight[7];
110
14.4M
    }
111
14.4M
    mat[posRes++] = Clip3<int>( 0, maxVal, ((tmp0 + tmp1 + tmp2 + tmp3 + offset) >> MIP_SHIFT_MATRIX) );
112
113
14.4M
    weight += inputSize;
114
14.4M
  }
115
116
225k
  if( transpose )
117
102k
  {
118
921k
    for( int j = 0; j < outputSize; j++ )
119
819k
    {
120
7.37M
      for( int i = 0; i < outputSize; i++ )
121
6.55M
      {
122
6.55M
        res[j * outputSize + i] = buffer[i * outputSize + j];
123
6.55M
      }
124
819k
    }
125
102k
  }
126
225k
}
127
128
template< typename T >
129
void addAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, unsigned rshift, int offset, const ClpRng& clpRng )
130
0
{
131
0
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
132
0
#define ADD_AVG_CORE_INC    \
133
0
  src1 += src1Stride;       \
134
0
  src2 += src2Stride;       \
135
0
  dest +=  dstStride;       \
136
0
137
0
  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );
138
139
0
#undef ADD_AVG_CORE_OP
140
0
#undef ADD_AVG_CORE_INC
141
0
}
142
143
template<typename T>
144
void addWeightedAvgCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int destStride, int width, int height, unsigned rshift, int offset, int w0, int w1, const ClpRng& clpRng )
145
0
{
146
0
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
147
0
#define ADD_WGHT_AVG_INC     \
148
0
    src1 += src1Stride; \
149
0
    src2 += src2Stride; \
150
0
    dest += destStride; \
151
0
152
0
  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );
153
154
0
#undef ADD_WGHT_AVG_OP
155
0
#undef ADD_WGHT_AVG_INC
156
0
}
157
158
template<typename T>
159
void subsCore( const T* src0, int src0Stride, const T* src1, int src1Stride, T* dest, int destStride, int width, int height )
160
718k
{
161
718k
#define SUBS_INC                \
162
718k
  dest += destStride;  \
163
718k
  src0 += src0Stride;  \
164
718k
  src1 += src1Stride;  \
165
718k
166
326M
#define SUBS_OP( ADDR ) dest[ADDR] = src0[ADDR] - src1[ADDR]
167
168
326M
  SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
169
170
718k
#undef SUBS_OP
171
718k
#undef SUBS_INC
172
718k
}
173
174
void removeHighFreq(int16_t* dst, int dstStride, const int16_t* src, int srcStride, int width, int height)
175
0
{
176
0
#define REM_HF_INC  \
177
0
 src += srcStride; \
178
0
 dst += dstStride; \
179
0
180
0
#define REM_HF_OP( ADDR )      dst[ADDR] =             2 * dst[ADDR] - src[ADDR]
181
182
0
 SIZE_AWARE_PER_EL_OP(REM_HF_OP, REM_HF_INC);
183
184
0
#undef REM_HF_INC
185
0
#undef REM_HF_OP
186
0
#undef REM_HF_OP_CLIP
187
0
}
188
189
template<typename T>
190
void reconstructCore( const T* src1, int src1Stride, const T* src2, int src2Stride, T* dest, int dstStride, int width, int height, const ClpRng& clpRng )
191
8.23k
{
192
3.35M
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
193
8.23k
#define RECO_CORE_INC     \
194
8.23k
  src1 += src1Stride;     \
195
8.23k
  src2 += src2Stride;     \
196
8.23k
  dest +=  dstStride;     \
197
8.23k
198
3.35M
  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );
199
200
8.23k
#undef RECO_CORE_OP
201
8.23k
#undef RECO_CORE_INC
202
8.23k
}
203
204
template<typename T>
205
void recoCore( const T* src1, const T* src2, T* dest, int numSamples, const ClpRng& clpRng )
206
2.01M
{
207
296M
  for( int n = 0; n < numSamples; n+=2)
208
294M
  {
209
294M
    dest[n]   = ClipPel( src1[n]   + src2[n], clpRng );
210
294M
    dest[n+1] = ClipPel( src1[n+1] + src2[n+1], clpRng );
211
294M
  }
212
2.01M
}
213
214
template<typename T>
215
void copyClipCore( const T* src, Pel* dst, int numSamples, const ClpRng& clpRng )
216
0
{
217
0
  for( int n = 0; n < numSamples; n+=2)
218
0
  {
219
0
    dst[n]   = ClipPel( src[n]   , clpRng );
220
0
    dst[n+1] = ClipPel( src[n+1] , clpRng );
221
0
  }
222
0
}
223
224
template< typename T >
225
void addAvgCore( const T* src1, const T* src2, T* dest, int numSamples, unsigned rshift, int offset, const ClpRng& clpRng )
226
0
{
227
0
  for( int n = 0; n < numSamples; n+=2)
228
0
  {
229
0
    dest[n]   = ClipPel( rightShiftU( ( src1[n]   + src2[n]   + offset ), rshift ), clpRng );
230
0
    dest[n+1] = ClipPel( rightShiftU( ( src1[n+1] + src2[n+1] + offset ), rshift ), clpRng );
231
0
  }
232
0
}
233
234
template< typename T >
235
void roundGeoCore( const T* src, T* dest, const int numSamples, unsigned rshift, int offset, const ClpRng &clpRng)
236
0
{
237
0
  for( int i = 0; i < numSamples; i+=2)
238
0
  {
239
0
    dest[i]   = ClipPel(rightShiftU(src[i  ] + offset, rshift), clpRng);
240
0
    dest[i+1] = ClipPel(rightShiftU(src[i+1] + offset, rshift), clpRng);
241
0
  }
242
0
}
243
244
template<typename T>
245
void linTfCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, int scale, unsigned shift, int offset, const ClpRng& clpRng, bool bClip )
246
184k
{
247
184k
#define LINTF_CORE_INC  \
248
184k
  src += srcStride;     \
249
184k
  dst += dstStride;     \
250
184k
251
184k
  if( bClip )
252
184k
  {
253
35.8M
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ClipPel( rightShiftU( scale * src[ADDR], shift ) + offset, clpRng )
254
255
35.8M
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
256
257
184k
#undef LINTF_CORE_OP
258
184k
  }
259
0
  else
260
0
  {
261
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) ( rightShiftU( scale * src[ADDR], shift ) + offset )
262
263
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
264
265
0
#undef LINTF_CORE_OP
266
0
  }
267
184k
#undef LINTF_CORE_INC
268
184k
}
269
270
template<typename T, int N>
271
void transposeNxNCore( const Pel* src, int srcStride, Pel* dst, int dstStride )
272
6.31M
{
273
55.3M
  for( int i = 0; i < N; i++ )
274
49.0M
  {
275
435M
    for( int j = 0; j < N; j++ )
276
386M
    {
277
386M
      dst[j * dstStride] = src[j];
278
386M
    }
279
280
49.0M
    dst++;
281
49.0M
    src += srcStride;
282
49.0M
  }
283
6.31M
}
void vvenc::transposeNxNCore<short, 4>(short const*, int, short*, int)
Line
Count
Source
272
357k
{
273
1.78M
  for( int i = 0; i < N; i++ )
274
1.43M
  {
275
7.15M
    for( int j = 0; j < N; j++ )
276
5.72M
    {
277
5.72M
      dst[j * dstStride] = src[j];
278
5.72M
    }
279
280
1.43M
    dst++;
281
1.43M
    src += srcStride;
282
1.43M
  }
283
357k
}
void vvenc::transposeNxNCore<short, 8>(short const*, int, short*, int)
Line
Count
Source
272
5.95M
{
273
53.6M
  for( int i = 0; i < N; i++ )
274
47.6M
  {
275
428M
    for( int j = 0; j < N; j++ )
276
381M
    {
277
381M
      dst[j * dstStride] = src[j];
278
381M
    }
279
280
47.6M
    dst++;
281
47.6M
    src += srcStride;
282
47.6M
  }
283
5.95M
}
284
285
template<typename T>
286
void copyClipCore( const T* src, int srcStride, Pel* dst, int dstStride, int width, int height, const ClpRng& clpRng )
287
0
{
288
0
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
289
0
#define RECO_INC      \
290
0
    src += srcStride; \
291
0
    dst += dstStride; \
292
0
293
0
  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
294
295
0
#undef RECO_OP
296
0
#undef RECO_INC
297
0
}
298
299
/** Copies \p height rows of \p numBytes bytes each from \p src to \p dst.
 *
 *  \param srcStride  distance in bytes between consecutive source rows
 *  \param dstStride  distance in bytes between consecutive destination rows
 *
 *  Nothing is copied when height is zero or negative.
 */
void copyBufferCore( const char* src, int srcStride, char* dst, int dstStride, int numBytes, int height)
{
  int rowsLeft = height;
  while( rowsLeft-- > 0 )
  {
    memcpy( dst, src, numBytes );
    src += srcStride;
    dst += dstStride;
  }
}
306
307
void applyLutCore( const Pel* src, const ptrdiff_t srcStride, Pel* dst, const ptrdiff_t dstStride, int width, int height, const Pel* lut )
308
0
{
309
0
#define RSP_SGNL_OP( ADDR ) dst[ADDR] = lut[src[ADDR]]
310
0
#define RSP_SGNL_INC        src      += srcStride; dst += dstStride;
311
312
0
  SIZE_AWARE_PER_EL_OP( RSP_SGNL_OP, RSP_SGNL_INC )
313
314
0
#undef RSP_SGNL_OP
315
0
#undef RSP_SGNL_INC
316
0
}
317
318
/** Sets a width x height rectangle of a pointer map to \p val.
 *
 *  \param ptrMap     first entry of the rectangle
 *  \param mapStride  distance in entries between consecutive rows of the map
 *
 *  When the rectangle spans full rows (width == mapStride) it is filled in one go,
 *  otherwise row by row.
 */
void fillMapPtr_Core( void** ptrMap, const ptrdiff_t mapStride, int width, int height, void* val )
{
  if( width != mapStride )
  {
    for( void** row = ptrMap; height--; row += mapStride )
    {
      std::fill_n( row, width, val );
    }
    return;
  }

  // rows are back to back, so the rectangle is one contiguous run
  std::fill_n( ptrMap, width * height, val );
}
333
334
uint64_t AvgHighPassCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
335
9.87k
{
336
9.87k
  uint64_t saAct = 0;
337
791k
  for (int y = 1; y < height - 1; y++)
338
781k
  {
339
72.9M
    for (int x = 1; x < width - 1; x++) // center cols
340
72.1M
    {
341
72.1M
      const int s = 12 * (int) pSrc[x  ] - 2 * ((int) pSrc[x-1] + (int) pSrc[x+1] + (int) pSrc[x  -iSrcStride] + (int) pSrc[x  +iSrcStride])
342
72.1M
                             - ((int) pSrc[x-1-iSrcStride] + (int) pSrc[x+1-iSrcStride] + (int) pSrc[x-1+iSrcStride] + (int) pSrc[x+1+iSrcStride]);
343
72.1M
      saAct += abs (s);
344
72.1M
    }
345
781k
    pSrc += iSrcStride;
346
781k
  }
347
9.87k
  return saAct;
348
9.87k
}
349
350
uint64_t HDHighPassCore  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const int iSrcStride,const int iSM1Stride)
351
0
{
352
0
  uint64_t taAct = 0;
353
0
  for (int y = 1; y < height - 1; y++)
354
0
  {
355
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
356
0
    {
357
0
      const int t = (int) pSrc[x] - (int) pSM1[x];
358
0
      taAct += (1 + 3 * abs (t)) >> 1;
359
0
    }
360
0
    pSrc += iSrcStride;
361
0
    pSM1 += iSM1Stride;
362
0
  }
363
0
  return taAct;
364
0
}
365
366
uint64_t  HDHighPass2Core  (const int width, const int height,const Pel*  pSrc,const Pel* pSM1,const Pel* pSM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
367
0
{
368
0
  uint64_t taAct = 0;
369
0
  for (int y = 1; y < height - 1; y++)
370
0
  {
371
0
    for (int x = 1; x < width - 1; x++)  // cnt cols
372
0
    {
373
0
      const int t = (int) pSrc[x] - 2 * (int) pSM1[x] + (int) pSM2[x];
374
0
      taAct += abs (t);
375
0
    }
376
0
    pSrc += iSrcStride;
377
0
    pSM1 += iSM1Stride;
378
0
    pSM2 += iSM2Stride;
379
0
  }
380
0
  return taAct;
381
0
}
382
/** Sum of absolute responses of a 6x6 high-pass kernel evaluated on every second
 *  sample position in both directions.
 *
 *  pSrc is first moved back by two strides; the loops then run over y in [2, height-2)
 *  and x in [2, width-2) in steps of two, relative to the moved pointer. Each
 *  evaluation reads rows y-2 .. y+3 and columns x-2 .. x+3.
 *
 *  \return accumulated absolute filter response
 */
uint64_t AvgHighPassWithDownsamplingCore( const int width, const int height, const Pel* pSrc, const int iSrcStride)
{
  uint64_t saAct = 0;
  pSrc -= iSrcStride;
  pSrc -= iSrcStride;

  // sample accessor relative to the shifted origin
  const auto px = [pSrc, iSrcStride]( int row, int col ) -> int { return (int) pSrc[row * iSrcStride + col]; };

  for( int y = 2; y < height - 2; y += 2 )
  {
    for( int x = 2; x < width - 2; x += 2 )
    {
      // central 2x2 group, weight 12
      const int centre  = px( y, x ) + px( y, x + 1 ) + px( y + 1, x ) + px( y + 1, x + 1 );
      // samples directly above / below the centre, weight -3
      const int vertAdj = px( y - 1, x ) + px( y - 1, x + 1 ) + px( y + 2, x ) + px( y + 2, x + 1 );
      // samples directly left / right of the centre, weight -3
      const int horAdj  = px( y, x - 1 ) + px( y, x + 2 ) + px( y + 1, x - 1 ) + px( y + 1, x + 2 );
      // diagonal neighbours of the centre, weight -2
      const int diagAdj = px( y - 1, x - 1 ) + px( y - 1, x + 2 ) + px( y + 2, x - 1 ) + px( y + 2, x + 2 );
      // outer ring (top row, bottom row, left column, right column), weight -1
      const int outer   = px( y - 2, x - 1 ) + px( y - 2, x ) + px( y - 2, x + 1 ) + px( y - 2, x + 2 )
                        + px( y + 3, x - 1 ) + px( y + 3, x ) + px( y + 3, x + 1 ) + px( y + 3, x + 2 )
                        + px( y - 1, x - 2 ) + px( y, x - 2 ) + px( y + 1, x - 2 ) + px( y + 2, x - 2 )
                        + px( y - 1, x + 3 ) + px( y, x + 3 ) + px( y + 1, x + 3 ) + px( y + 2, x + 3 );

      const int f = 12 * centre - 3 * vertAdj - 3 * horAdj - 2 * diagAdj - outer;
      saAct += (uint64_t) abs( f );
    }
  }
  return saAct;
}
404
/** Accumulates (1 + 3 * |d|) >> 1, where d is the difference between the 2x2 sample sums
 *  of \p pSrc and \p pSrcM1, evaluated on every second position.
 *
 *  Both pointers are first moved back by two of their strides; the loops then run over
 *  y in [2, height-2) and x in [2, width-2) in steps of two, relative to the moved pointers.
 *
 *  The loop counters are signed, matching AvgHighPassWithDownsamplingCore: with unsigned
 *  counters the bounds height-2 / width-2 would wrap to huge values for dimensions below 2
 *  and the index arithmetic would be done modulo 2^32.
 *
 *  \return accumulated value (0 when the block is too small for a single evaluation)
 */
uint64_t AvgHighPassWithDownsamplingDiff1stCore (const int width, const int  height, const Pel* pSrc,const Pel* pSrcM1, const int iSrcStride, const int iSrcM1Stride)
{
  uint64_t taAct = 0;
  pSrc   -= 2 * iSrcStride;
  pSrcM1 -= 2 * iSrcM1Stride;

  for( int y = 2; y < height - 2; y += 2 )
  {
    for( int x = 2; x < width - 2; x += 2 )
    {
      // 2x2 sums at the same position of both inputs
      const int cur  = (int) pSrc  [ y      * iSrcStride   + x] + (int) pSrc  [ y      * iSrcStride   + x + 1]
                     + (int) pSrc  [(y + 1) * iSrcStride   + x] + (int) pSrc  [(y + 1) * iSrcStride   + x + 1];
      const int prev = (int) pSrcM1[ y      * iSrcM1Stride + x] + (int) pSrcM1[ y      * iSrcM1Stride + x + 1]
                     + (int) pSrcM1[(y + 1) * iSrcM1Stride + x] + (int) pSrcM1[(y + 1) * iSrcM1Stride + x + 1];

      const int t = cur - prev;
      taAct += ( 1 + 3 * abs( t ) ) >> 1;
    }
  }
  return taAct;
}
423
424
/** Accumulates |s0 - 2 * s1 + s2|, where s0, s1, s2 are the 2x2 sample sums of
 *  \p pSrc, \p pSrcM1 and \p pSrcM2, evaluated on every second position.
 *
 *  All three pointers are first moved back by two of their strides; the loops then run
 *  over y in [2, height-2) and x in [2, width-2) in steps of two, relative to the moved
 *  pointers.
 *
 *  The loop counters are signed, matching AvgHighPassWithDownsamplingCore: with unsigned
 *  counters the bounds height-2 / width-2 would wrap to huge values for dimensions below 2
 *  and the index arithmetic would be done modulo 2^32.
 *
 *  \return accumulated value (0 when the block is too small for a single evaluation)
 */
uint64_t AvgHighPassWithDownsamplingDiff2ndCore (const int width,const int height,const Pel* pSrc,const Pel* pSrcM1,const Pel* pSrcM2,const int iSrcStride,const int iSM1Stride,const int iSM2Stride)
{
  uint64_t taAct = 0;

  pSrc   -= 2 * iSrcStride;
  pSrcM1 -= 2 * iSM1Stride;
  pSrcM2 -= 2 * iSM2Stride;

  for( int y = 2; y < height - 2; y += 2 )
  {
    for( int x = 2; x < width - 2; x += 2 )
    {
      // 2x2 sums at the same position of the three inputs
      const int s0 = (int) pSrc  [ y      * iSrcStride + x] + (int) pSrc  [ y      * iSrcStride + x + 1]
                   + (int) pSrc  [(y + 1) * iSrcStride + x] + (int) pSrc  [(y + 1) * iSrcStride + x + 1];
      const int s1 = (int) pSrcM1[ y      * iSM1Stride + x] + (int) pSrcM1[ y      * iSM1Stride + x + 1]
                   + (int) pSrcM1[(y + 1) * iSM1Stride + x] + (int) pSrcM1[(y + 1) * iSM1Stride + x + 1];
      const int s2 = (int) pSrcM2[ y      * iSM2Stride + x] + (int) pSrcM2[ y      * iSM2Stride + x + 1]
                   + (int) pSrcM2[(y + 1) * iSM2Stride + x] + (int) pSrcM2[(y + 1) * iSM2Stride + x + 1];

      const int t = s0 - 2 * s1 + s2;
      taAct += (uint64_t) abs( t );
    }
  }
  return taAct;
}
447
448
/** Fills the dispatch table with the scalar "*Core" kernels and marks the x86
 *  initialisation as not yet done. roundIntVector is left unset (nullptr).
 */
PelBufferOps::PelBufferOps()
{
  isInitX86Done = false;

  // contiguous (stride-less) kernels
  reco              = recoCore<Pel>;
  addAvg            = addAvgCore<Pel>;
  copyClip          = copyClipCore<Pel>;
  roundGeo          = roundGeoCore<Pel>;
  weightCiip        = weightCiipCore;
  roundIntVector    = nullptr;

  // strided kernels; the width-specific slots all share one scalar implementation
  addAvg4           = addAvgCore<Pel>;
  addAvg8           = addAvgCore<Pel>;
  addAvg16          = addAvgCore<Pel>;

  wghtAvg4          = addWeightedAvgCore<Pel>;
  wghtAvg8          = addWeightedAvgCore<Pel>;

  sub4              = subsCore<Pel>;
  sub8              = subsCore<Pel>;

  reco4             = reconstructCore<Pel>;
  reco8             = reconstructCore<Pel>;

  copyClip4         = copyClipCore<Pel>;
  copyClip8         = copyClipCore<Pel>;

  linTf4            = linTfCore<Pel>;
  linTf8            = linTfCore<Pel>;

  removeHighFreq4   = removeHighFreq;
  removeHighFreq8   = removeHighFreq;

  // copies, look-up and pointer-map helpers
  copyBuffer        = copyBufferCore;
  applyLut          = applyLutCore;
  fillPtrMap        = fillMapPtr_Core;

  // transposition and matrix multiplication
  transpose4x4      = transposeNxNCore<Pel,4>;
  transpose8x8      = transposeNxNCore<Pel,8>;
  mipMatrixMul_4_4  = mipMatrixMulCore<4,4>;
  mipMatrixMul_8_4  = mipMatrixMulCore<8,4>;
  mipMatrixMul_8_8  = mipMatrixMulCore<8,8>;

  // high-pass activity measures
  AvgHighPass                        = AvgHighPassCore;
  AvgHighPassWithDownsampling        = AvgHighPassWithDownsamplingCore;
  AvgHighPassWithDownsamplingDiff1st = AvgHighPassWithDownsamplingDiff1stCore;
  AvgHighPassWithDownsamplingDiff2nd = AvgHighPassWithDownsamplingDiff2ndCore;
  HDHighPass                         = HDHighPassCore;
  HDHighPass2                        = HDHighPass2Core;
}
499
500
// Global kernel dispatch table used by the AreaBuf<Pel> operations in this file;
// its default constructor installs the scalar *Core implementations.
PelBufferOps g_pelBufOP = PelBufferOps();
501
502
template<>
503
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng, const int8_t BcwIdx)
504
0
{
505
0
  const int8_t w0 = getBcwWeight( BcwIdx, REF_PIC_LIST_0 );
506
0
  const int8_t w1 = getBcwWeight( BcwIdx, REF_PIC_LIST_1 );
507
0
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
508
0
  const Pel* src0 = other1.buf;
509
0
  const Pel* src2 = other2.buf;
510
0
        Pel* dest =        buf;
511
512
0
  const int src1Stride = other1.stride;
513
0
  const int src2Stride = other2.stride;
514
0
  const int destStride =        stride;
515
0
  const int clipbd     = clpRng.bd;
516
0
  const int shiftNum   = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
517
0
  const int offset     = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );
518
519
0
  if( ( width & 7 ) == 0 )
520
0
  {
521
0
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
522
0
  }
523
0
  else if( ( width & 3 ) == 0 )
524
0
  {
525
0
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
526
0
  }
527
0
  else
528
0
  {
529
0
#define WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
530
0
#define WGHT_AVG_INC    \
531
0
    src0 += src1Stride; \
532
0
    src2 += src2Stride; \
533
0
    dest += destStride; \
534
0
535
0
    SIZE_AWARE_PER_EL_OP( WGHT_AVG_OP, WGHT_AVG_INC );
536
537
0
#undef WGHT_AVG_OP
538
0
#undef WGHT_AVG_INC
539
0
  }
540
0
}
541
542
template<>
543
void AreaBuf<Pel>::rspSignal( const Pel* pLUT)
544
0
{
545
0
  g_pelBufOP.applyLut( buf, stride, buf, stride, width, height, pLUT );
546
0
}
547
548
549
template<>
550
void AreaBuf<Pel>::rspSignal( const AreaBuf<const Pel>& other, const Pel* pLUT)
551
0
{
552
0
  g_pelBufOP.applyLut( other.buf, other.stride, buf, stride, width, height, pLUT );
553
0
}
554
555
template<>
556
void AreaBuf<Pel>::scaleSignal(const int scale, const bool dir, const ClpRng& clpRng)
557
0
{
558
0
        Pel* dst = buf;
559
0
  const Pel* src = buf;
560
0
  const int maxAbsclipBD = (1<<clpRng.bd) - 1;
561
562
0
  if (dir) // forward
563
0
  {
564
0
    if (width == 1)
565
0
    {
566
0
      THROW("Blocks of width = 1 not supported");
567
0
    }
568
0
    else
569
0
    {
570
0
      for (unsigned y = 0; y < height; y++)
571
0
      {
572
0
        for (unsigned x = 0; x < width; x++)
573
0
        {
574
0
          int sign = src[x] >= 0 ? 1 : -1;
575
0
          int absval = sign * src[x];
576
0
          dst[x] = (Pel)Clip3(-maxAbsclipBD, maxAbsclipBD, sign * (((absval << CSCALE_FP_PREC) + (scale >> 1)) / scale));
577
0
        }
578
0
        dst += stride;
579
0
        src += stride;
580
0
      }
581
0
    }
582
0
  }
583
0
  else // inverse
584
0
  {
585
0
    for (unsigned y = 0; y < height; y++)
586
0
    {
587
0
      for (unsigned x = 0; x < width; x++)
588
0
      {
589
0
        int val    = Clip3<int>((-maxAbsclipBD - 1), maxAbsclipBD, (int)src[x]);
590
0
        int sign   = src[x] >= 0 ? 1 : -1;
591
0
        int absval = sign * val;
592
0
               val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
593
0
        if (sizeof(Pel) == 2) // avoid overflow when storing data
594
0
        {
595
0
          val = Clip3<int>(-32768, 32767, val);
596
0
        }
597
0
        dst[x] = (Pel)val;
598
0
      }
599
0
      dst += stride;
600
0
      src += stride;
601
0
    }
602
0
  }
603
0
}
604
605
template<>
606
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel>& other1, const AreaBuf<const Pel>& other2, const ClpRng& clpRng)
607
0
{
608
0
  const Pel* src0 = other1.buf;
609
0
  const Pel* src2 = other2.buf;
610
0
        Pel* dest =        buf;
611
612
0
  const unsigned src1Stride = other1.stride;
613
0
  const unsigned src2Stride = other2.stride;
614
0
  const unsigned destStride =        stride;
615
0
  const int      clipbd     = clpRng.bd;
616
0
  const unsigned shiftNum   = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
617
0
  const int      offset     = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
618
619
0
#if ENABLE_SIMD_OPT_BUFFER
620
0
  if( destStride == width )
621
0
  {
622
0
    g_pelBufOP.addAvg(src0, src2, dest, width * height, shiftNum, offset, clpRng);
623
0
  }
624
0
  else if ((width & 15) == 0)
625
0
  {
626
0
    g_pelBufOP.addAvg16(src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng);
627
0
  }
628
0
  else if( ( width & 7 ) == 0 )
629
0
  {
630
0
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
631
0
  }
632
0
  else if( ( width & 3 ) == 0 )
633
0
  {
634
0
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
635
0
  }
636
0
  else
637
0
#endif
638
0
  {
639
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShiftU( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
640
0
#define ADD_AVG_INC     \
641
0
    src0 += src1Stride; \
642
0
    src2 += src2Stride; \
643
0
    dest += destStride; \
644
0
645
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
646
647
0
#undef ADD_AVG_OP
648
0
#undef ADD_AVG_INC
649
0
  }
650
0
}
651
652
template<>
653
void AreaBuf<Pel>::subtract( const AreaBuf<const Pel>& minuend, const AreaBuf<const Pel>& subtrahend )
654
718k
{
655
718k
  CHECKD( width  != minuend.width,     "Incompatible size" );
656
718k
  CHECKD( height != minuend.height,    "Incompatible size" );
657
718k
  CHECKD( width  != subtrahend.width,  "Incompatible size");
658
718k
  CHECKD( height != subtrahend.height, "Incompatible size");
659
  
660
718k
        Pel* dest =            buf;
661
718k
  const Pel* mins = minuend   .buf;
662
718k
  const Pel* subs = subtrahend.buf;
663
664
665
718k
#if ENABLE_SIMD_OPT_BUFFER
666
718k
  const unsigned destStride =            stride;
667
718k
  const unsigned minsStride = minuend.   stride;
668
718k
  const unsigned subsStride = subtrahend.stride;
669
670
718k
  if( ( width & 7 ) == 0 )
671
604k
  {
672
604k
    g_pelBufOP.sub8( mins, minsStride, subs, subsStride, dest, destStride, width, height );
673
604k
  }
674
113k
  else if( ( width & 3 ) == 0 )
675
113k
  {
676
113k
    g_pelBufOP.sub4( mins, minsStride, subs, subsStride, dest, destStride, width, height );
677
113k
  }
678
0
  else
679
0
#endif
680
0
  {
681
0
#define SUBS_INC                \
682
0
    dest +=            stride;  \
683
0
    mins += minuend   .stride;  \
684
0
    subs += subtrahend.stride;  \
685
0
686
0
#define SUBS_OP( ADDR ) dest[ADDR] = mins[ADDR] - subs[ADDR]
687
688
0
    SIZE_AWARE_PER_EL_OP( SUBS_OP, SUBS_INC );
689
690
0
#undef SUBS_OP
691
0
#undef SUBS_INC
692
0
  }
693
718k
}
694
695
template<>
696
void AreaBuf<const Pel>::calcVarianceSplit( const AreaBuf<const Pel>& Org, const uint32_t  size, int& varh,int& varv) const
697
0
{
698
0
  CHECK( Org.width != Org.height, "Incompatible size!" );
699
0
  int stride = Org.stride;
700
0
  const Pel* src;
701
0
  Pel data;
702
0
  double variance=0;
703
0
  double mean=0;
704
0
  int64_t sum[4]={0,0,0,0};
705
0
  int64_t sum_sqr[4]={0,0,0,0};
706
0
  uint32_t halfsize =size>>1;
707
0
  uint32_t off[4]={0,halfsize,size*halfsize,size*halfsize+halfsize};
708
0
  int n,x,y;
709
710
0
  for( n = 0; n < 4; n++)
711
0
  {
712
0
    src = Org.buf+off[n];
713
0
    for( y = 0; y < halfsize; y++)
714
0
    {
715
0
      for(x = 0; x < halfsize; x++)
716
0
      {
717
0
        data=src[y*stride+x];
718
0
        sum[n]+=data;
719
0
        sum_sqr[n]+= data*data;
720
0
      }
721
0
    }
722
0
  }
723
0
  int num=size*(size>>1);
724
  // varhu
725
0
  mean=(double)(sum[0]+sum[1])/(num);
726
0
  variance =  (double)(sum_sqr[0]+sum_sqr[1])/(num) - (mean*mean);
727
0
  varh =(int)(variance+0.5);
728
  // varhl
729
0
  mean=(double)(sum[2]+sum[3])/(num);
730
0
  variance =  (double)(sum_sqr[2]+sum_sqr[3])/(num) - (mean*mean);
731
0
  varh +=(int)(variance+0.5);
732
  // varvl
733
0
  mean=(double)(sum[0]+sum[2])/(num);
734
0
  variance =  (double)(sum_sqr[0]+sum_sqr[2])/(num) - (mean*mean);
735
0
  varv =(int)(variance+0.5);
736
  // varvr
737
0
  mean=(double)(sum[1]+sum[3])/(num);
738
0
  variance =  (double)(sum_sqr[1]+sum_sqr[3])/(num) - (mean*mean);
739
0
  varv +=(int)(variance+0.5);
740
0
}
741
742
template<>
743
void AreaBuf<Pel>::copyClip( const AreaBuf<const Pel>& src, const ClpRng& clpRng )
744
0
{
745
0
  const Pel* srcp = src.buf;
746
0
        Pel* dest =     buf;
747
748
0
  const unsigned srcStride  = src.stride;
749
0
  const unsigned destStride = stride;
750
751
0
  if( destStride == width)
752
0
  {
753
0
    g_pelBufOP.copyClip(srcp, dest, width * height, clpRng);
754
0
  }
755
0
  else if ((width & 7) == 0)
756
0
  {
757
0
    g_pelBufOP.copyClip8(srcp, srcStride, dest, destStride, width, height, clpRng);
758
0
  }
759
0
  else if ((width & 3) == 0)
760
0
  {
761
0
    g_pelBufOP.copyClip4(srcp, srcStride, dest, destStride, width, height, clpRng);
762
0
  }
763
0
  else
764
0
  {
765
0
    for( int y = 0; y < height; y++ )
766
0
    {
767
0
      dest[0] = ClipPel( srcp[0], clpRng);
768
0
      dest[1] = ClipPel( srcp[1], clpRng);
769
0
      srcp += srcStride;
770
0
      dest += destStride;
771
0
    }                                                         \
772
0
  }
773
0
}
774
775
template<>
776
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel>& pred, const AreaBuf<const Pel>& resi, const ClpRng& clpRng )
777
2.01M
{
778
2.01M
  const Pel* src1 = pred.buf;
779
2.01M
  const Pel* src2 = resi.buf;
780
2.01M
        Pel* dest =      buf;
781
782
2.01M
  const unsigned src1Stride = pred.stride;
783
2.01M
  const unsigned src2Stride = resi.stride;
784
2.01M
  const unsigned destStride =      stride;
785
2.01M
  if( src2Stride == width )
786
2.01M
  {
787
2.01M
    g_pelBufOP.reco( pred.buf, resi.buf, buf, width * height, clpRng );
788
2.01M
  }
789
8.23k
  else if( ( width & 7 ) == 0 )
790
4.95k
  {
791
4.95k
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
792
4.95k
  }
793
3.28k
  else if( ( width & 3 ) == 0 )
794
3.28k
  {
795
3.28k
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
796
3.28k
  }
797
18.4E
  else if( ( width & 1 ) == 0 )
798
0
  {
799
0
    for( int y = 0; y < height; y++ )
800
0
    {
801
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng);
802
0
      dest[1] = ClipPel( src1[1] + src2[1], clpRng);
803
0
      src1 += src1Stride;
804
0
      src2 += src2Stride;
805
0
      dest += destStride;
806
0
    }                        
807
0
  }
808
18.4E
  else
809
18.4E
  {
810
18.4E
    CHECKD( width != 1, "Expecting width to be '1'!" );
811
812
18.4E
    for( int y = 0; y < height; y++ )
813
0
    {
814
0
      dest[0] = ClipPel( src1[0] + src2[0], clpRng );
815
816
0
      src1 += src1Stride;
817
0
      src2 += src2Stride;
818
0
      dest += destStride;
819
0
    }
820
18.4E
  }
821
2.01M
}
822
823
template<>
824
void AreaBuf<Pel>::linearTransform( const int scale, const unsigned shift, const int offset, bool bClip, const ClpRng& clpRng )
825
184k
{
826
184k
  const Pel* src = buf;
827
184k
        Pel* dst = buf;
828
829
184k
  if( stride == width)
830
184k
  {
831
184k
    if( width > 2 && height > 2 )
832
172k
    {
833
172k
      g_pelBufOP.linTf8( src, stride<<2, dst, stride<<2, width<<2, height>>2, scale, shift, offset, clpRng, bClip );
834
172k
    }
835
11.2k
    else
836
11.2k
    {
837
11.2k
      g_pelBufOP.linTf4( src, stride<<1, dst, stride<<1, width<<1, height>>1, scale, shift, offset, clpRng, bClip );
838
11.2k
    }
839
184k
  }
840
0
  else if( ( width & 7 ) == 0 )
841
0
  {
842
0
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
843
0
  }
844
0
  else if( ( width & 3 ) == 0 )
845
0
  {
846
0
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
847
0
  }
848
0
  else
849
0
  {
850
0
    if( bClip )
851
0
    {
852
0
      for( int y = 0; y < height; y++ )
853
0
      {
854
0
        dst[0] = ( Pel ) ClipPel( rightShiftU( scale * src[0], shift ) + offset, clpRng );
855
0
        dst[1] = ( Pel ) ClipPel( rightShiftU( scale * src[1], shift ) + offset, clpRng );
856
0
        src += stride;
857
0
        dst += stride;
858
0
      }
859
0
    }
860
0
    else
861
0
    {
862
0
      for( int y = 0; y < height; y++ )
863
0
      {
864
0
        dst[0] = ( Pel ) ( rightShiftU( scale * src[0], shift ) + offset );
865
0
        dst[1] = ( Pel ) ( rightShiftU( scale * src[1], shift ) + offset );
866
0
        src += stride;
867
0
        dst += stride;
868
0
      }
869
0
    }
870
0
  }
871
184k
}
872
873
#if ENABLE_SIMD_OPT_BUFFER
874
875
template<>
876
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel>& other )
877
537k
{
878
537k
  CHECK( width != other.height || height != other.width, "Incompatible size" );
879
880
537k
  if( ( ( width | height ) & 7 ) == 0 )
881
431k
  {
882
431k
    const Pel* src = other.buf;
883
884
1.75M
    for( unsigned y = 0; y < other.height; y += 8 )
885
1.31M
    {
886
1.31M
      Pel* dst = buf + y;
887
888
7.27M
      for( unsigned x = 0; x < other.width; x += 8 )
889
5.95M
      {
890
5.95M
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
891
892
5.95M
        dst += 8 * stride;
893
5.95M
      }
894
895
1.31M
      src += 8 * other.stride;
896
1.31M
    }
897
431k
  }
898
105k
  else if( ( ( width | height ) & 3 ) == 0 )
899
96.3k
  {
900
96.3k
    const Pel* src = other.buf;
901
902
284k
    for( unsigned y = 0; y < other.height; y += 4 )
903
188k
    {
904
188k
      Pel* dst = buf + y;
905
906
545k
      for( unsigned x = 0; x < other.width; x += 4 )
907
357k
      {
908
357k
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );
909
910
357k
        dst += 4 * stride;
911
357k
      }
912
913
188k
      src += 4 * other.stride;
914
188k
    }
915
96.3k
  }
916
9.23k
  else
917
9.23k
  {
918
9.23k
          Pel* dst =       buf;
919
9.23k
    const Pel* src = other.buf;
920
9.23k
    width          = other.height;
921
9.23k
    height         = other.width;
922
9.23k
    stride         = stride < width ? width : stride;
923
924
116k
    for( unsigned y = 0; y < other.height; y++ )
925
107k
    {
926
322k
      for( unsigned x = 0; x < other.width; x++ )
927
215k
      {
928
215k
        dst[y + x*stride] = src[x + y * other.stride];
929
215k
      }
930
107k
    }
931
9.23k
  }
932
537k
}
933
#endif
934
935
template<>
936
void AreaBuf<Pel>::weightCiip( const AreaBuf<const Pel>& intra, const int numIntra )
937
0
{
938
0
  CHECK(width == 2, "Width of 2 is not supported");
939
0
  g_pelBufOP.weightCiip( buf, intra.buf, width * height, numIntra );
940
0
}
941
942
template<>
943
void AreaBuf<MotionInfo>::fill( const MotionInfo& val )
944
20.2k
{
945
20.2k
  if( width == stride )
946
20.2k
  {
947
20.2k
    std::fill_n( buf, width * height, val );
948
20.2k
  }
949
0
  else
950
0
  {
951
0
    MotionInfo* dst = buf;
952
953
0
    for( int y = 0; y < height; y++, dst += stride )
954
0
    {
955
0
      std::fill_n( dst, width, val );
956
0
    }
957
0
  }
958
20.2k
}
959
960
PelStorage::PelStorage()
{
  // no memory is owned initially
  uint32_t comp = 0;
  while( comp < MAX_NUM_COMP )
  {
    m_origin[comp++] = nullptr;
  }
}
967
968
PelStorage::~PelStorage()
{
  // release all owned sample memory
  this->destroy();
}
972
973
void PelStorage::create( const UnitArea& _UnitArea )
974
1.63M
{
975
1.63M
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
976
1.63M
  m_maxArea = _UnitArea;
977
1.63M
}
978
979
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area )
980
3.24M
{
981
3.24M
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
982
983
3.24M
  chromaFormat = _chromaFormat;
984
985
3.24M
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
986
987
3.24M
  uint32_t bufSize = 0;
988
11.6M
  for( uint32_t i = 0; i < numComp; i++ )
989
8.35M
  {
990
8.35M
    const ComponentID compID = ComponentID( i );
991
8.35M
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
992
8.35M
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
993
994
8.35M
    const uint32_t area = totalWidth * totalHeight;
995
8.35M
    CHECK( !area, "Trying to create a buffer with zero area" );
996
8.35M
    bufSize += area;
997
8.35M
  }
998
999
3.24M
  bufSize += 1; // for SIMD DMVR on the bottom right corner, which overreads the lines by 1 sample
1000
1001
  //allocate one buffer
1002
3.24M
  m_origin[0] = ( Pel* ) xMalloc( Pel, bufSize );
1003
1004
3.24M
  Pel* topLeft = m_origin[0];
1005
11.6M
  for( uint32_t i = 0; i < numComp; i++ )
1006
8.35M
  {
1007
8.35M
    const ComponentID compID = ComponentID( i );
1008
8.35M
    const unsigned totalWidth  = _area.width  >> getComponentScaleX( compID, _chromaFormat );
1009
8.35M
    const unsigned totalHeight = _area.height >> getComponentScaleY( compID, _chromaFormat );
1010
8.35M
    const uint32_t area = totalWidth * totalHeight;
1011
1012
8.35M
    bufs.push_back( PelBuf( topLeft, totalWidth, totalWidth, totalHeight ) );
1013
8.35M
    topLeft += area;
1014
8.35M
  }
1015
1016
3.24M
  m_maxArea = UnitArea( _chromaFormat, _area );
1017
3.24M
}
1018
1019
void PelStorage::create( const ChromaFormat &_chromaFormat, const Area& _area, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignment, const bool _scaleChromaMargin )
1020
150k
{
1021
150k
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
1022
1023
150k
  chromaFormat = _chromaFormat;
1024
1025
150k
  const uint32_t numComp = getNumberValidComponents( _chromaFormat );
1026
1027
150k
  unsigned extHeight = _area.height;
1028
150k
  unsigned extWidth  = _area.width;
1029
1030
150k
  if( _maxCUSize )
1031
28.2k
  {
1032
28.2k
    extHeight = ( ( _area.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1033
28.2k
    extWidth  = ( ( _area.width  + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
1034
28.2k
  }
1035
1036
534k
  for( uint32_t i = 0; i < numComp; i++ )
1037
383k
  {
1038
383k
    const ComponentID compID = ComponentID( i );
1039
383k
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
1040
383k
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );
1041
1042
383k
    unsigned scaledHeight = extHeight >> scaleY;
1043
383k
    unsigned scaledWidth  = extWidth  >> scaleX;
1044
383k
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
1045
383k
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);
1046
383k
    unsigned totalWidth   = scaledWidth + 2*xmargin;
1047
383k
    unsigned totalHeight  = scaledHeight +2*ymargin;
1048
1049
383k
    if( _alignment )
1050
215k
    {
1051
      // make sure buffer lines are align
1052
215k
      CHECK( _alignment != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
1053
215k
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
1054
215k
    }
1055
383k
    uint32_t area = totalWidth * totalHeight;
1056
383k
    CHECK( !area, "Trying to create a buffer with zero area" );
1057
1058
383k
    m_origin[i] = ( Pel* ) xMalloc( Pel, area );
1059
383k
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
1060
383k
    bufs.push_back( PelBuf( topLeft, totalWidth, _area.width >> scaleX, _area.height >> scaleY ) );
1061
383k
  }
1062
1063
150k
  m_maxArea = UnitArea( _chromaFormat, _area );
1064
150k
}
1065
1066
void PelStorage::createFromBuf( PelUnitBuf buf )
{
  // Lets this storage reference the planes of 'buf'; m_origin is not touched here.
  chromaFormat = buf.chromaFormat;

  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  bufs.resize(numComp);

  for( uint32_t c = 0; c < numComp; ++c )
  {
    PelBuf plane = buf.get( ComponentID( c ) );
    Pel*   start = plane.bufAt( 0, 0 );

    bufs[c] = PelBuf( start, plane.stride, plane.width, plane.height );
  }
}
1080
1081
void PelStorage::compactResize( const UnitArea& area )
1082
2.00M
{
1083
2.00M
  CHECK( bufs.size() < area.blocks.size(), "Cannot increase buffer size when compacting!" );
1084
1085
6.77M
  for( uint32_t i = 0; i < area.blocks.size(); i++ )
1086
4.77M
  {
1087
4.77M
    CHECK( m_maxArea.blocks[i].area() < area.blocks[i].area(), "Cannot increase buffer size when compacting!" );
1088
1089
4.77M
    bufs[i].Size::operator=( area.blocks[i].size() );
1090
4.77M
    bufs[i].stride = bufs[i].width;
1091
4.77M
  }
1092
2.00M
}
1093
1094
void PelStorage::takeOwnership( PelStorage& other )
{
  // Takes over the planes and the origin pointers of 'other'. The origin pointers are
  // exchanged, so whatever this storage held before is released by other.destroy() below.
  chromaFormat = other.chromaFormat;

  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  bufs.resize(numComp);

  for( uint32_t c = 0; c < numComp; ++c )
  {
    PelBuf plane = other.get( ComponentID( c ) );
    Pel*   start = plane.bufAt( 0, 0 );

    bufs[c] = PelBuf( start, plane.stride, plane.width, plane.height );
    std::swap( m_origin[c], other.m_origin[c]);
  }

  m_maxArea = other.m_maxArea;

  other.destroy();
}
1113
1114
1115
void PelStorage::swap( PelStorage& other )
{
  // Exchanges sample pointers, strides and origin pointers with 'other', component by component.
  const uint32_t numComp = getNumberValidComponents( chromaFormat );

  for( uint32_t c = 0; c < numComp; ++c )
  {
    const ComponentID compID = ComponentID( c );

    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat         != other.chromaFormat        , "Incompatible formats" );
    CHECK( get( compID )        != other.get( compID )       , "Incompatible formats" );
    CHECK( get( compID ).stride != other.get( compID ).stride, "Incompatible formats" );

    auto& mine   = bufs[c];
    auto& theirs = other.bufs[c];

    std::swap( mine.buf,    theirs.buf );
    std::swap( mine.stride, theirs.stride );
    std::swap( m_origin[c], other.m_origin[c] );
  }
}
1131
1132
void PelStorage::destroy()
1133
7.08M
{
1134
7.08M
  chromaFormat = NUM_CHROMA_FORMAT;
1135
28.3M
  for( uint32_t i = 0; i < MAX_NUM_COMP; i++ )
1136
21.2M
  {
1137
21.2M
    if( m_origin[i] )
1138
3.63M
    {
1139
3.63M
      xFree( m_origin[i] );
1140
3.63M
      m_origin[i] = nullptr;
1141
3.63M
    }
1142
21.2M
  }
1143
7.08M
  bufs.clear();
1144
7.08M
}
1145
1146
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  // complete plane of the given component
  PelBuf plane = bufs[CompID];
  return plane;
}
1150
1151
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  // complete plane of the given component (read-only view)
  const CPelBuf plane = bufs[CompID];
  return plane;
}
1155
1156
PelBuf PelStorage::getBuf( const CompArea& blk )
{
  // view on the sub-area 'blk' of its component plane, keeping the stride of the plane
  const PelBuf& plane = bufs[blk.compID];
  Pel* const    start = plane.buf + rsAddr( blk, plane.stride );

  return PelBuf( start, plane.stride, blk );
}
1161
1162
const CPelBuf PelStorage::getBuf( const CompArea& blk ) const
{
  // read-only view on the sub-area 'blk' of its component plane, keeping the stride of the plane
  const PelBuf&    plane = bufs[blk.compID];
  const Pel* const start = plane.buf + rsAddr( blk, plane.stride );

  return CPelBuf( start, plane.stride, blk );
}
1167
1168
PelUnitBuf PelStorage::getBuf( const UnitArea& unit )
{
  // luma only for 4:0:0, otherwise views on all three components of 'unit'
  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1172
1173
const CPelUnitBuf PelStorage::getBuf( const UnitArea& unit ) const
{
  // luma only for 4:0:0, otherwise read-only views on all three components of 'unit'
  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
1177
1178
PelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit)
{
  // Views starting at the origin of each plane, using the caller supplied strides
  // instead of the strides stored with the planes.
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );
  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, strY, unit.Y()));
  }

  return PelUnitBuf(chromaFormat,
                    PelBuf( bufs[COMP_Y ].buf, strY,  unit.Y ()),
                    PelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb()),
                    PelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr()));
}
1186
1187
const CPelUnitBuf PelStorage::getBuf(const int strY, const int strCb, const int strCr, const UnitArea& unit) const
{
  // Read-only views starting at the origin of each plane, using the caller supplied strides
  // instead of the strides stored with the planes.
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );
  CHECKD( strY > bufs[COMP_Y].stride, "unsuported request" );
  CHECKD( strCb > bufs[COMP_Cb].stride, "unsuported request" );
  CHECKD( strCr > bufs[COMP_Cr].stride, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf(chromaFormat, CPelBuf( bufs[COMP_Y].buf, strY, unit.Y()));
  }

  return CPelUnitBuf(chromaFormat,
                     CPelBuf( bufs[COMP_Y ].buf, strY,  unit.Y ()),
                     CPelBuf( bufs[COMP_Cb].buf, strCb, unit.Cb()),
                     CPelBuf( bufs[COMP_Cr].buf, strCr, unit.Cr()));
}
1195
1196
PelUnitBuf PelStorage::getBufPart(const UnitArea& unit)
{
  // Views of the size of 'unit' starting at the origin of each plane, keeping the plane strides.
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf(chromaFormat, PelBuf( bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y()));
  }

  return PelUnitBuf(chromaFormat,
                    PelBuf( bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y ()),
                    PelBuf( bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb()),
                    PelBuf( bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr()));
}
1201
1202
const CPelUnitBuf PelStorage::getBufPart(const UnitArea& unit) const
{
  // Read-only views of the size of 'unit' starting at the origin of each plane.
  // The views keep the strides of the stored planes, exactly like the non-const overload;
  // using the unit width as stride would describe a compact layout (see getCompactBuf)
  // and address different samples whenever a plane stride differs from the unit width.
  CHECKD(unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request");

  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf(chromaFormat, CPelBuf(bufs[COMP_Y].buf, bufs[COMP_Y].stride, unit.Y()));
  }

  return CPelUnitBuf(chromaFormat,
                     CPelBuf(bufs[COMP_Y ].buf, bufs[COMP_Y ].stride, unit.Y ()),
                     CPelBuf(bufs[COMP_Cb].buf, bufs[COMP_Cb].stride, unit.Cb()),
                     CPelBuf(bufs[COMP_Cr].buf, bufs[COMP_Cr].stride, unit.Cr()));
}
1207
1208
const CPelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit) const
{
  // Views of the size of 'unit' starting at the origin of each plane, with stride == width.
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = chromaFormat == CHROMA_400;

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  PelBuf& luma = ret.Y();
  luma.buf     = bufs[COMP_Y].buf;
  luma.width   = luma.stride = unit.Y().width;
  luma.height  = unit.Y().height;

  if( !lumaOnly )
  {
    PelBuf& cb = ret.Cb();
    cb.buf     = bufs[COMP_Cb].buf;
    cb.width   = cb.stride = unit.Cb().width;
    cb.height  = unit.Cb().height;

    PelBuf& cr = ret.Cr();
    cr.buf     = bufs[COMP_Cr].buf;
    cr.width   = cr.stride = unit.Cr().width;
    cr.height  = unit.Cr().height;
  }

  return ret;
}
1225
1226
PelUnitBuf PelStorage::getCompactBuf(const UnitArea& unit)
{
  // Views of the size of 'unit' starting at the origin of each plane, with stride == width.
  CHECKD( unit.Y().width > bufs[COMP_Y].width && unit.Y().height > bufs[COMP_Y].height, "unsuported request" );

  const bool lumaOnly = chromaFormat == CHROMA_400;

  PelUnitBuf ret;
  ret.chromaFormat = chromaFormat;
  ret.bufs.resize_noinit( lumaOnly ? 1 : 3 );

  PelBuf& luma = ret.Y();
  luma.buf     = bufs[COMP_Y].buf;
  luma.width   = luma.stride = unit.Y().width;
  luma.height  = unit.Y().height;

  if( !lumaOnly )
  {
    PelBuf& cb = ret.Cb();
    cb.buf     = bufs[COMP_Cb].buf;
    cb.width   = cb.stride = unit.Cb().width;
    cb.height  = unit.Cb().height;

    PelBuf& cr = ret.Cr();
    cr.buf     = bufs[COMP_Cr].buf;
    cr.width   = cr.stride = unit.Cr().width;
    cr.height  = unit.Cr().height;
  }

  return ret;
}
1243
1244
const CPelBuf PelStorage::getCompactBuf(const CompArea& carea) const
{
  // read-only view at the plane origin with the size of 'carea' and stride == carea.width
  const PelBuf& plane = bufs[carea.compID];
  return CPelBuf( plane.buf, carea.width, carea);
}
1248
1249
PelBuf PelStorage::getCompactBuf(const CompArea& carea)
{
  // view at the plane origin with the size of 'carea' and stride == carea.width
  const PelBuf& plane = bufs[carea.compID];
  return PelBuf( plane.buf, carea.width, carea);
}
1253
1254
// Downsamples 'yuvPlaneIn' into 'dest' by averaging blocks of downsampleStep x downsampleStep
// input samples per output sample.
//   dest           : destination; its width and height define how many output samples are produced
//   yuvPlaneIn     : input plane, rows are 'stride' samples apart
//   downsampleStep : downsampling factor in both directions, must be positive
void downsampleYuv(PelBuf& dest, const vvencYUVPlane& yuvPlaneIn, int downsampleStep)
{
  CHECK( downsampleStep <= 0, "Invalid downsampling step" );

  const int dstWidth  = dest.width;
  const int dstHeight = dest.height;
  const int dstStride = dest.stride;

  // Consecutive input rows are 'stride' samples apart. The plane width must not be used
  // for stepping through rows, it only equals the stride for planes without padding.
  const int srcStride = yuvPlaneIn.stride;

  // average over all samples of a block, rounded to nearest
  const int numSamples = downsampleStep * downsampleStep;
  const int rounding   = numSamples >> 1;

  const int16_t* srcBlockRow = yuvPlaneIn.ptr;

  for( int j = 0; j < dstHeight; j++ )
  {
    const int16_t* srcBlock = srcBlockRow;

    for( int i = 0; i < dstWidth; i++ )
    {
      int64_t blockSum = 0;

      for( int r = 0; r < downsampleStep; r++ )
      {
        const int16_t* srcLine = srcBlock + r * srcStride;

        for( int c = 0; c < downsampleStep; c++ )
        {
          blockSum += srcLine[c];
        }
      }

      dest.buf[j * dstStride + i] = (int16_t)( ( blockSum + rounding ) / numSamples );
      srcBlock += downsampleStep;
    }

    srcBlockRow += downsampleStep * srcStride;
  }
}
1288
1289
void copyPadToPelUnitBuf( PelUnitBuf pelUnitBuf, const vvencYUVBuffer& yuvBuffer, const ChromaFormat& chFmt )
{
  // Copies the planes of 'yuvBuffer' into the planes of 'pelUnitBuf'. A destination plane that
  // is narrower than its source is filled by downsampling, otherwise the source is copied and
  // the destination is padded to the right and to the bottom by repeating the last sample / row.
  CHECK( pelUnitBuf.bufs.size() == 0, "pelUnitBuf not initialized" );

  // NOTE(review): 'pelUnitBuf' is passed by value, so this assignment only changes the local copy
  pelUnitBuf.chromaFormat = chFmt;

  const int numComp = getNumberValidComponents( chFmt );

  for( int c = 0; c < numComp; c++ )
  {
    const vvencYUVPlane& src = yuvBuffer.planes[ c ];
    CHECK( src.ptr == nullptr, "yuvBuffer not setup" );
    PelBuf& dest = pelUnitBuf.bufs[c];
    CHECK( dest.buf == nullptr, "yuvBuffer not setup" );

    if( dest.width < src.width )
    {
      downsampleYuv( dest, src, 2 );
      continue;
    }

    for( int y = 0; y < src.height; y++ )
    {
      ::memcpy( dest.buf + y * dest.stride, src.ptr + y * src.stride, src.width * sizeof(int16_t) );

      // pad right if required
      int x = src.width;
      while( x < dest.width )
      {
        dest.buf[x + y * dest.stride] = dest.buf[src.width - 1 + y * dest.stride];
        x++;
      }
    }

    // pad bottom if required
    int y = src.height;
    while( y < dest.height )
    {
      ::memcpy( dest.buf + y * dest.stride, dest.buf + (src.height - 1) * dest.stride, dest.width * sizeof(int16_t) );
      y++;
    }
  }
}
1326
1327
/*
1328
void setupPelUnitBuf( const YUVBuffer& yuvBuffer, PelUnitBuf& pelUnitBuf, const ChromaFormat& chFmt )
1329
{
1330
  CHECK( pelUnitBuf.bufs.size() != 0, "pelUnitBuf already in use" );
1331
  pelUnitBuf.chromaFormat = chFmt;
1332
  const int numComp = getNumberValidComponents( chFmt );
1333
  for ( int i = 0; i < numComp; i++ )
1334
  {
1335
    const YUVBuffer::Plane& yuvPlane = yuvBuffer.planes[ i ];
1336
    CHECK( yuvPlane.ptr == nullptr, "yuvBuffer not setup" );
1337
    PelBuf area( yuvPlane.ptr, yuvPlane.stride, yuvPlane.width, yuvPlane.height );
1338
    pelUnitBuf.bufs.push_back( area );
1339
  }
1340
}
1341
*/
1342
void setupYuvBuffer ( const PelUnitBuf& pelUnitBuf, vvencYUVBuffer& yuvBuffer, const Window* confWindow )
1343
0
{
1344
0
  const ChromaFormat chFmt = pelUnitBuf.chromaFormat;
1345
0
  const int numComp        = getNumberValidComponents( chFmt );
1346
0
  for ( int i = 0; i < numComp; i++ )
1347
0
  {
1348
0
    const ComponentID compId = ComponentID( i );
1349
0
          PelBuf area        = pelUnitBuf.get( compId );
1350
0
    const int sx             = getComponentScaleX( compId, chFmt );
1351
0
    const int sy             = getComponentScaleY( compId, chFmt );
1352
0
    vvencYUVPlane& yuvPlane = yuvBuffer.planes[ i ];
1353
0
    CHECK( yuvPlane.ptr != nullptr, "yuvBuffer already in use" );
1354
0
    yuvPlane.ptr             = area.bufAt( confWindow->winLeftOffset >> sx, confWindow->winTopOffset >> sy );
1355
0
    yuvPlane.width           = ( ( area.width  << sx ) - ( confWindow->winLeftOffset + confWindow->winRightOffset  ) ) >> sx;
1356
0
    yuvPlane.height          = ( ( area.height << sy ) - ( confWindow->winTopOffset  + confWindow->winBottomOffset ) ) >> sy;
1357
0
    yuvPlane.stride          = area.stride;
1358
0
  }
1359
0
}
1360
1361
} // namespace vvenc
1362
1363
//! \}
1364