Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/vvdec/source/Lib/CommonLib/Buffer.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/** \file     Buffer.cpp
44
 *  \brief    Low-overhead class describing 2D memory layout
45
 */
46
47
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
48
49
// unit needs to come first due to a forward declaration
50
51
#include "Unit.h"
52
#include "Buffer.h"
53
#include "InterpolationFilter.h"
54
#include "Picture.h"
55
#include "Slice.h"
56
57
#include <memory>
58
59
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
60
#include "CommonDefX86.h"
61
#include <simde/x86/sse.h>
62
#endif
63
64
namespace vvdec
65
{
66
67
template< typename T >
68
void addAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng )
69
0
{
70
0
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
71
0
#define ADD_AVG_CORE_INC    \
72
0
  src1 += src1Stride;       \
73
0
  src2 += src2Stride;       \
74
0
  dest +=  dstStride;       \
75
0
76
0
  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );
77
78
0
#undef ADD_AVG_CORE_OP
79
0
#undef ADD_AVG_CORE_INC
80
0
}
81
82
template<typename T>
83
void reconstructCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
84
0
{
85
0
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
86
0
#define RECO_CORE_INC     \
87
0
  src1 += src1Stride;     \
88
0
  src2 += src2Stride;     \
89
0
  dest +=  dstStride;     \
90
0
91
0
  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );
92
93
0
#undef RECO_CORE_OP
94
0
#undef RECO_CORE_INC
95
0
}
96
97
98
template<typename T>
99
void linTfCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip )
100
0
{
101
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset )
102
0
#define LINTF_CORE_INC  \
103
0
  src += srcStride;     \
104
0
  dst += dstStride;     \
105
0
106
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
107
108
0
#undef LINTF_CORE_OP
109
0
#undef LINTF_CORE_INC
110
0
}
111
112
template<typename T>
113
void transpose4x4Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride )
114
0
{
115
0
  for( int i = 0; i < 4; i++ )
116
0
  {
117
0
    for( int j = 0; j < 4; j++ )
118
0
    {
119
0
      dst[j * dstStride] = src[j];
120
0
    }
121
122
0
    dst++;
123
0
    src += srcStride;
124
0
  }
125
0
}
126
127
template<typename T>
128
void transpose8x8Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride )
129
0
{
130
0
  for( int i = 0; i < 8; i++ )
131
0
  {
132
0
    for( int j = 0; j < 8; j++ )
133
0
    {
134
0
      dst[j * dstStride] = src[j];
135
0
    }
136
137
0
    dst++;
138
0
    src += srcStride;
139
0
  }
140
0
}
141
142
template<typename T>
143
void copyClipCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
144
{
145
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
146
#define RECO_INC      \
147
    src += srcStride; \
148
    dst += dstStride; \
149
150
  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
151
152
#undef RECO_OP
153
#undef RECO_INC
154
}
155
156
template<typename T>
157
void addWeightedAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t destStride, int width, int height, int rshift, int offset, int w0, int w1, const ClpRng& clpRng )
158
0
{
159
0
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
160
0
#define ADD_WGHT_AVG_INC     \
161
0
    src1 += src1Stride; \
162
0
    src2 += src2Stride; \
163
0
    dest += destStride; \
164
0
165
0
  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );
166
167
0
#undef ADD_WGHT_AVG_OP
168
0
#undef ADD_WGHT_AVG_INC
169
0
}
170
171
/// Copies a 2D byte region of `width` bytes by `height` rows from src to dst.
///
/// \param src        first byte of the source region
/// \param srcStride  distance in bytes between consecutive source rows
/// \param dst        first byte of the destination region
/// \param dstStride  distance in bytes between consecutive destination rows
/// \param width      number of bytes to copy per row
/// \param height     number of rows
///
/// When both strides equal `width` the region is contiguous and is copied with a
/// single memcpy. Non-positive dimensions copy nothing.
void copyBufferCore( const char *src, ptrdiff_t srcStride, char *dst, ptrdiff_t dstStride, int width, int height )
{
  // Nothing to copy; this also keeps a negative size from being converted into
  // a huge size_t for memcpy.
  if( width <= 0 || height <= 0 )
  {
    return;
  }

#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
  _mm_prefetch( (const char *) ( src ),             _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst ),             _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
  const size_t rowBytes = static_cast<size_t>( width );

  if( width == srcStride && width == dstStride )
  {
    // The total size is computed in size_t so width * height cannot overflow int.
    memcpy( dst, src, rowBytes * static_cast<size_t>( height ) );
    return;
  }

  for( int row = 0; row < height; row++ )
  {
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
    _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
    _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
    memcpy( dst, src, rowBytes );

    src += srcStride;
    dst += dstStride;
  }
}
199
200
/// Replaces every sample of the width x height block in place by its table
/// entry: ptr[i] = lut[ptr[i]]. The current sample value is used directly as
/// the index into lut.
void applyLutCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const Pel* lut )
{
  // per-sample lookup
#define RSP_SGNL_OP( POS ) ptr[POS] = lut[ptr[POS]]
  // step to the next row
#define RSP_SGNL_INC       ptr     += ptrStride;

  SIZE_AWARE_PER_EL_OP( RSP_SGNL_OP, RSP_SGNL_INC )

#undef RSP_SGNL_INC
#undef RSP_SGNL_OP
}
217
218
/// Writes cuPtr into every entry of a width x height region of a CodingUnit*
/// map. A region whose stride equals its width is filled in one call, otherwise
/// it is filled row by row.
void fillN_CuCore( CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr )
{
  if( width == ptrStride )
  {
    // contiguous region
    std::fill_n( ptr, width * height, cuPtr );
    return;
  }

  CodingUnit** row = ptr;

  for( int line = 0; line < height; line++ )
  {
    std::fill_n( row, width, cuPtr );
    row += ptrStride;
  }
}
234
235
void sampleRateConvCore( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
236
                         const Pel* orgSrc, const ptrdiff_t orgStride, const int orgWidth, const int orgHeight,
237
                         const int beforeScaleLeftOffset, const int beforeScaleTopOffset,
238
                         Pel* scaledSrc, const ptrdiff_t scaledStride, const int scaledWidth, const int scaledHeight,
239
                         const int afterScaleLeftOffset, const int afterScaleTopOffset,
240
                         const int bitDepth, const bool useLumaFilter,
241
                         const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag )
242
0
{
243
0
  if( orgWidth == scaledWidth && orgHeight == scaledHeight && scalingRatio == SCALE_1X && !beforeScaleLeftOffset && !beforeScaleTopOffset && !afterScaleLeftOffset && !afterScaleTopOffset )
244
0
  {
245
0
    g_pelBufOP.copyBuffer( ( const char * ) orgSrc, orgStride * sizeof( Pel ), ( char* ) scaledSrc, scaledStride * sizeof( Pel ), orgWidth * sizeof( Pel ), orgHeight );
246
247
0
    return;
248
0
  }
249
250
0
  const TFilterCoeff* filterHor = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0];
251
0
  const TFilterCoeff* filterVer = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0];
252
253
0
  const int numFracPositions  = useLumaFilter ? 15 : 31;
254
0
  const int numFracShift      = useLumaFilter ? 4 : 5;
255
0
  const int posShiftX         = SCALE_RATIO_BITS - numFracShift + compScale.first;
256
0
  const int posShiftY         = SCALE_RATIO_BITS - numFracShift + compScale.second;
257
0
  int addX  = (1 << (posShiftX - 1)) + (beforeScaleLeftOffset << SCALE_RATIO_BITS) + ((int( 1 - horCollocatedPositionFlag ) * 8 * (scalingRatio.first - SCALE_1X.first) + (1 << (2 + compScale.first))) >> (3 + compScale.first));
258
0
  int addY  = (1 << (posShiftY - 1)) + (beforeScaleTopOffset << SCALE_RATIO_BITS) + ((int( 1 - verCollocatedPositionFlag ) * 8 * (scalingRatio.second - SCALE_1X.second) + (1 << (2 + compScale.second))) >> (3 + compScale.second));
259
260
0
  const int filterLength = useLumaFilter ? NTAPS_LUMA : NTAPS_CHROMA;
261
0
  const int log2Norm     = 12;
262
263
0
  CHECK( bitDepth > 17, "Overflow may happen!" );
264
265
0
  const int maxVal = (1 << bitDepth) - 1;
266
0
  std::unique_ptr<int[]> buf( new int[orgHeight * scaledWidth] );
267
268
0
  for( int i = 0; i < scaledWidth; i++ )
269
0
  {
270
0
    const Pel* org = orgSrc;
271
0
    int refPos = (((i << compScale.first) - afterScaleLeftOffset) * scalingRatio.first + addX) >> posShiftX;
272
0
    int integer = refPos >> numFracShift;
273
0
    int frac = refPos & numFracPositions;
274
0
    int* tmp = buf.get() + i;
275
276
0
    for( int j = 0; j < orgHeight; j++ )
277
0
    {
278
0
      int sum = 0;
279
0
      const TFilterCoeff* f = filterHor + frac * filterLength;
280
281
0
      for( int k = 0; k < filterLength; k++ )
282
0
      {
283
0
        int xInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgWidth - 1 );
284
0
        sum += f[k] * org[xInt]; // postpone horizontal filtering gain removal after vertical filtering
285
0
      }
286
287
0
      *tmp = sum;
288
289
0
      tmp += scaledWidth;
290
0
      org += orgStride;
291
0
    }
292
0
  }
293
294
0
  Pel* dst = scaledSrc;
295
296
0
  for( int j = 0; j < scaledHeight; j++ )
297
0
  {
298
0
    int refPos = (((j << compScale.second) - afterScaleTopOffset) * scalingRatio.second + addY) >> posShiftY;
299
0
    int integer = refPos >> numFracShift;
300
0
    int frac = refPos & numFracPositions;
301
302
0
    for( int i = 0; i < scaledWidth; i++ )
303
0
    {
304
0
      int sum = 0;
305
0
      int* tmp = buf.get() + i;
306
0
      const TFilterCoeff* f = filterVer + frac * filterLength;
307
308
0
      for( int k = 0; k < filterLength; k++ )
309
0
      {
310
0
        int yInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgHeight - 1 );
311
0
        sum += f[k] * tmp[yInt * scaledWidth];
312
0
      }
313
314
0
      dst[i] = std::min<int>( std::max( 0, (sum + (1 << (log2Norm - 1))) >> log2Norm ), maxVal );
315
0
    }
316
317
0
    dst += scaledStride;
318
0
  }
319
0
}
320
321
/// In-place forward mapping of a width x height block. For every sample s:
///   idx = s >> getLog2( OrgCW )
///   s   = ClipBD<int>( LmcsPivot[idx] + ( ( ScaleCoeff[idx] * ( s - InputPivot[idx] ) + ( 1 << 10 ) ) >> 11 ), bd )
/// stored back as Pel.
void rspFwdCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const int bd, const Pel OrgCW, const Pel* LmcsPivot, const Pel* ScaleCoeff, const Pel* InputPivot )
{
  const int shift = getLog2( OrgCW );

  // per-sample mapping; the segment index is local to each invocation
#define RSP_FWD_OP( POS ) { const int idxY = ( ptr[POS] >> shift ); ptr[POS] = static_cast<Pel>( ClipBD<int>( LmcsPivot[idxY] + ( ( ScaleCoeff[idxY] * ( ptr[POS] - InputPivot[idxY] ) + ( 1 << 10 ) ) >> 11 ), bd ) ); }
  // step to the next row
#define RSP_FWD_INC       ptr     += ptrStride;

  SIZE_AWARE_PER_EL_OP( RSP_FWD_OP, RSP_FWD_INC )

#undef RSP_FWD_INC
#undef RSP_FWD_OP
}
339
340
/// Installs the scalar reference implementations from this file into the
/// function-pointer table. The width-specific slots (4/8/16) all receive the
/// same generic core; rspBcw has no scalar implementation and is left null.
PelBufferOps::PelBufferOps()
{
  // averaging
  addAvg4   = addAvgCore<Pel>;
  addAvg8   = addAvgCore<Pel>;
  addAvg16  = addAvgCore<Pel>;
  wghtAvg4  = addWeightedAvgCore<Pel>;
  wghtAvg8  = addWeightedAvgCore<Pel>;

  // reconstruction and linear transform
  reco4     = reconstructCore<Pel>;
  reco8     = reconstructCore<Pel>;
  linTf4    = linTfCore<Pel>;
  linTf8    = linTfCore<Pel>;

  // copying, filling and transposing
  copyBuffer   = copyBufferCore;
  fillN_CU     = fillN_CuCore;
  transpose4x4 = transpose4x4Core<Pel>;
  transpose8x8 = transpose8x8Core<Pel>;

  // table lookup / reshaping
  applyLut  = applyLutCore;
  rspFwd    = rspFwdCore;
  rspBcw    = nullptr;

  // resampling
  sampleRateConv = sampleRateConvCore;
}
368
369
// Global operation table used by the buffer routines in this file; it is
// initialised with the scalar implementations set up by the constructor above.
PelBufferOps g_pelBufOP = PelBufferOps();
370
371
template<>
372
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t bcwIdx)
373
0
{
374
0
  const int8_t w0 = getBcwWeight(bcwIdx, REF_PIC_LIST_0);
375
0
  const int8_t w1 = getBcwWeight(bcwIdx, REF_PIC_LIST_1);
376
0
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
377
0
  const Pel* src0 = other1.buf;
378
0
  const Pel* src2 = other2.buf;
379
0
  Pel* dest = buf;
380
381
0
  const ptrdiff_t src1Stride = other1.stride;
382
0
  const ptrdiff_t src2Stride = other2.stride;
383
0
  const ptrdiff_t destStride = stride;
384
0
  const int clipbd    = clpRng.bd;
385
0
  const int shiftNum  = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
386
0
  const int offset    = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );
387
388
0
  if( ( width & 7 ) == 0 )
389
0
  {
390
0
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
391
0
  }
392
0
  else if( ( width & 3 ) == 0 )
393
0
  {
394
0
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
395
0
  }
396
0
  else
397
0
  {
398
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
399
0
#define ADD_AVG_INC     \
400
0
    src0 += src1Stride; \
401
0
    src2 += src2Stride; \
402
0
    dest += destStride; \
403
0
404
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
405
406
0
#undef ADD_AVG_OP
407
0
#undef ADD_AVG_INC
408
0
  }
409
0
}
410
411
template<>
412
void AreaBuf<Pel>::scaleSignal(const int scale, const ClpRng& clpRng)
413
0
{
414
0
  Pel* dst = buf;
415
0
  Pel* src = buf;
416
0
  int sign, absval;
417
0
  int maxAbsclipBD = ( 1 << clpRng.bd ) - 1;
418
419
0
  for (unsigned y = 0; y < height; y++)
420
0
  {
421
0
    for (unsigned x = 0; x < width; x++)
422
0
    {
423
0
      src[x] = Clip3<Pel>( -maxAbsclipBD - 1, maxAbsclipBD, src[x] );
424
0
      sign   = src[x] >= 0 ? 1 : -1;
425
0
      absval = sign * src[x];
426
427
0
      int val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
428
429
0
      if( sizeof( Pel ) == 2 ) // avoid overflow when storing data
430
0
      {
431
0
          val = Clip3<int>(-32768, 32767, val);
432
0
      }
433
0
      dst[x] = (Pel)val;
434
0
    }
435
0
    dst += stride;
436
0
    src += stride;
437
0
  }
438
0
}
439
440
template<>
441
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng)
442
0
{
443
0
  const Pel* src0 = other1.buf;
444
0
  const Pel* src2 = other2.buf;
445
0
        Pel* dest =        buf;
446
447
0
  const ptrdiff_t src1Stride = other1.stride;
448
0
  const ptrdiff_t src2Stride = other2.stride;
449
0
  const ptrdiff_t destStride =        stride;
450
0
  const int       clipbd     = clpRng.bd;
451
0
  const int       shiftNum   = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
452
0
  const int       offset     = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
453
454
0
  if( ( width & 15 ) == 0 )
455
0
  {
456
0
    g_pelBufOP.addAvg16( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
457
0
  }
458
0
  else if( ( width & 7 ) == 0 )
459
0
  {
460
0
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
461
0
  }
462
0
  else if( ( width & 3 ) == 0 )
463
0
  {
464
0
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
465
0
  }
466
0
  else
467
0
  {
468
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
469
0
#define ADD_AVG_INC     \
470
0
    src0 += src1Stride; \
471
0
    src2 += src2Stride; \
472
0
    dest += destStride; \
473
0
474
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
475
476
0
#undef ADD_AVG_OP
477
0
#undef ADD_AVG_INC
478
0
  }
479
0
}
480
481
template<>
482
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel> &pred, const AreaBuf<const Pel> &resi, const ClpRng& clpRng )
483
43.2k
{
484
43.2k
  const Pel* src1 = pred.buf;
485
43.2k
  const Pel* src2 = resi.buf;
486
43.2k
        Pel* dest =      buf;
487
488
43.2k
  const ptrdiff_t src1Stride = pred.stride;
489
43.2k
  const ptrdiff_t src2Stride = resi.stride;
490
43.2k
  const ptrdiff_t destStride =      stride;
491
492
43.2k
  if( ( width & 7 ) == 0 )
493
39.1k
  {
494
39.1k
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
495
39.1k
  }
496
4.05k
  else if( ( width & 3 ) == 0 )
497
3.59k
  {
498
3.59k
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
499
3.59k
  }
500
460
  else
501
460
  {
502
15.9k
#define RECO_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
503
460
#define RECO_INC        \
504
460
    src1 += src1Stride; \
505
460
    src2 += src2Stride; \
506
460
    dest += destStride; \
507
460
508
15.9k
    SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
509
510
460
#undef RECO_OP
511
460
#undef RECO_INC
512
460
  }
513
43.2k
}
514
515
template<>
516
void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng )
517
12.4k
{
518
12.4k
  const Pel* src = buf;
519
12.4k
        Pel* dst = buf;
520
521
12.4k
  if( width == 1 )
522
0
  {
523
0
    THROW_FATAL( "Blocks of width = 1 not supported" );
524
0
  }
525
12.4k
  else if( ( width & 7 ) == 0 )
526
10.4k
  {
527
10.4k
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
528
10.4k
  }
529
1.95k
  else if( ( width & 3 ) == 0 )
530
1.95k
  {
531
1.95k
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
532
1.95k
  }
533
0
  else
534
0
  {
535
0
#define LINTF_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset )
536
0
#define LINTF_INC        \
537
0
    src += stride;       \
538
0
    dst += stride;       \
539
0
540
0
    SIZE_AWARE_PER_EL_OP( LINTF_OP, LINTF_INC );
541
542
0
#undef RECO_OP
543
0
#undef RECO_INC
544
0
  }
545
12.4k
}
546
547
#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
548
template<>
549
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other )
550
12.1k
{
551
12.1k
  CHECK( width != other.height || height != other.width, "Incompatible size" );
552
553
12.1k
  if( ( ( width | height ) & 7 ) == 0 )
554
10.1k
  {
555
10.1k
    const Pel* src = other.buf;
556
557
43.1k
    for( unsigned y = 0; y < other.height; y += 8 )
558
32.9k
    {
559
32.9k
      Pel* dst = buf + y;
560
561
157k
      for( unsigned x = 0; x < other.width; x += 8 )
562
124k
      {
563
124k
        g_pelBufOP.transpose8x8( &src[x], other.stride, dst, stride );
564
565
124k
        dst += 8 * stride;
566
124k
      }
567
568
32.9k
      src += 8 * other.stride;
569
32.9k
    }
570
10.1k
  }
571
2.00k
  else if( ( ( width | height ) & 3 ) == 0 )
572
1.72k
  {
573
1.72k
    const Pel* src = other.buf;
574
575
5.20k
    for( unsigned y = 0; y < other.height; y += 4 )
576
3.47k
    {
577
3.47k
      Pel* dst = buf + y;
578
579
9.90k
      for( unsigned x = 0; x < other.width; x += 4 )
580
6.42k
      {
581
6.42k
        g_pelBufOP.transpose4x4( &src[x], other.stride, dst, stride );
582
583
6.42k
        dst += 4 * stride;
584
6.42k
      }
585
586
3.47k
      src += 4 * other.stride;
587
3.47k
    }
588
1.72k
  }
589
284
  else
590
284
  {
591
284
          Pel* dst =       buf;
592
284
    const Pel* src = other.buf;
593
284
    width          = other.height;
594
284
    height         = other.width;
595
284
    stride         = stride < width ? width : stride;
596
597
5.64k
    for( unsigned y = 0; y < other.height; y++ )
598
5.36k
    {
599
14.6k
      for( unsigned x = 0; x < other.width; x++ )
600
9.24k
      {
601
9.24k
        dst[y + x*stride] = src[x + y * other.stride];
602
9.24k
      }
603
5.36k
    }
604
284
  }
605
12.1k
}
606
#endif
607
608
template<>
609
void AreaBuf<MotionInfo>::fill( const MotionInfo& val )
610
23.3k
{
611
23.3k
  if( width == stride )
612
0
  {
613
0
    std::fill_n( buf, width * height, val );
614
0
  }
615
23.3k
  else
616
23.3k
  {
617
23.3k
    MotionInfo* dst = buf;
618
619
181k
    for( int y = 0; y < height; y++, dst += stride )
620
157k
    {
621
157k
      std::fill_n( dst, width, val );
622
157k
    }
623
23.3k
  }
624
23.3k
}
625
626
/// Creates an empty storage: the origin pointer and the allocator handle of
/// every component slot are set to null.
PelStorage::PelStorage()
{
  for( uint32_t comp = 0; comp < MAX_NUM_COMPONENT; ++comp )
  {
    m_allocator[comp] = nullptr;
    m_origin[comp]    = nullptr;
  }
}
634
635
/// Releases all component buffers via destroy().
PelStorage::~PelStorage()
{
  this->destroy();
}
639
640
void PelStorage::create( const UnitArea &_UnitArea )
641
25.2k
{
642
25.2k
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
643
25.2k
}
644
645
void PelStorage::create( const ChromaFormat _chromaFormat, const Size& _size, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignmentByte, const bool _scaleChromaMargin, const UserAllocator* userAlloc )
646
49.4k
{
647
49.4k
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
648
649
49.4k
  chromaFormat = _chromaFormat;
650
651
49.4k
  const uint32_t numCh = getNumberValidComponents( _chromaFormat );
652
653
49.4k
  unsigned extHeight = _size.height;
654
49.4k
  unsigned extWidth  = _size.width;
655
656
49.4k
  if( _maxCUSize )
657
24.2k
  {
658
24.2k
    extHeight = ( ( _size.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
659
24.2k
    extWidth  = ( ( _size.width  + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
660
24.2k
  }
661
662
49.4k
  const unsigned _alignment = _alignmentByte / sizeof( Pel );
663
664
197k
  for( uint32_t i = 0; i < numCh; i++ )
665
148k
  {
666
148k
    const ComponentID compID = ComponentID( i );
667
148k
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
668
148k
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );
669
670
148k
    unsigned scaledHeight = extHeight >> scaleY;
671
148k
    unsigned scaledWidth  = extWidth  >> scaleX;
672
148k
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
673
148k
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);
674
675
148k
#if 1
676
148k
    if( _alignment && xmargin )
677
4.29k
    {
678
4.29k
      xmargin = ( ( xmargin + _alignment - 1 ) / _alignment ) * _alignment;
679
4.29k
    }
680
681
148k
#endif
682
148k
    SizeType totalWidth   = scaledWidth + 2 * xmargin;
683
148k
    SizeType totalHeight  = scaledHeight +2 * ymargin;
684
685
148k
    if( _alignment )
686
4.29k
    {
687
      // make sure buffer lines are align
688
4.29k
      CHECK( _alignmentByte != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
689
4.29k
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
690
4.29k
    }
691
692
148k
#if ENABLE_SIMD_OPT_INTER
693
148k
    uint32_t area = totalWidth * totalHeight + 1; // +1 for the extra Pel overread in prefetchPad_SSE, in case reading from the very bottom right of the picture
694
#else
695
    uint32_t area = totalWidth * totalHeight;
696
#endif
697
148k
    CHECK( !area, "Trying to create a buffer with zero area" );
698
699
148k
    m_origSi[i] = Size{ totalWidth, totalHeight };
700
148k
    if( userAlloc && userAlloc->enabled )
701
0
    {
702
0
      m_origin[i] = ( Pel* ) userAlloc->create( userAlloc->opaque, (vvdecComponentType)i, sizeof(Pel)*area, MEMORY_ALIGN_DEF_SIZE, &m_allocator[i] );
703
0
      CHECK( m_origin[i] == nullptr, "external allocator callback failed (returned NULL)." );
704
0
      m_externAllocator = true;
705
0
      m_userAlloc       = userAlloc;
706
0
    }
707
148k
    else
708
148k
    {
709
148k
      m_origin[i] = ( Pel* ) xMalloc( Pel, area );
710
148k
    }
711
148k
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
712
148k
    bufs.push_back( PelBuf( topLeft, totalWidth, _size.width >> scaleX, _size.height >> scaleY ) );
713
148k
  }
714
49.4k
}
715
716
/// Makes this storage a view onto the component buffers of `buf`: chroma format,
/// sample pointers, strides and sizes are taken over, no memory is allocated here.
void PelStorage::createFromBuf( PelUnitBuf buf )
{
  chromaFormat = buf.chromaFormat;

  const uint32_t compCount = getNumberValidComponents( chromaFormat );

  bufs.resize(compCount);

  for( uint32_t c = 0; c < compCount; c++ )
  {
    PelBuf view = buf.get( ComponentID( c ) );

    bufs[c] = PelBuf( view.bufAt( 0, 0 ), view.stride, view.width, view.height );
  }
}
730
731
/// Exchanges the sample memory of this storage with `other`: per component the
/// buffer pointer, stride, origin pointer and allocator handle are swapped,
/// followed by the external-allocator state. Both storages must have the same
/// chroma format, component sizes and strides (CHECKed per component).
void PelStorage::swap( PelStorage& other )
{
  const uint32_t compCount = getNumberValidComponents( chromaFormat );

  for( uint32_t c = 0; c < compCount; c++ )
  {
    const ComponentID compID = ComponentID( c );

    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat         != other.chromaFormat        , "Incompatible formats" );
    CHECK( get( compID )        != other.get( compID )       , "Incompatible formats" );
    CHECK( get( compID ).stride != other.get( compID ).stride, "Incompatible formats" );

    std::swap( bufs[c].buf,    other.bufs[c].buf );
    std::swap( bufs[c].stride, other.bufs[c].stride );
    std::swap( m_origin[c],    other.m_origin[c] );
    std::swap( m_allocator[c], other.m_allocator[c] );
  }

  std::swap( m_externAllocator, other.m_externAllocator );
  std::swap( m_userAlloc,       other.m_userAlloc );
}
750
751
void PelStorage::destroy()
752
202k
{
753
202k
  chromaFormat = NUM_CHROMA_FORMAT;
754
810k
  for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ )
755
607k
  {
756
607k
    if( m_origin[i] )
757
148k
    {
758
148k
      if ( !m_externAllocator )
759
148k
      {
760
148k
        xFree( m_origin[i] );
761
148k
      }
762
0
      else if( m_allocator[i])
763
0
      {
764
0
        CHECK( m_userAlloc->unref == nullptr, "vvdecUnrefBufferCallback not valid, cannot unref picture buffer" )
765
0
        m_userAlloc->unref( m_userAlloc->opaque, m_allocator[i] );
766
0
      }
767
148k
      m_origin[i] = nullptr;
768
148k
    }
769
607k
  }
770
202k
  bufs.clear();
771
202k
}
772
773
/// Returns the full buffer of the given component.
PelBuf PelStorage::getBuf( const ComponentID CompID )
{
  PelBuf& component = bufs[CompID];

  return component;
}
777
778
/// Returns a read-only view of the full buffer of the given component.
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
{
  const PelBuf& component = bufs[CompID];

  return component;
}
782
783
/// Returns a view of the sub-area `blk` inside the buffer of blk's component.
/// The view shares the stride of the component buffer. CHECKD verifies that the
/// bottom-right position of `blk` lies inside the component buffer.
PelBuf PelStorage::getBuf( const CompArea &blk )
{
  const PelBuf& comp = bufs[blk.compID()];

  CHECKD( rsAddr( blk.bottomRight(), comp.stride ) >= ( ( comp.height - 1 ) * comp.stride + comp.width ), "Trying to access a buf outside of bound!" );

  return PelBuf( comp.buf + rsAddr( blk, comp.stride ), comp.stride, blk );
}
791
792
/// Returns a read-only view of the sub-area `blk` inside the buffer of blk's
/// component. The view shares the stride of the component buffer; no bounds
/// check is performed in this overload.
const CPelBuf PelStorage::getBuf( const CompArea &blk ) const
{
  const PelBuf& comp = bufs[blk.compID()];

  return CPelBuf( comp.buf + rsAddr( blk, comp.stride ), comp.stride, blk );
}
797
798
/// Returns views of all components covered by `unit`: luma only for
/// CHROMA_400, otherwise Y, Cb and Cr.
PelUnitBuf PelStorage::getBuf( const UnitArea &unit )
{
  if( chromaFormat == CHROMA_400 )
  {
    return PelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
802
803
/// Returns read-only views of all components covered by `unit`: luma only for
/// CHROMA_400, otherwise Y, Cb and Cr.
const CPelUnitBuf PelStorage::getBuf( const UnitArea &unit ) const
{
  if( chromaFormat == CHROMA_400 )
  {
    return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) );
  }

  return CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
}
807
808
template<>
809
void UnitBuf<Pel>::colorSpaceConvert( const UnitBuf<Pel> &other, const ClpRng& clpRng )
810
0
{
811
0
  const Pel* pOrg0 = bufs[COMPONENT_Y ].buf;
812
0
  const Pel* pOrg1 = bufs[COMPONENT_Cb].buf;
813
0
  const Pel* pOrg2 = bufs[COMPONENT_Cr].buf;
814
0
  const ptrdiff_t strideOrg = bufs[COMPONENT_Y ].stride;
815
816
0
  Pel* pDst0 = other.bufs[COMPONENT_Y ].buf;
817
0
  Pel* pDst1 = other.bufs[COMPONENT_Cb].buf;
818
0
  Pel* pDst2 = other.bufs[COMPONENT_Cr].buf;
819
0
  const ptrdiff_t strideDst = other.bufs[COMPONENT_Y ].stride;
820
821
0
  int width  = bufs[COMPONENT_Y].width;
822
0
  int height = bufs[COMPONENT_Y].height;
823
0
  int maxAbsclipBD = (1 << (clpRng.bd + 1)) - 1;
824
0
  int y0, cg, co;
825
826
0
  CHECKD( bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cb].stride || bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
827
0
  CHECKD( other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cb].stride || other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
828
0
  CHECKD( bufs[COMPONENT_Y].width != other.bufs[COMPONENT_Y].width || bufs[COMPONENT_Y].height != other.bufs[COMPONENT_Y].height, "unequal block size" );
829
830
0
  for( int y = 0; y < height; y++ )
831
0
  {
832
0
    for( int x = 0; x < width; x++ )
833
0
    {
834
0
      y0 = pOrg0[x];
835
0
      cg = pOrg1[x];
836
0
      co = pOrg2[x];
837
838
0
      y0 = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, y0);
839
0
      cg = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, cg);
840
0
      co = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, co);
841
842
0
      int t = y0 - (cg >> 1);
843
0
      pDst0[x] = cg + t;
844
0
      pDst1[x] = t - (co >> 1);
845
0
      pDst2[x] = co + pDst1[x];
846
0
    }
847
848
0
    pOrg0 += strideOrg;
849
0
    pOrg1 += strideOrg;
850
0
    pOrg2 += strideOrg;
851
0
    pDst0 += strideDst;
852
0
    pDst1 += strideDst;
853
0
    pDst2 += strideDst;
854
0
  }
855
0
}
856
857
template void UnitBuf<Pel>::writeToFile( std::string filename ) const;
858
859
template<typename T>
860
void UnitBuf<T>::writeToFile( std::string filename ) const
861
0
{
862
0
  FILE* f = fopen( filename.c_str(), "w" );
863
0
  CHECK_FATAL( f == nullptr, "writeToFile() cannot open file for writing" )
864
865
0
  for( auto& b: bufs )
866
0
  {
867
0
    for( unsigned y = 0; y < b.height; y++ )
868
0
    {
869
0
      fwrite( b.bufAt( 0, y ), sizeof( T ), b.width, f );
870
0
    }
871
0
  }
872
873
0
  fclose( f );
874
0
}
875
876
}