Coverage Report

Created: 2026-05-30 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/vvdec/source/Lib/CommonLib/Buffer.cpp
Line
Count
Source
1
/* -----------------------------------------------------------------------------
2
The copyright in this software is being made available under the Clear BSD
3
License, included below. No patent rights, trademark rights and/or 
4
other Intellectual Property Rights other than the copyrights concerning 
5
the Software are granted under this license.
6
7
The Clear BSD License
8
9
Copyright (c) 2018-2026, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
10
All rights reserved.
11
12
Redistribution and use in source and binary forms, with or without modification,
13
are permitted (subject to the limitations in the disclaimer below) provided that
14
the following conditions are met:
15
16
     * Redistributions of source code must retain the above copyright notice,
17
     this list of conditions and the following disclaimer.
18
19
     * Redistributions in binary form must reproduce the above copyright
20
     notice, this list of conditions and the following disclaimer in the
21
     documentation and/or other materials provided with the distribution.
22
23
     * Neither the name of the copyright holder nor the names of its
24
     contributors may be used to endorse or promote products derived from this
25
     software without specific prior written permission.
26
27
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
28
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
29
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
31
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
32
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
36
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38
POSSIBILITY OF SUCH DAMAGE.
39
40
41
------------------------------------------------------------------------------------------- */
42
43
/** \file     Buffer.cpp
44
 *  \brief    Low-overhead class describing 2D memory layout
45
 */
46
47
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
48
49
// unit needs to come first due to a forward declaration
50
51
#include "Unit.h"
52
#include "Buffer.h"
53
#include "InterpolationFilter.h"
54
#include "Picture.h"
55
#include "Slice.h"
56
57
#include <memory>
58
59
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
60
#include "CommonDefX86.h"
61
#include <simde/x86/sse.h>
62
#endif
63
64
namespace vvdec
65
{
66
67
template< typename T >
68
void addAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, int rshift, int offset, const ClpRng& clpRng )
69
0
{
70
0
#define ADD_AVG_CORE_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR] + src2[ADDR] + offset ), rshift ), clpRng )
71
0
#define ADD_AVG_CORE_INC    \
72
0
  src1 += src1Stride;       \
73
0
  src2 += src2Stride;       \
74
0
  dest +=  dstStride;       \
75
0
76
0
  SIZE_AWARE_PER_EL_OP( ADD_AVG_CORE_OP, ADD_AVG_CORE_INC );
77
78
0
#undef ADD_AVG_CORE_OP
79
0
#undef ADD_AVG_CORE_INC
80
0
}
81
82
template<typename T>
83
void reconstructCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
84
0
{
85
0
#define RECO_CORE_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
86
0
#define RECO_CORE_INC     \
87
0
  src1 += src1Stride;     \
88
0
  src2 += src2Stride;     \
89
0
  dest +=  dstStride;     \
90
0
91
0
  SIZE_AWARE_PER_EL_OP( RECO_CORE_OP, RECO_CORE_INC );
92
93
0
#undef RECO_CORE_OP
94
0
#undef RECO_CORE_INC
95
0
}
96
97
98
template<typename T>
99
void linTfCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, int scale, int shift, int offset, const ClpRng& clpRng, bool bClip )
100
0
{
101
0
#define LINTF_CORE_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset )
102
0
#define LINTF_CORE_INC  \
103
0
  src += srcStride;     \
104
0
  dst += dstStride;     \
105
0
106
0
  SIZE_AWARE_PER_EL_OP( LINTF_CORE_OP, LINTF_CORE_INC );
107
108
0
#undef LINTF_CORE_OP
109
0
#undef LINTF_CORE_INC
110
0
}
111
112
template<typename T>
113
void transpose4x4Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride )
114
0
{
115
0
  for( int i = 0; i < 4; i++ )
116
0
  {
117
0
    for( int j = 0; j < 4; j++ )
118
0
    {
119
0
      dst[j * dstStride] = src[j];
120
0
    }
121
122
0
    dst++;
123
0
    src += srcStride;
124
0
  }
125
0
}
126
127
template<typename T>
128
void transpose8x8Core( const Pel* src, ptrdiff_t srcStride, Pel* dst, ptrdiff_t dstStride )
129
0
{
130
0
  for( int i = 0; i < 8; i++ )
131
0
  {
132
0
    for( int j = 0; j < 8; j++ )
133
0
    {
134
0
      dst[j * dstStride] = src[j];
135
0
    }
136
137
0
    dst++;
138
0
    src += srcStride;
139
0
  }
140
0
}
141
142
template<typename T>
143
void copyClipCore( const T* src, ptrdiff_t srcStride, Pel *dst, ptrdiff_t dstStride, int width, int height, const ClpRng& clpRng )
144
{
145
#define RECO_OP( ADDR ) dst[ADDR] = ClipPel( src[ADDR], clpRng )
146
#define RECO_INC      \
147
    src += srcStride; \
148
    dst += dstStride; \
149
150
  SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
151
152
#undef RECO_OP
153
#undef RECO_INC
154
}
155
156
template<typename T>
157
void addWeightedAvgCore( const T* src1, ptrdiff_t src1Stride, const T* src2, ptrdiff_t src2Stride, T* dest, ptrdiff_t destStride, int width, int height, int rshift, int offset, int w0, int w1, const ClpRng& clpRng )
158
0
{
159
0
#define ADD_WGHT_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src1[ADDR]*w0 + src2[ADDR]*w1 + offset ), rshift ), clpRng )
160
0
#define ADD_WGHT_AVG_INC     \
161
0
    src1 += src1Stride; \
162
0
    src2 += src2Stride; \
163
0
    dest += destStride; \
164
0
165
0
  SIZE_AWARE_PER_EL_OP( ADD_WGHT_AVG_OP, ADD_WGHT_AVG_INC );
166
167
0
#undef ADD_WGHT_AVG_OP
168
0
#undef ADD_WGHT_AVG_INC
169
0
}
170
171
/** Copies a 2D byte region from src to dst.
 *
 *  \param src        first byte of the source region
 *  \param srcStride  distance in bytes between the starts of two consecutive source rows
 *  \param dst        first byte of the destination region
 *  \param dstStride  distance in bytes between the starts of two consecutive destination rows
 *  \param width      number of bytes copied per row
 *  \param height     number of rows
 *
 *  When both strides equal the row width the rows are contiguous and the whole region is
 *  copied with one memcpy; otherwise one memcpy is issued per row.
 */
void copyBufferCore( const char *src, ptrdiff_t srcStride, char *dst, ptrdiff_t dstStride, int width, int height )
{
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
  _mm_prefetch( (const char *) ( src ),             _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst ),             _MM_HINT_T0 );
  _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
  if( width == srcStride && width == dstStride )
  {
    // Multiply in size_t: "width * height" evaluated in int can overflow (undefined behaviour)
    // before the result is converted to memcpy's size_t length argument.
    memcpy( dst, src, static_cast<size_t>( width ) * static_cast<size_t>( height ) );
    return;
  }

  for( int i = 0; i < height; i++ )
  {
#if ENABLE_SIMD_OPT_BUFFER && defined( TARGET_SIMD_X86 )
    // hint the following row of both buffers while the current one is copied
    _mm_prefetch( (const char *) ( src + srcStride ), _MM_HINT_T0 );
    _mm_prefetch( (const char *) ( dst + dstStride ), _MM_HINT_T0 );

#endif
    memcpy( dst, src, width );

    src += srcStride;
    dst += dstStride;
  }
}
199
200
/** Replaces every sample in place by its table entry: ptr[i] = lut[ptr[i]].
 *  The sample value is used directly as the table index. Iteration over the
 *  width x height area is delegated to SIZE_AWARE_PER_EL_OP (defined outside this file).
 */
void applyLutCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const Pel* lut )
{
#define LUT_SAMPLE_AT( ADDR ) ptr[ADDR] = lut[ptr[ADDR]]
#define LUT_NEXT_ROW          ptr      += ptrStride;

  SIZE_AWARE_PER_EL_OP( LUT_SAMPLE_AT, LUT_NEXT_ROW )

#undef LUT_SAMPLE_AT
#undef LUT_NEXT_ROW
}
217
218
/** Stores cuPtr into every entry of a width x height region of a CodingUnit* array.
 *
 *  \param ptr        first entry of the region
 *  \param ptrStride  distance in entries between the starts of two consecutive rows
 *  \param cuPtr      value written to every entry
 */
void fillN_CuCore( CodingUnit** ptr, ptrdiff_t ptrStride, int width, int height, CodingUnit* cuPtr )
{
  // rows are contiguous: a single fill covers the whole region
  if( width == ptrStride )
  {
    std::fill_n( ptr, width * height, cuPtr );
    return;
  }

  CodingUnit** row = ptr;

  for( int rowsLeft = height; rowsLeft > 0; --rowsLeft )
  {
    std::fill_n( row, width, cuPtr );
    row += ptrStride;
  }
}
234
235
void sampleRateConvCore( const std::pair<int, int> scalingRatio, const std::pair<int, int> compScale,
236
                         const Pel* orgSrc, const ptrdiff_t orgStride, const int orgWidth, const int orgHeight,
237
                         const int beforeScaleLeftOffset, const int beforeScaleTopOffset,
238
                         Pel* scaledSrc, const ptrdiff_t scaledStride, const int scaledWidth, const int scaledHeight,
239
                         const int afterScaleLeftOffset, const int afterScaleTopOffset,
240
                         const int bitDepth, const bool useLumaFilter,
241
                         const bool horCollocatedPositionFlag, const bool verCollocatedPositionFlag )
242
0
{
243
0
  if( orgWidth == scaledWidth && orgHeight == scaledHeight && scalingRatio == SCALE_1X && !beforeScaleLeftOffset && !beforeScaleTopOffset && !afterScaleLeftOffset && !afterScaleTopOffset )
244
0
  {
245
0
    g_pelBufOP.copyBuffer( ( const char * ) orgSrc, orgStride * sizeof( Pel ), ( char* ) scaledSrc, scaledStride * sizeof( Pel ), orgWidth * sizeof( Pel ), orgHeight );
246
247
0
    return;
248
0
  }
249
250
0
  const TFilterCoeff* filterHor = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0];
251
0
  const TFilterCoeff* filterVer = useLumaFilter ? &InterpolationFilter::m_lumaFilter[0][0] : &InterpolationFilter::m_chromaFilter[0][0];
252
253
0
  const int numFracPositions  = useLumaFilter ? 15 : 31;
254
0
  const int numFracShift      = useLumaFilter ? 4 : 5;
255
0
  const int posShiftX         = SCALE_RATIO_BITS - numFracShift + compScale.first;
256
0
  const int posShiftY         = SCALE_RATIO_BITS - numFracShift + compScale.second;
257
0
  int addX  = (1 << (posShiftX - 1)) + (beforeScaleLeftOffset << SCALE_RATIO_BITS) + ((int( 1 - horCollocatedPositionFlag ) * 8 * (scalingRatio.first - SCALE_1X.first) + (1 << (2 + compScale.first))) >> (3 + compScale.first));
258
0
  int addY  = (1 << (posShiftY - 1)) + (beforeScaleTopOffset << SCALE_RATIO_BITS) + ((int( 1 - verCollocatedPositionFlag ) * 8 * (scalingRatio.second - SCALE_1X.second) + (1 << (2 + compScale.second))) >> (3 + compScale.second));
259
260
0
  const int filterLength = useLumaFilter ? NTAPS_LUMA : NTAPS_CHROMA;
261
0
  const int log2Norm     = 12;
262
263
0
  CHECK( bitDepth > 17, "Overflow may happen!" );
264
265
0
  const int maxVal = (1 << bitDepth) - 1;
266
0
  std::unique_ptr<int[]> buf( new int[orgHeight * scaledWidth] );
267
268
0
  for( int i = 0; i < scaledWidth; i++ )
269
0
  {
270
0
    const Pel* org = orgSrc;
271
0
    int refPos = (((i << compScale.first) - afterScaleLeftOffset) * scalingRatio.first + addX) >> posShiftX;
272
0
    int integer = refPos >> numFracShift;
273
0
    int frac = refPos & numFracPositions;
274
0
    int* tmp = buf.get() + i;
275
276
0
    for( int j = 0; j < orgHeight; j++ )
277
0
    {
278
0
      int sum = 0;
279
0
      const TFilterCoeff* f = filterHor + frac * filterLength;
280
281
0
      for( int k = 0; k < filterLength; k++ )
282
0
      {
283
0
        int xInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgWidth - 1 );
284
0
        sum += f[k] * org[xInt]; // postpone horizontal filtering gain removal after vertical filtering
285
0
      }
286
287
0
      *tmp = sum;
288
289
0
      tmp += scaledWidth;
290
0
      org += orgStride;
291
0
    }
292
0
  }
293
294
0
  Pel* dst = scaledSrc;
295
296
0
  for( int j = 0; j < scaledHeight; j++ )
297
0
  {
298
0
    int refPos = (((j << compScale.second) - afterScaleTopOffset) * scalingRatio.second + addY) >> posShiftY;
299
0
    int integer = refPos >> numFracShift;
300
0
    int frac = refPos & numFracPositions;
301
302
0
    for( int i = 0; i < scaledWidth; i++ )
303
0
    {
304
0
      int sum = 0;
305
0
      int* tmp = buf.get() + i;
306
0
      const TFilterCoeff* f = filterVer + frac * filterLength;
307
308
0
      for( int k = 0; k < filterLength; k++ )
309
0
      {
310
0
        int yInt = std::min<int>( std::max( 0, integer + k - filterLength / 2 + 1 ), orgHeight - 1 );
311
0
        sum += f[k] * tmp[yInt * scaledWidth];
312
0
      }
313
314
0
      dst[i] = std::min<int>( std::max( 0, (sum + (1 << (log2Norm - 1))) >> log2Norm ), maxVal );
315
0
    }
316
317
0
    dst += scaledStride;
318
0
  }
319
0
}
320
321
/** In-place piecewise-linear mapping of every sample.
 *
 *  For each sample v the segment index is idx = v >> getLog2( OrgCW ) and the new value is
 *    ClipBD<int>( LmcsPivot[idx] + ( ( ScaleCoeff[idx] * ( v - InputPivot[idx] ) + ( 1 << 10 ) ) >> 11 ), bd )
 *  Iteration over the width x height area is delegated to SIZE_AWARE_PER_EL_OP (defined
 *  outside this file).
 */
void rspFwdCore( Pel* ptr, ptrdiff_t ptrStride, int width, int height, const int bd, const Pel OrgCW, const Pel* LmcsPivot, const Pel* ScaleCoeff, const Pel* InputPivot )
{
  int       idxY;                          // scratch segment index, written by the per-sample macro
  const int shift = getLog2( OrgCW );

#define FWD_MAP_AT( ADDR ) { idxY = ( ptr[ADDR] >> shift ); ptr[ADDR] = static_cast<Pel>( ClipBD<int>( LmcsPivot[idxY] + ( ( ScaleCoeff[idxY] * ( ptr[ADDR] - InputPivot[idxY] ) + ( 1 << 10 ) ) >> 11 ), bd ) ); }
#define FWD_MAP_NEXT_ROW   ptr      += ptrStride;

  SIZE_AWARE_PER_EL_OP( FWD_MAP_AT, FWD_MAP_NEXT_ROW )

#undef FWD_MAP_AT
#undef FWD_MAP_NEXT_ROW
}
339
340
/** Binds every operation slot to the generic kernel defined in this file.
 *  All width-specific slots of one operation share the same generic function; rspBcw is
 *  left unset (nullptr).
 */
PelBufferOps::PelBufferOps()
{
  // two-source averaging / reconstruction
  addAvg4   = addAvgCore<Pel>;
  addAvg8   = addAvgCore<Pel>;
  addAvg16  = addAvgCore<Pel>;
  wghtAvg4  = addWeightedAvgCore<Pel>;
  wghtAvg8  = addWeightedAvgCore<Pel>;
  reco4     = reconstructCore<Pel>;
  reco8     = reconstructCore<Pel>;

  // single-source transforms
  linTf4    = linTfCore<Pel>;
  linTf8    = linTfCore<Pel>;
  applyLut  = applyLutCore;
  rspFwd    = rspFwdCore;
  rspBcw    = nullptr;

  // block transposes
  transpose4x4 = transpose4x4Core<Pel>;
  transpose8x8 = transpose8x8Core<Pel>;

  // copy, fill and resampling
  copyBuffer     = copyBufferCore;
  fillN_CU       = fillN_CuCore;
  sampleRateConv = sampleRateConvCore;
}
368
369
// Global operation table; its default constructor installs the generic kernels.
PelBufferOps g_pelBufOP;
370
371
template<>
372
void AreaBuf<Pel>::addWeightedAvg(const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng, const int8_t bcwIdx)
373
0
{
374
0
  const int8_t w0 = getBcwWeight(bcwIdx, REF_PIC_LIST_0);
375
0
  const int8_t w1 = getBcwWeight(bcwIdx, REF_PIC_LIST_1);
376
0
  const int8_t log2WeightBase = g_BcwLog2WeightBase;
377
0
  const Pel* src0 = other1.buf;
378
0
  const Pel* src2 = other2.buf;
379
0
  Pel* dest = buf;
380
381
0
  const ptrdiff_t src1Stride = other1.stride;
382
0
  const ptrdiff_t src2Stride = other2.stride;
383
0
  const ptrdiff_t destStride = stride;
384
0
  const int clipbd    = clpRng.bd;
385
0
  const int shiftNum  = std::max<int>( 2, ( IF_INTERNAL_PREC - clipbd ) ) + log2WeightBase;
386
0
  const int offset    = ( 1 << ( shiftNum - 1 ) ) + ( IF_INTERNAL_OFFS << log2WeightBase );
387
388
0
  if( ( width & 7 ) == 0 )
389
0
  {
390
0
    g_pelBufOP.wghtAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
391
0
  }
392
0
  else if( ( width & 3 ) == 0 )
393
0
  {
394
0
    g_pelBufOP.wghtAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, w0, w1, clpRng );
395
0
  }
396
0
  else
397
0
  {
398
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR]*w0 + src2[ADDR]*w1 + offset ), shiftNum ), clpRng )
399
0
#define ADD_AVG_INC     \
400
0
    src0 += src1Stride; \
401
0
    src2 += src2Stride; \
402
0
    dest += destStride; \
403
0
404
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
405
406
0
#undef ADD_AVG_OP
407
0
#undef ADD_AVG_INC
408
0
  }
409
0
}
410
411
template<>
412
void AreaBuf<Pel>::scaleSignal(const int scale, const ClpRng& clpRng)
413
0
{
414
0
  Pel* dst = buf;
415
0
  Pel* src = buf;
416
0
  int sign, absval;
417
0
  int maxAbsclipBD = ( 1 << clpRng.bd ) - 1;
418
419
0
  for (unsigned y = 0; y < height; y++)
420
0
  {
421
0
    for (unsigned x = 0; x < width; x++)
422
0
    {
423
0
      src[x] = Clip3<Pel>( -maxAbsclipBD - 1, maxAbsclipBD, src[x] );
424
0
      sign   = src[x] >= 0 ? 1 : -1;
425
0
      absval = sign * src[x];
426
427
0
      int val = sign * ((absval * scale + (1 << (CSCALE_FP_PREC - 1))) >> CSCALE_FP_PREC);
428
429
0
      if( sizeof( Pel ) == 2 ) // avoid overflow when storing data
430
0
      {
431
0
          val = Clip3<int>(-32768, 32767, val);
432
0
      }
433
0
      dst[x] = (Pel)val;
434
0
    }
435
0
    dst += stride;
436
0
    src += stride;
437
0
  }
438
0
}
439
440
template<>
441
void AreaBuf<Pel>::addAvg( const AreaBuf<const Pel> &other1, const AreaBuf<const Pel> &other2, const ClpRng& clpRng)
442
0
{
443
0
  const Pel* src0 = other1.buf;
444
0
  const Pel* src2 = other2.buf;
445
0
        Pel* dest =        buf;
446
447
0
  const ptrdiff_t src1Stride = other1.stride;
448
0
  const ptrdiff_t src2Stride = other2.stride;
449
0
  const ptrdiff_t destStride =        stride;
450
0
  const int       clipbd     = clpRng.bd;
451
0
  const int       shiftNum   = std::max<int>(2, (IF_INTERNAL_PREC - clipbd)) + 1;
452
0
  const int       offset     = (1 << (shiftNum - 1)) + 2 * IF_INTERNAL_OFFS;
453
454
0
  if( ( width & 15 ) == 0 )
455
0
  {
456
0
    g_pelBufOP.addAvg16( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
457
0
  }
458
0
  else if( ( width & 7 ) == 0 )
459
0
  {
460
0
    g_pelBufOP.addAvg8( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
461
0
  }
462
0
  else if( ( width & 3 ) == 0 )
463
0
  {
464
0
    g_pelBufOP.addAvg4( src0, src1Stride, src2, src2Stride, dest, destStride, width, height, shiftNum, offset, clpRng );
465
0
  }
466
0
  else
467
0
  {
468
0
#define ADD_AVG_OP( ADDR ) dest[ADDR] = ClipPel( rightShift( ( src0[ADDR] + src2[ADDR] + offset ), shiftNum ), clpRng )
469
0
#define ADD_AVG_INC     \
470
0
    src0 += src1Stride; \
471
0
    src2 += src2Stride; \
472
0
    dest += destStride; \
473
0
474
0
    SIZE_AWARE_PER_EL_OP( ADD_AVG_OP, ADD_AVG_INC );
475
476
0
#undef ADD_AVG_OP
477
0
#undef ADD_AVG_INC
478
0
  }
479
0
}
480
481
template<>
482
void AreaBuf<Pel>::reconstruct( const AreaBuf<const Pel> &pred, const AreaBuf<const Pel> &resi, const ClpRng& clpRng )
483
0
{
484
0
  const Pel* src1 = pred.buf;
485
0
  const Pel* src2 = resi.buf;
486
0
        Pel* dest =      buf;
487
488
0
  const ptrdiff_t src1Stride = pred.stride;
489
0
  const ptrdiff_t src2Stride = resi.stride;
490
0
  const ptrdiff_t destStride =      stride;
491
492
0
  if( ( width & 7 ) == 0 )
493
0
  {
494
0
    g_pelBufOP.reco8( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
495
0
  }
496
0
  else if( ( width & 3 ) == 0 )
497
0
  {
498
0
    g_pelBufOP.reco4( src1, src1Stride, src2, src2Stride, dest, destStride, width, height, clpRng );
499
0
  }
500
0
  else
501
0
  {
502
0
#define RECO_OP( ADDR ) dest[ADDR] = ClipPel( src1[ADDR] + src2[ADDR], clpRng )
503
0
#define RECO_INC        \
504
0
    src1 += src1Stride; \
505
0
    src2 += src2Stride; \
506
0
    dest += destStride; \
507
0
508
0
    SIZE_AWARE_PER_EL_OP( RECO_OP, RECO_INC );
509
510
0
#undef RECO_OP
511
0
#undef RECO_INC
512
0
  }
513
0
}
514
515
template<>
516
void AreaBuf<Pel>::linearTransform( const int scale, const int shift, const int offset, bool bClip, const ClpRng& clpRng )
517
0
{
518
0
  const Pel* src = buf;
519
0
        Pel* dst = buf;
520
521
0
  if( width == 1 )
522
0
  {
523
0
    THROW_FATAL( "Blocks of width = 1 not supported" );
524
0
  }
525
0
  else if( ( width & 7 ) == 0 )
526
0
  {
527
0
    g_pelBufOP.linTf8( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
528
0
  }
529
0
  else if( ( width & 3 ) == 0 )
530
0
  {
531
0
    g_pelBufOP.linTf4( src, stride, dst, stride, width, height, scale, shift, offset, clpRng, bClip );
532
0
  }
533
0
  else
534
0
  {
535
0
#define LINTF_OP( ADDR ) dst[ADDR] = ( Pel ) bClip ? ClipPel( rightShift( scale * src[ADDR], shift ) + offset, clpRng ) : ( rightShift( scale * src[ADDR], shift ) + offset )
536
0
#define LINTF_INC        \
537
0
    src += stride;       \
538
0
    dst += stride;       \
539
0
540
0
    SIZE_AWARE_PER_EL_OP( LINTF_OP, LINTF_INC );
541
542
0
#undef RECO_OP
543
0
#undef RECO_INC
544
0
  }
545
0
}
546
547
#if ENABLE_SIMD_OPT_BUFFER && defined(TARGET_SIMD_X86)
/** Fills this buffer with the transpose of 'other'.
 *
 *  The sizes must be mirrored (width == other.height and height == other.width), otherwise
 *  the CHECK fires. When both dimensions are multiples of 8 (or of 4) the work is done tile
 *  by tile via g_pelBufOP.transpose8x8 (or transpose4x4); otherwise samples are moved one at
 *  a time, after width/height are re-assigned from 'other' and stride is raised to at least
 *  the width.
 */
template<>
void AreaBuf<Pel>::transposedFrom( const AreaBuf<const Pel> &other )
{
  CHECK( width != other.height || height != other.width, "Incompatible size" );

  const unsigned dimBits = width | height;

  if( ( dimBits & 7 ) == 0 )
  {
    // the source tile at (x, y) lands at destination column y, row x
    for( unsigned y = 0; y < other.height; y += 8 )
    {
      for( unsigned x = 0; x < other.width; x += 8 )
      {
        g_pelBufOP.transpose8x8( other.buf + y * other.stride + x, other.stride, buf + x * stride + y, stride );
      }
    }
    return;
  }

  if( ( dimBits & 3 ) == 0 )
  {
    for( unsigned y = 0; y < other.height; y += 4 )
    {
      for( unsigned x = 0; x < other.width; x += 4 )
      {
        g_pelBufOP.transpose4x4( other.buf + y * other.stride + x, other.stride, buf + x * stride + y, stride );
      }
    }
    return;
  }

  // scalar fallback
  Pel*       dst = buf;
  const Pel* src = other.buf;

  width  = other.height;
  height = other.width;
  stride = stride < width ? width : stride;

  for( unsigned y = 0; y < other.height; y++ )
  {
    for( unsigned x = 0; x < other.width; x++ )
    {
      dst[y + x*stride] = src[x + y * other.stride];
    }
  }
}
#endif
607
608
template<>
609
void AreaBuf<MotionInfo>::fill( const MotionInfo& val )
610
0
{
611
0
  if( width == stride )
612
0
  {
613
0
    std::fill_n( buf, width * height, val );
614
0
  }
615
0
  else
616
0
  {
617
0
    MotionInfo* dst = buf;
618
619
0
    for( int y = 0; y < height; y++, dst += stride )
620
0
    {
621
0
      std::fill_n( dst, width, val );
622
0
    }
623
0
  }
624
0
}
625
626
/** Creates an empty storage: every per-component origin pointer and allocator handle
 *  starts out as nullptr.
 */
PelStorage::PelStorage()
{
  for( uint32_t comp = 0; comp != MAX_NUM_COMPONENT; ++comp )
  {
    m_allocator[comp] = nullptr;
    m_origin[comp]    = nullptr;
  }
}
634
635
/** Releases everything the storage still holds by delegating to destroy(). */
PelStorage::~PelStorage()
{
  this->destroy();
}
639
640
void PelStorage::create( const UnitArea &_UnitArea )
641
0
{
642
0
  create( _UnitArea.chromaFormat, _UnitArea.blocks[0] );
643
0
}
644
645
void PelStorage::create( const ChromaFormat _chromaFormat, const Size& _size, const unsigned _maxCUSize, const unsigned _margin, const unsigned _alignmentByte, const bool _scaleChromaMargin, const UserAllocator* userAlloc )
646
0
{
647
0
  CHECK( !bufs.empty(), "Trying to re-create an already initialized buffer" );
648
649
0
  chromaFormat = _chromaFormat;
650
651
0
  const uint32_t numCh = getNumberValidComponents( _chromaFormat );
652
653
0
  unsigned extHeight = _size.height;
654
0
  unsigned extWidth  = _size.width;
655
656
0
  if( _maxCUSize )
657
0
  {
658
0
    extHeight = ( ( _size.height + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
659
0
    extWidth  = ( ( _size.width  + _maxCUSize - 1 ) / _maxCUSize ) * _maxCUSize;
660
0
  }
661
662
0
  const unsigned _alignment = _alignmentByte / sizeof( Pel );
663
664
0
  for( uint32_t i = 0; i < numCh; i++ )
665
0
  {
666
0
    const ComponentID compID = ComponentID( i );
667
0
    const unsigned scaleX = getComponentScaleX( compID, _chromaFormat );
668
0
    const unsigned scaleY = getComponentScaleY( compID, _chromaFormat );
669
670
0
    unsigned scaledHeight = extHeight >> scaleY;
671
0
    unsigned scaledWidth  = extWidth  >> scaleX;
672
0
    unsigned ymargin      = _margin >> (_scaleChromaMargin?scaleY:0);
673
0
    unsigned xmargin      = _margin >> (_scaleChromaMargin?scaleX:0);
674
675
0
#if 1
676
0
    if( _alignment && xmargin )
677
0
    {
678
0
      xmargin = ( ( xmargin + _alignment - 1 ) / _alignment ) * _alignment;
679
0
    }
680
681
0
#endif
682
0
    SizeType totalWidth   = scaledWidth + 2 * xmargin;
683
0
    SizeType totalHeight  = scaledHeight +2 * ymargin;
684
685
0
    if( _alignment )
686
0
    {
687
      // make sure buffer lines are align
688
0
      CHECK( _alignmentByte != MEMORY_ALIGN_DEF_SIZE, "Unsupported alignment" );
689
0
      totalWidth = ( ( totalWidth + _alignment - 1 ) / _alignment ) * _alignment;
690
0
    }
691
692
0
#if ENABLE_SIMD_OPT_INTER
693
0
    uint32_t area = totalWidth * totalHeight + 1; // +1 for the extra Pel overread in prefetchPad_SSE, in case reading from the very bottom right of the picture
694
#else
695
    uint32_t area = totalWidth * totalHeight;
696
#endif
697
0
    CHECK( !area, "Trying to create a buffer with zero area" );
698
699
0
    m_origSi[i] = Size{ totalWidth, totalHeight };
700
0
    if( userAlloc && userAlloc->enabled )
701
0
    {
702
0
      m_origin[i] = ( Pel* ) userAlloc->create( userAlloc->opaque, (vvdecComponentType)i, sizeof(Pel)*area, MEMORY_ALIGN_DEF_SIZE, &m_allocator[i] );
703
0
      CHECK( m_origin[i] == nullptr, "external allocator callback failed (returned NULL)." );
704
0
      m_externAllocator = true;
705
0
      m_userAlloc       = userAlloc;
706
0
    }
707
0
    else
708
0
    {
709
0
      m_origin[i] = ( Pel* ) xMalloc( Pel, area );
710
0
    }
711
0
    Pel* topLeft = m_origin[i] + totalWidth * ymargin + xmargin;
712
0
    bufs.push_back( PelBuf( topLeft, totalWidth, _size.width >> scaleX, _size.height >> scaleY ) );
713
0
  }
714
0
}
715
716
/** Makes this storage describe the planes of an existing unit buffer.
 *  For every valid component a PelBuf with the same start pointer, stride, width and height
 *  as the corresponding plane of 'buf' is stored; no memory is allocated in this function.
 */
void PelStorage::createFromBuf( PelUnitBuf buf )
{
  chromaFormat = buf.chromaFormat;

  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  bufs.resize(numCh);

  for( uint32_t comp = 0; comp < numCh; ++comp )
  {
    PelBuf plane = buf.get( ComponentID( comp ) );

    bufs[comp] = PelBuf( plane.bufAt( 0, 0 ), plane.stride, plane.width, plane.height );
  }
}
730
731
/** Exchanges the sample memory of this storage with that of 'other'.
 *
 *  Per valid component the buffer pointer, stride, origin pointer and allocator handle are
 *  swapped; the external-allocator flag and the user-allocator pointer are swapped once.
 *  Before each component is swapped, CHECKs verify that both storages use the same chroma
 *  format, that get( comp ) compares equal on both sides and that the strides match.
 */
void PelStorage::swap( PelStorage& other )
{
  const uint32_t numCh = getNumberValidComponents( chromaFormat );

  for( uint32_t i = 0; i < numCh; i++ )
  {
    const ComponentID comp = ComponentID( i );

    // check this otherwise it would turn out to get very weird
    CHECK( chromaFormat       != other.chromaFormat      , "Incompatible formats" );
    CHECK( get( comp )        != other.get( comp )       , "Incompatible formats" );
    CHECK( get( comp ).stride != other.get( comp ).stride, "Incompatible formats" );

    std::swap( bufs[i].buf,    other.bufs[i].buf );
    std::swap( bufs[i].stride, other.bufs[i].stride );
    std::swap( m_origin[i],    other.m_origin[i] );
    std::swap( m_allocator[i], other.m_allocator[i] );
  }

  std::swap( m_externAllocator, other.m_externAllocator );
  std::swap( m_userAlloc,       other.m_userAlloc );
}
750
751
void PelStorage::destroy()
752
0
{
753
0
  chromaFormat = NUM_CHROMA_FORMAT;
754
0
  for( uint32_t i = 0; i < MAX_NUM_COMPONENT; i++ )
755
0
  {
756
0
    if( m_origin[i] )
757
0
    {
758
0
      if ( !m_externAllocator )
759
0
      {
760
0
        xFree( m_origin[i] );
761
0
      }
762
0
      else if( m_allocator[i])
763
0
      {
764
0
        CHECK( m_userAlloc->unref == nullptr, "vvdecUnrefBufferCallback not valid, cannot unref picture buffer" )
765
0
        m_userAlloc->unref( m_userAlloc->opaque, m_allocator[i] );
766
0
      }
767
0
      m_origin[i] = nullptr;
768
0
    }
769
0
  }
770
0
  bufs.clear();
771
0
}
772
773
PelBuf PelStorage::getBuf( const ComponentID CompID )
774
0
{
775
0
  return bufs[CompID];
776
0
}
777
778
const CPelBuf PelStorage::getBuf( const ComponentID CompID ) const
779
0
{
780
0
  return bufs[CompID];
781
0
}
782
783
PelBuf PelStorage::getBuf( const CompArea &blk )
784
0
{
785
0
  const PelBuf& r = bufs[blk.compID()];
786
787
0
  CHECKD( rsAddr( blk.bottomRight(), r.stride ) >= ( ( r.height - 1 ) * r.stride + r.width ), "Trying to access a buf outside of bound!" );
788
789
0
  return PelBuf( r.buf + rsAddr( blk, r.stride ), r.stride, blk );
790
0
}
791
792
const CPelBuf PelStorage::getBuf( const CompArea &blk ) const
793
0
{
794
0
  const PelBuf& r = bufs[blk.compID()];
795
0
  return CPelBuf( r.buf + rsAddr( blk, r.stride ), r.stride, blk );
796
0
}
797
798
PelUnitBuf PelStorage::getBuf( const UnitArea &unit )
799
0
{
800
0
  return ( chromaFormat == CHROMA_400 ) ? PelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : PelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
801
0
}
802
803
const CPelUnitBuf PelStorage::getBuf( const UnitArea &unit ) const
804
0
{
805
0
  return ( chromaFormat == CHROMA_400 ) ? CPelUnitBuf( chromaFormat, getBuf( unit.Y() ) ) : CPelUnitBuf( chromaFormat, getBuf( unit.Y() ), getBuf( unit.Cb() ), getBuf( unit.Cr() ) );
806
0
}
807
808
template<>
809
void UnitBuf<Pel>::colorSpaceConvert( const UnitBuf<Pel> &other, const ClpRng& clpRng )
810
0
{
811
0
  const Pel* pOrg0 = bufs[COMPONENT_Y ].buf;
812
0
  const Pel* pOrg1 = bufs[COMPONENT_Cb].buf;
813
0
  const Pel* pOrg2 = bufs[COMPONENT_Cr].buf;
814
0
  const ptrdiff_t strideOrg = bufs[COMPONENT_Y ].stride;
815
816
0
  Pel* pDst0 = other.bufs[COMPONENT_Y ].buf;
817
0
  Pel* pDst1 = other.bufs[COMPONENT_Cb].buf;
818
0
  Pel* pDst2 = other.bufs[COMPONENT_Cr].buf;
819
0
  const ptrdiff_t strideDst = other.bufs[COMPONENT_Y ].stride;
820
821
0
  int width  = bufs[COMPONENT_Y].width;
822
0
  int height = bufs[COMPONENT_Y].height;
823
0
  int maxAbsclipBD = (1 << (clpRng.bd + 1)) - 1;
824
0
  int y0, cg, co;
825
826
0
  CHECKD( bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cb].stride || bufs[COMPONENT_Y].stride != bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
827
0
  CHECKD( other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cb].stride || other.bufs[COMPONENT_Y].stride != other.bufs[COMPONENT_Cr].stride, "unequal stride for 444 content" );
828
0
  CHECKD( bufs[COMPONENT_Y].width != other.bufs[COMPONENT_Y].width || bufs[COMPONENT_Y].height != other.bufs[COMPONENT_Y].height, "unequal block size" );
829
830
0
  for( int y = 0; y < height; y++ )
831
0
  {
832
0
    for( int x = 0; x < width; x++ )
833
0
    {
834
0
      y0 = pOrg0[x];
835
0
      cg = pOrg1[x];
836
0
      co = pOrg2[x];
837
838
0
      y0 = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, y0);
839
0
      cg = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, cg);
840
0
      co = Clip3((-maxAbsclipBD - 1), maxAbsclipBD, co);
841
842
0
      int t = y0 - (cg >> 1);
843
0
      pDst0[x] = cg + t;
844
0
      pDst1[x] = t - (co >> 1);
845
0
      pDst2[x] = co + pDst1[x];
846
0
    }
847
848
0
    pOrg0 += strideOrg;
849
0
    pOrg1 += strideOrg;
850
0
    pOrg2 += strideOrg;
851
0
    pDst0 += strideDst;
852
0
    pDst1 += strideDst;
853
0
    pDst2 += strideDst;
854
0
  }
855
0
}
856
857
template void UnitBuf<Pel>::writeToFile( std::string filename ) const;
858
859
template<typename T>
860
void UnitBuf<T>::writeToFile( std::string filename ) const
861
0
{
862
0
  FILE* f = fopen( filename.c_str(), "w" );
863
0
  CHECK_FATAL( f == nullptr, "writeToFile() cannot open file for writing" )
864
865
0
  for( auto& b: bufs )
866
0
  {
867
0
    for( unsigned y = 0; y < b.height; y++ )
868
0
    {
869
0
      fwrite( b.bufAt( 0, y ), sizeof( T ), b.width, f );
870
0
    }
871
0
  }
872
873
0
  fclose( f );
874
0
}
875
876
}